You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by "István Bakró Nagy (JIRA)" <ji...@apache.org> on 2015/06/12 12:37:01 UTC
[jira] [Updated] (SOLR-7670) solr import files from multiple
dataSource entity
[ https://issues.apache.org/jira/browse/SOLR-7670?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
István Bakró Nagy updated SOLR-7670:
------------------------------------
Description:
I am trying to import files from multiple folders.
My solrconfig.xml invokes the following file to use it with org.apache.solr.handler.dataimport.DataImportHandler.
{{<dataConfig>
<dataSource type="BinFileDataSource" />
<document>
<entity name="files1"
dataSource="null"
rootEntity="false"
processor="FileListEntityProcessor"
baseDir="/w/PDF/"
fileName=".*\.(pdf)|(doc)|(docx)|(ppt)|(pptx)|(xls)|(xlsx)|(odf)|(txt)|(rtf)|(html)|(htm)|(jpg)"
onError="skip"
recursive="true">
<field column="fileAbsolutePath" name="id" />
<field column="fileSize" name="size" />
<field column="fileLastModified" name="lastModified" />
<field column="file" name="fileName"/>
<entity
name="documentImport1"
processor="TikaEntityProcessor"
url="${files.fileAbsolutePath}"
format="text">
<field column="file" name="fileName"/>
<field column="Author" name="author" meta="true"/>
<field column="title" name="title" meta="true"/>
<field column="text" name="text"/>
<copyField source="content" dest="text"/>
</entity>
</entity>
<entity name="files2"
dataSource="null"
rootEntity="false"
processor="FileListEntityProcessor"
baseDir="/w/KNOW-HOW/"
fileName=".*\.(pdf)|(doc)|(docx)|(ppt)|(pptx)|(xls)|(xlsx)|(odf)|(txt)|(rtf)|(html)|(htm)|(jpg)"
onError="skip"
recursive="true">
<field column="fileAbsolutePath" name="id" />
<field column="fileSize" name="size" />
<field column="fileLastModified" name="lastModified" />
<field column="file" name="fileName"/>
<entity
name="documentImport2"
processor="TikaEntityProcessor"
url="${files.fileAbsolutePath}"
format="text">
<field column="file" name="fileName"/>
<field column="Author" name="author" meta="true"/>
<field column="title" name="title" meta="true"/>
<field column="text" name="text"/>
<copyField source="content" dest="text"/>
</entity>
</entity>
</document>
</dataConfig>}}
During import I get a FileNotFoundException.
What am I missing?
was:
I am trying to import files from multiple folders.
My solrconfig.xml invokes the following file to use it with org.apache.solr.handler.dataimport.DataImportHandler.
<dataConfig>
<dataSource type="BinFileDataSource" />
<document>
<entity name="files1"
dataSource="null"
rootEntity="false"
processor="FileListEntityProcessor"
baseDir="/w/PDF/"
fileName=".*\.(pdf)|(doc)|(docx)|(ppt)|(pptx)|(xls)|(xlsx)|(odf)|(txt)|(rtf)|(html)|(htm)|(jpg)"
onError="skip"
recursive="true">
<field column="fileAbsolutePath" name="id" />
<field column="fileSize" name="size" />
<field column="fileLastModified" name="lastModified" />
<field column="file" name="fileName"/>
<entity
name="documentImport1"
processor="TikaEntityProcessor"
url="${files.fileAbsolutePath}"
format="text">
<field column="file" name="fileName"/>
<field column="Author" name="author" meta="true"/>
<field column="title" name="title" meta="true"/>
<field column="text" name="text"/>
<copyField source="content" dest="text"/>
</entity>
</entity>
<entity name="files2"
dataSource="null"
rootEntity="false"
processor="FileListEntityProcessor"
baseDir="/w/KNOW-HOW/"
fileName=".*\.(pdf)|(doc)|(docx)|(ppt)|(pptx)|(xls)|(xlsx)|(odf)|(txt)|(rtf)|(html)|(htm)|(jpg)"
onError="skip"
recursive="true">
<field column="fileAbsolutePath" name="id" />
<field column="fileSize" name="size" />
<field column="fileLastModified" name="lastModified" />
<field column="file" name="fileName"/>
<entity
name="documentImport2"
processor="TikaEntityProcessor"
url="${files.fileAbsolutePath}"
format="text">
<field column="file" name="fileName"/>
<field column="Author" name="author" meta="true"/>
<field column="title" name="title" meta="true"/>
<field column="text" name="text"/>
<copyField source="content" dest="text"/>
</entity>
</entity>
</document>
</dataConfig>
During import I get a FileNotFoundException.
What am I missing?
> solr import files from multiple dataSource entity
> -------------------------------------------------
>
> Key: SOLR-7670
> URL: https://issues.apache.org/jira/browse/SOLR-7670
> Project: Solr
> Issue Type: Bug
> Affects Versions: 5.1
> Reporter: István Bakró Nagy
> Priority: Minor
> Original Estimate: 24h
> Remaining Estimate: 24h
>
> I am trying to import files from multiple folders.
> My solrconfig.xml invokes the following file to use it with org.apache.solr.handler.dataimport.DataImportHandler.
> {{<dataConfig>
> <dataSource type="BinFileDataSource" />
> <document>
> <entity name="files1"
> dataSource="null"
> rootEntity="false"
> processor="FileListEntityProcessor"
> baseDir="/w/PDF/"
> fileName=".*\.(pdf)|(doc)|(docx)|(ppt)|(pptx)|(xls)|(xlsx)|(odf)|(txt)|(rtf)|(html)|(htm)|(jpg)"
> onError="skip"
> recursive="true">
> <field column="fileAbsolutePath" name="id" />
> <field column="fileSize" name="size" />
> <field column="fileLastModified" name="lastModified" />
> <field column="file" name="fileName"/>
> <entity
> name="documentImport1"
> processor="TikaEntityProcessor"
> url="${files.fileAbsolutePath}"
> format="text">
> <field column="file" name="fileName"/>
> <field column="Author" name="author" meta="true"/>
> <field column="title" name="title" meta="true"/>
> <field column="text" name="text"/>
> <copyField source="content" dest="text"/>
> </entity>
> </entity>
> <entity name="files2"
> dataSource="null"
> rootEntity="false"
> processor="FileListEntityProcessor"
> baseDir="/w/KNOW-HOW/"
> fileName=".*\.(pdf)|(doc)|(docx)|(ppt)|(pptx)|(xls)|(xlsx)|(odf)|(txt)|(rtf)|(html)|(htm)|(jpg)"
> onError="skip"
> recursive="true">
> <field column="fileAbsolutePath" name="id" />
> <field column="fileSize" name="size" />
> <field column="fileLastModified" name="lastModified" />
> <field column="file" name="fileName"/>
> <entity
> name="documentImport2"
> processor="TikaEntityProcessor"
> url="${files.fileAbsolutePath}"
> format="text">
> <field column="file" name="fileName"/>
> <field column="Author" name="author" meta="true"/>
> <field column="title" name="title" meta="true"/>
> <field column="text" name="text"/>
> <copyField source="content" dest="text"/>
> </entity>
> </entity>
> </document>
> </dataConfig>}}
> During import I get a FileNotFoundException.
> What am I missing?
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org