You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by anarchos78 <ri...@hotmail.com> on 2012/07/05 09:26:44 UTC
Indexing binary files from database issue (no errors)
Greetings friends,
I am trying to index binary files stored in a database (mysql) and I have no
success. I have a solr configured as below:
*Solr file structure*
+solr
+bookledger(core0)
-conf
+lib(all necessary libraries)
+contrib
+dist
+data
+bookledger
-index
-spellchecker
+ktimatologio
-index
-spellchecker
+ktimatologio(core1)
-conf
+lib(all necessary libraries)
+contrib
+dist
As you can see the configuration concerns a multicore solr setup. Now, on
the bookledger(core0) I have indexed binary files successfully (stored in a
database). In the second core when I conduct full-import I see no errors!
Then, when I try to query the binary content the output is like:
[B@660b1b14. What am I missing here?
Thank you in advance,
Tom
Greece
*The solr.xml file:*
<?xml version="1.0" encoding="UTF-8" ?>
<solr persistent="false">
<cores adminPath="/admin/cores">
<core name="ktimatologio" instanceDir="ktimatologio"
dataDir="../data/ktimatologio"/>
<core name="bookledger" instanceDir="bookledger"
dataDir="../data/bookledger"/>
</cores>
</solr>
*The solrconfig.xml file:*
<?xml version="1.0" encoding="UTF-8" ?>
<config>
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
<luceneMatchVersion>LUCENE_36</luceneMatchVersion>
<lib dir="lib/dist/" regex="apache-solr-cell-\d.*\.jar" />
<lib dir="lib/dist/" regex="apache-solr-clustering-\d.*\.jar" />
<lib dir="lib/dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />
<lib dir="lib/dist/" regex="apache-solr-langid-\d.*\.jar" />
<lib dir="lib/dist/" regex="apache-solr-velocity-\d.*\.jar" />
<lib dir="lib/dist/"
regex="apache-solr-dataimporthandler-extras-\d.*\.jar" />
<lib dir="lib/contrib/extraction/lib/" regex=".*\.jar" />
<lib dir="lib/contrib/clustering/lib/" regex=".*\.jar" />
<lib dir="lib/contrib/dataimporthandler/lib/" regex=".*\.jar" />
<lib dir="lib/contrib/langid/lib/" regex=".*\.jar" />
<lib dir="lib/contrib/velocity/lib/" regex=".*\.jar" />
<lib dir="lib/contrib/extraction/lib/" regex="tika-core-\d.*\.jar" />
<lib dir="lib/contrib/extraction/lib/" regex="tika-parsers-\d.*\.jar" />
<dataDir>${solr.data.dir:}</dataDir>
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
<indexConfig>
</indexConfig>
<jmx />
<updateHandler class="solr.DirectUpdateHandler2">
</updateHandler>
<query>
<maxBooleanClauses>1024</maxBooleanClauses>
<filterCache class="solr.FastLRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<queryResultCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<documentCache class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<queryResultWindowSize>20</queryResultWindowSize>
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
</arr>
</listener>
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst>
<str name="q">static firstSearcher warming in solrconfig.xml</str>
</lst>
</arr>
</listener>
<useColdSearcher>false</useColdSearcher>
<maxWarmingSearchers>2</maxWarmingSearchers>
</query>
<requestDispatcher>
<requestParsers enableRemoteStreaming="true"
multipartUploadLimitInKB="2048000" />
</requestDispatcher>
<requestHandler name="/dataimport"
class="org.apache.solr.handler.dataimport.DataImportHandler">
<lst name="defaults">
<str name="config">data-config.xml</str>
</lst>
</requestHandler>
<requestHandler name="/select" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<int name="rows">100</int>
</lst>
</requestHandler>
<requestHandler name="/browse" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="wt">velocity</str>
<str name="v.template">browse</str>
<str name="v.layout">layout</str>
<str name="title">Solritas</str>
<str name="df">text</str>
<str name="defType">edismax</str>
<str name="q.alt">*:*</str>
<str name="rows">10</str>
<str name="fl">*,score</str>
<str name="mlt.qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
</str>
<str name="mlt.fl">text,features,name,sku,id,manu,cat</str>
<int name="mlt.count">3</int>
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
</str>
<str name="facet">on</str>
<str name="facet.field">cat</str>
<str name="facet.field">manu_exact</str>
<str name="facet.query">ipod</str>
<str name="facet.query">GB</str>
<str name="facet.mincount">1</str>
<str name="facet.pivot">cat,inStock</str>
<str name="facet.range.other">after</str>
<str name="facet.range">price</str>
<int name="f.price.facet.range.start">0</int>
<int name="f.price.facet.range.end">600</int>
<int name="f.price.facet.range.gap">50</int>
<str name="facet.range">popularity</str>
<int name="f.popularity.facet.range.start">0</int>
<int name="f.popularity.facet.range.end">10</int>
<int name="f.popularity.facet.range.gap">3</int>
<str name="facet.range">manufacturedate_dt</str>
<str
name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
<str name="f.manufacturedate_dt.facet.range.end">NOW</str>
<str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
<str name="f.manufacturedate_dt.facet.range.other">before</str>
<str name="f.manufacturedate_dt.facet.range.other">after</str>
<str name="hl">on</str>
<str name="hl.fl">text features name</str>
<str name="f.name.hl.fragsize">0</str>
<str name="f.name.hl.alternateField">name</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<requestHandler name="/update"
class="solr.XmlUpdateRequestHandler">
</requestHandler>
<requestHandler name="/update/javabin"
class="solr.BinaryUpdateRequestHandler" />
<requestHandler name="/update/csv"
class="solr.CSVRequestHandler"
startup="lazy" />
<requestHandler name="/update/json"
class="solr.JsonUpdateRequestHandler"
startup="lazy" />
<requestHandler name="/update/extract"
startup="lazy"
class="solr.extraction.ExtractingRequestHandler" >
<lst name="defaults">
<str name="fmap.content">text</str>
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>
<str name="captureAttr">true</str>
<str name="fmap.a">links</str>
<str name="fmap.div">ignored_</str>
</lst>
</requestHandler>
<requestHandler name="/update/xslt"
startup="lazy"
class="solr.XsltUpdateRequestHandler"/>
<requestHandler name="/analysis/field"
startup="lazy"
class="solr.FieldAnalysisRequestHandler" />
<requestHandler name="/analysis/document"
class="solr.DocumentAnalysisRequestHandler"
startup="lazy" />
<requestHandler name="/admin/"
class="solr.admin.AdminHandlers" />
<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
<lst name="invariants">
<str name="q">solrpingquery</str>
</lst>
<lst name="defaults">
<str name="echoParams">all</str>
</lst>
</requestHandler>
<requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="echoHandler">true</str>
</lst>
</requestHandler>
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
<str name="queryAnalyzerFieldType">textSpell</str>
<lst name="spellchecker">
<str name="name">default</str>
<str name="field">name</str>
<str name="spellcheckIndexDir">spellchecker</str>
</lst>
</searchComponent>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="df">text</str>
<str name="spellcheck.onlyMorePopular">false</str>
<str name="spellcheck.extendedResults">false</str>
<str name="spellcheck.count">1</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>
</arr>
</requestHandler>
<searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
<requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="df">text</str>
<bool name="tv">true</bool>
</lst>
<arr name="last-components">
<str>tvComponent</str>
</arr>
</requestHandler>
<searchComponent name="clustering"
enable="${solr.clustering.enabled:false}"
class="solr.clustering.ClusteringComponent" >
<lst name="engine">
<str name="name">default</str>
<str
name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
<str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
<str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
<str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
</lst>
<lst name="engine">
<str name="name">stc</str>
<str
name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
</lst>
</searchComponent>
<requestHandler name="/clustering"
startup="lazy"
enable="${solr.clustering.enabled:false}"
class="solr.SearchHandler">
<lst name="defaults">
<bool name="clustering">true</bool>
<str name="clustering.engine">default</str>
<bool name="clustering.results">true</bool>
<str name="carrot.title">name</str>
<str name="carrot.url">id</str>
<str name="carrot.snippet">features</str>
<bool name="carrot.produceSummary">true</bool>
<bool name="carrot.outputSubClusters">false</bool>
<str name="df">text</str>
<str name="defType">edismax</str>
<str name="qf">
text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
</str>
<str name="q.alt">*:*</str>
<str name="rows">10</str>
<str name="fl">*,score</str>
</lst>
<arr name="last-components">
<str>clustering</str>
</arr>
</requestHandler>
<searchComponent name="terms" class="solr.TermsComponent"/>
<requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<bool name="terms">true</bool>
</lst>
<arr name="components">
<str>terms</str>
</arr>
</requestHandler>
<searchComponent name="elevator" class="solr.QueryElevationComponent" >
<str name="queryFieldType">string</str>
<str name="config-file">elevate.xml</str>
</searchComponent>
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="df">text</str>
</lst>
<arr name="last-components">
<str>elevator</str>
</arr>
</requestHandler>
<searchComponent class="solr.HighlightComponent" name="highlight">
<highlighting>
<fragmenter name="gap"
default="true"
class="solr.highlight.GapFragmenter">
<lst name="defaults">
<int name="hl.fragsize">100</int>
</lst>
</fragmenter>
<fragmenter name="regex"
class="solr.highlight.RegexFragmenter">
<lst name="defaults">
<int name="hl.fragsize">70</int>
<float name="hl.regex.slop">0.5</float>
<str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
</lst>
</fragmenter>
<formatter name="html"
default="true"
class="solr.highlight.HtmlFormatter">
<lst name="defaults">
<str name="hl.simple.pre"></str>
<str name="hl.simple.post"></str>
</lst>
</formatter>
<encoder name="html"
class="solr.highlight.HtmlEncoder" />
<fragListBuilder name="simple"
default="true"
class="solr.highlight.SimpleFragListBuilder"/>
<fragListBuilder name="single"
class="solr.highlight.SingleFragListBuilder"/>
<fragmentsBuilder name="default"
default="true"
class="solr.highlight.ScoreOrderFragmentsBuilder">
</fragmentsBuilder>
<fragmentsBuilder name="colored"
class="solr.highlight.ScoreOrderFragmentsBuilder">
<lst name="defaults">
<str name="hl.tag.pre"></str>
<str name="hl.tag.post"></str>
</lst>
</fragmentsBuilder>
<boundaryScanner name="default"
default="true"
class="solr.highlight.SimpleBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.maxScan">10</str>
<str name="hl.bs.chars">.,!? 	 </str>
</lst>
</boundaryScanner>
<boundaryScanner name="breakIterator"
class="solr.highlight.BreakIteratorBoundaryScanner">
<lst name="defaults">
<str name="hl.bs.type">WORD</str>
<str name="hl.bs.language">en</str>
<str name="hl.bs.country">US</str>
</lst>
</boundaryScanner>
</highlighting>
</searchComponent>
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
<str name="content-type">text/plain; charset=UTF-8</str>
</queryResponseWriter>
<queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"
startup="lazy"/>
-->
<queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
<int name="xsltCacheLifetimeSeconds">5</int>
</queryResponseWriter>
<admin>
<defaultQuery>*:*</defaultQuery>
</admin>
</config>
*The schema.xml file:*
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="ktimatologio" version="1.5">
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
<fieldType name="boolean" class="solr.BoolField"
sortMissingLast="true"/>
<fieldtype name="binary" class="solr.BinaryField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8"
positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8"
positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8"
positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8"
positionIncrementGap="0"/>
<fieldType name="date" class="solr.TrieDateField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6"
positionIncrementGap="0"/>
<fieldType name="pint" class="solr.IntField"/>
<fieldType name="plong" class="solr.LongField"/>
<fieldType name="pfloat" class="solr.FloatField"/>
<fieldType name="pdouble" class="solr.DoubleField"/>
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
<fieldType name="sint" class="solr.SortableIntField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="slong" class="solr.SortableLongField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="sfloat" class="solr.SortableFloatField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="sdouble" class="solr.SortableDoubleField"
sortMissingLast="true" omitNorms="true"/>
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
<fieldType name="text_el" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.GreekLowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="false"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
<filter class="solr.GreekStemFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_ktimatologio" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.GreekLowerCaseFilterFactory"/>
<filter class="solr.GreekStemFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
<filter class="solr.GreekLowerCaseFilterFactory"/>
<filter class="solr.GreekStemFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<fieldtype name="ignored" stored="false" indexed="false"
multiValued="true" class="solr.StrField" />
<fieldType name="point" class="solr.PointType" dimension="2"
subFieldSuffix="_d"/>
<fieldType name="location" class="solr.LatLonType"
subFieldSuffix="_coordinate"/>
<fieldtype name="geohash" class="solr.GeoHashField"/>
<fieldType name="currency" class="solr.CurrencyField" precisionStep="8"
defaultCurrency="USD" currencyConfig="currency.xml" />
</types>
<fields>
<field name="id" type="string" indexed="true" stored="true"
multiValued="false"/>
<field name="solr_id" type="string" indexed="true" stored="true"
multiValued="false"/>
<field name="title" type="text_ktimatologio" indexed="true"
stored="true"/>
<field name="model" type="text_ktimatologio" indexed="true" stored="true"
multiValued="false"/>
<field name="type" type="text_ktimatologio" indexed="true"
stored="true"/>
<field name="url" type="text_ktimatologio" indexed="true" stored="true"/>
<field name="content" type="text_ktimatologio" indexed="true"
stored="true" multiValued="true"/>
<field name="last_modified" type="string" indexed="true" stored="true"/>
</fields>
<uniqueKey>solr_id</uniqueKey>
<defaultSearchField>content</defaultSearchField>
<solrQueryParser defaultOperator="OR"/>
<copyField source="title" dest="content" />
</schema>
*The data-config.xml file:*
<?xml version="1.0" encoding="utf-8"?>
<dataConfig>
<dataSource type="JdbcDataSource"
autoCommit="true" batchSize="-1"
convertType="false"
driver="com.mysql.jdbc.Driver"
url="jdbc:mysql://127.0.0.1:3306/ktimatologio"
user="root"
password="1a2b3c4d"/>
<dataSource name="fieldReader" type="FieldStreamDataSource" />
<document>
<entity name="aitiologikes_ektheseis"
dataSource="db"
transformer="HTMLStripTransformer"
query="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, body AS content from
aitiologikes_ektheseis where type = 'text'"
deltaImportQuery="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, body AS content from
aitiologikes_ektheseis where type = 'text' and
id='${dataimporter.delta.id}'"
deltaQuery="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, body AS content from
aitiologikes_ektheseis where type = 'text' and last_modified >
'${dataimporter.last_index_time}'">
<field column="id" name="id" />
<field column="solr_id" name="solr_id" />
<field column="title" name="title" stripHTML="true" />
<field column="model" name="model" stripHTML="true" />
<field column="type" name="type" stripHTML="true" />
<field column="url" name="url" stripHTML="true" />
<field column="last_modified" name="last_modified" stripHTML="true" />
<field column="content" name="content" stripHTML="true" />
</entity>
<entity name="aitiologikes_ektheseis_bin"
query="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
aitiologikes_ektheseis where type = 'bin'"
deltaImportQuery="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
aitiologikes_ektheseis where type = 'bin' and id='${dataimporter.delta.id}'"
deltaQuery="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
aitiologikes_ektheseis where type = 'bin' and last_modified >
'${dataimporter.last_index_time}'"
transformer="TemplateTransformer"
dataSource="db">
<entity dataSource="fieldReader" processor="TikaEntityProcessor"
dataField="aitiologikes_ektheseis_bin.content" format="text">
<field column="id" name="id" />
<field column="solr_id" name="solr_id" />
<field column="title" name="title" stripHTML="true" />
<field column="model" name="model" stripHTML="true" />
<field column="type" name="type" stripHTML="true" />
<field column="url" name="url" stripHTML="true" />
<field column="last_modified" name="last_modified" stripHTML="true" />
<field column="content" name="content" stripHTML="true" />
</entity>
</entity>
</document>
</dataConfig>
--
View this message in context: http://lucene.472066.n3.nabble.com/Indexing-binary-files-from-database-issue-no-errors-tp3993140.html
Sent from the Solr - User mailing list archive at Nabble.com.
Re: Indexing binary files from database issue (no errors)
Posted by Jan Høydahl <ja...@cominvent.com>.
Hi,
Did you get a reply on this?
I'd guess that it is your JDBC driver which does not handle the response to your CONCAT_WS() SQL. Try without it and see. Then try to upgrade your mysql JDBC driver to a newer version and see if it helps.
--
Jan Høydahl, search solution architect
Cominvent AS - www.cominvent.com
Solr Training - www.solrtraining.com
On 5. juli 2012, at 09:26, anarchos78 wrote:
> Greetings friends,
> I am trying to index binary files stored in a database (mysql) and I have no
> success. I have a solr configured as below:
> *Solr file structure*
> +solr
> +bookledger(core0)
> -conf
> +lib(all necessary libraries)
> +contrib
> +dist
> +data
> +bookledger
> -index
> -spellchecker
> +ktimatologio
> -index
> -spellchecker
> +ktimatologio(core1)
> -conf
> +lib(all necessary libraries)
> +contrib
> +dist
>
> As you can see the configuration concerns a multicore solr setup. Now, on
> the bookledger(core0) I have indexed binary files successfully (stored in a
> database). In the second core when I conduct full-import I see no errors!
> Then, when I try to query the binary content the output is like:
> [B@660b1b14. What am I missing here?
>
> Thank you in advance,
>
> Tom
> Greece
>
> *The solr.xml file:*
>
> <?xml version="1.0" encoding="UTF-8" ?>
> <solr persistent="false">
> <cores adminPath="/admin/cores">
> <core name="ktimatologio" instanceDir="ktimatologio"
> dataDir="../data/ktimatologio"/>
> <core name="bookledger" instanceDir="bookledger"
> dataDir="../data/bookledger"/>
> </cores>
> </solr>
>
> *The solrconfig.xml file:*
>
> <?xml version="1.0" encoding="UTF-8" ?>
>
> <config>
>
>
> <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
>
>
> <luceneMatchVersion>LUCENE_36</luceneMatchVersion>
>
> <lib dir="lib/dist/" regex="apache-solr-cell-\d.*\.jar" />
> <lib dir="lib/dist/" regex="apache-solr-clustering-\d.*\.jar" />
> <lib dir="lib/dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />
> <lib dir="lib/dist/" regex="apache-solr-langid-\d.*\.jar" />
> <lib dir="lib/dist/" regex="apache-solr-velocity-\d.*\.jar" />
> <lib dir="lib/dist/"
> regex="apache-solr-dataimporthandler-extras-\d.*\.jar" />
>
> <lib dir="lib/contrib/extraction/lib/" regex=".*\.jar" />
> <lib dir="lib/contrib/clustering/lib/" regex=".*\.jar" />
> <lib dir="lib/contrib/dataimporthandler/lib/" regex=".*\.jar" />
> <lib dir="lib/contrib/langid/lib/" regex=".*\.jar" />
> <lib dir="lib/contrib/velocity/lib/" regex=".*\.jar" />
> <lib dir="lib/contrib/extraction/lib/" regex="tika-core-\d.*\.jar" />
> <lib dir="lib/contrib/extraction/lib/" regex="tika-parsers-\d.*\.jar" />
>
>
> <dataDir>${solr.data.dir:}</dataDir>
>
>
>
> <directoryFactory name="DirectoryFactory"
>
> class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
>
>
> <indexConfig>
>
> </indexConfig>
>
>
>
> <jmx />
>
>
>
> <updateHandler class="solr.DirectUpdateHandler2">
>
>
> </updateHandler>
>
> <query>
>
> <maxBooleanClauses>1024</maxBooleanClauses>
>
>
>
> <filterCache class="solr.FastLRUCache"
> size="512"
> initialSize="512"
> autowarmCount="0"/>
>
>
> <queryResultCache class="solr.LRUCache"
> size="512"
> initialSize="512"
> autowarmCount="0"/>
>
>
> <documentCache class="solr.LRUCache"
> size="512"
> initialSize="512"
> autowarmCount="0"/>
>
> <enableLazyFieldLoading>true</enableLazyFieldLoading>
>
> <queryResultWindowSize>20</queryResultWindowSize>
>
> <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
>
> <listener event="newSearcher" class="solr.QuerySenderListener">
> <arr name="queries">
>
> </arr>
> </listener>
> <listener event="firstSearcher" class="solr.QuerySenderListener">
> <arr name="queries">
> <lst>
> <str name="q">static firstSearcher warming in solrconfig.xml</str>
> </lst>
> </arr>
> </listener>
>
> <useColdSearcher>false</useColdSearcher>
>
> <maxWarmingSearchers>2</maxWarmingSearchers>
>
> </query>
>
> <requestDispatcher>
>
> <requestParsers enableRemoteStreaming="true"
> multipartUploadLimitInKB="2048000" />
>
> </requestDispatcher>
>
> <requestHandler name="/dataimport"
> class="org.apache.solr.handler.dataimport.DataImportHandler">
> <lst name="defaults">
> <str name="config">data-config.xml</str>
> </lst>
> </requestHandler>
>
> <requestHandler name="/select" class="solr.SearchHandler">
>
> <lst name="defaults">
> <str name="echoParams">explicit</str>
> <int name="rows">100</int>
> </lst>
>
> </requestHandler>
>
> <requestHandler name="/browse" class="solr.SearchHandler">
> <lst name="defaults">
> <str name="echoParams">explicit</str>
>
>
> <str name="wt">velocity</str>
>
> <str name="v.template">browse</str>
> <str name="v.layout">layout</str>
> <str name="title">Solritas</str>
>
> <str name="df">text</str>
> <str name="defType">edismax</str>
> <str name="q.alt">*:*</str>
> <str name="rows">10</str>
> <str name="fl">*,score</str>
> <str name="mlt.qf">
> text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
> </str>
> <str name="mlt.fl">text,features,name,sku,id,manu,cat</str>
> <int name="mlt.count">3</int>
>
> <str name="qf">
> text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
> </str>
>
> <str name="facet">on</str>
> <str name="facet.field">cat</str>
> <str name="facet.field">manu_exact</str>
> <str name="facet.query">ipod</str>
> <str name="facet.query">GB</str>
> <str name="facet.mincount">1</str>
> <str name="facet.pivot">cat,inStock</str>
> <str name="facet.range.other">after</str>
> <str name="facet.range">price</str>
> <int name="f.price.facet.range.start">0</int>
> <int name="f.price.facet.range.end">600</int>
> <int name="f.price.facet.range.gap">50</int>
> <str name="facet.range">popularity</str>
> <int name="f.popularity.facet.range.start">0</int>
> <int name="f.popularity.facet.range.end">10</int>
> <int name="f.popularity.facet.range.gap">3</int>
> <str name="facet.range">manufacturedate_dt</str>
> <str
> name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
> <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
> <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
> <str name="f.manufacturedate_dt.facet.range.other">before</str>
> <str name="f.manufacturedate_dt.facet.range.other">after</str>
>
>
>
> <str name="hl">on</str>
> <str name="hl.fl">text features name</str>
> <str name="f.name.hl.fragsize">0</str>
> <str name="f.name.hl.alternateField">name</str>
> </lst>
> <arr name="last-components">
> <str>spellcheck</str>
> </arr>
>
> </requestHandler>
>
>
> <requestHandler name="/update"
> class="solr.XmlUpdateRequestHandler">
>
> </requestHandler>
>
> <requestHandler name="/update/javabin"
> class="solr.BinaryUpdateRequestHandler" />
>
>
> <requestHandler name="/update/csv"
> class="solr.CSVRequestHandler"
> startup="lazy" />
>
>
> <requestHandler name="/update/json"
> class="solr.JsonUpdateRequestHandler"
> startup="lazy" />
>
>
> <requestHandler name="/update/extract"
> startup="lazy"
> class="solr.extraction.ExtractingRequestHandler" >
> <lst name="defaults">
>
> <str name="fmap.content">text</str>
> <str name="lowernames">true</str>
> <str name="uprefix">ignored_</str>
>
>
> <str name="captureAttr">true</str>
> <str name="fmap.a">links</str>
> <str name="fmap.div">ignored_</str>
> </lst>
> </requestHandler>
>
> <requestHandler name="/update/xslt"
> startup="lazy"
> class="solr.XsltUpdateRequestHandler"/>
>
> <requestHandler name="/analysis/field"
> startup="lazy"
> class="solr.FieldAnalysisRequestHandler" />
> <requestHandler name="/analysis/document"
> class="solr.DocumentAnalysisRequestHandler"
> startup="lazy" />
>
>
> <requestHandler name="/admin/"
> class="solr.admin.AdminHandlers" />
>
>
> <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
> <lst name="invariants">
> <str name="q">solrpingquery</str>
> </lst>
> <lst name="defaults">
> <str name="echoParams">all</str>
> </lst>
> </requestHandler>
>
>
> <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
> <lst name="defaults">
> <str name="echoParams">explicit</str>
> <str name="echoHandler">true</str>
> </lst>
> </requestHandler>
>
>
> <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
>
> <str name="queryAnalyzerFieldType">textSpell</str>
>
>
> <lst name="spellchecker">
> <str name="name">default</str>
> <str name="field">name</str>
> <str name="spellcheckIndexDir">spellchecker</str>
>
> </lst>
>
>
>
> </searchComponent>
>
>
> <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
> <lst name="defaults">
> <str name="df">text</str>
> <str name="spellcheck.onlyMorePopular">false</str>
> <str name="spellcheck.extendedResults">false</str>
> <str name="spellcheck.count">1</str>
> </lst>
> <arr name="last-components">
> <str>spellcheck</str>
> </arr>
> </requestHandler>
>
>
> <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
>
>
> <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
> <lst name="defaults">
> <str name="df">text</str>
> <bool name="tv">true</bool>
> </lst>
> <arr name="last-components">
> <str>tvComponent</str>
> </arr>
> </requestHandler>
>
>
> <searchComponent name="clustering"
> enable="${solr.clustering.enabled:false}"
> class="solr.clustering.ClusteringComponent" >
>
> <lst name="engine">
>
> <str name="name">default</str>
>
>
> <str
> name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
>
>
> <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
>
>
> <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
>
>
> <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
> </lst>
> <lst name="engine">
> <str name="name">stc</str>
> <str
> name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
> </lst>
> </searchComponent>
>
>
> <requestHandler name="/clustering"
> startup="lazy"
> enable="${solr.clustering.enabled:false}"
> class="solr.SearchHandler">
> <lst name="defaults">
> <bool name="clustering">true</bool>
> <str name="clustering.engine">default</str>
> <bool name="clustering.results">true</bool>
>
> <str name="carrot.title">name</str>
> <str name="carrot.url">id</str>
>
> <str name="carrot.snippet">features</str>
>
> <bool name="carrot.produceSummary">true</bool>
>
>
>
> <bool name="carrot.outputSubClusters">false</bool>
>
> <str name="df">text</str>
> <str name="defType">edismax</str>
> <str name="qf">
> text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
> </str>
> <str name="q.alt">*:*</str>
> <str name="rows">10</str>
> <str name="fl">*,score</str>
> </lst>
> <arr name="last-components">
> <str>clustering</str>
> </arr>
> </requestHandler>
>
>
> <searchComponent name="terms" class="solr.TermsComponent"/>
>
>
> <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
> <lst name="defaults">
> <bool name="terms">true</bool>
> </lst>
> <arr name="components">
> <str>terms</str>
> </arr>
> </requestHandler>
>
> <searchComponent name="elevator" class="solr.QueryElevationComponent" >
>
> <str name="queryFieldType">string</str>
> <str name="config-file">elevate.xml</str>
> </searchComponent>
>
>
> <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
> <lst name="defaults">
> <str name="echoParams">explicit</str>
> <str name="df">text</str>
> </lst>
> <arr name="last-components">
> <str>elevator</str>
> </arr>
> </requestHandler>
>
>
> <searchComponent class="solr.HighlightComponent" name="highlight">
> <highlighting>
>
>
> <fragmenter name="gap"
> default="true"
> class="solr.highlight.GapFragmenter">
> <lst name="defaults">
> <int name="hl.fragsize">100</int>
> </lst>
> </fragmenter>
>
>
> <fragmenter name="regex"
> class="solr.highlight.RegexFragmenter">
> <lst name="defaults">
>
> <int name="hl.fragsize">70</int>
>
> <float name="hl.regex.slop">0.5</float>
>
> <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
> </lst>
> </fragmenter>
>
>
> <formatter name="html"
> default="true"
> class="solr.highlight.HtmlFormatter">
> <lst name="defaults">
> <str name="hl.simple.pre"></str>
> <str name="hl.simple.post"></str>
> </lst>
> </formatter>
>
>
> <encoder name="html"
> class="solr.highlight.HtmlEncoder" />
>
>
> <fragListBuilder name="simple"
> default="true"
> class="solr.highlight.SimpleFragListBuilder"/>
>
>
> <fragListBuilder name="single"
> class="solr.highlight.SingleFragListBuilder"/>
>
>
> <fragmentsBuilder name="default"
> default="true"
> class="solr.highlight.ScoreOrderFragmentsBuilder">
>
> </fragmentsBuilder>
>
>
> <fragmentsBuilder name="colored"
> class="solr.highlight.ScoreOrderFragmentsBuilder">
> <lst name="defaults">
> <str name="hl.tag.pre"></str>
> <str name="hl.tag.post"></str>
> </lst>
> </fragmentsBuilder>
>
> <boundaryScanner name="default"
> default="true"
> class="solr.highlight.SimpleBoundaryScanner">
> <lst name="defaults">
> <str name="hl.bs.maxScan">10</str>
> <str name="hl.bs.chars">.,!? 	 </str>
> </lst>
> </boundaryScanner>
>
> <boundaryScanner name="breakIterator"
> class="solr.highlight.BreakIteratorBoundaryScanner">
> <lst name="defaults">
>
> <str name="hl.bs.type">WORD</str>
>
> <str name="hl.bs.language">en</str>
> <str name="hl.bs.country">US</str>
> </lst>
> </boundaryScanner>
> </highlighting>
> </searchComponent>
>
> <queryResponseWriter name="json" class="solr.JSONResponseWriter">
>
> <str name="content-type">text/plain; charset=UTF-8</str>
> </queryResponseWriter>
>
>
> <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"
> startup="lazy"/>
>
>
> -->
> <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
> <int name="xsltCacheLifetimeSeconds">5</int>
> </queryResponseWriter>
> <admin>
>
> <defaultQuery>*:*</defaultQuery>
>
>
> </admin>
>
> </config>
>
> *The schema.xml file:*
>
> <?xml version="1.0" encoding="UTF-8" ?>
>
> <schema name="ktimatologio" version="1.5">
>
> <types>
>
>
> <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
>
>
> <fieldType name="boolean" class="solr.BoolField"
> sortMissingLast="true"/>
>
> <fieldtype name="binary" class="solr.BinaryField"/>
>
> <fieldType name="int" class="solr.TrieIntField" precisionStep="0"
> positionIncrementGap="0"/>
> <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
> positionIncrementGap="0"/>
> <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
> positionIncrementGap="0"/>
> <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0"
> positionIncrementGap="0"/>
>
> <fieldType name="tint" class="solr.TrieIntField" precisionStep="8"
> positionIncrementGap="0"/>
> <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8"
> positionIncrementGap="0"/>
> <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8"
> positionIncrementGap="0"/>
> <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8"
> positionIncrementGap="0"/>
>
> <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
> positionIncrementGap="0"/>
>
>
> <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6"
> positionIncrementGap="0"/>
>
> <fieldType name="pint" class="solr.IntField"/>
> <fieldType name="plong" class="solr.LongField"/>
> <fieldType name="pfloat" class="solr.FloatField"/>
> <fieldType name="pdouble" class="solr.DoubleField"/>
> <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
>
> <fieldType name="sint" class="solr.SortableIntField"
> sortMissingLast="true" omitNorms="true"/>
> <fieldType name="slong" class="solr.SortableLongField"
> sortMissingLast="true" omitNorms="true"/>
> <fieldType name="sfloat" class="solr.SortableFloatField"
> sortMissingLast="true" omitNorms="true"/>
> <fieldType name="sdouble" class="solr.SortableDoubleField"
> sortMissingLast="true" omitNorms="true"/>
> <fieldType name="random" class="solr.RandomSortField" indexed="true" />
>
> <fieldType name="text_el" class="solr.TextField"
> positionIncrementGap="100">
> <analyzer>
> <tokenizer class="solr.StandardTokenizerFactory"/>
>
> <filter class="solr.GreekLowerCaseFilterFactory"/>
> <filter class="solr.StopFilterFactory" ignoreCase="false"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
> <filter class="solr.GreekStemFilterFactory"/>
> </analyzer>
> </fieldType>
>
> <fieldType name="text_ktimatologio" class="solr.TextField"
> positionIncrementGap="100">
>
> <analyzer type="index">
> <tokenizer class="solr.StandardTokenizerFactory"/>
> <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
> <filter class="solr.LowerCaseFilterFactory"/>
> <filter class="solr.EnglishPossessiveFilterFactory"/>
> <filter class="solr.GreekLowerCaseFilterFactory"/>
> <filter class="solr.GreekStemFilterFactory"/>
> <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
> <filter class="solr.PorterStemFilterFactory"/>
> </analyzer>
>
>
> <analyzer type="query">
> <tokenizer class="solr.StandardTokenizerFactory"/>
> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
> ignoreCase="true" expand="true"/>
> <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
>
> <filter class="solr.GreekLowerCaseFilterFactory"/>
> <filter class="solr.GreekStemFilterFactory"/>
> <filter class="solr.LowerCaseFilterFactory"/>
> <filter class="solr.EnglishPossessiveFilterFactory"/>
> <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
> <filter class="solr.PorterStemFilterFactory"/>
> </analyzer>
> </fieldType>
>
> <fieldtype name="ignored" stored="false" indexed="false"
> multiValued="true" class="solr.StrField" />
> <fieldType name="point" class="solr.PointType" dimension="2"
> subFieldSuffix="_d"/>
> <fieldType name="location" class="solr.LatLonType"
> subFieldSuffix="_coordinate"/>
> <fieldtype name="geohash" class="solr.GeoHashField"/>
> <fieldType name="currency" class="solr.CurrencyField" precisionStep="8"
> defaultCurrency="USD" currencyConfig="currency.xml" />
> </types>
>
>
>
> <fields>
>
> <field name="id" type="string" indexed="true" stored="true"
> multiValued="false"/>
> <field name="solr_id" type="string" indexed="true" stored="true"
> multiValued="false"/>
> <field name="title" type="text_ktimatologio" indexed="true"
> stored="true"/>
> <field name="model" type="text_ktimatologio" indexed="true" stored="true"
> multiValued="false"/>
> <field name="type" type="text_ktimatologio" indexed="true"
> stored="true"/>
> <field name="url" type="text_ktimatologio" indexed="true" stored="true"/>
> <field name="content" type="text_ktimatologio" indexed="true"
> stored="true" multiValued="true"/>
> <field name="last_modified" type="string" indexed="true" stored="true"/>
>
> </fields>
>
> <uniqueKey>solr_id</uniqueKey>
>
> <defaultSearchField>content</defaultSearchField>
>
> <solrQueryParser defaultOperator="OR"/>
>
> <copyField source="title" dest="content" />
>
> </schema>
>
> *The data-config.xml file:*
>
> <?xml version="1.0" encoding="utf-8"?>
>
> <dataConfig>
>
> <dataSource type="JdbcDataSource"
> autoCommit="true" batchSize="-1"
> convertType="false"
> driver="com.mysql.jdbc.Driver"
> url="jdbc:mysql://127.0.0.1:3306/ktimatologio"
> user="root"
> password="1a2b3c4d"/>
>
> <dataSource name="fieldReader" type="FieldStreamDataSource" />
>
>
> <document>
>
> <entity name="aitiologikes_ektheseis"
> dataSource="db"
> transformer="HTMLStripTransformer"
> query="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, body AS content from
> aitiologikes_ektheseis where type = 'text'"
> deltaImportQuery="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, body AS content from
> aitiologikes_ektheseis where type = 'text' and
> id='${dataimporter.delta.id}'"
> deltaQuery="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, body AS content from
> aitiologikes_ektheseis where type = 'text' and last_modified >
> '${dataimporter.last_index_time}'">
> <field column="id" name="id" />
> <field column="solr_id" name="solr_id" />
> <field column="title" name="title" stripHTML="true" />
> <field column="model" name="model" stripHTML="true" />
> <field column="type" name="type" stripHTML="true" />
> <field column="url" name="url" stripHTML="true" />
> <field column="last_modified" name="last_modified" stripHTML="true" />
> <field column="content" name="content" stripHTML="true" />
> </entity>
>
> <entity name="aitiologikes_ektheseis_bin"
> query="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
> aitiologikes_ektheseis where type = 'bin'"
> deltaImportQuery="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
> aitiologikes_ektheseis where type = 'bin' and id='${dataimporter.delta.id}'"
> deltaQuery="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
> aitiologikes_ektheseis where type = 'bin' and last_modified >
> '${dataimporter.last_index_time}'"
> transformer="TemplateTransformer"
> dataSource="db">
>
> <entity dataSource="fieldReader" processor="TikaEntityProcessor"
> dataField="aitiologikes_ektheseis_bin.content" format="text">
> <field column="id" name="id" />
> <field column="solr_id" name="solr_id" />
> <field column="title" name="title" stripHTML="true" />
> <field column="model" name="model" stripHTML="true" />
> <field column="type" name="type" stripHTML="true" />
> <field column="url" name="url" stripHTML="true" />
> <field column="last_modified" name="last_modified" stripHTML="true" />
> <field column="content" name="content" stripHTML="true" />
> </entity>
>
> </entity>
>
> </document>
>
>
> </dataConfig>
>
>
>
> --
> View this message in context: http://lucene.472066.n3.nabble.com/Indexing-binary-files-from-database-issue-no-errors-tp3993140.html
> Sent from the Solr - User mailing list archive at Nabble.com.