You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by anarchos78 <ri...@hotmail.com> on 2012/07/05 09:26:44 UTC

Indexing binary files from database issue (no errors)

Greetings friends,
I am trying to index binary files stored in a database (mysql) and I have no
success. I have a solr configured as below:
*Solr file structure*
+solr
   +bookledger(core0)
      -conf
      +lib(all necessary libraries)
        +contrib
        +dist
   +data
      +bookledger
        -index
        -spellchecker
      +ktimatologio
        -index
        -spellchecker
    +ktimatologio(core1)
      -conf
      +lib(all necessary libraries)
        +contrib
        +dist

As you can see the configuration concerns a multicore solr setup. Now, on
the bookledger(core0) I have indexed binary files successfully (stored in a
database). In the second core when I conduct full-import I see no errors!
Then, when I try to query the binary content the output is like:
[B@660b1b14. What am I missing here?

Thank you in advance,

Tom
Greece

*The solr.xml file:*

<?xml version="1.0" encoding="UTF-8" ?>
<solr persistent="false">  
  <cores adminPath="/admin/cores">
    <core name="ktimatologio" instanceDir="ktimatologio"
dataDir="../data/ktimatologio"/>
	<core name="bookledger" instanceDir="bookledger"
dataDir="../data/bookledger"/>
  </cores>
</solr>

*The solrconfig.xml file:*

<?xml version="1.0" encoding="UTF-8" ?>

<config>
  
 
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
  
 
  <luceneMatchVersion>LUCENE_36</luceneMatchVersion>  
  
  <lib dir="lib/dist/" regex="apache-solr-cell-\d.*\.jar" />
  <lib dir="lib/dist/" regex="apache-solr-clustering-\d.*\.jar" />
  <lib dir="lib/dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />
  <lib dir="lib/dist/" regex="apache-solr-langid-\d.*\.jar" />
  <lib dir="lib/dist/" regex="apache-solr-velocity-\d.*\.jar" />
  <lib dir="lib/dist/"
regex="apache-solr-dataimporthandler-extras-\d.*\.jar" />  
  
  <lib dir="lib/contrib/extraction/lib/" regex=".*\.jar" />  
  <lib dir="lib/contrib/clustering/lib/" regex=".*\.jar" />  
  <lib dir="lib/contrib/dataimporthandler/lib/" regex=".*\.jar" /> 
  <lib dir="lib/contrib/langid/lib/" regex=".*\.jar" />  
  <lib dir="lib/contrib/velocity/lib/" regex=".*\.jar" />  
  <lib dir="lib/contrib/extraction/lib/" regex="tika-core-\d.*\.jar" />
  <lib dir="lib/contrib/extraction/lib/" regex="tika-parsers-\d.*\.jar" /> 

 
  <dataDir>${solr.data.dir:}</dataDir>


  
  <directoryFactory name="DirectoryFactory" 
                   
class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>

 
  <indexConfig>
    
  </indexConfig>


  
  <jmx />
  

  
  <updateHandler class="solr.DirectUpdateHandler2">

   
  </updateHandler>
  
  <query>
   
    <maxBooleanClauses>1024</maxBooleanClauses>


    
    <filterCache class="solr.FastLRUCache"
                 size="512"
                 initialSize="512"
                 autowarmCount="0"/>

   
    <queryResultCache class="solr.LRUCache"
                     size="512"
                     initialSize="512"
                     autowarmCount="0"/>
   
    
    <documentCache class="solr.LRUCache"
                   size="512"
                   initialSize="512"
                   autowarmCount="0"/>
				   
    <enableLazyFieldLoading>true</enableLazyFieldLoading>
	
   <queryResultWindowSize>20</queryResultWindowSize>
   
   <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
   
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
	  
      </arr>
    </listener>
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst>
          <str name="q">static firstSearcher warming in solrconfig.xml</str>
        </lst>
      </arr>
    </listener>
	
    <useColdSearcher>false</useColdSearcher>
	
    <maxWarmingSearchers>2</maxWarmingSearchers>

  </query>
  
  <requestDispatcher>
  
    <requestParsers enableRemoteStreaming="true" 
                    multipartUploadLimitInKB="2048000" />
					
  </requestDispatcher>
	
  <requestHandler name="/dataimport"
class="org.apache.solr.handler.dataimport.DataImportHandler">
	<lst name="defaults">
		<str name="config">data-config.xml</str>
	</lst>
  </requestHandler>
  
  <requestHandler name="/select" class="solr.SearchHandler">
    
     <lst name="defaults">
       <str name="echoParams">explicit</str>
       <int name="rows">100</int>
     </lst>
	 
  </requestHandler>

  <requestHandler name="/browse" class="solr.SearchHandler">
     <lst name="defaults">
       <str name="echoParams">explicit</str>

       
       <str name="wt">velocity</str>

       <str name="v.template">browse</str>
       <str name="v.layout">layout</str>
       <str name="title">Solritas</str>

       <str name="df">text</str>
       <str name="defType">edismax</str>
       <str name="q.alt">*:*</str>
       <str name="rows">10</str>
       <str name="fl">*,score</str>
       <str name="mlt.qf">
         text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>
       <str name="mlt.fl">text,features,name,sku,id,manu,cat</str>
       <int name="mlt.count">3</int>

       <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>

       <str name="facet">on</str>
       <str name="facet.field">cat</str>
       <str name="facet.field">manu_exact</str>
       <str name="facet.query">ipod</str>
       <str name="facet.query">GB</str>
       <str name="facet.mincount">1</str>
       <str name="facet.pivot">cat,inStock</str>
       <str name="facet.range.other">after</str>
       <str name="facet.range">price</str>
       <int name="f.price.facet.range.start">0</int>
       <int name="f.price.facet.range.end">600</int>
       <int name="f.price.facet.range.gap">50</int>
       <str name="facet.range">popularity</str>
       <int name="f.popularity.facet.range.start">0</int>
       <int name="f.popularity.facet.range.end">10</int>
       <int name="f.popularity.facet.range.gap">3</int>
       <str name="facet.range">manufacturedate_dt</str>
       <str
name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
       <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
       <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
       <str name="f.manufacturedate_dt.facet.range.other">before</str>
       <str name="f.manufacturedate_dt.facet.range.other">after</str>


       
       <str name="hl">on</str>
       <str name="hl.fl">text features name</str>
       <str name="f.name.hl.fragsize">0</str>
       <str name="f.name.hl.alternateField">name</str>
     </lst>
     <arr name="last-components">
       <str>spellcheck</str>
     </arr>
     
  </requestHandler>

  
  <requestHandler name="/update" 
                  class="solr.XmlUpdateRequestHandler">
   
    </requestHandler>
 
  <requestHandler name="/update/javabin" 
                  class="solr.BinaryUpdateRequestHandler" />

 
  <requestHandler name="/update/csv" 
                  class="solr.CSVRequestHandler" 
                  startup="lazy" />

  
  <requestHandler name="/update/json" 
                  class="solr.JsonUpdateRequestHandler" 
                  startup="lazy" />

  
  <requestHandler name="/update/extract" 
                  startup="lazy"
                  class="solr.extraction.ExtractingRequestHandler" >
    <lst name="defaults">
      
      <str name="fmap.content">text</str>
      <str name="lowernames">true</str>
      <str name="uprefix">ignored_</str>

      
      <str name="captureAttr">true</str>
      <str name="fmap.a">links</str>
      <str name="fmap.div">ignored_</str>
    </lst>
  </requestHandler>
  
  <requestHandler name="/update/xslt"
                   startup="lazy"
                   class="solr.XsltUpdateRequestHandler"/>
				   
  <requestHandler name="/analysis/field" 
                  startup="lazy"
                  class="solr.FieldAnalysisRequestHandler" />
  <requestHandler name="/analysis/document" 
                  class="solr.DocumentAnalysisRequestHandler" 
                  startup="lazy" />

  
  <requestHandler name="/admin/" 
                  class="solr.admin.AdminHandlers" />

  
  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
    <lst name="invariants">
      <str name="q">solrpingquery</str>
    </lst>
    <lst name="defaults">
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>

  
  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
    <lst name="defaults">
     <str name="echoParams">explicit</str> 
     <str name="echoHandler">true</str>
    </lst>
  </requestHandler>
    
      
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">

    <str name="queryAnalyzerFieldType">textSpell</str>

    
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">name</str>
      <str name="spellcheckIndexDir">spellchecker</str>
     
    </lst>

    
    
  </searchComponent>

  
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="df">text</str>
      <str name="spellcheck.onlyMorePopular">false</str>
      <str name="spellcheck.extendedResults">false</str>
      <str name="spellcheck.count">1</str>
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

  
  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>

  
  <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="df">text</str>
      <bool name="tv">true</bool>
    </lst>
    <arr name="last-components">
      <str>tvComponent</str>
    </arr>
  </requestHandler>

 
  <searchComponent name="clustering" 
                   enable="${solr.clustering.enabled:false}"
                   class="solr.clustering.ClusteringComponent" >
    
    <lst name="engine">
      
      <str name="name">default</str>

     
      <str
name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>

     
      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
      
      
      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>

      
      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
    </lst>
    <lst name="engine">
      <str name="name">stc</str>
      <str
name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
    </lst>
  </searchComponent>

  
  <requestHandler name="/clustering"
                  startup="lazy"
                  enable="${solr.clustering.enabled:false}"
                  class="solr.SearchHandler">
    <lst name="defaults">
      <bool name="clustering">true</bool>
      <str name="clustering.engine">default</str>
      <bool name="clustering.results">true</bool>
      
      <str name="carrot.title">name</str>
      <str name="carrot.url">id</str>
      
       <str name="carrot.snippet">features</str>
       
       <bool name="carrot.produceSummary">true</bool>
       
       
       
       <bool name="carrot.outputSubClusters">false</bool>
       
       <str name="df">text</str>
       <str name="defType">edismax</str>
       <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>
       <str name="q.alt">*:*</str>
       <str name="rows">10</str>
       <str name="fl">*,score</str>
    </lst>     
    <arr name="last-components">
      <str>clustering</str>
    </arr>
  </requestHandler>
  
  
  <searchComponent name="terms" class="solr.TermsComponent"/>

  
  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
     <lst name="defaults">
      <bool name="terms">true</bool>
    </lst>     
    <arr name="components">
      <str>terms</str>
    </arr>
  </requestHandler>
  
  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
    
    <str name="queryFieldType">string</str>
    <str name="config-file">elevate.xml</str>
  </searchComponent>

  
  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="df">text</str>
    </lst>
    <arr name="last-components">
      <str>elevator</str>
    </arr>
  </requestHandler>

  
  <searchComponent class="solr.HighlightComponent" name="highlight">
    <highlighting>
      
      
      <fragmenter name="gap" 
                  default="true"
                  class="solr.highlight.GapFragmenter">
        <lst name="defaults">
          <int name="hl.fragsize">100</int>
        </lst>
      </fragmenter>

      
      <fragmenter name="regex" 
                  class="solr.highlight.RegexFragmenter">
        <lst name="defaults">
          
          <int name="hl.fragsize">70</int>
          
          <float name="hl.regex.slop">0.5</float>
          
          <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
        </lst>
      </fragmenter>

      
      <formatter name="html" 
                 default="true"
                 class="solr.highlight.HtmlFormatter">
        <lst name="defaults">
          <str name="hl.simple.pre"></str>
          <str name="hl.simple.post"></str>
        </lst>
      </formatter>

      
      <encoder name="html" 
               class="solr.highlight.HtmlEncoder" />

      
      <fragListBuilder name="simple" 
                       default="true"
                       class="solr.highlight.SimpleFragListBuilder"/>

      
      <fragListBuilder name="single" 
                       class="solr.highlight.SingleFragListBuilder"/>

      
      <fragmentsBuilder name="default" 
                        default="true"
                        class="solr.highlight.ScoreOrderFragmentsBuilder">
        
      </fragmentsBuilder>

      
      <fragmentsBuilder name="colored" 
                        class="solr.highlight.ScoreOrderFragmentsBuilder">
        <lst name="defaults">
          <str name="hl.tag.pre"></str>
          <str name="hl.tag.post"></str>
        </lst>
      </fragmentsBuilder>
      
      <boundaryScanner name="default" 
                       default="true"
                       class="solr.highlight.SimpleBoundaryScanner">
        <lst name="defaults">
          <str name="hl.bs.maxScan">10</str>
          <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
        </lst>
      </boundaryScanner>
      
      <boundaryScanner name="breakIterator" 
                       class="solr.highlight.BreakIteratorBoundaryScanner">
        <lst name="defaults">
         
          <str name="hl.bs.type">WORD</str>
          
          <str name="hl.bs.language">en</str>
          <str name="hl.bs.country">US</str>
        </lst>
      </boundaryScanner>
    </highlighting>
  </searchComponent>

  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
    
    <str name="content-type">text/plain; charset=UTF-8</str>
  </queryResponseWriter>
  
 
    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"
startup="lazy"/>
  

    -->
  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
    <int name="xsltCacheLifetimeSeconds">5</int>
  </queryResponseWriter>
  <admin>
  
    <defaultQuery>*:*</defaultQuery>

   
  </admin>

</config>

*The schema.xml file:*

<?xml version="1.0" encoding="UTF-8" ?>

<schema name="ktimatologio" version="1.5">

  <types>

    
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />

    
    <fieldType name="boolean" class="solr.BoolField"
sortMissingLast="true"/>
    
    <fieldtype name="binary" class="solr.BinaryField"/>

    <fieldType name="int" class="solr.TrieIntField" precisionStep="0"
positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
positionIncrementGap="0"/>
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0"
positionIncrementGap="0"/>
	
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8"
positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8"
positionIncrementGap="0"/>
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8"
positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8"
positionIncrementGap="0"/>
	
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
positionIncrementGap="0"/>

    
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6"
positionIncrementGap="0"/>
	
    <fieldType name="pint" class="solr.IntField"/>
    <fieldType name="plong" class="solr.LongField"/>
    <fieldType name="pfloat" class="solr.FloatField"/>
    <fieldType name="pdouble" class="solr.DoubleField"/>
    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
	
    <fieldType name="sint" class="solr.SortableIntField"
sortMissingLast="true" omitNorms="true"/>
    <fieldType name="slong" class="solr.SortableLongField"
sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sfloat" class="solr.SortableFloatField"
sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sdouble" class="solr.SortableDoubleField"
sortMissingLast="true" omitNorms="true"/>
    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
	
    <fieldType name="text_el" class="solr.TextField"
positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        
        <filter class="solr.GreekLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="false"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
        <filter class="solr.GreekStemFilterFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="text_ktimatologio" class="solr.TextField"
positionIncrementGap="100">
	  
      <analyzer type="index">		
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
	    <filter class="solr.EnglishPossessiveFilterFactory"/>	
	    <filter class="solr.GreekLowerCaseFilterFactory"/>
	    <filter class="solr.GreekStemFilterFactory"/>		
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
	  
	  
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
				
		<filter class="solr.GreekLowerCaseFilterFactory"/>
        <filter class="solr.GreekStemFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
	    <filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>
	
    <fieldtype name="ignored" stored="false" indexed="false"
multiValued="true" class="solr.StrField" />
    <fieldType name="point" class="solr.PointType" dimension="2"
subFieldSuffix="_d"/>
    <fieldType name="location" class="solr.LatLonType"
subFieldSuffix="_coordinate"/>
    <fieldtype name="geohash" class="solr.GeoHashField"/>
    <fieldType name="currency" class="solr.CurrencyField" precisionStep="8"
defaultCurrency="USD" currencyConfig="currency.xml" />
 </types>



 <fields>

  <field  name="id" type="string" indexed="true" stored="true"
multiValued="false"/> 
  <field  name="solr_id" type="string" indexed="true" stored="true"
multiValued="false"/> 
  <field  name="title" type="text_ktimatologio" indexed="true"
stored="true"/>
  <field  name="model" type="text_ktimatologio" indexed="true" stored="true"
multiValued="false"/>
  <field  name="type" type="text_ktimatologio" indexed="true"
stored="true"/>
  <field  name="url" type="text_ktimatologio" indexed="true" stored="true"/>
  <field  name="content" type="text_ktimatologio" indexed="true"
stored="true" multiValued="true"/>
  <field  name="last_modified" type="string" indexed="true" stored="true"/>
    
 </fields>
 
 <uniqueKey>solr_id</uniqueKey>
 
 <defaultSearchField>content</defaultSearchField>
 
 <solrQueryParser defaultOperator="OR"/>
 
   <copyField source="title" dest="content" />
   
</schema>

*The data-config.xml file:*

<?xml version="1.0" encoding="utf-8"?>

<dataConfig>
  
  <dataSource type="JdbcDataSource"
		  autoCommit="true" batchSize="-1"
		  convertType="false"
		  driver="com.mysql.jdbc.Driver"
		  url="jdbc:mysql://127.0.0.1:3306/ktimatologio"
		  user="root" 
		  password="1a2b3c4d"/>
		  
		 <dataSource name="fieldReader" type="FieldStreamDataSource" />		
                  
			  
  <document>  
  
  <entity name="aitiologikes_ektheseis"
  	dataSource="db" 
  	transformer="HTMLStripTransformer" 
  	query="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, body AS content from
aitiologikes_ektheseis where type = 'text'"
	deltaImportQuery="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, body AS content from
aitiologikes_ektheseis where type = 'text' and
id='${dataimporter.delta.id}'"
	deltaQuery="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, body AS content from
aitiologikes_ektheseis where type = 'text' and last_modified &gt;
'${dataimporter.last_index_time}'">
		<field column="id" name="id" /> 		 
		<field column="solr_id" name="solr_id" />
		<field column="title" name="title" stripHTML="true" />
		<field column="model" name="model" stripHTML="true"  />
		<field column="type" name="type" stripHTML="true"  />
		<field column="url" name="url" stripHTML="true"  />
		<field column="last_modified" name="last_modified" stripHTML="true"  />
		<field column="content" name="content" stripHTML="true" />
    </entity>
	
    <entity name="aitiologikes_ektheseis_bin"
	  query="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
aitiologikes_ektheseis where type = 'bin'" 
	  deltaImportQuery="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
aitiologikes_ektheseis where type = 'bin' and id='${dataimporter.delta.id}'"
	  deltaQuery="select id, title, model, type, url, last_modified,
CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
aitiologikes_ektheseis where type = 'bin' and last_modified &gt;
'${dataimporter.last_index_time}'"
	  transformer="TemplateTransformer"
	  dataSource="db">
	  
		<entity dataSource="fieldReader" processor="TikaEntityProcessor"
dataField="aitiologikes_ektheseis_bin.content" format="text">
		  <field column="id" name="id" /> 		 
		  <field column="solr_id" name="solr_id" />
		  <field column="title" name="title" stripHTML="true" />
		  <field column="model" name="model" stripHTML="true"  />
		  <field column="type" name="type" stripHTML="true"  />
		  <field column="url" name="url" stripHTML="true"  />
		  <field column="last_modified" name="last_modified" stripHTML="true"  />
		  <field column="content" name="content" stripHTML="true" />
		</entity>
		
	</entity>
	
  </document>	
  
   
</dataConfig>



--
View this message in context: http://lucene.472066.n3.nabble.com/Indexing-binary-files-from-database-issue-no-errors-tp3993140.html
Sent from the Solr - User mailing list archive at Nabble.com.

Re: Indexing binary files from database issue (no errors)

Posted by Jan Høydahl <ja...@cominvent.com>.
Hi,

Did you get a reply on this?

I'd guess that it is your JDBC driver which does not handle the response to your CONCAT_WS() SQL. Try without it and see. Then try to upgrade your mysql JDBC driver to a newer version and see if it helps.

--
Jan Høydahl, search solution architect
Cominvent AS - www.cominvent.com
Solr Training - www.solrtraining.com

On 5. juli 2012, at 09:26, anarchos78 wrote:

> Greetings friends,
> I am trying to index binary files stored in a database (mysql) and I have no
> success. I have a solr configured as below:
> *Solr file structure*
> +solr
>   +bookledger(core0)
>      -conf
>      +lib(all necessary libraries)
>        +contrib
>        +dist
>   +data
>      +bookledger
>        -index
>        -spellchecker
>      +ktimatologio
>        -index
>        -spellchecker
>    +ktimatologio(core1)
>      -conf
>      +lib(all necessary libraries)
>        +contrib
>        +dist
> 
> As you can see the configuration concerns a multicore solr setup. Now, on
> the bookledger(core0) I have indexed binary files successfully (stored in a
> database). In the second core when I conduct full-import I see no errors!
> Then, when I try to query the binary content the output is like:
> [B@660b1b14. What am I missing here?
> 
> Thank you in advance,
> 
> Tom
> Greece
> 
> *The solr.xml file:*
> 
> <?xml version="1.0" encoding="UTF-8" ?>
> <solr persistent="false">  
>  <cores adminPath="/admin/cores">
>    <core name="ktimatologio" instanceDir="ktimatologio"
> dataDir="../data/ktimatologio"/>
> 	<core name="bookledger" instanceDir="bookledger"
> dataDir="../data/bookledger"/>
>  </cores>
> </solr>
> 
> *The solrconfig.xml file:*
> 
> <?xml version="1.0" encoding="UTF-8" ?>
> 
> <config>
> 
> 
> <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
> 
> 
>  <luceneMatchVersion>LUCENE_36</luceneMatchVersion>  
> 
>  <lib dir="lib/dist/" regex="apache-solr-cell-\d.*\.jar" />
>  <lib dir="lib/dist/" regex="apache-solr-clustering-\d.*\.jar" />
>  <lib dir="lib/dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />
>  <lib dir="lib/dist/" regex="apache-solr-langid-\d.*\.jar" />
>  <lib dir="lib/dist/" regex="apache-solr-velocity-\d.*\.jar" />
>  <lib dir="lib/dist/"
> regex="apache-solr-dataimporthandler-extras-\d.*\.jar" />  
> 
>  <lib dir="lib/contrib/extraction/lib/" regex=".*\.jar" />  
>  <lib dir="lib/contrib/clustering/lib/" regex=".*\.jar" />  
>  <lib dir="lib/contrib/dataimporthandler/lib/" regex=".*\.jar" /> 
>  <lib dir="lib/contrib/langid/lib/" regex=".*\.jar" />  
>  <lib dir="lib/contrib/velocity/lib/" regex=".*\.jar" />  
>  <lib dir="lib/contrib/extraction/lib/" regex="tika-core-\d.*\.jar" />
>  <lib dir="lib/contrib/extraction/lib/" regex="tika-parsers-\d.*\.jar" /> 
> 
> 
>  <dataDir>${solr.data.dir:}</dataDir>
> 
> 
> 
>  <directoryFactory name="DirectoryFactory" 
> 
> class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>
> 
> 
>  <indexConfig>
> 
>  </indexConfig>
> 
> 
> 
>  <jmx />
> 
> 
> 
>  <updateHandler class="solr.DirectUpdateHandler2">
> 
> 
>  </updateHandler>
> 
>  <query>
> 
>    <maxBooleanClauses>1024</maxBooleanClauses>
> 
> 
> 
>    <filterCache class="solr.FastLRUCache"
>                 size="512"
>                 initialSize="512"
>                 autowarmCount="0"/>
> 
> 
>    <queryResultCache class="solr.LRUCache"
>                     size="512"
>                     initialSize="512"
>                     autowarmCount="0"/>
> 
> 
>    <documentCache class="solr.LRUCache"
>                   size="512"
>                   initialSize="512"
>                   autowarmCount="0"/>
> 				   
>    <enableLazyFieldLoading>true</enableLazyFieldLoading>
> 	
>   <queryResultWindowSize>20</queryResultWindowSize>
> 
>   <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
> 
>    <listener event="newSearcher" class="solr.QuerySenderListener">
>      <arr name="queries">
> 	  
>      </arr>
>    </listener>
>    <listener event="firstSearcher" class="solr.QuerySenderListener">
>      <arr name="queries">
>        <lst>
>          <str name="q">static firstSearcher warming in solrconfig.xml</str>
>        </lst>
>      </arr>
>    </listener>
> 	
>    <useColdSearcher>false</useColdSearcher>
> 	
>    <maxWarmingSearchers>2</maxWarmingSearchers>
> 
>  </query>
> 
>  <requestDispatcher>
> 
>    <requestParsers enableRemoteStreaming="true" 
>                    multipartUploadLimitInKB="2048000" />
> 					
>  </requestDispatcher>
> 	
>  <requestHandler name="/dataimport"
> class="org.apache.solr.handler.dataimport.DataImportHandler">
> 	<lst name="defaults">
> 		<str name="config">data-config.xml</str>
> 	</lst>
>  </requestHandler>
> 
>  <requestHandler name="/select" class="solr.SearchHandler">
> 
>     <lst name="defaults">
>       <str name="echoParams">explicit</str>
>       <int name="rows">100</int>
>     </lst>
> 	 
>  </requestHandler>
> 
>  <requestHandler name="/browse" class="solr.SearchHandler">
>     <lst name="defaults">
>       <str name="echoParams">explicit</str>
> 
> 
>       <str name="wt">velocity</str>
> 
>       <str name="v.template">browse</str>
>       <str name="v.layout">layout</str>
>       <str name="title">Solritas</str>
> 
>       <str name="df">text</str>
>       <str name="defType">edismax</str>
>       <str name="q.alt">*:*</str>
>       <str name="rows">10</str>
>       <str name="fl">*,score</str>
>       <str name="mlt.qf">
>         text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
>       </str>
>       <str name="mlt.fl">text,features,name,sku,id,manu,cat</str>
>       <int name="mlt.count">3</int>
> 
>       <str name="qf">
>          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
>       </str>
> 
>       <str name="facet">on</str>
>       <str name="facet.field">cat</str>
>       <str name="facet.field">manu_exact</str>
>       <str name="facet.query">ipod</str>
>       <str name="facet.query">GB</str>
>       <str name="facet.mincount">1</str>
>       <str name="facet.pivot">cat,inStock</str>
>       <str name="facet.range.other">after</str>
>       <str name="facet.range">price</str>
>       <int name="f.price.facet.range.start">0</int>
>       <int name="f.price.facet.range.end">600</int>
>       <int name="f.price.facet.range.gap">50</int>
>       <str name="facet.range">popularity</str>
>       <int name="f.popularity.facet.range.start">0</int>
>       <int name="f.popularity.facet.range.end">10</int>
>       <int name="f.popularity.facet.range.gap">3</int>
>       <str name="facet.range">manufacturedate_dt</str>
>       <str
> name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
>       <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
>       <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
>       <str name="f.manufacturedate_dt.facet.range.other">before</str>
>       <str name="f.manufacturedate_dt.facet.range.other">after</str>
> 
> 
> 
>       <str name="hl">on</str>
>       <str name="hl.fl">text features name</str>
>       <str name="f.name.hl.fragsize">0</str>
>       <str name="f.name.hl.alternateField">name</str>
>     </lst>
>     <arr name="last-components">
>       <str>spellcheck</str>
>     </arr>
> 
>  </requestHandler>
> 
> 
>  <requestHandler name="/update" 
>                  class="solr.XmlUpdateRequestHandler">
> 
>    </requestHandler>
> 
>  <requestHandler name="/update/javabin" 
>                  class="solr.BinaryUpdateRequestHandler" />
> 
> 
>  <requestHandler name="/update/csv" 
>                  class="solr.CSVRequestHandler" 
>                  startup="lazy" />
> 
> 
>  <requestHandler name="/update/json" 
>                  class="solr.JsonUpdateRequestHandler" 
>                  startup="lazy" />
> 
> 
>  <requestHandler name="/update/extract" 
>                  startup="lazy"
>                  class="solr.extraction.ExtractingRequestHandler" >
>    <lst name="defaults">
> 
>      <str name="fmap.content">text</str>
>      <str name="lowernames">true</str>
>      <str name="uprefix">ignored_</str>
> 
> 
>      <str name="captureAttr">true</str>
>      <str name="fmap.a">links</str>
>      <str name="fmap.div">ignored_</str>
>    </lst>
>  </requestHandler>
> 
>  <requestHandler name="/update/xslt"
>                   startup="lazy"
>                   class="solr.XsltUpdateRequestHandler"/>
> 				   
>  <requestHandler name="/analysis/field" 
>                  startup="lazy"
>                  class="solr.FieldAnalysisRequestHandler" />
>  <requestHandler name="/analysis/document" 
>                  class="solr.DocumentAnalysisRequestHandler" 
>                  startup="lazy" />
> 
> 
>  <requestHandler name="/admin/" 
>                  class="solr.admin.AdminHandlers" />
> 
> 
>  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
>    <lst name="invariants">
>      <str name="q">solrpingquery</str>
>    </lst>
>    <lst name="defaults">
>      <str name="echoParams">all</str>
>    </lst>
>  </requestHandler>
> 
> 
>  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
>    <lst name="defaults">
>     <str name="echoParams">explicit</str> 
>     <str name="echoHandler">true</str>
>    </lst>
>  </requestHandler>
> 
> 
>  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
> 
>    <str name="queryAnalyzerFieldType">textSpell</str>
> 
> 
>    <lst name="spellchecker">
>      <str name="name">default</str>
>      <str name="field">name</str>
>      <str name="spellcheckIndexDir">spellchecker</str>
> 
>    </lst>
> 
> 
> 
>  </searchComponent>
> 
> 
>  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
>    <lst name="defaults">
>      <str name="df">text</str>
>      <str name="spellcheck.onlyMorePopular">false</str>
>      <str name="spellcheck.extendedResults">false</str>
>      <str name="spellcheck.count">1</str>
>    </lst>
>    <arr name="last-components">
>      <str>spellcheck</str>
>    </arr>
>  </requestHandler>
> 
> 
>  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
> 
> 
>  <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
>    <lst name="defaults">
>      <str name="df">text</str>
>      <bool name="tv">true</bool>
>    </lst>
>    <arr name="last-components">
>      <str>tvComponent</str>
>    </arr>
>  </requestHandler>
> 
> 
>  <searchComponent name="clustering" 
>                   enable="${solr.clustering.enabled:false}"
>                   class="solr.clustering.ClusteringComponent" >
> 
>    <lst name="engine">
> 
>      <str name="name">default</str>
> 
> 
>      <str
> name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
> 
> 
>      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
> 
> 
>      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
> 
> 
>      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
>    </lst>
>    <lst name="engine">
>      <str name="name">stc</str>
>      <str
> name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
>    </lst>
>  </searchComponent>
> 
> 
>  <requestHandler name="/clustering"
>                  startup="lazy"
>                  enable="${solr.clustering.enabled:false}"
>                  class="solr.SearchHandler">
>    <lst name="defaults">
>      <bool name="clustering">true</bool>
>      <str name="clustering.engine">default</str>
>      <bool name="clustering.results">true</bool>
> 
>      <str name="carrot.title">name</str>
>      <str name="carrot.url">id</str>
> 
>       <str name="carrot.snippet">features</str>
> 
>       <bool name="carrot.produceSummary">true</bool>
> 
> 
> 
>       <bool name="carrot.outputSubClusters">false</bool>
> 
>       <str name="df">text</str>
>       <str name="defType">edismax</str>
>       <str name="qf">
>          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
>       </str>
>       <str name="q.alt">*:*</str>
>       <str name="rows">10</str>
>       <str name="fl">*,score</str>
>    </lst>     
>    <arr name="last-components">
>      <str>clustering</str>
>    </arr>
>  </requestHandler>
> 
> 
>  <searchComponent name="terms" class="solr.TermsComponent"/>
> 
> 
>  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
>     <lst name="defaults">
>      <bool name="terms">true</bool>
>    </lst>     
>    <arr name="components">
>      <str>terms</str>
>    </arr>
>  </requestHandler>
> 
>  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
> 
>    <str name="queryFieldType">string</str>
>    <str name="config-file">elevate.xml</str>
>  </searchComponent>
> 
> 
>  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
>    <lst name="defaults">
>      <str name="echoParams">explicit</str>
>      <str name="df">text</str>
>    </lst>
>    <arr name="last-components">
>      <str>elevator</str>
>    </arr>
>  </requestHandler>
> 
> 
>  <searchComponent class="solr.HighlightComponent" name="highlight">
>    <highlighting>
> 
> 
>      <fragmenter name="gap" 
>                  default="true"
>                  class="solr.highlight.GapFragmenter">
>        <lst name="defaults">
>          <int name="hl.fragsize">100</int>
>        </lst>
>      </fragmenter>
> 
> 
>      <fragmenter name="regex" 
>                  class="solr.highlight.RegexFragmenter">
>        <lst name="defaults">
> 
>          <int name="hl.fragsize">70</int>
> 
>          <float name="hl.regex.slop">0.5</float>
> 
>          <str name="hl.regex.pattern">[-\w ,/\n\&quot;&apos;]{20,200}</str>
>        </lst>
>      </fragmenter>
> 
> 
>      <formatter name="html" 
>                 default="true"
>                 class="solr.highlight.HtmlFormatter">
>        <lst name="defaults">
>          <str name="hl.simple.pre"></str>
>          <str name="hl.simple.post"></str>
>        </lst>
>      </formatter>
> 
> 
>      <encoder name="html" 
>               class="solr.highlight.HtmlEncoder" />
> 
> 
>      <fragListBuilder name="simple" 
>                       default="true"
>                       class="solr.highlight.SimpleFragListBuilder"/>
> 
> 
>      <fragListBuilder name="single" 
>                       class="solr.highlight.SingleFragListBuilder"/>
> 
> 
>      <fragmentsBuilder name="default" 
>                        default="true"
>                        class="solr.highlight.ScoreOrderFragmentsBuilder">
> 
>      </fragmentsBuilder>
> 
> 
>      <fragmentsBuilder name="colored" 
>                        class="solr.highlight.ScoreOrderFragmentsBuilder">
>        <lst name="defaults">
>          <str name="hl.tag.pre"></str>
>          <str name="hl.tag.post"></str>
>        </lst>
>      </fragmentsBuilder>
> 
>      <boundaryScanner name="default" 
>                       default="true"
>                       class="solr.highlight.SimpleBoundaryScanner">
>        <lst name="defaults">
>          <str name="hl.bs.maxScan">10</str>
>          <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
>        </lst>
>      </boundaryScanner>
> 
>      <boundaryScanner name="breakIterator" 
>                       class="solr.highlight.BreakIteratorBoundaryScanner">
>        <lst name="defaults">
> 
>          <str name="hl.bs.type">WORD</str>
> 
>          <str name="hl.bs.language">en</str>
>          <str name="hl.bs.country">US</str>
>        </lst>
>      </boundaryScanner>
>    </highlighting>
>  </searchComponent>
> 
>  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
> 
>    <str name="content-type">text/plain; charset=UTF-8</str>
>  </queryResponseWriter>
> 
> 
>    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"
> startup="lazy"/>
> 
> 
>    -->
>  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
>    <int name="xsltCacheLifetimeSeconds">5</int>
>  </queryResponseWriter>
>  <admin>
> 
>    <defaultQuery>*:*</defaultQuery>
> 
> 
>  </admin>
> 
> </config>
> 
> *The schema.xml file:*
> 
> <?xml version="1.0" encoding="UTF-8" ?>
> 
> <schema name="ktimatologio" version="1.5">
> 
>  <types>
> 
> 
>    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
> 
> 
>    <fieldType name="boolean" class="solr.BoolField"
> sortMissingLast="true"/>
> 
>    <fieldtype name="binary" class="solr.BinaryField"/>
> 
>    <fieldType name="int" class="solr.TrieIntField" precisionStep="0"
> positionIncrementGap="0"/>
>    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
> positionIncrementGap="0"/>
>    <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
> positionIncrementGap="0"/>
>    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0"
> positionIncrementGap="0"/>
> 	
>    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8"
> positionIncrementGap="0"/>
>    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8"
> positionIncrementGap="0"/>
>    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8"
> positionIncrementGap="0"/>
>    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8"
> positionIncrementGap="0"/>
> 	
>    <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
> positionIncrementGap="0"/>
> 
> 
>    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6"
> positionIncrementGap="0"/>
> 	
>    <fieldType name="pint" class="solr.IntField"/>
>    <fieldType name="plong" class="solr.LongField"/>
>    <fieldType name="pfloat" class="solr.FloatField"/>
>    <fieldType name="pdouble" class="solr.DoubleField"/>
>    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>
> 	
>    <fieldType name="sint" class="solr.SortableIntField"
> sortMissingLast="true" omitNorms="true"/>
>    <fieldType name="slong" class="solr.SortableLongField"
> sortMissingLast="true" omitNorms="true"/>
>    <fieldType name="sfloat" class="solr.SortableFloatField"
> sortMissingLast="true" omitNorms="true"/>
>    <fieldType name="sdouble" class="solr.SortableDoubleField"
> sortMissingLast="true" omitNorms="true"/>
>    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
> 	
>    <fieldType name="text_el" class="solr.TextField"
> positionIncrementGap="100">
>      <analyzer> 
>        <tokenizer class="solr.StandardTokenizerFactory"/>
> 
>        <filter class="solr.GreekLowerCaseFilterFactory"/>
>        <filter class="solr.StopFilterFactory" ignoreCase="false"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>        <filter class="solr.GreekStemFilterFactory"/>
>      </analyzer>
>    </fieldType>
> 
>    <fieldType name="text_ktimatologio" class="solr.TextField"
> positionIncrementGap="100">
> 	  
>      <analyzer type="index">		
>        <tokenizer class="solr.StandardTokenizerFactory"/>
>        <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
>        <filter class="solr.LowerCaseFilterFactory"/>
> 	    <filter class="solr.EnglishPossessiveFilterFactory"/>	
> 	    <filter class="solr.GreekLowerCaseFilterFactory"/>
> 	    <filter class="solr.GreekStemFilterFactory"/>		
>        <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
>        <filter class="solr.PorterStemFilterFactory"/>
>      </analyzer>
> 	  
> 	  
>      <analyzer type="query">
>        <tokenizer class="solr.StandardTokenizerFactory"/>
>        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
> ignoreCase="true" expand="true"/>
>        <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
> 				
> 		<filter class="solr.GreekLowerCaseFilterFactory"/>
>        <filter class="solr.GreekStemFilterFactory"/>
>        <filter class="solr.LowerCaseFilterFactory"/>
> 	    <filter class="solr.EnglishPossessiveFilterFactory"/>
>        <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
>        <filter class="solr.PorterStemFilterFactory"/>
>      </analyzer>
>    </fieldType>
> 	
>    <fieldtype name="ignored" stored="false" indexed="false"
> multiValued="true" class="solr.StrField" />
>    <fieldType name="point" class="solr.PointType" dimension="2"
> subFieldSuffix="_d"/>
>    <fieldType name="location" class="solr.LatLonType"
> subFieldSuffix="_coordinate"/>
>    <fieldtype name="geohash" class="solr.GeoHashField"/>
>    <fieldType name="currency" class="solr.CurrencyField" precisionStep="8"
> defaultCurrency="USD" currencyConfig="currency.xml" />
> </types>
> 
> 
> 
> <fields>
> 
>  <field  name="id" type="string" indexed="true" stored="true"
> multiValued="false"/> 
>  <field  name="solr_id" type="string" indexed="true" stored="true"
> multiValued="false"/> 
>  <field  name="title" type="text_ktimatologio" indexed="true"
> stored="true"/>
>  <field  name="model" type="text_ktimatologio" indexed="true" stored="true"
> multiValued="false"/>
>  <field  name="type" type="text_ktimatologio" indexed="true"
> stored="true"/>
>  <field  name="url" type="text_ktimatologio" indexed="true" stored="true"/>
>  <field  name="content" type="text_ktimatologio" indexed="true"
> stored="true" multiValued="true"/>
>  <field  name="last_modified" type="string" indexed="true" stored="true"/>
> 
> </fields>
> 
> <uniqueKey>solr_id</uniqueKey>
> 
> <defaultSearchField>content</defaultSearchField>
> 
> <solrQueryParser defaultOperator="OR"/>
> 
>   <copyField source="title" dest="content" />
> 
> </schema>
> 
> *The data-config.xml file:*
> 
> <?xml version="1.0" encoding="utf-8"?>
> 
> <dataConfig>
> 
>  <dataSource type="JdbcDataSource"
> 		  autoCommit="true" batchSize="-1"
> 		  convertType="false"
> 		  driver="com.mysql.jdbc.Driver"
> 		  url="jdbc:mysql://127.0.0.1:3306/ktimatologio"
> 		  user="root" 
> 		  password="1a2b3c4d"/>
> 		  
> 		 <dataSource name="fieldReader" type="FieldStreamDataSource" />		
> 
> 			  
>  <document>  
> 
>  <entity name="aitiologikes_ektheseis"
>  	dataSource="db" 
>  	transformer="HTMLStripTransformer" 
>  	query="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, body AS content from
> aitiologikes_ektheseis where type = 'text'"
> 	deltaImportQuery="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, body AS content from
> aitiologikes_ektheseis where type = 'text' and
> id='${dataimporter.delta.id}'"
> 	deltaQuery="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, body AS content from
> aitiologikes_ektheseis where type = 'text' and last_modified &gt;
> '${dataimporter.last_index_time}'">
> 		<field column="id" name="id" /> 		 
> 		<field column="solr_id" name="solr_id" />
> 		<field column="title" name="title" stripHTML="true" />
> 		<field column="model" name="model" stripHTML="true"  />
> 		<field column="type" name="type" stripHTML="true"  />
> 		<field column="url" name="url" stripHTML="true"  />
> 		<field column="last_modified" name="last_modified" stripHTML="true"  />
> 		<field column="content" name="content" stripHTML="true" />
>    </entity>
> 	
>    <entity name="aitiologikes_ektheseis_bin"
> 	  query="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
> aitiologikes_ektheseis where type = 'bin'" 
> 	  deltaImportQuery="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
> aitiologikes_ektheseis where type = 'bin' and id='${dataimporter.delta.id}'"
> 	  deltaQuery="select id, title, model, type, url, last_modified,
> CONCAT_WS('_',id,model) AS solr_id, bin_con AS content from
> aitiologikes_ektheseis where type = 'bin' and last_modified &gt;
> '${dataimporter.last_index_time}'"
> 	  transformer="TemplateTransformer"
> 	  dataSource="db">
> 	  
> 		<entity dataSource="fieldReader" processor="TikaEntityProcessor"
> dataField="aitiologikes_ektheseis_bin.content" format="text">
> 		  <field column="id" name="id" /> 		 
> 		  <field column="solr_id" name="solr_id" />
> 		  <field column="title" name="title" stripHTML="true" />
> 		  <field column="model" name="model" stripHTML="true"  />
> 		  <field column="type" name="type" stripHTML="true"  />
> 		  <field column="url" name="url" stripHTML="true"  />
> 		  <field column="last_modified" name="last_modified" stripHTML="true"  />
> 		  <field column="content" name="content" stripHTML="true" />
> 		</entity>
> 		
> 	</entity>
> 	
>  </document>	
> 
> 
> </dataConfig>
> 
> 
> 
> --
> View this message in context: http://lucene.472066.n3.nabble.com/Indexing-binary-files-from-database-issue-no-errors-tp3993140.html
> Sent from the Solr - User mailing list archive at Nabble.com.