You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-user@lucene.apache.org by anarchos78 <ri...@hotmail.com> on 2012/08/23 20:40:39 UTC

Indexing and querying BLOBS stored in Mysql

Greeting friends,

Straight to the point. I have stored many BLOBS in a Mysql DB. These are
mainly PDF's(80%) and .doc. I have also text in the DB. Till now i have
indexed and i can query the text, but i cannot index the BLOBS. I am trying
to make a single collection(document)-but sucks. Is there any recipe on how
to do such a thing? 

*A portion of data-config.xml:*

<?xml version="1.0" encoding="utf-8"?>

<dataConfig>
   
  <dataSource type="JdbcDataSource"
		  autoCommit="true" batchSize="-1"
		  convertType="false"
		  driver="com.mysql.jdbc.Driver"
		  url="jdbc:mysql://127.0.0.1:3306/ktimatologio"
		  user="root" 
		  password="1a2b3c4d"
		  name="db"/>
		  
		 <dataSource name="fieldReader" type="FieldStreamDataSource" />		
                  
			  
  <document>  
  
  
  <entity name="aitiologikes_ektheseis"
  	dataSource="db" 
  	transformer="HTMLStripTransformer" 
  	query="select id, title, title AS grid_title, model, type, url,
last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'"
	deltaImportQuery="select id, title, title AS grid_title, model, type, url,
last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'
and id='${dataimporter.delta.id}'"
	deltaQuery="select id, title, title AS grid_title, model, type, url,
last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'
and last_modified &gt; '${dataimporter.last_index_time}'">
		<field column="id" name="ida" />		
		<field column="solr_id" name="solr_id" />
		<field column="title" name="title" stripHTML="true" />
		<field column="grid_title" name="grid_title" stripHTML="true" />
		<field column="model" name="model" stripHTML="true" />
		<field column="type" name="type" stripHTML="true" />
		<field column="url" name="url" stripHTML="true" />
		<field column="last_modified" name="last_modified" stripHTML="true"  />
		<field column="search_tag" name="search_tag" stripHTML="true" />
		<field column="content" name="content" stripHTML="true" />
    </entity>
	
    <entity name="aitiologikes_ektheseis_bin"
	  query="select id, title, title AS grid_title, model, type, url,
last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con AS
text from aitiologikes_ektheseis where type = 'bin'" 
	  deltaImportQuery="select id, title, title AS grid_title, model, type,
url, last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con
AS text from aitiologikes_ektheseis where type = 'bin' and
id='${dataimporter.delta.id}'"
	  deltaQuery="select id, title, title AS grid_title, model, type, url,
last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con AS
text from aitiologikes_ektheseis where type = 'bin' and last_modified &gt;
'${dataimporter.last_index_time}'"
	  transformer="TemplateTransformer"
	  dataSource="db">
	  		
		  <field column="id" name="ida" />		
		<field column="solr_id" name="solr_id" />
		  <field column="title" name="title" stripHTML="true" />
		  <field column="grid_title" name="grid_title" stripHTML="true" />
		  <field column="model" name="model" stripHTML="true" />
		  <field column="type" name="type" stripHTML="true" />
		  <field column="url" name="url" stripHTML="true" />
		  <field column="last_modified" name="last_modified" stripHTML="true"  />
		  <field column="search_tag" name="search_tag" stripHTML="true" />
		  
		<entity dataSource="fieldReader" processor="TikaEntityProcessor"
dataField="aitiologikes_ektheseis_bin.text" format="text">  
		  <field column="text" name="contentbin" stripHTML="true" />
		</entity>
		
	</entity>
	
...
...
    </document>  

</dataConfig>

*A portion from schema.xml (the fieldTypes and filed definition):*

<fieldType name="text_ktimatologio" class="solr.TextField"
positionIncrementGap="100">
	  
      <analyzer type="index">		
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
	    <filter class="solr.EnglishPossessiveFilterFactory"/>		
		<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
	    <filter class="solr.GreekLowerCaseFilterFactory"/>
	    <filter class="solr.GreekStemFilterFactory"/>	
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer> 
	  
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_en.txt" enablePositionIncrements="true"/>		
		<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>		
		<filter class="solr.GreekLowerCaseFilterFactory"/>
        <filter class="solr.GreekStemFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
	    <filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
</fieldType>




<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
	<analyzer type="index">
		<charFilter class="solr.HTMLStripCharFilterFactory"/>
		<tokenizer class="solr.StandardTokenizerFactory"/>
		<filter class="solr.StandardFilterFactory"/> 			   
		<filter class="solr.LowerCaseFilterFactory"/>				
		<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>		
		<filter class="solr.GreekLowerCaseFilterFactory"/>
		<filter class="solr.GreekStemFilterFactory"/>
		<filter class="solr.HunspellStemFilterFactory"
dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff"
ignoreCase="true" />
	</analyzer>
	
	<analyzer type="query">
		<charFilter class="solr.HTMLStripCharFilterFactory"/>
		<tokenizer class="solr.StandardTokenizerFactory"/>
		<filter class="solr.StandardFilterFactory"/>
		<filter class="solr.LowerCaseFilterFactory"/> 			   
		<filter class="solr.LowerCaseFilterFactory"/>				
		<filter class="solr.StopFilterFactory" ignoreCase="true"
words="lang/stopwords_el.txt" enablePositionIncrements="true"/>		
		<filter class="solr.GreekLowerCaseFilterFactory"/>
		<filter class="solr.GreekStemFilterFactory"/>
		<filter class="solr.HunspellStemFilterFactory"
dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff"
ignoreCase="true" />
	</analyzer>
</fieldType>


<fields>
  <field  name="ida" type="string" indexed="true" stored="true"
multiValued="false"/>
  <field  name="solr_id" type="string" indexed="true" stored="true"
multiValued="false"/> 
  <field  name="title" type="text_ktimatologio" indexed="true"
stored="true"/>
  <field  name="grid_title" type="text_ktimatologio" indexed="true"
stored="true"/>
  <field  name="model" type="string" indexed="true" stored="true"
multiValued="false"/>
  <field  name="type" type="string" indexed="true" stored="true"/>
  <field  name="url" type="string" indexed="true" stored="true"/>
  <field  name="last_modified" type="string" indexed="true" stored="true"/>
  <field  name="search_tag" type="string" indexed="true" stored="true"/>
  <field  name="contentbin" type="text" indexed="true" stored="true"
multiValued="true"/>
  <field  name="content" type="text_ktimatologio" indexed="true"
stored="true" multiValued="true"/>     
</fields>

I really need help on this!

With respect,

Tom

Greece



--
View this message in context: http://lucene.472066.n3.nabble.com/Indexing-and-querying-BLOBS-stored-in-Mysql-tp4002940.html
Sent from the Solr - User mailing list archive at Nabble.com.

Re: Indexing and querying BLOBS stored in Mysql

Posted by Alexandre Rafalovitch <ar...@gmail.com>.
I think it would greatly help if you say specifically where you are
stuck. Otherwise, there are too many directions to guess. The
configuration snippet you have is a little too large to 'parse'.

I believe DataImportHandler has some definition for nested processors,
have you tried using those and having problems?

Do you want extra custom processing for the blobs? Have you tried
writing a CustomProcessor that will call Tika and parse the content
and add it to the record? I am doing this to merge files in filesystem
with metadata records during index (for a test). If that sounds
similar to what you do, I can share my sample privately.

Otherwise, just try to be very specific about:
*) What you are trying to do
*) What you are actually doing to get there, and
*) What specifically you are getting stuck on (Exception? Missed
records? Out of memory? etc)

Regards,
   Alex.

Personal blog: http://blog.outerthoughts.com/
LinkedIn: http://www.linkedin.com/in/alexandrerafalovitch
- Time is the quality of nature that keeps events from happening all
at once. Lately, it doesn't seem to be working.  (Anonymous  - via GTD
book)


On Thu, Aug 23, 2012 at 2:40 PM, anarchos78
<ri...@hotmail.com> wrote:
> Greeting friends,
>
> Straight to the point. I have stored many BLOBS in a Mysql DB. These are
> mainly PDF's(80%) and .doc. I have also text in the DB. Till now i have
> indexed and i can query the text, but i cannot index the BLOBS. I am trying
> to make a single collection(document)-but sucks. Is there any recipe on how
> to do such a thing?
>
> *A portion of data-config.xml:*
>
> <?xml version="1.0" encoding="utf-8"?>
>
> <dataConfig>
>
>   <dataSource type="JdbcDataSource"
>                   autoCommit="true" batchSize="-1"
>                   convertType="false"
>                   driver="com.mysql.jdbc.Driver"
>                   url="jdbc:mysql://127.0.0.1:3306/ktimatologio"
>                   user="root"
>                   password="1a2b3c4d"
>                   name="db"/>
>
>                  <dataSource name="fieldReader" type="FieldStreamDataSource" />
>
>
>   <document>
>
>
>   <entity name="aitiologikes_ektheseis"
>         dataSource="db"
>         transformer="HTMLStripTransformer"
>         query="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
> body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'"
>         deltaImportQuery="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
> body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'
> and id='${dataimporter.delta.id}'"
>         deltaQuery="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
> body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'
> and last_modified > '${dataimporter.last_index_time}'">
>                 <field column="id" name="ida" />
>                 <field column="solr_id" name="solr_id" />
>                 <field column="title" name="title" stripHTML="true" />
>                 <field column="grid_title" name="grid_title" stripHTML="true" />
>                 <field column="model" name="model" stripHTML="true" />
>                 <field column="type" name="type" stripHTML="true" />
>                 <field column="url" name="url" stripHTML="true" />
>                 <field column="last_modified" name="last_modified" stripHTML="true"  />
>                 <field column="search_tag" name="search_tag" stripHTML="true" />
>                 <field column="content" name="content" stripHTML="true" />
>     </entity>
>
>     <entity name="aitiologikes_ektheseis_bin"
>           query="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con AS
> text from aitiologikes_ektheseis where type = 'bin'"
>           deltaImportQuery="select id, title, title AS grid_title, model, type,
> url, last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con
> AS text from aitiologikes_ektheseis where type = 'bin' and
> id='${dataimporter.delta.id}'"
>           deltaQuery="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con AS
> text from aitiologikes_ektheseis where type = 'bin' and last_modified >
> '${dataimporter.last_index_time}'"
>           transformer="TemplateTransformer"
>           dataSource="db">
>
>                   <field column="id" name="ida" />
>                 <field column="solr_id" name="solr_id" />
>                   <field column="title" name="title" stripHTML="true" />
>                   <field column="grid_title" name="grid_title" stripHTML="true" />
>                   <field column="model" name="model" stripHTML="true" />
>                   <field column="type" name="type" stripHTML="true" />
>                   <field column="url" name="url" stripHTML="true" />
>                   <field column="last_modified" name="last_modified" stripHTML="true"  />
>                   <field column="search_tag" name="search_tag" stripHTML="true" />
>
>                 <entity dataSource="fieldReader" processor="TikaEntityProcessor"
> dataField="aitiologikes_ektheseis_bin.text" format="text">
>                   <field column="text" name="contentbin" stripHTML="true" />
>                 </entity>
>
>         </entity>
>
> ...
> ...
>     </document>
>
> </dataConfig>
>
> *A portion from schema.xml (the fieldTypes and filed definition):*
>
> <fieldType name="text_ktimatologio" class="solr.TextField"
> positionIncrementGap="100">
>
>       <analyzer type="index">
>         <tokenizer class="solr.StandardTokenizerFactory"/>
>         <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
>         <filter class="solr.LowerCaseFilterFactory"/>
>             <filter class="solr.EnglishPossessiveFilterFactory"/>
>                 <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>             <filter class="solr.GreekLowerCaseFilterFactory"/>
>             <filter class="solr.GreekStemFilterFactory"/>
>         <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
>         <filter class="solr.PorterStemFilterFactory"/>
>       </analyzer>
>
>       <analyzer type="query">
>         <tokenizer class="solr.StandardTokenizerFactory"/>
>         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
> ignoreCase="true" expand="true"/>
>         <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
>                 <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>                 <filter class="solr.GreekLowerCaseFilterFactory"/>
>         <filter class="solr.GreekStemFilterFactory"/>
>         <filter class="solr.LowerCaseFilterFactory"/>
>             <filter class="solr.EnglishPossessiveFilterFactory"/>
>         <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
>         <filter class="solr.PorterStemFilterFactory"/>
>       </analyzer>
> </fieldType>
>
>
>
>
> <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
>         <analyzer type="index">
>                 <charFilter class="solr.HTMLStripCharFilterFactory"/>
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.StandardFilterFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>                 <filter class="solr.GreekLowerCaseFilterFactory"/>
>                 <filter class="solr.GreekStemFilterFactory"/>
>                 <filter class="solr.HunspellStemFilterFactory"
> dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff"
> ignoreCase="true" />
>         </analyzer>
>
>         <analyzer type="query">
>                 <charFilter class="solr.HTMLStripCharFilterFactory"/>
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.StandardFilterFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>                 <filter class="solr.GreekLowerCaseFilterFactory"/>
>                 <filter class="solr.GreekStemFilterFactory"/>
>                 <filter class="solr.HunspellStemFilterFactory"
> dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff"
> ignoreCase="true" />
>         </analyzer>
> </fieldType>
>
>
> <fields>
>   <field  name="ida" type="string" indexed="true" stored="true"
> multiValued="false"/>
>   <field  name="solr_id" type="string" indexed="true" stored="true"
> multiValued="false"/>
>   <field  name="title" type="text_ktimatologio" indexed="true"
> stored="true"/>
>   <field  name="grid_title" type="text_ktimatologio" indexed="true"
> stored="true"/>
>   <field  name="model" type="string" indexed="true" stored="true"
> multiValued="false"/>
>   <field  name="type" type="string" indexed="true" stored="true"/>
>   <field  name="url" type="string" indexed="true" stored="true"/>
>   <field  name="last_modified" type="string" indexed="true" stored="true"/>
>   <field  name="search_tag" type="string" indexed="true" stored="true"/>
>   <field  name="contentbin" type="text" indexed="true" stored="true"
> multiValued="true"/>
>   <field  name="content" type="text_ktimatologio" indexed="true"
> stored="true" multiValued="true"/>
> </fields>
>
> I really need help on this!
>
> With respect,
>
> Tom
>
> Greece
>
>
>
> --
> View this message in context: http://lucene.472066.n3.nabble.com/Indexing-and-querying-BLOBS-stored-in-Mysql-tp4002940.html
> Sent from the Solr - User mailing list archive at Nabble.com.

Re: Indexing and querying BLOBS stored in Mysql

Posted by Alexey Serba <as...@gmail.com>.
I would recommend to create a simple data import handler to test tika
parsing for large BLOBs, i.e. remove not related entities, remove all
the configuration for delta imports and keep just entity that
retrieves blobs and entity that parses binary content
(fieldReader/TikaEntityProcessor).

Some comments:
1. Maybe you are running delta import and there are not new records in database?
2. deltaQuery should only return id-s and not other columns/data,
because you don't use them in deltaQueryImport (see
dataimporter.delta.id )
3. not all entities have HTMLStripTransformer in a transformers list,
but use them in fields. TemplateTransformer is not used at all.

>   <entity name="aitiologikes_ektheseis"
>         dataSource="db"
>         transformer="HTMLStripTransformer"
>         query="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
> body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'"
>         deltaImportQuery="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
> body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'
> and id='${dataimporter.delta.id}'"
>         deltaQuery="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT(
> body,' ',title)  AS content from aitiologikes_ektheseis where type = 'text'
> and last_modified > '${dataimporter.last_index_time}'">
>                 <field column="id" name="ida" />
>                 <field column="solr_id" name="solr_id" />
>                 <field column="title" name="title" stripHTML="true" />
>                 <field column="grid_title" name="grid_title" stripHTML="true" />
>                 <field column="model" name="model" stripHTML="true" />
>                 <field column="type" name="type" stripHTML="true" />
>                 <field column="url" name="url" stripHTML="true" />
>                 <field column="last_modified" name="last_modified" stripHTML="true"  />
>                 <field column="search_tag" name="search_tag" stripHTML="true" />
>                 <field column="content" name="content" stripHTML="true" />
>     </entity>
>
>     <entity name="aitiologikes_ektheseis_bin"
>           query="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con AS
> text from aitiologikes_ektheseis where type = 'bin'"
>           deltaImportQuery="select id, title, title AS grid_title, model, type,
> url, last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con
> AS text from aitiologikes_ektheseis where type = 'bin' and
> id='${dataimporter.delta.id}'"
>           deltaQuery="select id, title, title AS grid_title, model, type, url,
> last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con AS
> text from aitiologikes_ektheseis where type = 'bin' and last_modified >
> '${dataimporter.last_index_time}'"
>           transformer="TemplateTransformer"
>           dataSource="db">
>
>                   <field column="id" name="ida" />
>                 <field column="solr_id" name="solr_id" />
>                   <field column="title" name="title" stripHTML="true" />
>                   <field column="grid_title" name="grid_title" stripHTML="true" />
>                   <field column="model" name="model" stripHTML="true" />
>                   <field column="type" name="type" stripHTML="true" />
>                   <field column="url" name="url" stripHTML="true" />
>                   <field column="last_modified" name="last_modified" stripHTML="true"  />
>                   <field column="search_tag" name="search_tag" stripHTML="true" />
>
>                 <entity dataSource="fieldReader" processor="TikaEntityProcessor"
> dataField="aitiologikes_ektheseis_bin.text" format="text">
>                   <field column="text" name="contentbin" stripHTML="true" />
>                 </entity>
>
>         </entity>
>
> ...
> ...
>     </document>
>
> </dataConfig>
>
> *A portion from schema.xml (the fieldTypes and filed definition):*
>
> <fieldType name="text_ktimatologio" class="solr.TextField"
> positionIncrementGap="100">
>
>       <analyzer type="index">
>         <tokenizer class="solr.StandardTokenizerFactory"/>
>         <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
>         <filter class="solr.LowerCaseFilterFactory"/>
>             <filter class="solr.EnglishPossessiveFilterFactory"/>
>                 <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>             <filter class="solr.GreekLowerCaseFilterFactory"/>
>             <filter class="solr.GreekStemFilterFactory"/>
>         <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
>         <filter class="solr.PorterStemFilterFactory"/>
>       </analyzer>
>
>       <analyzer type="query">
>         <tokenizer class="solr.StandardTokenizerFactory"/>
>         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
> ignoreCase="true" expand="true"/>
>         <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
>                 <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>                 <filter class="solr.GreekLowerCaseFilterFactory"/>
>         <filter class="solr.GreekStemFilterFactory"/>
>         <filter class="solr.LowerCaseFilterFactory"/>
>             <filter class="solr.EnglishPossessiveFilterFactory"/>
>         <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
>         <filter class="solr.PorterStemFilterFactory"/>
>       </analyzer>
> </fieldType>
>
>
>
>
> <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
>         <analyzer type="index">
>                 <charFilter class="solr.HTMLStripCharFilterFactory"/>
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.StandardFilterFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>                 <filter class="solr.GreekLowerCaseFilterFactory"/>
>                 <filter class="solr.GreekStemFilterFactory"/>
>                 <filter class="solr.HunspellStemFilterFactory"
> dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff"
> ignoreCase="true" />
>         </analyzer>
>
>         <analyzer type="query">
>                 <charFilter class="solr.HTMLStripCharFilterFactory"/>
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.StandardFilterFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
>                 <filter class="solr.GreekLowerCaseFilterFactory"/>
>                 <filter class="solr.GreekStemFilterFactory"/>
>                 <filter class="solr.HunspellStemFilterFactory"
> dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff"
> ignoreCase="true" />
>         </analyzer>
> </fieldType>
>
>
> <fields>
>   <field  name="ida" type="string" indexed="true" stored="true"
> multiValued="false"/>
>   <field  name="solr_id" type="string" indexed="true" stored="true"
> multiValued="false"/>
>   <field  name="title" type="text_ktimatologio" indexed="true"
> stored="true"/>
>   <field  name="grid_title" type="text_ktimatologio" indexed="true"
> stored="true"/>
>   <field  name="model" type="string" indexed="true" stored="true"
> multiValued="false"/>
>   <field  name="type" type="string" indexed="true" stored="true"/>
>   <field  name="url" type="string" indexed="true" stored="true"/>
>   <field  name="last_modified" type="string" indexed="true" stored="true"/>
>   <field  name="search_tag" type="string" indexed="true" stored="true"/>
>   <field  name="contentbin" type="text" indexed="true" stored="true"
> multiValued="true"/>
>   <field  name="content" type="text_ktimatologio" indexed="true"
> stored="true" multiValued="true"/>
> </fields>
>
> I really need help on this!
>
> With respect,
>
> Tom
>
> Greece
>
>
>
> --
> View this message in context: http://lucene.472066.n3.nabble.com/Indexing-and-querying-BLOBS-stored-in-Mysql-tp4002940.html
> Sent from the Solr - User mailing list archive at Nabble.com.