You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@nutch.apache.org by gsamsa <ma...@gmail.com> on 2014/09/24 22:29:20 UTC

Nutch 1.9 with Solr 3.6.2 - Solr does not show any data

Hello guys, 
 
I have installed Apache Nutch 1.9 and Solr 3.6.2. Nutch is running
correctly; however, I have some problems with Solr. When opening:

http://127.0.0.1:8983/solr/

I see the following:
<http://lucene.472066.n3.nabble.com/file/n4160993/solr.jpg> 

When I click on "Solr Admin", I am not redirected anywhere...

My schema.xml looks like this:

/<?xml version="1.0" encoding="UTF-8" ?>
<schema name="nutch" version="1.5">
    <types>
        <fieldType name="string" class="solr.StrField"
sortMissingLast="true"
            omitNorms="true"/> 
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="float" class="solr.TrieFloatField"
precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>

        <fieldType name="text" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.StopFilterFactory"
                    ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"
                    catenateWords="1" catenateNumbers="1" catenateAll="0"
                    splitOnCaseChange="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory"
                    protected="protwords.txt"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
        </fieldType>
        <fieldType name="url" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"/>
            </analyzer>
        </fieldType>
    </types>
    <fields>
        <field name="id" type="string" stored="true" indexed="true"
            required="true"/>

        
        <field name="segment" type="string" stored="true" indexed="false"/>
        <field name="digest" type="string" stored="true" indexed="false"/>
        <field name="boost" type="float" stored="true" indexed="false"/>

        
        <field name="host" type="string" stored="false" indexed="true"/>
        <field name="url" type="url" stored="true" indexed="true"/>
        <field name="content" type="text" stored="false" indexed="true"/>
        <field name="title" type="text" stored="true" indexed="true"/>
        <field name="cache" type="string" stored="true" indexed="false"/>
        <field name="tstamp" type="date" stored="true" indexed="false"/>

        
        <field name="anchor" type="string" stored="true" indexed="true"
            multiValued="true"/>

        
        <field name="type" type="string" stored="true" indexed="true"
            multiValued="true"/>
        <field name="contentLength" type="long" stored="true"
            indexed="false"/>
        <field name="lastModified" type="date" stored="true"
            indexed="false"/>
        <field name="date" type="date" stored="true" indexed="true"/>

        
        <field name="lang" type="string" stored="true" indexed="true"/>

        
        <field name="subcollection" type="string" stored="true"
            indexed="true" multiValued="true"/>

        
        <field name="author" type="string" stored="true" indexed="true"/>
        <field name="tag" type="string" stored="true" indexed="true"
multiValued="true"/>
        <field name="feed" type="string" stored="true" indexed="true"/>
        <field name="publishedDate" type="date" stored="true"
            indexed="true"/>
        <field name="updatedDate" type="date" stored="true"
            indexed="true"/>

        
        <field name="cc" type="string" stored="true" indexed="true"
            multiValued="true"/>
            
            
        <field name="tld" type="string" stored="false" indexed="false"/>

        <field name="_version_" type="long" stored="true" indexed="true"/>
    </fields>
    <uniqueKey>id</uniqueKey>
    <defaultSearchField>content</defaultSearchField>
    <solrQueryParser defaultOperator="OR"/>
</schema>
/

Furthermore, I have set up Solr as described in the Apache wiki
<http://wiki.apache.org/nutch/NutchTutorial#A6._Integrate_Solr_with_Nutch>:

/mv ${APACHE_SOLR_HOME}/example/solr/conf/schema.xml
${APACHE_SOLR_HOME}/example/solr/conf/schema.xml.org

cp ${NUTCH_RUNTIME_HOME}/conf/schema.xml
${APACHE_SOLR_HOME}/example/solr/conf/
vi ${APACHE_SOLR_HOME}/example/solr/conf/schema.xml

Insert exactly at line 351: <field name="_version_" type="long" indexed="true"
stored="true"/>

restart Solr with the command “java -jar start.jar” under
${APACHE_SOLR_HOME}/example/


Any suggestions as to what I am doing wrong?

Would it be better to use the latest *Solr 4.10*? Are there any good tutorials
on how to configure this version?

I really appreciate your answers!





--
View this message in context: http://lucene.472066.n3.nabble.com/Nutch-1-9-with-Solr-3-6-2-Solr-does-not-show-any-data-tp4160993.html
Sent from the Nutch - User mailing list archive at Nabble.com.

Re: Nutch 1.9 with Solr 3.6.2 - Solr does not show any data

Posted by Talat Uyarer <ta...@uyarer.com>.
Hi,

I think you have already fixed this problem. Am I wrong?

Talat
On Sep 28, 2014 1:28 AM, "gsamsa" <ma...@gmail.com> wrote:

> Hello guys,
>
> I have installed apache nutch 1.9 and solr 3.6.2. Nutch is running
> correctly, however, I have some problems with solr. When opening:
>
> /http://127.0.0.1:8983/solr//
>
> I see the following:
> <http://lucene.472066.n3.nabble.com/file/n4160993/solr.jpg>
>
> When clicking solr admin nothing redirects me...
>
> My schema.xml looks like that:
>
> /<?xml version="1.0" encoding="UTF-8" ?>
> <schema name="nutch" version="1.5">
>     <types>
>         <fieldType name="string" class="solr.StrField"
> sortMissingLast="true"
>             omitNorms="true"/>
>         <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
>             omitNorms="true" positionIncrementGap="0"/>
>         <fieldType name="float" class="solr.TrieFloatField"
> precisionStep="0"
>             omitNorms="true" positionIncrementGap="0"/>
>         <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
>             omitNorms="true" positionIncrementGap="0"/>
>
>         <fieldType name="text" class="solr.TextField"
>             positionIncrementGap="100">
>             <analyzer>
>                 <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>                 <filter class="solr.StopFilterFactory"
>                     ignoreCase="true" words="stopwords.txt"/>
>                 <filter class="solr.WordDelimiterFilterFactory"
>                     generateWordParts="1" generateNumberParts="1"
>                     catenateWords="1" catenateNumbers="1" catenateAll="0"
>                     splitOnCaseChange="1"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.EnglishPorterFilterFactory"
>                     protected="protwords.txt"/>
>                 <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>             </analyzer>
>         </fieldType>
>         <fieldType name="url" class="solr.TextField"
>             positionIncrementGap="100">
>             <analyzer>
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.WordDelimiterFilterFactory"
>                     generateWordParts="1" generateNumberParts="1"/>
>             </analyzer>
>         </fieldType>
>     </types>
>     <fields>
>         <field name="id" type="string" stored="true" indexed="true"
>             required="true"/>
>
>
>         <field name="segment" type="string" stored="true" indexed="false"/>
>         <field name="digest" type="string" stored="true" indexed="false"/>
>         <field name="boost" type="float" stored="true" indexed="false"/>
>
>
>         <field name="host" type="string" stored="false" indexed="true"/>
>         <field name="url" type="url" stored="true" indexed="true"/>
>         <field name="content" type="text" stored="false" indexed="true"/>
>         <field name="title" type="text" stored="true" indexed="true"/>
>         <field name="cache" type="string" stored="true" indexed="false"/>
>         <field name="tstamp" type="date" stored="true" indexed="false"/>
>
>
>         <field name="anchor" type="string" stored="true" indexed="true"
>             multiValued="true"/>
>
>
>         <field name="type" type="string" stored="true" indexed="true"
>             multiValued="true"/>
>         <field name="contentLength" type="long" stored="true"
>             indexed="false"/>
>         <field name="lastModified" type="date" stored="true"
>             indexed="false"/>
>         <field name="date" type="date" stored="true" indexed="true"/>
>
>
>         <field name="lang" type="string" stored="true" indexed="true"/>
>
>
>         <field name="subcollection" type="string" stored="true"
>             indexed="true" multiValued="true"/>
>
>
>         <field name="author" type="string" stored="true" indexed="true"/>
>         <field name="tag" type="string" stored="true" indexed="true"
> multiValued="true"/>
>         <field name="feed" type="string" stored="true" indexed="true"/>
>         <field name="publishedDate" type="date" stored="true"
>             indexed="true"/>
>         <field name="updatedDate" type="date" stored="true"
>             indexed="true"/>
>
>
>         <field name="cc" type="string" stored="true" indexed="true"
>             multiValued="true"/>
>
>
>         <field name="tld" type="string" stored="false" indexed="false"/>
>
>         <field name="_version_" type="long" stored="true" indexed="true"/>
>     </fields>
>     <uniqueKey>id</uniqueKey>
>     <defaultSearchField>content</defaultSearchField>
>     <solrQueryParser defaultOperator="OR"/>
> </schema>
> /
>
> Furthermore, I have set solr up like from the  apache wiki
> <http://wiki.apache.org/nutch/NutchTutorial#A6._Integrate_Solr_with_Nutch>
> :
>
> /mv ${APACHE_SOLR_HOME}/example/solr/conf/schema.xml
> ${APACHE_SOLR_HOME}/example/solr/conf/schema.xml.org
>
> cp ${NUTCH_RUNTIME_HOME}/conf/schema.xml
> ${APACHE_SOLR_HOME}/example/solr/conf/
> vi ${APACHE_SOLR_HOME}/example/solr/conf/schema.xml
>
> Copy exactly in 351 line: <field name="_version_" type="long"
> indexed="true"
> stored="true"/>
>
> restart Solr with the command “java -jar start.jar” under
> ${APACHE_SOLR_HOME}/example/
>
>
> Any recommendations what I am doing wrong?
>
> Would it be better to use the latest *solr 4.10*? Any good tutorials how to
> configure this version?
>
> I really appreciate your answers!
>
>
>
>
>
> --
> View this message in context:
> http://lucene.472066.n3.nabble.com/Nutch-1-9-with-Solr-3-6-2-Solr-does-not-show-any-data-tp4160993.html
> Sent from the Nutch - User mailing list archive at Nabble.com.
>