You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@nutch.apache.org by "Arthur.hk.chan@gmail.com" <ar...@gmail.com> on 2014/12/02 10:52:26 UTC

ERROR: [doc=http://nutch.apache.org/] unknown field 'metatag.keywords'

Hi,

I am new to Nutch and Solr, please help!!

I am using Nutch-1.9, solr-4.10.2 and Hadoop-2.4.1, 

It always returns "org.apache.solr.common.SolrException: Bad Request”
(I have already copied [nutch]conf/schema.xml to [solr]/collection1/conf/schema.xml and restarted solr)

Below is about my settings, some questions:
Q1: Do I need to manually copy some .jar files to notch’s lib folder or solr’s lib folder? (e.g. need to copy hadoop’s jar files to nutch ir sold?)
Q2: Would it be something wrong in my Plugin setup?


regards
Arthur


My Nutch command:
./bin/crawl input_url/ output_url/ http://192.168.0.1:8983/solr/collection1 2


input_url/seed.txt:
http://nutch.apache.org/



conf/regex-urlfilter.txt
# skip file: ftp: and mailto: urls
-^(file|ftp|mailto):
# skip image and other suffixes we can't yet parse
# for a more extensive coverage use the urlfilter-suffix plugin
-\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|CSS|sit|SIT|eps|EPS|wmf|WMF|zip|ZIP|ppt|PPT|mpg|MPG|xls|XLS|gz|GZ|rpm|RPM|tgz|TGZ|mov|MOV|exe|EXE|jpeg|JPEG|bmp|BMP|js|JS)$
# skip URLs containing certain characters as probable queries, etc.
-[?*!@=]
# skip URLs with slash-delimited segment that repeats 3+ times, to break loops
-.*(/[^/]+)/[^/]+\1/[^/]+\1/
# accept anything else
#+.
+^http://([a-z0-9]*\.)*nutch.apache.org/



conf/nutch-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
 <property>
  <name>http.agent.name</name>
  <value>MyBot</value>
 </property>
 <property>
  <name>http.robots.agents</name>
  <value>MyBot,*</value>
 </property>
 <property>
  <name>fetcher.store.content</name>
  <value>true</value>
 </property>
 <property>
  <name>fetcher.max.crawl.delay</name>
  <value>-1</value>
 </property>
 <property>
  <name>plugin.includes</name>
  <value>protocol-http|urlfilter-regex|parse-(html|tika|metatags)|index-(basic|anchor|metadata)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|indexer-solr|urlnormalizer-(pass|regex|basic)</value>
 </property>
 <property>
  <name>mapred.temp.dir</name>
  <value>/tmp</value>
  <description>A shared directory for temporary files.</description>
 </property>
<!-- Used only if plugin parse-metatags is enabled. -->
<property>
<name>metatags.names</name>
<value>metatag.keywords;metatag.description</value>
</property>
<property>
  <name>index.parse.md</name>
  <value>metatag.description,metatag.keywords</value>
</property>
</configuration>



conf/solrindex-mapping.xml
<?xml version="1.0" encoding="UTF-8"?>
<mapping>
	<fields>
		<field dest="content" source="content"/>
		<field dest="title" source="title"/>
		<field dest="host" source="host"/>
		<field dest="segment" source="segment"/>
		<field dest="boost" source="boost"/>
		<field dest="digest" source="digest"/>
		<field dest="tstamp" source="tstamp"/>

                <field dest="description" source="description"/>
                <field dest="keywords" source="keywords"/>
	</fields>
	<uniqueKey>id</uniqueKey>
</mapping>



conf/schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="nutch" version="1.5">
    <types>
        <fieldType name="string" class="solr.StrField" sortMissingLast="true"
            omitNorms="true"/> 
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>

        <fieldType name="text" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.StopFilterFactory"
                    ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"
                    catenateWords="1" catenateNumbers="1" catenateAll="0"
                    splitOnCaseChange="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory"
                    protected="protwords.txt"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
        </fieldType>
        <fieldType name="url" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"/>
            </analyzer>
        </fieldType>
    </types>
    <fields>

      <field name="_root_" type="string" indexed="true" stored="false"/>
        <field name="id" type="string" stored="true" indexed="true"
            required="true"/>

        <!-- core fields -->
        <field name="_version_" type="long" indexed="true" stored="true"/>
        <field name="host" type="string" stored="false" indexed="true"/>   
        <field name="digest" type="string" stored="true" indexed="false"/>   
        <field name="segment" type="string" stored="true" indexed="false"/>   
        <field name="boost" type="float" stored="true" indexed="false"/>   
        <field name="tstamp" type="date" stored="true" indexed="false"/>  

        <field name="url" type="text" indexed="true" stored="true" required="true"/> 
        <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
        <field name="last_modified" type="date" indexed="true" stored="true"/>
        <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>

        <!-- fields for the metatags plugin -->
        <field name="metatag.description" type="text" stored="true" indexed="true"/>
        <field name="metatag.keywords" type="text" stored="true" indexed="true"/>

        <!-- fields for index-basic plugin -->
        <field name="content" type="text" indexed="true" stored="true" multiValued="true"/> 
        <field name="title" type="text" stored="true" indexed="true"/>
        <field name="cache" type="string" stored="true" indexed="false"/>

        <!-- fields for index-anchor plugin -->
        <field name="anchor" type="string" stored="true" indexed="true"
            multiValued="true"/>

        <!-- fields for index-more plugin -->
        <field name="type" type="string" stored="true" indexed="true"
            multiValued="true"/>
        <field name="contentLength" type="long" stored="true"
            indexed="false"/>
        <field name="lastModified" type="date" stored="true"
            indexed="false"/>
        <field name="date" type="date" stored="true" indexed="true"/>

        <!-- fields for languageidentifier plugin -->
        <field name="lang" type="string" stored="true" indexed="true"/>

        <!-- fields for subcollection plugin -->
        <field name="subcollection" type="string" stored="true"
            indexed="true" multiValued="true"/>

        <!-- fields for feed plugin (tag is also used by microformats-reltag)-->
        <field name="author" type="string" stored="true" indexed="true"/>
        <field name="tag" type="string" stored="true" indexed="true" multiValued="true"/>
        <field name="feed" type="string" stored="true" indexed="true"/>
        <field name="publishedDate" type="date" stored="true"
            indexed="true"/>
        <field name="updatedDate" type="date" stored="true"
            indexed="true"/>

        <!-- fields for creativecommons plugin -->
        <field name="cc" type="string" stored="true" indexed="true"
            multiValued="true"/>
            
        <!-- fields for tld plugin -->    
        <field name="tld" type="string" stored="false" indexed="false"/>
    </fields>
    <uniqueKey>id</uniqueKey>
    <defaultSearchField>content</defaultSearchField>
    <solrQueryParser defaultOperator="OR"/>
</schema>



Nutch logs/hadoop.log:
org.apache.solr.common.SolrException: Bad Request
Bad Request
request: http://192.168.0.1:8983/solr/collection1/update?wt=javabin&version=2
	at org.apache.solr.client.solrj.impl.CommonsHttpSolrServer.request(CommonsHttpSolrServer.java:430)
	at org.apache.solr.client.solrj.impl.CommonsHttpSolrServer.request(CommonsHttpSolrServer.java:244)
	at org.apache.solr.client.solrj.request.AbstractUpdateRequest.process(AbstractUpdateRequest.java:105)
	at org.apache.nutch.indexwriter.solr.SolrIndexWriter.close(SolrIndexWriter.java:155)
	at org.apache.nutch.indexer.IndexWriters.close(IndexWriters.java:118)
	at org.apache.nutch.indexer.IndexerOutputFormat$1.close(IndexerOutputFormat.java:44)
	at org.apache.hadoop.mapred.ReduceTask$OldTrackingRecordWriter.close(ReduceTask.java:467)
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:535)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:421)
	at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:398)
        2014-12-02 17:16:55,021 ERROR indexer.IndexingJob - Indexer: java.io.IOException: Job failed!
	at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1357)
	at org.apache.nutch.indexer.IndexingJob.index(IndexingJob.java:114)
	at org.apache.nutch.indexer.IndexingJob.run(IndexingJob.java:176)
	at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
	at org.apache.nutch.indexer.IndexingJob.main(IndexingJob.java:186)



Solr logs/solr.log:
INFO  - 2014-12-02 17:08:32.650; org.apache.solr.cloud.Overseer$ClusterStateUpdater; Update state numShards=null message={
  "operation":"state",
  "core_node_name":"core_node1",
  "shard":"shard1",
  "roles":null,
  "state":"active",
  "core":"collection1",
  "collection":"collection1",
  "node_name":"192.168.0.1:8983_solr",
  "base_url":"http://192.168.0.1:8983/solr"}
INFO  - 2014-12-02 17:08:32.675; org.apache.solr.cloud.DistributedQueue$LatchChildWatcher; LatchChildWatcher fired on path: /overseer/queue state: SyncConnected type NodeChildrenChanged
INFO  - 2014-12-02 17:08:32.686; org.apache.solr.common.cloud.ZkStateReader$2; A cluster state change: WatchedEvent state:SyncConnected type:NodeDataChanged path:/clusterstate.json, has occurred - updating... (live nodes size: 1)
INFO  - 2014-12-02 17:08:38.556; org.apache.solr.servlet.SolrDispatchFilter; [admin] webapp=null path=/admin/cores params={indexInfo=false&_=1417511318548&wt=json} status=0 QTime=2 
INFO  - 2014-12-02 17:08:38.680; org.apache.solr.servlet.SolrDispatchFilter; [admin] webapp=null path=/admin/info/system params={_=1417511318656&wt=json} status=0 QTime=5 
INFO  - 2014-12-02 17:08:39.270; org.apache.solr.core.SolrCore; [collection1] webapp=/solr path=/admin/file/ params={file=admin-extra.menu-bottom.html&_=1417511319050&contentType=text/html;charset%3Dutf-8} status=0 QTime=7 
INFO  - 2014-12-02 17:08:39.270; org.apache.solr.core.SolrCore; [collection1] webapp=/solr path=/admin/file/ params={file=admin-extra.menu-top.html&_=1417511319041&contentType=text/html;charset%3Dutf-8} status=0 QTime=12 
INFO  - 2014-12-02 17:08:39.374; org.apache.solr.core.SolrCore; [collection1] webapp=/solr path=/admin/luke params={numTerms=0&_=1417511319348&show=index&wt=json} status=0 QTime=1 
INFO  - 2014-12-02 17:08:39.387; org.apache.solr.core.SolrCore; [collection1] webapp=/solr path=/replication params={command=details&_=1417511319355&wt=json} status=0 QTime=8 
INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore; [collection1] webapp=/solr path=/admin/system params={_=1417511319358&wt=json} status=0 QTime=4 
INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore; [collection1] webapp=/solr path=/admin/ping params={action=status&_=1417511319368&wt=json} status=503 QTime=2 
INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore; [collection1] webapp=/solr path=/admin/file/ params={file=admin-extra.html&_=1417511319363} status=0 QTime=0 
INFO  - 2014-12-02 17:09:08.990; org.apache.solr.update.processor.LogUpdateProcessor; [collection1] webapp=/solr path=/update params={wt=javabin&version=2} {} 0 29
ERROR - 2014-12-02 17:09:08.990; org.apache.solr.common.SolrException; org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/] unknown field 'metatag.keywords'
	at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
	at org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
	at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
	at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
	at org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
	at org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
	at org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
	at org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
	at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
	at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
	at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
	at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
	at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
	at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
	at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
	at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
	at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
	at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
	at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
	at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
	at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
	at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
	at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
	at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
	at org.eclipse.jetty.server.Server.handle(Server.java:368)
	at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
	at org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
	at org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
	at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
	at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
	at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)
	at org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
	at org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
	at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
	at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
	at java.lang.Thread.run(Thread.java:745)

INFO  - 2014-12-02 17:09:08.999; org.apache.solr.update.processor.LogUpdateProcessor; [collection1] webapp=/solr path=/update params={wt=javabin&version=2} {} 0 1
ERROR - 2014-12-02 17:09:09.000; org.apache.solr.common.SolrException; org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/] unknown field 'metatag.keywords'
	at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
	at org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
	at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
	at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
	at org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
	at org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
	at org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
	at org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
	at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
	at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
	at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
	at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
	at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
	at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
	at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
	at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
	at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
	at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
	at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
	at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
	at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
	at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
	at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
	at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
	at org.eclipse.jetty.server.Server.handle(Server.java:368)
	at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
	at org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
	at org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
	at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
	at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
	at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
	at org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
	at org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
	at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
	at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
	at java.lang.Thread.run(Thread.java:745)

INFO  - 2014-12-02 17:16:54.284; org.apache.solr.update.processor.LogUpdateProcessor; [collection1] webapp=/solr path=/update params={wt=javabin&version=2} {} 0 5
ERROR - 2014-12-02 17:16:54.285; org.apache.solr.common.SolrException; org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/] unknown field 'host'
	at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
	at org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
	at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
	at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
	at org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
	at org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
	at org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
	at org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
	at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
	at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
	at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
	at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
	at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
	at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
	at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
	at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
	at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
	at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
	at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
	at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
	at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
	at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
	at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
	at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
	at org.eclipse.jetty.server.Server.handle(Server.java:368)
	at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
	at org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
	at org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
	at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
	at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
	at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
	at org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
	at org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
	at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
	at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
	at java.lang.Thread.run(Thread.java:745)
INFO  - 2014-12-02 17:16:54.292; org.apache.solr.update.processor.LogUpdateProcessor; [collection1] webapp=/solr path=/update params={wt=javabin&version=2} {} 0 1
ERROR - 2014-12-02 17:16:54.292; org.apache.solr.common.SolrException; org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/] unknown field 'host'
	at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
	at org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
	at org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
	at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
	at org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
	at org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
	at org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
	at org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
	at org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
	at org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
	at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
	at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
	at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
	at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
	at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
	at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
	at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
	at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
	at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
	at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
	at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
	at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
	at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
	at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
	at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
	at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
	at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
	at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
	at org.eclipse.jetty.server.Server.handle(Server.java:368)
	at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
	at org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
	at org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
	at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
	at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
	at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
	at org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
	at org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
	at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
	at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
	at java.lang.Thread.run(Thread.java:745)



Re: ERROR: [doc=http://nutch.apache.org/] unknown field 'metatag.keywords'

Posted by "Arthur.hk.chan@gmail.com" <ar...@gmail.com>.
Hi,
Thank you!!

I fixed the  issue related to "unknown field ‘metatag.keywords’”.  

regards
Arthur

On 2 Dec, 2014, at 10:23 pm, Jonathan Cooper-Ellis <jc...@ziftr.com> wrote:

> Hi,
> 
> In solrindex-mapping.xml, try changing the values for "source" to
> metatag.keywords and metatag.description. Or change the fields Solr is
> expecting to metatag.keywords and metatag.description. Hope that helps!
> 
> On Tue, Dec 2, 2014 at 4:52 AM, Arthur.hk.chan@gmail.com <
> arthur.hk.chan@gmail.com> wrote:
> 
>> Hi,
>> 
>> I am new to Nutch and Solr, please help!!
>> 
>> I am using Nutch-1.9, solr-4.10.2 and Hadoop-2.4.1,
>> 
>> It always returns "org.apache.solr.common.SolrException: Bad Request”
>> (I have already copied [nutch]conf/schema.xml to
>> [solr]/collection1/conf/schema.xml and restarted solr)
>> 
>> Below is about my settings, some questions:
>> Q1: Do I need to manually copy some .jar files to notch’s lib folder or
>> solr’s lib folder? (e.g. need to copy hadoop’s jar files to nutch ir sold?)
>> Q2: Would it be something wrong in my Plugin setup?
>> 
>> 
>> regards
>> Arthur
>> 
>> 
>> My Nutch command:
>> ./bin/crawl input_url/ output_url/
>> http://192.168.0.1:8983/solr/collection1 2
>> 
>> 
>> input_url/seed.txt:
>> http://nutch.apache.org/
>> 
>> 
>> 
>> conf/regex-urlfilter.txt
>> # skip file: ftp: and mailto: urls
>> -^(file|ftp|mailto):
>> # skip image and other suffixes we can't yet parse
>> # for a more extensive coverage use the urlfilter-suffix plugin
>> 
>> -\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|CSS|sit|SIT|eps|EPS|wmf|WMF|zip|ZIP|ppt|PPT|mpg|MPG|xls|XLS|gz|GZ|rpm|RPM|tgz|TGZ|mov|MOV|exe|EXE|jpeg|JPEG|bmp|BMP|js|JS)$
>> # skip URLs containing certain characters as probable queries, etc.
>> -[?*!@=]
>> # skip URLs with slash-delimited segment that repeats 3+ times, to break
>> loops
>> -.*(/[^/]+)/[^/]+\1/[^/]+\1/
>> # accept anything else
>> #+.
>> +^http://([a-z0-9]*\.)*nutch.apache.org/
>> 
>> 
>> 
>> conf/nutch-site.xml
>> <?xml version="1.0"?>
>> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
>> <configuration>
>> <property>
>>  <name>http.agent.name</name>
>>  <value>MyBot</value>
>> </property>
>> <property>
>>  <name>http.robots.agents</name>
>>  <value>MyBot,*</value>
>> </property>
>> <property>
>>  <name>fetcher.store.content</name>
>>  <value>true</value>
>> </property>
>> <property>
>>  <name>fetcher.max.crawl.delay</name>
>>  <value>-1</value>
>> </property>
>> <property>
>>  <name>plugin.includes</name>
>> 
>> <value>protocol-http|urlfilter-regex|parse-(html|tika|metatags)|index-(basic|anchor|metadata)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|indexer-solr|urlnormalizer-(pass|regex|basic)</value>
>> </property>
>> <property>
>>  <name>mapred.temp.dir</name>
>>  <value>/tmp</value>
>>  <description>A shared directory for temporary files.</description>
>> </property>
>> <!-- Used only if plugin parse-metatags is enabled. -->
>> <property>
>> <name>metatags.names</name>
>> <value>metatag.keywords;metatag.description</value>
>> </property>
>> <property>
>>  <name>index.parse.md</name>
>>  <value>metatag.description,metatag.keywords</value>
>> </property>
>> </configuration>
>> 
>> 
>> 
>> conf/solrindex-mapping.xml
>> <?xml version="1.0" encoding="UTF-8"?>
>> <mapping>
>>        <fields>
>>                <field dest="content" source="content"/>
>>                <field dest="title" source="title"/>
>>                <field dest="host" source="host"/>
>>                <field dest="segment" source="segment"/>
>>                <field dest="boost" source="boost"/>
>>                <field dest="digest" source="digest"/>
>>                <field dest="tstamp" source="tstamp"/>
>> 
>>                <field dest="description" source="description"/>
>>                <field dest="keywords" source="keywords"/>
>>        </fields>
>>        <uniqueKey>id</uniqueKey>
>> </mapping>
>> 
>> 
>> 
>> conf/schema.xml
>> <?xml version="1.0" encoding="UTF-8" ?>
>> <schema name="nutch" version="1.5">
>>    <types>
>>        <fieldType name="string" class="solr.StrField"
>> sortMissingLast="true"
>>            omitNorms="true"/>
>>        <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
>>            omitNorms="true" positionIncrementGap="0"/>
>>        <fieldType name="float" class="solr.TrieFloatField"
>> precisionStep="0"
>>            omitNorms="true" positionIncrementGap="0"/>
>>        <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
>>            omitNorms="true" positionIncrementGap="0"/>
>> 
>>        <fieldType name="text" class="solr.TextField"
>>            positionIncrementGap="100">
>>            <analyzer>
>>                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>>                <filter class="solr.StopFilterFactory"
>>                    ignoreCase="true" words="stopwords.txt"/>
>>                <filter class="solr.WordDelimiterFilterFactory"
>>                    generateWordParts="1" generateNumberParts="1"
>>                    catenateWords="1" catenateNumbers="1" catenateAll="0"
>>                    splitOnCaseChange="1"/>
>>                <filter class="solr.LowerCaseFilterFactory"/>
>>                <filter class="solr.EnglishPorterFilterFactory"
>>                    protected="protwords.txt"/>
>>                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>>            </analyzer>
>>        </fieldType>
>>        <fieldType name="url" class="solr.TextField"
>>            positionIncrementGap="100">
>>            <analyzer>
>>                <tokenizer class="solr.StandardTokenizerFactory"/>
>>                <filter class="solr.LowerCaseFilterFactory"/>
>>                <filter class="solr.WordDelimiterFilterFactory"
>>                    generateWordParts="1" generateNumberParts="1"/>
>>            </analyzer>
>>        </fieldType>
>>    </types>
>>    <fields>
>> 
>>      <field name="_root_" type="string" indexed="true" stored="false"/>
>>        <field name="id" type="string" stored="true" indexed="true"
>>            required="true"/>
>> 
>>        <!-- core fields -->
>>        <field name="_version_" type="long" indexed="true" stored="true"/>
>>        <field name="host" type="string" stored="false" indexed="true"/>
>>        <field name="digest" type="string" stored="true" indexed="false"/>
>>        <field name="segment" type="string" stored="true" indexed="false"/>
>>        <field name="boost" type="float" stored="true" indexed="false"/>
>>        <field name="tstamp" type="date" stored="true" indexed="false"/>
>> 
>>        <field name="url" type="text" indexed="true" stored="true"
>> required="true"/>
>>        <field name="content_type" type="string" indexed="true"
>> stored="true" multiValued="true"/>
>>        <field name="last_modified" type="date" indexed="true"
>> stored="true"/>
>>        <field name="links" type="string" indexed="true" stored="true"
>> multiValued="true"/>
>> 
>>        <!-- fields for the metatags plugin -->
>>        <field name="metatag.description" type="text" stored="true"
>> indexed="true"/>
>>        <field name="metatag.keywords" type="text" stored="true"
>> indexed="true"/>
>> 
>>        <!-- fields for index-basic plugin -->
>>        <field name="content" type="text" indexed="true" stored="true"
>> multiValued="true"/>
>>        <field name="title" type="text" stored="true" indexed="true"/>
>>        <field name="cache" type="string" stored="true" indexed="false"/>
>> 
>>        <!-- fields for index-anchor plugin -->
>>        <field name="anchor" type="string" stored="true" indexed="true"
>>            multiValued="true"/>
>> 
>>        <!-- fields for index-more plugin -->
>>        <field name="type" type="string" stored="true" indexed="true"
>>            multiValued="true"/>
>>        <field name="contentLength" type="long" stored="true"
>>            indexed="false"/>
>>        <field name="lastModified" type="date" stored="true"
>>            indexed="false"/>
>>        <field name="date" type="date" stored="true" indexed="true"/>
>> 
>>        <!-- fields for languageidentifier plugin -->
>>        <field name="lang" type="string" stored="true" indexed="true"/>
>> 
>>        <!-- fields for subcollection plugin -->
>>        <field name="subcollection" type="string" stored="true"
>>            indexed="true" multiValued="true"/>
>> 
>>        <!-- fields for feed plugin (tag is also used by
>> microformats-reltag)-->
>>        <field name="author" type="string" stored="true" indexed="true"/>
>>        <field name="tag" type="string" stored="true" indexed="true"
>> multiValued="true"/>
>>        <field name="feed" type="string" stored="true" indexed="true"/>
>>        <field name="publishedDate" type="date" stored="true"
>>            indexed="true"/>
>>        <field name="updatedDate" type="date" stored="true"
>>            indexed="true"/>
>> 
>>        <!-- fields for creativecommons plugin -->
>>        <field name="cc" type="string" stored="true" indexed="true"
>>            multiValued="true"/>
>> 
>>        <!-- fields for tld plugin -->
>>        <field name="tld" type="string" stored="false" indexed="false"/>
>>    </fields>
>>    <uniqueKey>id</uniqueKey>
>>    <defaultSearchField>content</defaultSearchField>
>>    <solrQueryParser defaultOperator="OR"/>
>> </schema>
>> 
>> 
>> 
>> Nutch logs/hadoop.log:
>> org.apache.solr.common.SolrException: Bad Request
>> Bad Request
>> request:
>> http://192.168.0.1:8983/solr/collection1/update?wt=javabin&version=2
>>        at
>> org.apache.solr.client.solrj.impl.CommonsHttpSolrServer.request(CommonsHttpSolrServer.java:430)
>>        at
>> org.apache.solr.client.solrj.impl.CommonsHttpSolrServer.request(CommonsHttpSolrServer.java:244)
>>        at
>> org.apache.solr.client.solrj.request.AbstractUpdateRequest.process(AbstractUpdateRequest.java:105)
>>        at
>> org.apache.nutch.indexwriter.solr.SolrIndexWriter.close(SolrIndexWriter.java:155)
>>        at
>> org.apache.nutch.indexer.IndexWriters.close(IndexWriters.java:118)
>>        at
>> org.apache.nutch.indexer.IndexerOutputFormat$1.close(IndexerOutputFormat.java:44)
>>        at
>> org.apache.hadoop.mapred.ReduceTask$OldTrackingRecordWriter.close(ReduceTask.java:467)
>>        at
>> org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:535)
>>        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:421)
>>        at
>> org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:398)
>>        2014-12-02 17:16:55,021 ERROR indexer.IndexingJob - Indexer:
>> java.io.IOException: Job failed!
>>        at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1357)
>>        at org.apache.nutch.indexer.IndexingJob.index(IndexingJob.java:114)
>>        at org.apache.nutch.indexer.IndexingJob.run(IndexingJob.java:176)
>>        at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
>>        at org.apache.nutch.indexer.IndexingJob.main(IndexingJob.java:186)
>> 
>> 
>> 
>> Solr logs/solr.log:
>> INFO  - 2014-12-02 17:08:32.650;
>> org.apache.solr.cloud.Overseer$ClusterStateUpdater; Update state
>> numShards=null message={
>>  "operation":"state",
>>  "core_node_name":"core_node1",
>>  "shard":"shard1",
>>  "roles":null,
>>  "state":"active",
>>  "core":"collection1",
>>  "collection":"collection1",
>>  "node_name":"192.168.0.1:8983_solr",
>>  "base_url":"http://192.168.0.1:8983/solr"}
>> INFO  - 2014-12-02 17:08:32.675;
>> org.apache.solr.cloud.DistributedQueue$LatchChildWatcher; LatchChildWatcher
>> fired on path: /overseer/queue state: SyncConnected type NodeChildrenChanged
>> INFO  - 2014-12-02 17:08:32.686;
>> org.apache.solr.common.cloud.ZkStateReader$2; A cluster state change:
>> WatchedEvent state:SyncConnected type:NodeDataChanged
>> path:/clusterstate.json, has occurred - updating... (live nodes size: 1)
>> INFO  - 2014-12-02 17:08:38.556;
>> org.apache.solr.servlet.SolrDispatchFilter; [admin] webapp=null
>> path=/admin/cores params={indexInfo=false&_=1417511318548&wt=json} status=0
>> QTime=2
>> INFO  - 2014-12-02 17:08:38.680;
>> org.apache.solr.servlet.SolrDispatchFilter; [admin] webapp=null
>> path=/admin/info/system params={_=1417511318656&wt=json} status=0 QTime=5
>> INFO  - 2014-12-02 17:08:39.270; org.apache.solr.core.SolrCore;
>> [collection1] webapp=/solr path=/admin/file/
>> params={file=admin-extra.menu-bottom.html&_=1417511319050&contentType=text/html;charset%3Dutf-8}
>> status=0 QTime=7
>> INFO  - 2014-12-02 17:08:39.270; org.apache.solr.core.SolrCore;
>> [collection1] webapp=/solr path=/admin/file/
>> params={file=admin-extra.menu-top.html&_=1417511319041&contentType=text/html;charset%3Dutf-8}
>> status=0 QTime=12
>> INFO  - 2014-12-02 17:08:39.374; org.apache.solr.core.SolrCore;
>> [collection1] webapp=/solr path=/admin/luke
>> params={numTerms=0&_=1417511319348&show=index&wt=json} status=0 QTime=1
>> INFO  - 2014-12-02 17:08:39.387; org.apache.solr.core.SolrCore;
>> [collection1] webapp=/solr path=/replication
>> params={command=details&_=1417511319355&wt=json} status=0 QTime=8
>> INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore;
>> [collection1] webapp=/solr path=/admin/system
>> params={_=1417511319358&wt=json} status=0 QTime=4
>> INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore;
>> [collection1] webapp=/solr path=/admin/ping
>> params={action=status&_=1417511319368&wt=json} status=503 QTime=2
>> INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore;
>> [collection1] webapp=/solr path=/admin/file/
>> params={file=admin-extra.html&_=1417511319363} status=0 QTime=0
>> INFO  - 2014-12-02 17:09:08.990;
>> org.apache.solr.update.processor.LogUpdateProcessor; [collection1]
>> webapp=/solr path=/update params={wt=javabin&version=2} {} 0 29
>> ERROR - 2014-12-02 17:09:08.990; org.apache.solr.common.SolrException;
>> org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/]
>> unknown field 'metatag.keywords'
>>        at
>> org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
>>        at
>> org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
>>        at
>> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
>>        at
>> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
>>        at
>> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
>>        at
>> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
>>        at
>> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
>>        at
>> org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
>>        at
>> org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
>>        at
>> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
>>        at
>> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
>>        at
>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>>        at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
>>        at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>>        at
>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
>>        at
>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
>>        at
>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
>>        at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>>        at
>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>>        at
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>>        at org.eclipse.jetty.server.Server.handle(Server.java:368)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
>>        at
>> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
>>        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
>>        at
>> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)
>>        at
>> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>>        at
>> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>>        at
>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>>        at
>> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>>        at java.lang.Thread.run(Thread.java:745)
>> 
>> INFO  - 2014-12-02 17:09:08.999;
>> org.apache.solr.update.processor.LogUpdateProcessor; [collection1]
>> webapp=/solr path=/update params={wt=javabin&version=2} {} 0 1
>> ERROR - 2014-12-02 17:09:09.000; org.apache.solr.common.SolrException;
>> org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/]
>> unknown field 'metatag.keywords'
>>        at
>> org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
>>        at
>> org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
>>        at
>> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
>>        at
>> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
>>        at
>> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
>>        at
>> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
>>        at
>> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
>>        at
>> org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
>>        at
>> org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
>>        at
>> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
>>        at
>> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
>>        at
>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>>        at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
>>        at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>>        at
>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
>>        at
>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
>>        at
>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
>>        at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>>        at
>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>>        at
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>>        at org.eclipse.jetty.server.Server.handle(Server.java:368)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
>>        at
>> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
>>        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
>>        at
>> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>>        at
>> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>>        at
>> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>>        at
>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>>        at
>> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>>        at java.lang.Thread.run(Thread.java:745)
>> 
>> INFO  - 2014-12-02 17:16:54.284;
>> org.apache.solr.update.processor.LogUpdateProcessor; [collection1]
>> webapp=/solr path=/update params={wt=javabin&version=2} {} 0 5
>> ERROR - 2014-12-02 17:16:54.285; org.apache.solr.common.SolrException;
>> org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/]
>> unknown field 'host'
>>        at
>> org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
>>        at
>> org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
>>        at
>> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
>>        at
>> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
>>        at
>> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
>>        at
>> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
>>        at
>> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
>>        at
>> org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
>>        at
>> org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
>>        at
>> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
>>        at
>> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
>>        at
>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>>        at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
>>        at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>>        at
>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
>>        at
>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
>>        at
>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
>>        at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>>        at
>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>>        at
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>>        at org.eclipse.jetty.server.Server.handle(Server.java:368)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
>>        at
>> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
>>        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
>>        at
>> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>>        at
>> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>>        at
>> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>>        at
>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>>        at
>> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>>        at java.lang.Thread.run(Thread.java:745)
>> INFO  - 2014-12-02 17:16:54.292;
>> org.apache.solr.update.processor.LogUpdateProcessor; [collection1]
>> webapp=/solr path=/update params={wt=javabin&version=2} {} 0 1
>> ERROR - 2014-12-02 17:16:54.292; org.apache.solr.common.SolrException;
>> org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/]
>> unknown field 'host'
>>        at
>> org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
>>        at
>> org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
>>        at
>> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
>>        at
>> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
>>        at
>> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
>>        at
>> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
>>        at
>> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
>>        at
>> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
>>        at
>> org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
>>        at
>> org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
>>        at
>> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
>>        at
>> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
>>        at
>> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>>        at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
>>        at
>> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
>>        at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>>        at
>> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
>>        at
>> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
>>        at
>> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
>>        at
>> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
>>        at
>> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>>        at
>> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>>        at
>> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>>        at
>> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>>        at org.eclipse.jetty.server.Server.handle(Server.java:368)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
>>        at
>> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
>>        at
>> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
>>        at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
>>        at
>> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>>        at
>> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>>        at
>> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>>        at
>> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>>        at
>> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>>        at java.lang.Thread.run(Thread.java:745)
>> 
>> 
>> 
> 
> 
> -- 
> Jonathan Cooper-Ellis
> *Data Engineer*
> myVBO, LLC dba Ziftr


Re: ERROR: [doc=http://nutch.apache.org/] unknown field 'metatag.keywords'

Posted by Jonathan Cooper-Ellis <jc...@ziftr.com>.
Hi,

In solrindex-mapping.xml, try changing the values for "source" to
metatag.keywords and metatag.description. Or change the fields Solr is
expecting to metatag.keywords and metatag.description. Hope that helps!

On Tue, Dec 2, 2014 at 4:52 AM, Arthur.hk.chan@gmail.com <
arthur.hk.chan@gmail.com> wrote:

> Hi,
>
> I am new to Nutch and Solr, please help!!
>
> I am using Nutch-1.9, solr-4.10.2 and Hadoop-2.4.1,
>
> It always returns "org.apache.solr.common.SolrException: Bad Request”
> (I have already copied [nutch]conf/schema.xml to
> [solr]/collection1/conf/schema.xml and restarted solr)
>
> Below is about my settings, some questions:
> Q1: Do I need to manually copy some .jar files to notch’s lib folder or
> solr’s lib folder? (e.g. need to copy hadoop’s jar files to nutch ir sold?)
> Q2: Would it be something wrong in my Plugin setup?
>
>
> regards
> Arthur
>
>
> My Nutch command:
> ./bin/crawl input_url/ output_url/
> http://192.168.0.1:8983/solr/collection1 2
>
>
> input_url/seed.txt:
> http://nutch.apache.org/
>
>
>
> conf/regex-urlfilter.txt
> # skip file: ftp: and mailto: urls
> -^(file|ftp|mailto):
> # skip image and other suffixes we can't yet parse
> # for a more extensive coverage use the urlfilter-suffix plugin
>
> -\.(gif|GIF|jpg|JPG|png|PNG|ico|ICO|css|CSS|sit|SIT|eps|EPS|wmf|WMF|zip|ZIP|ppt|PPT|mpg|MPG|xls|XLS|gz|GZ|rpm|RPM|tgz|TGZ|mov|MOV|exe|EXE|jpeg|JPEG|bmp|BMP|js|JS)$
> # skip URLs containing certain characters as probable queries, etc.
> -[?*!@=]
> # skip URLs with slash-delimited segment that repeats 3+ times, to break
> loops
> -.*(/[^/]+)/[^/]+\1/[^/]+\1/
> # accept anything else
> #+.
> +^http://([a-z0-9]*\.)*nutch.apache.org/
>
>
>
> conf/nutch-site.xml
> <?xml version="1.0"?>
> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
> <configuration>
>  <property>
>   <name>http.agent.name</name>
>   <value>MyBot</value>
>  </property>
>  <property>
>   <name>http.robots.agents</name>
>   <value>MyBot,*</value>
>  </property>
>  <property>
>   <name>fetcher.store.content</name>
>   <value>true</value>
>  </property>
>  <property>
>   <name>fetcher.max.crawl.delay</name>
>   <value>-1</value>
>  </property>
>  <property>
>   <name>plugin.includes</name>
>
> <value>protocol-http|urlfilter-regex|parse-(html|tika|metatags)|index-(basic|anchor|metadata)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|indexer-solr|urlnormalizer-(pass|regex|basic)</value>
>  </property>
>  <property>
>   <name>mapred.temp.dir</name>
>   <value>/tmp</value>
>   <description>A shared directory for temporary files.</description>
>  </property>
> <!-- Used only if plugin parse-metatags is enabled. -->
> <property>
> <name>metatags.names</name>
> <value>metatag.keywords;metatag.description</value>
> </property>
> <property>
>   <name>index.parse.md</name>
>   <value>metatag.description,metatag.keywords</value>
> </property>
> </configuration>
>
>
>
> conf/solrindex-mapping.xml
> <?xml version="1.0" encoding="UTF-8"?>
> <mapping>
>         <fields>
>                 <field dest="content" source="content"/>
>                 <field dest="title" source="title"/>
>                 <field dest="host" source="host"/>
>                 <field dest="segment" source="segment"/>
>                 <field dest="boost" source="boost"/>
>                 <field dest="digest" source="digest"/>
>                 <field dest="tstamp" source="tstamp"/>
>
>                 <field dest="description" source="description"/>
>                 <field dest="keywords" source="keywords"/>
>         </fields>
>         <uniqueKey>id</uniqueKey>
> </mapping>
>
>
>
> conf/schema.xml
> <?xml version="1.0" encoding="UTF-8" ?>
> <schema name="nutch" version="1.5">
>     <types>
>         <fieldType name="string" class="solr.StrField"
> sortMissingLast="true"
>             omitNorms="true"/>
>         <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
>             omitNorms="true" positionIncrementGap="0"/>
>         <fieldType name="float" class="solr.TrieFloatField"
> precisionStep="0"
>             omitNorms="true" positionIncrementGap="0"/>
>         <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
>             omitNorms="true" positionIncrementGap="0"/>
>
>         <fieldType name="text" class="solr.TextField"
>             positionIncrementGap="100">
>             <analyzer>
>                 <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>                 <filter class="solr.StopFilterFactory"
>                     ignoreCase="true" words="stopwords.txt"/>
>                 <filter class="solr.WordDelimiterFilterFactory"
>                     generateWordParts="1" generateNumberParts="1"
>                     catenateWords="1" catenateNumbers="1" catenateAll="0"
>                     splitOnCaseChange="1"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.EnglishPorterFilterFactory"
>                     protected="protwords.txt"/>
>                 <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>             </analyzer>
>         </fieldType>
>         <fieldType name="url" class="solr.TextField"
>             positionIncrementGap="100">
>             <analyzer>
>                 <tokenizer class="solr.StandardTokenizerFactory"/>
>                 <filter class="solr.LowerCaseFilterFactory"/>
>                 <filter class="solr.WordDelimiterFilterFactory"
>                     generateWordParts="1" generateNumberParts="1"/>
>             </analyzer>
>         </fieldType>
>     </types>
>     <fields>
>
>       <field name="_root_" type="string" indexed="true" stored="false"/>
>         <field name="id" type="string" stored="true" indexed="true"
>             required="true"/>
>
>         <!-- core fields -->
>         <field name="_version_" type="long" indexed="true" stored="true"/>
>         <field name="host" type="string" stored="false" indexed="true"/>
>         <field name="digest" type="string" stored="true" indexed="false"/>
>         <field name="segment" type="string" stored="true" indexed="false"/>
>         <field name="boost" type="float" stored="true" indexed="false"/>
>         <field name="tstamp" type="date" stored="true" indexed="false"/>
>
>         <field name="url" type="text" indexed="true" stored="true"
> required="true"/>
>         <field name="content_type" type="string" indexed="true"
> stored="true" multiValued="true"/>
>         <field name="last_modified" type="date" indexed="true"
> stored="true"/>
>         <field name="links" type="string" indexed="true" stored="true"
> multiValued="true"/>
>
>         <!-- fields for the metatags plugin -->
>         <field name="metatag.description" type="text" stored="true"
> indexed="true"/>
>         <field name="metatag.keywords" type="text" stored="true"
> indexed="true"/>
>
>         <!-- fields for index-basic plugin -->
>         <field name="content" type="text" indexed="true" stored="true"
> multiValued="true"/>
>         <field name="title" type="text" stored="true" indexed="true"/>
>         <field name="cache" type="string" stored="true" indexed="false"/>
>
>         <!-- fields for index-anchor plugin -->
>         <field name="anchor" type="string" stored="true" indexed="true"
>             multiValued="true"/>
>
>         <!-- fields for index-more plugin -->
>         <field name="type" type="string" stored="true" indexed="true"
>             multiValued="true"/>
>         <field name="contentLength" type="long" stored="true"
>             indexed="false"/>
>         <field name="lastModified" type="date" stored="true"
>             indexed="false"/>
>         <field name="date" type="date" stored="true" indexed="true"/>
>
>         <!-- fields for languageidentifier plugin -->
>         <field name="lang" type="string" stored="true" indexed="true"/>
>
>         <!-- fields for subcollection plugin -->
>         <field name="subcollection" type="string" stored="true"
>             indexed="true" multiValued="true"/>
>
>         <!-- fields for feed plugin (tag is also used by
> microformats-reltag)-->
>         <field name="author" type="string" stored="true" indexed="true"/>
>         <field name="tag" type="string" stored="true" indexed="true"
> multiValued="true"/>
>         <field name="feed" type="string" stored="true" indexed="true"/>
>         <field name="publishedDate" type="date" stored="true"
>             indexed="true"/>
>         <field name="updatedDate" type="date" stored="true"
>             indexed="true"/>
>
>         <!-- fields for creativecommons plugin -->
>         <field name="cc" type="string" stored="true" indexed="true"
>             multiValued="true"/>
>
>         <!-- fields for tld plugin -->
>         <field name="tld" type="string" stored="false" indexed="false"/>
>     </fields>
>     <uniqueKey>id</uniqueKey>
>     <defaultSearchField>content</defaultSearchField>
>     <solrQueryParser defaultOperator="OR"/>
> </schema>
>
>
>
> Nutch logs/hadoop.log:
> org.apache.solr.common.SolrException: Bad Request
> Bad Request
> request:
> http://192.168.0.1:8983/solr/collection1/update?wt=javabin&version=2
>         at
> org.apache.solr.client.solrj.impl.CommonsHttpSolrServer.request(CommonsHttpSolrServer.java:430)
>         at
> org.apache.solr.client.solrj.impl.CommonsHttpSolrServer.request(CommonsHttpSolrServer.java:244)
>         at
> org.apache.solr.client.solrj.request.AbstractUpdateRequest.process(AbstractUpdateRequest.java:105)
>         at
> org.apache.nutch.indexwriter.solr.SolrIndexWriter.close(SolrIndexWriter.java:155)
>         at
> org.apache.nutch.indexer.IndexWriters.close(IndexWriters.java:118)
>         at
> org.apache.nutch.indexer.IndexerOutputFormat$1.close(IndexerOutputFormat.java:44)
>         at
> org.apache.hadoop.mapred.ReduceTask$OldTrackingRecordWriter.close(ReduceTask.java:467)
>         at
> org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:535)
>         at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:421)
>         at
> org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:398)
>         2014-12-02 17:16:55,021 ERROR indexer.IndexingJob - Indexer:
> java.io.IOException: Job failed!
>         at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1357)
>         at org.apache.nutch.indexer.IndexingJob.index(IndexingJob.java:114)
>         at org.apache.nutch.indexer.IndexingJob.run(IndexingJob.java:176)
>         at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
>         at org.apache.nutch.indexer.IndexingJob.main(IndexingJob.java:186)
>
>
>
> Solr logs/solr.log:
> INFO  - 2014-12-02 17:08:32.650;
> org.apache.solr.cloud.Overseer$ClusterStateUpdater; Update state
> numShards=null message={
>   "operation":"state",
>   "core_node_name":"core_node1",
>   "shard":"shard1",
>   "roles":null,
>   "state":"active",
>   "core":"collection1",
>   "collection":"collection1",
>   "node_name":"192.168.0.1:8983_solr",
>   "base_url":"http://192.168.0.1:8983/solr"}
> INFO  - 2014-12-02 17:08:32.675;
> org.apache.solr.cloud.DistributedQueue$LatchChildWatcher; LatchChildWatcher
> fired on path: /overseer/queue state: SyncConnected type NodeChildrenChanged
> INFO  - 2014-12-02 17:08:32.686;
> org.apache.solr.common.cloud.ZkStateReader$2; A cluster state change:
> WatchedEvent state:SyncConnected type:NodeDataChanged
> path:/clusterstate.json, has occurred - updating... (live nodes size: 1)
> INFO  - 2014-12-02 17:08:38.556;
> org.apache.solr.servlet.SolrDispatchFilter; [admin] webapp=null
> path=/admin/cores params={indexInfo=false&_=1417511318548&wt=json} status=0
> QTime=2
> INFO  - 2014-12-02 17:08:38.680;
> org.apache.solr.servlet.SolrDispatchFilter; [admin] webapp=null
> path=/admin/info/system params={_=1417511318656&wt=json} status=0 QTime=5
> INFO  - 2014-12-02 17:08:39.270; org.apache.solr.core.SolrCore;
> [collection1] webapp=/solr path=/admin/file/
> params={file=admin-extra.menu-bottom.html&_=1417511319050&contentType=text/html;charset%3Dutf-8}
> status=0 QTime=7
> INFO  - 2014-12-02 17:08:39.270; org.apache.solr.core.SolrCore;
> [collection1] webapp=/solr path=/admin/file/
> params={file=admin-extra.menu-top.html&_=1417511319041&contentType=text/html;charset%3Dutf-8}
> status=0 QTime=12
> INFO  - 2014-12-02 17:08:39.374; org.apache.solr.core.SolrCore;
> [collection1] webapp=/solr path=/admin/luke
> params={numTerms=0&_=1417511319348&show=index&wt=json} status=0 QTime=1
> INFO  - 2014-12-02 17:08:39.387; org.apache.solr.core.SolrCore;
> [collection1] webapp=/solr path=/replication
> params={command=details&_=1417511319355&wt=json} status=0 QTime=8
> INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore;
> [collection1] webapp=/solr path=/admin/system
> params={_=1417511319358&wt=json} status=0 QTime=4
> INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore;
> [collection1] webapp=/solr path=/admin/ping
> params={action=status&_=1417511319368&wt=json} status=503 QTime=2
> INFO  - 2014-12-02 17:08:39.388; org.apache.solr.core.SolrCore;
> [collection1] webapp=/solr path=/admin/file/
> params={file=admin-extra.html&_=1417511319363} status=0 QTime=0
> INFO  - 2014-12-02 17:09:08.990;
> org.apache.solr.update.processor.LogUpdateProcessor; [collection1]
> webapp=/solr path=/update params={wt=javabin&version=2} {} 0 29
> ERROR - 2014-12-02 17:09:08.990; org.apache.solr.common.SolrException;
> org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/]
> unknown field 'metatag.keywords'
>         at
> org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
>         at
> org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
>         at
> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
>         at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
>         at
> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
>         at
> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
>         at
> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
>         at
> org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
>         at
> org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
>         at
> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
>         at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
>         at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
>         at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
>         at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>         at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
>         at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
>         at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
>         at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>         at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>         at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>         at org.eclipse.jetty.server.Server.handle(Server.java:368)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
>         at
> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
>         at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
>         at
> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)
>         at
> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>         at
> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>         at java.lang.Thread.run(Thread.java:745)
>
> INFO  - 2014-12-02 17:09:08.999;
> org.apache.solr.update.processor.LogUpdateProcessor; [collection1]
> webapp=/solr path=/update params={wt=javabin&version=2} {} 0 1
> ERROR - 2014-12-02 17:09:09.000; org.apache.solr.common.SolrException;
> org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/]
> unknown field 'metatag.keywords'
>         at
> org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
>         at
> org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
>         at
> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
>         at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
>         at
> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
>         at
> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
>         at
> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
>         at
> org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
>         at
> org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
>         at
> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
>         at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
>         at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
>         at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
>         at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>         at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
>         at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
>         at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
>         at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>         at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>         at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>         at org.eclipse.jetty.server.Server.handle(Server.java:368)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
>         at
> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
>         at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
>         at
> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>         at
> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>         at
> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>         at java.lang.Thread.run(Thread.java:745)
>
> INFO  - 2014-12-02 17:16:54.284;
> org.apache.solr.update.processor.LogUpdateProcessor; [collection1]
> webapp=/solr path=/update params={wt=javabin&version=2} {} 0 5
> ERROR - 2014-12-02 17:16:54.285; org.apache.solr.common.SolrException;
> org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/]
> unknown field 'host'
>         at
> org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
>         at
> org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
>         at
> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
>         at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
>         at
> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
>         at
> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
>         at
> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
>         at
> org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
>         at
> org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
>         at
> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
>         at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
>         at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
>         at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
>         at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>         at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
>         at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
>         at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
>         at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>         at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>         at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>         at org.eclipse.jetty.server.Server.handle(Server.java:368)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
>         at
> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
>         at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
>         at
> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>         at
> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>         at
> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>         at java.lang.Thread.run(Thread.java:745)
> INFO  - 2014-12-02 17:16:54.292;
> org.apache.solr.update.processor.LogUpdateProcessor; [collection1]
> webapp=/solr path=/update params={wt=javabin&version=2} {} 0 1
> ERROR - 2014-12-02 17:16:54.292; org.apache.solr.common.SolrException;
> org.apache.solr.common.SolrException: ERROR: [doc=http://nutch.apache.org/]
> unknown field 'host'
>         at
> org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:185)
>         at
> org.apache.solr.update.AddUpdateCommand.getLuceneDocument(AddUpdateCommand.java:78)
>         at
> org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:238)
>         at
> org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:164)
>         at
> org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:69)
>         at
> org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:51)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:926)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1080)
>         at
> org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:692)
>         at
> org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:100)
>         at
> org.apache.solr.handler.loader.XMLLoader.processUpdate(XMLLoader.java:247)
>         at
> org.apache.solr.handler.loader.XMLLoader.load(XMLLoader.java:174)
>         at
> org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:99)
>         at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:74)
>         at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135)
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:1967)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:777)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:418)
>         at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:207)
>         at
> org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1419)
>         at
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:455)
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
>         at
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:557)
>         at
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1075)
>         at
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:384)
>         at
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
>         at
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1009)
>         at
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
>         at
> org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
>         at
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
>         at
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
>         at org.eclipse.jetty.server.Server.handle(Server.java:368)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:489)
>         at
> org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection.content(AbstractHttpConnection.java:953)
>         at
> org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.content(AbstractHttpConnection.java:1014)
>         at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:953)
>         at
> org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:240)
>         at
> org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
>         at
> org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
>         at
> org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
>         at java.lang.Thread.run(Thread.java:745)
>
>
>


-- 
Jonathan Cooper-Ellis
*Data Engineer*
myVBO, LLC dba Ziftr