You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@nutch.apache.org by Diane Palla <pa...@shu.edu> on 2005/08/18 20:42:19 UTC
Search Java JSP error after configuration and set up. Please help.
I am trying to set up Nutch with an intranet. I used Nutch 0.7 with Java
J2SE 1.4.2 and Tomcat 4.1.31.
I did the crawl with the command
bin/nutch crawl bin/urls.txt -dir crawl.test -depth 3 >& crawl.log
and the crawl.log gave log messages that appeared to imply that it was a
successful run. (Crawl.log is copied after the Java/JSP errors below)
I set JAVA_HOME and NUTCH_JAVA_HOME to the J2RE when I did the crawl,
but I set JAVA_HOME to the J2SE when I ran Tomcat and went to
http://localhost:8080
I tried to search for something and
I got this error from the NutchBean.
Did I configure something wrong? How can I fix this?
Diane Palla
Web Services Developer
Seton Hall University
973 313-6199
palladia@shu.edu
org.apache.jasper.JasperException
at
org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:207)
at
org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:240)
at
org.apache.jasper.servlet.JspServlet.service(JspServlet.java:187)
at
javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
at
org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:200)
at
org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:146)
at
org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:209)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
at
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
at
org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
at
org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:144)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
at
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
at
org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
at
org.apache.catalina.core.StandardContext.invoke(StandardContext.java:2358)
at
org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:133)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
at
org.apache.catalina.valves.ErrorDispatcherValve.invoke(ErrorDispatcherValve.java:118)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
at
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:116)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
at
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
at
org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
at
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:127)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
at
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
at
org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
at
org.apache.coyote.tomcat4.CoyoteAdapter.service(CoyoteAdapter.java:152)
at
org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:799)
at
org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.processConnection(Http11Protocol.java:705)
at
org.apache.tomcat.util.net.TcpWorkerThread.runIt(PoolTcpEndpoint.java:577)
at
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:683)
at java.lang.Thread.run(Thread.java:534)
root cause
java.lang.NullPointerException
at
org.apache.nutch.searcher.NutchBean.init(NutchBean.java:96)
at
org.apache.nutch.searcher.NutchBean.<init>(NutchBean.java:82)
at
org.apache.nutch.searcher.NutchBean.<init>(NutchBean.java:72)
at
org.apache.nutch.searcher.NutchBean.get(NutchBean.java:64)
at
org.apache.jsp.search_jsp._jspService(search_jsp.java:108)
at
org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:92)
at
javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
at
org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:162)
at
org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:240)
at
org.apache.jasper.servlet.JspServlet.service(JspServlet.java:187)
at
javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
at
org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:200)
at
org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:146)
at
org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:209)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
at
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
at
org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
at
org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:144)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
at
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
at
org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
at
org.apache.catalina.core.StandardContext.invoke(StandardContext.java:2358)
at
org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:133)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
at
org.apache.catalina.valves.ErrorDispatcherValve.invoke(ErrorDispatcherValve.java:118)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
at
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:116)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
at
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
at
org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
at
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:127)
at
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
at
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
at
org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
at
org.apache.coyote.tomcat4.CoyoteAdapter.service(CoyoteAdapter.java:152)
at
org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:799)
at
org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.processConnection(Http11Protocol.java:705)
at
org.apache.tomcat.util.net.TcpWorkerThread.runIt(PoolTcpEndpoint.java:577)
at
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:683)
at java.lang.Thread.run(Thread.java:534)
Crawl.log:
run java in /usr/java/j2re1.4.2_02
050818 140148 parsing
file:/gartner/httpd/html/nutch-0.7/conf/nutch-default.xml
050818 140149 parsing
file:/gartner/httpd/html/nutch-0.7/conf/crawl-tool.xml
050818 140149 parsing
file:/gartner/httpd/html/nutch-0.7/conf/nutch-site.xml
050818 140149 No FS indicated, using default:local
050818 140149 crawl started in: crawl.test
050818 140149 rootUrlFile = bin/urls.txt
050818 140149 threads = 10
050818 140149 depth = 3
050818 140149 Created webdb at
LocalFS,/gartner/httpd/html/nutch-0.7/crawl.test/db
050818 140149 Starting URL processing
050818 140149 Plugins: looking in: /gartner/httpd/html/nutch-0.7/plugins
050818 140149 not including:
/gartner/httpd/html/nutch-0.7/plugins/clustering-carrot2
050818 140149 not including:
/gartner/httpd/html/nutch-0.7/plugins/creativecommons
050818 140149 parsing:
/gartner/httpd/html/nutch-0.7/plugins/index-basic/plugin.xml
050818 140150 impl: point=org.apache.nutch.indexer.IndexingFilter
class=org.apache.nutch.indexer.basic.BasicIndexingFilter
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/index-more
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/language-identifier
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/ontology
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/parse-ext
050818 140150 parsing:
/gartner/httpd/html/nutch-0.7/plugins/parse-html/plugin.xml
050818 140150 impl: point=org.apache.nutch.parse.Parser
class=org.apache.nutch.parse.html.HtmlParser
050818 140150 parsing:
/gartner/httpd/html/nutch-0.7/plugins/parse-js/plugin.xml
050818 140150 impl: point=org.apache.nutch.parse.Parser
class=org.apache.nutch.parse.js.JSParseFilter
050818 140150 impl: point=org.apache.nutch.parse.HtmlParseFilter
class=org.apache.nutch.parse.js.JSParseFilter
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/parse-msword
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/parse-pdf
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/parse-rss
050818 140150 parsing:
/gartner/httpd/html/nutch-0.7/plugins/parse-text/plugin.xml
050818 140150 impl: point=org.apache.nutch.parse.Parser
class=org.apache.nutch.parse.text.TextParser
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/protocol-file
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/protocol-ftp
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/protocol-http
050818 140150 parsing:
/gartner/httpd/html/nutch-0.7/plugins/protocol-httpclient/plugin.xml
050818 140150 impl: point=org.apache.nutch.protocol.Protocol
class=org.apache.nutch.protocol.httpclient.Http
050818 140150 impl: point=org.apache.nutch.protocol.Protocol
class=org.apache.nutch.protocol.httpclient.Http
050818 140150 parsing:
/gartner/httpd/html/nutch-0.7/plugins/query-basic/plugin.xml
050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
class=org.apache.nutch.searcher.basic.BasicQueryFilter
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/query-more
050818 140150 parsing:
/gartner/httpd/html/nutch-0.7/plugins/query-site/plugin.xml
050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
class=org.apache.nutch.searcher.site.SiteQueryFilter
050818 140150 parsing:
/gartner/httpd/html/nutch-0.7/plugins/query-url/plugin.xml
050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
class=org.apache.nutch.searcher.url.URLQueryFilter
050818 140150 not including:
/gartner/httpd/html/nutch-0.7/plugins/urlfilter-prefix
050818 140150 parsing:
/gartner/httpd/html/nutch-0.7/plugins/urlfilter-regex/plugin.xml
050818 140150 impl: point=org.apache.nutch.net.URLFilter
class=org.apache.nutch.net.RegexURLFilter
050818 140150 found resource crawl-urlfilter.txt at
file:/gartner/httpd/html/nutch-0.7/conf/crawl-urlfilter.txt
050818 140150 Using URL normalizer:
org.apache.nutch.net.BasicUrlNormalizer
050818 140150 Added 1 pages
050818 140150 Processing pagesByURL: Sorted 1 instructions in 0.014
seconds.
050818 140150 Processing pagesByURL: Sorted 71.42857142857143
instructions/second
050818 140150 Processing pagesByURL: Merged to new DB containing 1 records
in 0.0070 seconds
050818 140150 Processing pagesByURL: Merged 142.85714285714286
records/second
050818 140150 Processing pagesByMD5: Sorted 1 instructions in 0.0020
seconds.
050818 140150 Processing pagesByMD5: Sorted 500.0 instructions/second
050818 140150 Processing pagesByMD5: Merged to new DB containing 1 records
in 0.0030 seconds
050818 140150 Processing pagesByMD5: Merged 333.3333333333333
records/second
050818 140150 Processing linksByMD5: Copied file (4096 bytes) in 0.01
secs.
050818 140150 Processing linksByURL: Copied file (4096 bytes) in -0.0020
secs.
050818 140150 FetchListTool started
050818 140151 Processing pagesByURL: Sorted 1 instructions in 0.106
seconds.
050818 140151 Processing pagesByURL: Sorted 9.433962264150944
instructions/second
050818 140151 Processing pagesByURL: Merged to new DB containing 1 records
in 0.0 seconds
050818 140151 Processing pagesByURL: Merged Infinity records/second
050818 140151 Processing pagesByMD5: Sorted 1 instructions in 0.0020
seconds.
050818 140151 Processing pagesByMD5: Sorted 500.0 instructions/second
050818 140151 Processing pagesByMD5: Merged to new DB containing 1 records
in 0.0020 seconds
050818 140151 Processing pagesByMD5: Merged 500.0 records/second
050818 140151 Processing linksByMD5: Copied file (4096 bytes) in 0.0010
secs.
050818 140151 Processing linksByURL: Copied file (4096 bytes) in 0.0020
secs.
050818 140151 Processing
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150/fetchlist.unsorted:
Sorted 1 entries in 0.011 seconds.
050818 140151 Processing
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150/fetchlist.unsorted:
Sorted 90.90909090909092 entries/second
050818 140151 Overall processing: Sorted 1 entries in 0.011 seconds.
050818 140151 Overall processing: Sorted 0.011 entries/second
050818 140151 FetchListTool completed
050818 140151 logging at INFO
050818 140151 fetching http://gartner.shu.edu/
050818 140151 http.proxy.host = null
050818 140151 http.proxy.port = 8080
050818 140151 http.timeout = 10000
050818 140151 http.content.limit = 65536
050818 140151 http.agent = NutchCVS/0.7 (Nutch;
http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
050818 140151 http.auth.ntlm.username =
050818 140151 fetcher.server.delay = 1000
050818 140151 http.max.delays = 100
050818 140152 Configured Client
050818 140152 basic authentication scheme selected
050818 140152 basic authentication scheme selected
050818 140153 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
050818 140154 Updating for
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
050818 140154 Processing document 0
050818 140154 Finishing update
050818 140154 Processing pagesByURL: Sorted 1 instructions in 0.0060
seconds.
050818 140154 Processing pagesByURL: Sorted 166.66666666666666
instructions/second
050818 140154 Processing pagesByURL: Merged to new DB containing 1 records
in 0.0010 seconds
050818 140154 Processing pagesByURL: Merged 1000.0 records/second
050818 140154 Processing pagesByMD5: Sorted 1 instructions in 0.0050
seconds.
050818 140154 Processing pagesByMD5: Sorted 200.0 instructions/second
050818 140154 Processing pagesByMD5: Merged to new DB containing 1 records
in 0.0 seconds
050818 140154 Processing pagesByMD5: Merged Infinity records/second
050818 140154 Processing linksByMD5: Copied file (4096 bytes) in 0.0020
secs.
050818 140154 Processing linksByURL: Copied file (4096 bytes) in 0.0040
secs.
050818 140154 Update finished
050818 140154 FetchListTool started
050818 140154 Overall processing: Sorted 0 entries in 0.0 seconds.
050818 140154 Overall processing: Sorted NaN entries/second
050818 140154 FetchListTool completed
050818 140154 logging at INFO
050818 140155 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
050818 140155 Updating for
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
050818 140155 Finishing update
050818 140155 Update finished
050818 140155 FetchListTool started
050818 140156 Overall processing: Sorted 0 entries in 0.0 seconds.
050818 140156 Overall processing: Sorted NaN entries/second
050818 140156 FetchListTool completed
050818 140156 logging at INFO
050818 140157 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
050818 140157 Updating for
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
050818 140157 Finishing update
050818 140157 Update finished
050818 140157 Updating /gartner/httpd/html/nutch-0.7/crawl.test/segments
from /gartner/httpd/html/nutch-0.7/crawl.test/db
050818 140157 reading
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
050818 140157 reading
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
050818 140157 reading
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
050818 140157 Sorting pages by url...
050818 140157 Getting updated scores and anchors from db...
050818 140157 Sorting updates by segment...
050818 140157 Updating segments...
050818 140157 updating
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
050818 140157 Done updating
/gartner/httpd/html/nutch-0.7/crawl.test/segments from
/gartner/httpd/html/nutch-0.7/crawl.test/db
050818 140158 indexing segment:
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
050818 140158 * Opening segment 20050818140150
050818 140158 * Indexing segment 20050818140150
050818 140158 * Optimizing index...
050818 140158 * Moving index to NFS if needed...
050818 140158 DONE indexing segment 20050818140150: total 1 records in
0.034 s (Infinity rec/s).
050818 140158 done indexing
050818 140158 indexing segment:
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
050818 140158 * Opening segment 20050818140154
050818 140158 * Indexing segment 20050818140154
050818 140158 * Optimizing index...
050818 140158 * Moving index to NFS if needed...
050818 140158 DONE indexing segment 20050818140154: total 0 records in
0.046 s (NaN rec/s).
050818 140158 done indexing
050818 140158 indexing segment:
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
050818 140158 * Opening segment 20050818140156
050818 140158 * Indexing segment 20050818140156
050818 140158 * Optimizing index...
050818 140158 * Moving index to NFS if needed...
050818 140158 DONE indexing segment 20050818140156: total 0 records in
0.071 s (NaN rec/s).
050818 140158 done indexing
050818 140158 Reading url hashes...
050818 140158 Sorting url hashes...
050818 140158 Deleting url duplicates...
050818 140158 Deleted 0 url duplicates.
050818 140158 Reading content hashes...
050818 140158 Sorting content hashes...
050818 140158 Deleting content duplicates...
050818 140158 Deleted 0 content duplicates.
050818 140158 Duplicate deletion complete locally. Now returning to
NFS...
050818 140158 DeleteDuplicates complete
050818 140158 Merging segment indexes...
050818 140158 crawl finished: crawl.test
Re: Crawl produced no search results.
Posted by Diane Palla <pa...@shu.edu>.
My crawl apparently created no indexes for the search to produce any
search results.
For intranets that require BASIC authentication, how do I configure it to
crawl? How do I tell Nutch the username, password, and credentials so
it can access my intranet site?
I am also installing Nutch on the same computer that the intranet is
hosted on. Alternatively, can it search filesystems and produce the
mappings for the HTML pages?
Diane Palla
Web Services Developer
Seton Hall University
973 313-6199
palladia@shu.edu
Piotr Kosiorowski <pk...@gmail.com>
08/18/2005 03:26 PM
Please respond to
nutch-user@lucene.apache.org
To
nutch-user@lucene.apache.org
cc
Subject
Re: Search Java JSP error after configuration and set up. Please help.
Please make sure you started Tomcat from the crawl.test directory (or have
it configured in nutch-default.xml in the *.war file).
Regards
Piotr
Diane Palla wrote:
> I am trying to set up Nutch with an intranet. I used Nutch 0.7 with
Java
> J2SE 1.4.2 and Tomcat 4.1.31.
>
> I did the crawl with the command
>
> bin/nutch crawl bin/urls.txt -dir crawl.test -depth 3 >& crawl.log
>
>
> and the crawl.log gave log messages that appeared to imply that it was a
> successful run. (Crawl.log is copied after the Java/JSP errors below)
>
> and I set JAVA_HOME and NUTCH_JAVA_HOME to the J2re when I did the
crawl,
> but I set JAVA_HOME to the j2se when I ran tomcat and i went to
> http://localhost:8080
>
> I tried to search something and
>
> I got this error of the Nutch Bean.
>
> Did I configure something wrong? How can I fix this?
>
>
> Diane Palla
> Web Services Developer
> Seton Hall University
> 973 313-6199
> palladia@shu.edu
>
>
>
> org.apache.jasper.JasperException
> at
>
org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:207)
> at
> org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:240)
> at
> org.apache.jasper.servlet.JspServlet.service(JspServlet.java:187)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
>
org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:200)
> at
>
org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:146)
> at
>
org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:209)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:144)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardContext.invoke(StandardContext.java:2358)
> at
>
org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:133)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.valves.ErrorDispatcherValve.invoke(ErrorDispatcherValve.java:118)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
>
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:116)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:127)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.coyote.tomcat4.CoyoteAdapter.service(CoyoteAdapter.java:152)
> at
>
org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:799)
> at
>
org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.processConnection(Http11Protocol.java:705)
> at
>
org.apache.tomcat.util.net.TcpWorkerThread.runIt(PoolTcpEndpoint.java:577)
> at
>
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:683)
> at java.lang.Thread.run(Thread.java:534)
>
> root cause
> java.lang.NullPointerException
> at
> org.apache.nutch.searcher.NutchBean.init(NutchBean.java:96)
> at
> org.apache.nutch.searcher.NutchBean.<init>(NutchBean.java:82)
> at
> org.apache.nutch.searcher.NutchBean.<init>(NutchBean.java:72)
> at
> org.apache.nutch.searcher.NutchBean.get(NutchBean.java:64)
> at
> org.apache.jsp.search_jsp._jspService(search_jsp.java:108)
> at
> org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:92)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
>
org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:162)
> at
> org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:240)
> at
> org.apache.jasper.servlet.JspServlet.service(JspServlet.java:187)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
>
org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:200)
> at
>
org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:146)
> at
>
org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:209)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:144)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardContext.invoke(StandardContext.java:2358)
> at
>
org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:133)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.valves.ErrorDispatcherValve.invoke(ErrorDispatcherValve.java:118)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
>
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:116)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:127)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.coyote.tomcat4.CoyoteAdapter.service(CoyoteAdapter.java:152)
> at
>
org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:799)
> at
>
org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.processConnection(Http11Protocol.java:705)
> at
>
org.apache.tomcat.util.net.TcpWorkerThread.runIt(PoolTcpEndpoint.java:577)
> at
>
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:683)
> at java.lang.Thread.run(Thread.java:534)
>
>
>
> Crawl.log:
>
> run java in /usr/java/j2re1.4.2_02
> 050818 140148 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/nutch-default.xml
> 050818 140149 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/crawl-tool.xml
> 050818 140149 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/nutch-site.xml
> 050818 140149 No FS indicated, using default:local
> 050818 140149 crawl started in: crawl.test
> 050818 140149 rootUrlFile = bin/urls.txt
> 050818 140149 threads = 10
> 050818 140149 depth = 3
> 050818 140149 Created webdb at
> LocalFS,/gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140149 Starting URL processing
> 050818 140149 Plugins: looking in: /gartner/httpd/html/nutch-0.7/plugins
> 050818 140149 not including:
> /gartner/httpd/html/nutch-0.7/plugins/clustering-carrot2
> 050818 140149 not including:
> /gartner/httpd/html/nutch-0.7/plugins/creativecommons
> 050818 140149 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/index-basic/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.indexer.IndexingFilter
> class=org.apache.nutch.indexer.basic.BasicIndexingFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/index-more
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/language-identifier
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/ontology
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-ext
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-html/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.html.HtmlParser
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-js/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.js.JSParseFilter
> 050818 140150 impl: point=org.apache.nutch.parse.HtmlParseFilter
> class=org.apache.nutch.parse.js.JSParseFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-msword
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-pdf
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-rss
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-text/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.text.TextParser
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-file
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-ftp
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-http
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-httpclient/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.protocol.Protocol
> class=org.apache.nutch.protocol.httpclient.Http
> 050818 140150 impl: point=org.apache.nutch.protocol.Protocol
> class=org.apache.nutch.protocol.httpclient.Http
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-basic/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.basic.BasicQueryFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/query-more
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-site/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.site.SiteQueryFilter
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-url/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.url.URLQueryFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/urlfilter-prefix
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/urlfilter-regex/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.net.URLFilter
> class=org.apache.nutch.net.RegexURLFilter
> 050818 140150 found resource crawl-urlfilter.txt at
> file:/gartner/httpd/html/nutch-0.7/conf/crawl-urlfilter.txt
> 050818 140150 Using URL normalizer:
> org.apache.nutch.net.BasicUrlNormalizer
> 050818 140150 Added 1 pages
> 050818 140150 Processing pagesByURL: Sorted 1 instructions in 0.014
> seconds.
> 050818 140150 Processing pagesByURL: Sorted 71.42857142857143
> instructions/second
> 050818 140150 Processing pagesByURL: Merged to new DB containing 1
records
> in 0.0070 seconds
> 050818 140150 Processing pagesByURL: Merged 142.85714285714286
> records/second
> 050818 140150 Processing pagesByMD5: Sorted 1 instructions in 0.0020
> seconds.
> 050818 140150 Processing pagesByMD5: Sorted 500.0 instructions/second
> 050818 140150 Processing pagesByMD5: Merged to new DB containing 1
records
> in 0.0030 seconds
> 050818 140150 Processing pagesByMD5: Merged 333.3333333333333
> records/second
> 050818 140150 Processing linksByMD5: Copied file (4096 bytes) in 0.01
> secs.
> 050818 140150 Processing linksByURL: Copied file (4096 bytes) in -0.0020
> secs.
> 050818 140150 FetchListTool started
> 050818 140151 Processing pagesByURL: Sorted 1 instructions in 0.106
> seconds.
> 050818 140151 Processing pagesByURL: Sorted 9.433962264150944
> instructions/second
> 050818 140151 Processing pagesByURL: Merged to new DB containing 1
records
> in 0.0 seconds
> 050818 140151 Processing pagesByURL: Merged Infinity records/second
> 050818 140151 Processing pagesByMD5: Sorted 1 instructions in 0.0020
> seconds.
> 050818 140151 Processing pagesByMD5: Sorted 500.0 instructions/second
> 050818 140151 Processing pagesByMD5: Merged to new DB containing 1
records
> in 0.0020 seconds
> 050818 140151 Processing pagesByMD5: Merged 500.0 records/second
> 050818 140151 Processing linksByMD5: Copied file (4096 bytes) in 0.0010
> secs.
> 050818 140151 Processing linksByURL: Copied file (4096 bytes) in 0.0020
> secs.
> 050818 140151 Processing
>
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150/fetchlist.unsorted:
> Sorted 1 entries in 0.011 seconds.
> 050818 140151 Processing
>
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150/fetchlist.unsorted:
> Sorted 90.90909090909092 entries/second
> 050818 140151 Overall processing: Sorted 1 entries in 0.011 seconds.
> 050818 140151 Overall processing: Sorted 0.011 entries/second
> 050818 140151 FetchListTool completed
> 050818 140151 logging at INFO
> 050818 140151 fetching http://gartner.shu.edu/
> 050818 140151 http.proxy.host = null
> 050818 140151 http.proxy.port = 8080
> 050818 140151 http.timeout = 10000
> 050818 140151 http.content.limit = 65536
> 050818 140151 http.agent = NutchCVS/0.7 (Nutch;
> http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
> 050818 140151 http.auth.ntlm.username =
> 050818 140151 fetcher.server.delay = 1000
> 050818 140151 http.max.delays = 100
> 050818 140152 Configured Client
> 050818 140152 basic authentication scheme selected
> 050818 140152 basic authentication scheme selected
> 050818 140153 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140154 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140154 Processing document 0
> 050818 140154 Finishing update
> 050818 140154 Processing pagesByURL: Sorted 1 instructions in 0.0060
> seconds.
> 050818 140154 Processing pagesByURL: Sorted 166.66666666666666
> instructions/second
> 050818 140154 Processing pagesByURL: Merged to new DB containing 1
records
> in 0.0010 seconds
> 050818 140154 Processing pagesByURL: Merged 1000.0 records/second
> 050818 140154 Processing pagesByMD5: Sorted 1 instructions in 0.0050
> seconds.
> 050818 140154 Processing pagesByMD5: Sorted 200.0 instructions/second
> 050818 140154 Processing pagesByMD5: Merged to new DB containing 1
records
> in 0.0 seconds
> 050818 140154 Processing pagesByMD5: Merged Infinity records/second
> 050818 140154 Processing linksByMD5: Copied file (4096 bytes) in 0.0020
> secs.
> 050818 140154 Processing linksByURL: Copied file (4096 bytes) in 0.0040
> secs.
> 050818 140154 Update finished
> 050818 140154 FetchListTool started
> 050818 140154 Overall processing: Sorted 0 entries in 0.0 seconds.
> 050818 140154 Overall processing: Sorted NaN entries/second
> 050818 140154 FetchListTool completed
> 050818 140154 logging at INFO
> 050818 140155 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140155 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140155 Finishing update
> 050818 140155 Update finished
> 050818 140155 FetchListTool started
> 050818 140156 Overall processing: Sorted 0 entries in 0.0 seconds.
> 050818 140156 Overall processing: Sorted NaN entries/second
> 050818 140156 FetchListTool completed
> 050818 140156 logging at INFO
> 050818 140157 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140157 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140157 Finishing update
> 050818 140157 Update finished
> 050818 140157 Updating /gartner/httpd/html/nutch-0.7/crawl.test/segments
> from /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140157 Sorting pages by url...
> 050818 140157 Getting updated scores and anchors from db...
> 050818 140157 Sorting updates by segment...
> 050818 140157 Updating segments...
> 050818 140157 updating
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140157 Done updating
> /gartner/httpd/html/nutch-0.7/crawl.test/segments from
> /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140158 * Opening segment 20050818140150
> 050818 140158 * Indexing segment 20050818140150
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140150: total 1 records in
> 0.034 s (Infinity rec/s).
> 050818 140158 done indexing
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140158 * Opening segment 20050818140154
> 050818 140158 * Indexing segment 20050818140154
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140154: total 0 records in
> 0.046 s (NaN rec/s).
> 050818 140158 done indexing
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140158 * Opening segment 20050818140156
> 050818 140158 * Indexing segment 20050818140156
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140156: total 0 records in
> 0.071 s (NaN rec/s).
> 050818 140158 done indexing
> 050818 140158 Reading url hashes...
> 050818 140158 Sorting url hashes...
> 050818 140158 Deleting url duplicates...
> 050818 140158 Deleted 0 url duplicates.
> 050818 140158 Reading content hashes...
> 050818 140158 Sorting content hashes...
> 050818 140158 Deleting content duplicates...
> 050818 140158 Deleted 0 content duplicates.
> 050818 140158 Duplicate deletion complete locally. Now returning to
> NFS...
> 050818 140158 DeleteDuplicates complete
> 050818 140158 Merging segment indexes...
> 050818 140158 crawl finished: crawl.test
Re: Search Java JSP error after configuration and set up. Please help.
Posted by Diane Palla <pa...@shu.edu>.
OK, I entered
/usr/local/jakarta-tomcat-4.1.31/bin/catalina.sh start
when I was in the crawl.test directory.
The search.jsp works now without reporting JSP/Java errors and stack
traces.
Thanks.
What do you mean when you say that I must "(...have
it configured in nutch-default.xml in *.war file)" in order to start
Tomcat without being in the crawl.test directory and still have the searches work
on http://localhost:8080?
Which properties do I have to set in nutch-default.xml to make
that work?
Diane Palla
Web Services Developer
Seton Hall University
973 313-6199
palladia@shu.edu
Piotr Kosiorowski <pk...@gmail.com>
08/18/2005 03:26 PM
Please respond to
nutch-user@lucene.apache.org
To
nutch-user@lucene.apache.org
cc
Subject
Re: Search Java JSP error after configuration and set up. Please help.
Please make sure you started tomcat from crawl.test directory (or have
it configured in nutch-default.xml in *.war file)
Regards
Piotr
Diane Palla wrote:
> I am trying to set up Nutch with an intranet. I used Nutch 0.7 with Java
> J2SE 1.4.2 and Tomcat 4.1.31.
>
> I did the crawl with the command
>
> bin/nutch crawl bin/urls.txt -dir crawl.test -depth 3 >& crawl.log
>
>
> and the crawl.log gave log messages that appeared to imply that it was a
> successful run. (Crawl.log is copied after the Java/JSP errors below)
>
> and I set JAVA_HOME and NUTCH_JAVA_HOME to the J2re when I did the crawl,
> but I set JAVA_HOME to the j2se when I ran tomcat and i went to
> http://localhost:8080
>
> I tried to search something and
>
> I got this error of the Nutch Bean.
>
> Did I configure something wrong? How can I fix this?
>
>
> Diane Palla
> Web Services Developer
> Seton Hall University
> 973 313-6199
> palladia@shu.edu
>
>
>
> org.apache.jasper.JasperException
> at
>
org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:207)
> at
> org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:240)
> at
> org.apache.jasper.servlet.JspServlet.service(JspServlet.java:187)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
>
org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:200)
> at
>
org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:146)
> at
>
org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:209)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:144)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardContext.invoke(StandardContext.java:2358)
> at
>
org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:133)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.valves.ErrorDispatcherValve.invoke(ErrorDispatcherValve.java:118)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
>
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:116)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:127)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.coyote.tomcat4.CoyoteAdapter.service(CoyoteAdapter.java:152)
> at
>
org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:799)
> at
>
org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.processConnection(Http11Protocol.java:705)
> at
>
org.apache.tomcat.util.net.TcpWorkerThread.runIt(PoolTcpEndpoint.java:577)
> at
>
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:683)
> at java.lang.Thread.run(Thread.java:534)
>
> root cause
> java.lang.NullPointerException
> at
> org.apache.nutch.searcher.NutchBean.init(NutchBean.java:96)
> at
> org.apache.nutch.searcher.NutchBean.<init>(NutchBean.java:82)
> at
> org.apache.nutch.searcher.NutchBean.<init>(NutchBean.java:72)
> at
> org.apache.nutch.searcher.NutchBean.get(NutchBean.java:64)
> at
> org.apache.jsp.search_jsp._jspService(search_jsp.java:108)
> at
> org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:92)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
>
org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:162)
> at
> org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:240)
> at
> org.apache.jasper.servlet.JspServlet.service(JspServlet.java:187)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
>
org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:200)
> at
>
org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:146)
> at
>
org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:209)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:144)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardContext.invoke(StandardContext.java:2358)
> at
>
org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:133)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.valves.ErrorDispatcherValve.invoke(ErrorDispatcherValve.java:118)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
>
org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:116)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
>
org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:127)
> at
>
org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
>
org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.coyote.tomcat4.CoyoteAdapter.service(CoyoteAdapter.java:152)
> at
>
org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:799)
> at
>
org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.processConnection(Http11Protocol.java:705)
> at
>
org.apache.tomcat.util.net.TcpWorkerThread.runIt(PoolTcpEndpoint.java:577)
> at
>
org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:683)
> at java.lang.Thread.run(Thread.java:534)
>
>
>
> Crawl.log:
>
> run java in /usr/java/j2re1.4.2_02
> 050818 140148 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/nutch-default.xml
> 050818 140149 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/crawl-tool.xml
> 050818 140149 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/nutch-site.xml
> 050818 140149 No FS indicated, using default:local
> 050818 140149 crawl started in: crawl.test
> 050818 140149 rootUrlFile = bin/urls.txt
> 050818 140149 threads = 10
> 050818 140149 depth = 3
> 050818 140149 Created webdb at
> LocalFS,/gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140149 Starting URL processing
> 050818 140149 Plugins: looking in: /gartner/httpd/html/nutch-0.7/plugins
> 050818 140149 not including:
> /gartner/httpd/html/nutch-0.7/plugins/clustering-carrot2
> 050818 140149 not including:
> /gartner/httpd/html/nutch-0.7/plugins/creativecommons
> 050818 140149 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/index-basic/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.indexer.IndexingFilter
> class=org.apache.nutch.indexer.basic.BasicIndexingFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/index-more
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/language-identifier
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/ontology
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-ext
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-html/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.html.HtmlParser
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-js/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.js.JSParseFilter
> 050818 140150 impl: point=org.apache.nutch.parse.HtmlParseFilter
> class=org.apache.nutch.parse.js.JSParseFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-msword
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-pdf
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-rss
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-text/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.text.TextParser
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-file
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-ftp
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-http
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-httpclient/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.protocol.Protocol
> class=org.apache.nutch.protocol.httpclient.Http
> 050818 140150 impl: point=org.apache.nutch.protocol.Protocol
> class=org.apache.nutch.protocol.httpclient.Http
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-basic/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.basic.BasicQueryFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/query-more
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-site/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.site.SiteQueryFilter
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-url/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.url.URLQueryFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/urlfilter-prefix
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/urlfilter-regex/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.net.URLFilter
> class=org.apache.nutch.net.RegexURLFilter
> 050818 140150 found resource crawl-urlfilter.txt at
> file:/gartner/httpd/html/nutch-0.7/conf/crawl-urlfilter.txt
> 050818 140150 Using URL normalizer:
> org.apache.nutch.net.BasicUrlNormalizer
> 050818 140150 Added 1 pages
> 050818 140150 Processing pagesByURL: Sorted 1 instructions in 0.014
> seconds.
> 050818 140150 Processing pagesByURL: Sorted 71.42857142857143
> instructions/second
> 050818 140150 Processing pagesByURL: Merged to new DB containing 1
records
> in 0.0070 seconds
> 050818 140150 Processing pagesByURL: Merged 142.85714285714286
> records/second
> 050818 140150 Processing pagesByMD5: Sorted 1 instructions in 0.0020
> seconds.
> 050818 140150 Processing pagesByMD5: Sorted 500.0 instructions/second
> 050818 140150 Processing pagesByMD5: Merged to new DB containing 1
records
> in 0.0030 seconds
> 050818 140150 Processing pagesByMD5: Merged 333.3333333333333
> records/second
> 050818 140150 Processing linksByMD5: Copied file (4096 bytes) in 0.01
> secs.
> 050818 140150 Processing linksByURL: Copied file (4096 bytes) in -0.0020
> secs.
> 050818 140150 FetchListTool started
> 050818 140151 Processing pagesByURL: Sorted 1 instructions in 0.106
> seconds.
> 050818 140151 Processing pagesByURL: Sorted 9.433962264150944
> instructions/second
> 050818 140151 Processing pagesByURL: Merged to new DB containing 1
records
> in 0.0 seconds
> 050818 140151 Processing pagesByURL: Merged Infinity records/second
> 050818 140151 Processing pagesByMD5: Sorted 1 instructions in 0.0020
> seconds.
> 050818 140151 Processing pagesByMD5: Sorted 500.0 instructions/second
> 050818 140151 Processing pagesByMD5: Merged to new DB containing 1
records
> in 0.0020 seconds
> 050818 140151 Processing pagesByMD5: Merged 500.0 records/second
> 050818 140151 Processing linksByMD5: Copied file (4096 bytes) in 0.0010
> secs.
> 050818 140151 Processing linksByURL: Copied file (4096 bytes) in 0.0020
> secs.
> 050818 140151 Processing
>
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150/fetchlist.unsorted:
> Sorted 1 entries in 0.011 seconds.
> 050818 140151 Processing
>
/gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150/fetchlist.unsorted:
> Sorted 90.90909090909092 entries/second
> 050818 140151 Overall processing: Sorted 1 entries in 0.011 seconds.
> 050818 140151 Overall processing: Sorted 0.011 entries/second
> 050818 140151 FetchListTool completed
> 050818 140151 logging at INFO
> 050818 140151 fetching http://gartner.shu.edu/
> 050818 140151 http.proxy.host = null
> 050818 140151 http.proxy.port = 8080
> 050818 140151 http.timeout = 10000
> 050818 140151 http.content.limit = 65536
> 050818 140151 http.agent = NutchCVS/0.7 (Nutch;
> http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
> 050818 140151 http.auth.ntlm.username =
> 050818 140151 fetcher.server.delay = 1000
> 050818 140151 http.max.delays = 100
> 050818 140152 Configured Client
> 050818 140152 basic authentication scheme selected
> 050818 140152 basic authentication scheme selected
> 050818 140153 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140154 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140154 Processing document 0
> 050818 140154 Finishing update
> 050818 140154 Processing pagesByURL: Sorted 1 instructions in 0.0060
> seconds.
> 050818 140154 Processing pagesByURL: Sorted 166.66666666666666
> instructions/second
> 050818 140154 Processing pagesByURL: Merged to new DB containing 1
records
> in 0.0010 seconds
> 050818 140154 Processing pagesByURL: Merged 1000.0 records/second
> 050818 140154 Processing pagesByMD5: Sorted 1 instructions in 0.0050
> seconds.
> 050818 140154 Processing pagesByMD5: Sorted 200.0 instructions/second
> 050818 140154 Processing pagesByMD5: Merged to new DB containing 1
records
> in 0.0 seconds
> 050818 140154 Processing pagesByMD5: Merged Infinity records/second
> 050818 140154 Processing linksByMD5: Copied file (4096 bytes) in 0.0020
> secs.
> 050818 140154 Processing linksByURL: Copied file (4096 bytes) in 0.0040
> secs.
> 050818 140154 Update finished
> 050818 140154 FetchListTool started
> 050818 140154 Overall processing: Sorted 0 entries in 0.0 seconds.
> 050818 140154 Overall processing: Sorted NaN entries/second
> 050818 140154 FetchListTool completed
> 050818 140154 logging at INFO
> 050818 140155 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140155 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140155 Finishing update
> 050818 140155 Update finished
> 050818 140155 FetchListTool started
> 050818 140156 Overall processing: Sorted 0 entries in 0.0 seconds.
> 050818 140156 Overall processing: Sorted NaN entries/second
> 050818 140156 FetchListTool completed
> 050818 140156 logging at INFO
> 050818 140157 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140157 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140157 Finishing update
> 050818 140157 Update finished
> 050818 140157 Updating /gartner/httpd/html/nutch-0.7/crawl.test/segments
> from /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140157 Sorting pages by url...
> 050818 140157 Getting updated scores and anchors from db...
> 050818 140157 Sorting updates by segment...
> 050818 140157 Updating segments...
> 050818 140157 updating
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140157 Done updating
> /gartner/httpd/html/nutch-0.7/crawl.test/segments from
> /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140158 * Opening segment 20050818140150
> 050818 140158 * Indexing segment 20050818140150
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140150: total 1 records in
> 0.034 s (Infinity rec/s).
> 050818 140158 done indexing
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140158 * Opening segment 20050818140154
> 050818 140158 * Indexing segment 20050818140154
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140154: total 0 records in
> 0.046 s (NaN rec/s).
> 050818 140158 done indexing
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140158 * Opening segment 20050818140156
> 050818 140158 * Indexing segment 20050818140156
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140156: total 0 records in
> 0.071 s (NaN rec/s).
> 050818 140158 done indexing
> 050818 140158 Reading url hashes...
> 050818 140158 Sorting url hashes...
> 050818 140158 Deleting url duplicates...
> 050818 140158 Deleted 0 url duplicates.
> 050818 140158 Reading content hashes...
> 050818 140158 Sorting content hashes...
> 050818 140158 Deleting content duplicates...
> 050818 140158 Deleted 0 content duplicates.
> 050818 140158 Duplicate deletion complete locally. Now returning to
> NFS...
> 050818 140158 DeleteDuplicates complete
> 050818 140158 Merging segment indexes...
> 050818 140158 crawl finished: crawl.test
Re: Search Java JSP error after configuration and set up. Please help.
Posted by Piotr Kosiorowski <pk...@gmail.com>.
Please make sure you started tomcat from crawl.test directory (or have
it configured in nutch-default.xml in *.war file)
Regards
Piotr
Diane Palla wrote:
> I am trying to set up Nutch with an intranet. I used Nutch 0.7 with Java
> J2SE 1.4.2 and Tomcat 4.1.31.
>
> I did the crawl with the command
>
> bin/nutch crawl bin/urls.txt -dir crawl.test -depth 3 >& crawl.log
>
>
> and the crawl.log gave log messages that appeared to imply that it was a
> successful run. (Crawl.log is copied after the Java/JSP errors below)
>
> and I set JAVA_HOME and NUTCH_JAVA_HOME to the J2re when I did the crawl,
> but I set JAVA_HOME to the j2se when I ran tomcat and i went to
> http://localhost:8080
>
> I tried to search something and
>
> I got this error of the Nutch Bean.
>
> Did I configure something wrong? How can I fix this?
>
>
> Diane Palla
> Web Services Developer
> Seton Hall University
> 973 313-6199
> palladia@shu.edu
>
>
>
> org.apache.jasper.JasperException
> at
> org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:207)
> at
> org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:240)
> at
> org.apache.jasper.servlet.JspServlet.service(JspServlet.java:187)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
> org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:200)
> at
> org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:146)
> at
> org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:209)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
> org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:144)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
> org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.catalina.core.StandardContext.invoke(StandardContext.java:2358)
> at
> org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:133)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
> org.apache.catalina.valves.ErrorDispatcherValve.invoke(ErrorDispatcherValve.java:118)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
> org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:116)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
> org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:127)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
> org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.coyote.tomcat4.CoyoteAdapter.service(CoyoteAdapter.java:152)
> at
> org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:799)
> at
> org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.processConnection(Http11Protocol.java:705)
> at
> org.apache.tomcat.util.net.TcpWorkerThread.runIt(PoolTcpEndpoint.java:577)
> at
> org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:683)
> at java.lang.Thread.run(Thread.java:534)
>
> root cause
> java.lang.NullPointerException
> at
> org.apache.nutch.searcher.NutchBean.init(NutchBean.java:96)
> at
> org.apache.nutch.searcher.NutchBean.<init>(NutchBean.java:82)
> at
> org.apache.nutch.searcher.NutchBean.<init>(NutchBean.java:72)
> at
> org.apache.nutch.searcher.NutchBean.get(NutchBean.java:64)
> at
> org.apache.jsp.search_jsp._jspService(search_jsp.java:108)
> at
> org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:92)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
> org.apache.jasper.servlet.JspServletWrapper.service(JspServletWrapper.java:162)
> at
> org.apache.jasper.servlet.JspServlet.serviceJspFile(JspServlet.java:240)
> at
> org.apache.jasper.servlet.JspServlet.service(JspServlet.java:187)
> at
> javax.servlet.http.HttpServlet.service(HttpServlet.java:809)
> at
> org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:200)
> at
> org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:146)
> at
> org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:209)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
> org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:144)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
> org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.catalina.core.StandardContext.invoke(StandardContext.java:2358)
> at
> org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:133)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
> org.apache.catalina.valves.ErrorDispatcherValve.invoke(ErrorDispatcherValve.java:118)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
> org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:116)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:594)
> at
> org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:127)
> at
> org.apache.catalina.core.StandardPipeline$StandardPipelineValveContext.invokeNext(StandardPipeline.java:596)
> at
> org.apache.catalina.core.StandardPipeline.invoke(StandardPipeline.java:433)
> at
> org.apache.catalina.core.ContainerBase.invoke(ContainerBase.java:948)
> at
> org.apache.coyote.tomcat4.CoyoteAdapter.service(CoyoteAdapter.java:152)
> at
> org.apache.coyote.http11.Http11Processor.process(Http11Processor.java:799)
> at
> org.apache.coyote.http11.Http11Protocol$Http11ConnectionHandler.processConnection(Http11Protocol.java:705)
> at
> org.apache.tomcat.util.net.TcpWorkerThread.runIt(PoolTcpEndpoint.java:577)
> at
> org.apache.tomcat.util.threads.ThreadPool$ControlRunnable.run(ThreadPool.java:683)
> at java.lang.Thread.run(Thread.java:534)
>
>
>
> Crawl.log:
>
> run java in /usr/java/j2re1.4.2_02
> 050818 140148 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/nutch-default.xml
> 050818 140149 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/crawl-tool.xml
> 050818 140149 parsing
> file:/gartner/httpd/html/nutch-0.7/conf/nutch-site.xml
> 050818 140149 No FS indicated, using default:local
> 050818 140149 crawl started in: crawl.test
> 050818 140149 rootUrlFile = bin/urls.txt
> 050818 140149 threads = 10
> 050818 140149 depth = 3
> 050818 140149 Created webdb at
> LocalFS,/gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140149 Starting URL processing
> 050818 140149 Plugins: looking in: /gartner/httpd/html/nutch-0.7/plugins
> 050818 140149 not including:
> /gartner/httpd/html/nutch-0.7/plugins/clustering-carrot2
> 050818 140149 not including:
> /gartner/httpd/html/nutch-0.7/plugins/creativecommons
> 050818 140149 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/index-basic/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.indexer.IndexingFilter
> class=org.apache.nutch.indexer.basic.BasicIndexingFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/index-more
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/language-identifier
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/ontology
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-ext
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-html/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.html.HtmlParser
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-js/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.js.JSParseFilter
> 050818 140150 impl: point=org.apache.nutch.parse.HtmlParseFilter
> class=org.apache.nutch.parse.js.JSParseFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-msword
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-pdf
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/parse-rss
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/parse-text/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.parse.Parser
> class=org.apache.nutch.parse.text.TextParser
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-file
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-ftp
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-http
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/protocol-httpclient/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.protocol.Protocol
> class=org.apache.nutch.protocol.httpclient.Http
> 050818 140150 impl: point=org.apache.nutch.protocol.Protocol
> class=org.apache.nutch.protocol.httpclient.Http
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-basic/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.basic.BasicQueryFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/query-more
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-site/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.site.SiteQueryFilter
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/query-url/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.searcher.QueryFilter
> class=org.apache.nutch.searcher.url.URLQueryFilter
> 050818 140150 not including:
> /gartner/httpd/html/nutch-0.7/plugins/urlfilter-prefix
> 050818 140150 parsing:
> /gartner/httpd/html/nutch-0.7/plugins/urlfilter-regex/plugin.xml
> 050818 140150 impl: point=org.apache.nutch.net.URLFilter
> class=org.apache.nutch.net.RegexURLFilter
> 050818 140150 found resource crawl-urlfilter.txt at
> file:/gartner/httpd/html/nutch-0.7/conf/crawl-urlfilter.txt
> 050818 140150 Using URL normalizer:
> org.apache.nutch.net.BasicUrlNormalizer
> 050818 140150 Added 1 pages
> 050818 140150 Processing pagesByURL: Sorted 1 instructions in 0.014
> seconds.
> 050818 140150 Processing pagesByURL: Sorted 71.42857142857143
> instructions/second
> 050818 140150 Processing pagesByURL: Merged to new DB containing 1 records
> in 0.0070 seconds
> 050818 140150 Processing pagesByURL: Merged 142.85714285714286
> records/second
> 050818 140150 Processing pagesByMD5: Sorted 1 instructions in 0.0020
> seconds.
> 050818 140150 Processing pagesByMD5: Sorted 500.0 instructions/second
> 050818 140150 Processing pagesByMD5: Merged to new DB containing 1 records
> in 0.0030 seconds
> 050818 140150 Processing pagesByMD5: Merged 333.3333333333333
> records/second
> 050818 140150 Processing linksByMD5: Copied file (4096 bytes) in 0.01
> secs.
> 050818 140150 Processing linksByURL: Copied file (4096 bytes) in -0.0020
> secs.
> 050818 140150 FetchListTool started
> 050818 140151 Processing pagesByURL: Sorted 1 instructions in 0.106
> seconds.
> 050818 140151 Processing pagesByURL: Sorted 9.433962264150944
> instructions/second
> 050818 140151 Processing pagesByURL: Merged to new DB containing 1 records
> in 0.0 seconds
> 050818 140151 Processing pagesByURL: Merged Infinity records/second
> 050818 140151 Processing pagesByMD5: Sorted 1 instructions in 0.0020
> seconds.
> 050818 140151 Processing pagesByMD5: Sorted 500.0 instructions/second
> 050818 140151 Processing pagesByMD5: Merged to new DB containing 1 records
> in 0.0020 seconds
> 050818 140151 Processing pagesByMD5: Merged 500.0 records/second
> 050818 140151 Processing linksByMD5: Copied file (4096 bytes) in 0.0010
> secs.
> 050818 140151 Processing linksByURL: Copied file (4096 bytes) in 0.0020
> secs.
> 050818 140151 Processing
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150/fetchlist.unsorted:
> Sorted 1 entries in 0.011 seconds.
> 050818 140151 Processing
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150/fetchlist.unsorted:
> Sorted 90.90909090909092 entries/second
> 050818 140151 Overall processing: Sorted 1 entries in 0.011 seconds.
> 050818 140151 Overall processing: Sorted 0.011 entries/second
> 050818 140151 FetchListTool completed
> 050818 140151 logging at INFO
> 050818 140151 fetching http://gartner.shu.edu/
> 050818 140151 http.proxy.host = null
> 050818 140151 http.proxy.port = 8080
> 050818 140151 http.timeout = 10000
> 050818 140151 http.content.limit = 65536
> 050818 140151 http.agent = NutchCVS/0.7 (Nutch;
> http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
> 050818 140151 http.auth.ntlm.username =
> 050818 140151 fetcher.server.delay = 1000
> 050818 140151 http.max.delays = 100
> 050818 140152 Configured Client
> 050818 140152 basic authentication scheme selected
> 050818 140152 basic authentication scheme selected
> 050818 140153 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140154 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140154 Processing document 0
> 050818 140154 Finishing update
> 050818 140154 Processing pagesByURL: Sorted 1 instructions in 0.0060
> seconds.
> 050818 140154 Processing pagesByURL: Sorted 166.66666666666666
> instructions/second
> 050818 140154 Processing pagesByURL: Merged to new DB containing 1 records
> in 0.0010 seconds
> 050818 140154 Processing pagesByURL: Merged 1000.0 records/second
> 050818 140154 Processing pagesByMD5: Sorted 1 instructions in 0.0050
> seconds.
> 050818 140154 Processing pagesByMD5: Sorted 200.0 instructions/second
> 050818 140154 Processing pagesByMD5: Merged to new DB containing 1 records
> in 0.0 seconds
> 050818 140154 Processing pagesByMD5: Merged Infinity records/second
> 050818 140154 Processing linksByMD5: Copied file (4096 bytes) in 0.0020
> secs.
> 050818 140154 Processing linksByURL: Copied file (4096 bytes) in 0.0040
> secs.
> 050818 140154 Update finished
> 050818 140154 FetchListTool started
> 050818 140154 Overall processing: Sorted 0 entries in 0.0 seconds.
> 050818 140154 Overall processing: Sorted NaN entries/second
> 050818 140154 FetchListTool completed
> 050818 140154 logging at INFO
> 050818 140155 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140155 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140155 Finishing update
> 050818 140155 Update finished
> 050818 140155 FetchListTool started
> 050818 140156 Overall processing: Sorted 0 entries in 0.0 seconds.
> 050818 140156 Overall processing: Sorted NaN entries/second
> 050818 140156 FetchListTool completed
> 050818 140156 logging at INFO
> 050818 140157 Updating /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140157 Updating for
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140157 Finishing update
> 050818 140157 Update finished
> 050818 140157 Updating /gartner/httpd/html/nutch-0.7/crawl.test/segments
> from /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140157 reading
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140157 Sorting pages by url...
> 050818 140157 Getting updated scores and anchors from db...
> 050818 140157 Sorting updates by segment...
> 050818 140157 Updating segments...
> 050818 140157 updating
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140157 Done updating
> /gartner/httpd/html/nutch-0.7/crawl.test/segments from
> /gartner/httpd/html/nutch-0.7/crawl.test/db
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140150
> 050818 140158 * Opening segment 20050818140150
> 050818 140158 * Indexing segment 20050818140150
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140150: total 1 records in
> 0.034 s (Infinity rec/s).
> 050818 140158 done indexing
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140154
> 050818 140158 * Opening segment 20050818140154
> 050818 140158 * Indexing segment 20050818140154
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140154: total 0 records in
> 0.046 s (NaN rec/s).
> 050818 140158 done indexing
> 050818 140158 indexing segment:
> /gartner/httpd/html/nutch-0.7/crawl.test/segments/20050818140156
> 050818 140158 * Opening segment 20050818140156
> 050818 140158 * Indexing segment 20050818140156
> 050818 140158 * Optimizing index...
> 050818 140158 * Moving index to NFS if needed...
> 050818 140158 DONE indexing segment 20050818140156: total 0 records in
> 0.071 s (NaN rec/s).
> 050818 140158 done indexing
> 050818 140158 Reading url hashes...
> 050818 140158 Sorting url hashes...
> 050818 140158 Deleting url duplicates...
> 050818 140158 Deleted 0 url duplicates.
> 050818 140158 Reading content hashes...
> 050818 140158 Sorting content hashes...
> 050818 140158 Deleting content duplicates...
> 050818 140158 Deleted 0 content duplicates.
> 050818 140158 Duplicate deletion complete locally. Now returning to
> NFS...
> 050818 140158 DeleteDuplicates complete
> 050818 140158 Merging segment indexes...
> 050818 140158 crawl finished: crawl.test