You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@nutch.apache.org by lei wang <nu...@gmail.com> on 2009/07/11 04:46:18 UTC

job failed for "Too many fetch-failures"

hi everyone, I encountered a "Too many fetch-failures" error when using
Nutch 1.0.
Can anyone help me? Thanks a lot.

my hadoop-site.xml file is configured as follows:

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>

  <!-- Filesystem (HDFS) settings -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://distributed1:9000/</value>
    <description>The name of the default file system. Either the literal
string "local" or a host:port for DFS.</description>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/had/nutch-1.0/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>dfs.name.dir</name>
    <value>/home/had/nutch-1.0/filesystem/name</value>
    <description>Determines where on the local filesystem the DFS name node
should store the name table. If this is a comma-delimited list of
directories then the name table is replicated in all of the directories, for
redundancy. </description>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/home/had/nutch-1.0/filesystem/data</value>
    <description>Determines where on the local filesystem an DFS data node
should store its blocks. If this is a comma-delimited list of directories,
then data will be stored in all named directories, typically on different
devices. Directories that do not exist are ignored.</description>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>Default block replication. The actual number of replications
can be specified when the file is created. The default is used if
replication is not specified in create time.</description>
  </property>

  <!-- MapReduce settings -->
  <property>
    <name>mapred.job.tracker</name>
    <value>distributed1:9001</value>
    <description>The host and port that the MapReduce job tracker runs at. If
"local", then jobs are run in-process as a single map and reduce
task.</description>
  </property>
  <property>
    <name>mapred.map.tasks</name>
    <value>480</value>
    <description>The default number of map tasks per job.  Typically set
to a prime several times greater than number of available hosts.
  Ignored when mapred.job.tracker is "local".
  </description>
  </property>
  <property>
    <name>mapred.reduce.tasks</name>
    <value>9</value>
    <description>The default number of reduce tasks per job.  Typically set
  to a prime close to the number of available hosts.  Ignored when
  mapred.job.tracker is "local".
  </description>
  </property>
  <property>
    <name>mapred.child.java.opts</name>
    <value>-Xmx2000m</value>
    <description>
    You can specify other Java options for each map or reduce task here,
    but most likely you will want to adjust the heap size.
  </description>
  </property>
  <!-- NOTE(review): ulimit is in KB; 3145728 KB = 3 GB, which exceeds the
       2000m child heap above, as required. -->
  <property>
    <name>mapred.child.ulimit</name>
    <value>3145728</value>
  </property>
  <property>
    <name>mapred.tasktracker.map.tasks.maximum</name>
    <value>2</value>
    <description>The maximum number of map tasks that will be run
  simultaneously by a task tracker.
  </description>
  </property>
  <property>
    <name>mapred.tasktracker.reduce.tasks.maximum</name>
    <value>2</value>
    <description>The maximum number of reduce tasks that will be run
  simultaneously by a task tracker.
  </description>
  </property>

</configuration>