You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2011/04/14 11:52:18 UTC

svn commit: r1092082 [2/2] - in /nutch/trunk: ./ src/bin/ src/java/org/apache/nutch/api/ src/java/org/apache/nutch/api/impl/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutc...

Modified: nutch/trunk/src/java/org/apache/nutch/tools/Benchmark.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/Benchmark.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/Benchmark.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/Benchmark.java Thu Apr 14 09:52:16 2011
@@ -1,236 +1,252 @@
-package org.apache.nutch.tools;
-
-import java.io.OutputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.nutch.crawl.DbUpdaterJob;
-import org.apache.nutch.crawl.GeneratorJob;
-import org.apache.nutch.crawl.InjectorJob;
-import org.apache.nutch.crawl.WebTableReader;
-import org.apache.nutch.fetcher.FetcherJob;
-import org.apache.nutch.metadata.Nutch;
-import org.apache.nutch.parse.ParserJob;
-import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.util.NutchJob;
-
-public class Benchmark extends Configured implements Tool {
-  private static final Logger LOG = LoggerFactory.getLogger(Benchmark.class);
-
-  public static void main(String[] args) throws Exception {
-    Configuration conf = NutchConfiguration.create();
-    int res = ToolRunner.run(conf, new Benchmark(), args);
-    System.exit(res);
-  }
-
-  private void createSeeds(FileSystem fs, Path seedsDir, int count) throws Exception {
-    OutputStream os = fs.create(new Path(seedsDir, "seeds"));
-    for (int i = 0; i < count; i++) {
-      String url = "http://www.test-" + i + ".com/\r\n";
-      os.write(url.getBytes());
-    }
-    os.flush();
-    os.close();
-  }
-
-  public static final class BenchmarkResults {
-    Map<String,Map<String,Long>> timings = new HashMap<String,Map<String,Long>>();
-    List<String> runs = new ArrayList<String>();
-    List<String> stages = new ArrayList<String>();
-    int seeds, depth, threads;
-    long topN;
-    long elapsed;
-    String plugins;
-
-    public void addTiming(String stage, String run, long timing) {
-      if (!runs.contains(run)) {
-        runs.add(run);
-      }
-      if (!stages.contains(stage)) {
-        stages.add(stage);
-      }
-      Map<String,Long> t = timings.get(stage);
-      if (t == null) {
-        t = new HashMap<String,Long>();
-        timings.put(stage, t);
-      }
-      t.put(run, timing);
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder sb = new StringBuilder();
-      sb.append("* Plugins:\t" + plugins + "\n");
-      sb.append("* Seeds:\t" + seeds + "\n");
-      sb.append("* Depth:\t" + depth + "\n");
-      sb.append("* Threads:\t" + threads + "\n");
-      sb.append("* TopN:\t" + topN + "\n");
-      sb.append("* TOTAL ELAPSED:\t" + elapsed + "\n");
-      for (String stage : stages) {
-        Map<String,Long> timing = timings.get(stage);
-        if (timing == null) continue;
-        sb.append("- stage: " + stage + "\n");
-        for (String r : runs) {
-          Long Time = timing.get(r);
-          if (Time == null) {
-            continue;
-          }
-          sb.append("\trun " + r + "\t" + Time + "\n");
-        }
-      }
-      return sb.toString();
-    }
-
-    public List<String> getStages() {
-      return stages;
-    }
-    public List<String> getRuns() {
-      return runs;
-    }
-  }
-
-  public int run(String[] args) throws Exception {
-    String plugins = "protocol-http|parse-tika|scoring-opic|urlfilter-regex|urlnormalizer-pass";
-    int seeds = 1;
-    int depth = 10;
-    int threads = 10;
-    //boolean delete = true;
-    long topN = Long.MAX_VALUE;
-
-    if (args.length == 0) {
-      System.err.println("Usage: Benchmark [-crawlId <id>] [-seeds NN] [-depth NN] [-threads NN] [-maxPerHost NN] [-plugins <regex>]");
-      System.err.println("\t-crawlId id\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
-      System.err.println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
-      System.err.println("\t-depth NN\tperform NN crawl cycles (default: 10)");
-      System.err.println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
-      // XXX what is the equivalent here? not an additional job...
-      // System.err.println("\t-keep\tkeep segment data (default: delete after updatedb)");
-      System.err.println("\t-plugins <regex>\toverride 'plugin.includes'.");
-      System.err.println("\tNOTE: if not specified, this is reset to: " + plugins);
-      System.err.println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
-      System.err.println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
-      return -1;
-    }
-    int maxPerHost = Integer.MAX_VALUE;
-    for (int i = 0; i < args.length; i++) {
-      if (args[i].equals("-crawlId")) {
-        getConf().set(Nutch.CRAWL_ID_KEY, args[++i]);
-      } else if (args[i].equals("-seeds")) {
-        seeds = Integer.parseInt(args[++i]);
-      } else if (args[i].equals("-threads")) {
-        threads = Integer.parseInt(args[++i]);
-      } else if (args[i].equals("-depth")) {
-        depth = Integer.parseInt(args[++i]);
-      } else if (args[i].equals("-plugins")) {
-        plugins = args[++i];
-      } else if (args[i].equalsIgnoreCase("-maxPerHost")) {
-        maxPerHost = Integer.parseInt(args[++i]);
-      } else {
-        LOG.error("Invalid argument: '" + args[i] + "'");
-        return -1;
-      }
-    }
-    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN, plugins);
-    System.out.println(res);
-    return 0;
-  }
-
-  public BenchmarkResults benchmark(int seeds, int depth, int threads, int maxPerHost,
-        long topN, String plugins) throws Exception {
-    Configuration conf = getConf();
-    conf.set("http.proxy.host", "localhost");
-    conf.setInt("http.proxy.port", 8181);
-    conf.set("http.agent.name", "test");
-    conf.set("http.robots.agents", "test,*");
-    if (!plugins.equals("default")) {
-      conf.set("plugin.includes", plugins);
-    }
-    conf.setInt(GeneratorJob.GENERATOR_MAX_COUNT, maxPerHost);
-    conf.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
-    Job job = new NutchJob(conf);
-    FileSystem fs = FileSystem.get(job.getConfiguration());
-    Path dir = new Path(getConf().get("hadoop.tmp.dir"),
-            "bench-" + System.currentTimeMillis());
-    fs.mkdirs(dir);
-    Path rootUrlDir = new Path(dir, "seed");
-    fs.mkdirs(rootUrlDir);
-    createSeeds(fs, rootUrlDir, seeds);
-
-    if (LOG.isInfoEnabled()) {
-      LOG.info("crawl started in: " + dir);
-      LOG.info("rootUrlDir = " + rootUrlDir);
-      LOG.info("threads = " + threads);
-      LOG.info("depth = " + depth);
-    }
-
-    BenchmarkResults res = new BenchmarkResults();
-    res.depth = depth;
-    res.plugins = plugins;
-    res.seeds = seeds;
-    res.threads = threads;
-    res.topN = topN;
-
-    res.elapsed = System.currentTimeMillis();
-    InjectorJob injector = new InjectorJob(conf);
-    GeneratorJob generator = new GeneratorJob(conf);
-    FetcherJob fetcher = new FetcherJob(conf);
-    ParserJob parseSegment = new ParserJob(conf);
-    DbUpdaterJob crawlDbTool = new DbUpdaterJob(conf);
-    // not needed in the new API
-    //LinkDb linkDbTool = new LinkDb(getConf());
-
-    long start = System.currentTimeMillis();
-    // initialize crawlDb
-    injector.inject(rootUrlDir);
-    long delta = System.currentTimeMillis() - start;
-    res.addTiming("inject", "0", delta);
-    int i;
-    for (i = 0; i < depth; i++) {             // generate new segment
-      start = System.currentTimeMillis();
-      String batchId = generator.generate(topN, System.currentTimeMillis(),
-              false, false);
-      delta = System.currentTimeMillis() - start;
-      res.addTiming("generate", i + "", delta);
-      if (batchId == null) {
-        LOG.info("Stopping at depth=" + i + " - no more URLs to fetch.");
-        break;
-      }
-      boolean isParsing = getConf().getBoolean("fetcher.parse", true);
-      start = System.currentTimeMillis();
-      fetcher.fetch(batchId, threads, false, isParsing, -1);  // fetch it
-      delta = System.currentTimeMillis() - start;
-      res.addTiming("fetch", i + "", delta);
-      if (!isParsing) {
-        start = System.currentTimeMillis();
-        parseSegment.parse(batchId, false, false);    // parse it, if needed
-        delta = System.currentTimeMillis() - start;
-        res.addTiming("parse", i + "", delta);
-      }
-      start = System.currentTimeMillis();
-      crawlDbTool.run(new String[0]); // update crawldb
-      delta = System.currentTimeMillis() - start;
-      res.addTiming("update", i + "", delta);
-    }
-    if (i == 0) {
-      LOG.warn("No URLs to fetch - check your seed list and URL filters.");
-    }
-    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
-    res.elapsed = System.currentTimeMillis() - res.elapsed;
-    WebTableReader dbreader = new WebTableReader();
-    dbreader.setConf(conf);
-    dbreader.processStatJob(false);
-    return res;
-  }
-
-}
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.apache.nutch.tools;
+
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.nutch.crawl.DbUpdaterJob;
+import org.apache.nutch.crawl.GeneratorJob;
+import org.apache.nutch.crawl.InjectorJob;
+import org.apache.nutch.crawl.WebTableReader;
+import org.apache.nutch.fetcher.FetcherJob;
+import org.apache.nutch.metadata.Nutch;
+import org.apache.nutch.parse.ParserJob;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.NutchJob;
+
+public class Benchmark extends Configured implements Tool {
+  private static final Logger LOG = LoggerFactory.getLogger(Benchmark.class);
+
+  public static void main(String[] args) throws Exception { // command-line entry point for the benchmark tool
+    Configuration conf = NutchConfiguration.create();
+    int res = ToolRunner.run(conf, new Benchmark(), args); // hands conf and args to a new Benchmark tool instance
+    System.exit(res); // the value returned by ToolRunner becomes the process exit status
+  }
+
+  private void createSeeds(FileSystem fs, Path seedsDir, int count) throws Exception { // writes 'count' synthetic seed URLs to the file <seedsDir>/seeds
+    OutputStream os = fs.create(new Path(seedsDir, "seeds"));
+    for (int i = 0; i < count; i++) {
+      String url = "http://www.test-" + i + ".com/\r\n"; // one distinct host name per seed, each line CRLF-terminated
+      os.write(url.getBytes()); // NOTE(review): no charset argument, so the platform default encoding is used
+    }
+    os.flush();
+    os.close(); // NOTE(review): not reached if write() or flush() throws, leaving the stream open
+  }
+
+  public static final class BenchmarkResults { // benchmark parameters plus the timings recorded per stage and per run
+    Map<String,Map<String,Long>> timings = new HashMap<String,Map<String,Long>>(); // stage name -> (run id -> timing)
+    List<String> runs = new ArrayList<String>(); // run ids, in the order they were first seen
+    List<String> stages = new ArrayList<String>(); // stage names, in the order they were first seen
+    int seeds, depth, threads;
+    long topN;
+    long elapsed;
+    String plugins;
+
+    public void addTiming(String stage, String run, long timing) { // records 'timing' for the (stage, run) pair
+      if (!runs.contains(run)) { // keep each run id only once so toString() lists it once
+        runs.add(run);
+      }
+      if (!stages.contains(stage)) { // same de-duplication for stage names
+        stages.add(stage);
+      }
+      Map<String,Long> t = timings.get(stage);
+      if (t == null) { // first timing for this stage: create its per-run map
+        t = new HashMap<String,Long>();
+        timings.put(stage, t);
+      }
+      t.put(run, timing); // a repeated (stage, run) pair overwrites the earlier value
+    }
+
+    @Override
+    public String toString() { // renders the parameters followed by one section per stage
+      StringBuilder sb = new StringBuilder();
+      sb.append("* Plugins:\t" + plugins + "\n");
+      sb.append("* Seeds:\t" + seeds + "\n");
+      sb.append("* Depth:\t" + depth + "\n");
+      sb.append("* Threads:\t" + threads + "\n");
+      sb.append("* TopN:\t" + topN + "\n");
+      sb.append("* TOTAL ELAPSED:\t" + elapsed + "\n");
+      for (String stage : stages) { // stages are printed in first-seen order
+        Map<String,Long> timing = timings.get(stage);
+        if (timing == null) continue;
+        sb.append("- stage: " + stage + "\n");
+        for (String r : runs) {
+          Long Time = timing.get(r); // NOTE(review): capitalised local name; value is null when this stage has no entry for run r
+          if (Time == null) { // not every stage is recorded for every run, so skip the gaps
+            continue;
+          }
+          sb.append("\trun " + r + "\t" + Time + "\n");
+        }
+      }
+      return sb.toString();
+    }
+
+    public List<String> getStages() { // NOTE(review): returns the live internal list, not a copy
+      return stages;
+    }
+    public List<String> getRuns() { // NOTE(review): returns the live internal list, not a copy
+      return runs;
+    }
+  }
+
+  public int run(String[] args) throws Exception { // parses the command-line options, runs the benchmark and prints the results
+    String plugins = "protocol-http|parse-tika|scoring-opic|urlfilter-regex|urlnormalizer-pass"; // used unless -plugins is given
+    int seeds = 1;
+    int depth = 10;
+    int threads = 10;
+    //boolean delete = true;
+    long topN = Long.MAX_VALUE; // NOTE(review): no command-line option changes this value
+
+    if (args.length == 0) { // no arguments at all: print usage to stderr and fail
+      System.err.println("Usage: Benchmark [-crawlId <id>] [-seeds NN] [-depth NN] [-threads NN] [-maxPerHost NN] [-plugins <regex>]");
+      System.err.println("\t-crawlId id\t the id to prefix the schemas to operate on, (default: storage.crawl.id)");
+      System.err.println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
+      System.err.println("\t-depth NN\tperform NN crawl cycles (default: 10)");
+      System.err.println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
+      // XXX what is the equivalent here? not an additional job...
+      // System.err.println("\t-keep\tkeep segment data (default: delete after updatedb)");
+      System.err.println("\t-plugins <regex>\toverride 'plugin.includes'.");
+      System.err.println("\tNOTE: if not specified, this is reset to: " + plugins);
+      System.err.println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
+      System.err.println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
+      return -1;
+    }
+    int maxPerHost = Integer.MAX_VALUE;
+    for (int i = 0; i < args.length; i++) { // NOTE(review): each args[++i] below throws ArrayIndexOutOfBoundsException if the option is the last argument
+      if (args[i].equals("-crawlId")) {
+        getConf().set(Nutch.CRAWL_ID_KEY, args[++i]); // stored directly in the tool's Configuration
+      } else if (args[i].equals("-seeds")) {
+        seeds = Integer.parseInt(args[++i]); // a non-numeric value propagates as NumberFormatException
+      } else if (args[i].equals("-threads")) {
+        threads = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-depth")) {
+        depth = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-plugins")) {
+        plugins = args[++i];
+      } else if (args[i].equalsIgnoreCase("-maxPerHost")) { // the only option name matched case-insensitively
+        maxPerHost = Integer.parseInt(args[++i]);
+      } else {
+        LOG.error("Invalid argument: '" + args[i] + "'");
+        return -1; // unknown option: fail without running the benchmark
+      }
+    }
+    BenchmarkResults res = benchmark(seeds, depth, threads, maxPerHost, topN, plugins);
+    System.out.println(res); // prints the report built by BenchmarkResults.toString()
+    return 0;
+  }
+
+  public BenchmarkResults benchmark(int seeds, int depth, int threads, int maxPerHost,
+        long topN, String plugins) throws Exception { // runs inject once, then up to 'depth' generate/fetch/(parse)/update cycles, timing each stage
+    Configuration conf = getConf();
+    conf.set("http.proxy.host", "localhost"); // HTTP proxy settings are forced to localhost:8181
+    conf.setInt("http.proxy.port", 8181);
+    conf.set("http.agent.name", "test");
+    conf.set("http.robots.agents", "test,*");
+    if (!plugins.equals("default")) { // the literal "default" keeps whatever plugin.includes is already configured
+      conf.set("plugin.includes", plugins);
+    }
+    conf.setInt(GeneratorJob.GENERATOR_MAX_COUNT, maxPerHost);
+    conf.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
+    Job job = new NutchJob(conf); // used in this method only to supply the Configuration for FileSystem.get
+    FileSystem fs = FileSystem.get(job.getConfiguration());
+    Path dir = new Path(getConf().get("hadoop.tmp.dir"),
+            "bench-" + System.currentTimeMillis()); // work directory under hadoop.tmp.dir, named by the current timestamp
+    fs.mkdirs(dir); // NOTE(review): nothing in this method removes the directory afterwards
+    Path rootUrlDir = new Path(dir, "seed");
+    fs.mkdirs(rootUrlDir);
+    createSeeds(fs, rootUrlDir, seeds);
+
+    if (LOG.isInfoEnabled()) {
+      LOG.info("crawl started in: " + dir);
+      LOG.info("rootUrlDir = " + rootUrlDir);
+      LOG.info("threads = " + threads);
+      LOG.info("depth = " + depth);
+    }
+
+    BenchmarkResults res = new BenchmarkResults();
+    res.depth = depth;
+    res.plugins = plugins;
+    res.seeds = seeds;
+    res.threads = threads;
+    res.topN = topN;
+
+    res.elapsed = System.currentTimeMillis(); // temporarily holds the start timestamp; turned into a duration after the loop
+    InjectorJob injector = new InjectorJob(conf);
+    GeneratorJob generator = new GeneratorJob(conf);
+    FetcherJob fetcher = new FetcherJob(conf);
+    ParserJob parseSegment = new ParserJob(conf);
+    DbUpdaterJob crawlDbTool = new DbUpdaterJob(conf);
+    // not needed in the new API
+    //LinkDb linkDbTool = new LinkDb(getConf());
+
+    long start = System.currentTimeMillis();
+    // initialize crawlDb
+    injector.inject(rootUrlDir);
+    long delta = System.currentTimeMillis() - start; // all stage timings are millisecond differences of currentTimeMillis()
+    res.addTiming("inject", "0", delta); // inject runs once and is recorded under run "0"
+    int i;
+    for (i = 0; i < depth; i++) {             // generate new segment
+      start = System.currentTimeMillis();
+      String batchId = generator.generate(topN, System.currentTimeMillis(),
+              false, false);
+      delta = System.currentTimeMillis() - start;
+      res.addTiming("generate", i + "", delta); // recorded before the null check, so a final empty generate is still timed
+      if (batchId == null) { // a null batch id is treated as "nothing left to fetch"
+        LOG.info("Stopping at depth=" + i + " - no more URLs to fetch.");
+        break;
+      }
+      boolean isParsing = getConf().getBoolean("fetcher.parse", true); // when true the separate parse step below is skipped; presumably the fetcher parses inline (confirm)
+      start = System.currentTimeMillis();
+      fetcher.fetch(batchId, threads, false, isParsing, -1);  // fetch it
+      delta = System.currentTimeMillis() - start;
+      res.addTiming("fetch", i + "", delta);
+      if (!isParsing) {
+        start = System.currentTimeMillis();
+        parseSegment.parse(batchId, false, false);    // parse it, if needed
+        delta = System.currentTimeMillis() - start;
+        res.addTiming("parse", i + "", delta);
+      }
+      start = System.currentTimeMillis();
+      crawlDbTool.run(new String[0]); // update crawldb
+      delta = System.currentTimeMillis() - start;
+      res.addTiming("update", i + "", delta);
+    }
+    if (i == 0) { // either depth <= 0 or the very first generate returned no batch
+      LOG.warn("No URLs to fetch - check your seed list and URL filters.");
+    }
+    if (LOG.isInfoEnabled()) { LOG.info("crawl finished: " + dir); }
+    res.elapsed = System.currentTimeMillis() - res.elapsed; // start timestamp replaced by the total elapsed milliseconds
+    WebTableReader dbreader = new WebTableReader();
+    dbreader.setConf(conf);
+    dbreader.processStatJob(false); // NOTE(review): project code not visible here; presumably reports web table statistics (confirm). Runs after elapsed is computed, so it is not included in the total
+    return res;
+  }
+
+}

Modified: nutch/trunk/src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/AbstractTestbedHandler.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.tools.proxy;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

Modified: nutch/trunk/src/java/org/apache/nutch/tools/proxy/DelayHandler.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/DelayHandler.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/DelayHandler.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/DelayHandler.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.tools.proxy;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

Modified: nutch/trunk/src/java/org/apache/nutch/tools/proxy/FakeHandler.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/FakeHandler.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/FakeHandler.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/FakeHandler.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.tools.proxy;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

Modified: nutch/trunk/src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/LogDebugHandler.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.tools.proxy;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

Modified: nutch/trunk/src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/NotFoundHandler.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.tools.proxy;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

Modified: nutch/trunk/src/java/org/apache/nutch/tools/proxy/TestbedProxy.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/TestbedProxy.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/TestbedProxy.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/TestbedProxy.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.tools.proxy;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

Modified: nutch/trunk/src/java/org/apache/nutch/util/IdentityPageReducer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/IdentityPageReducer.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/IdentityPageReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/IdentityPageReducer.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.util;
 
 import java.io.IOException;

Modified: nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/NutchTool.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.util;
 
 import java.io.IOException;
@@ -77,4 +93,4 @@ public abstract class NutchTool extends 
     }
     return false;
   }
-}
\ No newline at end of file
+}

Modified: nutch/trunk/src/java/org/apache/nutch/util/Pair.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/Pair.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/Pair.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/Pair.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.util;
 
 public class Pair<F, S> {

Modified: nutch/trunk/src/java/org/apache/nutch/util/TableUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/TableUtil.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/TableUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/TableUtil.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.util;
 
 import java.net.MalformedURLException;

Modified: nutch/trunk/src/java/org/apache/nutch/util/ToolUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/ToolUtil.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/ToolUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/ToolUtil.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.util;
 
 import java.util.HashMap;

Modified: nutch/trunk/src/java/org/apache/nutch/util/WebPageWritable.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/WebPageWritable.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/WebPageWritable.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/WebPageWritable.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.util;
 
 import java.io.DataInput;

Modified: nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java (original)
+++ nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestRTFParser.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.parse.tika;
 
 /**

Modified: nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/api/TestAPI.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.api;
 
 import static org.junit.Assert.*;

Modified: nutch/trunk/src/test/org/apache/nutch/storage/TestGoraStorage.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/storage/TestGoraStorage.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/storage/TestGoraStorage.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/storage/TestGoraStorage.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.storage;
 
 import java.io.File;

Modified: nutch/trunk/src/test/org/apache/nutch/util/TestTableUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/util/TestTableUtil.java?rev=1092082&r1=1092081&r2=1092082&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/util/TestTableUtil.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/util/TestTableUtil.java Thu Apr 14 09:52:16 2011
@@ -1,3 +1,19 @@
+/*******************************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
 package org.apache.nutch.util;
 
 import org.apache.nutch.util.TableUtil;