You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2013/05/23 00:44:12 UTC

svn commit: r1485475 - in /nutch/branches/2.x: ./ ivy/ src/java/org/apache/nutch/api/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/host/ src/test/org/apache/nutch/crawl/ src/test/org/apache/nutch/fetcher/ src/test/org/apache/nutch/storage...

Author: lewismc
Date: Wed May 22 22:44:12 2013
New Revision: 1485475

URL: http://svn.apache.org/r1485475
Log:
revert NUTCH-1569 Upgrade 2.x to Gora 0.3

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/build.xml
    nutch/branches/2.x/ivy/ivy.xml
    nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java
    nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java
    nutch/branches/2.x/src/java/org/apache/nutch/host/HostDb.java
    nutch/branches/2.x/src/java/org/apache/nutch/host/HostDbReader.java
    nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java
    nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java
    nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java
    nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java
    nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java
    nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Wed May 22 22:44:12 2013
@@ -4,8 +4,6 @@ Release 2.2 - Current Development
 
 * NUTCH-1249 and NUTCH-1275 : Resolve all issues flagged up by adding javac -Xlint argument (tejasp)
 
-* NUTCH-1569 Upgrade 2.x to Gora 0.3 (lewismc)
-
 * NUTCH-1513 Support Robots.txt for Ftp urls (tejasp)
 
 * NUTCH-1053 Parsing of RSS feeds fails (tejasp)

Modified: nutch/branches/2.x/build.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/build.xml?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/build.xml (original)
+++ nutch/branches/2.x/build.xml Wed May 22 22:44:12 2013
@@ -538,7 +538,7 @@
   <target name="generate-gora-src" depends="init" description="--> compile the avro schema(s) in src/gora/*.avsc">
     <java classname="org.apache.gora.compiler.GoraCompiler">
      <classpath refid="classpath"/>
-     <arg value="src/gora/"/>
+     <arg value="src/gora/webpage.avsc"/>
      <arg value="${src.dir}"/>
     </java>
  </target>

Modified: nutch/branches/2.x/ivy/ivy.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/ivy/ivy.xml?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/ivy/ivy.xml (original)
+++ nutch/branches/2.x/ivy/ivy.xml Wed May 22 22:44:12 2013
@@ -99,28 +99,23 @@
     <!--================-->
     <!-- Gora artifacts -->
     <!--================-->
-    <dependency org="org.apache.gora" name="gora-core" rev="0.3" conf="*->compile"/>
-    <!-- Uncomment this to use SQL as Gora backend. It should be noted that the 
-    gora-sql 0.1.1-incubating artifact is NOT compatable with gora-core 0.3. Users should 
-    downgrade to gora-core 0.2.1 in order to use SQL as a backend. -->
-    <!--
+    <dependency org="org.apache.gora" name="gora-core" rev="0.2.1" conf="*->compile"/>
     <dependency org="org.apache.gora" name="gora-sql" rev="0.1.1-incubating" conf="*->default" />
-    -->
     <!-- Uncomment this to use MySQL as database with SQL as Gora store. -->
     <!-- 
     <dependency org="mysql" name="mysql-connector-java" rev="5.1.18" conf="*->default"/> 
     -->
     <!-- Uncomment this to use HBase as Gora backend. -->
     <!-- 
-    <dependency org="org.apache.gora" name="gora-hbase" rev="0.3" conf="*->default" />
+    <dependency org="org.apache.gora" name="gora-hbase" rev="0.2.1" conf="*->default" />
     -->
     <!-- Uncomment this to use Accumulo as Gora backend. -->
     <!-- 
-    <dependency org="org.apache.gora" name="gora-accumulo" rev="0.3" conf="*->default" />
+    <dependency org="org.apache.gora" name="gora-accumulo" rev="0.2.1" conf="*->default" />
     -->
     <!-- Uncomment this to use Cassandra as Gora backend. -->
     <!-- 
-    <dependency org="org.apache.gora" name="gora-cassandra" rev="0.3" conf="*->default" />
+    <dependency org="org.apache.gora" name="gora-cassandra" rev="0.2" conf="*->default" />
     -->
 
     <!--global exclusion -->

Modified: nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/api/DbReader.java Wed May 22 22:44:12 2013
@@ -113,14 +113,10 @@ public class DbReader {
       if (fields != null) {
         this.fields = new TreeSet<String>(Arrays.asList(fields));
       }
-      try {
-        advance();
-      } catch (Exception e){
-        e.printStackTrace();
-      }
+      advance();
     }
     
-    private void advance() throws Exception, IOException {
+    private void advance() throws IOException {
       hasNext = res.next();
       if (hasNext && batchId != null) {
         do {
@@ -155,10 +151,6 @@ public class DbReader {
         e.printStackTrace();
         hasNext = false;
         return null;
-      } catch (Exception e) {
-        e.printStackTrace();
-        hasNext = false;
-        return null;
       }
       return pageAsMap(url, page);
     }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java Wed May 22 22:44:12 2013
@@ -223,7 +223,7 @@ public class WebTableReader extends Nutc
 
   /** Prints out the entry to the standard out **/
   private void read(String key, boolean dumpContent, boolean dumpHeaders,
-      boolean dumpLinks, boolean dumpText) throws ClassNotFoundException, IOException, Exception {
+      boolean dumpLinks, boolean dumpText) throws ClassNotFoundException, IOException {
     DataStore<String, WebPage> datastore = StorageUtils.createWebStore(getConf(),
         String.class, WebPage.class);
 
@@ -235,19 +235,15 @@ public class WebTableReader extends Nutc
     boolean found = false;
     // should happen only once
     while (result.next()) {
-      try {
-        WebPage page = result.get();
-        String skey = result.getKey();
-        // we should not get to this point but nevermind
-        if (page == null || skey == null)
-          break;
-        found = true;
-        String url = TableUtil.unreverseUrl(skey);
-        System.out.println(getPageRepresentation(url, page, dumpContent,
-            dumpHeaders, dumpLinks, dumpText));
-      }catch (Exception e) {
-        e.printStackTrace();
-      }
+      WebPage page = result.get();
+      String skey = result.getKey();
+      // we should not get to this point but nevermind
+      if (page == null || skey == null)
+        break;
+      found = true;
+      String url = TableUtil.unreverseUrl(skey);
+      System.out.println(getPageRepresentation(url, page, dumpContent,
+          dumpHeaders, dumpLinks, dumpText));
     }
     if (!found)
       System.out.println(key + " not found");

Modified: nutch/branches/2.x/src/java/org/apache/nutch/host/HostDb.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/host/HostDb.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/host/HostDb.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/host/HostDb.java Wed May 22 22:44:12 2013
@@ -25,7 +25,6 @@ import java.util.concurrent.atomic.Atomi
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.gora.store.DataStore;
-import org.apache.gora.util.GoraException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.storage.Host;
 import org.apache.nutch.storage.StorageUtils;
@@ -64,7 +63,7 @@ public class HostDb implements Closeable
   
   private AtomicLong lastFlush;
 
-  public HostDb(Configuration conf) throws GoraException {
+  public HostDb(Configuration conf) throws IOException {
     try {
       hostStore = StorageUtils.createWebStore(conf, String.class, Host.class);
     } catch (ClassNotFoundException e) {
@@ -87,7 +86,11 @@ public class HostDb implements Closeable
             CacheHost removeFromCacheHost = notification.getValue();
             if (removeFromCacheHost != NULL_HOST) {
               if (removeFromCacheHost.timestamp < lastFlush.get()) {
-                hostStore.flush();
+                try {
+                  hostStore.flush();
+                } catch (IOException e) {
+                  throw new RuntimeException(e);
+                }
                 lastFlush.set(System.currentTimeMillis());
               }
             }

Modified: nutch/branches/2.x/src/java/org/apache/nutch/host/HostDbReader.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/host/HostDbReader.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/host/HostDbReader.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/host/HostDbReader.java Wed May 22 22:44:12 2013
@@ -39,7 +39,7 @@ import org.apache.nutch.util.TableUtil;
 public class HostDbReader extends Configured implements Tool {
   public static final Log LOG = LogFactory.getLog(HostDbReader.class);
 
-  private void read(String key) throws ClassNotFoundException, IOException, Exception {
+  private void read(String key) throws ClassNotFoundException, IOException {
 
     DataStore<String, Host> datastore = StorageUtils.createWebStore(getConf(),
         String.class, Host.class);
@@ -53,14 +53,10 @@ public class HostDbReader extends Config
     Result<String, Host> result = datastore.execute(query);
 
     while (result.next()) {
-      try {
-        String hostName = TableUtil.unreverseUrl(result.getKey());
-        Host host = result.get();
-        System.out.println(hostName);
-        System.out.println(host);
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
+      String hostName = TableUtil.unreverseUrl(result.getKey());
+      Host host = result.get();
+      System.out.println(hostName);
+      System.out.println(host);
     }
     result.close();
     datastore.close();

Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java Wed May 22 22:44:12 2013
@@ -20,7 +20,6 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -31,7 +30,6 @@ import org.apache.nutch.util.CrawlTestUt
 import org.apache.nutch.util.TableUtil;
 import org.junit.After;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import static org.junit.Assert.*;
 
@@ -68,7 +66,6 @@ public class TestGenerator extends Abstr
    * @throws Exception
    */
   @Test
-  @Ignore("Temporarily diable until NUTCH-1572 is addressed.")
   public void testGenerateHighest() throws Exception {
 
     final int NUM_RESULTS = 2;
@@ -129,7 +126,6 @@ public class TestGenerator extends Abstr
    * @throws Exception
    */
   @Test
-  @Ignore("Temporarily diable until NUTCH-1572 is addressed.")
   public void testGenerateHostLimit() throws Exception {
     ArrayList<URLWebPage> list = new ArrayList<URLWebPage>();
 
@@ -178,7 +174,6 @@ public class TestGenerator extends Abstr
    * @throws Exception
    */
   @Test
-  @Ignore("Temporarily diable until NUTCH-1572 is addressed.")
   public void testGenerateDomainLimit() throws Exception {
     ArrayList<URLWebPage> list = new ArrayList<URLWebPage>();
 
@@ -231,7 +226,6 @@ public class TestGenerator extends Abstr
    * @throws IOException
    */
   @Test
-  @Ignore("Temporarily diable until NUTCH-1572 is addressed.")
   public void testFilter() throws IOException, Exception {
 
     ArrayList<URLWebPage> list = new ArrayList<URLWebPage>();

Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java Wed May 22 22:44:12 2013
@@ -28,7 +28,6 @@ import org.apache.nutch.util.AbstractNut
 import org.apache.nutch.util.CrawlTestUtil;
 import org.apache.gora.util.ByteUtils;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import static org.junit.Assert.*;
 
@@ -37,6 +36,7 @@ import static org.junit.Assert.*;
  * crawldb 3. Reads crawldb entries and verifies contents 4. Injects more urls
  * into webdb 5. Reads crawldb entries and verifies contents
  *
+ * @author nutch-dev <nutch-dev at lucene.apache.org>
  */
 public class TestInjector extends AbstractNutchTest {
   Path urlPath;
@@ -49,7 +49,6 @@ public class TestInjector extends Abstra
   }
 
   @Test
-  @Ignore("Temporarily diable until NUTCH-1572 is addressed.")
   public void testInject() throws Exception {
     ArrayList<String> urls = new ArrayList<String>();
     for (int i = 0; i < 100; i++) {

Modified: nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java Wed May 22 22:44:12 2013
@@ -32,7 +32,6 @@ import org.mortbay.jetty.Server;
 
 import org.junit.After;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import static org.junit.Assert.*;
 
@@ -68,7 +67,6 @@ public class TestFetcher extends Abstrac
   }
 
   @Test
-  @Ignore("Temporarily diable until NUTCH-1572 is addressed.")
   public void testFetch() throws Exception {
 
     //generate seedlist

Modified: nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java Wed May 22 22:44:12 2013
@@ -34,7 +34,6 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.nutch.util.AbstractNutchTest;
 import org.apache.nutch.util.CrawlTestUtil;
 import org.hsqldb.Server;
-import org.junit.Ignore;
 
 import org.junit.After;
 import org.junit.Before;
@@ -70,7 +69,7 @@ public class TestGoraStorage extends Abs
   }
 
   private static void readWrite(String id, DataStore<String, WebPage> store) 
-      throws IOException, Exception {
+      throws IOException {
     WebPage page = new WebPage();
     int max = 1000;
     for (int i = 0; i < max; i++) {
@@ -91,13 +90,9 @@ public class TestGoraStorage extends Abs
     Result<String, WebPage> result = store.execute(store.newQuery());
     int count = 0;
     while (result.next()) {
-      try {
-        // only count keys in the store for the current id
-        if (result.getKey().contains(id))
-          count++;
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
+      // only count keys in the store for the current id
+      if (result.getKey().contains(id))
+        count++;
     }
     // check amount
     assertEquals(max, count);
@@ -111,7 +106,6 @@ public class TestGoraStorage extends Abs
    * @throws Exception
    */
   @Test
-  @Ignore("Temporarily diable until NUTCH-1572 is addressed.")
   public void testMultithreaded() throws Exception {
     // create a fixed thread pool
     int numThreads = 8;
@@ -225,7 +219,7 @@ public class TestGoraStorage extends Abs
     System.out.println("Starting!");
 
     Configuration localConf = CrawlTestUtil.createConfiguration();
-    localConf.set("storage.data.store.class", "org.apache.gora.memory.store.MemStore");
+    localConf.set("storage.data.store.class", "org.apache.gora.sql.store.SqlStore");
 
     DataStore<String, WebPage> store = StorageUtils.createWebStore(localConf,
         String.class, WebPage.class);

Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java Wed May 22 22:44:12 2013
@@ -37,7 +37,7 @@ public class AbstractNutchTest {
 
   public void setUp() throws Exception {
     conf = CrawlTestUtil.createConfiguration();
-    conf.set("storage.data.store.class", "org.apache.gora.memory.store.MemStore");
+    conf.set("storage.data.store.class", "org.apache.gora.sql.store.SqlStore");
     fs = FileSystem.get(conf);
     webPageStore = StorageUtils.createWebStore(conf, String.class,
         WebPage.class);

Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java?rev=1485475&r1=1485474&r2=1485475&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java Wed May 22 22:44:12 2013
@@ -98,10 +98,10 @@ public class CrawlTestUtil {
    * Read entries from a data store
    *
    * @return list of matching {@link URLWebPage} objects
-   * @throws Exception
+   * @throws IOException
    */
   public static ArrayList<URLWebPage> readContents(DataStore<String,WebPage> store,
-      Mark requiredMark, String... fields) throws Exception {
+      Mark requiredMark, String... fields) throws IOException {
     ArrayList<URLWebPage> l = new ArrayList<URLWebPage>();
 
     Query<String, WebPage> query = store.newQuery();
@@ -111,21 +111,18 @@ public class CrawlTestUtil {
 
     Result<String, WebPage> results = store.execute(query);
     while (results.next()) {
-      try {
-        WebPage page = results.get();
-        String url = results.getKey();
-
-        if (page == null)
-          continue;
-
-        if (requiredMark != null && requiredMark.checkMark(page) == null)
-          continue;
-
-        l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone()));
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
+      WebPage page = results.get();
+      String url = results.getKey();
+
+      if (page == null)
+        continue;
+
+      if (requiredMark != null && requiredMark.checkMark(page) == null)
+        continue;
+
+      l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone()));
     }
+
     return l;
   }