You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2011/10/31 11:49:18 UTC

svn commit: r1195403 - in /nutch/branches/nutchgora: CHANGES.txt build.xml conf/gora-cassandra-mapping.xml conf/gora-hbase-mapping.xml conf/gora-sql-mapping.xml conf/nutch-default.xml ivy/ivy.xml

Author: lewismc
Date: Mon Oct 31 10:49:17 2011
New Revision: 1195403

URL: http://svn.apache.org/viewvc?rev=1195403&view=rev
Log:
commit to address NUTCH-902 and update to changes.txt

Added:
    nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
    nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
Modified:
    nutch/branches/nutchgora/CHANGES.txt
    nutch/branches/nutchgora/build.xml
    nutch/branches/nutchgora/conf/gora-sql-mapping.xml
    nutch/branches/nutchgora/conf/nutch-default.xml
    nutch/branches/nutchgora/ivy/ivy.xml

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon Oct 31 10:49:17 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-902 Add all necessary files and configuration so that nutch can be used with different backends out-of-the-box (lewismc)
+
 * NUTCH-1081 & 1135 ant tests fail & Fix TestGoraStorage for Nutchgora (Ferdy via lewismc)
 
 * NUTCH-1156 building errors with gora-hbase as a backend; update ivy.xml to use correct dependancies (Ferdy via lewismc)

Modified: nutch/branches/nutchgora/build.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Mon Oct 31 10:49:17 2011
@@ -15,7 +15,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 -->
-<project name="Nutch" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant">
+<project name="Nutchgora" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant">
 
   <!-- Load all the default properties, and any the user wants    -->
   <!-- to contribute (without having to type -D or edit this file -->

Added: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml?rev=1195403&view=auto
==============================================================================
--- nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml (added)
+++ nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml Mon Oct 31 10:49:17 2011
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<gora-orm>
+    
+    <keyspace name="webpage" cluster="Test Cluster" host="localhost">
+        <family name="p"/>
+        <family name="f"/>
+        <family name="sc" type="super"/>
+    </keyspace>
+    <class keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage">
+        
+        <!-- fetch fields -->
+        <field name="baseUrl" family="f" qualifier="bas"/>
+        <field name="status" family="f" qualifier="st"/>
+        <field name="prevFetchTime" family="f" qualifier="pts"/>
+        <field name="fetchTime" family="f" qualifier="ts"/>
+        <field name="fetchInterval" family="f" qualifier="fi"/>
+        <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
+        <field name="reprUrl" family="f" qualifier="rpr"/>
+        <field name="content" family="f" qualifier="cnt"/>
+        <field name="contentType" family="f" qualifier="typ"/>
+        <field name="modifiedTime" family="f" qualifier="mod"/>
+        
+        <!-- parse fields -->
+        <field name="title" family="p" qualifier="t"/>
+        <field name="text" family="p" qualifier="c"/>
+        <field name="signature" family="p" qualifier="sig"/>
+        <field name="prevSignature" family="p" qualifier="psig"/>
+        
+        <!-- score fields -->
+        <field name="score" family="f" qualifier="s"/>
+        
+        <!-- super columns -->
+        <field name="markers" family="sc" qualifier="mk"/>
+        <field name="inlinks" family="sc" qualifier="il"/>
+        <field name="outlinks" family="sc" qualifier="ol"/>
+        <field name="metadata" family="sc" qualifier="mtdt"/>
+        <field name="headers" family="sc" qualifier="h"/>
+        <field name="parseStatus" family="sc" qualifier="pas"/>
+        <field name="protocolStatus" family="sc" qualifier="prs"/>
+    </class>
+    
+</gora-orm>
\ No newline at end of file

Added: nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-hbase-mapping.xml?rev=1195403&view=auto
==============================================================================
--- nutch/branches/nutchgora/conf/gora-hbase-mapping.xml (added)
+++ nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Mon Oct 31 10:49:17 2011
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<gora-orm>
+    
+    <table name="webpage">
+        <family name="p"/> <!-- This can also have params like compression, bloom filters -->
+        <family name="f"/>
+        <family name="s"/>
+        <family name="il"/>
+        <family name="ol"/>
+        <family name="h"/>
+        <family name="mtdt"/>
+        <family name="mk"/>
+    </table>
+    <class table="webpage" keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage">
+        
+        <!-- fetch fields                                       -->
+        <field name="baseUrl" family="f" qualifier="bas"/>
+        <field name="status" family="f" qualifier="st"/>
+        <field name="prevFetchTime" family="f" qualifier="pts"/>
+        <field name="fetchTime" family="f" qualifier="ts"/>
+        <field name="fetchInterval" family="f" qualifier="fi"/>
+        <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
+        <field name="reprUrl" family="f" qualifier="rpr"/>
+        <field name="content" family="f" qualifier="cnt"/>
+        <field name="contentType" family="f" qualifier="typ"/>
+        <field name="protocolStatus" family="f" qualifier="prot"/>
+        <field name="modifiedTime" family="f" qualifier="mod"/>
+        
+        <!-- parse fields                                       -->
+        <field name="title" family="p" qualifier="t"/>
+        <field name="text" family="p" qualifier="c"/>
+        <field name="parseStatus" family="p" qualifier="st"/>
+        <field name="signature" family="p" qualifier="sig"/>
+        <field name="prevSignature" family="p" qualifier="psig"/>
+        
+        <!-- score fields                                       -->
+        <field name="score" family="s" qualifier="s"/>
+        <field name="headers" family="h"/>
+        <field name="inlinks" family="il"/>
+        <field name="outlinks" family="ol"/>
+        <field name="metadata" family="mtdt"/>
+        <field name="markers" family="mk"/>
+    </class>
+    
+</gora-orm>
\ No newline at end of file

Modified: nutch/branches/nutchgora/conf/gora-sql-mapping.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-sql-mapping.xml?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora-sql-mapping.xml (original)
+++ nutch/branches/nutchgora/conf/gora-sql-mapping.xml Mon Oct 31 10:49:17 2011
@@ -25,15 +25,10 @@
 
     <!-- score fields                                       -->
     <field name="score" column="score"/>
-
     <field name="headers" column="headers"/>
-
     <field name="inlinks" column="inlinks"/>
-    
     <field name="outlinks" column="outlinks"/>
-    
     <field name="metadata" column="metadata"/>
-
     <field name="markers" column="markers"/>
 </class>
 

Modified: nutch/branches/nutchgora/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/nutch-default.xml (original)
+++ nutch/branches/nutchgora/conf/nutch-default.xml Mon Oct 31 10:49:17 2011
@@ -1065,14 +1065,16 @@
 <property>
   <name>storage.data.store.class</name>
   <value>org.apache.gora.sql.store.SqlStore</value>
-  <description>Default class for storing data</description>
+  <description>Default Gora class for storing data in the SQL Store. 
+  A DataStore implementation for RDBMS with a SQL interface. SqlStore
+  uses JDBC drivers to communicate with the DB.</description>
 </property>
 
 <!--
 <property>
   <name>storage.data.store.class</name>
   <value>org.apache.gora.cassandra.store.CassandraStore</value>
-  <description>Class for storing data in Apache Cassandra</description>
+  <description>Gora class for storing data in Apache Cassandra</description>
 </property>
 -->
 
@@ -1080,7 +1082,7 @@
 <property>
   <name>storage.data.store.class</name>
   <value>org.apache.gora.hbase.store.HBaseStore</value>
-  <description>Class for storing data in Apache HBase</description>
+  <description>Gora class for storing data in Apache HBase</description>
 </property>
 -->
 

Modified: nutch/branches/nutchgora/ivy/ivy.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/ivy/ivy.xml?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/ivy/ivy.xml (original)
+++ nutch/branches/nutchgora/ivy/ivy.xml Mon Oct 31 10:49:17 2011
@@ -114,6 +114,16 @@
        </dependency>
 -->
 
+<!--
+    	Uncomment this to use Cassandra as Gora backend.
+    	TODO: confirm whether gora-cassandra needs a further dependency;
+    	if so, add it as its own complete dependency element below.
+-->
+<!--
+		<dependency org="org.apache.gora" name="gora-cassandra" rev="0.1.1-incubating" conf="*->compile">
+		</dependency>
+-->
+
                 <!--global exclusion-->
              	<exclude module="ant" />
              	<exclude module="jmxtools" />