You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2011/10/31 11:49:18 UTC
svn commit: r1195403 - in /nutch/branches/nutchgora: CHANGES.txt build.xml
conf/gora-cassandra-mapping.xml conf/gora-hbase-mapping.xml
conf/gora-sql-mapping.xml conf/nutch-default.xml ivy/ivy.xml
Author: lewismc
Date: Mon Oct 31 10:49:17 2011
New Revision: 1195403
URL: http://svn.apache.org/viewvc?rev=1195403&view=rev
Log:
commit to address NUTCH-902 and update to changes.txt
Added:
nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/build.xml
nutch/branches/nutchgora/conf/gora-sql-mapping.xml
nutch/branches/nutchgora/conf/nutch-default.xml
nutch/branches/nutchgora/ivy/ivy.xml
Modified: nutch/branches/nutchgora/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon Oct 31 10:49:17 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release nutchgora - Current Development
+* NUTCH-902 Add all necessary files and configuration so that nutch can be used with different backends out-of-the-box (lewismc)
+
* NUTCH-1081 & 1135 ant tests fail & Fix TestGoraStorage for Nutchgora (Ferdy via lewismc)
* NUTCH-1156 building errors with gora-hbase as a backend; update ivy.xml to use correct dependencies (Ferdy via lewismc)
Modified: nutch/branches/nutchgora/build.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Mon Oct 31 10:49:17 2011
@@ -15,7 +15,7 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<project name="Nutch" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant">
+<project name="Nutchgora" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant">
<!-- Load all the default properties, and any the user wants -->
<!-- to contribute (without having to type -D or edit this file -->
Added: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml?rev=1195403&view=auto
==============================================================================
--- nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml (added)
+++ nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml Mon Oct 31 10:49:17 2011
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<gora-orm>
+
+ <keyspace name="webpage" cluster="Test Cluster" host="localhost">
+ <family name="p"/>
+ <family name="f"/>
+ <family name="sc" type="super"/>
+ </keyspace>
+ <class keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage">
+
+ <!-- fetch fields -->
+ <field name="baseUrl" family="f" qualifier="bas"/>
+ <field name="status" family="f" qualifier="st"/>
+ <field name="prevFetchTime" family="f" qualifier="pts"/>
+ <field name="fetchTime" family="f" qualifier="ts"/>
+ <field name="fetchInterval" family="f" qualifier="fi"/>
+ <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
+ <field name="reprUrl" family="f" qualifier="rpr"/>
+ <field name="content" family="f" qualifier="cnt"/>
+ <field name="contentType" family="f" qualifier="typ"/>
+ <field name="modifiedTime" family="f" qualifier="mod"/>
+
+ <!-- parse fields -->
+ <field name="title" family="p" qualifier="t"/>
+ <field name="text" family="p" qualifier="c"/>
+ <field name="signature" family="p" qualifier="sig"/>
+ <field name="prevSignature" family="p" qualifier="psig"/>
+
+ <!-- score fields -->
+ <field name="score" family="f" qualifier="s"/>
+
+ <!-- super columns -->
+ <field name="markers" family="sc" qualifier="mk"/>
+ <field name="inlinks" family="sc" qualifier="il"/>
+ <field name="outlinks" family="sc" qualifier="ol"/>
+ <field name="metadata" family="sc" qualifier="mtdt"/>
+ <field name="headers" family="sc" qualifier="h"/>
+ <field name="parseStatus" family="sc" qualifier="pas"/>
+ <field name="protocolStatus" family="sc" qualifier="prs"/>
+ </class>
+
+</gora-orm>
\ No newline at end of file
Added: nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-hbase-mapping.xml?rev=1195403&view=auto
==============================================================================
--- nutch/branches/nutchgora/conf/gora-hbase-mapping.xml (added)
+++ nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Mon Oct 31 10:49:17 2011
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<gora-orm>
+
+ <table name="webpage">
+ <family name="p"/> <!-- This can also have params like compression, bloom filters -->
+ <family name="f"/>
+ <family name="s"/>
+ <family name="il"/>
+ <family name="ol"/>
+ <family name="h"/>
+ <family name="mtdt"/>
+ <family name="mk"/>
+ </table>
+ <class table="webpage" keyClass="java.lang.String" name="org.apache.nutch.storage.WebPage">
+
+ <!-- fetch fields -->
+ <field name="baseUrl" family="f" qualifier="bas"/>
+ <field name="status" family="f" qualifier="st"/>
+ <field name="prevFetchTime" family="f" qualifier="pts"/>
+ <field name="fetchTime" family="f" qualifier="ts"/>
+ <field name="fetchInterval" family="f" qualifier="fi"/>
+ <field name="retriesSinceFetch" family="f" qualifier="rsf"/>
+ <field name="reprUrl" family="f" qualifier="rpr"/>
+ <field name="content" family="f" qualifier="cnt"/>
+ <field name="contentType" family="f" qualifier="typ"/>
+ <field name="protocolStatus" family="f" qualifier="prot"/>
+ <field name="modifiedTime" family="f" qualifier="mod"/>
+
+ <!-- parse fields -->
+ <field name="title" family="p" qualifier="t"/>
+ <field name="text" family="p" qualifier="c"/>
+ <field name="parseStatus" family="p" qualifier="st"/>
+ <field name="signature" family="p" qualifier="sig"/>
+ <field name="prevSignature" family="p" qualifier="psig"/>
+
+ <!-- score fields -->
+ <field name="score" family="s" qualifier="s"/>
+ <field name="headers" family="h"/>
+ <field name="inlinks" family="il"/>
+ <field name="outlinks" family="ol"/>
+ <field name="metadata" family="mtdt"/>
+ <field name="markers" family="mk"/>
+ </class>
+
+</gora-orm>
\ No newline at end of file
Modified: nutch/branches/nutchgora/conf/gora-sql-mapping.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-sql-mapping.xml?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/gora-sql-mapping.xml (original)
+++ nutch/branches/nutchgora/conf/gora-sql-mapping.xml Mon Oct 31 10:49:17 2011
@@ -25,15 +25,10 @@
<!-- score fields -->
<field name="score" column="score"/>
-
<field name="headers" column="headers"/>
-
<field name="inlinks" column="inlinks"/>
-
<field name="outlinks" column="outlinks"/>
-
<field name="metadata" column="metadata"/>
-
<field name="markers" column="markers"/>
</class>
Modified: nutch/branches/nutchgora/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/conf/nutch-default.xml (original)
+++ nutch/branches/nutchgora/conf/nutch-default.xml Mon Oct 31 10:49:17 2011
@@ -1065,14 +1065,16 @@
<property>
<name>storage.data.store.class</name>
<value>org.apache.gora.sql.store.SqlStore</value>
- <description>Default class for storing data</description>
+ <description>Default Gora class for storing data in the SQL Store.
+ A DataStore implementation for RDBMS with a SQL interface. SqlStore
+ uses JDBC drivers to communicate with the DB.</description>
</property>
<!--
<property>
<name>storage.data.store.class</name>
<value>org.apache.gora.cassandra.store.CassandraStore</value>
- <description>Class for storing data in Apache Cassandra</description>
+ <description>Gora class for storing data in Apache Cassandra</description>
</property>
-->
@@ -1080,7 +1082,7 @@
<property>
<name>storage.data.store.class</name>
<value>org.apache.gora.hbase.store.HBaseStore</value>
- <description>Class for storing data in Apache HBase</description>
+ <description>Gora class for storing data in Apache HBase</description>
</property>
-->
Modified: nutch/branches/nutchgora/ivy/ivy.xml
URL: http://svn.apache.org/viewvc/nutch/branches/nutchgora/ivy/ivy.xml?rev=1195403&r1=1195402&r2=1195403&view=diff
==============================================================================
--- nutch/branches/nutchgora/ivy/ivy.xml (original)
+++ nutch/branches/nutchgora/ivy/ivy.xml Mon Oct 31 10:49:17 2011
@@ -114,6 +114,16 @@
</dependency>
-->
+<!--
+ Uncomment this to use Cassandra as Gora backend.
+-->
+<!--
+ <dependency org="org.apache.gora" name="gora-cassandra" rev="0.1.1-incubating" conf="*->compile">
+ </dependency>
+ TODO: check whether a second dependency element is required here
+ for the Cassandra backend before uncommenting this block.
+-->
+
<!--global exclusion-->
<exclude module="ant" />
<exclude module="jmxtools" />