You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/01/22 22:19:00 UTC

svn commit: r498809 - in /lucene/hadoop/trunk/src/contrib/abacus/examples: ./ pyAbacus/ pyAbacus/JyAbacusWCPlugIN.py pyAbacus/JythonAbacus.py pyAbacus/compile pyAbacus/wordcountaggregator.spec

Author: cutting
Date: Mon Jan 22 13:19:00 2007
New Revision: 498809

URL: http://svn.apache.org/viewvc?view=rev&rev=498809
Log:
HADOOP-918.  Add an example of Abacus use with Python.  Contributed by Runping.

Added:
    lucene/hadoop/trunk/src/contrib/abacus/examples/
    lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/
    lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py
    lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py
    lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile
    lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec

Added: lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py?view=auto&rev=498809
==============================================================================
--- lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py (added)
+++ lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py Mon Jan 22 13:19:00 2007
@@ -0,0 +1,34 @@
+#
+# Copyright 2006 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from org.apache.hadoop.fs import Path
+from org.apache.hadoop.io import *
+from org.apache.hadoop.mapred import *
+
+from org.apache.hadoop.abacus import *;
+
+from java.util import *;
+
+import sys
+
+class AbacusWordCount(ValueAggregatorBaseDescriptor):
+    def generateKeyValPairs(self, key, val):
+        retv = ArrayList();
+        for w in val.toString().split():
+            en =  ValueAggregatorBaseDescriptor.generateEntry(ValueAggregatorBaseDescriptor.LONG_VALUE_SUM, w, ValueAggregatorBaseDescriptor.ONE);
+            retv.add(en);
+        return retv;
+

Added: lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py?view=auto&rev=498809
==============================================================================
--- lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py (added)
+++ lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py Mon Jan 22 13:19:00 2007
@@ -0,0 +1,80 @@
+#
+# Copyright 2006 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from org.apache.hadoop.fs import Path
+from org.apache.hadoop.io import *
+from org.apache.hadoop.mapred import *
+
+from org.apache.hadoop.abacus import *
+
+from java.util import *;
+
+import sys
+
+class AbacusMapper(ValueAggregatorMapper):
+    def map(self, key, value, output, reporter):
+        ValueAggregatorMapper.map(self, key, value, output, reporter);
+
+class AbacusReducer(ValueAggregatorReducer):
+    def reduce(self, key, values, output, reporter):
+        ValueAggregatorReducer.reduce(self, key, values, output, reporter);
+
+class AbacusCombiner(ValueAggregatorCombiner):
+    def reduce(self, key, values, output, reporter):
+        ValueAggregatorCombiner.reduce(self, key, values, output, reporter);
+
+def printUsage(code):
+    print "Abacus <input> <output> <numOfReducers> <inputformat> <specfile>"
+    sys.exit(code)
+
+def main(args):
+    if len(args) < 6:
+        printUsage(1);
+
+    inDir = args[1];
+    outDir = args[2];
+    numOfReducers = int(args[3]);
+    theInputFormat = args[4];
+    specFile = args[5];
+                                        
+    print "numOfReducers: ", numOfReducers, "theInputFormat: ", theInputFormat, "specFile: ", specFile
+
+    conf = JobConf(AbacusMapper);
+    conf.setJobName("recordcount");
+    conf.addDefaultResource(Path(specFile));
+ 
+    if theInputFormat=="textinputformat":
+        conf.setInputFormat(TextInputFormat);
+    else:
+        conf.setInputFormat(SequenceFileInputFormat);
+    conf.setOutputFormat(TextOutputFormat);
+    conf.setMapOutputKeyClass(Text);
+    conf.setMapOutputValueClass(Text);
+    conf.setOutputKeyClass(Text);
+    conf.setOutputValueClass(Text);
+    conf.setNumMapTasks(1);
+    conf.setNumReduceTasks(numOfReducers);
+
+    conf.setMapperClass(AbacusMapper);        
+    conf.setCombinerClass(AbacusCombiner);
+    conf.setReducerClass(AbacusReducer);
+    conf.setInputPath(Path(args[1]))
+    conf.setOutputPath(Path(args[2]))
+
+    JobClient.runJob(conf);
+
+if __name__ == "__main__":
+    main(sys.argv)

Added: lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile?view=auto&rev=498809
==============================================================================
--- lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile (added)
+++ lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile Mon Jan 22 13:19:00 2007
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Builds jwc.jar from the Jython Abacus example sources.
+# HADOOP_HOME is a relative path, so this script is expected to be run from
+# the pyAbacus example directory.
+export HADOOP_HOME=../../../../..
+
+# Hadoop core classes first, then the Abacus contrib classes.
+export CLASSPATH="$HADOOP_HOME/build/classes:$HADOOP_HOME/build/contrib/abacus/classes"
+
+# Append every jar under lib/ and then lib/jetty-ext/. Quoting keeps
+# filenames that contain spaces intact.
+for jar in "$HADOOP_HOME"/lib/*.jar "$HADOOP_HOME"/lib/jetty-ext/*.jar; do
+  CLASSPATH="${CLASSPATH}:${jar}"
+done
+
+# Compile both Jython sources into jwc.jar under the examples package.
+jythonc -p org.apache.hadoop.abacus.examples -d -j jwc.jar -c JythonAbacus.py JyAbacusWCPlugIN.py
+
+# Add the Abacus contrib classes to the generated jar.
+jar -uvf jwc.jar -C "$HADOOP_HOME/build/contrib/abacus/classes" .
+

Added: lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec?view=auto&rev=498809
==============================================================================
--- lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec (added)
+++ lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec Mon Jan 22 13:19:00 2007
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>aggregator.descriptor.num</name>
+  <value>1</value>
+</property>
+
+<property>
+   <name>aggregator.descriptor.0</name>
+   <value>UserDefined,org.apache.hadoop.abacus.examples.JyAbacusWCPlugIN$AbacusWordCount</value>
+</property>
+</configuration>