You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/01/22 22:19:00 UTC
svn commit: r498809 - in /lucene/hadoop/trunk/src/contrib/abacus/examples:
./ pyAbacus/ pyAbacus/JyAbacusWCPlugIN.py pyAbacus/JythonAbacus.py
pyAbacus/compile pyAbacus/wordcountaggregator.spec
Author: cutting
Date: Mon Jan 22 13:19:00 2007
New Revision: 498809
URL: http://svn.apache.org/viewvc?view=rev&rev=498809
Log:
HADOOP-918. Add an example of Abacus use with Python. Contributed by Runping.
Added:
lucene/hadoop/trunk/src/contrib/abacus/examples/
lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/
lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py
lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py
lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile
lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec
Added: lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py?view=auto&rev=498809
==============================================================================
--- lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py (added)
+++ lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JyAbacusWCPlugIN.py Mon Jan 22 13:19:00 2007
@@ -0,0 +1,34 @@
+#
+# Copyright 2006 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from org.apache.hadoop.fs import Path
+from org.apache.hadoop.io import *
+from org.apache.hadoop.mapred import *
+
+from org.apache.hadoop.abacus import *;
+
+from java.util import *;
+
+import sys
+
+# Word-count descriptor for Abacus: for each whitespace-separated token in
+# the string form of the input value, it produces one LONG_VALUE_SUM entry
+# keyed by that token with the value ValueAggregatorBaseDescriptor.ONE.
+class AbacusWordCount(ValueAggregatorBaseDescriptor):
+ # key is accepted but not used; only val is tokenized.
+ # Returns a java.util.ArrayList holding one generated entry per token.
+ def generateKeyValPairs(self, key, val):
+ retv = ArrayList();
+ for w in val.toString().split():
+ en = ValueAggregatorBaseDescriptor.generateEntry(ValueAggregatorBaseDescriptor.LONG_VALUE_SUM, w, ValueAggregatorBaseDescriptor.ONE);
+ retv.add(en);
+ # NOTE(review): indentation is collapsed in this rendering; the return
+ # presumably follows the loop rather than sitting inside it - confirm.
+ return retv;
+
Added: lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py?view=auto&rev=498809
==============================================================================
--- lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py (added)
+++ lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/JythonAbacus.py Mon Jan 22 13:19:00 2007
@@ -0,0 +1,80 @@
+#
+# Copyright 2006 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from org.apache.hadoop.fs import Path
+from org.apache.hadoop.io import *
+from org.apache.hadoop.mapred import *
+
+from org.apache.hadoop.abacus import *
+
+from java.util import *;
+
+import sys
+
+# Mapper subclass that adds no behavior of its own: map() forwards all of
+# its arguments unchanged to ValueAggregatorMapper.map.
+class AbacusMapper(ValueAggregatorMapper):
+ def map(self, key, value, output, reporter):
+ ValueAggregatorMapper.map(self, key, value, output, reporter);
+
+# Reducer subclass that adds no behavior of its own: reduce() forwards all
+# of its arguments unchanged to ValueAggregatorReducer.reduce.
+class AbacusReducer(ValueAggregatorReducer):
+ def reduce(self, key, values, output, reporter):
+ ValueAggregatorReducer.reduce(self, key, values, output, reporter);
+
+# Combiner subclass that adds no behavior of its own: reduce() forwards all
+# of its arguments unchanged to ValueAggregatorCombiner.reduce.
+class AbacusCombiner(ValueAggregatorCombiner):
+ def reduce(self, key, values, output, reporter):
+ ValueAggregatorCombiner.reduce(self, key, values, output, reporter);
+
+# Prints the expected command-line arguments to standard output, then
+# terminates the process via sys.exit with the given status code.
+def printUsage(code):
+ print "Abacus <input> <output> <numOfReducers> <inputformat> <specfile>"
+ sys.exit(code)
+
+# Command-line driver: reads the input dir, output dir, reducer count,
+# input format name and spec file from args, builds a JobConf that uses the
+# Abacus mapper/combiner/reducer classes, and runs the job.
+#
+# args is indexed from 1 (index 0 is never read), so at least six elements
+# are required; with fewer, printUsage(1) is called, which exits.
+# int(args[3]) raises ValueError if the reducer count is not an integer.
+#
+# NOTE(review): indentation is collapsed in this rendering of the patch, so
+# block extents below are inferred from the statements, not from layout.
+def main(args):
+ if len(args) < 6:
+ printUsage(1);
+
+ inDir = args[1];
+ outDir = args[2];
+ numOfReducers = int(args[3]);
+ theInputFormat = args[4];
+ specFile = args[5];
+
+ print "numOfReducers: ", numOfReducers, "theInputFormat: ", theInputFormat, "specFile: ", specFile
+
+ conf = JobConf(AbacusMapper);
+ conf.setJobName("recordcount");
+ # The spec file is added to the job configuration as a default resource.
+ conf.addDefaultResource(Path(specFile));
+
+ # Only the exact string "textinputformat" selects TextInputFormat; any
+ # other value falls through to SequenceFileInputFormat.
+ if theInputFormat=="textinputformat":
+ conf.setInputFormat(TextInputFormat);
+ else:
+ conf.setInputFormat(SequenceFileInputFormat);
+ # NOTE(review): presumably the output format and everything after it run
+ # unconditionally (i.e. are not part of the else branch) - confirm
+ # against the original file.
+ conf.setOutputFormat(TextOutputFormat);
+ conf.setMapOutputKeyClass(Text);
+ conf.setMapOutputValueClass(Text);
+ conf.setOutputKeyClass(Text);
+ conf.setOutputValueClass(Text);
+ # The number of map tasks is fixed at 1; only the reducer count comes
+ # from the command line.
+ conf.setNumMapTasks(1);
+ conf.setNumReduceTasks(numOfReducers);
+
+ conf.setMapperClass(AbacusMapper);
+ conf.setCombinerClass(AbacusCombiner);
+ conf.setReducerClass(AbacusReducer);
+ # inDir and outDir assigned above are never read; the paths are built
+ # directly from args[1] and args[2], which hold the same values.
+ conf.setInputPath(Path(args[1]))
+ conf.setOutputPath(Path(args[2]))
+
+ JobClient.runJob(conf);
+
+# Script entry point: pass the complete sys.argv (including element 0) to
+# main, which reads its parameters starting at index 1.
+if __name__ == "__main__":
+ main(sys.argv)
Added: lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile?view=auto&rev=498809
==============================================================================
--- lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile (added)
+++ lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/compile Mon Jan 22 13:19:00 2007
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Builds jwc.jar from JythonAbacus.py and JyAbacusWCPlugIN.py with jythonc,
+# then adds the contents of the abacus contrib classes directory to that jar.
+
+# HADOOP_HOME is a relative path, so it is resolved against the directory
+# the script is invoked from.
+export HADOOP_HOME=../../../../..
+
+# Start the classpath with the core and abacus contrib build output.
+export CLASSPATH="$HADOOP_HOME/build/classes"
+export CLASSPATH=${CLASSPATH}:"$HADOOP_HOME/build/contrib/abacus/classes"
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# add libs to CLASSPATH
+for f in $HADOOP_HOME/lib/*.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+done
+
+for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+done
+
+# restore ordinary behaviour
+unset IFS
+# Compile both Jython sources into package org.apache.hadoop.abacus.examples
+# and write the result to jwc.jar.
+# NOTE(review): -d and -c are presumably jythonc's "deep" and "core" options
+# (bundle dependencies / core Jython classes) - confirm against jythonc docs.
+jythonc -p org.apache.hadoop.abacus.examples -d -j jwc.jar -c JythonAbacus.py JyAbacusWCPlugIN.py
+
+# Update jwc.jar in place with everything under the abacus classes directory.
+jar -uvf jwc.jar -C $HADOOP_HOME/build/contrib/abacus/classes .
+
Added: lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec?view=auto&rev=498809
==============================================================================
--- lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec (added)
+++ lucene/hadoop/trunk/src/contrib/abacus/examples/pyAbacus/wordcountaggregator.spec Mon Jan 22 13:19:00 2007
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+ <name>aggregator.descriptor.num</name>
+ <value>1</value>
+</property>
+
+<property>
+ <name>aggregator.descriptor.0</name>
+ <value>UserDefined,org.apache.hadoop.abacus.examples.JyAbacusWCPlugIN$AbacusWordCount</value>
+</property>
+</configuration>