You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by jm...@apache.org on 2010/03/02 19:08:33 UTC
svn commit: r918133 - in /lucene/mahout/trunk: bin/ conf/
core/src/main/java/org/apache/mahout/driver/ src/main/assembly/
Author: jmannix
Date: Tue Mar 2 18:08:32 2010
New Revision: 918133
URL: http://svn.apache.org/viewvc?rev=918133&view=rev
Log:
Fixes MAHOUT-301
Added:
lucene/mahout/trunk/conf/
lucene/mahout/trunk/conf/cleansvd.props
lucene/mahout/trunk/conf/clusterdump.props
lucene/mahout/trunk/conf/dirichlet.props
lucene/mahout/trunk/conf/driver.classes.props
lucene/mahout/trunk/conf/fkmeans.props
lucene/mahout/trunk/conf/fpg.props
lucene/mahout/trunk/conf/kmeans.props
lucene/mahout/trunk/conf/lucenevector.props
lucene/mahout/trunk/conf/meanshift.props
lucene/mahout/trunk/conf/seq2sparse.props
lucene/mahout/trunk/conf/seqdirectory.props
lucene/mahout/trunk/conf/seqdumper.props
lucene/mahout/trunk/conf/seqwiki.props
lucene/mahout/trunk/conf/svd.props
lucene/mahout/trunk/conf/testclassifier.props
lucene/mahout/trunk/conf/trainclassifier.props
lucene/mahout/trunk/conf/vectordump.props
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (with props)
Modified:
lucene/mahout/trunk/bin/mahout
lucene/mahout/trunk/src/main/assembly/bin.xml
Modified: lucene/mahout/trunk/bin/mahout
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/bin/mahout?rev=918133&r1=918132&r2=918133&view=diff
==============================================================================
--- lucene/mahout/trunk/bin/mahout (original)
+++ lucene/mahout/trunk/bin/mahout Tue Mar 2 18:08:32 2010
@@ -9,10 +9,16 @@
# MAHOUT_HEAPSIZE The maximum amount of heap to use, in MB.
# Default is 1000.
#
-# HADOOP_CONFIF_DIR The location of a hadoop config directory
+# HADOOP_CONF_DIR The location of a hadoop config directory
#
# MAHOUT_OPTS Extra Java runtime options.
#
+# MAHOUT_CONF_DIR The location of the program short-name to class name
+# mappings and the default properties files
+# defaults to "$MAHOUT_HOME/conf"
+#
+
+#
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
@@ -47,40 +53,6 @@
fi
done
-# if no args specified, show usage
-if [ $# = 0 ]; then
- echo "NOTE: These parameters are subject to change in future releases"
- echo "Usage: mahout [-core] COMMAND"
- echo "where COMMAND is one of:"
-# Please keep alphabetized
- echo " canopy run canopy clustering"
- echo " clusterdump dump clusters from a sequence file"
- echo " dirchlet run Dirchlet clustering"
- echo " fkmeans run fuzzy kmeans clustering"
- echo " fpg run FPGrowth Driver for freq. pattern mining"
- echo " kmeans run kmeans clustering"
- echo " lda run LDA clustering"
- echo " ldadump dump the output state of LDA"
- echo " lucenevector generate vectors from a lucene index"
- echo " trainclassifier run Bayes/CBayes classifier training job"
- echo " testclassifier test Bayes/CBayes model using a pre-classified data"
- echo " meanshift run Mean Shift clustering"
- echo " seqdirectory generate sequence files containing the documents beneathe a directory"
- echo " seqdump dump a sequence files using the writable toString() method"
- echo " seqwiki generate sequence files from a wikipedia dump file"
- echo " seq2sparse generate sparse vectors from a sequence file"
- echo " vectordump dump vectors from a sequence file"
- echo " or"
- echo " CLASSNAME run the class named CLASSNAME"
- echo "Most commands print help when invoked w/o parameters."
- echo ""
- echo "Expert: -core option is for developers only. It avoids building the job jar, "
- echo " instead it simply includes classes compiled with mvn package. "
- echo " NOTE: this works only for jobs executed in 'local' mode"
- echo " Most algorithms should support using --help to get the full list of inputs."
- exit 1
-fi
-
IS_CORE=0
#check for -core option
if [ "$1" == "-core" ] ; then
@@ -88,10 +60,6 @@
shift
fi
-# get arguments
-COMMAND=$1
-shift
-
# some directories
THIS_DIR=`dirname "$THIS"`
MAHOUT_HOME=`cd "$THIS_DIR/.." ; pwd`
@@ -117,8 +85,13 @@
#echo $JAVA_HEAP_MAX
fi
+if [ "x$MAHOUT_CONF_DIR" = "x" ]; then
+ MAHOUT_CONF_DIR=$MAHOUT_HOME/conf
+fi
+
# CLASSPATH initially contains $MAHOUT_CONF_DIR, or defaults to $MAHOUT_HOME/conf
-CLASSPATH=${HADOOP_CONF_DIR:=$MAHOUT_HOME/conf}
+CLASSPATH=${CLASSPATH}:$MAHOUT_CONF_DIR
+CLASSPATH=${CLASSPATH}:$HADOOP_CONF_DIR
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
# so that filenames w/ spaces are handled correctly in loops below
@@ -126,12 +99,18 @@
if [ $IS_CORE == 0 ]
then
+ # add release dependencies to CLASSPATH
+ for f in $MAHOUT_HOME/mahout-*.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+ done
+
+ # add dev targets if they exist
for f in $MAHOUT_HOME/*/target/mahout-*.job; do
CLASSPATH=${CLASSPATH}:$f;
done
- # for releases, add Mahout job to CLASSPATH
- for f in $MAHOUT_HOME/mahout-*.job; do
+ # add release dependencies to CLASSPATH
+ for f in $MAHOUT_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
else
@@ -139,13 +118,15 @@
CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/core/target/classes
CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/utils/target/classes
CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/examples/target/classes
+ #CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/core/src/main/resources
fi
-# add dependencies to CLASSPATH
+# add development dependencies to CLASSPATH
for f in $MAHOUT_HOME/examples/target/dependency/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
+
# cygwin path translation
if $cygwin; then
CLASSPATH=`cygpath -p -w "$CLASSPATH"`
@@ -174,47 +155,28 @@
MAHOUT_OPTS="$MAHOUT_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
fi
-# figure out which class to run
-if [ "$COMMAND" = "vectordump" ] ; then
- CLASS=org.apache.mahout.utils.vectors.VectorDumper
-elif [ "$COMMAND" = "clusterdump" ] ; then
- CLASS=org.apache.mahout.utils.clustering.ClusterDumper
-elif [ "$COMMAND" = "seqdump" ] ; then
- CLASS=org.apache.mahout.utils.SequenceFileDumper
-elif [ "$COMMAND" = "kmeans" ] ; then
- CLASS=org.apache.mahout.clustering.kmeans.KMeansDriver
-elif [ "$COMMAND" = "fkmeans" ] ; then
- CLASS=org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver
-elif [ "$COMMAND" = "lda" ] ; then
- CLASS=org.apache.mahout.clustering.lda.LDADriver
-elif [ "$COMMAND" = "ldadump" ] ; then
- CLASS=org.apache.mahout.clustering.lda.LDAPrintTopics
-elif [ "$COMMAND" = "fpg" ] ; then
- CLASS=org.apache.mahout.fpm.pfpgrowth.FPGrowthDriver
-elif [ "$COMMAND" = "dirichlet" ] ; then
- CLASS=org.apache.mahout.clustering.dirichlet.DirichletDriver
-elif [ "$COMMAND" = "meanshift" ] ; then
- CLASS=org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver
-elif [ "$COMMAND" = "canopy" ] ; then
- CLASS=org.apache.mahout.clustering.canopy.CanopyDriver
-elif [ "$COMMAND" = "lucenevector" ]; then
- CLASS=org.apache.mahout.utils.vectors.lucene.Driver
-elif [ "$COMMAND" = "seqdirectory" ]; then
- CLASS=org.apache.mahout.text.SequenceFilesFromDirectory
-elif [ "$COMMAND" = "seqwiki" ]; then
- CLASS=org.apache.mahout.text.WikipediaToSequenceFile
-elif [ "$COMMAND" = "seq2sparse" ]; then
- CLASS=org.apache.mahout.text.SparseVectorsFromSequenceFiles
-elif [ "$COMMAND" = "trainclassifier" ]; then
- CLASS=org.apache.mahout.classifier.bayes.TrainClassifier
-elif [ "$COMMAND" = "testclassifier" ]; then
- CLASS=org.apache.mahout.classifier.bayes.TestClassifier
-else
- CLASS=$COMMAND
-fi
+CLASS=org.apache.mahout.driver.MahoutDriver
-#echo $CLASSPATH
+# Locate the examples job file that is handed to "hadoop jar" further down.
+# A development build under examples/target is preferred; if none exists,
+# fall back to a job file sitting directly in $MAHOUT_HOME.
+for f in $MAHOUT_HOME/examples/target/mahout-examples-*.job; do
+  if [ -e "$f" ]; then
+    MAHOUT_JOB=$f
+  fi
+done
+
+if [ "$MAHOUT_JOB" = "" ]; then
+  for f in $MAHOUT_HOME/mahout-examples-*.job; do
+    # an unmatched glob is left as the literal pattern, so only accept
+    # entries that really exist (same check as the loop above)
+    if [ -e "$f" ]; then
+      MAHOUT_JOB=$f
+    fi
+  done
+fi
# run it
-exec "$JAVA" $JAVA_HEAP_MAX $MAHOUT_OPTS -classpath "$CLASSPATH" $CLASS "$@"
+
+# Run in-process with plain java unless both HADOOP_CONF_DIR and HADOOP_HOME
+# are set; otherwise submit the examples job file through "hadoop jar".
+if [ "$HADOOP_CONF_DIR" = "" ] || [ "$HADOOP_HOME" = "" ]; then
+  echo "no HADOOP_CONF_DIR or HADOOP_HOME set, running locally"
+  exec "$JAVA" $JAVA_HEAP_MAX $MAHOUT_OPTS -classpath "$CLASSPATH" $CLASS "$@"
+else
+  echo "running on hadoop, using HADOOP_HOME=$HADOOP_HOME and HADOOP_CONF_DIR=$HADOOP_CONF_DIR"
+  # fail early with a clear message instead of letting "hadoop jar" treat
+  # the class name as the jar when no job file was found
+  if [ ! -e "$MAHOUT_JOB" ]; then
+    echo "ERROR: no mahout-examples-*.job found in $MAHOUT_HOME/examples/target or $MAHOUT_HOME" 1>&2
+    exit 1
+  fi
+  # make driver.classes.props and the per-program *.props files visible to the driver
+  export HADOOP_CLASSPATH=$MAHOUT_CONF_DIR:${HADOOP_CLASSPATH}
+  # quoted so a MAHOUT_HOME containing spaces is passed as a single argument
+  exec "$HADOOP_HOME/bin/hadoop" jar "$MAHOUT_JOB" $CLASS "$@"
+fi
Added: lucene/mahout/trunk/conf/cleansvd.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/cleansvd.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/cleansvd.props (added)
+++ lucene/mahout/trunk/conf/cleansvd.props Tue Mar 2 18:08:32 2010
@@ -0,0 +1,3 @@
+#ci|corpusInput =
+#ei|eigenInput =
+#o|output =
Added: lucene/mahout/trunk/conf/clusterdump.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/clusterdump.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/dirichlet.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/dirichlet.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/driver.classes.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/driver.classes.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/driver.classes.props (added)
+++ lucene/mahout/trunk/conf/driver.classes.props Tue Mar 2 18:08:32 2010
@@ -0,0 +1,18 @@
+org.apache.mahout.utils.vectors.VectorDumper = vectordump : Dump vectors from a sequence file to text
+org.apache.mahout.utils.clustering.ClusterDumper = clusterdump : Dump cluster output to text
+org.apache.mahout.utils.SequenceFileDumper = seqdumper : Generic Sequence File dumper
+org.apache.mahout.clustering.kmeans.KMeansDriver = kmeans : K-means clustering
+org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver = fkmeans : Fuzzy K-means clustering
+org.apache.mahout.clustering.lda.LDADriver = lda : Latent Dirichlet Allocation
+org.apache.mahout.fpm.pfpgrowth.FPGrowthDriver = fpg : Frequent Pattern Growth
+org.apache.mahout.clustering.dirichlet.DirichletDriver = dirichlet : Dirichlet Clustering
+org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver = meanshift : Mean Shift clustering
+org.apache.mahout.clustering.canopy.CanopyDriver = canopy : Canopy clustering
+org.apache.mahout.utils.vectors.lucene.Driver = lucene.vector : Generate Vectors from a Lucene index
+org.apache.mahout.text.SequenceFilesFromDirectory = seqdirectory : Generate sequence files (of Text) from a directory
+org.apache.mahout.text.SparseVectorsFromSequenceFiles = seq2sparse: Sparse Vector generation from Text sequence files
+org.apache.mahout.text.WikipediaToSequenceFile = seqwiki : Wikipedia xml dump to sequence file
+org.apache.mahout.classifier.bayes.TestClassifier = testclassifier : Test Bayes Classifier
+org.apache.mahout.classifier.bayes.TrainClassifier = trainclassifier : Train Bayes Classifier
+org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver = svd : Lanczos Singular Value Decomposition
+org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob = cleansvd : Cleanup and verification of SVD output
Added: lucene/mahout/trunk/conf/fkmeans.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/fkmeans.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/fpg.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/fpg.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/kmeans.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/kmeans.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/kmeans.props (added)
+++ lucene/mahout/trunk/conf/kmeans.props Tue Mar 2 18:08:32 2010
@@ -0,0 +1,5 @@
+#i|input = /path/to/input
+#o|output = /path/to/output
+#c|clusters = /path/to/put/clusters
+#x|max = <numIterations>
+#k|k = <numClusters>
Added: lucene/mahout/trunk/conf/lucenevector.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/lucenevector.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/meanshift.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/meanshift.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/seq2sparse.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/seq2sparse.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/seq2sparse.props (added)
+++ lucene/mahout/trunk/conf/seq2sparse.props Tue Mar 2 18:08:32 2010
@@ -0,0 +1,15 @@
+#o|output =
+#i|input =
+#s|minSupport =
+#a|analyzerName =
+#chunk|chunkSize =
+#md|minDF =
+#x|maxDFPercent =
+#wt|weight =
+#n|norm =
+#ml|minLLR =
+#nr|numReducers =
+#ng|maxNGramSize =
+#w|overwrite =
+#h|help =
+#seq|sequentialAccessVector =
Added: lucene/mahout/trunk/conf/seqdirectory.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/seqdirectory.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/seqdirectory.props (added)
+++ lucene/mahout/trunk/conf/seqdirectory.props Tue Mar 2 18:08:32 2010
@@ -0,0 +1,3 @@
+#i|input =
+#o|output =
+#c|charset =
Added: lucene/mahout/trunk/conf/seqdumper.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/seqdumper.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/seqwiki.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/seqwiki.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/svd.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/svd.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/svd.props (added)
+++ lucene/mahout/trunk/conf/svd.props Tue Mar 2 18:08:32 2010
@@ -0,0 +1,6 @@
+#i|input =
+#o|output =
+#nr|numRows =
+#nc|numCols =
+#r|rank =
+#t|tempDir =
\ No newline at end of file
Added: lucene/mahout/trunk/conf/testclassifier.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/testclassifier.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/testclassifier.props (added)
+++ lucene/mahout/trunk/conf/testclassifier.props Tue Mar 2 18:08:32 2010
@@ -0,0 +1,6 @@
+#m|model =
+#d|testDir =
+#method|method =
+#source|dataSource =
+#type|classifierType =
+#ng|gramSize =
Added: lucene/mahout/trunk/conf/trainclassifier.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/trainclassifier.props?rev=918133&view=auto
==============================================================================
(empty)
Added: lucene/mahout/trunk/conf/vectordump.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/vectordump.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/vectordump.props (added)
+++ lucene/mahout/trunk/conf/vectordump.props Tue Mar 2 18:08:32 2010
@@ -0,0 +1 @@
+
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Tue Mar 2 18:08:32 2010
@@ -0,0 +1,198 @@
+package org.apache.mahout.driver;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.util.ProgramDriver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * General-purpose driver class for Mahout programs. Utilizes org.apache.hadoop.util.ProgramDriver to run
+ * main methods of other classes, but first loads up default properties from a properties file.
+ *
+ * Usage:
+ *
+ * $MAHOUT_HOME/bin/mahout shortJobName [over-ride opts]
+ *
+ * Works like this: the file "driver.classes.props" is loaded from the classpath, which
+ * defines a mapping between short names like "vectordump" and fully qualified class names.
+ * The format of driver.classes.props is like so:
+ *
+ * fully.qualified.class.name = shortJobName : descriptive string
+ *
+ * The first argument to the driver is the short name of the class to be run (as defined in the
+ * driver.classes.props file). If it matches no short name, it is treated as a fully qualified class name.
+ * With no arguments, or with "-h" / "--help" as the first argument, the arguments are handed to
+ * ProgramDriver unchanged (which is expected to print the list of known programs).
+ *
+ * The default properties to be applied to the program run are pulled out of "<shortJobName>.props"
+ * (also off of the classpath). If that file is missing, only the command-line arguments are used.
+ *
+ * The format of the default properties files is as follows:
+ *
+ * i|input = /path/to/my/input
+ * o|output = /path/to/my/output
+ * m|jarFile = /path/to/jarFile
+ * # etc - each line is shortArg|longArg = value
+ *
+ * Then the class which will be run will have its main called with
+ *
+ * main(new String[] { "--input", "/path/to/my/input", "--output", "/path/to/my/output" });
+ *
+ * Any command-line argument over-rides the default with the same short or long name. Arguments of the
+ * form -Dkey=value are passed ahead of all other arguments; apart from that, the order in which
+ * arguments reach the program is unspecified.
+ *
+ * So if your driver.classes.props looks like so:
+ *
+ * org.apache.mahout.utils.vectors.VectorDumper = vecDump : dump vectors from a sequence file
+ *
+ * and you have a file vecDump.props on the classpath (e.g. in $MAHOUT_CONF_DIR) which looks like
+ *
+ * o|output = /tmp/vectorOut
+ * s|seqFile = /my/vector/sequenceFile
+ *
+ * And you execute the command-line:
+ *
+ * $MAHOUT_HOME/bin/mahout vecDump -s /my/otherVector/sequenceFile
+ *
+ * Then org.apache.mahout.utils.vectors.VectorDumper.main() will be called with arguments:
+ * {"--output", "/tmp/vectorOut", "-s", "/my/otherVector/sequenceFile"}
+ */
+public class MahoutDriver {
+  private static final Logger log = LoggerFactory.getLogger(MahoutDriver.class);
+
+  /**
+   * Entry point: runs the requested program and terminates the JVM.
+   * The exit code is 0 when the program returns normally and -1 when anything is thrown.
+   *
+   * @param args program short name (or class name) followed by that program's arguments
+   */
+  public static void main(String[] args) throws Exception {
+    int exitCode = -1;
+    try {
+      run(args);
+      exitCode = 0;
+    } catch (Throwable e) {
+      e.printStackTrace();
+      log.error("MahoutDriver failed with args: " + Arrays.toString(args) + "\n" + e.getMessage());
+      exitCode = -1;
+    }
+    System.exit(exitCode);
+  }
+
+  /** Registers the known programs, merges defaults with the command line and invokes the program. */
+  private static void run(String[] args) throws Throwable {
+    ProgramDriver programDriver = new ProgramDriver();
+    Properties mainClasses = loadProperties("driver.classes.props");
+    if (mainClasses == null) {
+      throw new IllegalStateException("driver.classes.props not found on classpath");
+    }
+
+    // args may be empty: only look at args[0] once we know it exists
+    String progName = args.length > 0 ? args[0] : null;
+
+    boolean foundShortName = false;
+    for (String className : mainClasses.stringPropertyNames()) {
+      String descString = mainClasses.getProperty(className);
+      if (shortName(descString).equals(progName)) {
+        foundShortName = true;
+      }
+      addClass(programDriver, className, descString);
+    }
+    if (progName == null || progName.equals("-h") || progName.equals("--help")) {
+      // NOTE(review): ProgramDriver is expected to print its usage (and may exit the JVM itself) here;
+      // return afterwards so that "-h" is never treated as a program name.
+      programDriver.driver(args);
+      return;
+    }
+    if (!foundShortName) {
+      // not a known short name: treat it as a fully qualified class name
+      addClass(programDriver, progName, progName);
+    }
+
+    Properties mainProps = loadProperties(progName + ".props");
+    if (mainProps == null) { // can't find props file, use empty props.
+      log.warn("No " + progName + ".props found on classpath, will use command-line arguments only");
+      mainProps = new Properties();
+    }
+
+    Map<String,String[]> argMap = parseArgs(shift(args));
+    applyDefaults(argMap, mainProps);
+    programDriver.driver(buildArgs(progName, argMap));
+  }
+
+  /**
+   * Loads a properties file through the context class loader.
+   *
+   * @return the loaded properties, or null if the resource is not on the classpath
+   */
+  private static Properties loadProperties(String resource) throws java.io.IOException {
+    InputStream stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(resource);
+    if (stream == null) {
+      return null;
+    }
+    try {
+      Properties props = new Properties();
+      props.load(stream);
+      return props;
+    } finally {
+      stream.close();
+    }
+  }
+
+  /**
+   * Groups command-line arguments into option name -> values.
+   * "-Dkey=value" (or "-D key=value") is stored under "-Dkey" with the single value "value"; any other
+   * argument is stored under its own text together with all following arguments that do not start with '-'.
+   */
+  private static Map<String,String[]> parseArgs(String[] args) {
+    Map<String,String[]> argMap = new HashMap<String,String[]>();
+    int i = 0;
+    while (i < args.length && args[i] != null) {
+      List<String> argValues = new ArrayList<String>();
+      String arg = args[i];
+      i++;
+      if (arg.equals("-D") && i < args.length && args[i] != null) {
+        // "-D key=value": join with the next argument so it is handled like "-Dkey=value"
+        arg += args[i];
+        i++;
+      }
+      if (arg.length() > 2 && arg.startsWith("-D")) { // '-Dkey=value' or '-Dkey=value1,value2,etc' case
+        String[] argSplit = arg.split("=", 2); // limit 2: the value itself may contain '='
+        arg = argSplit[0];
+        if (argSplit.length == 2) {
+          argValues.add(argSplit[1]);
+        }
+      } else { // '-key [values]' or '--key [values]' case.
+        while (i < args.length && args[i] != null && args[i].length() > 0 && args[i].charAt(0) != '-') {
+          argValues.add(args[i]);
+          i++;
+        }
+      }
+      argMap.put(arg, argValues.toArray(new String[argValues.size()]));
+    }
+    return argMap;
+  }
+
+  /**
+   * Adds every default from the properties file whose short and long option are both absent from argMap.
+   * Keys have the form "shortArg|longArg"; the default is stored under "--longArg", or under "-shortArg"
+   * when the key has no long form.
+   */
+  private static void applyDefaults(Map<String,String[]> argMap, Properties mainProps) {
+    for (String key : mainProps.stringPropertyNames()) {
+      String[] argNamePair = key.split("\\|");
+      String shortArg = "-" + argNamePair[0].trim();
+      String longArg = argNamePair.length < 2 ? null : "--" + argNamePair[1].trim();
+      if (!argMap.containsKey(shortArg) && (longArg == null || !argMap.containsKey(longArg))) {
+        // never use null as a key: fall back to the short form when there is no long form
+        argMap.put(longArg == null ? shortArg : longArg, new String[] { mainProps.getProperty(key) });
+      }
+    }
+  }
+
+  /**
+   * Flattens argMap back into an argument array for ProgramDriver: the program name first, then all
+   * "-Dkey=value" arguments, then every other option followed by its values.
+   */
+  private static String[] buildArgs(String progName, Map<String,String[]> argMap) {
+    List<String> genericArgs = new ArrayList<String>();
+    List<String> programArgs = new ArrayList<String>();
+    for (Map.Entry<String,String[]> entry : argMap.entrySet()) {
+      String arg = entry.getKey();
+      String[] values = entry.getValue();
+      if (arg.startsWith("-D")) { // arg is -Dkey - if value for this !isEmpty(), then arg -> -Dkey + "=" + value
+        if (values.length > 0 && !values[0].trim().isEmpty()) {
+          arg += "=" + values[0].trim();
+        }
+        genericArgs.add(arg);
+      } else {
+        programArgs.add(arg);
+        programArgs.addAll(Arrays.asList(values));
+      }
+    }
+    List<String> argsList = new ArrayList<String>();
+    argsList.add(progName);
+    // -D options go first: Hadoop's generic option parsing expects them ahead of program options
+    argsList.addAll(genericArgs);
+    argsList.addAll(programArgs);
+    return argsList.toArray(new String[argsList.size()]);
+  }
+
+  /** Returns a copy of args without its first element; the input array is left untouched. */
+  private static String[] shift(String[] args) {
+    if (args.length == 0) {
+      return args;
+    }
+    String[] shifted = new String[args.length - 1];
+    System.arraycopy(args, 1, shifted, 0, shifted.length);
+    return shifted;
+  }
+
+  /** Extracts "shortJobName" from "shortJobName : descriptive string" (the whole string if there is no ':'). */
+  private static String shortName(String valueString) {
+    int colon = valueString.indexOf(':');
+    return colon < 0 ? valueString.trim() : valueString.substring(0, colon).trim();
+  }
+
+  /** Extracts "descriptive string" from "shortJobName : descriptive string" (the whole string if there is no ':'). */
+  private static String desc(String valueString) {
+    int colon = valueString.indexOf(':');
+    // colon + 1: the separator itself is not part of the description
+    return colon < 0 ? valueString.trim() : valueString.substring(colon + 1).trim();
+  }
+
+  /**
+   * Registers classString with the driver under the short name and description found in descString.
+   * Best effort: a class that cannot be loaded or registered is logged and skipped.
+   */
+  private static void addClass(ProgramDriver driver, String classString, String descString) {
+    try {
+      Class<?> clazz = Class.forName(classString);
+      driver.addClass(shortName(descString), clazz, desc(descString));
+    } catch (Throwable e) {
+      log.warn("Unable to add class: " + classString + "\n" + e.getMessage());
+    }
+  }
+
+}
Propchange: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
------------------------------------------------------------------------------
svn:keywords = "Date Rev Author URL Id"
Propchange: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: lucene/mahout/trunk/src/main/assembly/bin.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/assembly/bin.xml?rev=918133&r1=918132&r2=918133&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/assembly/bin.xml (original)
+++ lucene/mahout/trunk/src/main/assembly/bin.xml Tue Mar 2 18:08:32 2010
@@ -57,6 +57,10 @@
<outputDirectory>bin</outputDirectory>
</fileSet>
<fileSet>
+ <directory>conf</directory>
+ <outputDirectory>conf</outputDirectory>
+ </fileSet>
+ <fileSet>
<directory>math/target/apidocs</directory>
<outputDirectory>docs/mahout-math</outputDirectory>
</fileSet>