You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by jm...@apache.org on 2010/03/02 19:08:33 UTC

svn commit: r918133 - in /lucene/mahout/trunk: bin/ conf/ core/src/main/java/org/apache/mahout/driver/ src/main/assembly/

Author: jmannix
Date: Tue Mar  2 18:08:32 2010
New Revision: 918133

URL: http://svn.apache.org/viewvc?rev=918133&view=rev
Log:
Fixes MAHOUT-301

Added:
    lucene/mahout/trunk/conf/
    lucene/mahout/trunk/conf/cleansvd.props
    lucene/mahout/trunk/conf/clusterdump.props
    lucene/mahout/trunk/conf/dirichlet.props
    lucene/mahout/trunk/conf/driver.classes.props
    lucene/mahout/trunk/conf/fkmeans.props
    lucene/mahout/trunk/conf/fpg.props
    lucene/mahout/trunk/conf/kmeans.props
    lucene/mahout/trunk/conf/lucenevector.props
    lucene/mahout/trunk/conf/meanshift.props
    lucene/mahout/trunk/conf/seq2sparse.props
    lucene/mahout/trunk/conf/seqdirectory.props
    lucene/mahout/trunk/conf/seqdumper.props
    lucene/mahout/trunk/conf/seqwiki.props
    lucene/mahout/trunk/conf/svd.props
    lucene/mahout/trunk/conf/testclassifier.props
    lucene/mahout/trunk/conf/trainclassifier.props
    lucene/mahout/trunk/conf/vectordump.props
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java   (with props)
Modified:
    lucene/mahout/trunk/bin/mahout
    lucene/mahout/trunk/src/main/assembly/bin.xml

Modified: lucene/mahout/trunk/bin/mahout
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/bin/mahout?rev=918133&r1=918132&r2=918133&view=diff
==============================================================================
--- lucene/mahout/trunk/bin/mahout (original)
+++ lucene/mahout/trunk/bin/mahout Tue Mar  2 18:08:32 2010
@@ -9,10 +9,16 @@
 #   MAHOUT_HEAPSIZE    The maximum amount of heap to use, in MB. 
 #                      Default is 1000.
 #
-#   HADOOP_CONFIF_DIR  The location of a hadoop config directory 
+#   HADOOP_CONF_DIR  The location of a hadoop config directory 
 #
 #   MAHOUT_OPTS        Extra Java runtime options.
 #
+#   MAHOUT_CONF_DIR    The location of the program short-name to class name
+#                      mappings and the default properties files.
+#                      Defaults to "$MAHOUT_HOME/conf".
+#
+
+#
 #/**
 # * Licensed to the Apache Software Foundation (ASF) under one or more
 # * contributor license agreements.  See the NOTICE file distributed with
@@ -47,40 +53,6 @@
   fi
 done
 
-# if no args specified, show usage
-if [ $# = 0 ]; then
-  echo "NOTE: These parameters are subject to change in future releases"
-  echo "Usage: mahout [-core] COMMAND"
-  echo "where COMMAND is one of:"
-# Please keep alphabetized
-  echo "  canopy                run canopy clustering"
-  echo "  clusterdump           dump clusters from a sequence file"
-  echo "  dirchlet              run Dirchlet clustering"
-  echo "  fkmeans               run fuzzy kmeans clustering"
-  echo "  fpg                   run FPGrowth Driver for freq. pattern mining"
-  echo "  kmeans                run kmeans clustering"
-  echo "  lda                   run LDA clustering"
-  echo "  ldadump               dump the output state of LDA"
-  echo "  lucenevector          generate vectors from a lucene index"
-  echo "  trainclassifier       run Bayes/CBayes classifier training job"
-  echo "  testclassifier        test Bayes/CBayes model using a pre-classified data"
-  echo "  meanshift             run Mean Shift clustering"
-  echo "  seqdirectory          generate sequence files containing the documents beneathe a directory"
-  echo "  seqdump               dump a sequence files using the writable toString() method"
-  echo "  seqwiki               generate sequence files from a wikipedia dump file"
-  echo "  seq2sparse            generate sparse vectors from a sequence file"
-  echo "  vectordump            dump vectors from a sequence file"
-  echo " or"
-  echo "  CLASSNAME         run the class named CLASSNAME"
-  echo "Most commands print help when invoked w/o parameters."
-  echo ""
-  echo "Expert: -core option is for developers only. It avoids building the job jar, "
-  echo "        instead it simply includes classes compiled with mvn package. "
-  echo "        NOTE: this works only for jobs executed in 'local' mode"
-  echo " Most algorithms should support using --help to get the full list of inputs."
-  exit 1
-fi
-
 IS_CORE=0
 #check for -core option
 if [ "$1" == "-core" ] ; then
@@ -88,10 +60,6 @@
   shift
 fi
 
-# get arguments
-COMMAND=$1
-shift
-
 # some directories
 THIS_DIR=`dirname "$THIS"`
 MAHOUT_HOME=`cd "$THIS_DIR/.." ; pwd`
@@ -117,8 +85,13 @@
   #echo $JAVA_HEAP_MAX
 fi
 
+if [ "x$MAHOUT_CONF_DIR" = "x" ]; then
+  MAHOUT_CONF_DIR=$MAHOUT_HOME/conf
+fi
+
 # CLASSPATH initially contains $MAHOUT_CONF_DIR, or defaults to $MAHOUT_HOME/conf
-CLASSPATH=${HADOOP_CONF_DIR:=$MAHOUT_HOME/conf}
+CLASSPATH=${CLASSPATH}:$MAHOUT_CONF_DIR
+CLASSPATH=${CLASSPATH}:$HADOOP_CONF_DIR
 CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
 
 # so that filenames w/ spaces are handled correctly in loops below
@@ -126,12 +99,18 @@
 
 if [ $IS_CORE == 0 ] 
 then
+  # add release Mahout jars to CLASSPATH
+  for f in $MAHOUT_HOME/mahout-*.jar; do
+    CLASSPATH=${CLASSPATH}:$f;
+  done
+
+  # add dev targets if they exist
   for f in $MAHOUT_HOME/*/target/mahout-*.job; do
     CLASSPATH=${CLASSPATH}:$f;
   done
 
-  # for releases, add Mahout job to CLASSPATH
-  for f in $MAHOUT_HOME/mahout-*.job; do
+  # add release dependencies to CLASSPATH
+  for f in $MAHOUT_HOME/lib/*.jar; do
     CLASSPATH=${CLASSPATH}:$f;
   done
 else
@@ -139,13 +118,15 @@
   CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/core/target/classes
   CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/utils/target/classes
   CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/examples/target/classes
+  #CLASSPATH=${CLASSPATH}:$MAHOUT_HOME/core/src/main/resources
 fi
 
-# add dependencies to CLASSPATH
+# add development dependencies to CLASSPATH
 for f in $MAHOUT_HOME/examples/target/dependency/*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 
+
 # cygwin path translation
 if $cygwin; then
   CLASSPATH=`cygpath -p -w "$CLASSPATH"`
@@ -174,47 +155,38 @@
   MAHOUT_OPTS="$MAHOUT_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
 fi
 
-# figure out which class to run
-if [ "$COMMAND" = "vectordump" ] ; then
-  CLASS=org.apache.mahout.utils.vectors.VectorDumper
-elif [ "$COMMAND" = "clusterdump" ] ; then
-  CLASS=org.apache.mahout.utils.clustering.ClusterDumper
-elif [ "$COMMAND" = "seqdump" ] ; then
-  CLASS=org.apache.mahout.utils.SequenceFileDumper
-elif [ "$COMMAND" = "kmeans" ] ; then
-  CLASS=org.apache.mahout.clustering.kmeans.KMeansDriver
-elif [ "$COMMAND" = "fkmeans" ] ; then
-  CLASS=org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver
-elif [ "$COMMAND" = "lda" ] ; then
-  CLASS=org.apache.mahout.clustering.lda.LDADriver
-elif [ "$COMMAND" = "ldadump" ] ; then
-  CLASS=org.apache.mahout.clustering.lda.LDAPrintTopics
-elif [ "$COMMAND" = "fpg" ] ; then
-  CLASS=org.apache.mahout.fpm.pfpgrowth.FPGrowthDriver  
-elif [ "$COMMAND" = "dirichlet" ] ; then
-  CLASS=org.apache.mahout.clustering.dirichlet.DirichletDriver
-elif [ "$COMMAND" = "meanshift" ] ; then
-  CLASS=org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver  
-elif [ "$COMMAND" = "canopy" ] ; then
-  CLASS=org.apache.mahout.clustering.canopy.CanopyDriver
-elif [ "$COMMAND" = "lucenevector" ]; then
-  CLASS=org.apache.mahout.utils.vectors.lucene.Driver
-elif [ "$COMMAND" = "seqdirectory" ]; then
-  CLASS=org.apache.mahout.text.SequenceFilesFromDirectory
-elif [ "$COMMAND" = "seqwiki" ]; then
-  CLASS=org.apache.mahout.text.WikipediaToSequenceFile
-elif [ "$COMMAND" = "seq2sparse" ]; then
-  CLASS=org.apache.mahout.text.SparseVectorsFromSequenceFiles  
-elif [ "$COMMAND" = "trainclassifier" ]; then
-  CLASS=org.apache.mahout.classifier.bayes.TrainClassifier  
-elif [ "$COMMAND" = "testclassifier" ]; then
-  CLASS=org.apache.mahout.classifier.bayes.TestClassifier
-else
-  CLASS=$COMMAND
-fi
+# every command is dispatched through MahoutDriver, which takes the
+# program short name (or a class name) as its first argument
+CLASS=org.apache.mahout.driver.MahoutDriver
 
-#echo $CLASSPATH
+# locate the examples job: prefer a dev build, else fall back to the release layout
+for f in $MAHOUT_HOME/examples/target/mahout-examples-*.job; do
+  if [ -e "$f" ]; then
+    MAHOUT_JOB=$f
+  fi
+done
+
+if [ "$MAHOUT_JOB" = "" ]; then
+  for f in $MAHOUT_HOME/mahout-examples-*.job; do
+    # an unmatched glob stays literal, so only accept files that exist
+    if [ -e "$f" ]; then
+      MAHOUT_JOB=$f
+    fi
+  done
+fi
 
 # run it
-exec "$JAVA" $JAVA_HEAP_MAX $MAHOUT_OPTS -classpath "$CLASSPATH" $CLASS "$@"
+
+if [ "$HADOOP_CONF_DIR" = "" ] || [ "$HADOOP_HOME" = "" ]; then
+  echo "no HADOOP_CONF_DIR or HADOOP_HOME set, running locally"
+  exec "$JAVA" $JAVA_HEAP_MAX $MAHOUT_OPTS -classpath "$CLASSPATH" $CLASS "$@"
+else
+  if [ "$MAHOUT_JOB" = "" ]; then
+    echo "ERROR: no mahout-examples-*.job found in $MAHOUT_HOME/examples/target or $MAHOUT_HOME" 1>&2
+    exit 1
+  fi
+  echo "running on hadoop, using HADOOP_HOME=$HADOOP_HOME and HADOOP_CONF_DIR=$HADOOP_CONF_DIR"
+  export HADOOP_CLASSPATH=$MAHOUT_CONF_DIR:${HADOOP_CLASSPATH}
+  exec "$HADOOP_HOME/bin/hadoop" jar "$MAHOUT_JOB" $CLASS "$@"
+fi
 

Added: lucene/mahout/trunk/conf/cleansvd.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/cleansvd.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/cleansvd.props (added)
+++ lucene/mahout/trunk/conf/cleansvd.props Tue Mar  2 18:08:32 2010
@@ -0,0 +1,3 @@
+#ci|corpusInput =
+#ei|eigenInput =  
+#o|output =

Added: lucene/mahout/trunk/conf/clusterdump.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/clusterdump.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/dirichlet.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/dirichlet.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/driver.classes.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/driver.classes.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/driver.classes.props (added)
+++ lucene/mahout/trunk/conf/driver.classes.props Tue Mar  2 18:08:32 2010
@@ -0,0 +1,21 @@
+# Maps each runnable class to the program short name accepted by bin/mahout.
+# Format: fully.qualified.class.name = shortName : description
+org.apache.mahout.utils.vectors.VectorDumper = vectordump : Dump vectors from a sequence file to text
+org.apache.mahout.utils.clustering.ClusterDumper = clusterdump : Dump cluster output to text
+org.apache.mahout.utils.SequenceFileDumper = seqdumper : Generic Sequence File dumper
+org.apache.mahout.clustering.kmeans.KMeansDriver = kmeans : K-means clustering
+org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver = fkmeans : Fuzzy K-means clustering
+org.apache.mahout.clustering.lda.LDADriver = lda : Latent Dirichlet Allocation
+org.apache.mahout.clustering.lda.LDAPrintTopics = ldadump : Dump the output state of LDA
+org.apache.mahout.fpm.pfpgrowth.FPGrowthDriver = fpg : Frequent Pattern Growth
+org.apache.mahout.clustering.dirichlet.DirichletDriver = dirichlet : Dirichlet Clustering
+org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver = meanshift : Mean Shift clustering
+org.apache.mahout.clustering.canopy.CanopyDriver = canopy : Canopy clustering
+org.apache.mahout.utils.vectors.lucene.Driver = lucenevector : Generate Vectors from a Lucene index
+org.apache.mahout.text.SequenceFilesFromDirectory = seqdirectory : Generate sequence files (of Text) from a directory
+org.apache.mahout.text.SparseVectorsFromSequenceFiles = seq2sparse : Sparse Vector generation from Text sequence files
+org.apache.mahout.text.WikipediaToSequenceFile = seqwiki : Wikipedia xml dump to sequence file
+org.apache.mahout.classifier.bayes.TestClassifier = testclassifier : Test Bayes Classifier
+org.apache.mahout.classifier.bayes.TrainClassifier = trainclassifier : Train Bayes Classifier
+org.apache.mahout.math.hadoop.decomposer.DistributedLanczosSolver = svd : Lanczos Singular Value Decomposition
+org.apache.mahout.math.hadoop.decomposer.EigenVerificationJob = cleansvd : Cleanup and verification of SVD output

Added: lucene/mahout/trunk/conf/fkmeans.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/fkmeans.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/fpg.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/fpg.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/kmeans.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/kmeans.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/kmeans.props (added)
+++ lucene/mahout/trunk/conf/kmeans.props Tue Mar  2 18:08:32 2010
@@ -0,0 +1,5 @@
+#i|input = /path/to/input
+#o|output = /path/to/output
+#c|clusters = /path/to/put/clusters
+#x|max = <numIterations>
+#k|k = <numClusters>

Added: lucene/mahout/trunk/conf/lucenevector.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/lucenevector.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/meanshift.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/meanshift.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/seq2sparse.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/seq2sparse.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/seq2sparse.props (added)
+++ lucene/mahout/trunk/conf/seq2sparse.props Tue Mar  2 18:08:32 2010
@@ -0,0 +1,15 @@
+#o|output =
+#i|input =
+#s|minSupport =
+#a|analyzerName = 
+#chunk|chunkSize =
+#md|minDF =
+#x|maxDFPercent =
+#wt|weight =
+#n|norm =
+#ml|minLLR =
+#nr|numReducers =
+#ng|maxNGramSize = 
+#w|overwrite =
+#h|help = 
+#seq|sequentialAccessVector =

Added: lucene/mahout/trunk/conf/seqdirectory.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/seqdirectory.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/seqdirectory.props (added)
+++ lucene/mahout/trunk/conf/seqdirectory.props Tue Mar  2 18:08:32 2010
@@ -0,0 +1,3 @@
+#i|input =
+#o|output =
+#c|charset =

Added: lucene/mahout/trunk/conf/seqdumper.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/seqdumper.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/seqwiki.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/seqwiki.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/svd.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/svd.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/svd.props (added)
+++ lucene/mahout/trunk/conf/svd.props Tue Mar  2 18:08:32 2010
@@ -0,0 +1,6 @@
+#i|input =
+#o|output =
+#nr|numRows =
+#nc|numCols =
+#r|rank =
+#t|tempDir = 
\ No newline at end of file

Added: lucene/mahout/trunk/conf/testclassifier.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/testclassifier.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/testclassifier.props (added)
+++ lucene/mahout/trunk/conf/testclassifier.props Tue Mar  2 18:08:32 2010
@@ -0,0 +1,6 @@
+#m|model =
+#d|testDir =
+#method|method =
+#source|dataSource =
+#type|classifierType =
+#ng|gramSize = 

Added: lucene/mahout/trunk/conf/trainclassifier.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/trainclassifier.props?rev=918133&view=auto
==============================================================================
    (empty)

Added: lucene/mahout/trunk/conf/vectordump.props
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/conf/vectordump.props?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/conf/vectordump.props (added)
+++ lucene/mahout/trunk/conf/vectordump.props Tue Mar  2 18:08:32 2010
@@ -0,0 +1 @@
+

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=918133&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Tue Mar  2 18:08:32 2010
@@ -0,0 +1,257 @@
+package org.apache.mahout.driver;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.util.ProgramDriver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * General-purpose driver class for Mahout programs.  Utilizes org.apache.hadoop.util.ProgramDriver to run
+ * main methods of other classes, but first loads up default properties from a properties file.
+ *
+ * For local running:
+ *
+ * $MAHOUT_HOME/bin/mahout shortJobName [over-ride ops]
+ *
+ * Works like this: the file "driver.classes.props" is loaded from the classpath, which
+ * defines a mapping between short names like "vectordump" and fully qualified class names.
+ * The format of driver.classes.props is like so:
+ *
+ * fully.qualified.class.name = shortJobName : descriptive string
+ *
+ * The first argument to the driver is the short name of the class to be run (as defined in the
+ * driver.classes.props file).  A name with no mapping is tried as a fully qualified class name.
+ *
+ * The default properties to be applied to the program run are pulled out of "&lt;shortJobName&gt;.props"
+ * (also off of the classpath).  The format of the default properties files is as follows:
+ *
+ * i|input = /path/to/my/input
+ * o|output = /path/to/my/output
+ * m|jarFile = /path/to/jarFile
+ * # etc - each line is shortArg|longArg = value
+ *
+ * Given only the first two defaults above, the class which will be run has its main called with
+ * (defaults are appended in no guaranteed order)
+ *
+ *   main(new String[] { "--input", "/path/to/my/input", "--output", "/path/to/my/output" });
+ *
+ * Command-line arguments following the short name are passed through in the order given, and
+ * over-ride the defaults: a default is skipped when either its short or its long form was given.
+ *
+ * So if your driver.classes.props looks like so:
+ *
+ * org.apache.mahout.utils.vectors.VectorDumper = vecDump : dump vectors from a sequence file
+ *
+ * and you have a file vecDump.props on the classpath which looks like
+ *
+ * o|output = /tmp/vectorOut
+ * s|seqFile = /my/vector/sequenceFile
+ *
+ * And you execute the command-line:
+ *
+ * $MAHOUT_HOME/bin/mahout vecDump -s /my/otherVector/sequenceFile
+ *
+ * Then org.apache.mahout.utils.vectors.VectorDumper.main() will be called with arguments:
+ *   {"-s", "/my/otherVector/sequenceFile", "--output", "/tmp/vectorOut"}
+ */
+public class MahoutDriver {
+  private static final Logger log = LoggerFactory.getLogger(MahoutDriver.class);
+
+  /** Classpath resource holding the class name to "shortName : description" mappings. */
+  private static final String DRIVER_CLASSES_RESOURCE = "driver.classes.props";
+
+  /**
+   * Runs the requested program and terminates the JVM: exit code 0 when the program was
+   * dispatched and returned normally, -1 for a usage request or any failure.
+   *
+   * @param args the program short name (or class name) followed by that program's arguments
+   */
+  public static void main(String[] args) throws Exception {
+    int exitCode = -1;
+    try {
+      exitCode = run(args);
+    } catch (Throwable e) {
+      e.printStackTrace();
+      log.error("MahoutDriver failed with args: " + Arrays.toString(args) + "\n" + e.getMessage());
+    }
+    System.exit(exitCode);
+  }
+
+  /**
+   * Registers the known programs, merges the defaults for the chosen one with the command line
+   * and hands the result to ProgramDriver.
+   *
+   * @return the exit code for {@link #main(String[])}
+   */
+  private static int run(String[] args) throws Throwable {
+    // Decide this before touching args[0]: an empty command line must reach the usage branch
+    // instead of failing with ArrayIndexOutOfBoundsException.
+    boolean usageRequested =
+        args.length < 1 || args[0] == null || args[0].equals("-h") || args[0].equals("--help");
+    String progName = usageRequested ? null : args[0];
+
+    Properties mainClasses = loadProperties(DRIVER_CLASSES_RESOURCE);
+    if (mainClasses == null) {
+      log.warn("No " + DRIVER_CLASSES_RESOURCE + " found on classpath, no short names are available");
+      mainClasses = new Properties();
+    }
+
+    ProgramDriver programDriver = new ProgramDriver();
+    boolean foundShortName = false;
+    for (String className : mainClasses.stringPropertyNames()) {
+      String descString = mainClasses.getProperty(className);
+      if (shortName(descString).equals(progName)) {
+        foundShortName = true;
+      }
+      addClass(programDriver, className, descString);
+    }
+    if (usageRequested) {
+      // No runnable program name was given.  ProgramDriver is expected to print its usage here
+      // (TODO confirm for the Hadoop version in use); never fall through to the dispatch below.
+      programDriver.driver(args);
+      return -1;
+    }
+    if (!foundShortName) {
+      // Not a known short name: treat it as a fully qualified class name.
+      addClass(programDriver, progName, progName);
+    }
+
+    Properties mainProps = loadProperties(progName + ".props");
+    if (mainProps == null) { // can't find props file, use empty props.
+      log.warn("No " + progName + ".props found on classpath, will use command-line arguments only");
+      mainProps = new Properties();
+    }
+
+    Map<String,String[]> argMap = parseCommandLine(args);
+    applyDefaults(mainProps, argMap);
+    programDriver.driver(buildArgs(progName, argMap));
+    return 0;
+  }
+
+  /**
+   * Loads a properties file through the context class loader and closes the stream afterwards.
+   *
+   * @return the loaded properties, or null when the resource is not on the classpath
+   */
+  private static Properties loadProperties(String resourceName) throws IOException {
+    InputStream stream = Thread.currentThread().getContextClassLoader().getResourceAsStream(resourceName);
+    if (stream == null) {
+      return null;
+    }
+    try {
+      Properties props = new Properties();
+      props.load(stream);
+      return props;
+    } finally {
+      stream.close();
+    }
+  }
+
+  /**
+   * Groups the arguments after the program name into option to values, keeping command-line order.
+   * "-Dkey=value" is stored as "-Dkey" with the single value "value"; any other argument is taken
+   * as an option name that owns the following arguments up to the next one starting with '-'.
+   */
+  private static Map<String,String[]> parseCommandLine(String[] args) {
+    Map<String,String[]> argMap = new LinkedHashMap<String,String[]>();
+    int i = 1; // args[0] is the program name
+    while (i < args.length && args[i] != null) {
+      List<String> argValues = new ArrayList<String>();
+      String arg = args[i];
+      i++;
+      if (arg.length() > 2 && arg.startsWith("-D")) { // '-Dkey=value' or '-Dkey=value1,value2,etc' case
+        // Split on the first '=' only so that values which themselves contain '=' survive.
+        String[] argSplit = arg.split("=", 2);
+        arg = argSplit[0];
+        if (argSplit.length == 2) {
+          argValues.add(argSplit[1]);
+        }
+      } else {                                         // '-key [values]' or '--key [values]' case.
+        while (i < args.length && args[i] != null && args[i].length() > 0 && args[i].charAt(0) != '-') {
+          argValues.add(args[i]);
+          i++;
+        }
+      }
+      argMap.put(arg, argValues.toArray(new String[argValues.size()]));
+    }
+    return argMap;
+  }
+
+  /**
+   * Adds every default whose short and long option are both absent from argMap.  Defaults are
+   * added under the long option, or under the short one when the key has no "|longArg" part.
+   */
+  private static void applyDefaults(Properties defaults, Map<String,String[]> argMap) {
+    for (String key : defaults.stringPropertyNames()) {
+      String[] argNamePair = key.split("\\|");
+      String shortArg = "-" + argNamePair[0].trim();
+      String longArg = argNamePair.length < 2 ? null : "--" + argNamePair[1].trim();
+      boolean given = argMap.containsKey(shortArg) || (longArg != null && argMap.containsKey(longArg));
+      if (!given) {
+        // Never store under a null key: a default without a long form falls back to the
+        // short option so that the argument assembly can still read it.
+        argMap.put(longArg == null ? shortArg : longArg, new String[] { defaults.getProperty(key) });
+      }
+    }
+  }
+
+  /** Flattens argMap into the array handed to ProgramDriver, with progName as first element. */
+  private static String[] buildArgs(String progName, Map<String,String[]> argMap) {
+    List<String> argsList = new ArrayList<String>();
+    argsList.add(progName);
+    for (Map.Entry<String,String[]> entry : argMap.entrySet()) {
+      String arg = entry.getKey();
+      String[] values = entry.getValue();
+      if (arg.length() > 2 && arg.startsWith("-D")) {
+        // -Dkey is re-joined with its value as a single "-Dkey=value" argument.
+        if (values.length > 0 && !values[0].trim().isEmpty()) {
+          arg += "=" + values[0].trim();
+        }
+        argsList.add(arg);
+      } else {
+        argsList.add(arg);
+        argsList.addAll(Arrays.asList(values));
+      }
+    }
+    return argsList.toArray(new String[argsList.size()]);
+  }
+
+  /** Returns the part of "shortName : description" before the first ':', or the whole string. */
+  private static String shortName(String valueString) {
+    int colon = valueString.indexOf(':');
+    return colon < 0 ? valueString : valueString.substring(0, colon).trim();
+  }
+
+  /** Returns the part of "shortName : description" after the first ':', or the whole string. */
+  private static String desc(String valueString) {
+    int colon = valueString.indexOf(':');
+    // Start one past the ':' so the separator itself is not part of the description.
+    return colon < 0 ? valueString : valueString.substring(colon + 1).trim();
+  }
+
+  /**
+   * Registers classString with the driver under the short name and description in descString.
+   * A class that cannot be loaded or registered is logged and skipped.
+   */
+  private static void addClass(ProgramDriver driver, String classString, String descString) {
+    try {
+      Class<?> clazz = Class.forName(classString);
+      driver.addClass(shortName(descString), clazz, desc(descString));
+    } catch (Throwable e) {
+      log.warn("Unable to add class: " + classString + "\n" + e.getMessage());
+    }
+  }
+
+}

Propchange: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
------------------------------------------------------------------------------
    svn:keywords = "Date Rev Author URL Id"

Propchange: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: lucene/mahout/trunk/src/main/assembly/bin.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/src/main/assembly/bin.xml?rev=918133&r1=918132&r2=918133&view=diff
==============================================================================
--- lucene/mahout/trunk/src/main/assembly/bin.xml (original)
+++ lucene/mahout/trunk/src/main/assembly/bin.xml Tue Mar  2 18:08:32 2010
@@ -57,6 +57,10 @@
       <outputDirectory>bin</outputDirectory>
     </fileSet>
     <fileSet>
+      <directory>conf</directory>
+      <outputDirectory>conf</outputDirectory>
+    </fileSet>
+    <fileSet>
       <directory>math/target/apidocs</directory>
       <outputDirectory>docs/mahout-math</outputDirectory>
     </fileSet>