You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ko...@apache.org on 2012/12/20 13:02:47 UTC

svn commit: r1424423 - in /labs/alike/trunk: ./ demo/ src/java/org/apache/alike/ src/test/org/apache/alike/ src/test/test-files/

Author: koji
Date: Thu Dec 20 12:02:47 2012
New Revision: 1424423

URL: http://svn.apache.org/viewvc?rev=1424423&view=rev
Log:
add java programs for launching kmeans and clusterdump so that they can read xml config before launching mahout tools

Added:
    labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java
    labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java
Modified:
    labs/alike/trunk/demo/README.txt
    labs/alike/trunk/demo/build.xml
    labs/alike/trunk/demo/demo-conf.xml
    labs/alike/trunk/ivy.xml
    labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java
    labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
    labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java
    labs/alike/trunk/src/test/test-files/valid-conf.xml

Modified: labs/alike/trunk/demo/README.txt
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/README.txt?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/demo/README.txt (original)
+++ labs/alike/trunk/demo/README.txt Thu Dec 20 12:02:47 2012
@@ -43,8 +43,8 @@
 
    $ ant piv
    # kmeans may take tens of minutes
-   $ mahout kmeans -i input-vectors -c init-clusters -o output-clusters -k 1000 --maxIter 50 -cd 0.01
-   $ mahout clusterdump -i $(find output-clusters -name \*-final) -o result-centroids.txt
+   $ ant kmeans
+   $ ant clusterdump
    $ ant qv
 
 7. goto Solr site, download Solr 4.0 or superior and unzip

Modified: labs/alike/trunk/demo/build.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/build.xml?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/demo/build.xml (original)
+++ labs/alike/trunk/demo/build.xml Thu Dec 20 12:02:47 2012
@@ -31,6 +31,24 @@
         </java>
     </target>
 
+    <target name="kmeans" depends="alike-compile" description="run KMeansLauncher">
+        <java classname="org.apache.alike.KMeansLauncher" dir="demo" fork="true">
+            <jvmarg line="-Dfile.encoding=UTF-8"/>
+            <arg line="demo-conf.xml"/>
+            <classpath refid="common.path.lib"/>
+            <classpath path="${cls.dir}"/>
+        </java>
+    </target>
+
+    <target name="clusterdump" depends="alike-compile" description="run ClusterDumperLauncher">
+        <java classname="org.apache.alike.ClusterDumperLauncher" dir="demo" fork="true">
+            <jvmarg line="-Dfile.encoding=UTF-8"/>
+            <arg line="demo-conf.xml"/>
+            <classpath refid="common.path.lib"/>
+            <classpath path="${cls.dir}"/>
+        </java>
+    </target>
+
     <target name="qv" depends="alike-compile" description="run PrepareInputVectors">
         <java classname="org.apache.alike.QuantizeVectors" fork="true">
             <jvmarg line="-Dfile.encoding=UTF-8"/>

Modified: labs/alike/trunk/demo/demo-conf.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/demo-conf.xml?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/demo/demo-conf.xml (original)
+++ labs/alike/trunk/demo/demo-conf.xml Thu Dec 20 12:02:47 2012
@@ -32,7 +32,7 @@
     <cluster method="kmeans">
       <param name="maxIter">10</param>
       <param name="cd">0.01</param>
-      <param name="init">demo/init-clusters</param>
+      <param name="init">init-clusters</param>
     </cluster>
     <dump file="result-centroids.txt"/>
   </clustering>
@@ -44,7 +44,7 @@
       <histogramFieldName>histogram</histogramFieldName>
     </fieldNames>
     <indexer class="org.apache.alike.SolrStandardXMLIndexer">
-      <histogramMatcher class="LeastSquaresHistogramMatcher"/>
+      <histogramMatcher class="org.apache.alike.LeastSquaresHistogramMatcher"/>
       <param name="file">demo/solr-demo-data.xml</param>
     </indexer>
   </vectorQuantization>

Modified: labs/alike/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/ivy.xml?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/ivy.xml (original)
+++ labs/alike/trunk/ivy.xml Thu Dec 20 12:02:47 2012
@@ -30,7 +30,9 @@
 -->
     <dependency org="commons-io" name="commons-io" rev="2.4"/>
     <dependency org="org.apache.hadoop" name="hadoop-core" rev="0.20.204.0"/>
+    <dependency org="org.apache.mahout" name="mahout-integration" rev="0.7"/>
     <dependency org="org.apache.mahout" name="mahout-core" rev="0.7"/>
     <dependency org="org.slf4j" name="slf4j-jcl" rev="1.6.1"/>
+    <exclude org="org.mongodb"/>
   </dependencies>
 </ivy-module>

Modified: labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java Thu Dec 20 12:02:47 2012
@@ -46,6 +46,30 @@ public final class AlikeConfig {
     return getStringValue("/config/visualDescriptorsExtraction/descDir/hdfs/text()");
   }
   
+  public String getNumOfClusters(){ // number of clusters: the k attribute of /config/clustering
+    return getStringValue("/config/clustering/@k");
+  }
+  
+  public String getOutClusterDir(){ // cluster output directory: text of /config/clustering/outDir
+    return getStringValue("/config/clustering/outDir/text()");
+  }
+  
+  public String getClusterMaxIter(){ // text of the "maxIter" param of the kmeans cluster element
+    return getStringValue("/config/clustering/cluster[@method='kmeans']/param[@name='maxIter']/text()");
+  }
+  
+  public String getClusterConvergenceDelta(){ // text of the "cd" param of the kmeans cluster element
+    return getStringValue("/config/clustering/cluster[@method='kmeans']/param[@name='cd']/text()");
+  }
+  
+  public String getInitClusterDir(){ // text of the "init" param of the kmeans cluster element
+    return getStringValue("/config/clustering/cluster[@method='kmeans']/param[@name='init']/text()");
+  }
+  
+  public String getClusterDumpFile(){ // dump output file: the file attribute of /config/clustering/dump
+    return getStringValue("/config/clustering/dump/@file");
+  }
+  
   private String getStringValue(String exp){
     try {
       return ((String)xpath.evaluate(exp, is, XPathConstants.STRING)).trim();

Added: labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java?rev=1424423&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java Thu Dec 20 12:02:47 2012
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.utils.clustering.ClusterDumper;
+
+/**
+ * This program reads alikeconfig.xml and launches {@link ClusterDumper#run(String[])}
+ * on the "final" directory found under the configured cluster output directory.
+ *
+ * @see AlikeConfig
+ */
+public final class ClusterDumperLauncher {
+
+  /**
+   * The main program that takes the path to alikeconfig.xml as an argument.
+   *
+   * @param args file path to alikeconfig.xml
+   * @throws IllegalStateException if the cluster output directory has no entry ending with "final"
+   * @throws Exception anything raised while reading the config, listing the directory or dumping
+   */
+  public static void main(String[] args) throws Exception {
+    if(args.length != 1){
+      printUsage(1);
+    }
+    AlikeConfig config = new AlikeConfig(args[0]);
+
+    // mahout clusterdump -i $(find output-clusters -name \*-final) -o result-centroids.txt
+    Configuration hConf = new Configuration();
+    ClusterDumper cDumper = new ClusterDumper(null, null);
+    cDumper.setConf(hConf);
+    Path outDir = new Path(config.getOutClusterDir());
+    FileStatus[] fsts = HadoopUtil.listStatus(FileSystem.get(hConf), outDir, new FindFinalFilter());
+    if(fsts == null || fsts.length == 0){
+      throw new IllegalStateException("no \"final\" cluster directory found in " + outDir);
+    }
+
+    // when several entries match, the last one listed is dumped
+    String input = fsts[fsts.length - 1].getPath().toString();
+    cDumper.run(new String[]{"-i", input, "-o", config.getClusterDumpFile()});
+  }
+
+  /** Prints the usage to stderr; exits with status {@code exit} unless it is negative. */
+  static void printUsage(int exit){
+    System.err.printf("Usage: $ java %s <path-to-alikeconfig.xml>\n",
+        ClusterDumperLauncher.class.getName());
+    System.err.println("\t<path-to-alikeconfig.xml> the file path to alikeconfig.xml");
+    if(exit >= 0){
+      System.exit(exit);
+    }
+  }
+
+  /** Accepts only paths whose string form ends with "final". */
+  static class FindFinalFilter implements PathFilter {
+    public boolean accept(Path path) {
+      return path.toString().endsWith("final");
+    }
+  }
+}

Added: labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java?rev=1424423&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java Thu Dec 20 12:02:47 2012
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import org.apache.mahout.clustering.kmeans.KMeansDriver;
+
+/**
+ * Command line entry point that reads alikeconfig.xml and hands the clustering
+ * settings found there to {@link KMeansDriver#main(String[])}.
+ *
+ * @see AlikeConfig
+ */
+public final class KMeansLauncher {
+
+  /**
+   * Loads the configuration named by the single argument and starts the
+   * Mahout k-means driver with the values read from it.
+   *
+   * @param args file path to alikeconfig.xml
+   * @throws Exception anything raised while loading the configuration or running the driver
+   */
+  public static void main(String[] args) throws Exception {
+    if(args.length != 1){
+      printUsage(1);
+    }
+
+    final AlikeConfig conf = new AlikeConfig(args[0]);
+
+    // mahout kmeans -i input-vectors -c init-clusters -o output-clusters -k 1000 --maxIter 50 -cd 0.01
+    final String[] driverArgs = {
+      "-i", conf.getDescHDFSDir(),
+      "-c", conf.getInitClusterDir(),
+      "-o", conf.getOutClusterDir(),
+      "-k", conf.getNumOfClusters(),
+      "--maxIter", conf.getClusterMaxIter(),
+      "-cd", conf.getClusterConvergenceDelta()
+    };
+    KMeansDriver.main(driverArgs);
+  }
+
+  /**
+   * Writes the usage text to standard error.
+   *
+   * @param exit process exit status; a negative value returns to the caller instead of exiting
+   */
+  static void printUsage(int exit){
+    final String self = KMeansLauncher.class.getName();
+    System.err.printf("Usage: $ java %s <path-to-alikeconfig.xml>\n", self);
+    System.err.println("\t<path-to-alikeconfig.xml> the file path to alikeconfig.xml");
+    if(exit < 0){
+      return;
+    }
+    System.exit(exit);
+  }
+}

Modified: labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java Thu Dec 20 12:02:47 2012
@@ -44,7 +44,7 @@ import org.apache.mahout.math.VectorWrit
 public class PrepareInputVectors {
 
   /**
-   * The main program that takes the path to alikeconfig.xml.
+   * The main program that takes the path to alikeconfig.xml as an argument.
    * 
    * @param args file path to alikeconfig.xml
    * @throws IOException 

Modified: labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java (original)
+++ labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java Thu Dec 20 12:02:47 2012
@@ -50,4 +50,34 @@ public class AlikeConfigTest {
   public void testGetDescHDFSDir() throws Exception {
     assertEquals("input-vectors", config.getDescHDFSDir());
   }
+  
+  @Test
+  public void testGetNumOfClusters() throws Exception {
+    assertEquals("1000", config.getNumOfClusters()); // read from /config/clustering/@k
+  }
+  
+  @Test
+  public void testGetOutClusterDir() throws Exception {
+    assertEquals("output-clusters", config.getOutClusterDir()); // read from /config/clustering/outDir
+  }
+  
+  @Test
+  public void testGetClusterMaxIter() throws Exception {
+    assertEquals("10", config.getClusterMaxIter()); // "maxIter" param of the kmeans cluster element
+  }
+  
+  @Test
+  public void testGetClusterConvergenceDelta() throws Exception {
+    assertEquals("0.01", config.getClusterConvergenceDelta()); // "cd" param of the kmeans cluster element
+  }
+  
+  @Test
+  public void testGetInitClusterDir() throws Exception {
+    assertEquals("init-clusters", config.getInitClusterDir()); // "init" param of the kmeans cluster element
+  }
+  
+  @Test
+  public void testGetClusterDumpFile() throws Exception {
+    assertEquals("result-centroids.txt", config.getClusterDumpFile()); // read from /config/clustering/dump/@file
+  }
 }

Modified: labs/alike/trunk/src/test/test-files/valid-conf.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/test/test-files/valid-conf.xml?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/src/test/test-files/valid-conf.xml (original)
+++ labs/alike/trunk/src/test/test-files/valid-conf.xml Thu Dec 20 12:02:47 2012
@@ -44,7 +44,7 @@
       <histogramFieldName>histogram</histogramFieldName>
     </fieldNames>
     <indexer class="org.apache.alike.SolrStandardXMLIndexer">
-      <histogramMatcher class="LeastSquaresHistogramMatcher"/>
+      <histogramMatcher class="org.apache.alike.LeastSquaresHistogramMatcher"/>
       <param name="file">solr-demo-data.xml</param>
     </indexer>
   </vectorQuantization>



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org