You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ko...@apache.org on 2012/12/20 13:02:47 UTC
svn commit: r1424423 - in /labs/alike/trunk: ./ demo/
src/java/org/apache/alike/ src/test/org/apache/alike/ src/test/test-files/
Author: koji
Date: Thu Dec 20 12:02:47 2012
New Revision: 1424423
URL: http://svn.apache.org/viewvc?rev=1424423&view=rev
Log:
add java programs for launching kmeans and clusterdump so that they can read xml config before launching mahout tools
Added:
labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java
labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java
Modified:
labs/alike/trunk/demo/README.txt
labs/alike/trunk/demo/build.xml
labs/alike/trunk/demo/demo-conf.xml
labs/alike/trunk/ivy.xml
labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java
labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java
labs/alike/trunk/src/test/test-files/valid-conf.xml
Modified: labs/alike/trunk/demo/README.txt
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/README.txt?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/demo/README.txt (original)
+++ labs/alike/trunk/demo/README.txt Thu Dec 20 12:02:47 2012
@@ -43,8 +43,8 @@
$ ant piv
# kmeans may take tens of minutes
- $ mahout kmeans -i input-vectors -c init-clusters -o output-clusters -k 1000 --maxIter 50 -cd 0.01
- $ mahout clusterdump -i $(find output-clusters -name \*-final) -o result-centroids.txt
+ $ ant kmeans
+ $ ant clusterdump
$ ant qv
7. goto Solr site, download Solr 4.0 or superior and unzip
Modified: labs/alike/trunk/demo/build.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/build.xml?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/demo/build.xml (original)
+++ labs/alike/trunk/demo/build.xml Thu Dec 20 12:02:47 2012
@@ -31,6 +31,24 @@
</java>
</target>
+ <target name="kmeans" depends="alike-compile" description="run KMeansLauncher">
+ <java classname="org.apache.alike.KMeansLauncher" dir="demo" fork="true"> <!-- forked JVM started in demo/, so the demo-conf.xml argument is looked up there; presumably relative paths inside the config resolve against demo/ as well - confirm -->
+ <jvmarg line="-Dfile.encoding=UTF-8"/>
+ <arg line="demo-conf.xml"/>
+ <classpath refid="common.path.lib"/>
+ <classpath path="${cls.dir}"/>
+ </java>
+ </target>
+
+ <target name="clusterdump" depends="alike-compile" description="run ClusterDumperLauncher">
+ <java classname="org.apache.alike.ClusterDumperLauncher" dir="demo" fork="true"> <!-- forked JVM started in demo/, the same working directory the kmeans target uses, so both launchers read the same demo-conf.xml -->
+ <jvmarg line="-Dfile.encoding=UTF-8"/>
+ <arg line="demo-conf.xml"/>
+ <classpath refid="common.path.lib"/>
+ <classpath path="${cls.dir}"/>
+ </java>
+ </target>
+
<target name="qv" depends="alike-compile" description="run PrepareInputVectors">
<java classname="org.apache.alike.QuantizeVectors" fork="true">
<jvmarg line="-Dfile.encoding=UTF-8"/>
Modified: labs/alike/trunk/demo/demo-conf.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/demo-conf.xml?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/demo/demo-conf.xml (original)
+++ labs/alike/trunk/demo/demo-conf.xml Thu Dec 20 12:02:47 2012
@@ -32,7 +32,7 @@
<cluster method="kmeans">
<param name="maxIter">10</param>
<param name="cd">0.01</param>
- <param name="init">demo/init-clusters</param>
+ <param name="init">init-clusters</param>
</cluster>
<dump file="result-centroids.txt"/>
</clustering>
@@ -44,7 +44,7 @@
<histogramFieldName>histogram</histogramFieldName>
</fieldNames>
<indexer class="org.apache.alike.SolrStandardXMLIndexer">
- <histogramMatcher class="LeastSquaresHistogramMatcher"/>
+ <histogramMatcher class="org.apache.alike.LeastSquaresHistogramMatcher"/>
<param name="file">demo/solr-demo-data.xml</param>
</indexer>
</vectorQuantization>
Modified: labs/alike/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/ivy.xml?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/ivy.xml (original)
+++ labs/alike/trunk/ivy.xml Thu Dec 20 12:02:47 2012
@@ -30,7 +30,9 @@
-->
<dependency org="commons-io" name="commons-io" rev="2.4"/>
<dependency org="org.apache.hadoop" name="hadoop-core" rev="0.20.204.0"/>
+ <dependency org="org.apache.mahout" name="mahout-integration" rev="0.7"/>
<dependency org="org.apache.mahout" name="mahout-core" rev="0.7"/>
<dependency org="org.slf4j" name="slf4j-jcl" rev="1.6.1"/>
+ <exclude org="org.mongodb"/>
</dependencies>
</ivy-module>
Modified: labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/AlikeConfig.java Thu Dec 20 12:02:47 2012
@@ -46,6 +46,30 @@ public final class AlikeConfig {
return getStringValue("/config/visualDescriptorsExtraction/descDir/hdfs/text()");
}
+ public String getNumOfClusters(){ // number of clusters: the k attribute of /config/clustering, returned as a string (KMeansLauncher passes it to -k)
+ return getStringValue("/config/clustering/@k");
+ }
+
+ public String getOutClusterDir(){ // clustering output directory: text of /config/clustering/outDir (k-means -o; also the directory ClusterDumperLauncher lists)
+ return getStringValue("/config/clustering/outDir/text()");
+ }
+
+ public String getClusterMaxIter(){ // "maxIter" param of the <cluster method="kmeans"> element, returned as a string (k-means --maxIter)
+ return getStringValue("/config/clustering/cluster[@method='kmeans']/param[@name='maxIter']/text()");
+ }
+
+ public String getClusterConvergenceDelta(){ // "cd" param of the <cluster method="kmeans"> element, returned as a string (k-means -cd)
+ return getStringValue("/config/clustering/cluster[@method='kmeans']/param[@name='cd']/text()");
+ }
+
+ public String getInitClusterDir(){ // "init" param of the <cluster method="kmeans"> element: the initial clusters directory (k-means -c)
+ return getStringValue("/config/clustering/cluster[@method='kmeans']/param[@name='init']/text()");
+ }
+
+ public String getClusterDumpFile(){ // file attribute of /config/clustering/dump: where ClusterDumperLauncher writes its output (clusterdump -o)
+ return getStringValue("/config/clustering/dump/@file");
+ }
+
private String getStringValue(String exp){
try {
return ((String)xpath.evaluate(exp, is, XPathConstants.STRING)).trim();
Added: labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java?rev=1424423&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/ClusterDumperLauncher.java Thu Dec 20 12:02:47 2012
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.utils.clustering.ClusterDumper;
+
+/**
+ * This program reads alikeconfig.xml and launches {@link ClusterDumper#run(String[])}
+ * on the "final" entry found under the configured clustering output directory.
+ *
+ * @see AlikeConfig
+ */
+public final class ClusterDumperLauncher {
+
+  /**
+   * The main program that takes the path to alikeconfig.xml as an argument.
+   * Prints usage and exits with status 1 when the argument count is wrong, and
+   * exits with ClusterDumper's own status when that status is non-zero.
+   *
+   * @param args file path to alikeconfig.xml
+   * @throws IllegalStateException if nothing ending with "final" is found under
+   *         the clustering output directory
+   * @throws Exception propagated from AlikeConfig, the Hadoop file system or ClusterDumper
+   */
+  public static void main(String[] args) throws Exception {
+    if(args.length != 1){
+      printUsage(1);
+    }
+
+    AlikeConfig config = new AlikeConfig(args[0]);
+
+    // mahout clusterdump -i $(find output-clusters -name \*-final) -o result-centroids.txt
+    Configuration hConf = new Configuration();
+    ClusterDumper cDumper = new ClusterDumper(null, null);
+    cDumper.setConf(hConf);
+    FileSystem fs = FileSystem.get(hConf);
+    Path outDir = new Path(config.getOutClusterDir());
+    FileStatus[] fsts = HadoopUtil.listStatus(fs, outDir, new FindFinalFilter());
+
+    // The listing may be null or empty (e.g. kmeans has not been run yet); fail with a
+    // clear message instead of a bare NPE / ArrayIndexOutOfBoundsException below.
+    if(fsts == null || fsts.length == 0){
+      throw new IllegalStateException("no cluster directory ending with \"final\" found under "
+          + outDir + "; run kmeans first");
+    }
+
+    // when several entries match, the last one listed is dumped
+    String[] arguments = new String[4];
+    arguments[0] = "-i";
+    arguments[1] = fsts[fsts.length - 1].getPath().toString();
+    arguments[2] = "-o";
+    arguments[3] = config.getClusterDumpFile();
+
+    // propagate ClusterDumper's status instead of silently dropping it
+    int status = cDumper.run(arguments);
+    if(status != 0){
+      System.exit(status);
+    }
+  }
+
+  /**
+   * Prints the command line usage to stderr.
+   *
+   * @param exit the process exit status; when negative the JVM is not terminated
+   */
+  static void printUsage(int exit){
+    System.err.printf("Usage: $ java %s <path-to-alikeconfig.xml>\n",
+        ClusterDumperLauncher.class.getName());
+    System.err.println("\t<path-to-alikeconfig.xml> the file path to alikeconfig.xml");
+
+    if(exit >= 0){
+      System.exit(exit);
+    }
+  }
+
+  /** Accepts only paths whose string form ends with "final". */
+  static class FindFinalFilter implements PathFilter {
+    public boolean accept(Path path) {
+      return path.toString().endsWith("final");
+    }
+  }
+}
Added: labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java?rev=1424423&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/KMeansLauncher.java Thu Dec 20 12:02:47 2012
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import org.apache.mahout.clustering.kmeans.KMeansDriver;
+
+/**
+ * Launcher that builds the k-means command line from alikeconfig.xml and hands it
+ * to {@link KMeansDriver#main(String[])}.
+ *
+ * @see AlikeConfig
+ */
+public final class KMeansLauncher {
+
+  /**
+   * Entry point; expects exactly one argument, the path to alikeconfig.xml.
+   *
+   * @param args file path to alikeconfig.xml
+   * @throws Exception propagated from AlikeConfig or KMeansDriver
+   */
+  public static void main(String[] args) throws Exception {
+    if(args.length != 1){
+      printUsage(1);
+    }
+
+    AlikeConfig conf = new AlikeConfig(args[0]);
+
+    // mahout kmeans -i input-vectors -c init-clusters -o output-clusters -k 1000 --maxIter 50 -cd 0.01
+    String[] kmeansArgs = {
+      "-i", conf.getDescHDFSDir(),
+      "-c", conf.getInitClusterDir(),
+      "-o", conf.getOutClusterDir(),
+      "-k", conf.getNumOfClusters(),
+      "--maxIter", conf.getClusterMaxIter(),
+      "-cd", conf.getClusterConvergenceDelta()
+    };
+
+    KMeansDriver.main(kmeansArgs);
+  }
+
+  /**
+   * Writes the usage message to stderr and, for a non-negative status, terminates the JVM.
+   *
+   * @param exit exit status; a negative value means "do not exit"
+   */
+  static void printUsage(int exit){
+    final String launcher = KMeansLauncher.class.getName();
+    System.err.printf("Usage: $ java %s <path-to-alikeconfig.xml>\n", launcher);
+    System.err.println("\t<path-to-alikeconfig.xml> the file path to alikeconfig.xml");
+
+    if(exit < 0){
+      return;
+    }
+    System.exit(exit);
+  }
+
+}
Modified: labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java Thu Dec 20 12:02:47 2012
@@ -44,7 +44,7 @@ import org.apache.mahout.math.VectorWrit
public class PrepareInputVectors {
/**
- * The main program that takes the path to alikeconfig.xml.
+ * The main program that takes the path to alikeconfig.xml as an argument.
*
* @param args file path to alikeconfig.xml
* @throws IOException
Modified: labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java (original)
+++ labs/alike/trunk/src/test/org/apache/alike/AlikeConfigTest.java Thu Dec 20 12:02:47 2012
@@ -50,4 +50,34 @@ public class AlikeConfigTest {
public void testGetDescHDFSDir() throws Exception {
assertEquals("input-vectors", config.getDescHDFSDir());
}
+
+ @Test
+ public void testGetNumOfClusters() throws Exception { // the k attribute must come back as the string "1000" (config fixture is set up outside this hunk; presumably valid-conf.xml - confirm)
+ assertEquals("1000", config.getNumOfClusters());
+ }
+
+ @Test
+ public void testGetOutClusterDir() throws Exception { // the clustering outDir text must come back as "output-clusters"
+ assertEquals("output-clusters", config.getOutClusterDir());
+ }
+
+ @Test
+ public void testGetClusterMaxIter() throws Exception { // the kmeans "maxIter" param must come back as the string "10", not a number
+ assertEquals("10", config.getClusterMaxIter());
+ }
+
+ @Test
+ public void testGetClusterConvergenceDelta() throws Exception { // the kmeans "cd" param must come back as the string "0.01", not a number
+ assertEquals("0.01", config.getClusterConvergenceDelta());
+ }
+
+ @Test
+ public void testGetInitClusterDir() throws Exception { // the kmeans "init" param must come back as "init-clusters"
+ assertEquals("init-clusters", config.getInitClusterDir());
+ }
+
+ @Test
+ public void testGetClusterDumpFile() throws Exception { // the file attribute of <dump> must come back as "result-centroids.txt"
+ assertEquals("result-centroids.txt", config.getClusterDumpFile());
+ }
}
Modified: labs/alike/trunk/src/test/test-files/valid-conf.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/test/test-files/valid-conf.xml?rev=1424423&r1=1424422&r2=1424423&view=diff
==============================================================================
--- labs/alike/trunk/src/test/test-files/valid-conf.xml (original)
+++ labs/alike/trunk/src/test/test-files/valid-conf.xml Thu Dec 20 12:02:47 2012
@@ -44,7 +44,7 @@
<histogramFieldName>histogram</histogramFieldName>
</fieldNames>
<indexer class="org.apache.alike.SolrStandardXMLIndexer">
- <histogramMatcher class="LeastSquaresHistogramMatcher"/>
+ <histogramMatcher class="org.apache.alike.LeastSquaresHistogramMatcher"/>
<param name="file">solr-demo-data.xml</param>
</indexer>
</vectorQuantization>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org