You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ko...@apache.org on 2012/12/18 04:06:04 UTC
svn commit: r1423266 - in /labs/alike/trunk: build.xml demo/ demo/README.txt
src/java/org/apache/alike/HistogramMatching.java
src/java/org/apache/alike/PrepareInputVectors.java
src/java/org/apache/alike/QuantizeVectors.java
Author: koji
Date: Tue Dec 18 03:05:54 2012
New Revision: 1423266
URL: http://svn.apache.org/viewvc?rev=1423266&view=rev
Log:
add QuantizeVectors and remove HistogramMatching
Added:
labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java
Removed:
labs/alike/trunk/src/java/org/apache/alike/HistogramMatching.java
Modified:
labs/alike/trunk/build.xml
labs/alike/trunk/demo/ (props changed)
labs/alike/trunk/demo/README.txt
labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
Modified: labs/alike/trunk/build.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1423266&r1=1423265&r2=1423266&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Tue Dec 18 03:05:54 2012
@@ -139,6 +139,15 @@
</java>
</target>
+ <!-- Runs QuantizeVectors with: <descriptor parent dir> <centroids file> <Solr XML output file> -->
+ <target name="run-qv" depends="alike-compile" description="run QuantizeVectors">
+ <java classname="org.apache.alike.QuantizeVectors" fork="true">
+ <jvmarg line="-Dfile.encoding=UTF-8"/>
+ <arg line="demo/desc demo/result-centroids.txt demo/solr-demo-data.xml"/>
+ <classpath refid="common.path.lib"/>
+ <classpath path="${cls.dir}"/>
+ </java>
+ </target>
+
<target name="run-clustering" depends="alike-compile" description="run Clustering">
<java classname="org.apache.alike.Clustering" fork="true">
<jvmarg line="-Dfile.encoding=UTF-8"/>
Propchange: labs/alike/trunk/demo/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Tue Dec 18 03:05:54 2012
@@ -5,3 +5,4 @@ input-vectors
init-clusters
output-clusters
result-centroids.txt
+solr-demo-data.xml
Modified: labs/alike/trunk/demo/README.txt
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/README.txt?rev=1423266&r1=1423265&r2=1423266&view=diff
==============================================================================
--- labs/alike/trunk/demo/README.txt (original)
+++ labs/alike/trunk/demo/README.txt Tue Dec 18 03:05:54 2012
@@ -69,7 +69,7 @@
10. index demo vector quantization data
- $ ./post.sh demo-data.xml
+ $ ./post.sh solr-demo-data.xml
11. startup demo web server
Modified: labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java?rev=1423266&r1=1423265&r2=1423266&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java Tue Dec 18 03:05:54 2012
@@ -91,7 +91,8 @@ public class PrepareInputVectors {
path, LongWritable.class, VectorWritable.class);
vw = new VectorWritable();
}
-
+
+ @Override
public boolean isExecutable(File theFile){
return theFile.getName().endsWith(".txt");
}
Added: labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java?rev=1423266&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java Tue Dec 18 03:05:54 2012
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.alike.FileUtil.Executor;
+import org.apache.commons.io.IOUtils;
+
+public class QuantizeVectors {
+
+ // TODO: make K and D configurable instead of compile-time constants
+ // K: number of centroid rows allocated by getCentroids(); also the number of
+ // bins in every histogram built by HistogramExecutor.
+ static final int K = 500;
+ // D: number of components stored per centroid and compared per descriptor
+ // in computeSimilarity().
+ static final int D = 64;
+
+  /**
+   * Command-line entry point: loads the centroids, builds one histogram per
+   * descriptor file and writes the histograms as a Solr "standard" XML file.
+   *
+   * @param args exactly three values: the parent directory of the visual
+   *        descriptors, the centroids file and the output file
+   * @throws IOException if the centroids cannot be read or the output cannot be written
+   */
+  public static void main(String[] args) throws IOException {
+    final boolean argCountOk = (args.length == 3);
+    if(!argCountOk){
+      // a non-negative exit code makes printUsage() terminate the JVM
+      printUsage(1);
+    }
+
+    final String descriptorDir = args[0];
+    final String centroidsFile = args[1];
+    final String solrXmlFile = args[2];
+
+    // step 1: cluster centroids
+    final double[][] centroids = getCentroids(centroidsFile);
+
+    // step 2: one histogram per descriptor file
+    // (presumably executeRecursively walks the directory tree - see FileUtil)
+    final HistogramExecutor histogramBuilder = new HistogramExecutor(centroids);
+    FileUtil.executeRecursively(histogramBuilder, descriptorDir);
+
+    // step 3: Solr "standard" XML
+    createForSolr(solrXmlFile, histogramBuilder.getHistograms());
+  }
+
+ static void printUsage(int exit){
+ System.err.printf("Usage: $ java %s <parent_dir_path> <centroids_file_path> <output_file_path>\n",
+ QuantizeVectors.class.getName());
+ System.err.println("\t<parent_dir_path> parent directory path of visual descriptors");
+ System.err.println("\t<centroids_file_path> file path to the cluster centroids");
+ System.err.println("\t<output_file_path> output file path for Solr \"standard\" XML");
+
+ if(exit >= 0){
+ System.exit(exit);
+ }
+ }
+
+ static double[][] getCentroids(String ifile) throws IOException {
+ double[][] centroids = new double[K][D];
+
+ FileReader fr = null;
+ BufferedReader br = null;
+
+ try{
+ fr = new FileReader(ifile);
+ br = new BufferedReader(fr);
+ String line = null;
+ int i = 0;
+ while((line = br.readLine()) != null){
+ int sp = line.indexOf("c=[") + "c=[".length();
+ int ep = line.indexOf("] r=[");
+ //System.out.printf("\"%s\"\n", line.substring(sp, ep));
+ String[] strValues = line.substring(sp, ep).trim().split(",\\s*");
+ if(strValues.length < D){
+ // may be sparse vector representation is used...
+ for(String sv : strValues){
+ int col = sv.indexOf(':');
+ int j = Integer.parseInt(sv.substring(0, col));
+ centroids[i][j] = Double.parseDouble(sv.substring(col + 1));
+ }
+ }
+ else{
+ for(int j = 0; j < D; j++){
+ centroids[i][j] = Double.parseDouble(strValues[j]);
+ }
+ }
+ i++;
+ }
+ }
+ finally{
+ IOUtils.closeQuietly(br);
+ IOUtils.closeQuietly(fr);
+ }
+
+ return centroids;
+ }
+
+  /**
+   * Executor that turns every accepted descriptor file ({@code *.txt}) into a
+   * histogram with {@link #K} bins: each descriptor line votes for the
+   * centroid it is closest to. Histograms are collected in a map keyed by the
+   * first line of the descriptor file.
+   */
+  static class HistogramExecutor extends Executor {
+
+    private double[][] centroids;
+    Map<String, int[]> histogramMap;
+
+    /**
+     * @param centroids centroid matrix, one row per visual word
+     */
+    public HistogramExecutor(double[][] centroids){
+      this.centroids = centroids;
+      histogramMap = new HashMap<String, int[]>();
+    }
+
+    /** Accepts only files whose name ends with {@code .txt}. */
+    @Override
+    public boolean isExecutable(File theFile){
+      return theFile.getName().endsWith(".txt");
+    }
+
+    /**
+     * Builds the histogram for one file.
+     *
+     * @throws RuntimeException wrapping the IOException if the file cannot be processed
+     */
+    public void execute(File theFile) {
+      try {
+        makeHistgram(theFile);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /**
+     * Reads one descriptor file and stores its histogram.
+     *
+     * <p>Layout read here: line 1 is the key, line 2 is skipped (a line
+     * count), every following non-blank line is one whitespace-separated
+     * descriptor. A completely empty file is ignored.
+     *
+     * @param theFile descriptor file
+     * @throws IOException if the file cannot be read or a descriptor has fewer
+     *         than {@link #D} components
+     */
+    void makeHistgram(File theFile) throws IOException {
+      int[] histgram = new int[K];
+      String key;
+
+      BufferedReader br = new BufferedReader(new FileReader(theFile));
+      try{
+        key = br.readLine();
+        if(key == null){
+          // empty file: do not register a histogram under a null key
+          return;
+        }
+        br.readLine(); // skip number of lines count
+
+        String line;
+        while((line = br.readLine()) != null){
+          String trimmed = line.trim();
+          if(trimmed.length() == 0){
+            continue; // a blank line carries no descriptor
+          }
+          String[] strValues = trimmed.split("\\s+");
+          if(strValues.length < D){
+            throw new IOException(theFile + ": descriptor has " + strValues.length
+                + " components, at least " + D + " expected");
+          }
+          double[] desc = new double[strValues.length];
+          for(int i = 0; i < strValues.length; i++){
+            desc[i] = Double.parseDouble(strValues[i]);
+          }
+
+          voteForVisualWord(histgram, desc);
+        }
+      }
+      finally{
+        // close on every path, including parse failures
+        IOUtils.closeQuietly(br);
+      }
+      histogramMap.put(key, histgram);
+    }
+
+    /**
+     * Increments the bin of the centroid with the smallest distance to
+     * {@code desc}; the lowest index wins a tie. No vote is cast when no
+     * centroid yields a distance below {@code Double.MAX_VALUE} (for example
+     * when the descriptor contains NaN).
+     */
+    void voteForVisualWord(int[] histgram, double[] desc){
+      double minDistance = Double.MAX_VALUE;
+      int pos = -1;
+      final int candidates = Math.min(centroids.length, histgram.length);
+      for(int i = 0; i < candidates; i++){
+        double distance = computeSimilarity(centroids[i], desc);
+        if(minDistance > distance){
+          minDistance = distance;
+          pos = i;
+        }
+      }
+
+      // vote for minimum distance
+      if(pos >= 0){
+        histgram[pos]++;
+      }
+    }
+
+    /** Returns the live map of key to histogram built so far. */
+    public Map<String, int[]> getHistograms(){
+      return histogramMap;
+    }
+  }
+
+ static double computeSimilarity(double[] centroid, double[] desc){
+ double sum = 0;
+ for(int i = 0; i < D; i++){
+ sum += (centroid[i] - desc[i]) * (centroid[i] - desc[i]);
+ }
+ return Math.sqrt(sum);
+ }
+
+ static void printHistograms(Map<String, int[]> histograms){
+ for(String key : histograms.keySet()){
+ System.out.println("\n------------------------------------------------------------");
+ System.out.println(key);
+ int[] histogram = histograms.get(key);
+ for(int i = 0; i < 100; i++){
+ int v = histogram[i];
+ for(int j = 0; j < v; j++){
+ System.out.print("*");
+ }
+ System.out.println(); // for LF
+ }
+ }
+ }
+
+  /**
+   * Writes all histograms to a Solr "standard" XML file: one {@code <doc>}
+   * per histogram inside a single {@code <add>} element.
+   *
+   * <p>The file is written as UTF-8 regardless of the platform default,
+   * because it carries no XML declaration and is therefore read as UTF-8.
+   *
+   * @param ofile path of the output file (overwritten if it exists)
+   * @param histograms map of key to histogram
+   * @throws IOException if the file cannot be created or a write fails
+   */
+  static void createForSolr(String ofile, Map<String, int[]> histograms) throws IOException {
+    PrintWriter pw = null;
+    try{
+      pw = new PrintWriter(ofile, "UTF-8");
+      pw.println("<add>");
+      for(Map.Entry<String, int[]> entry : histograms.entrySet()){
+        int[] histogram = entry.getValue();
+        pw.println("<doc>");
+        printImageFileNameField(pw, entry.getKey());
+        printAssembledQueryField(pw, histogram);
+        printHistogramField(pw, histogram);
+        pw.println("</doc>");
+      }
+      pw.println("</add>");
+
+      // PrintWriter never throws on write errors; ask for them explicitly
+      if(pw.checkError()){
+        throw new IOException("failed to write " + ofile);
+      }
+    }
+    finally{
+      IOUtils.closeQuietly(pw);
+    }
+  }
+
+ private static void printImageFileNameField(PrintWriter pw, String key) throws IOException {
+ printField(pw, "imgFile", key);
+ }
+
+ private static void printAssembledQueryField(PrintWriter pw, int[] histogram) throws IOException {
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i < histogram.length; i++){
+ if(histogram[i] > 0){
+ String q = Integer.toString(i) + "^" + Integer.toString(histogram[i]);
+ sb.append(q).append(' ');
+ }
+ }
+
+ printField(pw, "query", sb.toString().trim());
+ }
+
+ private static void printHistogramField(PrintWriter pw, int[] histogram) throws IOException {
+ StringBuilder sb = new StringBuilder();
+ for(int i = 0; i < histogram.length; i++){
+ int v = histogram[i];
+ for(int j = 0; j < v; j++){
+ sb.append(Integer.toString(i)).append(' ');
+ }
+ }
+
+ printField(pw, "histogram", sb.toString().trim());
+ }
+
+  /**
+   * Writes one {@code <field name="...">value</field>} line.
+   *
+   * <p>Name and value are XML-escaped so that characters such as {@code &}
+   * or {@code <} in a key cannot produce a malformed document. A
+   * {@code null} value is written as the text {@code null}.
+   *
+   * @param pw destination writer
+   * @param name field name
+   * @param value field content
+   */
+  static void printField(PrintWriter pw, String name, String value) throws IOException {
+    pw.printf(" <field name=\"%s\">%s</field>\n", escapeXml(name), escapeXml(value));
+  }
+
+  /** Replaces the XML special characters {@code & < > "} by their entities. */
+  private static String escapeXml(String text){
+    // '&' must be handled first, otherwise the entities themselves get escaped
+    return String.valueOf(text)
+        .replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+        .replace("\"", "&quot;");
+  }
+
+ static void test(Map<String, int[]> histograms){
+ Comparator<KeyScorePair> c = new KeyScorePairComparator();
+ for(String key : histograms.keySet()){
+ if(!key.endsWith("0010.txt")) continue;
+
+ int[] srcHisto = histograms.get(key);
+ List<KeyScorePair> list = new ArrayList<QuantizeVectors.KeyScorePair>();
+ for(Map.Entry<String, int[]> entry : histograms.entrySet()){
+ int[] destHisto = entry.getValue();
+ list.add(new KeyScorePair(entry.getKey(), cosine(srcHisto, destHisto)));
+ //list.add(new KeyScorePair(entry.getKey(), intersection(srcHisto, destHisto)));
+ }
+ Collections.sort(list, c);
+
+ System.out.printf("\n%s\n", key);
+ for(int i = 0; i < 10; i++){
+ KeyScorePair ksp = list.get(i);
+ System.out.printf("\t%s, %f\n", ksp.key, ksp.score);
+ }
+ }
+ }
+
+  /**
+   * Histogram intersection: the sum of the element-wise minimum of both vectors.
+   *
+   * @param v1 first histogram; its length drives the iteration
+   * @param v2 second histogram, at least as long as {@code v1}
+   * @return the summed minima as a double
+   */
+  public static double intersection(int[] v1, int[] v2){
+    int overlap = 0;
+    int idx = 0;
+    while(idx < v1.length){
+      overlap += (v1[idx] <= v2[idx]) ? v1[idx] : v2[idx];
+      idx++;
+    }
+
+    return overlap;
+  }
+
+  /**
+   * Cosine similarity of two histograms.
+   *
+   * <p>Products are widened before multiplying so that large counts cannot
+   * overflow {@code int} arithmetic.
+   *
+   * @param v1 first histogram; its length drives the iteration
+   * @param v2 second histogram, at least as long as {@code v1}
+   * @return the dot product divided by the product of both Euclidean norms,
+   *         or 0 when the dot product or either norm is zero
+   */
+  public static double cosine(int[] v1, int[] v2){
+    long numerator = 0;
+    for(int i = 0; i < v1.length; i++){
+      // widen first: int * int would silently overflow
+      numerator += (long)v1[i] * v2[i];
+    }
+    if(numerator == 0) return 0;
+    double denominator = getSumSquareRoot(v1) * getSumSquareRoot(v2);
+
+    // should not occur when the dot product is non-zero, but avoid dividing by zero
+    if(denominator == 0.0) return 0;
+
+    return numerator / denominator;
+  }
+
+  /**
+   * Euclidean norm of a histogram: the square root of the sum of squares.
+   *
+   * @param v histogram
+   * @return the norm; 0 for an empty or all-zero vector
+   */
+  public static double getSumSquareRoot(int[] v){
+    double sum = 0;
+    for(int i = 0; i < v.length; i++){
+      // square in double precision: int * int would silently overflow
+      sum += (double)v[i] * v[i];
+    }
+
+    return Math.sqrt(sum);
+  }
+
+  /** Plain holder that pairs a histogram key with the score computed for it. */
+  static class KeyScorePair {
+    /** score assigned to {@link #key} */
+    double score;
+    /** key of the scored histogram */
+    String key;
+
+    public KeyScorePair(String key, double score){
+      this.score = score;
+      this.key = key;
+    }
+  }
+
+  /**
+   * Orders {@link KeyScorePair}s by descending score, so the best match sorts
+   * first. Equal scores compare as 0, as the {@link Comparator} contract
+   * requires.
+   */
+  static class KeyScorePairComparator implements Comparator<KeyScorePair> {
+    public int compare(KeyScorePair arg0, KeyScorePair arg1) {
+      // arguments swapped on purpose: higher score first
+      return Double.compare(arg1.score, arg0.score);
+    }
+  }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org