You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ko...@apache.org on 2012/12/18 04:06:04 UTC

svn commit: r1423266 - in /labs/alike/trunk: build.xml demo/ demo/README.txt src/java/org/apache/alike/HistogramMatching.java src/java/org/apache/alike/PrepareInputVectors.java src/java/org/apache/alike/QuantizeVectors.java

Author: koji
Date: Tue Dec 18 03:05:54 2012
New Revision: 1423266

URL: http://svn.apache.org/viewvc?rev=1423266&view=rev
Log:
add QuantizeVectors and remove HistogramMatching

Added:
    labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java
Removed:
    labs/alike/trunk/src/java/org/apache/alike/HistogramMatching.java
Modified:
    labs/alike/trunk/build.xml
    labs/alike/trunk/demo/   (props changed)
    labs/alike/trunk/demo/README.txt
    labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java

Modified: labs/alike/trunk/build.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1423266&r1=1423265&r2=1423266&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Tue Dec 18 03:05:54 2012
@@ -139,6 +139,15 @@
         </java>
     </target>
 
+    <target name="run-qv" depends="alike-compile" description="run QuantizeVectors">
+        <java classname="org.apache.alike.QuantizeVectors" fork="true">
+            <jvmarg line="-Dfile.encoding=UTF-8"/>
+            <arg line="demo/desc demo/result-centroids.txt demo/solr-demo-data.xml"/>
+            <classpath refid="common.path.lib"/>
+            <classpath path="${cls.dir}"/>
+        </java>
+    </target>
+
     <target name="run-clustering" depends="alike-compile" description="run Clustering">
         <java classname="org.apache.alike.Clustering" fork="true">
             <jvmarg line="-Dfile.encoding=UTF-8"/>

Propchange: labs/alike/trunk/demo/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Tue Dec 18 03:05:54 2012
@@ -5,3 +5,4 @@ input-vectors
 init-clusters
 output-clusters
 result-centroids.txt
+solr-demo-data.xml

Modified: labs/alike/trunk/demo/README.txt
URL: http://svn.apache.org/viewvc/labs/alike/trunk/demo/README.txt?rev=1423266&r1=1423265&r2=1423266&view=diff
==============================================================================
--- labs/alike/trunk/demo/README.txt (original)
+++ labs/alike/trunk/demo/README.txt Tue Dec 18 03:05:54 2012
@@ -69,7 +69,7 @@
 
 10. index demo vector quantization data
 
-   $ ./post.sh demo-data.xml
+   $ ./post.sh solr-demo-data.xml
 
 11. startup demo web server
 

Modified: labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java?rev=1423266&r1=1423265&r2=1423266&view=diff
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java (original)
+++ labs/alike/trunk/src/java/org/apache/alike/PrepareInputVectors.java Tue Dec 18 03:05:54 2012
@@ -91,7 +91,8 @@ public class PrepareInputVectors {
           path, LongWritable.class, VectorWritable.class);
       vw = new VectorWritable();
     }
-    
+
+    @Override
     public boolean isExecutable(File theFile){
       return theFile.getName().endsWith(".txt");
     }

Added: labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java?rev=1423266&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/QuantizeVectors.java Tue Dec 18 03:05:54 2012
@@ -0,0 +1,487 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.alike.FileUtil.Executor;
+import org.apache.commons.io.IOUtils;
+
+/**
+ * Quantizes visual descriptors against cluster centroids ("visual words")
+ * and writes the resulting per-image histograms as a Solr "standard" XML file.
+ */
+public class QuantizeVectors {
+
+  // TODO: make parameterization
+  /** Number of centroids, i.e. number of histogram bins. */
+  static final int K = 500;
+  /** Number of components of a centroid / descriptor vector that are used. */
+  static final int D = 64;
+
+  /**
+   * Command line entry point: reads the centroids, builds one histogram per
+   * descriptor file and writes them out for Solr.
+   *
+   * @param args parent_dir_path, centroids_file_path and output_file_path
+   * @throws IOException if the centroids cannot be read or the output cannot be written
+   */
+  public static void main(String[] args) throws IOException {
+    if(args.length != 3){
+      printUsage(1);
+    }
+
+    // read cluster centroids
+    double[][] centroids = getCentroids(args[1]);
+
+    // make histograms
+    HistogramExecutor executor = new HistogramExecutor(centroids);
+    FileUtil.executeRecursively(executor, args[0]);
+    Map<String, int[]> histograms = executor.getHistograms();
+
+    // create Solr "standard" XML file
+    createForSolr(args[2], histograms);
+  }
+
+  /**
+   * Prints the command line usage to stderr.
+   *
+   * @param exit exit status; the JVM is terminated with it unless it is negative
+   */
+  static void printUsage(int exit){
+    System.err.printf("Usage: $ java %s <parent_dir_path> <centroids_file_path> <output_file_path>\n",
+        QuantizeVectors.class.getName());
+    System.err.println("\t<parent_dir_path> parent directory path of visual descriptors");
+    System.err.println("\t<centroids_file_path> file path to the cluster centroids");
+    System.err.println("\t<output_file_path> output file path for Solr \"standard\" XML");
+
+    if(exit >= 0){
+      System.exit(exit);
+    }
+  }
+
+  /**
+   * Reads cluster centroids from a text file, one centroid per line. The
+   * vector is taken from between the "c=[" and "] r=[" markers and is either
+   * dense (comma separated values) or sparse (comma separated index:value
+   * pairs). Lines that do not carry both markers are skipped.
+   *
+   * @param ifile path to the centroids file
+   * @return a K x D matrix; rows for which no centroid was read stay zero
+   * @throws IOException if the file cannot be read, holds more than K
+   *         centroids or contains a malformed sparse element
+   */
+  static double[][] getCentroids(String ifile) throws IOException {
+    double[][] centroids = new double[K][D];
+
+    FileReader fr = null;
+    BufferedReader br = null;
+
+    try{
+      fr = new FileReader(ifile);
+      br = new BufferedReader(fr);
+      String line = null;
+      int i = 0;
+      while((line = br.readLine()) != null){
+        int mp = line.indexOf("c=[");
+        int sp = mp + "c=[".length();
+        int ep = line.indexOf("] r=[");
+        if(mp < 0 || ep < sp){
+          // not a centroid line (blank line, header, ...)
+          continue;
+        }
+        if(i >= K){
+          throw new IOException(ifile + " contains more than " + K + " centroids");
+        }
+        String[] strValues = line.substring(sp, ep).trim().split(",\\s*");
+        if(strValues.length < D){
+          // may be sparse vector representation is used...
+          for(String sv : strValues){
+            if(sv.length() == 0){
+              continue;   // empty vector "c=[]": keep the zeros
+            }
+            int col = sv.indexOf(':');
+            if(col < 0){
+              throw new IOException("unexpected centroid element \"" + sv + "\" in " + ifile);
+            }
+            int j = Integer.parseInt(sv.substring(0, col));
+            centroids[i][j] = Double.parseDouble(sv.substring(col + 1));
+          }
+        }
+        else{
+          for(int j = 0; j < D; j++){
+            centroids[i][j] = Double.parseDouble(strValues[j]);
+          }
+        }
+        i++;
+      }
+    }
+    finally{
+      IOUtils.closeQuietly(br);
+      IOUtils.closeQuietly(fr);
+    }
+
+    return centroids;
+  }
+
+  /**
+   * Executor that builds one visual word histogram per descriptor file
+   * (*.txt) and collects them keyed by the first line of each file.
+   */
+  static class HistogramExecutor extends Executor {
+
+    private double[][] centroids;
+    Map<String, int[]> histogramMap;
+
+    public HistogramExecutor(double[][] centroids){
+      this.centroids = centroids;
+      histogramMap = new HashMap<String, int[]>();
+    }
+
+    @Override
+    public boolean isExecutable(File theFile){
+      return theFile.getName().endsWith(".txt");
+    }
+
+    public void execute(File theFile) {
+      try {
+        makeHistgram(theFile);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /**
+     * Reads a descriptor file and records its histogram. The first line is
+     * used as the key, the second line (number of lines count) is skipped and
+     * every following non-blank line is one whitespace separated descriptor.
+     *
+     * @param theFile descriptor file
+     * @throws IOException if the file cannot be read or a descriptor has
+     *         fewer than D components
+     */
+    void makeHistgram(File theFile) throws IOException {
+      int[] histgram = new int[K];
+
+      BufferedReader br = null;
+      try{
+        br = new BufferedReader(new FileReader(theFile));
+        String key = br.readLine();
+        if(key == null){
+          return;   // empty file: nothing to index
+        }
+        br.readLine(); // skip number of lines count
+        String line = null;
+        while((line = br.readLine()) != null){
+          line = line.trim();
+          if(line.length() == 0){
+            continue;   // tolerate blank (e.g. trailing) lines
+          }
+          String[] strValues = line.split("\\s+");
+          if(strValues.length < D){
+            throw new IOException(theFile + ": descriptor has " + strValues.length
+                + " components, expected at least " + D);
+          }
+          double[] desc = new double[strValues.length];
+          for(int i = 0; i < strValues.length; i++){
+            desc[i] = Double.parseDouble(strValues[i]);
+          }
+
+          voteForVisualWord(histgram, desc);
+        }
+        histogramMap.put(key, histgram);
+      }
+      finally{
+        // close the reader even when parsing fails
+        IOUtils.closeQuietly(br);
+      }
+    }
+
+    /**
+     * Increments the bin of the centroid nearest to the descriptor.
+     *
+     * @param histgram histogram to vote into
+     * @param desc descriptor vector
+     */
+    void voteForVisualWord(int[] histgram, double[] desc){
+      double minDistance = Double.MAX_VALUE;
+      int pos = -1;
+      for(int i = 0; i < K; i++){
+        double distance = computeSimilarity(centroids[i], desc);
+        if(minDistance > distance){
+          minDistance = distance;
+          pos = i;
+        }
+      }
+
+      // vote for minimum distance; no bin qualifies when every distance is
+      // NaN or not below Double.MAX_VALUE, so skip instead of indexing out of range
+      if(pos >= 0){
+        histgram[pos]++;
+      }
+    }
+
+    public Map<String, int[]> getHistograms(){
+      return histogramMap;
+    }
+  }
+
+  /**
+   * Computes the Euclidean distance between the first D components of the
+   * two vectors; despite the name, a smaller value means more similar.
+   *
+   * @param centroid centroid vector with at least D components
+   * @param desc descriptor vector with at least D components
+   * @return the Euclidean distance
+   */
+  static double computeSimilarity(double[] centroid, double[] desc){
+    double sum = 0;
+    for(int i = 0; i < D; i++){
+      double diff = centroid[i] - desc[i];
+      sum += diff * diff;
+    }
+    return Math.sqrt(sum);
+  }
+
+  /**
+   * Prints the first 100 bins (or fewer for shorter histograms) of every
+   * histogram to stdout as rows of asterisks, for debugging.
+   *
+   * @param histograms histograms to print
+   */
+  static void printHistograms(Map<String, int[]> histograms){
+    for(Map.Entry<String, int[]> entry : histograms.entrySet()){
+      System.out.println("\n------------------------------------------------------------");
+      System.out.println(entry.getKey());
+      int[] histogram = entry.getValue();
+      int bins = Math.min(100, histogram.length);
+      for(int i = 0; i < bins; i++){
+        int v = histogram[i];
+        for(int j = 0; j < v; j++){
+          System.out.print("*");
+        }
+        System.out.println();  // for LF
+      }
+    }
+  }
+
+  /**
+   * Writes the histograms as a Solr "standard" XML file with one doc per
+   * histogram holding the imgFile, query and histogram fields.
+   *
+   * @param ofile output file path
+   * @param histograms histograms keyed by the imgFile value
+   * @throws IOException if the file cannot be created or written
+   */
+  static void createForSolr(String ofile, Map<String, int[]> histograms) throws IOException {
+    PrintWriter pw = null;
+    try{
+      // no XML declaration is written, so the file must be UTF-8
+      pw = new PrintWriter(ofile, "UTF-8");
+      pw.println("<add>");
+      for(Map.Entry<String, int[]> entry : histograms.entrySet()){
+        pw.println("<doc>");
+        printImageFileNameField(pw, entry.getKey());
+        printAssembledQueryField(pw, entry.getValue());
+        printHistogramField(pw, entry.getValue());
+        pw.println("</doc>");
+      }
+      pw.println("</add>");
+      // PrintWriter swallows write errors; surface them to the caller
+      if(pw.checkError()){
+        throw new IOException("failed to write " + ofile);
+      }
+    }
+    finally{
+      IOUtils.closeQuietly(pw);
+    }
+  }
+
+  private static void printImageFileNameField(PrintWriter pw, String key) throws IOException {
+    printField(pw, "imgFile", key);
+  }
+
+  /** Writes the "query" field: space separated bin^count terms for every non-empty bin. */
+  private static void printAssembledQueryField(PrintWriter pw, int[] histogram) throws IOException {
+    StringBuilder sb = new StringBuilder();
+    for(int i = 0; i < histogram.length; i++){
+      if(histogram[i] > 0){
+        sb.append(i).append('^').append(histogram[i]).append(' ');
+      }
+    }
+
+    printField(pw, "query", sb.toString().trim());
+  }
+
+  /** Writes the "histogram" field: every bin index repeated count times, space separated. */
+  private static void printHistogramField(PrintWriter pw, int[] histogram) throws IOException {
+    StringBuilder sb = new StringBuilder();
+    for(int i = 0; i < histogram.length; i++){
+      int v = histogram[i];
+      for(int j = 0; j < v; j++){
+        sb.append(i).append(' ');
+      }
+    }
+
+    printField(pw, "histogram", sb.toString().trim());
+  }
+
+  /**
+   * Writes one field element; the value is XML-escaped so that keys
+   * containing markup characters cannot break the document.
+   *
+   * @param pw writer to print to
+   * @param name field name
+   * @param value field value
+   */
+  static void printField(PrintWriter pw, String name, String value) throws IOException {
+    pw.printf("  <field name=\"%s\">%s</field>\n", name, escapeXml(value));
+  }
+
+  /** Replaces the XML special characters of the value by entity references. */
+  private static String escapeXml(String value){
+    if(value == null){
+      return null;
+    }
+    StringBuilder sb = new StringBuilder(value.length() + 16);
+    for(int i = 0; i < value.length(); i++){
+      char c = value.charAt(i);
+      switch(c){
+        case '&': sb.append("&amp;"); break;
+        case '<': sb.append("&lt;"); break;
+        case '>': sb.append("&gt;"); break;
+        case '"': sb.append("&quot;"); break;
+        default: sb.append(c);
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Ad hoc check: for every key ending with "0010.txt", prints the (up to)
+   * 10 histograms that are most similar by cosine similarity.
+   *
+   * @param histograms histograms to compare
+   */
+  static void test(Map<String, int[]> histograms){
+    Comparator<KeyScorePair> c = new KeyScorePairComparator();
+    for(String key : histograms.keySet()){
+      if(!key.endsWith("0010.txt")) continue;
+
+      int[] srcHisto = histograms.get(key);
+      List<KeyScorePair> list = new ArrayList<QuantizeVectors.KeyScorePair>();
+      for(Map.Entry<String, int[]> entry : histograms.entrySet()){
+        int[] destHisto = entry.getValue();
+        list.add(new KeyScorePair(entry.getKey(), cosine(srcHisto, destHisto)));
+        //list.add(new KeyScorePair(entry.getKey(), intersection(srcHisto, destHisto)));
+      }
+      Collections.sort(list, c);
+
+      System.out.printf("\n%s\n", key);
+      // there may be fewer than 10 histograms
+      int top = Math.min(10, list.size());
+      for(int i = 0; i < top; i++){
+        KeyScorePair ksp = list.get(i);
+        System.out.printf("\t%s, %f\n", ksp.key, ksp.score);
+      }
+    }
+  }
+
+  /**
+   * Computes the histogram intersection, i.e. the sum of the bin-wise minima.
+   *
+   * @param v1 first histogram
+   * @param v2 second histogram with at least as many bins as v1
+   * @return the intersection
+   */
+  public static double intersection(int[] v1, int[] v2){
+    int sum = 0;
+    for(int i = 0; i < v1.length; i++){
+      sum += Math.min(v1[i], v2[i]);
+    }
+
+    return (double)sum;
+  }
+
+  /**
+   * Computes the cosine similarity of two histograms.
+   *
+   * @param v1 first histogram
+   * @param v2 second histogram with at least as many bins as v1
+   * @return the cosine similarity, or 0 if the dot product or a norm is zero
+   */
+  public static double cosine(int[] v1, int[] v2){
+    long numerator = 0;
+    for(int i = 0; i < v1.length; i++){
+      // widen before multiplying, the int product can overflow
+      numerator += (long)v1[i] * v2[i];
+    }
+    if(numerator == 0) return 0;
+    double denominator = getSumSquareRoot(v1) * getSumSquareRoot(v2);
+
+    // shouldn't occur, but let's avoid dividing by zero
+    if(denominator == 0.0) return 0;
+
+    return numerator / denominator;
+  }
+
+  /**
+   * Computes the Euclidean norm, i.e. the square root of the sum of squares.
+   *
+   * @param v vector
+   * @return the norm of v
+   */
+  public static double getSumSquareRoot(int[] v){
+    double sum = 0;
+    for(int i = 0; i < v.length; i++){
+      // widen before multiplying, the int product can overflow
+      sum += (double)v[i] * v[i];
+    }
+
+    return Math.sqrt(sum);
+  }
+
+  /** Pairs a histogram key with its score. */
+  static class KeyScorePair {
+    String key;
+    double score;
+    public KeyScorePair(String key, double score){
+      this.key = key;
+      this.score = score;
+    }
+  }
+
+  /** Orders pairs by descending score. */
+  static class KeyScorePairComparator implements Comparator<KeyScorePair> {
+    public int compare(KeyScorePair arg0, KeyScorePair arg1) {
+      // must return 0 for equal scores to honor the Comparator contract
+      return Double.compare(arg1.score, arg0.score);
+    }
+  }
+
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org