You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ko...@apache.org on 2013/01/02 15:01:38 UTC

svn commit: r1427792 - in /labs/alike/trunk: build.xml ivy.xml src/java/org/apache/alike/eval/ src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java

Author: koji
Date: Wed Jan  2 14:01:38 2013
New Revision: 1427792

URL: http://svn.apache.org/viewvc?rev=1427792&view=rev
Log:
add SimpleTopHitsEvaluator

Added:
    labs/alike/trunk/src/java/org/apache/alike/eval/
    labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
Modified:
    labs/alike/trunk/build.xml
    labs/alike/trunk/ivy.xml

Modified: labs/alike/trunk/build.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1427792&r1=1427791&r2=1427792&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Wed Jan  2 14:01:38 2013
@@ -147,6 +147,18 @@
     </target>
 
     <!-- ================================================================== -->
+    <!-- = LAUNCH TOOLS                                                   = -->
+    <!-- ================================================================== -->
+    <!-- Runs SimpleTopHitsEvaluator in a forked JVM; the index directory is taken from the "index" property. -->
+    <target name="eval" depends="alike-compile" description="run SimpleTopHitsEvaluator program">
+        <java classname="org.apache.alike.eval.SimpleTopHitsEvaluator" fork="true">
+            <jvmarg line="-Dfile.encoding=UTF-8"/>
+            <!-- "value" (unlike "line") passes the property as ONE argument, so a path with spaces is not split -->
+            <arg value="${index}"/>
+            <classpath refid="common.path.lib"/>
+            <classpath path="${cls.dir}"/>
+        </java>
+    </target>
+
+    <!-- ================================================================== -->
     <!-- = CLEANING                                                       = -->
     <!-- ================================================================== -->
     <target name="clean" description="clean all">

Modified: labs/alike/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/ivy.xml?rev=1427792&r1=1427791&r2=1427792&view=diff
==============================================================================
--- labs/alike/trunk/ivy.xml (original)
+++ labs/alike/trunk/ivy.xml Wed Jan  2 14:01:38 2013
@@ -33,6 +33,8 @@
     <dependency org="org.apache.mahout" name="mahout-integration" rev="0.7"/>
     <dependency org="org.apache.mahout" name="mahout-core" rev="0.7"/>
     <dependency org="org.apache.lucene" name="lucene-core" rev="4.0.0" />
+    <dependency org="org.apache.lucene" name="lucene-queryparser" rev="4.0.0" />
+    <dependency org="org.apache.lucene" name="lucene-analyzers-common" rev="4.0.0" />
     <dependency org="org.slf4j" name="slf4j-jcl" rev="1.6.1"/>
     <exclude org="org.mongodb"/>
   </dependencies>

Added: labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java?rev=1427792&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java Wed Jan  2 14:01:38 2013
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortField.Type;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version;
+
+/**
+ * Command-line evaluator that measures how often the stored query of each
+ * indexed image retrieves that image's "friends" among its top hits.
+ * <p>
+ * Ids are expected to look like <code>ukbench/full/ukbench02515.jpg</code>;
+ * the friends of an image are the other three members of its block of four
+ * consecutive numeric ids. The tool prints the mean number of friends found
+ * in the top 3, in the top 10, and a weighted "TOP3+7" score.
+ */
+public class SimpleTopHitsEvaluator {
+
+  /** Stored field holding the image file path; doubles as the document id. */
+  static final String F_ID = "imgFile";
+  /** Stored field holding the query text used to look for similar images. */
+  static final String F_QUERY = "query";
+  /** Default field of the query parser. */
+  static final String F_HISTOGRAM = "histogram";
+  static IndexReader reader;
+  static IndexSearcher searcher;
+  static QueryParser parser;
+
+  /**
+   * Opens the index given as the first argument, runs the evaluation and
+   * prints the mean scores.
+   *
+   * @param args <code>args[0]</code> is the path of the Lucene index directory
+   * @throws IllegalArgumentException if no index path is given
+   */
+  public static void main(String[] args) throws Exception {
+    if(args.length < 1){
+      throw new IllegalArgumentException("usage: SimpleTopHitsEvaluator <index dir>");
+    }
+    final String index = args[0];
+    System.out.printf("index:\"%s\"\n", index);
+    parser = new QueryParser(Version.LUCENE_40, F_HISTOGRAM, new WhitespaceAnalyzer(Version.LUCENE_40));
+    Directory dir = FSDirectory.open(new File(index));
+    try{
+      reader = DirectoryReader.open(dir);
+      try{
+        searcher = new IndexSearcher(reader);
+        evaluate();
+      }
+      finally{
+        // release the index even when a query fails to parse or execute
+        reader.close();
+      }
+    }
+    finally{
+      dir.close();
+    }
+  }
+
+  /** Scores every document against its friends and prints the mean scores. */
+  private static void evaluate() throws IOException, ParseException {
+    String[][] sortedIdsQueries = getSortedIDsQueries();
+    int num = sortedIdsQueries.length;
+    if(num == 0){
+      System.out.println("no documents to evaluate");
+      return;
+    }
+    int hits3 = 0;
+    int hits10 = 0;
+    for(String[] idAndQuery : sortedIdsQueries){
+      String id = idAndQuery[0];
+      // The id contains '/' characters, which are special in the classic query
+      // syntax, so escape it before excluding the document itself from its own hits.
+      String query = idAndQuery[1] + " NOT " + F_ID + ":" + QueryParser.escape(id);
+      List<Integer> top10docs = getSimilarDocs(id, query, 10);
+      for(int friend : getFriends(getNumId(id))){
+        int rank = top10docs.indexOf(Integer.valueOf(friend));
+        if(rank >= 0){
+          hits10++;
+          if(rank < 3){
+            hits3++;
+          }
+        }
+      }
+    }
+    // a friend in the top 3 counts 1, a friend at rank 4 to 10 counts 0.5
+    float weighted = hits3 + (hits10 - hits3) * 0.5F;
+    System.out.printf("mean TOP3   = %1.6f\n", hits3 / (float)num);
+    System.out.printf("mean TOP10  = %1.6f\n", hits10 / (float)num);
+    System.out.printf("mean TOP3+7 = %1.6f\n", weighted / (float)num);
+  }
+
+  /**
+   * Reads the id and the stored query of every document, ordered by id.
+   *
+   * @return one <code>{id, query}</code> pair per document
+   */
+  static String[][] getSortedIDsQueries() throws IOException {
+    int numDocs = reader.numDocs();
+    System.out.printf("numDocs=%d\n", numDocs);
+    if(numDocs == 0){
+      // nothing to fetch from an empty index
+      return new String[0][2];
+    }
+    String[][] sortedIdsQueries = new String[numDocs][2];
+    SortField sf = new SortField(F_ID, Type.STRING);
+    TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), numDocs, new Sort(sf));
+    int i = 0;
+    for(ScoreDoc scoreDoc : topDocs.scoreDocs){
+      Document doc = searcher.doc(scoreDoc.doc);
+      sortedIdsQueries[i][0] = doc.get(F_ID);
+      sortedIdsQueries[i++][1] = doc.get(F_QUERY);
+    }
+    return sortedIdsQueries;
+  }
+
+  /**
+   * Runs <code>query</code> and returns the numeric ids of its top <code>n</code> hits.
+   *
+   * @param id id of the document the query was built from (not used by the search)
+   * @param query query string in the classic Lucene query syntax
+   * @param n number of hits required
+   * @return numeric ids of the hits, best hit first
+   * @throws RuntimeException if fewer than <code>n</code> documents match
+   */
+  static List<Integer> getSimilarDocs(String id, String query, int n) throws IOException, ParseException {
+    Query q = parser.parse(query);
+    ScoreDoc[] scoreDocs = searcher.search(q, n).scoreDocs;
+    if(scoreDocs.length < n){
+      // this should not occur
+      throw new RuntimeException("found " + scoreDocs.length + " docs that is less than " + n);
+    }
+    List<Integer> similarDocIDs = new ArrayList<Integer>(scoreDocs.length);
+    for(ScoreDoc scoreDoc : scoreDocs){
+      Document doc = searcher.doc(scoreDoc.doc);
+      similarDocIDs.add(getNumId(doc.get(F_ID)));
+    }
+    return similarDocIDs;
+  }
+
+  /** Offset of the five-digit number inside an id such as ukbench/full/ukbench02515.jpg. */
+  static final int NUM_POS = 20;
+
+  /** Extracts the five-digit image number from <code>id</code>. */
+  static int getNumId(String id){
+    //           1         2
+    // 012345678901234567890123456789
+    // ukbench/full/ukbench02515.jpg
+    return Integer.parseInt(id.substring(NUM_POS, NUM_POS + 5));
+  }
+
+  /**
+   * Returns the other three members of the block of four consecutive ids
+   * (0-3, 4-7, ...) that contains <code>id</code>, in ascending order.
+   */
+  static int[] getFriends(int id){
+    // position inside the block: this must be the remainder, not the quotient
+    int pos = ((id % 4) + 4) % 4;
+    int first = id - pos;
+    int[] friends = new int[3];
+    int n = 0;
+    for(int member = first; member < first + 4; member++){
+      if(member != id){
+        friends[n++] = member;
+      }
+    }
+    return friends;
+  }
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org