You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by ko...@apache.org on 2013/01/02 15:01:38 UTC
svn commit: r1427792 - in /labs/alike/trunk: build.xml ivy.xml
src/java/org/apache/alike/eval/
src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
Author: koji
Date: Wed Jan 2 14:01:38 2013
New Revision: 1427792
URL: http://svn.apache.org/viewvc?rev=1427792&view=rev
Log:
add SimpleTopHitsEvaluator
Added:
labs/alike/trunk/src/java/org/apache/alike/eval/
labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
Modified:
labs/alike/trunk/build.xml
labs/alike/trunk/ivy.xml
Modified: labs/alike/trunk/build.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/build.xml?rev=1427792&r1=1427791&r2=1427792&view=diff
==============================================================================
--- labs/alike/trunk/build.xml (original)
+++ labs/alike/trunk/build.xml Wed Jan 2 14:01:38 2013
@@ -147,6 +147,18 @@
</target>
<!-- ================================================================== -->
+ <!-- = LAUNCH TOOLS = -->
+ <!-- ================================================================== -->
+  <!-- Runs the top-hits evaluator against a Lucene index directory.
+       Usage: ant eval -Dindex=/path/to/index -->
+  <target name="eval" depends="alike-compile" description="run SimpleTopHitsEvaluator program">
+    <!-- Without this guard an unset property is passed through as the literal text "${index}". -->
+    <fail unless="index" message="index property is required: ant eval -Dindex=/path/to/index"/>
+    <!-- failonerror makes an exception in the forked JVM fail the build instead of being ignored. -->
+    <java classname="org.apache.alike.eval.SimpleTopHitsEvaluator" fork="true" failonerror="true">
+      <jvmarg value="-Dfile.encoding=UTF-8"/>
+      <!-- value (not line) keeps a path that contains spaces as a single argument. -->
+      <arg value="${index}"/>
+      <classpath refid="common.path.lib"/>
+      <classpath path="${cls.dir}"/>
+    </java>
+  </target>
+
+ <!-- ================================================================== -->
<!-- = CLEANING = -->
<!-- ================================================================== -->
<target name="clean" description="clean all">
Modified: labs/alike/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/labs/alike/trunk/ivy.xml?rev=1427792&r1=1427791&r2=1427792&view=diff
==============================================================================
--- labs/alike/trunk/ivy.xml (original)
+++ labs/alike/trunk/ivy.xml Wed Jan 2 14:01:38 2013
@@ -33,6 +33,8 @@
<dependency org="org.apache.mahout" name="mahout-integration" rev="0.7"/>
<dependency org="org.apache.mahout" name="mahout-core" rev="0.7"/>
<dependency org="org.apache.lucene" name="lucene-core" rev="4.0.0" />
+ <dependency org="org.apache.lucene" name="lucene-queryparser" rev="4.0.0" />
+ <dependency org="org.apache.lucene" name="lucene-analyzers-common" rev="4.0.0" />
<dependency org="org.slf4j" name="slf4j-jcl" rev="1.6.1"/>
<exclude org="org.mongodb"/>
</dependencies>
Added: labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java
URL: http://svn.apache.org/viewvc/labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java?rev=1427792&view=auto
==============================================================================
--- labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java (added)
+++ labs/alike/trunk/src/java/org/apache/alike/eval/SimpleTopHitsEvaluator.java Wed Jan 2 14:01:38 2013
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.alike.eval;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortField.Type;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version;
+
+public class SimpleTopHitsEvaluator {
+
+ static String F_ID = "imgFile";
+ static String F_QUERY = "query";
+ static String F_HISTOGRAM = "histogram";
+ static IndexReader reader;
+ static IndexSearcher searcher;
+ static QueryParser parser;
+
+ public static void main(String[] args) throws Exception {
+ final String index = args[0];
+ System.out.printf("index:\"%s\"\n", index);
+ Directory dir = FSDirectory.open(new File(index));
+ parser = new QueryParser(Version.LUCENE_40, F_HISTOGRAM, new WhitespaceAnalyzer(Version.LUCENE_40));
+ reader = DirectoryReader.open(dir);
+ searcher = new IndexSearcher(reader);
+
+ String[][] sortedIdsQueries = getSortedIDsQueries();
+ int num = sortedIdsQueries.length;
+ //StringBuilder sb = new StringBuilder();
+ float total3 = 0;
+ float total10 = 0;
+ float total37 = 0;
+ for(int i = 0; i < num; i++){
+ String id = sortedIdsQueries[i][0];
+ String query = sortedIdsQueries[i][1] + " NOT " + F_ID + ":" + id;
+ List<Integer> top10docs = getSimilarDocs(id, query, 10);
+ /* print top 4 docs
+ sb.setLength(0);
+ sb.append(top11docs[0]).append(" => ");
+ for(int j = 1; j < 3; j++){
+ sb.append(top11docs[j]).append(", ");
+ }
+ sb.append(top11docs[3]);
+ System.out.println(sb.toString());
+ */
+ float score3 = 0;
+ float score10 = 0;
+ float score37 = 0;
+ int[] friends = getFriends(getNumId(id));
+ for(int friend : friends){
+ for(int j = 0; j < top10docs.size(); j++){
+ if(friend == top10docs.get(j)){
+ if(j < 3){
+ score3++;
+ }
+ score10++;
+ break;
+ }
+ }
+ }
+ score37 = score3 + (score10 - score3) * 0.5F;
+ total3 += score3;
+ total10 += score10;
+ total37 += score37;
+ }
+
+ System.out.printf("mean TOP3 = %1.6f\n", total3 / (float)num);
+ System.out.printf("mean TOP10 = %1.6f\n", total10 / (float)num);
+ System.out.printf("mean TOP3+7 = %1.6f\n", total37 / (float)num);
+
+ reader.close();
+ }
+
+ static String[][] getSortedIDsQueries() throws IOException {
+ int numDocs = reader.numDocs();
+ System.out.printf("numDocs=%d\n", numDocs);
+ String[][] sortedIdsQueries = new String[numDocs][2];
+ SortField sf = new SortField(F_ID, Type.STRING);
+ TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), numDocs, new Sort(sf));
+ int i = 0;
+ for(ScoreDoc scoreDoc : topDocs.scoreDocs){
+ int d = scoreDoc.doc;
+ Document doc = searcher.doc(d);
+ sortedIdsQueries[i][0] = doc.get(F_ID);
+ sortedIdsQueries[i++][1] = doc.get(F_QUERY);
+ }
+ return sortedIdsQueries;
+ }
+
+ static List<Integer> getSimilarDocs(String id, String query, int n) throws IOException, ParseException {
+ Query q = parser.parse(query);
+ TopDocs topDocs = searcher.search(q, n);
+ ScoreDoc[] scoreDocs = topDocs.scoreDocs;
+ if(scoreDocs.length < n){
+ // this shouldn't be occurred
+ throw new RuntimeException("found " + scoreDocs.length + " docs that is less than " + n);
+ }
+ List<Integer> similarDocIDs = new ArrayList<Integer>(scoreDocs.length);
+ String topDocId = null;
+ for(ScoreDoc scoreDoc : scoreDocs){
+ int d = scoreDoc.doc;
+ Document doc = searcher.doc(d);
+ String idStr = doc.get(F_ID);
+ if(topDocId == null){
+ topDocId = idStr;
+ }
+ similarDocIDs.add(getNumId(idStr));
+ }
+
+ /*
+ if(checkTopDoc){
+ if(!id.equals(topDocId)){
+ throw new RuntimeException(id + " is expected for the top doc, but " + similarDocIDs.get(0));
+ }
+ }
+ */
+
+ return similarDocIDs;
+ }
+
+ static final int NUM_POS = 20;
+
+ static int getNumId(String id){
+ // 1 2
+ // 012345678901234567890123456789
+ // ukbench/full/ukbench02515.jpg
+ return Integer.parseInt(id.substring(NUM_POS, NUM_POS + 5));
+ }
+
+ static int[] getFriends(int id){
+ int[] friends = new int[3];
+ int remainder = id / 4;
+ switch (remainder) {
+ case 0:
+ friends[0] = id + 1; friends[1] = id + 2; friends[2] = id + 3;
+ break;
+ case 1:
+ friends[0] = id - 1; friends[1] = id + 1; friends[2] = id + 2;
+ break;
+ case 2:
+ friends[0] = id - 2; friends[1] = id - 1; friends[2] = id + 1;
+ break;
+ case 3:
+ friends[0] = id - 3; friends[1] = id - 2; friends[2] = id - 1;
+ break;
+ }
+ return friends;
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org