You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@clerezza.apache.org by ds...@apache.org on 2012/01/27 19:46:38 UTC
svn commit: r1236815 - in /incubator/clerezza/trunk/parent/rdf.cris/core/src:
main/java/org/apache/clerezza/rdf/cris/GraphIndexer.java
main/java/org/apache/clerezza/rdf/cris/ResourceFinder.java
test/java/org/apache/clerezza/rdf/cris/GraphIndexerTest.java
Author: dspicar
Date: Fri Jan 27 18:46:38 2012
New Revision: 1236815
URL: http://svn.apache.org/viewvc?rev=1236815&view=rev
Log:
CLEREZZA-678: added pagination support to GraphIndexer.
Modified:
incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/GraphIndexer.java
incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/ResourceFinder.java
incubator/clerezza/trunk/parent/rdf.cris/core/src/test/java/org/apache/clerezza/rdf/cris/GraphIndexerTest.java
Modified: incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/GraphIndexer.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/GraphIndexer.java?rev=1236815&r1=1236814&r2=1236815&view=diff
==============================================================================
--- incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/GraphIndexer.java (original)
+++ incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/GraphIndexer.java Fri Jan 27 18:46:38 2012
@@ -20,6 +20,8 @@ package org.apache.clerezza.rdf.cris;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -46,12 +48,9 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -59,7 +58,9 @@ import org.apache.lucene.search.ScoreDoc
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -70,6 +71,12 @@ import org.slf4j.LoggerFactory;
* @author reto, tio, daniel
*/
public class GraphIndexer extends ResourceFinder {
+
+ /**
+ * Default value for {@code maxHits}.
+ */
+ public static final int DEFAULT_MAXHITS = 100000;
+
private final Logger logger = LoggerFactory.getLogger(getClass());
/**
@@ -302,7 +309,9 @@ public class GraphIndexer extends Resour
* The constructor does not check if there is a valid exiting index.
* The user is responsible for setting this value correctly.
* @param maxHits
- * How many results the indexer returns.
+ * How many results the indexer returns. All entries in the index are
+ * searched, but only {@code maxHits} resources are resolved and
+ * returned in the result.
*
* @see IndexDefinitionManager
*/
@@ -439,7 +448,8 @@ public class GraphIndexer extends Resour
public GraphIndexer(TripleCollection definitionGraph,
TripleCollection baseGraph, Directory indexDirectory,
boolean createNewIndex) {
- this(definitionGraph, baseGraph, indexDirectory, createNewIndex, 100000);
+ this(definitionGraph, baseGraph, indexDirectory, createNewIndex,
+ DEFAULT_MAXHITS);
}
/**
@@ -584,12 +594,50 @@ public class GraphIndexer extends Resour
}
@Override
- public List<NonLiteral> findResources(List<Condition> conditions,
- SortSpecification sortSpecification, FacetCollector... facetCollectors)
+ public List<NonLiteral> findResources(List<? extends Condition> conditions,
+ SortSpecification sortSpecification,
+ FacetCollector... facetCollectors) throws ParseException {
+ return findResources(conditions, sortSpecification,
+ Arrays.asList(facetCollectors), 0, maxHits + 1);
+ }
+
+ /**
+ * Find resources using conditions and collect facets and specify a sort order.
+ *
+ * This method allows specifying the range of result indices to return
+ * (e.g. for pagination).
+ *
+ * @param conditions
+ * a list of conditions to construct a query from.
+ * @param facetCollectors
+ * Facet collectors to apply to the query result.
+ * Can be {@link Collections#EMPTY_LIST}, if not used.
+ * @param sortSpecification
+ * Specifies the sort order. Can be null, if not used.
+ * @param from
+ * return results starting from this index (inclusive); negative values are treated as 0.
+ * @param to
+ * return results until this index (exclusive); values below {@code from} are treated as {@code from + 1}.
+ * @return
+ * a list of resources that match the query.
+ *
+ * @throws ParseException when the resulting query is illegal.
+ */
+ public List<NonLiteral> findResources(List<? extends Condition> conditions,
+ SortSpecification sortSpecification,
+ List<FacetCollector> facetCollectors, int from, int to)
throws ParseException {
+
+ if(from < 0) {
+ from = 0;
+ }
+
+ if(to < from) {
+ to = from + 1;
+ }
if(facetCollectors == null) {
- facetCollectors = new FacetCollector[0];
+ facetCollectors = Collections.EMPTY_LIST;
}
BooleanQuery booleanQuery = new BooleanQuery();
@@ -598,20 +646,6 @@ public class GraphIndexer extends Resour
}
IndexSearcher searcher = luceneTools.getIndexSearcher();
- TopScoreDocCollector testCollector = TopScoreDocCollector.create(1, true);
- try {
- logger.info(booleanQuery.toString());
- searcher.search(booleanQuery, testCollector);
- } catch (IOException ex) {
- }
- int totalHits = testCollector.topDocs().totalHits;
-
- int hitsPerPage = totalHits;
-
- if (totalHits > maxHits) {
- hitsPerPage = maxHits;
- }
-
ScoreDoc[] hits = null;
try {
if(sortSpecification != null) {
@@ -623,10 +657,10 @@ public class GraphIndexer extends Resour
sortCache.put(fieldKey, sort);
}
searcher.setDefaultFieldSortScoring(true, true);
- TopFieldDocs topFieldDocs = searcher.search(booleanQuery, null, hitsPerPage, sort);
+ TopFieldDocs topFieldDocs = searcher.search(booleanQuery, null, to, sort);
hits = topFieldDocs.scoreDocs;
} else {
- TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
+ TopScoreDocCollector collector = TopScoreDocCollector.create(to, true);
searcher.search(booleanQuery, collector);
hits = collector.topDocs().scoreDocs;
}
@@ -636,19 +670,16 @@ public class GraphIndexer extends Resour
List<NonLiteral> result = new ArrayList<NonLiteral>();
- for (ScoreDoc hit : hits) {
- int docId = hit.doc;
+ for (int i = from; i < hits.length; ++i) {
+ int docId = hits[i].doc;
Document d;
try {
d = searcher.doc(docId);
collectFacets(facetCollectors, d);
result.add(getResource(d));
- } catch (CorruptIndexException ex) {
- logger.error("CRIS Error: ", ex);
} catch (IOException ex) {
logger.error("CRIS Error: ", ex);
}
-
}
for(FacetCollector facetCollector : facetCollectors) {
@@ -815,8 +846,8 @@ public class GraphIndexer extends Resour
throw new RuntimeException("There is no propertyList on this definition.");
}
- private void collectFacets(FacetCollector[] facetCollectors, Document d) {
- if(facetCollectors.length > 0) {
+ private void collectFacets(List<FacetCollector> facetCollectors, Document d) {
+ if(facetCollectors.size() > 0) {
for(FacetCollector facetCollector : facetCollectors) {
Map<VirtualProperty, Map<String, Object>> facetMap =
facetCollector.getFacetMap();
Modified: incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/ResourceFinder.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/ResourceFinder.java?rev=1236815&r1=1236814&r2=1236815&view=diff
==============================================================================
--- incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/ResourceFinder.java (original)
+++ incubator/clerezza/trunk/parent/rdf.cris/core/src/main/java/org/apache/clerezza/rdf/cris/ResourceFinder.java Fri Jan 27 18:46:38 2012
@@ -54,7 +54,7 @@ import org.apache.lucene.queryParser.Que
*
* @throws ParseException when the resulting query is illegal.
*/
- public List<NonLiteral> findResources(List<Condition> conditions)
+ public List<NonLiteral> findResources(List<? extends Condition> conditions)
throws ParseException {
return findResources(conditions, new FacetCollector[0]);
}
@@ -71,7 +71,7 @@ import org.apache.lucene.queryParser.Que
*
* @throws ParseException when the resulting query is illegal.
*/
- public List<NonLiteral> findResources(List<Condition> conditions,
+ public List<NonLiteral> findResources(List<? extends Condition> conditions,
FacetCollector... facetCollectors) throws ParseException {
return findResources(conditions, null, facetCollectors);
@@ -91,7 +91,7 @@ import org.apache.lucene.queryParser.Que
*
* @throws ParseException when the resulting query is illegal.
*/
- public abstract List<NonLiteral> findResources(List<Condition> conditions,
+ public abstract List<NonLiteral> findResources(List<? extends Condition> conditions,
SortSpecification sortSpecification, FacetCollector... facetCollectors)
throws ParseException;
Modified: incubator/clerezza/trunk/parent/rdf.cris/core/src/test/java/org/apache/clerezza/rdf/cris/GraphIndexerTest.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/parent/rdf.cris/core/src/test/java/org/apache/clerezza/rdf/cris/GraphIndexerTest.java?rev=1236815&r1=1236814&r2=1236815&view=diff
==============================================================================
--- incubator/clerezza/trunk/parent/rdf.cris/core/src/test/java/org/apache/clerezza/rdf/cris/GraphIndexerTest.java (original)
+++ incubator/clerezza/trunk/parent/rdf.cris/core/src/test/java/org/apache/clerezza/rdf/cris/GraphIndexerTest.java Fri Jan 27 18:46:38 2012
@@ -21,20 +21,21 @@ package org.apache.clerezza.rdf.cris;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
-import org.apache.clerezza.rdf.cris.ontologies.CRIS;
import org.apache.clerezza.rdf.core.*;
-import org.apache.clerezza.rdf.utils.*;
import org.apache.clerezza.rdf.core.impl.*;
+import org.apache.clerezza.rdf.cris.ontologies.CRIS;
import org.apache.clerezza.rdf.ontologies.FOAF;
import org.apache.clerezza.rdf.ontologies.RDF;
import org.apache.clerezza.rdf.ontologies.RDFS;
+import org.apache.clerezza.rdf.utils.*;
import org.apache.lucene.queryParser.ParseException;
-import org.wymiwyg.commons.util.Util;
import org.junit.*;
+import org.wymiwyg.commons.util.Util;
/**
*
@@ -552,6 +553,68 @@ public class GraphIndexerTest {
}
@Test
+ public void paginationTest() throws InterruptedException, ParseException {
+ IndexDefinitionManager indexDefinitionManager = new IndexDefinitionManager(definitions);
+ final PropertyHolder firstName = new PropertyHolder(FOAF.firstName);
+ List<VirtualProperty> properties = new ArrayList<VirtualProperty>();
+ properties.add(firstName);
+ indexDefinitionManager.addDefinitionVirtual(FOAF.Person, properties);
+ service.reCreateIndex();
+
+ SortSpecification sortSpecification = new SortSpecification();
+ sortSpecification.add(firstName, SortSpecification.STRING_COMPARETO);
+ sortSpecification.add(SortSpecification.INDEX_ORDER);
+
+ Thread.sleep(1000);
+ {
+ List<Condition> fl = new ArrayList<Condition>();
+ fl.add(new WildcardCondition(firstName, "*"));
+ List<NonLiteral> results = service.findResources(fl, sortSpecification,
+ Collections.EMPTY_LIST, 0, 2);
+ Assert.assertTrue(results.size() == 2);
+
+ List<String> expected = new ArrayList<String>(7);
+ expected.add("Frank");
+ expected.add("Harry");
+
+ List<String> actual = new ArrayList<String>(results.size());
+ for(NonLiteral result : results) {
+ GraphNode node = new GraphNode(result, dataGraph);
+ Iterator<Literal> it = node.getLiterals(FOAF.firstName);
+ while(it.hasNext()) {
+ actual.add(it.next().getLexicalForm());
+ }
+ }
+
+ Assert.assertArrayEquals(expected.toArray(), actual.toArray());
+
+ results = service.findResources(fl, sortSpecification,
+ Collections.EMPTY_LIST, 2, 5);
+ Assert.assertTrue(results.size() == 3);
+
+ expected = new ArrayList<String>(7);
+ expected.add("Harry Joe");
+ expected.add("Jane");
+ expected.add("Jane");
+
+ actual = new ArrayList<String>(results.size());
+ for(NonLiteral result : results) {
+ GraphNode node = new GraphNode(result, dataGraph);
+ Iterator<Literal> it = node.getLiterals(FOAF.firstName);
+ while(it.hasNext()) {
+ actual.add(it.next().getLexicalForm());
+ }
+ }
+
+ Assert.assertArrayEquals(expected.toArray(), actual.toArray());
+
+ results = service.findResources(fl, sortSpecification,
+ Collections.EMPTY_LIST, 2, 100000);
+ Assert.assertTrue(results.size() == 6);
+ }
+ }
+
+ @Test
public void genericConditionTest() throws InterruptedException, ParseException {
IndexDefinitionManager indexDefinitionManager = new IndexDefinitionManager(definitions);
final PropertyHolder firstName = new PropertyHolder(FOAF.firstName);