Posted to commits@lucene.apache.org by er...@apache.org on 2012/09/17 18:02:02 UTC
svn commit: r1386681 [5/8] - in /lucene/dev/trunk:
lucene/analysis/common/src/java/org/apache/lucene/analysis/br/
lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/
lucene/analysis/common/src/java/org/apache/lucene/analysis/compound...
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTransactionRollback.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTransactionRollback.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTransactionRollback.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTransactionRollback.java Mon Sep 17 16:01:56 2012
@@ -41,13 +41,13 @@ import org.apache.lucene.util.Bits;
*/
public class TestTransactionRollback extends LuceneTestCase {
-
+
private static final String FIELD_RECORD_ID = "record_id";
private Directory dir;
-
+
//Rolls back index to a chosen ID
private void rollBackLast(int id) throws Exception {
-
+
// System.out.println("Attempting to rollback to "+id);
String ids="-"+id;
IndexCommit last=null;
@@ -62,7 +62,7 @@ public class TestTransactionRollback ext
if (last==null)
throw new RuntimeException("Couldn't find commit point "+id);
-
+
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random())).setIndexDeletionPolicy(
new RollbackDeletionPolicy(id)).setIndexCommit(last));
@@ -72,22 +72,22 @@ public class TestTransactionRollback ext
w.close();
}
- public void testRepeatedRollBacks() throws Exception {
+ public void testRepeatedRollBacks() throws Exception {
int expectedLastRecordId=100;
while (expectedLastRecordId>10) {
- expectedLastRecordId -=10;
+ expectedLastRecordId -=10;
rollBackLast(expectedLastRecordId);
BitSet expecteds = new BitSet(100);
expecteds.set(1,(expectedLastRecordId+1),true);
- checkExpecteds(expecteds);
+ checkExpecteds(expecteds);
}
}
-
+
private void checkExpecteds(BitSet expecteds) throws Exception {
IndexReader r = DirectoryReader.open(dir);
-
+
//Perhaps not the most efficient approach but meets our
//needs here.
final Bits liveDocs = MultiFields.getLiveDocs(r);
@@ -114,7 +114,7 @@ public class TestTransactionRollback ext
Collection files = comm.getFileNames();
for (Iterator iterator2 = files.iterator(); iterator2.hasNext();) {
String filename = (String) iterator2.next();
- System.out.print(filename+", ");
+ System.out.print(filename+", ");
}
System.out.println();
}
@@ -133,7 +133,7 @@ public class TestTransactionRollback ext
Document doc=new Document();
doc.add(newTextField(FIELD_RECORD_ID, ""+currentRecordId, Field.Store.YES));
w.addDocument(doc);
-
+
if (currentRecordId%10 == 0) {
Map<String,String> data = new HashMap<String,String>();
data.put("index", "records 1-"+currentRecordId);
@@ -177,16 +177,16 @@ public class TestTransactionRollback ext
" UserData="+commit.getUserData() +") ("+(commits.size()-1)+" commit points left) files=");
Collection files = commit.getFileNames();
for (Iterator iterator2 = files.iterator(); iterator2.hasNext();) {
- System.out.print(" "+iterator2.next());
+ System.out.print(" "+iterator2.next());
}
System.out.println();
*/
-
- commit.delete();
+
+ commit.delete();
}
}
}
- }
+ }
}
class DeleteLastCommitPolicy implements IndexDeletionPolicy {
@@ -198,7 +198,7 @@ public class TestTransactionRollback ext
}
}
- public void testRollbackDeletionPolicy() throws Exception {
+ public void testRollbackDeletionPolicy() throws Exception {
for(int i=0;i<2;i++) {
// Unless you specify a prior commit point, rollback
// should not work:
@@ -209,7 +209,7 @@ public class TestTransactionRollback ext
r.close();
}
}
-
+
// Keeps all commit points (used to build index)
class KeepAllDeletionPolicy implements IndexDeletionPolicy {
public void onCommit(List<? extends IndexCommit> commits) throws IOException {}
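For reference, the rollback pattern this test exercises opens an IndexWriter on a chosen earlier IndexCommit together with a deletion policy that discards the later commits. A minimal sketch of that pattern, mirroring rollBackLast above and assuming the test's own helpers (dir, RollbackDeletionPolicy, newIndexWriterConfig, MockAnalyzer):

    // Sketch only (not part of this change set): find the commit whose user data
    // records the target id, then reopen the writer on that commit so the
    // deletion policy can drop everything committed after it.
    IndexCommit target = null;
    for (IndexCommit commit : DirectoryReader.listCommits(dir)) {
      Map<String,String> userData = commit.getUserData();
      if (!userData.isEmpty() && userData.get("index").endsWith("-" + id)) {
        target = commit;
      }
    }
    if (target == null) {
      throw new RuntimeException("Couldn't find commit point " + id);
    }
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random()))
          .setIndexDeletionPolicy(new RollbackDeletionPolicy(id))
          .setIndexCommit(target));
    w.close(); // closing commits; the policy removes the later commit points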
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTransactions.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTransactions.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTransactions.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTransactions.java Mon Sep 17 16:01:56 2012
@@ -129,7 +129,7 @@ public class TestTransactions extends Lu
}
try {
writer2.prepareCommit();
- } catch (Throwable t) {
+ } catch (Throwable t) {
writer1.rollback();
writer2.rollback();
return;
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java Mon Sep 17 16:01:56 2012
@@ -145,7 +145,7 @@ public class TestCachingCollector extend
try {
cc.replay(new NoOpCollector(false)); // this call should fail
fail("should have failed if an in-order Collector was given to replay(), " +
- "while CachingCollector was initialized with out-of-order collection");
+ "while CachingCollector was initialized with out-of-order collection");
} catch (IllegalArgumentException e) {
// ok
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDocIdSet.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDocIdSet.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestDocIdSet.java Mon Sep 17 16:01:56 2012
@@ -62,15 +62,15 @@ public class TestDocIdSet extends Lucene
};
}
};
-
-
+
+
DocIdSet filteredSet = new FilteredDocIdSet(innerSet){
@Override
protected boolean match(int docid) {
return docid%2 == 0; //validate only even docids
- }
+ }
};
-
+
DocIdSetIterator iter = filteredSet.iterator();
ArrayList<Integer> list = new ArrayList<Integer>();
int doc = iter.advance(3);
@@ -80,7 +80,7 @@ public class TestDocIdSet extends Lucene
list.add(Integer.valueOf(doc));
}
}
-
+
int[] docs = new int[list.size()];
int c=0;
Iterator<Integer> intIter = list.iterator();
@@ -151,7 +151,7 @@ public class TestDocIdSet extends Lucene
@Override
protected boolean match(int docid) {
return true;
- }
+ }
};
}
};
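For reference, the FilteredDocIdSet idiom reindented above keeps only documents for which match() returns true; callers then walk the result through its DocIdSetIterator. A minimal sketch, where innerSet stands for any existing DocIdSet:

    // Sketch only (not part of this change set): accept even doc ids and iterate.
    DocIdSet evenDocs = new FilteredDocIdSet(innerSet) {
      @Override
      protected boolean match(int docid) {
        return docid % 2 == 0;
      }
    };
    DocIdSetIterator it = evenDocs.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      // doc is an even document id accepted by the filter
    }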
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFuzzyQuery.java Mon Sep 17 16:01:56 2012
@@ -101,7 +101,7 @@ public class TestFuzzyQuery extends Luce
}
// not similar enough:
- query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMaxEdits, 0);
+ query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMaxEdits, 0);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(0, hits.length);
query = new FuzzyQuery(new Term("field", "aaccc"), FuzzyQuery.defaultMaxEdits, 0); // edit distance to "aaaaa" = 3
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java Mon Sep 17 16:01:56 2012
@@ -140,7 +140,7 @@ public class TestPayloadNearQuery extend
query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
QueryUtils.check(query);
-
+
// all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
// and all the similarity factors are set to 1
hits = searcher.search(query, null, 100);
@@ -162,8 +162,8 @@ public class TestPayloadNearQuery extend
assertEquals("should be 100 hits", 100, hits.totalHits);
for (int j = 0; j < hits.scoreDocs.length; j++) {
ScoreDoc doc = hits.scoreDocs[j];
- // System.out.println("Doc: " + doc.toString());
- // System.out.println("Explain: " + searcher.explain(query, doc.doc));
+ // System.out.println("Doc: " + doc.toString());
+ // System.out.println("Explain: " + searcher.explain(query, doc.doc));
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
}
}
@@ -192,71 +192,71 @@ public class TestPayloadNearQuery extend
}
public void testAverageFunction() throws IOException {
- PayloadNearQuery query;
- TopDocs hits;
+ PayloadNearQuery query;
+ TopDocs hits;
- query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
- QueryUtils.check(query);
- // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
- // and all the similarity factors are set to 1
- hits = searcher.search(query, null, 100);
- assertTrue("hits is null and it shouldn't be", hits != null);
- assertTrue("should be 10 hits", hits.totalHits == 10);
- for (int j = 0; j < hits.scoreDocs.length; j++) {
- ScoreDoc doc = hits.scoreDocs[j];
- assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
- Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
- String exp = explain.toString();
- assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
- assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
- }
+ query = newPhraseQuery("field", "twenty two", true, new AveragePayloadFunction());
+ QueryUtils.check(query);
+ // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
+ // and all the similarity factors are set to 1
+ hits = searcher.search(query, null, 100);
+ assertTrue("hits is null and it shouldn't be", hits != null);
+ assertTrue("should be 10 hits", hits.totalHits == 10);
+ for (int j = 0; j < hits.scoreDocs.length; j++) {
+ ScoreDoc doc = hits.scoreDocs[j];
+ assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
+ Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+ String exp = explain.toString();
+ assertTrue(exp, exp.indexOf("AveragePayloadFunction") > -1);
+ assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 3, explain.getValue() == 3f);
+ }
}
public void testMaxFunction() throws IOException {
- PayloadNearQuery query;
- TopDocs hits;
+ PayloadNearQuery query;
+ TopDocs hits;
- query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
- QueryUtils.check(query);
- // all 10 hits should have score = 4 (max payload value)
- hits = searcher.search(query, null, 100);
- assertTrue("hits is null and it shouldn't be", hits != null);
- assertTrue("should be 10 hits", hits.totalHits == 10);
- for (int j = 0; j < hits.scoreDocs.length; j++) {
- ScoreDoc doc = hits.scoreDocs[j];
- assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
- Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
- String exp = explain.toString();
- assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
- assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
- }
+ query = newPhraseQuery("field", "twenty two", true, new MaxPayloadFunction());
+ QueryUtils.check(query);
+ // all 10 hits should have score = 4 (max payload value)
+ hits = searcher.search(query, null, 100);
+ assertTrue("hits is null and it shouldn't be", hits != null);
+ assertTrue("should be 10 hits", hits.totalHits == 10);
+ for (int j = 0; j < hits.scoreDocs.length; j++) {
+ ScoreDoc doc = hits.scoreDocs[j];
+ assertTrue(doc.score + " does not equal: " + 4, doc.score == 4);
+ Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+ String exp = explain.toString();
+ assertTrue(exp, exp.indexOf("MaxPayloadFunction") > -1);
+ assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 4, explain.getValue() == 4f);
+ }
}
public void testMinFunction() throws IOException {
- PayloadNearQuery query;
- TopDocs hits;
+ PayloadNearQuery query;
+ TopDocs hits;
- query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
- QueryUtils.check(query);
- // all 10 hits should have score = 2 (min payload value)
- hits = searcher.search(query, null, 100);
- assertTrue("hits is null and it shouldn't be", hits != null);
- assertTrue("should be 10 hits", hits.totalHits == 10);
- for (int j = 0; j < hits.scoreDocs.length; j++) {
- ScoreDoc doc = hits.scoreDocs[j];
- assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
- Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
- String exp = explain.toString();
- assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
- assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
- }
+ query = newPhraseQuery("field", "twenty two", true, new MinPayloadFunction());
+ QueryUtils.check(query);
+ // all 10 hits should have score = 2 (min payload value)
+ hits = searcher.search(query, null, 100);
+ assertTrue("hits is null and it shouldn't be", hits != null);
+ assertTrue("should be 10 hits", hits.totalHits == 10);
+ for (int j = 0; j < hits.scoreDocs.length; j++) {
+ ScoreDoc doc = hits.scoreDocs[j];
+ assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
+ Explanation explain = searcher.explain(query, hits.scoreDocs[j].doc);
+ String exp = explain.toString();
+ assertTrue(exp, exp.indexOf("MinPayloadFunction") > -1);
+ assertTrue(hits.scoreDocs[j].score + " explain value does not equal: " + 2, explain.getValue() == 2f);
+ }
}
private SpanQuery[] getClauses() {
- SpanNearQuery q1, q2;
- q1 = spanNearQuery("field2", "twenty two");
- q2 = spanNearQuery("field2", "twenty three");
- SpanQuery[] clauses = new SpanQuery[2];
- clauses[0] = q1;
- clauses[1] = q2;
- return clauses;
+ SpanNearQuery q1, q2;
+ q1 = spanNearQuery("field2", "twenty two");
+ q2 = spanNearQuery("field2", "twenty three");
+ SpanQuery[] clauses = new SpanQuery[2];
+ clauses[0] = q1;
+ clauses[1] = q2;
+ return clauses;
}
private SpanNearQuery spanNearQuery(String fieldName, String words) {
String[] wordList = words.split("[\\s]+");
@@ -274,8 +274,8 @@ public class TestPayloadNearQuery extend
hits = searcher.search(query, null, 100);
assertTrue("hits is null and it shouldn't be", hits != null);
ScoreDoc doc = hits.scoreDocs[0];
- // System.out.println("Doc: " + doc.toString());
- // System.out.println("Explain: " + searcher.explain(query, doc.doc));
+ // System.out.println("Doc: " + doc.toString());
+ // System.out.println("Explain: " + searcher.explain(query, doc.doc));
assertTrue("there should only be one hit", hits.totalHits == 1);
// should have score = 3 because adjacent terms have payloads of 2,4
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
@@ -299,8 +299,8 @@ public class TestPayloadNearQuery extend
assertTrue("should only be one hit", hits.scoreDocs.length == 1);
// the score should be 3 - the average of all the underlying payloads
ScoreDoc doc = hits.scoreDocs[0];
- // System.out.println("Doc: " + doc.toString());
- // System.out.println("Explain: " + searcher.explain(query, doc.doc));
+ // System.out.println("Doc: " + doc.toString());
+ // System.out.println("Explain: " + searcher.explain(query, doc.doc));
assertTrue(doc.score + " does not equal: " + 3, doc.score == 3);
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java Mon Sep 17 16:01:56 2012
@@ -582,21 +582,21 @@ public class TestBasics extends LuceneTe
@Test
public void testSpansSkipTo() throws Exception {
- SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy"));
- SpanTermQuery t2 = new SpanTermQuery(new Term("field", "seventy"));
- Spans s1 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t1);
- Spans s2 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t2);
-
- assertTrue(s1.next());
- assertTrue(s2.next());
-
- boolean hasMore = true;
-
- do {
- hasMore = skipToAccoringToJavaDocs(s1, s1.doc());
- assertEquals(hasMore, s2.skipTo(s2.doc()));
- assertEquals(s1.doc(), s2.doc());
- } while (hasMore);
+ SpanTermQuery t1 = new SpanTermQuery(new Term("field", "seventy"));
+ SpanTermQuery t2 = new SpanTermQuery(new Term("field", "seventy"));
+ Spans s1 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t1);
+ Spans s2 = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), t2);
+
+ assertTrue(s1.next());
+ assertTrue(s2.next());
+
+ boolean hasMore = true;
+
+ do {
+ hasMore = skipToAccoringToJavaDocs(s1, s1.doc());
+ assertEquals(hasMore, s2.skipTo(s2.doc()));
+ assertEquals(s1.doc(), s2.doc());
+ } while (hasMore);
}
/** Skips to the first match beyond the current, whose document number is
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/store/TestWindowsMMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/store/TestWindowsMMap.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/store/TestWindowsMMap.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/store/TestWindowsMMap.java Mon Sep 17 16:01:56 2012
@@ -84,7 +84,7 @@ public class TestWindowsMMap extends Luc
for(int dx = 0; dx < num; dx ++) {
String f = randomField();
Document doc = new Document();
- doc.add(newTextField("data", f, Field.Store.YES));
+ doc.add(newTextField("data", f, Field.Store.YES));
writer.addDocument(doc);
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java Mon Sep 17 16:01:56 2012
@@ -71,12 +71,12 @@ public class TestBitUtil extends LuceneT
long sumRes = 0;
while (iters-- >= 0) {
for (int i = 1; i <= 63; i++) {
- long a = testArg(i);
- sumRes += BitUtil.nlz(a);
- sumRes += BitUtil.nlz(a+1);
- sumRes += BitUtil.nlz(a-1);
- sumRes += BitUtil.nlz(a+10);
- sumRes += BitUtil.nlz(a-10);
+ long a = testArg(i);
+ sumRes += BitUtil.nlz(a);
+ sumRes += BitUtil.nlz(a + 1);
+ sumRes += BitUtil.nlz(a - 1);
+ sumRes += BitUtil.nlz(a + 10);
+ sumRes += BitUtil.nlz(a - 10);
}
}
return sumRes;
@@ -86,12 +86,12 @@ public class TestBitUtil extends LuceneT
long sumRes = 0;
while (iters-- >= 0) {
for (int i = 1; i <= 63; i++) {
- long a = testArg(i);
- sumRes += Long.numberOfLeadingZeros(a);
- sumRes += Long.numberOfLeadingZeros(a+1);
- sumRes += Long.numberOfLeadingZeros(a-1);
- sumRes += Long.numberOfLeadingZeros(a+10);
- sumRes += Long.numberOfLeadingZeros(a-10);
+ long a = testArg(i);
+ sumRes += Long.numberOfLeadingZeros(a);
+ sumRes += Long.numberOfLeadingZeros(a + 1);
+ sumRes += Long.numberOfLeadingZeros(a - 1);
+ sumRes += Long.numberOfLeadingZeros(a + 10);
+ sumRes += Long.numberOfLeadingZeros(a - 10);
}
}
return sumRes;
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestFixedBitSet.java Mon Sep 17 16:01:56 2012
@@ -49,7 +49,7 @@ public class TestFixedBitSet extends Luc
// aa = a.prevSetBit(aa-1);
aa--;
while ((aa >= 0) && (! a.get(aa))) {
- aa--;
+ aa--;
}
if (b.length() == 0) {
bb = -1;
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestOpenBitSet.java Mon Sep 17 16:01:56 2012
@@ -71,7 +71,7 @@ public class TestOpenBitSet extends Luce
// aa = a.prevSetBit(aa-1);
aa--;
while ((aa >= 0) && (! a.get(aa))) {
- aa--;
+ aa--;
}
bb = b.prevSetBit(bb-1);
assertEquals(aa,bb);
@@ -85,7 +85,7 @@ public class TestOpenBitSet extends Luce
// aa = a.prevSetBit(aa-1);
aa--;
while ((aa >= 0) && (! a.get(aa))) {
- aa--;
+ aa--;
}
bb = (int) b.prevSetBit((long) (bb-1));
assertEquals(aa,bb);
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java Mon Sep 17 16:01:56 2012
@@ -356,7 +356,7 @@ public class DirectoryTaxonomyReader imp
// only possible writer, and it is "synchronized" to avoid this case).
DirectoryReader r2 = DirectoryReader.openIfChanged(indexReader);
if (r2 == null) {
- return false; // no changes, nothing to do
+ return false; // no changes, nothing to do
}
// validate that a refresh is valid at this point, i.e. that the taxonomy
@@ -364,13 +364,13 @@ public class DirectoryTaxonomyReader imp
String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
if (t1==null) {
- if (t2!=null) {
- r2.close();
- throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
- }
+ if (t2!=null) {
+ r2.close();
+ throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
+ }
} else if (!t1.equals(t2)) {
- r2.close();
- throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1);
+ r2.close();
+ throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1);
}
IndexReader oldreader = indexReader;
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/DefaultEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/DefaultEncoder.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/DefaultEncoder.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/DefaultEncoder.java Mon Sep 17 16:01:56 2012
@@ -21,12 +21,12 @@ package org.apache.lucene.search.highlig
*/
public class DefaultEncoder implements Encoder
{
- public DefaultEncoder()
- {
- }
+ public DefaultEncoder()
+ {
+ }
- public String encodeText(String originalText)
- {
- return originalText;
- }
+ public String encodeText(String originalText)
+ {
+ return originalText;
+ }
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Encoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Encoder.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Encoder.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Encoder.java Mon Sep 17 16:01:56 2012
@@ -22,8 +22,8 @@ package org.apache.lucene.search.highlig
*/
public interface Encoder
{
- /**
- * @param originalText The section of text being output
- */
- String encodeText(String originalText);
+ /**
+ * @param originalText The section of text being output
+ */
+ String encodeText(String originalText);
}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Formatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Formatter.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Formatter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Formatter.java Mon Sep 17 16:01:56 2012
@@ -24,10 +24,10 @@ package org.apache.lucene.search.highlig
*/
public interface Formatter
{
- /**
- * @param originalText The section of text being considered for markup
- * @param tokenGroup contains one or several overlapping Tokens along with
- * their scores and positions.
- */
- String highlightTerm(String originalText, TokenGroup tokenGroup);
+ /**
+ * @param originalText The section of text being considered for markup
+ * @param tokenGroup contains one or several overlapping Tokens along with
+ * their scores and positions.
+ */
+ String highlightTerm(String originalText, TokenGroup tokenGroup);
}
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/GradientFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/GradientFormatter.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/GradientFormatter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/GradientFormatter.java Mon Sep 17 16:01:56 2012
@@ -42,7 +42,7 @@ public class GradientFormatter implement
*
* @param maxScore
* The score (and above) displayed as maxColor (See QueryScorer.getMaxWeight
- * which can be used to calibrate scoring scale)
+ * which can be used to calibrate scoring scale)
* @param minForegroundColor
* The hex color used for representing IDF scores of zero eg
* #FFFFFF (white) or null if no foreground color required
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Mon Sep 17 16:01:56 2012
@@ -38,445 +38,445 @@ public class Highlighter
public static final int DEFAULT_MAX_CHARS_TO_ANALYZE = 50*1024;
private int maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE;
- private Formatter formatter;
- private Encoder encoder;
- private Fragmenter textFragmenter=new SimpleFragmenter();
- private Scorer fragmentScorer=null;
-
- public Highlighter(Scorer fragmentScorer)
- {
- this(new SimpleHTMLFormatter(),fragmentScorer);
- }
-
-
- public Highlighter(Formatter formatter, Scorer fragmentScorer)
- {
- this(formatter,new DefaultEncoder(),fragmentScorer);
- }
-
-
- public Highlighter(Formatter formatter, Encoder encoder, Scorer fragmentScorer)
- {
- this.formatter = formatter;
- this.encoder = encoder;
- this.fragmentScorer = fragmentScorer;
- }
-
- /**
- * Highlights chosen terms in a text, extracting the most relevant section.
- * This is a convenience method that calls
- * {@link #getBestFragment(TokenStream, String)}
- *
- * @param analyzer the analyzer that will be used to split <code>text</code>
- * into chunks
- * @param text text to highlight terms in
- * @param fieldName Name of field used to influence analyzer's tokenization policy
- *
- * @return highlighted text fragment or null if no terms found
- * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
- */
- public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
- throws IOException, InvalidTokenOffsetsException
- {
- TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
- return getBestFragment(tokenStream, text);
- }
-
- /**
- * Highlights chosen terms in a text, extracting the most relevant section.
- * The document text is analysed in chunks to record hit statistics
- * across the document. After accumulating stats, the fragment with the highest score
- * is returned
- *
- * @param tokenStream a stream of tokens identified in the text parameter, including offset information.
- * This is typically produced by an analyzer re-parsing a document's
- * text. Some work may be done on retrieving TokenStreams more efficiently
- * by adding support for storing original text position data in the Lucene
- * index but this support is not currently available (as of Lucene 1.4 rc2).
- * @param text text to highlight terms in
- *
- * @return highlighted text fragment or null if no terms found
- * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
- */
- public final String getBestFragment(TokenStream tokenStream, String text)
- throws IOException, InvalidTokenOffsetsException
- {
- String[] results = getBestFragments(tokenStream,text, 1);
- if (results.length > 0)
- {
- return results[0];
- }
- return null;
- }
-
- /**
- * Highlights chosen terms in a text, extracting the most relevant sections.
- * This is a convenience method that calls
- * {@link #getBestFragments(TokenStream, String, int)}
- *
- * @param analyzer the analyzer that will be used to split <code>text</code>
- * into chunks
- * @param fieldName the name of the field being highlighted (used by analyzer)
- * @param text text to highlight terms in
- * @param maxNumFragments the maximum number of fragments.
- *
- * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
- * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
- */
- public final String[] getBestFragments(
- Analyzer analyzer,
- String fieldName,
- String text,
- int maxNumFragments)
- throws IOException, InvalidTokenOffsetsException
- {
- TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
- return getBestFragments(tokenStream, text, maxNumFragments);
- }
-
- /**
- * Highlights chosen terms in a text, extracting the most relevant sections.
- * The document text is analysed in chunks to record hit statistics
- * across the document. After accumulating stats, the fragments with the highest scores
- * are returned as an array of strings in order of score (contiguous fragments are merged into
- * one in their original order to improve readability)
- *
- * @param text text to highlight terms in
- * @param maxNumFragments the maximum number of fragments.
- *
- * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
- * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
- */
- public final String[] getBestFragments(
- TokenStream tokenStream,
- String text,
- int maxNumFragments)
- throws IOException, InvalidTokenOffsetsException
- {
- maxNumFragments = Math.max(1, maxNumFragments); //sanity check
-
- TextFragment[] frag =getBestTextFragments(tokenStream,text, true,maxNumFragments);
-
- //Get text
- ArrayList<String> fragTexts = new ArrayList<String>();
- for (int i = 0; i < frag.length; i++)
- {
- if ((frag[i] != null) && (frag[i].getScore() > 0))
- {
- fragTexts.add(frag[i].toString());
- }
- }
- return fragTexts.toArray(new String[0]);
- }
-
-
- /**
- * Low level api to get the most relevant (formatted) sections of the document.
- * This method has been made public to allow visibility of score information held in TextFragment objects.
- * Thanks to Jason Calabrese for help in redefining the interface.
- * @param tokenStream
- * @param text
- * @param maxNumFragments
- * @param mergeContiguousFragments
- * @throws IOException
- * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
- */
- public final TextFragment[] getBestTextFragments(
- TokenStream tokenStream,
- String text,
- boolean mergeContiguousFragments,
- int maxNumFragments)
- throws IOException, InvalidTokenOffsetsException
- {
- ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
- StringBuilder newText=new StringBuilder();
-
- CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
- OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
- tokenStream.addAttribute(PositionIncrementAttribute.class);
- tokenStream.reset();
-
- TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
-
+ private Formatter formatter;
+ private Encoder encoder;
+ private Fragmenter textFragmenter=new SimpleFragmenter();
+ private Scorer fragmentScorer=null;
+
+ public Highlighter(Scorer fragmentScorer)
+ {
+ this(new SimpleHTMLFormatter(),fragmentScorer);
+ }
+
+
+ public Highlighter(Formatter formatter, Scorer fragmentScorer)
+ {
+ this(formatter,new DefaultEncoder(),fragmentScorer);
+ }
+
+
+ public Highlighter(Formatter formatter, Encoder encoder, Scorer fragmentScorer)
+ {
+ this.formatter = formatter;
+ this.encoder = encoder;
+ this.fragmentScorer = fragmentScorer;
+ }
+
+ /**
+ * Highlights chosen terms in a text, extracting the most relevant section.
+ * This is a convenience method that calls
+ * {@link #getBestFragment(TokenStream, String)}
+ *
+ * @param analyzer the analyzer that will be used to split <code>text</code>
+ * into chunks
+ * @param text text to highlight terms in
+ * @param fieldName Name of field used to influence analyzer's tokenization policy
+ *
+ * @return highlighted text fragment or null if no terms found
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
+ */
+ public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
+ throws IOException, InvalidTokenOffsetsException
+ {
+ TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
+ return getBestFragment(tokenStream, text);
+ }
+
+ /**
+ * Highlights chosen terms in a text, extracting the most relevant section.
+ * The document text is analysed in chunks to record hit statistics
+ * across the document. After accumulating stats, the fragment with the highest score
+ * is returned
+ *
+ * @param tokenStream a stream of tokens identified in the text parameter, including offset information.
+ * This is typically produced by an analyzer re-parsing a document's
+ * text. Some work may be done on retrieving TokenStreams more efficiently
+ * by adding support for storing original text position data in the Lucene
+ * index but this support is not currently available (as of Lucene 1.4 rc2).
+ * @param text text to highlight terms in
+ *
+ * @return highlighted text fragment or null if no terms found
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
+ */
+ public final String getBestFragment(TokenStream tokenStream, String text)
+ throws IOException, InvalidTokenOffsetsException
+ {
+ String[] results = getBestFragments(tokenStream,text, 1);
+ if (results.length > 0)
+ {
+ return results[0];
+ }
+ return null;
+ }
+
+ /**
+ * Highlights chosen terms in a text, extracting the most relevant sections.
+ * This is a convenience method that calls
+ * {@link #getBestFragments(TokenStream, String, int)}
+ *
+ * @param analyzer the analyzer that will be used to split <code>text</code>
+ * into chunks
+ * @param fieldName the name of the field being highlighted (used by analyzer)
+ * @param text text to highlight terms in
+ * @param maxNumFragments the maximum number of fragments.
+ *
+ * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
+ */
+ public final String[] getBestFragments(
+ Analyzer analyzer,
+ String fieldName,
+ String text,
+ int maxNumFragments)
+ throws IOException, InvalidTokenOffsetsException
+ {
+ TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
+ return getBestFragments(tokenStream, text, maxNumFragments);
+ }
+
+ /**
+ * Highlights chosen terms in a text, extracting the most relevant sections.
+ * The document text is analysed in chunks to record hit statistics
+ * across the document. After accumulating stats, the fragments with the highest scores
+ * are returned as an array of strings in order of score (contiguous fragments are merged into
+ * one in their original order to improve readability)
+ *
+ * @param text text to highlight terms in
+ * @param maxNumFragments the maximum number of fragments.
+ *
+ * @return highlighted text fragments (between 0 and maxNumFragments number of fragments)
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
+ */
+ public final String[] getBestFragments(
+ TokenStream tokenStream,
+ String text,
+ int maxNumFragments)
+ throws IOException, InvalidTokenOffsetsException
+ {
+ maxNumFragments = Math.max(1, maxNumFragments); //sanity check
+
+ TextFragment[] frag =getBestTextFragments(tokenStream,text, true,maxNumFragments);
+
+ //Get text
+ ArrayList<String> fragTexts = new ArrayList<String>();
+ for (int i = 0; i < frag.length; i++)
+ {
+ if ((frag[i] != null) && (frag[i].getScore() > 0))
+ {
+ fragTexts.add(frag[i].toString());
+ }
+ }
+ return fragTexts.toArray(new String[0]);
+ }
+
+
+ /**
+ * Low level api to get the most relevant (formatted) sections of the document.
+ * This method has been made public to allow visibility of score information held in TextFragment objects.
+ * Thanks to Jason Calabrese for help in redefining the interface.
+ * @param tokenStream
+ * @param text
+ * @param maxNumFragments
+ * @param mergeContiguousFragments
+ * @throws IOException
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
+ */
+ public final TextFragment[] getBestTextFragments(
+ TokenStream tokenStream,
+ String text,
+ boolean mergeContiguousFragments,
+ int maxNumFragments)
+ throws IOException, InvalidTokenOffsetsException
+ {
+ ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
+ StringBuilder newText=new StringBuilder();
+
+ CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+ OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
+ tokenStream.addAttribute(PositionIncrementAttribute.class);
+ tokenStream.reset();
+
+ TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
+
if (fragmentScorer instanceof QueryScorer) {
((QueryScorer) fragmentScorer).setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
}
- TokenStream newStream = fragmentScorer.init(tokenStream);
- if(newStream != null) {
- tokenStream = newStream;
- }
- fragmentScorer.startFragment(currentFrag);
- docFrags.add(currentFrag);
-
- FragmentQueue fragQueue = new FragmentQueue(maxNumFragments);
-
- try
- {
-
- String tokenText;
- int startOffset;
- int endOffset;
- int lastEndOffset = 0;
- textFragmenter.start(text, tokenStream);
-
- TokenGroup tokenGroup=new TokenGroup(tokenStream);
-
- for (boolean next = tokenStream.incrementToken(); next && (offsetAtt.startOffset()< maxDocCharsToAnalyze);
- next = tokenStream.incrementToken())
- {
- if( (offsetAtt.endOffset()>text.length())
- ||
- (offsetAtt.startOffset()>text.length())
- )
- {
- throw new InvalidTokenOffsetsException("Token "+ termAtt.toString()
- +" exceeds length of provided text sized "+text.length());
- }
- if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct()))
- {
- //the current token is distinct from previous tokens -
- // markup the cached token group info
- startOffset = tokenGroup.matchStartOffset;
- endOffset = tokenGroup.matchEndOffset;
- tokenText = text.substring(startOffset, endOffset);
- String markedUpText=formatter.highlightTerm(encoder.encodeText(tokenText), tokenGroup);
- //store any whitespace etc from between this and last group
- if (startOffset > lastEndOffset)
- newText.append(encoder.encodeText(text.substring(lastEndOffset, startOffset)));
- newText.append(markedUpText);
- lastEndOffset=Math.max(endOffset, lastEndOffset);
- tokenGroup.clear();
-
- //check if current token marks the start of a new fragment
- if(textFragmenter.isNewFragment())
- {
- currentFrag.setScore(fragmentScorer.getFragmentScore());
- //record stats for a new fragment
- currentFrag.textEndPos = newText.length();
- currentFrag =new TextFragment(newText, newText.length(), docFrags.size());
- fragmentScorer.startFragment(currentFrag);
- docFrags.add(currentFrag);
- }
- }
-
- tokenGroup.addToken(fragmentScorer.getTokenScore());
-
-// if(lastEndOffset>maxDocBytesToAnalyze)
-// {
-// break;
-// }
- }
- currentFrag.setScore(fragmentScorer.getFragmentScore());
-
- if(tokenGroup.numTokens>0)
- {
- //flush the accumulated text (same code as in above loop)
- startOffset = tokenGroup.matchStartOffset;
- endOffset = tokenGroup.matchEndOffset;
- tokenText = text.substring(startOffset, endOffset);
- String markedUpText=formatter.highlightTerm(encoder.encodeText(tokenText), tokenGroup);
- //store any whitespace etc from between this and last group
- if (startOffset > lastEndOffset)
- newText.append(encoder.encodeText(text.substring(lastEndOffset, startOffset)));
- newText.append(markedUpText);
- lastEndOffset=Math.max(lastEndOffset,endOffset);
- }
-
- //Test what remains of the original text beyond the point where we stopped analyzing
- if (
-// if there is text beyond the last token considered..
- (lastEndOffset < text.length())
- &&
-// and that text is not too large...
- (text.length()<= maxDocCharsToAnalyze)
- )
- {
- //append it to the last fragment
- newText.append(encoder.encodeText(text.substring(lastEndOffset)));
- }
-
- currentFrag.textEndPos = newText.length();
-
- //sort the most relevant sections of the text
- for (Iterator<TextFragment> i = docFrags.iterator(); i.hasNext();)
- {
- currentFrag = i.next();
-
- //If you are running with a version of Lucene before 11th Sept 03
- // you do not have PriorityQueue.insert() - so uncomment the code below
- /*
- if (currentFrag.getScore() >= minScore)
- {
- fragQueue.put(currentFrag);
- if (fragQueue.size() > maxNumFragments)
- { // if hit queue overfull
- fragQueue.pop(); // remove lowest in hit queue
- minScore = ((TextFragment) fragQueue.top()).getScore(); // reset minScore
- }
-
-
- }
- */
- //The above code caused a problem as a result of Christoph Goller's 11th Sept 03
- //fix to PriorityQueue. The correct method to use here is the new "insert" method
- // USE ABOVE CODE IF THIS DOES NOT COMPILE!
- fragQueue.insertWithOverflow(currentFrag);
- }
-
- //return the most relevant fragments
- TextFragment frag[] = new TextFragment[fragQueue.size()];
- for (int i = frag.length - 1; i >= 0; i--)
- {
- frag[i] = fragQueue.pop();
- }
-
- //merge any contiguous fragments to improve readability
- if(mergeContiguousFragments)
- {
- mergeContiguousFragments(frag);
- ArrayList<TextFragment> fragTexts = new ArrayList<TextFragment>();
- for (int i = 0; i < frag.length; i++)
- {
- if ((frag[i] != null) && (frag[i].getScore() > 0))
- {
- fragTexts.add(frag[i]);
- }
- }
- frag= fragTexts.toArray(new TextFragment[0]);
- }
-
- return frag;
-
- }
- finally
- {
- if (tokenStream != null)
- {
- try
- {
- tokenStream.end();
- tokenStream.close();
- }
- catch (Exception e)
- {
- }
- }
- }
- }
-
-
- /** Improves readability of a score-sorted list of TextFragments by merging any fragments
- * that were contiguous in the original text into one larger fragment with the correct order.
- * This will leave a "null" in the array entry for the lesser scored fragment.
- *
- * @param frag An array of document fragments in descending score
- */
- private void mergeContiguousFragments(TextFragment[] frag)
- {
- boolean mergingStillBeingDone;
- if (frag.length > 1)
- do
- {
- mergingStillBeingDone = false; //initialise loop control flag
- //for each fragment, scan other frags looking for contiguous blocks
- for (int i = 0; i < frag.length; i++)
- {
- if (frag[i] == null)
- {
- continue;
- }
- //merge any contiguous blocks
- for (int x = 0; x < frag.length; x++)
- {
- if (frag[x] == null)
- {
- continue;
- }
- if (frag[i] == null)
- {
- break;
- }
- TextFragment frag1 = null;
- TextFragment frag2 = null;
- int frag1Num = 0;
- int frag2Num = 0;
- int bestScoringFragNum;
- int worstScoringFragNum;
- //if blocks are contiguous....
- if (frag[i].follows(frag[x]))
- {
- frag1 = frag[x];
- frag1Num = x;
- frag2 = frag[i];
- frag2Num = i;
- }
- else
- if (frag[x].follows(frag[i]))
- {
- frag1 = frag[i];
- frag1Num = i;
- frag2 = frag[x];
- frag2Num = x;
- }
- //merging required..
- if (frag1 != null)
- {
- if (frag1.getScore() > frag2.getScore())
- {
- bestScoringFragNum = frag1Num;
- worstScoringFragNum = frag2Num;
- }
- else
- {
- bestScoringFragNum = frag2Num;
- worstScoringFragNum = frag1Num;
- }
- frag1.merge(frag2);
- frag[worstScoringFragNum] = null;
- mergingStillBeingDone = true;
- frag[bestScoringFragNum] = frag1;
- }
- }
- }
- }
- while (mergingStillBeingDone);
- }
-
-
- /**
- * Highlights terms in the text , extracting the most relevant sections
- * and concatenating the chosen fragments with a separator (typically "...").
- * The document text is analysed in chunks to record hit statistics
- * across the document. After accumulating stats, the fragments with the highest scores
- * are returned in order as "separator" delimited strings.
- *
- * @param text text to highlight terms in
- * @param maxNumFragments the maximum number of fragments.
- * @param separator the separator used to intersperse the document fragments (typically "...")
- *
- * @return highlighted text
- * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
- */
- public final String getBestFragments(
- TokenStream tokenStream,
- String text,
- int maxNumFragments,
- String separator)
- throws IOException, InvalidTokenOffsetsException
- {
- String sections[] = getBestFragments(tokenStream,text, maxNumFragments);
- StringBuilder result = new StringBuilder();
- for (int i = 0; i < sections.length; i++)
- {
- if (i > 0)
- {
- result.append(separator);
- }
- result.append(sections[i]);
- }
- return result.toString();
- }
+ TokenStream newStream = fragmentScorer.init(tokenStream);
+ if(newStream != null) {
+ tokenStream = newStream;
+ }
+ fragmentScorer.startFragment(currentFrag);
+ docFrags.add(currentFrag);
+
+ FragmentQueue fragQueue = new FragmentQueue(maxNumFragments);
+
+ try
+ {
+
+ String tokenText;
+ int startOffset;
+ int endOffset;
+ int lastEndOffset = 0;
+ textFragmenter.start(text, tokenStream);
+
+ TokenGroup tokenGroup=new TokenGroup(tokenStream);
+
+ for (boolean next = tokenStream.incrementToken(); next && (offsetAtt.startOffset()< maxDocCharsToAnalyze);
+ next = tokenStream.incrementToken())
+ {
+ if( (offsetAtt.endOffset()>text.length())
+ ||
+ (offsetAtt.startOffset()>text.length())
+ )
+ {
+ throw new InvalidTokenOffsetsException("Token "+ termAtt.toString()
+ +" exceeds length of provided text sized "+text.length());
+ }
+ if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct()))
+ {
+ //the current token is distinct from previous tokens -
+ // markup the cached token group info
+ startOffset = tokenGroup.matchStartOffset;
+ endOffset = tokenGroup.matchEndOffset;
+ tokenText = text.substring(startOffset, endOffset);
+ String markedUpText=formatter.highlightTerm(encoder.encodeText(tokenText), tokenGroup);
+ //store any whitespace etc from between this and last group
+ if (startOffset > lastEndOffset)
+ newText.append(encoder.encodeText(text.substring(lastEndOffset, startOffset)));
+ newText.append(markedUpText);
+ lastEndOffset=Math.max(endOffset, lastEndOffset);
+ tokenGroup.clear();
+
+ //check if current token marks the start of a new fragment
+ if(textFragmenter.isNewFragment())
+ {
+ currentFrag.setScore(fragmentScorer.getFragmentScore());
+ //record stats for a new fragment
+ currentFrag.textEndPos = newText.length();
+ currentFrag =new TextFragment(newText, newText.length(), docFrags.size());
+ fragmentScorer.startFragment(currentFrag);
+ docFrags.add(currentFrag);
+ }
+ }
+
+ tokenGroup.addToken(fragmentScorer.getTokenScore());
+
+// if(lastEndOffset>maxDocBytesToAnalyze)
+// {
+// break;
+// }
+ }
+ currentFrag.setScore(fragmentScorer.getFragmentScore());
+
+ if(tokenGroup.numTokens>0)
+ {
+ //flush the accumulated text (same code as in above loop)
+ startOffset = tokenGroup.matchStartOffset;
+ endOffset = tokenGroup.matchEndOffset;
+ tokenText = text.substring(startOffset, endOffset);
+ String markedUpText=formatter.highlightTerm(encoder.encodeText(tokenText), tokenGroup);
+ //store any whitespace etc from between this and last group
+ if (startOffset > lastEndOffset)
+ newText.append(encoder.encodeText(text.substring(lastEndOffset, startOffset)));
+ newText.append(markedUpText);
+ lastEndOffset=Math.max(lastEndOffset,endOffset);
+ }
+
+ //Test what remains of the original text beyond the point where we stopped analyzing
+ if (
+// if there is text beyond the last token considered..
+ (lastEndOffset < text.length())
+ &&
+// and that text is not too large...
+ (text.length()<= maxDocCharsToAnalyze)
+ )
+ {
+ //append it to the last fragment
+ newText.append(encoder.encodeText(text.substring(lastEndOffset)));
+ }
+
+ currentFrag.textEndPos = newText.length();
+
+ //sort the most relevant sections of the text
+ for (Iterator<TextFragment> i = docFrags.iterator(); i.hasNext();)
+ {
+ currentFrag = i.next();
+
+ //If you are running with a version of Lucene before 11th Sept 03
+ // you do not have PriorityQueue.insert() - so uncomment the code below
+ /*
+ if (currentFrag.getScore() >= minScore)
+ {
+ fragQueue.put(currentFrag);
+ if (fragQueue.size() > maxNumFragments)
+ { // if hit queue overfull
+ fragQueue.pop(); // remove lowest in hit queue
+ minScore = ((TextFragment) fragQueue.top()).getScore(); // reset minScore
+ }
+
+
+ }
+ */
+ //The above code caused a problem as a result of Christoph Goller's 11th Sept 03
+ //fix to PriorityQueue. The correct method to use here is the new "insert" method
+ // USE ABOVE CODE IF THIS DOES NOT COMPILE!
+ fragQueue.insertWithOverflow(currentFrag);
+ }
+
+ //return the most relevant fragments
+ TextFragment frag[] = new TextFragment[fragQueue.size()];
+ for (int i = frag.length - 1; i >= 0; i--)
+ {
+ frag[i] = fragQueue.pop();
+ }
+
+ //merge any contiguous fragments to improve readability
+ if(mergeContiguousFragments)
+ {
+ mergeContiguousFragments(frag);
+ ArrayList<TextFragment> fragTexts = new ArrayList<TextFragment>();
+ for (int i = 0; i < frag.length; i++)
+ {
+ if ((frag[i] != null) && (frag[i].getScore() > 0))
+ {
+ fragTexts.add(frag[i]);
+ }
+ }
+ frag= fragTexts.toArray(new TextFragment[0]);
+ }
+
+ return frag;
+
+ }
+ finally
+ {
+ if (tokenStream != null)
+ {
+ try
+ {
+ tokenStream.end();
+ tokenStream.close();
+ }
+ catch (Exception e)
+ {
+ }
+ }
+ }
+ }
+
+
+ /** Improves readability of a score-sorted list of TextFragments by merging any fragments
+ * that were contiguous in the original text into one larger fragment with the correct order.
+ * This will leave a "null" in the array entry for the lesser scored fragment.
+ *
+ * @param frag An array of document fragments in descending score
+ */
+ private void mergeContiguousFragments(TextFragment[] frag)
+ {
+ boolean mergingStillBeingDone;
+ if (frag.length > 1)
+ do
+ {
+ mergingStillBeingDone = false; //initialise loop control flag
+ //for each fragment, scan other frags looking for contiguous blocks
+ for (int i = 0; i < frag.length; i++)
+ {
+ if (frag[i] == null)
+ {
+ continue;
+ }
+ //merge any contiguous blocks
+ for (int x = 0; x < frag.length; x++)
+ {
+ if (frag[x] == null)
+ {
+ continue;
+ }
+ if (frag[i] == null)
+ {
+ break;
+ }
+ TextFragment frag1 = null;
+ TextFragment frag2 = null;
+ int frag1Num = 0;
+ int frag2Num = 0;
+ int bestScoringFragNum;
+ int worstScoringFragNum;
+ //if blocks are contiguous....
+ if (frag[i].follows(frag[x]))
+ {
+ frag1 = frag[x];
+ frag1Num = x;
+ frag2 = frag[i];
+ frag2Num = i;
+ }
+ else
+ if (frag[x].follows(frag[i]))
+ {
+ frag1 = frag[i];
+ frag1Num = i;
+ frag2 = frag[x];
+ frag2Num = x;
+ }
+ //merging required..
+ if (frag1 != null)
+ {
+ if (frag1.getScore() > frag2.getScore())
+ {
+ bestScoringFragNum = frag1Num;
+ worstScoringFragNum = frag2Num;
+ }
+ else
+ {
+ bestScoringFragNum = frag2Num;
+ worstScoringFragNum = frag1Num;
+ }
+ frag1.merge(frag2);
+ frag[worstScoringFragNum] = null;
+ mergingStillBeingDone = true;
+ frag[bestScoringFragNum] = frag1;
+ }
+ }
+ }
+ }
+ while (mergingStillBeingDone);
+ }
+
+
+ /**
+ * Highlights terms in the text , extracting the most relevant sections
+ * and concatenating the chosen fragments with a separator (typically "...").
+ * The document text is analysed in chunks to record hit statistics
+ * across the document. After accumulating stats, the fragments with the highest scores
+ * are returned in order as "separator" delimited strings.
+ *
+ * @param text text to highlight terms in
+ * @param maxNumFragments the maximum number of fragments.
+ * @param separator the separator used to intersperse the document fragments (typically "...")
+ *
+ * @return highlighted text
+ * @throws InvalidTokenOffsetsException thrown if any token's endOffset exceeds the provided text's length
+ */
+ public final String getBestFragments(
+ TokenStream tokenStream,
+ String text,
+ int maxNumFragments,
+ String separator)
+ throws IOException, InvalidTokenOffsetsException
+ {
+ String sections[] = getBestFragments(tokenStream,text, maxNumFragments);
+ StringBuilder result = new StringBuilder();
+ for (int i = 0; i < sections.length; i++)
+ {
+ if (i > 0)
+ {
+ result.append(separator);
+ }
+ result.append(sections[i]);
+ }
+ return result.toString();
+ }
public int getMaxDocCharsToAnalyze() {
return maxDocCharsToAnalyze;
@@ -487,35 +487,35 @@ public class Highlighter
}
- public Fragmenter getTextFragmenter()
- {
- return textFragmenter;
- }
-
- /**
- * @param fragmenter
- */
- public void setTextFragmenter(Fragmenter fragmenter)
- {
- textFragmenter = fragmenter;
- }
-
- /**
- * @return Object used to score each text fragment
- */
- public Scorer getFragmentScorer()
- {
- return fragmentScorer;
- }
-
-
- /**
- * @param scorer
- */
- public void setFragmentScorer(Scorer scorer)
- {
- fragmentScorer = scorer;
- }
+ public Fragmenter getTextFragmenter()
+ {
+ return textFragmenter;
+ }
+
+ /**
+ * @param fragmenter
+ */
+ public void setTextFragmenter(Fragmenter fragmenter)
+ {
+ textFragmenter = fragmenter;
+ }
+
+ /**
+ * @return Object used to score each text fragment
+ */
+ public Scorer getFragmentScorer()
+ {
+ return fragmentScorer;
+ }
+
+
+ /**
+ * @param scorer
+ */
+ public void setFragmentScorer(Scorer scorer)
+ {
+ fragmentScorer = scorer;
+ }
public Encoder getEncoder()
{
@@ -528,17 +528,17 @@ public class Highlighter
}
class FragmentQueue extends PriorityQueue<TextFragment>
{
- public FragmentQueue(int size)
- {
- super(size);
- }
-
- @Override
- public final boolean lessThan(TextFragment fragA, TextFragment fragB)
- {
- if (fragA.getScore() == fragB.getScore())
- return fragA.fragNum > fragB.fragNum;
- else
- return fragA.getScore() < fragB.getScore();
- }
+ public FragmentQueue(int size)
+ {
+ super(size);
+ }
+
+ @Override
+ public final boolean lessThan(TextFragment fragA, TextFragment fragB)
+ {
+ if (fragA.getScore() == fragB.getScore())
+ return fragA.fragNum > fragB.fragNum;
+ else
+ return fragA.getScore() < fragB.getScore();
+ }
}
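
For reference, a minimal usage sketch of the separator-joining getBestFragments variant above. The field name, analyzer, query and fragment size are placeholders, and the calling code is assumed to declare IOException and InvalidTokenOffsetsException:

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.highlight.*;
    import org.apache.lucene.util.Version;

    String text = "Apache Lucene is a high-performance text search engine library.";
    Query query = new TermQuery(new Term("contents", "lucene"));      // placeholder field and term
    Highlighter highlighter = new Highlighter(new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(40));          // ~40-character fragments
    TokenStream tokenStream =
        new StandardAnalyzer(Version.LUCENE_40).tokenStream("contents", new StringReader(text));
    // Scores each fragment, merges contiguous ones, and joins the best three with "..."
    String snippet = highlighter.getBestFragments(tokenStream, text, 3, "...");
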
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/InvalidTokenOffsetsException.java Mon Sep 17 16:01:56 2012
@@ -23,9 +23,9 @@ package org.apache.lucene.search.highlig
public class InvalidTokenOffsetsException extends Exception
{
- public InvalidTokenOffsetsException(String message)
- {
- super(message);
- }
+ public InvalidTokenOffsetsException(String message)
+ {
+ super(message);
+ }
}
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermExtractor.java Mon Sep 17 16:01:56 2012
@@ -37,126 +37,118 @@ import org.apache.lucene.search.Query;
public final class QueryTermExtractor
{
- /**
- * Extracts all terms texts of a given Query into an array of WeightedTerms
- *
- * @param query Query to extract term texts from
- * @return an array of the terms used in a query, plus their weights.
- */
- public static final WeightedTerm[] getTerms(Query query)
- {
- return getTerms(query,false);
- }
-
- /**
- * Extracts all terms texts of a given Query into an array of WeightedTerms
- *
- * @param query Query to extract term texts from
- * @param reader used to compute IDF which can be used to a) score selected fragments better
- * b) use graded highlights eg changing intensity of font color
- * @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based
- * @return an array of the terms used in a query, plus their weights.
- */
- public static final WeightedTerm[] getIdfWeightedTerms(Query query, IndexReader reader, String fieldName)
- {
- WeightedTerm[] terms=getTerms(query,false, fieldName);
- int totalNumDocs=reader.maxDoc();
- for (int i = 0; i < terms.length; i++)
+ /**
+   * Extracts all term texts of a given Query into an array of WeightedTerms
+ *
+ * @param query Query to extract term texts from
+ * @return an array of the terms used in a query, plus their weights.
+ */
+ public static final WeightedTerm[] getTerms(Query query)
+ {
+ return getTerms(query,false);
+ }
+
+ /**
+   * Extracts all term texts of a given Query into an array of WeightedTerms
+   *
+   * @param query Query to extract term texts from
+   * @param reader used to compute IDF, which can be used to (a) score selected fragments better and
+   * (b) produce graded highlights, e.g. by varying the intensity of the font color
+ * @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based
+ * @return an array of the terms used in a query, plus their weights.
+ */
+ public static final WeightedTerm[] getIdfWeightedTerms(Query query, IndexReader reader, String fieldName)
+ {
+ WeightedTerm[] terms=getTerms(query,false, fieldName);
+ int totalNumDocs=reader.maxDoc();
+ for (int i = 0; i < terms.length; i++)
{
- try
+ try
{
int docFreq=reader.docFreq(new Term(fieldName,terms[i].term));
//IDF algorithm taken from DefaultSimilarity class
float idf=(float)(Math.log(totalNumDocs/(double)(docFreq+1)) + 1.0);
terms[i].weight*=idf;
}
- catch (IOException e)
+ catch (IOException e)
{
- //ignore
+ //ignore
}
}
- return terms;
- }
+ return terms;
+ }
- /**
- * Extracts all terms texts of a given Query into an array of WeightedTerms
- *
- * @param query Query to extract term texts from
- * @param prohibited <code>true</code> to extract "prohibited" terms, too
- * @param fieldName The fieldName used to filter query terms
+ /**
+   * Extracts all term texts of a given Query into an array of WeightedTerms
+ *
+ * @param query Query to extract term texts from
+ * @param prohibited <code>true</code> to extract "prohibited" terms, too
+ * @param fieldName The fieldName used to filter query terms
* @return an array of the terms used in a query, plus their weights.
*/
- public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName)
- {
- HashSet<WeightedTerm> terms=new HashSet<WeightedTerm>();
- getTerms(query,terms,prohibited,fieldName);
- return terms.toArray(new WeightedTerm[0]);
- }
-
- /**
- * Extracts all terms texts of a given Query into an array of WeightedTerms
- *
- * @param query Query to extract term texts from
- * @param prohibited <code>true</code> to extract "prohibited" terms, too
+ public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName)
+ {
+ HashSet<WeightedTerm> terms=new HashSet<WeightedTerm>();
+ getTerms(query,terms,prohibited,fieldName);
+ return terms.toArray(new WeightedTerm[0]);
+ }
+
+ /**
+   * Extracts all term texts of a given Query into an array of WeightedTerms
+ *
+ * @param query Query to extract term texts from
+ * @param prohibited <code>true</code> to extract "prohibited" terms, too
* @return an array of the terms used in a query, plus their weights.
*/
- public static final WeightedTerm[] getTerms(Query query, boolean prohibited)
- {
- return getTerms(query,prohibited,null);
- }
-
- private static final void getTerms(Query query, HashSet<WeightedTerm> terms,boolean prohibited, String fieldName)
- {
- try
- {
- if (query instanceof BooleanQuery)
- getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited, fieldName);
- else
- if(query instanceof FilteredQuery)
- getTermsFromFilteredQuery((FilteredQuery)query, terms,prohibited, fieldName);
- else
- {
- HashSet<Term> nonWeightedTerms=new HashSet<Term>();
- query.extractTerms(nonWeightedTerms);
- for (Iterator<Term> iter = nonWeightedTerms.iterator(); iter.hasNext();)
- {
- Term term = iter.next();
- if((fieldName==null)||(term.field().equals(fieldName)))
- {
- terms.add(new WeightedTerm(query.getBoost(),term.text()));
- }
- }
- }
- }
- catch(UnsupportedOperationException ignore)
- {
- //this is non-fatal for our purposes
- }
- }
-
- /**
- * extractTerms is currently the only query-independent means of introspecting queries but it only reveals
- * a list of terms for that query - not the boosts each individual term in that query may or may not have.
- * "Container" queries such as BooleanQuery should be unwrapped to get at the boost info held
- * in each child element.
- * Some discussion around this topic here:
- * http://www.gossamer-threads.com/lists/lucene/java-dev/34208?search_string=introspection;#34208
- * Unfortunately there seemed to be limited interest in requiring all Query objects to implement
- * something common which would allow access to child queries so what follows here are query-specific
- * implementations for accessing embedded query elements.
- */
- private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
- {
- BooleanClause[] queryClauses = query.getClauses();
- for (int i = 0; i < queryClauses.length; i++)
- {
- if (prohibited || queryClauses[i].getOccur()!=BooleanClause.Occur.MUST_NOT)
- getTerms(queryClauses[i].getQuery(), terms, prohibited, fieldName);
- }
- }
- private static void getTermsFromFilteredQuery(FilteredQuery query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
- {
- getTerms(query.getQuery(),terms,prohibited,fieldName);
- }
-
+ public static final WeightedTerm[] getTerms(Query query, boolean prohibited)
+ {
+ return getTerms(query,prohibited,null);
+ }
+
+ private static final void getTerms(Query query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName) {
+ try {
+ if (query instanceof BooleanQuery)
+ getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited, fieldName);
+ else if (query instanceof FilteredQuery)
+ getTermsFromFilteredQuery((FilteredQuery) query, terms, prohibited, fieldName);
+ else {
+ HashSet<Term> nonWeightedTerms = new HashSet<Term>();
+ query.extractTerms(nonWeightedTerms);
+ for (Iterator<Term> iter = nonWeightedTerms.iterator(); iter.hasNext(); ) {
+ Term term = iter.next();
+ if ((fieldName == null) || (term.field().equals(fieldName))) {
+ terms.add(new WeightedTerm(query.getBoost(), term.text()));
+ }
+ }
+ }
+ } catch (UnsupportedOperationException ignore) {
+ //this is non-fatal for our purposes
+ }
+ }
+
+ /**
+ * extractTerms is currently the only query-independent means of introspecting queries but it only reveals
+ * a list of terms for that query - not the boosts each individual term in that query may or may not have.
+ * "Container" queries such as BooleanQuery should be unwrapped to get at the boost info held
+ * in each child element.
+ * Some discussion around this topic here:
+ * http://www.gossamer-threads.com/lists/lucene/java-dev/34208?search_string=introspection;#34208
+   * Unfortunately there seemed to be limited interest in requiring all Query objects to implement
+   * something common that would allow access to child queries, so what follows here are query-specific
+ * implementations for accessing embedded query elements.
+ */
+ private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
+ {
+ BooleanClause[] queryClauses = query.getClauses();
+ for (int i = 0; i < queryClauses.length; i++)
+ {
+ if (prohibited || queryClauses[i].getOccur()!=BooleanClause.Occur.MUST_NOT)
+ getTerms(queryClauses[i].getQuery(), terms, prohibited, fieldName);
+ }
+ }
+ private static void getTermsFromFilteredQuery(FilteredQuery query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
+ {
+ getTerms(query.getQuery(),terms,prohibited,fieldName);
+ }
+
}
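
A small sketch of the term extraction above, using made-up field and term names, showing how BooleanQuery clauses are unwrapped so that per-clause boosts are preserved and MUST_NOT clauses are skipped unless prohibited terms are requested:

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.BooleanClause;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.highlight.QueryTermExtractor;
    import org.apache.lucene.search.highlight.WeightedTerm;

    BooleanQuery query = new BooleanQuery();
    TermQuery boosted = new TermQuery(new Term("contents", "lucene"));
    boosted.setBoost(2.0f);                                            // boost is carried into the WeightedTerm
    query.add(boosted, BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("contents", "search")), BooleanClause.Occur.SHOULD);
    query.add(new TermQuery(new Term("contents", "noise")), BooleanClause.Occur.MUST_NOT);

    WeightedTerm[] terms = QueryTermExtractor.getTerms(query);        // prohibited defaults to false
    for (WeightedTerm wt : terms) {
      System.out.println(wt.getTerm() + " weight=" + wt.getWeight()); // "lucene" has weight 2.0, "noise" is absent
    }
    // With an IndexReader at hand, getIdfWeightedTerms(query, reader, "contents") additionally
    // multiplies each weight by idf = log(maxDoc / (docFreq + 1)) + 1.
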
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java Mon Sep 17 16:01:56 2012
@@ -21,61 +21,61 @@ package org.apache.lucene.search.highlig
*/
public class SimpleHTMLEncoder implements Encoder
{
- public SimpleHTMLEncoder()
- {
- }
-
- public String encodeText(String originalText)
- {
- return htmlEncode(originalText);
- }
-
- /**
- * Encode string into HTML
- */
- public final static String htmlEncode(String plainText)
- {
- if (plainText == null || plainText.length() == 0)
- {
- return "";
- }
-
- StringBuilder result = new StringBuilder(plainText.length());
-
- for (int index=0; index<plainText.length(); index++)
- {
- char ch = plainText.charAt(index);
-
- switch (ch)
- {
- case '"':
-        result.append("&quot;");
-        break;
-
-      case '&':
-        result.append("&amp;");
-        break;
-
-      case '<':
-        result.append("&lt;");
-        break;
-
-      case '>':
-        result.append("&gt;");
- break;
-
- default:
- if (ch < 128)
- {
- result.append(ch);
- }
- else
- {
- result.append("&#").append((int)ch).append(";");
- }
- }
- }
+ public SimpleHTMLEncoder()
+ {
+ }
+
+ public String encodeText(String originalText)
+ {
+ return htmlEncode(originalText);
+ }
+
+ /**
+ * Encode string into HTML
+ */
+ public final static String htmlEncode(String plainText)
+ {
+ if (plainText == null || plainText.length() == 0)
+ {
+ return "";
+ }
+
+ StringBuilder result = new StringBuilder(plainText.length());
+
+ for (int index=0; index<plainText.length(); index++)
+ {
+ char ch = plainText.charAt(index);
+
+ switch (ch)
+ {
+ case '"':
+        result.append("&quot;");
+        break;
+
+      case '&':
+        result.append("&amp;");
+        break;
+
+      case '<':
+        result.append("&lt;");
+        break;
+
+      case '>':
+        result.append("&gt;");
+ break;
+
+ default:
+ if (ch < 128)
+ {
+ result.append(ch);
+ }
+ else
+ {
+ result.append("&#").append((int)ch).append(";");
+ }
+ }
+ }
- return result.toString();
- }
+ return result.toString();
+ }
}
\ No newline at end of file
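
A quick illustration of the encoder above; the input string is invented, and characters outside ASCII become numeric character references:

    String encoded = SimpleHTMLEncoder.htmlEncode("a < b & \"caf\u00e9\"");
    // encoded is: a &lt; b &amp; &quot;caf&#233;&quot;

An instance of this encoder is typically supplied to a Highlighter (see the formatter sketch below) so that fragment text is HTML-escaped before markup tags are added.
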
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLFormatter.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLFormatter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLFormatter.java Mon Sep 17 16:01:56 2012
@@ -26,34 +26,34 @@ public class SimpleHTMLFormatter impleme
private static final String DEFAULT_PRE_TAG = "<B>";
private static final String DEFAULT_POST_TAG = "</B>";
- private String preTag;
- private String postTag;
-
- public SimpleHTMLFormatter(String preTag, String postTag) {
- this.preTag = preTag;
- this.postTag = postTag;
- }
+ private String preTag;
+ private String postTag;
- /** Default constructor uses HTML: <B> tags to markup terms. */
- public SimpleHTMLFormatter() {
- this(DEFAULT_PRE_TAG, DEFAULT_POST_TAG);
- }
+ public SimpleHTMLFormatter(String preTag, String postTag) {
+ this.preTag = preTag;
+ this.postTag = postTag;
+ }
- /* (non-Javadoc)
- * @see org.apache.lucene.search.highlight.Formatter#highlightTerm(java.lang.String, org.apache.lucene.search.highlight.TokenGroup)
- */
- public String highlightTerm(String originalText, TokenGroup tokenGroup) {
- if (tokenGroup.getTotalScore() <= 0) {
- return originalText;
- }
-
- // Allocate StringBuilder with the right number of characters from the
+  /** Default constructor uses HTML <B> tags to mark up terms. */
+ public SimpleHTMLFormatter() {
+ this(DEFAULT_PRE_TAG, DEFAULT_POST_TAG);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.lucene.search.highlight.Formatter#highlightTerm(java.lang.String, org.apache.lucene.search.highlight.TokenGroup)
+ */
+ public String highlightTerm(String originalText, TokenGroup tokenGroup) {
+ if (tokenGroup.getTotalScore() <= 0) {
+ return originalText;
+ }
+
+ // Allocate StringBuilder with the right number of characters from the
// beginning, to avoid char[] allocations in the middle of appends.
- StringBuilder returnBuffer = new StringBuilder(preTag.length() + originalText.length() + postTag.length());
- returnBuffer.append(preTag);
- returnBuffer.append(originalText);
- returnBuffer.append(postTag);
- return returnBuffer.toString();
- }
-
+ StringBuilder returnBuffer = new StringBuilder(preTag.length() + originalText.length() + postTag.length());
+ returnBuffer.append(preTag);
+ returnBuffer.append(originalText);
+ returnBuffer.append(postTag);
+ return returnBuffer.toString();
+ }
+
}
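
A sketch of plugging the formatter above into a Highlighter with non-default tags; the <em> markup, field name and query are illustrative choices rather than anything prescribed by this class:

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.highlight.*;

    Query query = new TermQuery(new Term("contents", "lucene"));       // placeholder query
    // Wrap matched terms in <em>...</em> instead of the default <B>...</B>,
    // and HTML-escape the fragment text with SimpleHTMLEncoder.
    Formatter formatter = new SimpleHTMLFormatter("<em class=\"hit\">", "</em>");
    Highlighter highlighter =
        new Highlighter(formatter, new SimpleHTMLEncoder(), new QueryScorer(query));
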
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SpanGradientFormatter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SpanGradientFormatter.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SpanGradientFormatter.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SpanGradientFormatter.java Mon Sep 17 16:01:56 2012
@@ -22,57 +22,49 @@ package org.apache.lucene.search.highlig
* doesn't work in Mozilla, thus this class.
*
* @see GradientFormatter
- *
*/
public class SpanGradientFormatter
- extends GradientFormatter
-{
- public SpanGradientFormatter(float maxScore, String minForegroundColor,
- String maxForegroundColor, String minBackgroundColor,
- String maxBackgroundColor)
- {
- super( maxScore, minForegroundColor,
- maxForegroundColor, minBackgroundColor,
- maxBackgroundColor);
- }
-
-
-
- @Override
- public String highlightTerm(String originalText, TokenGroup tokenGroup)
- {
- if (tokenGroup.getTotalScore() == 0)
- return originalText;
- float score = tokenGroup.getTotalScore();
- if (score == 0)
- {
- return originalText;
- }
-
- // try to size sb correctly
- StringBuilder sb = new StringBuilder( originalText.length() + EXTRA);
-
- sb.append("<span style=\"");
- if (highlightForeground)
- {
- sb.append("color: ");
- sb.append(getForegroundColorString(score));
- sb.append("; ");
- }
- if (highlightBackground)
- {
- sb.append("background: ");
- sb.append(getBackgroundColorString(score));
- sb.append("; ");
- }
- sb.append("\">");
- sb.append(originalText);
- sb.append("</span>");
- return sb.toString();
+ extends GradientFormatter {
+ public SpanGradientFormatter(float maxScore, String minForegroundColor,
+ String maxForegroundColor, String minBackgroundColor,
+ String maxBackgroundColor) {
+ super(maxScore, minForegroundColor,
+ maxForegroundColor, minBackgroundColor,
+ maxBackgroundColor);
+ }
+
+
+ @Override
+  public String highlightTerm(String originalText, TokenGroup tokenGroup) {
+    // the total score is checked once; zero means there is nothing to highlight
+    float score = tokenGroup.getTotalScore();
+    if (score == 0) {
+      return originalText;
+    }
+
+ // try to size sb correctly
+ StringBuilder sb = new StringBuilder(originalText.length() + EXTRA);
+
+ sb.append("<span style=\"");
+ if (highlightForeground) {
+ sb.append("color: ");
+ sb.append(getForegroundColorString(score));
+ sb.append("; ");
+ }
+ if (highlightBackground) {
+ sb.append("background: ");
+ sb.append(getBackgroundColorString(score));
+ sb.append("; ");
}
+ sb.append("\">");
+ sb.append(originalText);
+ sb.append("</span>");
+ return sb.toString();
+ }
- // guess how much extra text we'll add to the text we're highlighting to try to avoid a StringBuilder resize
- private static final String TEMPLATE = "<span style=\"background: #EEEEEE; color: #000000;\">...</span>";
- private static final int EXTRA = TEMPLATE.length();
+ // guess how much extra text we'll add to the text we're highlighting to try to avoid a StringBuilder resize
+ private static final String TEMPLATE = "<span style=\"background: #EEEEEE; color: #000000;\">...</span>";
+ private static final int EXTRA = TEMPLATE.length();
}
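
A construction sketch for the span-based gradient formatter above. The maxScore and colors are arbitrary; as with GradientFormatter, passing null for both foreground colors leaves the text color alone so only the background is shaded:

    import org.apache.lucene.search.highlight.*;

    // Background shades from white toward red as a token group's score approaches maxScore (2.0f here).
    Formatter formatter = new SpanGradientFormatter(2.0f, null, null, "#FFFFFF", "#FF0000");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    // query is assumed to be built elsewhere, e.g. a TermQuery as in the earlier sketches.
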
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java Mon Sep 17 16:01:56 2012
@@ -25,57 +25,57 @@ package org.apache.lucene.search.highlig
*/
public class TextFragment
{
- CharSequence markedUpText;
- int fragNum;
- int textStartPos;
- int textEndPos;
- float score;
+ CharSequence markedUpText;
+ int fragNum;
+ int textStartPos;
+ int textEndPos;
+ float score;
- public TextFragment(CharSequence markedUpText,int textStartPos, int fragNum)
- {
- this.markedUpText=markedUpText;
- this.textStartPos = textStartPos;
- this.fragNum = fragNum;
- }
+ public TextFragment(CharSequence markedUpText,int textStartPos, int fragNum)
+ {
+ this.markedUpText=markedUpText;
+ this.textStartPos = textStartPos;
+ this.fragNum = fragNum;
+ }
- void setScore(float score)
- {
- this.score=score;
- }
- public float getScore()
- {
- return score;
- }
- /**
- * @param frag2 Fragment to be merged into this one
- */
+ void setScore(float score)
+ {
+ this.score=score;
+ }
+ public float getScore()
+ {
+ return score;
+ }
+ /**
+ * @param frag2 Fragment to be merged into this one
+ */
public void merge(TextFragment frag2)
{
textEndPos = frag2.textEndPos;
score=Math.max(score,frag2.score);
}
/**
- * @param fragment
- * @return true if this fragment follows the one passed
- */
- public boolean follows(TextFragment fragment)
- {
- return textStartPos == fragment.textEndPos;
- }
+ * @param fragment
+ * @return true if this fragment follows the one passed
+ */
+ public boolean follows(TextFragment fragment)
+ {
+ return textStartPos == fragment.textEndPos;
+ }
- /**
- * @return the fragment sequence number
- */
- public int getFragNum()
- {
- return fragNum;
- }
+ /**
+ * @return the fragment sequence number
+ */
+ public int getFragNum()
+ {
+ return fragNum;
+ }
- /* Returns the marked-up text for this text fragment
- */
- @Override
- public String toString() {
- return markedUpText.subSequence(textStartPos, textEndPos).toString();
- }
+ /* Returns the marked-up text for this text fragment
+ */
+ @Override
+ public String toString() {
+ return markedUpText.subSequence(textStartPos, textEndPos).toString();
+ }
}
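
An in-package sketch of the follows/merge contract above. The offsets and scores are invented, and because textEndPos and setScore are package-private the snippet only compiles in org.apache.lucene.search.highlight:

    StringBuilder markedUp = new StringBuilder();    // marked-up document text, built up elsewhere
    TextFragment first = new TextFragment(markedUp, 0, 0);
    first.textEndPos = 40;                           // fragment covers characters [0, 40)
    first.setScore(1.5f);
    TextFragment second = new TextFragment(markedUp, 40, 1);
    second.textEndPos = 90;                          // starts exactly where 'first' ends
    second.setScore(0.5f);

    boolean contiguous = second.follows(first);      // true: second.textStartPos == first.textEndPos
    first.merge(second);                             // first now covers [0, 90) and keeps the higher score, 1.5
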
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedTerm.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedTerm.java?rev=1386681&r1=1386680&r2=1386681&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedTerm.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedTerm.java Mon Sep 17 16:01:56 2012
@@ -20,45 +20,45 @@ package org.apache.lucene.search.highlig
*/
public class WeightedTerm
{
- float weight; // multiplier
- String term; //stemmed form
- public WeightedTerm (float weight,String term)
- {
- this.weight=weight;
- this.term=term;
- }
-
-
- /**
- * @return the term value (stemmed)
- */
- public String getTerm()
- {
- return term;
- }
-
- /**
- * @return the weight associated with this term
- */
- public float getWeight()
- {
- return weight;
- }
-
- /**
- * @param term the term value (stemmed)
- */
- public void setTerm(String term)
- {
- this.term = term;
- }
-
- /**
- * @param weight the weight associated with this term
- */
- public void setWeight(float weight)
- {
- this.weight = weight;
- }
+ float weight; // multiplier
+ String term; //stemmed form
+ public WeightedTerm (float weight,String term)
+ {
+ this.weight=weight;
+ this.term=term;
+ }
+
+
+ /**
+ * @return the term value (stemmed)
+ */
+ public String getTerm()
+ {
+ return term;
+ }
+
+ /**
+ * @return the weight associated with this term
+ */
+ public float getWeight()
+ {
+ return weight;
+ }
+
+ /**
+ * @param term the term value (stemmed)
+ */
+ public void setTerm(String term)
+ {
+ this.term = term;
+ }
+
+ /**
+ * @param weight the weight associated with this term
+ */
+ public void setWeight(float weight)
+ {
+ this.weight = weight;
+ }
}