You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2011/11/03 14:35:20 UTC
svn commit: r1197110 [8/10] - in /lucene/dev/branches/solrcloud: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/
dev-tools/idea/lucene/contrib/ dev-tools/idea/modules/benchmark/
dev-tools/idea/solr/contrib/langid/ dev-tools/...
Modified: lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java Thu Nov 3 13:35:07 2011
@@ -32,8 +32,8 @@ import org.apache.lucene.facet.search.re
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
/**
@@ -93,7 +93,7 @@ public class MultiIteratorsPerCLParamsTe
Directory taxoDir = newDirectory();
populateIndex(iParams, indexDir, taxoDir);
- TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
+ TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
IndexReader reader = IndexReader.open(indexDir);
CategoryListCache clCache = null;
@@ -168,7 +168,7 @@ public class MultiIteratorsPerCLParamsTe
Directory taxoDir) throws Exception {
RandomIndexWriter writer = new RandomIndexWriter(random, indexDir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
- TaxonomyWriter taxoWriter = new LuceneTaxonomyWriter(taxoDir);
+ TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
for (CategoryPath[] categories : perDocCategories) {
writer.addDocument(new CategoryDocumentBuilder(taxoWriter, iParams)
Modified: lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java Thu Nov 3 13:35:07 2011
@@ -14,8 +14,8 @@ import org.junit.Test;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.util.SlowRAMDirectory;
/**
@@ -159,7 +159,7 @@ public class TestTaxonomyCombined extend
@Test
public void testWriter() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
// Also check TaxonomyWriter.getSize() - see that the taxonomy's size
// is what we expect it to be.
@@ -175,7 +175,7 @@ public class TestTaxonomyCombined extend
@Test
public void testWriterTwice() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
// run fillTaxonomy again - this will try to add the same categories
// again, and check that we see the same ordinal paths again, not
@@ -197,10 +197,10 @@ public class TestTaxonomyCombined extend
@Test
public void testWriterTwice2() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.close();
- tw = new LuceneTaxonomyWriter(indexDir);
+ tw = new DirectoryTaxonomyWriter(indexDir);
// run fillTaxonomy again - this will try to add the same categories
// again, and check that we see the same ordinals again, not different
// ones, and that the number of categories hasn't grown by the new
@@ -222,7 +222,7 @@ public class TestTaxonomyCombined extend
public void testWriterTwice3() throws Exception {
Directory indexDir = newDirectory();
// First, create and fill the taxonomy
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.close();
// Now, open the same taxonomy and add the same categories again.
@@ -231,7 +231,7 @@ public class TestTaxonomyCombined extend
// all into memory and close it's reader. The bug was that it closed
// the reader, but forgot that it did (because it didn't set the reader
// reference to null).
- tw = new LuceneTaxonomyWriter(indexDir);
+ tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
// Add one new category, just to make commit() do something:
tw.addCategory(new CategoryPath("hi"));
@@ -253,7 +253,7 @@ public class TestTaxonomyCombined extend
@Test
public void testWriterSimpler() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
assertEquals(1, tw.getSize()); // the root only
// Test that adding a new top-level category works
assertEquals(1, tw.addCategory(new CategoryPath("a")));
@@ -297,12 +297,12 @@ public class TestTaxonomyCombined extend
@Test
public void testRootOnly() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
// right after opening the index, it should already contain the
// root, so have size 1:
assertEquals(1, tw.getSize());
tw.close();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
assertEquals(1, tr.getSize());
assertEquals(0, tr.getPath(0).length());
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
@@ -319,9 +319,9 @@ public class TestTaxonomyCombined extend
@Test
public void testRootOnly2() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
tw.commit();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
assertEquals(1, tr.getSize());
assertEquals(0, tr.getPath(0).length());
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
@@ -339,10 +339,10 @@ public class TestTaxonomyCombined extend
@Test
public void testReaderBasic() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.close();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
// test TaxonomyReader.getSize():
assertEquals(expectedCategories.length, tr.getSize());
@@ -398,10 +398,10 @@ public class TestTaxonomyCombined extend
@Test
public void testReaderParent() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.close();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
// check that the parent of the root ordinal is the invalid ordinal:
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
@@ -463,11 +463,11 @@ public class TestTaxonomyCombined extend
@Test
public void testWriterParent1() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.close();
- tw = new LuceneTaxonomyWriter(indexDir);
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ tw = new DirectoryTaxonomyWriter(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
checkWriterParent(tr, tw);
@@ -479,10 +479,10 @@ public class TestTaxonomyCombined extend
@Test
public void testWriterParent2() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.commit();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
checkWriterParent(tr, tw);
@@ -542,10 +542,10 @@ public class TestTaxonomyCombined extend
@Test
public void testReaderParentArray() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.close();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
int[] parents = tr.getParentArray();
assertEquals(tr.getSize(), parents.length);
for (int i=0; i<tr.getSize(); i++) {
@@ -563,10 +563,10 @@ public class TestTaxonomyCombined extend
@Test
public void testChildrenArrays() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.close();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
ChildrenArrays ca = tr.getChildrenArrays();
int[] youngestChildArray = ca.getYoungestChildArray();
assertEquals(tr.getSize(), youngestChildArray.length);
@@ -627,10 +627,10 @@ public class TestTaxonomyCombined extend
@Test
public void testChildrenArraysInvariants() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
tw.close();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
ChildrenArrays ca = tr.getChildrenArrays();
int[] youngestChildArray = ca.getYoungestChildArray();
assertEquals(tr.getSize(), youngestChildArray.length);
@@ -707,10 +707,10 @@ public class TestTaxonomyCombined extend
@Test
public void testChildrenArraysGrowth() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
tw.addCategory(new CategoryPath("hi", "there"));
tw.commit();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
ChildrenArrays ca = tr.getChildrenArrays();
assertEquals(3, tr.getSize());
assertEquals(3, ca.getOlderSiblingArray().length);
@@ -747,12 +747,12 @@ public class TestTaxonomyCombined extend
public void testTaxonomyReaderRefreshRaces() throws Exception {
// compute base child arrays - after first chunk, and after the other
Directory indexDirBase = newDirectory();
- TaxonomyWriter twBase = new LuceneTaxonomyWriter(indexDirBase);
+ TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
twBase.addCategory(new CategoryPath("a", "0"));
final CategoryPath abPath = new CategoryPath("a", "b");
twBase.addCategory(abPath);
twBase.commit();
- TaxonomyReader trBase = new LuceneTaxonomyReader(indexDirBase);
+ TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);
final ChildrenArrays ca1 = trBase.getChildrenArrays();
@@ -779,12 +779,12 @@ public class TestTaxonomyCombined extend
final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry)
throws Exception {
SlowRAMDirectory indexDir = new SlowRAMDirectory(-1,null); // no slowness for intialization
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
tw.addCategory(new CategoryPath("a", "0"));
tw.addCategory(abPath);
tw.commit();
- final TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ final TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
for (int i=0; i < 1<<10; i++) { //1024 facets
final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
tw.addCategory(cp);
@@ -865,9 +865,9 @@ public class TestTaxonomyCombined extend
@Test
public void testSeparateReaderAndWriter() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
tw.commit();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
int author = 1;
@@ -932,9 +932,9 @@ public class TestTaxonomyCombined extend
@Test
public void testSeparateReaderAndWriter2() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
tw.commit();
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
// Test getOrdinal():
CategoryPath author = new CategoryPath("Author");
@@ -968,26 +968,26 @@ public class TestTaxonomyCombined extend
public void testWriterLock() throws Exception {
// native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
Directory indexDir = new RAMDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
tw.addCategory(new CategoryPath("hi", "there"));
tw.commit();
// we deliberately not close the write now, and keep it open and
// locked.
// Verify that the writer worked:
- TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+ TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
assertEquals(2, tr.getOrdinal(new CategoryPath("hi", "there")));
// Try to open a second writer, with the first one locking the directory.
// We expect to get a LockObtainFailedException.
try {
- new LuceneTaxonomyWriter(indexDir);
+ new DirectoryTaxonomyWriter(indexDir);
fail("should have failed to write in locked directory");
} catch (LockObtainFailedException e) {
// this is what we expect to happen.
}
// Remove the lock, and now the open should succeed, and we can
// write to the new writer.
- LuceneTaxonomyWriter.unlock(indexDir);
- TaxonomyWriter tw2 = new LuceneTaxonomyWriter(indexDir);
+ DirectoryTaxonomyWriter.unlock(indexDir);
+ TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir);
tw2.addCategory(new CategoryPath("hey"));
tw2.close();
// See that the writer indeed wrote:
@@ -1054,7 +1054,7 @@ public class TestTaxonomyCombined extend
@Test
public void testWriterCheckPaths() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomyCheckPaths(tw);
// Also check TaxonomyWriter.getSize() - see that the taxonomy's size
// is what we expect it to be.
@@ -1073,14 +1073,14 @@ public class TestTaxonomyCombined extend
@Test
public void testWriterCheckPaths2() throws Exception {
Directory indexDir = newDirectory();
- TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+ TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
checkPaths(tw);
fillTaxonomy(tw);
checkPaths(tw);
tw.close();
- tw = new LuceneTaxonomyWriter(indexDir);
+ tw = new DirectoryTaxonomyWriter(indexDir);
checkPaths(tw);
fillTaxonomy(tw);
checkPaths(tw);
Modified: lucene/dev/branches/solrcloud/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (original)
+++ lucene/dev/branches/solrcloud/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java Thu Nov 3 13:35:07 2011
@@ -505,7 +505,7 @@ public class BlockGroupingCollector exte
subDocUpto = 0;
docBase = readerContext.docBase;
//System.out.println("setNextReader base=" + docBase + " r=" + readerContext.reader);
- lastDocPerGroupBits = lastDocPerGroup.getDocIdSet(readerContext).iterator();
+ lastDocPerGroupBits = lastDocPerGroup.getDocIdSet(readerContext, readerContext.reader.getLiveDocs()).iterator();
groupEndDocID = -1;
currentReaderContext = readerContext;
Modified: lucene/dev/branches/solrcloud/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/branches/solrcloud/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Thu Nov 3 13:35:07 2011
@@ -1221,11 +1221,11 @@ public class TestGrouping extends Lucene
}
public void search(Weight weight, Collector collector) throws IOException {
- search(ctx, weight, null, collector);
+ search(ctx, weight, collector);
}
public TopDocs search(Weight weight, int topN) throws IOException {
- return search(ctx, weight, null, null, topN);
+ return search(ctx, weight, null, topN);
}
@Override
Modified: lucene/dev/branches/solrcloud/modules/join/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/join/build.xml?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/join/build.xml (original)
+++ lucene/dev/branches/solrcloud/modules/join/build.xml Thu Nov 3 13:35:07 2011
@@ -5,10 +5,10 @@
</description>
<property name="build.dir" location="build/" />
- <import file="../../lucene/contrib/contrib-build.xml"/>
-
- <property name="build.dir" location="build/" />
<property name="dist.dir" location="dist/" />
+ <property name="maven.dist.dir" location="../dist/maven" />
+
+ <import file="../../lucene/contrib/contrib-build.xml"/>
<path id="classpath">
<pathelement path="${grouping.jar}"/>
@@ -20,10 +20,6 @@
<pathelement location="${build.dir}/classes/java"/>
</path>
- <property name="build.dir" location="build/" />
- <property name="dist.dir" location="dist/" />
- <property name="maven.dist.dir" location="../dist/maven" />
-
<target name="init" depends="contrib-build.init,jar-grouping"/>
<target name="dist-maven" depends="jar-core,javadocs,contrib-build.dist-maven" />
Modified: lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java (original)
+++ lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java Thu Nov 3 13:35:07 2011
@@ -18,7 +18,6 @@ package org.apache.lucene.search.join;
*/
import java.io.IOException;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
@@ -387,15 +386,17 @@ public class BlockJoinCollector extends
// unbox once
final int slot = _slot;
- if (offset >= queue.size()) {
- return null;
- }
- int totalGroupedHitCount = 0;
-
if (sortedGroups == null) {
+ if (offset >= queue.size()) {
+ return null;
+ }
sortQueue();
+ } else if (offset > sortedGroups.length) {
+ return null;
}
+ int totalGroupedHitCount = 0;
+
final FakeScorer fakeScorer = new FakeScorer();
final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];
Modified: lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java (original)
+++ lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java Thu Nov 3 13:35:07 2011
@@ -163,7 +163,7 @@ public class BlockJoinQuery extends Quer
return null;
}
- final DocIdSet parents = parentsFilter.getDocIdSet(readerContext);
+ final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader.getLiveDocs());
// TODO: once we do random-access filters we can
// generalize this:
if (parents == null) {
Modified: lucene/dev/branches/solrcloud/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java (original)
+++ lucene/dev/branches/solrcloud/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java Thu Nov 3 13:35:07 2011
@@ -57,6 +57,14 @@ public class TestBlockJoin extends Lucen
return job;
}
+ // ... has multiple qualifications
+ private Document makeQualification(String qualification, int year) {
+ Document job = new Document();
+ job.add(newField("qualification", qualification, StringField.TYPE_STORED));
+ job.add(new NumericField("year").setIntValue(year));
+ return job;
+ }
+
public void testSimple() throws Exception {
final Directory dir = newDirectory();
@@ -492,4 +500,94 @@ public class TestBlockJoin extends Lucen
}
}
}
+
+ public void testMultiChildTypes() throws Exception {
+
+ final Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random, dir);
+
+ final List<Document> docs = new ArrayList<Document>();
+
+ docs.add(makeJob("java", 2007));
+ docs.add(makeJob("python", 2010));
+ docs.add(makeQualification("maths", 1999));
+ docs.add(makeResume("Lisa", "United Kingdom"));
+ w.addDocuments(docs);
+
+ IndexReader r = w.getReader();
+ w.close();
+ IndexSearcher s = new IndexSearcher(r);
+
+ // Create a filter that defines "parent" documents in the index - in this case resumes
+ Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+ // Define child document criteria (finds an example of relevant work experience)
+ BooleanQuery childJobQuery = new BooleanQuery();
+ childJobQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
+ childJobQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));
+
+ BooleanQuery childQualificationQuery = new BooleanQuery();
+ childQualificationQuery.add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), Occur.MUST));
+ childQualificationQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 1980, 2000, true, true), Occur.MUST));
+
+
+ // Define parent document criteria (find a resident in the UK)
+ Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+ // Wrap the child document query to 'join' any matches
+ // up to corresponding parent:
+ BlockJoinQuery childJobJoinQuery = new BlockJoinQuery(childJobQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg);
+ BlockJoinQuery childQualificationJoinQuery = new BlockJoinQuery(childQualificationQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg);
+
+ // Combine the parent and nested child queries into a single query for a candidate
+ BooleanQuery fullQuery = new BooleanQuery();
+ fullQuery.add(new BooleanClause(parentQuery, Occur.MUST));
+ fullQuery.add(new BooleanClause(childJobJoinQuery, Occur.MUST));
+ fullQuery.add(new BooleanClause(childQualificationJoinQuery, Occur.MUST));
+
+ //????? How do I control volume of jobs vs qualifications per parent?
+ BlockJoinCollector c = new BlockJoinCollector(Sort.RELEVANCE, 10, true, false);
+
+ s.search(fullQuery, c);
+
+ //Examine "Job" children
+ boolean showNullPointerIssue=true;
+ if (showNullPointerIssue) {
+ TopGroups<Integer> jobResults = c.getTopGroups(childJobJoinQuery, null, 0, 10, 0, true);
+
+ //assertEquals(1, results.totalHitCount);
+ assertEquals(1, jobResults.totalGroupedHitCount);
+ assertEquals(1, jobResults.groups.length);
+
+ final GroupDocs<Integer> group = jobResults.groups[0];
+ assertEquals(1, group.totalHits);
+
+ Document childJobDoc = s.doc(group.scoreDocs[0].doc);
+ //System.out.println(" doc=" + group.scoreDocs[0].doc);
+ assertEquals("java", childJobDoc.get("skill"));
+ assertNotNull(group.groupValue);
+ Document parentDoc = s.doc(group.groupValue);
+ assertEquals("Lisa", parentDoc.get("name"));
+ }
+
+ //Now Examine qualification children
+ TopGroups<Integer> qualificationResults = c.getTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);
+
+ //!!!!! This next line can null pointer - but only if prior "jobs" section called first
+ assertEquals(1, qualificationResults.totalGroupedHitCount);
+ assertEquals(1, qualificationResults.groups.length);
+
+ final GroupDocs<Integer> qGroup = qualificationResults.groups[0];
+ assertEquals(1, qGroup.totalHits);
+
+ Document childQualificationDoc = s.doc(qGroup.scoreDocs[0].doc);
+ assertEquals("maths", childQualificationDoc.get("qualification"));
+ assertNotNull(qGroup.groupValue);
+ Document parentDoc = s.doc(qGroup.groupValue);
+ assertEquals("Lisa", parentDoc.get("name"));
+
+
+ r.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java Thu Nov 3 13:35:07 2011
@@ -24,10 +24,12 @@ import java.util.Iterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
/**
@@ -48,7 +50,7 @@ public class BooleanFilter extends Filte
* of the filters that have been added.
*/
@Override
- public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
FixedBitSet res = null;
final IndexReader reader = context.reader;
@@ -91,12 +93,13 @@ public class BooleanFilter extends Filte
}
}
- return res != null ? res : DocIdSet.EMPTY_DOCIDSET;
+ return res != null ? BitsFilteredDocIdSet.wrap(res, acceptDocs) : DocIdSet.EMPTY_DOCIDSET;
}
private static DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context)
throws IOException {
- final DocIdSet set = filter.getDocIdSet(context);
+ // we dont pass acceptDocs, we will filter at the end using an additional filter
+ final DocIdSet set = filter.getDocIdSet(context, null);
return (set == null || set == DocIdSet.EMPTY_DOCIDSET) ? null : set.iterator();
}
Modified: lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java Thu Nov 3 13:35:07 2011
@@ -19,9 +19,11 @@ package org.apache.lucene.queries;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
@@ -97,21 +99,22 @@ public class ChainedFilter extends Filte
* {@link Filter#getDocIdSet}.
*/
@Override
- public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
int[] index = new int[1]; // use array as reference to modifiable int;
index[0] = 0; // an object attribute would not be thread safe.
if (logic != -1) {
- return getDocIdSet(context, logic, index);
+ return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logic, index), acceptDocs);
} else if (logicArray != null) {
- return getDocIdSet(context, logicArray, index);
+ return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logicArray, index), acceptDocs);
}
-
- return getDocIdSet(context, DEFAULT, index);
+
+ return BitsFilteredDocIdSet.wrap(getDocIdSet(context, DEFAULT, index), acceptDocs);
}
private DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context)
throws IOException {
- DocIdSet docIdSet = filter.getDocIdSet(context);
+ // we dont pass acceptDocs, we will filter at the end using an additional filter
+ DocIdSet docIdSet = filter.getDocIdSet(context, null);
if (docIdSet == null) {
return DocIdSet.EMPTY_DOCIDSET.iterator();
} else {
@@ -156,7 +159,8 @@ public class ChainedFilter extends Filte
throws IOException {
OpenBitSetDISI result = initialResult(context, logic, index);
for (; index[0] < chain.length; index[0]++) {
- doChain(result, logic, chain[index[0]].getDocIdSet(context));
+ // we dont pass acceptDocs, we will filter at the end using an additional filter
+ doChain(result, logic, chain[index[0]].getDocIdSet(context, null));
}
return result;
}
@@ -176,7 +180,8 @@ public class ChainedFilter extends Filte
OpenBitSetDISI result = initialResult(context, logic[0], index);
for (; index[0] < chain.length; index[0]++) {
- doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context));
+ // we dont pass acceptDocs, we will filter at the end using an additional filter
+ doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context, null));
}
return result;
}
Modified: lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java Thu Nov 3 13:35:07 2011
@@ -54,7 +54,7 @@ public class TermsFilter extends Filter
*/
@Override
- public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
IndexReader reader = context.reader;
FixedBitSet result = new FixedBitSet(reader.maxDoc());
Fields fields = reader.fields();
@@ -64,7 +64,6 @@ public class TermsFilter extends Filter
}
BytesRef br = new BytesRef();
- Bits liveDocs = reader.getLiveDocs();
String lastField = null;
Terms termsC = null;
TermsEnum termsEnum = null;
@@ -72,6 +71,9 @@ public class TermsFilter extends Filter
for (Term term : terms) {
if (!term.field().equals(lastField)) {
termsC = fields.terms(term.field());
+ if (termsC == null) {
+ return result;
+ }
termsEnum = termsC.iterator();
lastField = term.field();
}
@@ -79,7 +81,7 @@ public class TermsFilter extends Filter
if (terms != null) { // TODO this check doesn't make sense, decide which variable its supposed to be for
br.copy(term.bytes());
if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) {
- docs = termsEnum.docs(liveDocs, docs);
+ docs = termsEnum.docs(acceptDocs, docs);
while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
result.set(docs.docID());
}
Modified: lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java Thu Nov 3 13:35:07 2011
@@ -35,6 +35,7 @@ import org.apache.lucene.search.DocIdSet
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
@@ -94,7 +95,7 @@ public class BooleanFilterTest extends L
private Filter getNullDISFilter() {
return new Filter() {
@Override
- public DocIdSet getDocIdSet(AtomicReaderContext context) {
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
return null;
}
};
@@ -103,7 +104,7 @@ public class BooleanFilterTest extends L
private Filter getNullDISIFilter() {
return new Filter() {
@Override
- public DocIdSet getDocIdSet(AtomicReaderContext context) {
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
return new DocIdSet() {
@Override
public DocIdSetIterator iterator() {
@@ -122,7 +123,7 @@ public class BooleanFilterTest extends L
private void tstFilterCard(String mes, int expected, Filter filt)
throws Exception {
// BooleanFilter never returns null DIS or null DISI!
- DocIdSetIterator disi = filt.getDocIdSet(new AtomicReaderContext(reader)).iterator();
+ DocIdSetIterator disi = filt.getDocIdSet(new AtomicReaderContext(reader), reader.getLiveDocs()).iterator();
int actual = 0;
while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
actual++;
Modified: lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java Thu Nov 3 13:35:07 2011
@@ -23,6 +23,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowMultiReaderWrapper;
import org.apache.lucene.index.Term;
@@ -30,6 +31,7 @@ import org.apache.lucene.search.Filter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ReaderUtil;
public class TermsFilterTest extends LuceneTestCase {
@@ -68,22 +70,57 @@ public class TermsFilterTest extends Luc
TermsFilter tf = new TermsFilter();
tf.addTerm(new Term(fieldName, "19"));
- FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context);
+ FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
assertEquals("Must match nothing", 0, bits.cardinality());
tf.addTerm(new Term(fieldName, "20"));
- bits = (FixedBitSet) tf.getDocIdSet(context);
+ bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
assertEquals("Must match 1", 1, bits.cardinality());
tf.addTerm(new Term(fieldName, "10"));
- bits = (FixedBitSet) tf.getDocIdSet(context);
+ bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
assertEquals("Must match 2", 2, bits.cardinality());
tf.addTerm(new Term(fieldName, "00"));
- bits = (FixedBitSet) tf.getDocIdSet(context);
+ bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
assertEquals("Must match 2", 2, bits.cardinality());
reader.close();
rd.close();
}
+
+ public void testMissingField() throws Exception {
+ String fieldName = "field1";
+ Directory rd1 = newDirectory();
+ RandomIndexWriter w1 = new RandomIndexWriter(random, rd1);
+ Document doc = new Document();
+ doc.add(newField(fieldName, "content1", StringField.TYPE_STORED));
+ w1.addDocument(doc);
+ IndexReader reader1 = w1.getReader();
+ w1.close();
+
+ fieldName = "field2";
+ Directory rd2 = newDirectory();
+ RandomIndexWriter w2 = new RandomIndexWriter(random, rd2);
+ doc = new Document();
+ doc.add(newField(fieldName, "content2", StringField.TYPE_STORED));
+ w2.addDocument(doc);
+ IndexReader reader2 = w2.getReader();
+ w2.close();
+
+ TermsFilter tf = new TermsFilter();
+ tf.addTerm(new Term(fieldName, "content1"));
+
+ MultiReader multi = new MultiReader(reader1, reader2);
+ for (IndexReader.AtomicReaderContext context : ReaderUtil.leaves(multi.getTopReaderContext())) {
+ FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
+ assertTrue("Must be >= 0", bits.cardinality() >= 0);
+ }
+ multi.close();
+ reader1.close();
+ reader2.close();
+ rd1.close();
+ rd2.close();
+ }
+
}
Modified: lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java (original)
+++ lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java Thu Nov 3 13:35:07 2011
@@ -203,12 +203,8 @@ public class AnalyzerQueryNodeProcessor
children.add(new FieldQueryNode(field, term, -1, -1));
}
- if (positionCount == 1)
- return new GroupQueryNode(
- new StandardBooleanQueryNode(children, true));
- else
- return new StandardBooleanQueryNode(children, false);
-
+ return new GroupQueryNode(
+ new StandardBooleanQueryNode(children, positionCount==1));
} else {
// phrase query:
MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
Modified: lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java (original)
+++ lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java Thu Nov 3 13:35:07 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.FilterBuilder;
@@ -155,7 +156,7 @@ public class NumericRangeFilterBuilder i
static class NoMatchFilter extends Filter {
@Override
- public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+ public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
return null;
}
Modified: lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (original)
+++ lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java Thu Nov 3 13:35:07 2011
@@ -359,8 +359,16 @@ public class TestQPHelper extends Lucene
BooleanQuery expected = new BooleanQuery();
expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
-
assertEquals(expected, getQuery("中国", analyzer));
+
+ expected = new BooleanQuery();
+ expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.MUST);
+ BooleanQuery inner = new BooleanQuery();
+ inner.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+ inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+ expected.add(inner, BooleanClause.Occur.MUST);
+ assertEquals(expected, getQuery("中 AND 中国", new SimpleCJKAnalyzer()));
+
}
public void testCJKBoostedTerm() throws Exception {
@@ -609,7 +617,7 @@ public class TestQPHelper extends Lucene
assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
assertQueryEquals("term phrase term", qpAnalyzer,
- "term phrase1 phrase2 term");
+ "term (phrase1 phrase2) term");
assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
"+term -(phrase1 phrase2) term");
Modified: lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (original)
+++ lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java Thu Nov 3 13:35:07 2011
@@ -65,7 +65,7 @@ public class TestNumericRangeFilterBuild
try {
IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(ramDir, true));
try {
- assertNull(filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext()));
+ assertNull(filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext(), reader.getLiveDocs()));
}
finally {
reader.close();
Modified: lucene/dev/branches/solrcloud/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (original)
+++ lucene/dev/branches/solrcloud/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java Thu Nov 3 13:35:07 2011
@@ -70,10 +70,7 @@ public class DirectSpellChecker {
* shortest of the two terms instead of the longest.
* </ul>
*/
- public static final StringDistance INTERNAL_LEVENSHTEIN = new StringDistance() {
- public float getDistance(String s1, String s2) {
- throw new UnsupportedOperationException("Not for external use.");
- }};
+ public static final StringDistance INTERNAL_LEVENSHTEIN = new LuceneLevenshteinDistance();
/** maximum edit distance for candidate terms */
private int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
Modified: lucene/dev/branches/solrcloud/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java (original)
+++ lucene/dev/branches/solrcloud/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java Thu Nov 3 13:35:07 2011
@@ -29,7 +29,36 @@ import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
public class TestDirectSpellChecker extends LuceneTestCase {
+
+ public void testInternalLevenshteinDistance() throws Exception {
+ DirectSpellChecker spellchecker = new DirectSpellChecker();
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir,
+ new MockAnalyzer(random, MockTokenizer.KEYWORD, true));
+
+ String[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoið" };
+ for (int i = 0; i < termsToAdd.length; i++) {
+ Document doc = new Document();
+ doc.add(newField("repentance", termsToAdd[i], TextField.TYPE_UNSTORED));
+ writer.addDocument(doc);
+ }
+ IndexReader ir = writer.getReader();
+ String misspelled = "metanoix";
+ SuggestWord[] similar = spellchecker.suggestSimilar(new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
+ assertTrue(similar.length == 4);
+
+ StringDistance sd = spellchecker.getDistance();
+ assertTrue(sd instanceof LuceneLevenshteinDistance);
+ for(SuggestWord word : similar) {
+ assertTrue(word.score==sd.getDistance(word.string, misspelled));
+ assertTrue(word.score==sd.getDistance(misspelled, word.string));
+ }
+
+ ir.close();
+ writer.close();
+ dir.close();
+ }
public void testSimpleExamples() throws Exception {
DirectSpellChecker spellChecker = new DirectSpellChecker();
spellChecker.setMinQueryLength(0);
Modified: lucene/dev/branches/solrcloud/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/CHANGES.txt?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/CHANGES.txt (original)
+++ lucene/dev/branches/solrcloud/solr/CHANGES.txt Thu Nov 3 13:35:07 2011
@@ -56,6 +56,11 @@ Upgrading from Solr 3.5-dev
* FacetComponent no longer catches and embeds exceptions occurred during facet
processing, it throws HTTP 400 or 500 exceptions instead.
+
+* The VelocityResponseWriter is no longer built into the core. Its JAR and
+ dependencies now need to be added (via <lib> or solr/home lib inclusion),
+ and it needs to be registered in solrconfig.xml like this:
+ <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"/>
Detailed Change List
----------------------
@@ -86,7 +91,7 @@ New Features
DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
levenshtein automata. (James Dyer, rmuir)
-* SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper,
+* SOLR-1873: SolrCloud - added shared/central config and core/shard management via zookeeper,
built-in load balancing, and infrastructure for future SolrCloud work. (yonik, Mark Miller)
Additional Work:
SOLR-2324: SolrCloud solr.xml parameters are not persisted by CoreContainer.
@@ -257,6 +262,12 @@ Bug Fixes
* SOLR-2654: Directorys used by a SolrCore are now closed when they are no longer used.
(Mark Miller)
+* SOLR-2854: Now load URL content stream data (via stream.url) when called for during request handling,
+ rather than loading URL content streams automatically regardless of use.
+ (David Smiley and Ryan McKinley via ehatcher)
+
+
+
Other Changes
----------------------
@@ -334,6 +345,15 @@ Other Changes
* SOLR-2756: Maven configuration: Excluded transitive stax:stax-api dependency
from org.codehaus.woodstox:wstx-asl dependency. (David Smiley via Steve Rowe)
+* SOLR-2588: Moved VelocityResponseWriter back to contrib module in order to
+ remove it as a mandatory core dependency. (ehatcher)
+
+* SOLR-2718: Add ability to lazy load response writers, defined with startup="lazy".
+ (ehatcher)
+
+* SOLR-2862: More explicit lexical resources location logged if Carrot2 clustering
+ extension is used. Fixed solr. impl. of IResource and IResourceLookup. (Dawid Weiss)
+
Documentation
----------------------
@@ -355,10 +375,14 @@ New Features
for 99 languages (janhoy, cmale)
* SOLR-1979: New contrib "langid". Adds language identification capabilities as an
- Update Processor, using Tika's LanguageIdentifier (janhoy, Tommaso Teofili, gsingers)
+ Update Processor, using Tika's LanguageIdentifier or Cybozu language-detection
+ library (janhoy, Tommaso Teofili, gsingers)
* SOLR-2818: Added before/after count response parsing support for range facets in
SolrJ. (Bernhard Frauendienst via Martijn van Groningen)
+
+* SOLR-2276: Add support for cologne phonetic to PhoneticFilterFactory.
+ (Marc Pompl via rmuir)
Bug Fixes
----------------------
@@ -383,6 +407,10 @@ Bug Fixes
* SOLR-2791: Replication: abortfetch command is broken if replication was started
by fetchindex command instead of a regular poll (Yury Kats via shalin)
+* SOLR-2861: Fix extremely rare race condition on commit that can result
+ in a NPE (yonik)
+
+
Other Changes
----------------------
@@ -413,6 +441,11 @@ Bug Fixes
* SOLR-2792: Allow case insensitive Hunspell stemming (janhoy, rmuir)
+* SOLR-2862: More explicit lexical resources location logged if Carrot2 clustering
+ extension is used. Fixed solr. impl. of IResource and IResourceLookup. (Dawid Weiss)
+
+* SOLR-2849: Fix dependencies in Maven POMs. (David Smiley via Steve Rowe)
+
================== 3.4.0 ==================
Upgrading from Solr 3.3
Modified: lucene/dev/branches/solrcloud/solr/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/NOTICE.txt?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/NOTICE.txt (original)
+++ lucene/dev/branches/solrcloud/solr/NOTICE.txt Thu Nov 3 13:35:07 2011
@@ -253,6 +253,18 @@ Copyright 2004 Sun Microsystems, Inc. (R
Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/)
+=========================================================================
+== Language Detection Notices ==
+=========================================================================
+
+The following notices apply to the libraries in contrib/langid/lib:
+
+This product includes software developed by Cybozu Labs, Inc.
+(c)2010 All rights reserved by Cybozu Labs, Inc.
+http://code.google.com/p/language-detection/
+
+This product includes software developed by the Jsonic project:
+http://sourceforge.jp/projects/jsonic/
=========================================================================
== Carrot2 Notice ==
Modified: lucene/dev/branches/solrcloud/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/common-build.xml?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/common-build.xml (original)
+++ lucene/dev/branches/solrcloud/solr/common-build.xml Thu Nov 3 13:35:07 2011
@@ -145,6 +145,8 @@
<arg value="-c" />
<arg value="${common-solr.dir}/contrib/uima/lib" />
<arg value="-c" />
+ <arg value="${common-solr.dir}/contrib/velocity/lib" />
+ <arg value="-c" />
<arg value="${common-solr.dir}/example/example-DIH/solr/db/lib" />
<arg value="-c" />
<arg value="${common-solr.dir}/example/example-DIH/solr/mail/lib" />
Modified: lucene/dev/branches/solrcloud/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Thu Nov 3 13:35:07 2011
@@ -17,8 +17,7 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.InputStream;
+import java.io.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@@ -28,6 +27,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrDocument;
@@ -67,13 +67,14 @@ import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
+import com.google.common.io.Closeables;
/**
* Search results clustering engine based on Carrot2 clustering algorithms.
* <p/>
* Output from this class is subject to change.
*
- * @link http://project.carrot2.org
+ * @see "http://project.carrot2.org"
*/
public class CarrotClusteringEngine extends SearchClusteringEngine {
private transient static Logger log = LoggerFactory
@@ -101,6 +102,90 @@ public class CarrotClusteringEngine exte
*/
private Controller controller = ControllerFactory.createPooling();
private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
+
+ private static class SolrResourceLocator implements IResourceLocator {
+ private final SolrResourceLoader resourceLoader;
+ private final String carrot2ResourcesDir;
+
+ public SolrResourceLocator(SolrCore core, SolrParams initParams) {
+ resourceLoader = core.getResourceLoader();
+ carrot2ResourcesDir = initParams.get(
+ CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
+ }
+
+ @Override
+ public IResource[] getAll(final String resource) {
+ final String resourceName = carrot2ResourcesDir + "/" + resource;
+ log.debug("Looking for Solr resource: " + resourceName);
+
+ InputStream resourceStream = null;
+ final byte [] asBytes;
+ try {
+ resourceStream = resourceLoader.openResource(resourceName);
+ asBytes = IOUtils.toByteArray(resourceStream);
+ } catch (RuntimeException e) {
+ log.debug("Resource not found in Solr's config: " + resourceName
+ + ". Using the default " + resource + " from Carrot JAR.");
+ return new IResource[] {};
+ } catch (IOException e) {
+ log.warn("Could not read Solr resource " + resourceName);
+ return new IResource[] {};
+ } finally {
+ if (resourceStream != null) Closeables.closeQuietly(resourceStream);
+ }
+
+ log.info("Loaded Solr resource: " + resourceName);
+
+ final IResource foundResource = new IResource() {
+ @Override
+ public InputStream open() throws IOException {
+ return new ByteArrayInputStream(asBytes);
+ }
+
+ @Override
+ public int hashCode() {
+ // In case multiple resources are found they will be deduped, but we don't use it in Solr,
+ // so simply rely on instance equivalence.
+ return super.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ // In case multiple resources are found they will be deduped, but we don't use it in Solr,
+ // so simply rely on instance equivalence.
+ return super.equals(obj);
+ }
+
+ @Override
+ public String toString() {
+ return "Solr config resource: " + resourceName;
+ }
+ };
+
+ return new IResource[] { foundResource };
+ }
+
+ @Override
+ public int hashCode() {
+ // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
+ // so simply rely on instance equivalence.
+ return super.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
+ // so simply rely on instance equivalence.
+ return super.equals(obj);
+ }
+
+ @Override
+ public String toString() {
+ return "SolrResourceLocator, "
+ + "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath()
+ + ", Carrot2 relative lexicalResourcesDir=";
+ }
+ }
@Override
@Deprecated
@@ -168,38 +253,10 @@ public class CarrotClusteringEngine exte
// Customize Carrot2's resource lookup to first look for resources
// using Solr's resource loader. If that fails, try loading from the classpath.
- DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
- .resourceLookup(new ResourceLookup(new IResourceLocator() {
- @Override
- public IResource[] getAll(final String resource) {
- final SolrResourceLoader resourceLoader = core.getResourceLoader();
- final String carrot2ResourcesDir = initParams.get(
- CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
- try {
- log.debug("Looking for " + resource + " in "
- + carrot2ResourcesDir);
- final InputStream resourceStream = resourceLoader
- .openResource(carrot2ResourcesDir + "/" + resource);
-
- log.info(resource + " loaded from " + carrot2ResourcesDir);
- final IResource foundResource = new IResource() {
- @Override
- public InputStream open() throws IOException {
- return resourceStream;
- }
- };
- return new IResource[] { foundResource };
- } catch (RuntimeException e) {
- // No way to distinguish if the resource was found but failed
- // to load or wasn't found at all, so we simply fall back
- // to Carrot2 defaults here by returning an empty locations array.
- log.debug(resource + " not found in " + carrot2ResourcesDir
- + ". Using the default " + resource + " from Carrot JAR.");
- return new IResource[] {};
- }
- }
- },
-
+ DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(
+ new ResourceLookup(
+ // Solr-specific resource loading.
+ new SolrResourceLocator(core, initParams),
// Using the class loader directly because this time we want to omit the prefix
new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
Modified: lucene/dev/branches/solrcloud/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java Thu Nov 3 13:35:07 2011
@@ -143,10 +143,6 @@ public class ExtractingDocumentLoader ex
}
if (parser != null) {
Metadata metadata = new Metadata();
- metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
- metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
- metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
- metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
// If you specify the resource name (the filename, roughly) with this parameter,
// then Tika can make use of it in guessing the appropriate MIME type:
@@ -155,12 +151,16 @@ public class ExtractingDocumentLoader ex
metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
}
- SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
InputStream inputStream = null;
try {
inputStream = stream.getStream();
+ metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
+ metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
+ metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
+ metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
+ SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
ContentHandler parsingHandler = handler;
StringWriter writer = null;
Modified: lucene/dev/branches/solrcloud/solr/contrib/langid/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/langid/CHANGES.txt?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/langid/CHANGES.txt (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/langid/CHANGES.txt Thu Nov 3 13:35:07 2011
@@ -13,3 +13,6 @@ Initial release. See README.txt.
* SOLR-1979: New contrib "langid". Adds language identification capabilities as an
Update Processor, using Tika's LanguageIdentifier (janhoy, Tommaso Teofili, gsingers)
+
+* SOLR-2839: Add alternative implementation supporting 53 languages,
+ based on http://code.google.com/p/language-detection/ (rmuir)
Modified: lucene/dev/branches/solrcloud/solr/contrib/langid/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/langid/build.xml?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/langid/build.xml (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/langid/build.xml Thu Nov 3 13:35:07 2011
@@ -27,7 +27,16 @@
<path id="classpath">
<fileset dir="../extraction/lib" includes="*.jar"/>
+ <fileset dir="lib" includes="*.jar"/>
<path refid="solr.base.classpath"/>
</path>
+ <dirname file="${ant.file.solr-langid}" property="solr-langid.dir"/>
+ <target name="dist-maven" depends="jar-core,javadocs,jar-src,contrib-build.dist-maven">
+ <m2-deploy-with-pom-template pom.xml="${solr-langid.dir}/lib/jsonic-pom.xml.template"
+ jar.file="${solr-langid.dir}/lib/jsonic-1.2.0.jar" />
+
+ <m2-deploy-with-pom-template pom.xml="${solr-langid.dir}/lib/langdetect-pom.xml.template"
+ jar.file="${solr-langid.dir}/lib/langdetect-r111.jar" />
+ </target>
</project>
Modified: lucene/dev/branches/solrcloud/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java Thu Nov 3 13:35:07 2011
@@ -26,7 +26,6 @@ import org.apache.solr.request.SolrQuery
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.AddUpdateCommand;
-import org.apache.tika.language.LanguageIdentifier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -40,15 +39,15 @@ import java.util.regex.Pattern;
/**
- * Identifies the language of a set of input fields using Tika's
- * LanguageIdentifier. Also supports mapping of field names based
+ * Identifies the language of a set of input fields.
+ * Also supports mapping of field names based
* on detected language.
- * The tika-core-x.y.jar must be on the classpath
* <p>
* See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
* @since 3.5
+ * @lucene.experimental
*/
-public class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams {
+public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams {
protected final static Logger log = LoggerFactory
.getLogger(LanguageIdentifierUpdateProcessor.class);
@@ -300,23 +299,7 @@ public class LanguageIdentifierUpdatePro
* @param content The content to identify
* @return List of detected language(s) according to RFC-3066
*/
- protected List<DetectedLanguage> detectLanguage(String content) {
- List<DetectedLanguage> languages = new ArrayList<DetectedLanguage>();
- if(content.trim().length() != 0) {
- LanguageIdentifier identifier = new LanguageIdentifier(content.toString());
- // FIXME: Hack - we get the distance from toString and calculate our own certainty score
- Double distance = Double.parseDouble(tikaSimilarityPattern.matcher(identifier.toString()).replaceFirst("$1"));
- // This formula gives: 0.02 => 0.8, 0.1 => 0.5 which is a better sweetspot than isReasonablyCertain()
- Double certainty = 1 - (5 * distance);
- certainty = (certainty < 0) ? 0 : certainty;
- DetectedLanguage language = new DetectedLanguage(identifier.getLanguage(), certainty);
- languages.add(language);
- log.debug("Language detected as "+language+" with a certainty of "+language.getCertainty()+" (Tika distance="+identifier.toString()+")");
- } else {
- log.debug("No input text to detect language from, returning empty list");
- }
- return languages;
- }
+ protected abstract List<DetectedLanguage> detectLanguage(String content);
/**
* Chooses a language based on the list of candidates detected
Modified: lucene/dev/branches/solrcloud/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml Thu Nov 3 13:35:07 2011
@@ -62,7 +62,24 @@
</requestHandler>
<updateRequestProcessorChain name="lang_id">
- <processor class="org.apache.solr.update.processor.LanguageIdentifierUpdateProcessorFactory">
+ <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
+ <!-- Can take defaults, invariants and appends just like req handlers -->
+ <lst name="defaults">
+ <bool name="langid">true</bool>
+ <str name="langid.fl">name,subject</str>
+ <bool name="langid.map">true</bool>
+ <str name="langid.langField">language_s</str>
+ <str name="langid.langsField">language_sm</str>
+ <str name="langid.map.lcmap">th:thai</str>
+ <float name="threshold">0.5</float>
+ <str name="langid.fallback">fallback</str>
+ </lst>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="lang_id_alt">
+ <processor class="org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory">
<!-- Can take defaults, invariants and appends just like req handlers-->
<lst name="defaults">
<bool name="langid">true</bool>
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java Thu Nov 3 13:35:07 2011
@@ -21,9 +21,12 @@ import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.codec.Encoder;
import org.apache.commons.codec.language.Caverphone;
+import org.apache.commons.codec.language.ColognePhonetic;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.RefinedSoundex;
@@ -59,16 +62,18 @@ public class PhoneticFilterFactory exten
{
public static final String ENCODER = "encoder";
public static final String INJECT = "inject"; // boolean
+ private static final String PACKAGE_CONTAINING_ENCODERS = "org.apache.commons.codec.language.";
- private static final Map<String, Class<? extends Encoder>> registry;
- static {
- registry = new HashMap<String, Class<? extends Encoder>>();
- registry.put( "DoubleMetaphone".toUpperCase(Locale.ENGLISH), DoubleMetaphone.class );
- registry.put( "Metaphone".toUpperCase(Locale.ENGLISH), Metaphone.class );
- registry.put( "Soundex".toUpperCase(Locale.ENGLISH), Soundex.class );
- registry.put( "RefinedSoundex".toUpperCase(Locale.ENGLISH), RefinedSoundex.class );
- registry.put( "Caverphone".toUpperCase(Locale.ENGLISH), Caverphone.class );
- }
+ private static final Map<String, Class<? extends Encoder>> registry = new HashMap<String, Class<? extends Encoder>>()
+ {{
+ put( "DoubleMetaphone".toUpperCase(Locale.ENGLISH), DoubleMetaphone.class );
+ put( "Metaphone".toUpperCase(Locale.ENGLISH), Metaphone.class );
+ put( "Soundex".toUpperCase(Locale.ENGLISH), Soundex.class );
+ put( "RefinedSoundex".toUpperCase(Locale.ENGLISH), RefinedSoundex.class );
+ put( "Caverphone".toUpperCase(Locale.ENGLISH), Caverphone.class );
+ put( "ColognePhonetic".toUpperCase(Locale.ENGLISH), ColognePhonetic.class );
+ }};
+ private static final Lock lock = new ReentrantLock();
protected boolean inject = true;
protected String name = null;
@@ -87,7 +92,12 @@ public class PhoneticFilterFactory exten
}
Class<? extends Encoder> clazz = registry.get(name.toUpperCase(Locale.ENGLISH));
if( clazz == null ) {
- throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Unknown encoder: "+name +" ["+registry.keySet()+"]" );
+ lock.lock();
+ try {
+ clazz = resolveEncoder(name);
+ } finally {
+ lock.unlock();
+ }
}
try {
@@ -105,6 +115,30 @@ public class PhoneticFilterFactory exten
}
}
+ private Class<? extends Encoder> resolveEncoder(String name) {
+ Class<? extends Encoder> clazz = null;
+ try {
+ clazz = lookupEncoder(PACKAGE_CONTAINING_ENCODERS+name);
+ } catch (ClassNotFoundException e) {
+ try {
+ clazz = lookupEncoder(name);
+ } catch (ClassNotFoundException cnfe) {
+ throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Unknown encoder: "+name +" ["+registry.keySet()+"]" );
+ }
+ }
+ catch (ClassCastException e) {
+ throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Not an encoder: "+name +" ["+registry.keySet()+"]" );
+ }
+ return clazz;
+ }
+
+ private Class<? extends Encoder> lookupEncoder(String name)
+ throws ClassNotFoundException {
+ Class<? extends Encoder> clazz = Class.forName(name).asSubclass(Encoder.class);
+ registry.put( name.toUpperCase(Locale.ENGLISH), clazz );
+ return clazz;
+ }
+
public PhoneticFilter create(TokenStream input) {
return new PhoneticFilter(input,encoder,inject);
}
Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/RequestHandlers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/RequestHandlers.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/RequestHandlers.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/RequestHandlers.java Thu Nov 3 13:35:07 2011
@@ -177,7 +177,7 @@ final class RequestHandlers {
}
}
- // we've now registered all handlers, time ot init them in the same order
+ // we've now registered all handlers, time to init them in the same order
for (Map.Entry<PluginInfo,SolrRequestHandler> entry : handlers.entrySet()) {
PluginInfo info = entry.getKey();
SolrRequestHandler requestHandler = entry.getValue();