You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2011/11/03 14:35:20 UTC

svn commit: r1197110 [8/10] - in /lucene/dev/branches/solrcloud: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/contrib/ dev-tools/idea/modules/benchmark/ dev-tools/idea/solr/contrib/langid/ dev-tools/...

Modified: lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java Thu Nov  3 13:35:07 2011
@@ -32,8 +32,8 @@ import org.apache.lucene.facet.search.re
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.facet.util.ScoredDocIdsUtils;
 
 /**
@@ -93,7 +93,7 @@ public class MultiIteratorsPerCLParamsTe
     Directory taxoDir = newDirectory();
     populateIndex(iParams, indexDir, taxoDir);
 
-    TaxonomyReader taxo = new LuceneTaxonomyReader(taxoDir);
+    TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
     IndexReader reader = IndexReader.open(indexDir);
 
     CategoryListCache clCache = null;
@@ -168,7 +168,7 @@ public class MultiIteratorsPerCLParamsTe
       Directory taxoDir) throws Exception {
     RandomIndexWriter writer = new RandomIndexWriter(random, indexDir, 
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)));
-    TaxonomyWriter taxoWriter = new LuceneTaxonomyWriter(taxoDir);
+    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
 
     for (CategoryPath[] categories : perDocCategories) {
       writer.addDocument(new CategoryDocumentBuilder(taxoWriter, iParams)

Modified: lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (original)
+++ lucene/dev/branches/solrcloud/modules/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java Thu Nov  3 13:35:07 2011
@@ -14,8 +14,8 @@ import org.junit.Test;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.util.SlowRAMDirectory;
 
 /**
@@ -159,7 +159,7 @@ public class TestTaxonomyCombined extend
   @Test
   public void testWriter() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
     // is what we expect it to be.
@@ -175,7 +175,7 @@ public class TestTaxonomyCombined extend
   @Test
   public void testWriterTwice() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     // run fillTaxonomy again - this will try to add the same categories
     // again, and check that we see the same ordinal paths again, not
@@ -197,10 +197,10 @@ public class TestTaxonomyCombined extend
   @Test
   public void testWriterTwice2() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.close();
-    tw = new LuceneTaxonomyWriter(indexDir);
+    tw = new DirectoryTaxonomyWriter(indexDir);
     // run fillTaxonomy again - this will try to add the same categories
     // again, and check that we see the same ordinals again, not different
     // ones, and that the number of categories hasn't grown by the new
@@ -222,7 +222,7 @@ public class TestTaxonomyCombined extend
   public void testWriterTwice3() throws Exception {
     Directory indexDir = newDirectory();
     // First, create and fill the taxonomy
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.close();
     // Now, open the same taxonomy and add the same categories again.
@@ -231,7 +231,7 @@ public class TestTaxonomyCombined extend
     // all into memory and close its reader. The bug was that it closed
     // the reader, but forgot that it did (because it didn't set the reader
     // reference to null).
-    tw = new LuceneTaxonomyWriter(indexDir);
+    tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     // Add one new category, just to make commit() do something:
     tw.addCategory(new CategoryPath("hi"));
@@ -253,7 +253,7 @@ public class TestTaxonomyCombined extend
   @Test
   public void testWriterSimpler() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     assertEquals(1, tw.getSize()); // the root only
     // Test that adding a new top-level category works
     assertEquals(1, tw.addCategory(new CategoryPath("a")));
@@ -297,12 +297,12 @@ public class TestTaxonomyCombined extend
   @Test
   public void testRootOnly() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     // right after opening the index, it should already contain the
     // root, so have size 1:
     assertEquals(1, tw.getSize());
     tw.close();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     assertEquals(1, tr.getSize());
     assertEquals(0, tr.getPath(0).length());
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
@@ -319,9 +319,9 @@ public class TestTaxonomyCombined extend
   @Test
   public void testRootOnly2() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     tw.commit();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     assertEquals(1, tr.getSize());
     assertEquals(0, tr.getPath(0).length());
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
@@ -339,10 +339,10 @@ public class TestTaxonomyCombined extend
   @Test
   public void testReaderBasic() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.close();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
 
     // test TaxonomyReader.getSize():
     assertEquals(expectedCategories.length, tr.getSize());
@@ -398,10 +398,10 @@ public class TestTaxonomyCombined extend
   @Test
   public void testReaderParent() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.close();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
 
     // check that the parent of the root ordinal is the invalid ordinal:
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
@@ -463,11 +463,11 @@ public class TestTaxonomyCombined extend
   @Test
   public void testWriterParent1() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.close();
-    tw = new LuceneTaxonomyWriter(indexDir);
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    tw = new DirectoryTaxonomyWriter(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     
     checkWriterParent(tr, tw);
     
@@ -479,10 +479,10 @@ public class TestTaxonomyCombined extend
   @Test
   public void testWriterParent2() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.commit();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     
     checkWriterParent(tr, tw);
     
@@ -542,10 +542,10 @@ public class TestTaxonomyCombined extend
   @Test
   public void testReaderParentArray() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.close();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     int[] parents = tr.getParentArray();
     assertEquals(tr.getSize(), parents.length);
     for (int i=0; i<tr.getSize(); i++) {
@@ -563,10 +563,10 @@ public class TestTaxonomyCombined extend
   @Test
   public void testChildrenArrays() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.close();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     ChildrenArrays ca = tr.getChildrenArrays();
     int[] youngestChildArray = ca.getYoungestChildArray();
     assertEquals(tr.getSize(), youngestChildArray.length);
@@ -627,10 +627,10 @@ public class TestTaxonomyCombined extend
   @Test
   public void testChildrenArraysInvariants() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     tw.close();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     ChildrenArrays ca = tr.getChildrenArrays();
     int[] youngestChildArray = ca.getYoungestChildArray();
     assertEquals(tr.getSize(), youngestChildArray.length);
@@ -707,10 +707,10 @@ public class TestTaxonomyCombined extend
   @Test
   public void testChildrenArraysGrowth() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     tw.addCategory(new CategoryPath("hi", "there"));
     tw.commit();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     ChildrenArrays ca = tr.getChildrenArrays();
     assertEquals(3, tr.getSize());
     assertEquals(3, ca.getOlderSiblingArray().length);
@@ -747,12 +747,12 @@ public class TestTaxonomyCombined extend
   public void testTaxonomyReaderRefreshRaces() throws Exception {
     // compute base child arrays - after first chunk, and after the other
     Directory indexDirBase =  newDirectory();
-    TaxonomyWriter twBase = new LuceneTaxonomyWriter(indexDirBase);
+    TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
     twBase.addCategory(new CategoryPath("a", "0"));
     final CategoryPath abPath = new CategoryPath("a", "b");
     twBase.addCategory(abPath);
     twBase.commit();
-    TaxonomyReader trBase = new LuceneTaxonomyReader(indexDirBase);
+    TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);
 
     final ChildrenArrays ca1 = trBase.getChildrenArrays();
     
@@ -779,12 +779,12 @@ public class TestTaxonomyCombined extend
       final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry)
       throws Exception {
     SlowRAMDirectory indexDir =  new SlowRAMDirectory(-1,null); // no slowness for initialization
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     tw.addCategory(new CategoryPath("a", "0"));
     tw.addCategory(abPath);
     tw.commit();
     
-    final TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    final TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     for (int i=0; i < 1<<10; i++) { //1024 facets
       final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
       tw.addCategory(cp);
@@ -865,9 +865,9 @@ public class TestTaxonomyCombined extend
   @Test
   public void testSeparateReaderAndWriter() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     tw.commit();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
 
     int author = 1;
 
@@ -932,9 +932,9 @@ public class TestTaxonomyCombined extend
   @Test
   public void testSeparateReaderAndWriter2() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     tw.commit();
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
 
     // Test getOrdinal():
     CategoryPath author = new CategoryPath("Author");
@@ -968,26 +968,26 @@ public class TestTaxonomyCombined extend
   public void testWriterLock() throws Exception {
     // native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
     Directory indexDir = new RAMDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     tw.addCategory(new CategoryPath("hi", "there"));
     tw.commit();
     // we deliberately do not close the writer now, and keep it open and
     // locked.
     // Verify that the writer worked:
-    TaxonomyReader tr = new LuceneTaxonomyReader(indexDir);
+    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     assertEquals(2, tr.getOrdinal(new CategoryPath("hi", "there")));
     // Try to open a second writer, with the first one locking the directory.
     // We expect to get a LockObtainFailedException.
     try {
-      new LuceneTaxonomyWriter(indexDir);
+      new DirectoryTaxonomyWriter(indexDir);
       fail("should have failed to write in locked directory");
     } catch (LockObtainFailedException e) {
       // this is what we expect to happen.
     }
     // Remove the lock, and now the open should succeed, and we can
     // write to the new writer.
-    LuceneTaxonomyWriter.unlock(indexDir);
-    TaxonomyWriter tw2 = new LuceneTaxonomyWriter(indexDir);
+    DirectoryTaxonomyWriter.unlock(indexDir);
+    TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir);
     tw2.addCategory(new CategoryPath("hey"));
     tw2.close();
     // See that the writer indeed wrote:
@@ -1054,7 +1054,7 @@ public class TestTaxonomyCombined extend
   @Test
   public void testWriterCheckPaths() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomyCheckPaths(tw);
     // Also check TaxonomyWriter.getSize() - see that the taxonomy's size
     // is what we expect it to be.
@@ -1073,14 +1073,14 @@ public class TestTaxonomyCombined extend
   @Test
   public void testWriterCheckPaths2() throws Exception {
     Directory indexDir = newDirectory();
-    TaxonomyWriter tw = new LuceneTaxonomyWriter(indexDir);
+    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     fillTaxonomy(tw);
     checkPaths(tw);
     fillTaxonomy(tw);
     checkPaths(tw);
     tw.close();
 
-    tw = new LuceneTaxonomyWriter(indexDir);
+    tw = new DirectoryTaxonomyWriter(indexDir);
     checkPaths(tw);
     fillTaxonomy(tw);
     checkPaths(tw);

Modified: lucene/dev/branches/solrcloud/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (original)
+++ lucene/dev/branches/solrcloud/modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java Thu Nov  3 13:35:07 2011
@@ -505,7 +505,7 @@ public class BlockGroupingCollector exte
     subDocUpto = 0;
     docBase = readerContext.docBase;
     //System.out.println("setNextReader base=" + docBase + " r=" + readerContext.reader);
-    lastDocPerGroupBits = lastDocPerGroup.getDocIdSet(readerContext).iterator();
+    lastDocPerGroupBits = lastDocPerGroup.getDocIdSet(readerContext, readerContext.reader.getLiveDocs()).iterator();
     groupEndDocID = -1;
 
     currentReaderContext = readerContext;

Modified: lucene/dev/branches/solrcloud/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/branches/solrcloud/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Thu Nov  3 13:35:07 2011
@@ -1221,11 +1221,11 @@ public class TestGrouping extends Lucene
     }
 
     public void search(Weight weight, Collector collector) throws IOException {
-      search(ctx, weight, null, collector);
+      search(ctx, weight, collector);
     }
 
     public TopDocs search(Weight weight, int topN) throws IOException {
-      return search(ctx, weight, null, null, topN);
+      return search(ctx, weight, null, topN);
     }
 
     @Override

Modified: lucene/dev/branches/solrcloud/modules/join/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/join/build.xml?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/join/build.xml (original)
+++ lucene/dev/branches/solrcloud/modules/join/build.xml Thu Nov  3 13:35:07 2011
@@ -5,10 +5,10 @@
   </description>
 
   <property name="build.dir" location="build/" />
-  <import file="../../lucene/contrib/contrib-build.xml"/>
-
-  <property name="build.dir" location="build/" />
   <property name="dist.dir" location="dist/" />
+  <property name="maven.dist.dir" location="../dist/maven" />
+
+  <import file="../../lucene/contrib/contrib-build.xml"/>
 
   <path id="classpath">
     <pathelement path="${grouping.jar}"/>
@@ -20,10 +20,6 @@
     <pathelement location="${build.dir}/classes/java"/>
   </path>
 
-  <property name="build.dir" location="build/" />
-  <property name="dist.dir" location="dist/" />
-  <property name="maven.dist.dir" location="../dist/maven" />
-
   <target name="init" depends="contrib-build.init,jar-grouping"/>
 
   <target name="dist-maven" depends="jar-core,javadocs,contrib-build.dist-maven" />

Modified: lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java (original)
+++ lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinCollector.java Thu Nov  3 13:35:07 2011
@@ -18,7 +18,6 @@ package org.apache.lucene.search.join;
  */
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.LinkedList;
@@ -387,15 +386,17 @@ public class BlockJoinCollector extends 
     // unbox once
     final int slot = _slot;
 
-    if (offset >= queue.size()) {
-      return null;
-    }
-    int totalGroupedHitCount = 0;
-
     if (sortedGroups == null) {
+      if (offset >= queue.size()) {
+        return null;
+      }
       sortQueue();
+    } else if (offset > sortedGroups.length) {
+      return null;
     }
 
+    int totalGroupedHitCount = 0;
+
     final FakeScorer fakeScorer = new FakeScorer();
 
     final GroupDocs<Integer>[] groups = new GroupDocs[sortedGroups.length - offset];

Modified: lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java (original)
+++ lucene/dev/branches/solrcloud/modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java Thu Nov  3 13:35:07 2011
@@ -163,7 +163,7 @@ public class BlockJoinQuery extends Quer
         return null;
       }
 
-      final DocIdSet parents = parentsFilter.getDocIdSet(readerContext);
+      final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader.getLiveDocs());
       // TODO: once we do random-access filters we can
       // generalize this:
       if (parents == null) {

Modified: lucene/dev/branches/solrcloud/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java (original)
+++ lucene/dev/branches/solrcloud/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java Thu Nov  3 13:35:07 2011
@@ -57,6 +57,14 @@ public class TestBlockJoin extends Lucen
     return job;
   }
 
+  // ... has multiple qualifications
+  private Document makeQualification(String qualification, int year) {
+    Document job = new Document();
+    job.add(newField("qualification", qualification, StringField.TYPE_STORED));
+    job.add(new NumericField("year").setIntValue(year));
+    return job;
+  }
+
   public void testSimple() throws Exception {
 
     final Directory dir = newDirectory();
@@ -492,4 +500,94 @@ public class TestBlockJoin extends Lucen
       }
     }
   }
+
+  public void testMultiChildTypes() throws Exception {
+
+    final Directory dir = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(random, dir);
+
+    final List<Document> docs = new ArrayList<Document>();
+
+    docs.add(makeJob("java", 2007));
+    docs.add(makeJob("python", 2010));
+    docs.add(makeQualification("maths", 1999));
+    docs.add(makeResume("Lisa", "United Kingdom"));
+    w.addDocuments(docs);
+
+    IndexReader r = w.getReader();
+    w.close();
+    IndexSearcher s = new IndexSearcher(r);
+
+    // Create a filter that defines "parent" documents in the index - in this case resumes
+    Filter parentsFilter = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));
+
+    // Define child document criteria (finds an example of relevant work experience)
+    BooleanQuery childJobQuery = new BooleanQuery();
+    childJobQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
+    childJobQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 2006, 2011, true, true), Occur.MUST));
+
+    BooleanQuery childQualificationQuery = new BooleanQuery();
+    childQualificationQuery.add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), Occur.MUST));
+    childQualificationQuery.add(new BooleanClause(NumericRangeQuery.newIntRange("year", 1980, 2000, true, true), Occur.MUST));
+
+
+    // Define parent document criteria (find a resident in the UK)
+    Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));
+
+    // Wrap the child document query to 'join' any matches
+    // up to corresponding parent:
+    BlockJoinQuery childJobJoinQuery = new BlockJoinQuery(childJobQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg);
+    BlockJoinQuery childQualificationJoinQuery = new BlockJoinQuery(childQualificationQuery, parentsFilter, BlockJoinQuery.ScoreMode.Avg);
+
+    // Combine the parent and nested child queries into a single query for a candidate
+    BooleanQuery fullQuery = new BooleanQuery();
+    fullQuery.add(new BooleanClause(parentQuery, Occur.MUST));
+    fullQuery.add(new BooleanClause(childJobJoinQuery, Occur.MUST));
+    fullQuery.add(new BooleanClause(childQualificationJoinQuery, Occur.MUST));
+
+    //????? How do I control volume of jobs vs qualifications per parent?
+    BlockJoinCollector c = new BlockJoinCollector(Sort.RELEVANCE, 10, true, false);
+
+    s.search(fullQuery, c);
+
+    //Examine "Job" children
+    boolean showNullPointerIssue=true;
+    if (showNullPointerIssue) {
+      TopGroups<Integer> jobResults = c.getTopGroups(childJobJoinQuery, null, 0, 10, 0, true);
+
+      //assertEquals(1, results.totalHitCount);
+      assertEquals(1, jobResults.totalGroupedHitCount);
+      assertEquals(1, jobResults.groups.length);
+
+      final GroupDocs<Integer> group = jobResults.groups[0];
+      assertEquals(1, group.totalHits);
+
+      Document childJobDoc = s.doc(group.scoreDocs[0].doc);
+      //System.out.println("  doc=" + group.scoreDocs[0].doc);
+      assertEquals("java", childJobDoc.get("skill"));
+      assertNotNull(group.groupValue);
+      Document parentDoc = s.doc(group.groupValue);
+      assertEquals("Lisa", parentDoc.get("name"));
+    }
+
+    //Now Examine qualification children
+    TopGroups<Integer> qualificationResults = c.getTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);
+
+    //!!!!! This next line can null pointer - but only if prior "jobs" section called first
+    assertEquals(1, qualificationResults.totalGroupedHitCount);
+    assertEquals(1, qualificationResults.groups.length);
+
+    final GroupDocs<Integer> qGroup = qualificationResults.groups[0];
+    assertEquals(1, qGroup.totalHits);
+
+    Document childQualificationDoc = s.doc(qGroup.scoreDocs[0].doc);
+    assertEquals("maths", childQualificationDoc.get("qualification"));
+    assertNotNull(qGroup.groupValue);
+    Document parentDoc = s.doc(qGroup.groupValue);
+    assertEquals("Lisa", parentDoc.get("name"));
+
+
+    r.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java Thu Nov  3 13:35:07 2011
@@ -24,10 +24,12 @@ import java.util.Iterator;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 
 /**
@@ -48,7 +50,7 @@ public class BooleanFilter extends Filte
    * of the filters that have been added.
    */
   @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
     FixedBitSet res = null;
     final IndexReader reader = context.reader;
     
@@ -91,12 +93,13 @@ public class BooleanFilter extends Filte
       }
     }
 
-    return res != null ? res : DocIdSet.EMPTY_DOCIDSET;
+    return res != null ? BitsFilteredDocIdSet.wrap(res, acceptDocs) : DocIdSet.EMPTY_DOCIDSET;
   }
 
   private static DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context)
       throws IOException {
-    final DocIdSet set = filter.getDocIdSet(context);
+    // we dont pass acceptDocs, we will filter at the end using an additional filter
+    final DocIdSet set = filter.getDocIdSet(context, null);
     return (set == null || set == DocIdSet.EMPTY_DOCIDSET) ? null : set.iterator();
   }
 

Modified: lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java Thu Nov  3 13:35:07 2011
@@ -19,9 +19,11 @@ package org.apache.lucene.queries;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.BitsFilteredDocIdSet;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.OpenBitSetDISI;
 
@@ -97,21 +99,22 @@ public class ChainedFilter extends Filte
    * {@link Filter#getDocIdSet}.
    */
   @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
     int[] index = new int[1]; // use array as reference to modifiable int;
     index[0] = 0;             // an object attribute would not be thread safe.
     if (logic != -1) {
-      return getDocIdSet(context, logic, index);
+      return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logic, index), acceptDocs);
     } else if (logicArray != null) {
-      return getDocIdSet(context, logicArray, index);
+      return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logicArray, index), acceptDocs);
     }
-
-    return getDocIdSet(context, DEFAULT, index);
+    
+    return BitsFilteredDocIdSet.wrap(getDocIdSet(context, DEFAULT, index), acceptDocs);
   }
 
   private DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context)
       throws IOException {
-    DocIdSet docIdSet = filter.getDocIdSet(context);
+    // we don't pass acceptDocs, we will filter at the end using an additional filter
+    DocIdSet docIdSet = filter.getDocIdSet(context, null);
     if (docIdSet == null) {
       return DocIdSet.EMPTY_DOCIDSET.iterator();
     } else {
@@ -156,7 +159,8 @@ public class ChainedFilter extends Filte
       throws IOException {
     OpenBitSetDISI result = initialResult(context, logic, index);
     for (; index[0] < chain.length; index[0]++) {
-      doChain(result, logic, chain[index[0]].getDocIdSet(context));
+      // we don't pass acceptDocs, we will filter at the end using an additional filter
+      doChain(result, logic, chain[index[0]].getDocIdSet(context, null));
     }
     return result;
   }
@@ -176,7 +180,8 @@ public class ChainedFilter extends Filte
 
     OpenBitSetDISI result = initialResult(context, logic[0], index);
     for (; index[0] < chain.length; index[0]++) {
-      doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context));
+      // we don't pass acceptDocs, we will filter at the end using an additional filter
+      doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context, null));
     }
     return result;
   }

Modified: lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java Thu Nov  3 13:35:07 2011
@@ -54,7 +54,7 @@ public class TermsFilter extends Filter 
    */
 
   @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
     IndexReader reader = context.reader;
     FixedBitSet result = new FixedBitSet(reader.maxDoc());
     Fields fields = reader.fields();
@@ -64,7 +64,6 @@ public class TermsFilter extends Filter 
     }
 
     BytesRef br = new BytesRef();
-    Bits liveDocs = reader.getLiveDocs();
     String lastField = null;
     Terms termsC = null;
     TermsEnum termsEnum = null;
@@ -72,6 +71,9 @@ public class TermsFilter extends Filter 
     for (Term term : terms) {
       if (!term.field().equals(lastField)) {
         termsC = fields.terms(term.field());
+        if (termsC == null) {
+          return result;
+        }
         termsEnum = termsC.iterator();
         lastField = term.field();
       }
@@ -79,7 +81,7 @@ public class TermsFilter extends Filter 
       if (terms != null) { // TODO this check doesn't make sense, decide which variable its supposed to be for
         br.copy(term.bytes());
         if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) {
-          docs = termsEnum.docs(liveDocs, docs);
+          docs = termsEnum.docs(acceptDocs, docs);
           while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
             result.set(docs.docID());
           }

Modified: lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java Thu Nov  3 13:35:07 2011
@@ -35,6 +35,7 @@ import org.apache.lucene.search.DocIdSet
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.QueryWrapperFilter;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.LuceneTestCase;
 
 import java.io.IOException;
@@ -94,7 +95,7 @@ public class BooleanFilterTest extends L
   private Filter getNullDISFilter() {
     return new Filter() {
       @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
         return null;
       }
     };
@@ -103,7 +104,7 @@ public class BooleanFilterTest extends L
   private Filter getNullDISIFilter() {
     return new Filter() {
       @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
         return new DocIdSet() {
           @Override
           public DocIdSetIterator iterator() {
@@ -122,7 +123,7 @@ public class BooleanFilterTest extends L
   private void tstFilterCard(String mes, int expected, Filter filt)
       throws Exception {
     // BooleanFilter never returns null DIS or null DISI!
-    DocIdSetIterator disi = filt.getDocIdSet(new AtomicReaderContext(reader)).iterator();
+    DocIdSetIterator disi = filt.getDocIdSet(new AtomicReaderContext(reader), reader.getLiveDocs()).iterator();
     int actual = 0;
     while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
       actual++;

Modified: lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java (original)
+++ lucene/dev/branches/solrcloud/modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java Thu Nov  3 13:35:07 2011
@@ -23,6 +23,7 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SlowMultiReaderWrapper;
 import org.apache.lucene.index.Term;
@@ -30,6 +31,7 @@ import org.apache.lucene.search.Filter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ReaderUtil;
 
 public class TermsFilterTest extends LuceneTestCase {
 
@@ -68,22 +70,57 @@ public class TermsFilterTest extends Luc
 
     TermsFilter tf = new TermsFilter();
     tf.addTerm(new Term(fieldName, "19"));
-    FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context);
+    FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
     assertEquals("Must match nothing", 0, bits.cardinality());
 
     tf.addTerm(new Term(fieldName, "20"));
-    bits = (FixedBitSet) tf.getDocIdSet(context);
+    bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
     assertEquals("Must match 1", 1, bits.cardinality());
 
     tf.addTerm(new Term(fieldName, "10"));
-    bits = (FixedBitSet) tf.getDocIdSet(context);
+    bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
     assertEquals("Must match 2", 2, bits.cardinality());
 
     tf.addTerm(new Term(fieldName, "00"));
-    bits = (FixedBitSet) tf.getDocIdSet(context);
+    bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
     assertEquals("Must match 2", 2, bits.cardinality());
 
     reader.close();
     rd.close();
   }
+  
+  public void testMissingField() throws Exception {
+    String fieldName = "field1";
+    Directory rd1 = newDirectory();
+    RandomIndexWriter w1 = new RandomIndexWriter(random, rd1);
+    Document doc = new Document();
+    doc.add(newField(fieldName, "content1", StringField.TYPE_STORED));
+    w1.addDocument(doc);
+    IndexReader reader1 = w1.getReader();
+    w1.close();
+    
+    fieldName = "field2";
+    Directory rd2 = newDirectory();
+    RandomIndexWriter w2 = new RandomIndexWriter(random, rd2);
+    doc = new Document();
+    doc.add(newField(fieldName, "content2", StringField.TYPE_STORED));
+    w2.addDocument(doc);
+    IndexReader reader2 = w2.getReader();
+    w2.close();
+    
+    TermsFilter tf = new TermsFilter();
+    tf.addTerm(new Term(fieldName, "content1"));
+    
+    MultiReader multi = new MultiReader(reader1, reader2);
+    for (IndexReader.AtomicReaderContext context : ReaderUtil.leaves(multi.getTopReaderContext())) {
+      FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs());
+      assertTrue("Must be >= 0", bits.cardinality() >= 0);      
+    }
+    multi.close();
+    reader1.close();
+    reader2.close();
+    rd1.close();
+    rd2.close();
+  }
+
 }

Modified: lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java (original)
+++ lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java Thu Nov  3 13:35:07 2011
@@ -203,12 +203,8 @@ public class AnalyzerQueryNodeProcessor 
             children.add(new FieldQueryNode(field, term, -1, -1));
 
           }
-          if (positionCount == 1)
-            return new GroupQueryNode(
-              new StandardBooleanQueryNode(children, true));
-          else
-            return new StandardBooleanQueryNode(children, false);
-
+          return new GroupQueryNode(
+            new StandardBooleanQueryNode(children, positionCount==1));
         } else {
           // phrase query:
           MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();

Modified: lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java (original)
+++ lucene/dev/branches/solrcloud/modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java Thu Nov  3 13:35:07 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.NumericRangeFilter;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.queryparser.xml.DOMUtils;
 import org.apache.lucene.queryparser.xml.FilterBuilder;
@@ -155,7 +156,7 @@ public class NumericRangeFilterBuilder i
   static class NoMatchFilter extends Filter {
 
     @Override
-    public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
       return null;
 		}
 

Modified: lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (original)
+++ lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java Thu Nov  3 13:35:07 2011
@@ -359,8 +359,16 @@ public class TestQPHelper extends Lucene
     BooleanQuery expected = new BooleanQuery();
     expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
     expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
-    
     assertEquals(expected, getQuery("中国", analyzer));
+    
+    expected = new BooleanQuery();
+    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.MUST);
+    BooleanQuery inner = new BooleanQuery();
+    inner.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
+    inner.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);
+    expected.add(inner, BooleanClause.Occur.MUST);
+    assertEquals(expected, getQuery("中 AND 中国", new SimpleCJKAnalyzer()));
+
   }
   
   public void testCJKBoostedTerm() throws Exception {
@@ -609,7 +617,7 @@ public class TestQPHelper extends Lucene
 
     assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
     assertQueryEquals("term phrase term", qpAnalyzer,
-        "term phrase1 phrase2 term");
+        "term (phrase1 phrase2) term");
 
     assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
         "+term -(phrase1 phrase2) term");

Modified: lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (original)
+++ lucene/dev/branches/solrcloud/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java Thu Nov  3 13:35:07 2011
@@ -65,7 +65,7 @@ public class TestNumericRangeFilterBuild
     try {
       IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(ramDir, true));
       try {
-        assertNull(filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext()));
+        assertNull(filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext(), reader.getLiveDocs()));
       }
       finally {
         reader.close();

Modified: lucene/dev/branches/solrcloud/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (original)
+++ lucene/dev/branches/solrcloud/modules/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java Thu Nov  3 13:35:07 2011
@@ -70,10 +70,7 @@ public class DirectSpellChecker {
    *         shortest of the two terms instead of the longest.
    *  </ul>
    */
-  public static final StringDistance INTERNAL_LEVENSHTEIN = new StringDistance() {
-    public float getDistance(String s1, String s2) {
-      throw new UnsupportedOperationException("Not for external use.");
-    }};
+  public static final StringDistance INTERNAL_LEVENSHTEIN = new LuceneLevenshteinDistance();
 
   /** maximum edit distance for candidate terms */
   private int maxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;

Modified: lucene/dev/branches/solrcloud/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java (original)
+++ lucene/dev/branches/solrcloud/modules/suggest/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java Thu Nov  3 13:35:07 2011
@@ -29,7 +29,36 @@ import org.apache.lucene.util.English;
 import org.apache.lucene.util.LuceneTestCase;
 
 public class TestDirectSpellChecker extends LuceneTestCase {
+  
+  public void testInternalLevenshteinDistance() throws Exception {
+    DirectSpellChecker spellchecker = new DirectSpellChecker();
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir, 
+        new MockAnalyzer(random, MockTokenizer.KEYWORD, true));
+
+    String[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoi𐑍" };
+    for (int i = 0; i < termsToAdd.length; i++) {
+      Document doc = new Document();
+      doc.add(newField("repentance", termsToAdd[i], TextField.TYPE_UNSTORED));
+      writer.addDocument(doc);
+    }
 
+    IndexReader ir = writer.getReader();
+    String misspelled = "metanoix";
+    SuggestWord[] similar = spellchecker.suggestSimilar(new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
+    assertTrue(similar.length == 4);
+    
+    StringDistance sd = spellchecker.getDistance();
+    assertTrue(sd instanceof LuceneLevenshteinDistance);
+    for(SuggestWord word : similar) {
+      assertTrue(word.score==sd.getDistance(word.string, misspelled));
+      assertTrue(word.score==sd.getDistance(misspelled, word.string));
+    }
+    
+    ir.close();
+    writer.close();
+    dir.close();
+  }
   public void testSimpleExamples() throws Exception {
     DirectSpellChecker spellChecker = new DirectSpellChecker();
     spellChecker.setMinQueryLength(0);

Modified: lucene/dev/branches/solrcloud/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/CHANGES.txt?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/CHANGES.txt (original)
+++ lucene/dev/branches/solrcloud/solr/CHANGES.txt Thu Nov  3 13:35:07 2011
@@ -56,6 +56,11 @@ Upgrading from Solr 3.5-dev
   
 * FacetComponent no longer catches and embeds exceptions occurred during facet
   processing, it throws HTTP 400 or 500 exceptions instead.
+  
+* The VelocityResponseWriter is no longer built into the core.  Its JAR and
+  dependencies now need to be added (via <lib> or solr/home lib inclusion),
+  and it needs to be registered in solrconfig.xml like this:
+    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter"/>
 
 Detailed Change List
 ----------------------
@@ -86,7 +91,7 @@ New Features
   DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
   levenshtein automata.  (James Dyer, rmuir)
    
-* SOLR-1873: SolrCloud - added shared/central config and core/shard managment via zookeeper,
+* SOLR-1873: SolrCloud - added shared/central config and core/shard management via zookeeper,
   built-in load balancing, and infrastructure for future SolrCloud work. (yonik, Mark Miller)
   Additional Work:
   SOLR-2324: SolrCloud solr.xml parameters are not persisted by CoreContainer.
@@ -257,6 +262,12 @@ Bug Fixes
 * SOLR-2654: Directorys used by a SolrCore are now closed when they are no longer used.
   (Mark Miller)
   
+* SOLR-2854: Now load URL content stream data (via stream.url) when called for during request handling,
+  rather than loading URL content streams automatically regardless of use.
+  (David Smiley and Ryan McKinley via ehatcher)
+  
+  
+  
 Other Changes
 ----------------------
 
@@ -334,6 +345,15 @@ Other Changes
 * SOLR-2756: Maven configuration: Excluded transitive stax:stax-api dependency
   from org.codehaus.woodstox:wstx-asl dependency. (David Smiley via Steve Rowe)
 
+* SOLR-2588: Moved VelocityResponseWriter back to contrib module in order to 
+  remove it as a mandatory core dependency.  (ehatcher)
+  
+* SOLR-2718: Add ability to lazy load response writers, defined with startup="lazy".
+  (ehatcher)
+
+* SOLR-2862: More explicit lexical resources location logged if Carrot2 clustering 
+  extension is used. Fixed solr. impl. of IResource and IResourceLookup. (Dawid Weiss)
+
 Documentation
 ----------------------
 
@@ -355,10 +375,14 @@ New Features
   for 99 languages (janhoy, cmale)
 
 * SOLR-1979: New contrib "langid". Adds language identification capabilities as an 
-  Update Processor, using Tika's LanguageIdentifier (janhoy, Tommaso Teofili, gsingers)
+  Update Processor, using Tika's LanguageIdentifier or Cybozu language-detection
+  library (janhoy, Tommaso Teofili, gsingers)
 
 * SOLR-2818: Added before/after count response parsing support for range facets in
   SolrJ. (Bernhard Frauendienst via Martijn van Groningen)  
+  
+* SOLR-2276: Add support for cologne phonetic to PhoneticFilterFactory. 
+  (Marc Pompl via rmuir)
 
 Bug Fixes
 ----------------------
@@ -383,6 +407,10 @@ Bug Fixes
 * SOLR-2791: Replication: abortfetch command is broken if replication was started
   by fetchindex command instead of a regular poll (Yury Kats via shalin)
 
+* SOLR-2861: Fix extremely rare race condition on commit that can result
+  in a NPE (yonik)
+
+
  Other Changes
 ----------------------
 
@@ -413,6 +441,11 @@ Bug Fixes
 
 * SOLR-2792: Allow case insensitive Hunspell stemming (janhoy, rmuir)
 
+* SOLR-2862: More explicit lexical resources location logged if Carrot2 clustering 
+  extension is used. Fixed solr. impl. of IResource and IResourceLookup. (Dawid Weiss)
+
+* SOLR-2849: Fix dependencies in Maven POMs. (David Smiley via Steve Rowe)
+
 ==================  3.4.0  ==================
 
 Upgrading from Solr 3.3

Modified: lucene/dev/branches/solrcloud/solr/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/NOTICE.txt?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/NOTICE.txt (original)
+++ lucene/dev/branches/solrcloud/solr/NOTICE.txt Thu Nov  3 13:35:07 2011
@@ -253,6 +253,18 @@ Copyright 2004 Sun Microsystems, Inc. (R
 
 Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/)
  
+=========================================================================
+==  Language Detection Notices                                         ==
+=========================================================================
+
+The following notices apply to the libraries in contrib/langid/lib:
+
+This product includes software developed by Cybozu Labs, Inc.
+(c)2010 All rights reserved by Cybozu Labs, Inc.
+http://code.google.com/p/language-detection/
+
+This product includes software developed by the Jsonic project:
+http://sourceforge.jp/projects/jsonic/
 
 =========================================================================
 ==     Carrot2 Notice                                                  ==

Modified: lucene/dev/branches/solrcloud/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/common-build.xml?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/common-build.xml (original)
+++ lucene/dev/branches/solrcloud/solr/common-build.xml Thu Nov  3 13:35:07 2011
@@ -145,6 +145,8 @@
       <arg value="-c" />
       <arg value="${common-solr.dir}/contrib/uima/lib" />
       <arg value="-c" />
+      <arg value="${common-solr.dir}/contrib/velocity/lib" />
+      <arg value="-c" />
       <arg value="${common-solr.dir}/example/example-DIH/solr/db/lib" />
       <arg value="-c" />
       <arg value="${common-solr.dir}/example/example-DIH/solr/mail/lib" />

Modified: lucene/dev/branches/solrcloud/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Thu Nov  3 13:35:07 2011
@@ -17,8 +17,7 @@ package org.apache.solr.handler.clusteri
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.InputStream;
+import java.io.*;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -28,6 +27,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrDocument;
@@ -67,13 +67,14 @@ import org.slf4j.LoggerFactory;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
+import com.google.common.io.Closeables;
 
 /**
  * Search results clustering engine based on Carrot2 clustering algorithms.
  * <p/>
  * Output from this class is subject to change.
  *
- * @link http://project.carrot2.org
+ * @see "http://project.carrot2.org"
  */
 public class CarrotClusteringEngine extends SearchClusteringEngine {
 	private transient static Logger log = LoggerFactory
@@ -101,6 +102,90 @@ public class CarrotClusteringEngine exte
    */
   private Controller controller = ControllerFactory.createPooling();
   private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
+  
+  private static class SolrResourceLocator implements IResourceLocator {
+    private final SolrResourceLoader resourceLoader;
+    private final String carrot2ResourcesDir;
+
+    public SolrResourceLocator(SolrCore core, SolrParams initParams) {
+      resourceLoader = core.getResourceLoader();
+      carrot2ResourcesDir = initParams.get(
+          CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
+    }
+
+    @Override
+    public IResource[] getAll(final String resource) {
+      final String resourceName = carrot2ResourcesDir + "/" + resource;
+      log.debug("Looking for Solr resource: " + resourceName);
+
+      InputStream resourceStream = null;
+      final byte [] asBytes;
+      try {
+        resourceStream = resourceLoader.openResource(resourceName);
+        asBytes = IOUtils.toByteArray(resourceStream);
+      } catch (RuntimeException e) {
+        log.debug("Resource not found in Solr's config: " + resourceName
+            + ". Using the default " + resource + " from Carrot JAR.");          
+        return new IResource[] {};
+      } catch (IOException e) {
+        log.warn("Could not read Solr resource " + resourceName);
+        return new IResource[] {};
+      } finally {
+        if (resourceStream != null) Closeables.closeQuietly(resourceStream);
+      }
+
+      log.info("Loaded Solr resource: " + resourceName);
+
+      final IResource foundResource = new IResource() {
+        @Override
+        public InputStream open() throws IOException {
+          return new ByteArrayInputStream(asBytes);
+        }
+        
+        @Override
+        public int hashCode() {
+          // In case multiple resources are found they will be deduped, but we don't use it in Solr,
+          // so simply rely on instance equivalence.
+          return super.hashCode();
+        }
+        
+        @Override
+        public boolean equals(Object obj) {
+          // In case multiple resources are found they will be deduped, but we don't use it in Solr,
+          // so simply rely on instance equivalence.
+          return super.equals(obj);
+        }
+
+        @Override
+        public String toString() {
+          return "Solr config resource: " + resourceName;
+        }
+      };
+
+      return new IResource[] { foundResource };
+    }
+
+    @Override
+    public int hashCode() {
+      // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
+      // so simply rely on instance equivalence.
+      return super.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      // In case multiple locations are used locators will be deduped, but we don't use it in Solr,
+      // so simply rely on instance equivalence.
+      return super.equals(obj);
+    }
+
+    @Override
+    public String toString() {
+      return "SolrResourceLocator, " 
+          + "configDir=" + new File(resourceLoader.getConfigDir()).getAbsolutePath()
+          + ", Carrot2 relative lexicalResourcesDir=";
+    }
+  }
 
   @Override
   @Deprecated
@@ -168,38 +253,10 @@ public class CarrotClusteringEngine exte
 
     // Customize Carrot2's resource lookup to first look for resources
     // using Solr's resource loader. If that fails, try loading from the classpath.
-    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
-        .resourceLookup(new ResourceLookup(new IResourceLocator() {
-          @Override
-          public IResource[] getAll(final String resource) {
-            final SolrResourceLoader resourceLoader = core.getResourceLoader();
-            final String carrot2ResourcesDir = initParams.get(
-                CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
-            try {
-              log.debug("Looking for " + resource + " in "
-                  + carrot2ResourcesDir);
-              final InputStream resourceStream = resourceLoader
-                  .openResource(carrot2ResourcesDir + "/" + resource);
-
-              log.info(resource + " loaded from " + carrot2ResourcesDir);
-              final IResource foundResource = new IResource() {
-                @Override
-                public InputStream open() throws IOException {
-                  return resourceStream;
-                }
-              };
-              return new IResource[] { foundResource };
-            } catch (RuntimeException e) {
-              // No way to distinguish if the resource was found but failed
-              // to load or wasn't found at all, so we simply fall back
-              // to Carrot2 defaults here by returning an empty locations array.
-              log.debug(resource + " not found in " + carrot2ResourcesDir
-                  + ". Using the default " + resource + " from Carrot JAR.");
-              return new IResource[] {};
-            }
-          }
-        },
-
+    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(
+      new ResourceLookup(
+        // Solr-specific resource loading.
+        new SolrResourceLocator(core, initParams),
         // Using the class loader directly because this time we want to omit the prefix
         new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
 

Modified: lucene/dev/branches/solrcloud/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java Thu Nov  3 13:35:07 2011
@@ -143,10 +143,6 @@ public class ExtractingDocumentLoader ex
     }
     if (parser != null) {
       Metadata metadata = new Metadata();
-      metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
-      metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
-      metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
-      metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
 
       // If you specify the resource name (the filename, roughly) with this parameter,
       // then Tika can make use of it in guessing the appropriate MIME type:
@@ -155,12 +151,16 @@ public class ExtractingDocumentLoader ex
         metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
       }
 
-      SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
       InputStream inputStream = null;
       try {
         inputStream = stream.getStream();
+        metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
+        metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
+        metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
+        metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
         String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
         boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
+        SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
         ContentHandler parsingHandler = handler;
 
         StringWriter writer = null;

Modified: lucene/dev/branches/solrcloud/solr/contrib/langid/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/langid/CHANGES.txt?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/langid/CHANGES.txt (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/langid/CHANGES.txt Thu Nov  3 13:35:07 2011
@@ -13,3 +13,6 @@ Initial release.  See README.txt.
 
 * SOLR-1979: New contrib "langid". Adds language identification capabilities as an 
   Update Processor, using Tika's LanguageIdentifier (janhoy, Tommaso Teofili, gsingers)
+
+* SOLR-2839: Add alternative implementation supporting 53 languages, 
+  based on http://code.google.com/p/language-detection/ (rmuir)

Modified: lucene/dev/branches/solrcloud/solr/contrib/langid/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/langid/build.xml?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/langid/build.xml (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/langid/build.xml Thu Nov  3 13:35:07 2011
@@ -27,7 +27,16 @@
 
 	<path id="classpath">
     <fileset dir="../extraction/lib" includes="*.jar"/>
+    <fileset dir="lib" includes="*.jar"/>
     <path refid="solr.base.classpath"/>   
   </path>
 
+  <dirname file="${ant.file.solr-langid}" property="solr-langid.dir"/>
+  <target name="dist-maven" depends="jar-core,javadocs,jar-src,contrib-build.dist-maven">
+    <m2-deploy-with-pom-template pom.xml="${solr-langid.dir}/lib/jsonic-pom.xml.template"
+                                 jar.file="${solr-langid.dir}/lib/jsonic-1.2.0.jar" />
+
+    <m2-deploy-with-pom-template pom.xml="${solr-langid.dir}/lib/langdetect-pom.xml.template"
+                                 jar.file="${solr-langid.dir}/lib/langdetect-r111.jar" />
+  </target>
 </project>

Modified: lucene/dev/branches/solrcloud/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java Thu Nov  3 13:35:07 2011
@@ -26,7 +26,6 @@ import org.apache.solr.request.SolrQuery
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.update.AddUpdateCommand;
-import org.apache.tika.language.LanguageIdentifier;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -40,15 +39,15 @@ import java.util.regex.Pattern;
 
 
 /**
- * Identifies the language of a set of input fields using Tika's
- * LanguageIdentifier. Also supports mapping of field names based
+ * Identifies the language of a set of input fields. 
+ * Also supports mapping of field names based
  * on detected language. 
- * The tika-core-x.y.jar must be on the classpath
  * <p>
  * See <a href="http://wiki.apache.org/solr/LanguageDetection">http://wiki.apache.org/solr/LanguageDetection</a>
  * @since 3.5
+ * @lucene.experimental
  */
-public class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams {
+public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams {
 
   protected final static Logger log = LoggerFactory
           .getLogger(LanguageIdentifierUpdateProcessor.class);
@@ -300,23 +299,7 @@ public class LanguageIdentifierUpdatePro
    * @param content The content to identify
    * @return List of detected language(s) according to RFC-3066
    */
-  protected List<DetectedLanguage> detectLanguage(String content) {
-    List<DetectedLanguage> languages = new ArrayList<DetectedLanguage>();
-    if(content.trim().length() != 0) { 
-      LanguageIdentifier identifier = new LanguageIdentifier(content.toString());
-      // FIXME: Hack - we get the distance from toString and calculate our own certainty score
-      Double distance = Double.parseDouble(tikaSimilarityPattern.matcher(identifier.toString()).replaceFirst("$1"));
-      // This formula gives: 0.02 => 0.8, 0.1 => 0.5 which is a better sweetspot than isReasonablyCertain()
-      Double certainty = 1 - (5 * distance); 
-      certainty = (certainty < 0) ? 0 : certainty;
-      DetectedLanguage language = new DetectedLanguage(identifier.getLanguage(), certainty);
-      languages.add(language);
-      log.debug("Language detected as "+language+" with a certainty of "+language.getCertainty()+" (Tika distance="+identifier.toString()+")");
-    } else {
-      log.debug("No input text to detect language from, returning empty list");
-    }
-    return languages;
-  }
+  protected abstract List<DetectedLanguage> detectLanguage(String content);
 
   /**
    * Chooses a language based on the list of candidates detected 

Modified: lucene/dev/branches/solrcloud/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml (original)
+++ lucene/dev/branches/solrcloud/solr/contrib/langid/src/test-files/langid/solr/conf/solrconfig-languageidentifier.xml Thu Nov  3 13:35:07 2011
@@ -62,7 +62,24 @@
   </requestHandler>  
 
   <updateRequestProcessorChain name="lang_id">
-		<processor class="org.apache.solr.update.processor.LanguageIdentifierUpdateProcessorFactory">
+		<processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
+	    <!-- Can take defaults, invariants and appends just like req handlers-->
+	    <lst name="defaults">
+	      <bool name="langid">true</bool>
+	      <str name="langid.fl">name,subject</str>
+	      <bool name="langid.map">true</bool>
+	      <str name="langid.langField">language_s</str>
+	      <str name="langid.langsField">language_sm</str>
+	      <str name="langid.map.lcmap">th:thai</str>
+	      <float name="threshold">0.5</float>
+	      <str name="langid.fallback">fallback</str>
+	    </lst>
+		</processor>
+    <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>
+  
+    <updateRequestProcessorChain name="lang_id_alt">
+		<processor class="org.apache.solr.update.processor.LangDetectLanguageIdentifierUpdateProcessorFactory">
 	    <!-- Can take defaults, invariants and appends just like req handlers-->
 	    <lst name="defaults">
 	      <bool name="langid">true</bool>

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java Thu Nov  3 13:35:07 2011
@@ -21,9 +21,12 @@ import java.lang.reflect.Method;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.commons.codec.Encoder;
 import org.apache.commons.codec.language.Caverphone;
+import org.apache.commons.codec.language.ColognePhonetic;
 import org.apache.commons.codec.language.DoubleMetaphone;
 import org.apache.commons.codec.language.Metaphone;
 import org.apache.commons.codec.language.RefinedSoundex;
@@ -59,16 +62,18 @@ public class PhoneticFilterFactory exten
 {
   public static final String ENCODER = "encoder";
   public static final String INJECT = "inject"; // boolean
+  private static final String PACKAGE_CONTAINING_ENCODERS = "org.apache.commons.codec.language.";
   
-  private static final Map<String, Class<? extends Encoder>> registry;
-  static {
-    registry = new HashMap<String, Class<? extends Encoder>>();
-    registry.put( "DoubleMetaphone".toUpperCase(Locale.ENGLISH), DoubleMetaphone.class );
-    registry.put( "Metaphone".toUpperCase(Locale.ENGLISH),       Metaphone.class );
-    registry.put( "Soundex".toUpperCase(Locale.ENGLISH),         Soundex.class );
-    registry.put( "RefinedSoundex".toUpperCase(Locale.ENGLISH),  RefinedSoundex.class );
-    registry.put( "Caverphone".toUpperCase(Locale.ENGLISH),      Caverphone.class );
-  }
+  private static final Map<String, Class<? extends Encoder>> registry = new HashMap<String, Class<? extends Encoder>>()
+  {{
+    put( "DoubleMetaphone".toUpperCase(Locale.ENGLISH), DoubleMetaphone.class );
+    put( "Metaphone".toUpperCase(Locale.ENGLISH),       Metaphone.class );
+    put( "Soundex".toUpperCase(Locale.ENGLISH),         Soundex.class );
+    put( "RefinedSoundex".toUpperCase(Locale.ENGLISH),  RefinedSoundex.class );
+    put( "Caverphone".toUpperCase(Locale.ENGLISH),      Caverphone.class );
+    put( "ColognePhonetic".toUpperCase(Locale.ENGLISH), ColognePhonetic.class );
+  }};
+  private static final Lock lock = new ReentrantLock();
   
   protected boolean inject = true;
   protected String name = null;
@@ -87,7 +92,12 @@ public class PhoneticFilterFactory exten
     }
     Class<? extends Encoder> clazz = registry.get(name.toUpperCase(Locale.ENGLISH));
     if( clazz == null ) {
-      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Unknown encoder: "+name +" ["+registry.keySet()+"]" );
+      lock.lock();
+      try {
+        clazz = resolveEncoder(name);
+      } finally {
+        lock.unlock();
+      }
     }
     
     try {
@@ -105,6 +115,30 @@ public class PhoneticFilterFactory exten
     }
   }
   
+  private Class<? extends Encoder> resolveEncoder(String name) {
+    Class<? extends Encoder> clazz = null;
+    try {
+      clazz = lookupEncoder(PACKAGE_CONTAINING_ENCODERS+name);
+    } catch (ClassNotFoundException e) {
+      try {
+        clazz = lookupEncoder(name);
+      } catch (ClassNotFoundException cnfe) {
+        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Unknown encoder: "+name +" ["+registry.keySet()+"]" );
+      }
+    }
+    catch (ClassCastException e) {
+      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Not an encoder: "+name +" ["+registry.keySet()+"]" );
+    }
+    return clazz;
+  }
+  
+  private Class<? extends Encoder> lookupEncoder(String name)
+      throws ClassNotFoundException {
+    Class<? extends Encoder> clazz = Class.forName(name).asSubclass(Encoder.class);
+    registry.put( name.toUpperCase(Locale.ENGLISH), clazz );
+    return clazz;
+  }
+
   public PhoneticFilter create(TokenStream input) {
     return new PhoneticFilter(input,encoder,inject);
   }

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/RequestHandlers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/RequestHandlers.java?rev=1197110&r1=1197109&r2=1197110&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/RequestHandlers.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/RequestHandlers.java Thu Nov  3 13:35:07 2011
@@ -177,7 +177,7 @@ final class RequestHandlers {
       }
     }
 
-    // we've now registered all handlers, time ot init them in the same order
+    // we've now registered all handlers, time to init them in the same order
     for (Map.Entry<PluginInfo,SolrRequestHandler> entry : handlers.entrySet()) {
       PluginInfo info = entry.getKey();
       SolrRequestHandler requestHandler = entry.getValue();