You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/08/19 19:23:52 UTC

svn commit: r1515520 - in /lucene/dev/branches/lucene5178: lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ lucene/test-framework/src/java/org/apache/lucene/index/ solr/core/src/java/org/apache/solr/request/ solr/core/src/java/or...

Author: rmuir
Date: Mon Aug 19 17:23:52 2013
New Revision: 1515520

URL: http://svn.apache.org/r1515520
Log:
improve DV faceting tests, support missing count for single valued string fields, remove required/default restriction

Removed:
    lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/bad-schema-docValues-not-required-no-default.xml
Modified:
    lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java
    lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
    lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
    lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java
    lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java
    lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java
    lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml
    lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java
    lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
    lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml

Modified: lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java (original)
+++ lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java Mon Aug 19 17:23:52 2013
@@ -45,12 +45,13 @@ public class BytesRefFieldSource extends
     // To be sorted or not to be sorted, that is the question
     // TODO: do it cleaner?
     if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
+      final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field);
       final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field);
       return new FunctionValues() {
 
         @Override
         public boolean exists(int doc) {
-          return true; // doc values are dense
+          return docsWithField.get(doc);
         }
 
         @Override

Modified: lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (original)
+++ lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java Mon Aug 19 17:23:52 2013
@@ -650,6 +650,43 @@ public abstract class BaseDocValuesForma
     ireader.close();
     directory.close();
   }
+  
+  public void testSortedMergeAwayAllValues() throws IOException {
+    Directory directory = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwconfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+    
+    Document doc = new Document();
+    doc.add(new StringField("id", "0", Field.Store.NO));
+    iwriter.addDocument(doc);    
+    doc = new Document();
+    doc.add(new StringField("id", "1", Field.Store.NO));
+    doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
+    iwriter.addDocument(doc);
+    iwriter.commit();
+    iwriter.deleteDocuments(new Term("id", "1"));
+    iwriter.forceMerge(1);
+    
+    DirectoryReader ireader = iwriter.getReader();
+    iwriter.close();
+    
+    SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
+    if (codecSupportsDocsWithField("field")) {
+      assertEquals(-1, dv.getOrd(0));
+      assertEquals(0, dv.getValueCount());
+    } else {
+      assertEquals(0, dv.getOrd(0));
+      assertEquals(1, dv.getValueCount());
+      BytesRef ref = new BytesRef();
+      dv.lookupOrd(0, ref);
+      assertEquals(new BytesRef(), ref);
+    }
+    
+    ireader.close();
+    directory.close();
+  }
 
   public void testBytesWithNewline() throws IOException {
     Analyzer analyzer = new MockAnalyzer(random());

Modified: lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java Mon Aug 19 17:23:52 2013
@@ -218,12 +218,7 @@ public class DocValuesFacets {
   static NamedList<Integer> finalize(NamedList<Integer> res, SolrIndexSearcher searcher, SchemaField schemaField, DocSet docs, int missingCount, boolean missing) throws IOException {
     if (missing) {
       if (missingCount < 0) {
-        if (schemaField.multiValued()) {
-          missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
-        } else {
-          // nocommit: support missing count (ord = -1) for single-valued here.
-          missingCount = 0; // single-valued dv is implicitly 0
-        }
+        missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
       }
       res.add(null, missingCount);
     }
@@ -232,12 +227,12 @@ public class DocValuesFacets {
   }
   
   /** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
-  // specialized since the single-valued case is simpler: you don't have to deal with missing count, etc
+  // specialized since the single-valued case is different
   static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
     int doc;
     while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
       int term = si.getOrd(doc);
-      if (map != null) {
+      if (map != null && term >= 0) {
         term = (int) map.getGlobalOrd(subIndex, term);
       }
       int arrIdx = term-startTermIndex;

Modified: lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java Mon Aug 19 17:23:52 2013
@@ -255,7 +255,7 @@ final class NumericFacets {
 
       if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
         if (!sf.indexed()) {
-          throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on a field which is not indexed");
+          throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed");
         }
         // Add zeros until there are limit results
         final Set<String> alreadySeen = new HashSet<String>();

Modified: lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java Mon Aug 19 17:23:52 2013
@@ -80,9 +80,6 @@ public class StrField extends PrimitiveF
 
   @Override
   public void checkSchemaField(SchemaField field) {
-    if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
-      throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
-    }
   }
 }
 

Modified: lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java Mon Aug 19 17:23:52 2013
@@ -696,9 +696,6 @@ public class TrieField extends Primitive
 
   @Override
   public void checkSchemaField(final SchemaField field) {
-    if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
-      throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
-    }
   }
 }
 

Modified: lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml Mon Aug 19 17:23:52 2013
@@ -26,17 +26,16 @@
   <fields>
     <field name="id"    type="string" indexed="true"  stored="true"  docValues="false" multiValued="false" required="true"/>
     <field name="id_dv" type="string" indexed="false" stored="false" docValues="true"  multiValued="false" required="true"/>
-    <!-- TODO: improve this test so we don't have to make all these DV types multivalued (for missing values) -->
     <dynamicField name="*_i"     type="int"    indexed="true"  stored="false" docValues="false"/>
-    <dynamicField name="*_i_dv"  type="int"    indexed="false" stored="false" docValues="true"  multiValued="true"/>  
+    <dynamicField name="*_i_dv"  type="int"    indexed="false" stored="false" docValues="true"/>  
     <dynamicField name="*_is"    type="int"    indexed="true"  stored="false" docValues="false" multiValued="true"/>
     <dynamicField name="*_is_dv" type="int"    indexed="false" stored="false" docValues="true"  multiValued="true"/>
-    <dynamicField name="*_s"     type="string" indexed="true"  stored="false" docValues="false" multiValued="true"/>
-    <dynamicField name="*_s_dv"  type="string" indexed="false" stored="false" docValues="true"  multiValued="true"/>
+    <dynamicField name="*_s"     type="string" indexed="true"  stored="false" docValues="false"/>
+    <dynamicField name="*_s_dv"  type="string" indexed="false" stored="false" docValues="true"/>
     <dynamicField name="*_ss"    type="string" indexed="true"  stored="false" docValues="false" multiValued="true"/>
     <dynamicField name="*_ss_dv" type="string" indexed="false" stored="false" docValues="true"  multiValued="true"/>
     <dynamicField name="*_f"     type="float"  indexed="true"  stored="false" docValues="false"/>
-    <dynamicField name="*_f_dv"  type="float"  indexed="false" stored="false" docValues="true"  multiValued="true"/>
+    <dynamicField name="*_f_dv"  type="float"  indexed="false" stored="false" docValues="true"/>
   </fields>
 
   <defaultSearchField>id</defaultSearchField>

Modified: lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java Mon Aug 19 17:23:52 2013
@@ -39,7 +39,7 @@ import org.junit.Test;
  * to the indexed facet results as if it were just another faceting method.
  */
 @Slow
-@SuppressCodecs({"Lucene40", "Lucene41"})
+@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"})
 public class TestRandomDVFaceting extends SolrTestCaseJ4 {
 
   @BeforeClass
@@ -162,6 +162,8 @@ public class TestRandomDVFaceting extend
 
       SchemaField sf = req.getSchema().getField(ftype.fname);
       boolean multiValued = sf.getType().multiValuedFieldCache();
+      boolean indexed = sf.indexed();
+      boolean numeric = sf.getType().getNumericType() != null;
 
       int offset = 0;
       if (rand.nextInt(100) < 20) {
@@ -179,8 +181,21 @@ public class TestRandomDVFaceting extend
         params.add("facet.limit", Integer.toString(limit));
       }
 
-      if (rand.nextBoolean()) {
-        params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
+      // the following two situations cannot work for unindexed single-valued numerics:
+      // (currently none of the dv fields in this test config)
+      //     facet.sort = index
+      //     facet.minCount = 0
+      if (!numeric || sf.multiValued()) {
+        if (rand.nextBoolean()) {
+          params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
+        }
+        
+        if (rand.nextInt(100) < 10) {
+          params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
+        }
+      } else {
+        params.add("facet.sort", "count");
+        params.add("facet.mincount", Integer.toString(1+rand.nextInt(5)));
       }
 
       if ((ftype.vals instanceof SVal) && rand.nextInt(100) < 20) {
@@ -192,10 +207,6 @@ public class TestRandomDVFaceting extend
         params.add("facet.prefix", prefix);
       }
 
-      if (rand.nextInt(100) < 10) {
-        params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
-      }
-
       if (rand.nextInt(100) < 20) {
         params.add("facet.missing", "true");
       }

Modified: lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java Mon Aug 19 17:23:52 2013
@@ -93,10 +93,6 @@ public class BadIndexSchemaTest extends 
     doTest("bad-schema-codec-global-vs-ft-mismatch.xml", "codec does not support");
   }
 
-  public void testDocValuesNotRequiredNoDefault() throws Exception {
-    doTest("bad-schema-docValues-not-required-no-default.xml", "has no default value and is not required");
-  }
-
   public void testDocValuesUnsupported() throws Exception {
     doTest("bad-schema-unsupported-docValues.xml", "does not support doc values");
   }

Modified: lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml Mon Aug 19 17:23:52 2013
@@ -168,8 +168,10 @@
    <!--
      Some fields such as popularity and manu_exact could be modified to
      leverage doc values:
-     <field name="popularity" type="int" indexed="true" stored="true" docValues="true" default="0" />
-     <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" default="" />
+     <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
+     <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
+     <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
+
 
      Although it would make indexing slightly slower and the index bigger, it
      would also make the index faster to load, more memory-efficient and more