You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/08/19 19:23:52 UTC
svn commit: r1515520 - in /lucene/dev/branches/lucene5178:
lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/
lucene/test-framework/src/java/org/apache/lucene/index/
solr/core/src/java/org/apache/solr/request/ solr/core/src/java/or...
Author: rmuir
Date: Mon Aug 19 17:23:52 2013
New Revision: 1515520
URL: http://svn.apache.org/r1515520
Log:
improve DV faceting tests, support missing count for single-valued string fields, remove required/default restriction
Removed:
lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/bad-schema-docValues-not-required-no-default.xml
Modified:
lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java
lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java
lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java
lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java
lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml
lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java
lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml
Modified: lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java (original)
+++ lucene/dev/branches/lucene5178/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java Mon Aug 19 17:23:52 2013
@@ -45,12 +45,13 @@ public class BytesRefFieldSource extends
// To be sorted or not to be sorted, that is the question
// TODO: do it cleaner?
if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
+ final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(readerContext.reader(), field);
final BinaryDocValues binaryValues = FieldCache.DEFAULT.getTerms(readerContext.reader(), field);
return new FunctionValues() {
@Override
public boolean exists(int doc) {
- return true; // doc values are dense
+ return docsWithField.get(doc);
}
@Override
Modified: lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (original)
+++ lucene/dev/branches/lucene5178/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java Mon Aug 19 17:23:52 2013
@@ -650,6 +650,43 @@ public abstract class BaseDocValuesForma
ireader.close();
directory.close();
}
+
+ public void testSortedMergeAwayAllValues() throws IOException {
+ Directory directory = newDirectory();
+ Analyzer analyzer = new MockAnalyzer(random());
+ IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+ iwconfig.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
+
+ Document doc = new Document();
+ doc.add(new StringField("id", "0", Field.Store.NO));
+ iwriter.addDocument(doc);
+ doc = new Document();
+ doc.add(new StringField("id", "1", Field.Store.NO));
+ doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
+ iwriter.addDocument(doc);
+ iwriter.commit();
+ iwriter.deleteDocuments(new Term("id", "1"));
+ iwriter.forceMerge(1);
+
+ DirectoryReader ireader = iwriter.getReader();
+ iwriter.close();
+
+ SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
+ if (codecSupportsDocsWithField("field")) {
+ assertEquals(-1, dv.getOrd(0));
+ assertEquals(0, dv.getValueCount());
+ } else {
+ assertEquals(0, dv.getOrd(0));
+ assertEquals(1, dv.getValueCount());
+ BytesRef ref = new BytesRef();
+ dv.lookupOrd(0, ref);
+ assertEquals(new BytesRef(), ref);
+ }
+
+ ireader.close();
+ directory.close();
+ }
public void testBytesWithNewline() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
Modified: lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java Mon Aug 19 17:23:52 2013
@@ -218,12 +218,7 @@ public class DocValuesFacets {
static NamedList<Integer> finalize(NamedList<Integer> res, SolrIndexSearcher searcher, SchemaField schemaField, DocSet docs, int missingCount, boolean missing) throws IOException {
if (missing) {
if (missingCount < 0) {
- if (schemaField.multiValued()) {
- missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
- } else {
- // nocommit: support missing count (ord = -1) for single-valued here.
- missingCount = 0; // single-valued dv is implicitly 0
- }
+ missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,schemaField.getName());
}
res.add(null, missingCount);
}
@@ -232,12 +227,12 @@ public class DocValuesFacets {
}
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space */
- // specialized since the single-valued case is simpler: you don't have to deal with missing count, etc
+ // specialized since the single-valued case is different
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
int term = si.getOrd(doc);
- if (map != null) {
+ if (map != null && term >= 0) {
term = (int) map.getGlobalOrd(subIndex, term);
}
int arrIdx = term-startTermIndex;
Modified: lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/request/NumericFacets.java Mon Aug 19 17:23:52 2013
@@ -255,7 +255,7 @@ final class NumericFacets {
if (zeros && (limit < 0 || result.size() < limit)) { // need to merge with the term dict
if (!sf.indexed()) {
- throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on a field which is not indexed");
+ throw new IllegalStateException("Cannot use " + FacetParams.FACET_MINCOUNT + "=0 on field " + sf.getName() + " which is not indexed");
}
// Add zeros until there are limit results
final Set<String> alreadySeen = new HashSet<String>();
Modified: lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/StrField.java Mon Aug 19 17:23:52 2013
@@ -80,9 +80,6 @@ public class StrField extends PrimitiveF
@Override
public void checkSchemaField(SchemaField field) {
- if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
- throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
- }
}
}
Modified: lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/java/org/apache/solr/schema/TrieField.java Mon Aug 19 17:23:52 2013
@@ -696,9 +696,6 @@ public class TrieField extends Primitive
@Override
public void checkSchemaField(final SchemaField field) {
- if (field.hasDocValues() && !field.multiValued() && !(field.isRequired() || field.getDefaultValue() != null)) {
- throw new IllegalStateException("Field " + this + " has single-valued doc values enabled, but has no default value and is not required");
- }
}
}
Modified: lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/test-files/solr/collection1/conf/schema-docValuesFaceting.xml Mon Aug 19 17:23:52 2013
@@ -26,17 +26,16 @@
<fields>
<field name="id" type="string" indexed="true" stored="true" docValues="false" multiValued="false" required="true"/>
<field name="id_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="false" required="true"/>
- <!-- TODO: improve this test so we don't have to make all these DV types multivalued (for missing values) -->
<dynamicField name="*_i" type="int" indexed="true" stored="false" docValues="false"/>
- <dynamicField name="*_i_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
+ <dynamicField name="*_i_dv" type="int" indexed="false" stored="false" docValues="true"/>
<dynamicField name="*_is" type="int" indexed="true" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_is_dv" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
- <dynamicField name="*_s" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
- <dynamicField name="*_s_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="false" docValues="false"/>
+ <dynamicField name="*_s_dv" type="string" indexed="false" stored="false" docValues="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="false" docValues="false" multiValued="true"/>
<dynamicField name="*_ss_dv" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="false" docValues="false"/>
- <dynamicField name="*_f_dv" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
+ <dynamicField name="*_f_dv" type="float" indexed="false" stored="false" docValues="true"/>
</fields>
<defaultSearchField>id</defaultSearchField>
Modified: lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/TestRandomDVFaceting.java Mon Aug 19 17:23:52 2013
@@ -39,7 +39,7 @@ import org.junit.Test;
* to the indexed facet results as if it were just another faceting method.
*/
@Slow
-@SuppressCodecs({"Lucene40", "Lucene41"})
+@SuppressCodecs({"Lucene40", "Lucene41", "Lucene42"})
public class TestRandomDVFaceting extends SolrTestCaseJ4 {
@BeforeClass
@@ -162,6 +162,8 @@ public class TestRandomDVFaceting extend
SchemaField sf = req.getSchema().getField(ftype.fname);
boolean multiValued = sf.getType().multiValuedFieldCache();
+ boolean indexed = sf.indexed();
+ boolean numeric = sf.getType().getNumericType() != null;
int offset = 0;
if (rand.nextInt(100) < 20) {
@@ -179,8 +181,21 @@ public class TestRandomDVFaceting extend
params.add("facet.limit", Integer.toString(limit));
}
- if (rand.nextBoolean()) {
- params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
+ // the following two situations cannot work for unindexed single-valued numerics:
+ // (currently none of the dv fields in this test config)
+ // facet.sort = index
+ // facet.mincount = 0
+ if (!numeric || sf.multiValued()) {
+ if (rand.nextBoolean()) {
+ params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
+ }
+
+ if (rand.nextInt(100) < 10) {
+ params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
+ }
+ } else {
+ params.add("facet.sort", "count");
+ params.add("facet.mincount", Integer.toString(1+rand.nextInt(5)));
}
if ((ftype.vals instanceof SVal) && rand.nextInt(100) < 20) {
@@ -192,10 +207,6 @@ public class TestRandomDVFaceting extend
params.add("facet.prefix", prefix);
}
- if (rand.nextInt(100) < 10) {
- params.add("facet.mincount", Integer.toString(rand.nextInt(5)));
- }
-
if (rand.nextInt(100) < 20) {
params.add("facet.missing", "true");
}
Modified: lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java (original)
+++ lucene/dev/branches/lucene5178/solr/core/src/test/org/apache/solr/schema/BadIndexSchemaTest.java Mon Aug 19 17:23:52 2013
@@ -93,10 +93,6 @@ public class BadIndexSchemaTest extends
doTest("bad-schema-codec-global-vs-ft-mismatch.xml", "codec does not support");
}
- public void testDocValuesNotRequiredNoDefault() throws Exception {
- doTest("bad-schema-docValues-not-required-no-default.xml", "has no default value and is not required");
- }
-
public void testDocValuesUnsupported() throws Exception {
doTest("bad-schema-unsupported-docValues.xml", "does not support doc values");
}
Modified: lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml?rev=1515520&r1=1515519&r2=1515520&view=diff
==============================================================================
--- lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/branches/lucene5178/solr/example/solr/collection1/conf/schema.xml Mon Aug 19 17:23:52 2013
@@ -168,8 +168,10 @@
<!--
Some fields such as popularity and manu_exact could be modified to
leverage doc values:
- <field name="popularity" type="int" indexed="true" stored="true" docValues="true" default="0" />
- <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" default="" />
+ <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
+ <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
+ <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
+
Although it would make indexing slightly slower and the index bigger, it
would also make the index faster to load, more memory-efficient and more