You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/27 12:34:48 UTC
svn commit: r1642110 [2/12] - in /lucene/dev/branches/lucene6005/lucene:
analysis/uima/src/test/org/apache/lucene/analysis/uima/
backward-codecs/src/test/org/apache/lucene/index/
benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ benchmark/sr...
Modified: lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java Thu Nov 27 11:34:43 2014
@@ -30,13 +30,17 @@ import org.apache.lucene.benchmark.byTas
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.document.Document2;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.IOUtils;
/** Tests the functionality of {@link DocMaker}. */
@@ -98,7 +102,7 @@ public class DocMakerTest extends Benchm
reader.close();
}
- private Document createTestNormsDocument(boolean setNormsProp,
+ private Document2 createTestNormsDocument(boolean setNormsProp,
boolean normsPropVal, boolean setBodyNormsProp, boolean bodyNormsVal)
throws Exception {
Properties props = new Properties();
@@ -117,8 +121,13 @@ public class DocMakerTest extends Benchm
Config config = new Config(props);
DocMaker dm = new DocMaker();
+ RAMDirectory dir = new RAMDirectory();
+ IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
dm.setConfig(config, new OneDocSource());
- return dm.makeDocument();
+ Document2 doc = dm.makeDocument(w);
+ w.close();
+ dir.close();
+ return doc;
}
/* Tests doc.index.props property. */
@@ -136,7 +145,7 @@ public class DocMakerTest extends Benchm
/* Tests doc.tokenized.norms and doc.body.tokenized.norms properties. */
public void testNorms() throws Exception {
- Document doc;
+ Document2 doc;
// Don't set anything, use the defaults
doc = createTestNormsDocument(false, false, false, false);
Modified: lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteEnwikiLineDocTaskTest.java Thu Nov 27 11:34:43 2014
@@ -32,9 +32,11 @@ import org.apache.lucene.benchmark.Bench
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.document.Document2;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexWriter;
/** Tests the functionality of {@link WriteEnwikiLineDocTask}. */
public class WriteEnwikiLineDocTaskTest extends BenchmarkTestCase {
@@ -47,12 +49,12 @@ public class WriteEnwikiLineDocTaskTest
AtomicInteger flip = new AtomicInteger(0);
@Override
- public Document makeDocument() throws Exception {
+ public Document2 makeDocument(IndexWriter w) throws Exception {
boolean isCategory = (flip.incrementAndGet() % 2 == 0);
- Document doc = new Document();
- doc.add(new StringField(BODY_FIELD, "body text", Field.Store.NO));
- doc.add(new StringField(TITLE_FIELD, isCategory ? "Category:title text" : "title text", Field.Store.NO));
- doc.add(new StringField(DATE_FIELD, "date text", Field.Store.NO));
+ Document2 doc = w.newDocument();
+ doc.addAtom(BODY_FIELD, "body text");
+ doc.addAtom(TITLE_FIELD, isCategory ? "Category:title text" : "title text");
+ doc.addAtom(DATE_FIELD, "date text");
return doc;
}
Modified: lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java Thu Nov 27 11:34:43 2014
@@ -33,9 +33,11 @@ import org.apache.lucene.benchmark.byTas
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type;
+import org.apache.lucene.document.Document2;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexWriter;
/** Tests the functionality of {@link WriteLineDocTask}. */
public class WriteLineDocTaskTest extends BenchmarkTestCase {
@@ -44,11 +46,11 @@ public class WriteLineDocTaskTest extend
public static final class WriteLineDocMaker extends DocMaker {
@Override
- public Document makeDocument() throws Exception {
- Document doc = new Document();
- doc.add(new StringField(BODY_FIELD, "body", Field.Store.NO));
- doc.add(new StringField(TITLE_FIELD, "title", Field.Store.NO));
- doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+ public Document2 makeDocument(IndexWriter w) throws Exception {
+ Document2 doc = w.newDocument();
+ doc.addAtom(BODY_FIELD, "body");
+ doc.addAtom(TITLE_FIELD, "title");
+ doc.addAtom(DATE_FIELD, "date");
return doc;
}
@@ -58,11 +60,11 @@ public class WriteLineDocTaskTest extend
public static final class NewLinesDocMaker extends DocMaker {
@Override
- public Document makeDocument() throws Exception {
- Document doc = new Document();
- doc.add(new StringField(BODY_FIELD, "body\r\ntext\ttwo", Field.Store.NO));
- doc.add(new StringField(TITLE_FIELD, "title\r\ntext", Field.Store.NO));
- doc.add(new StringField(DATE_FIELD, "date\r\ntext", Field.Store.NO));
+ public Document2 makeDocument(IndexWriter w) throws Exception {
+ Document2 doc = w.newDocument();
+ doc.addAtom(BODY_FIELD, "body\r\ntext\ttwo");
+ doc.addAtom(TITLE_FIELD, "title\r\ntext");
+ doc.addAtom(DATE_FIELD, "date\r\ntext");
return doc;
}
@@ -71,10 +73,10 @@ public class WriteLineDocTaskTest extend
// class has to be public so that Class.forName.newInstance() will work
public static final class NoBodyDocMaker extends DocMaker {
@Override
- public Document makeDocument() throws Exception {
- Document doc = new Document();
- doc.add(new StringField(TITLE_FIELD, "title", Field.Store.NO));
- doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+ public Document2 makeDocument(IndexWriter w) throws Exception {
+ Document2 doc = w.newDocument();
+ doc.addAtom(TITLE_FIELD, "title");
+ doc.addAtom(DATE_FIELD, "date");
return doc;
}
}
@@ -82,10 +84,10 @@ public class WriteLineDocTaskTest extend
// class has to be public so that Class.forName.newInstance() will work
public static final class NoTitleDocMaker extends DocMaker {
@Override
- public Document makeDocument() throws Exception {
- Document doc = new Document();
- doc.add(new StringField(BODY_FIELD, "body", Field.Store.NO));
- doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+ public Document2 makeDocument(IndexWriter w) throws Exception {
+ Document2 doc = w.newDocument();
+ doc.addAtom(BODY_FIELD, "body");
+ doc.addAtom(DATE_FIELD, "date");
return doc;
}
}
@@ -93,9 +95,9 @@ public class WriteLineDocTaskTest extend
// class has to be public so that Class.forName.newInstance() will work
public static final class JustDateDocMaker extends DocMaker {
@Override
- public Document makeDocument() throws Exception {
- Document doc = new Document();
- doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+ public Document2 makeDocument(IndexWriter w) throws Exception {
+ Document2 doc = w.newDocument();
+ doc.addAtom(DATE_FIELD, "date");
return doc;
}
}
@@ -104,9 +106,9 @@ public class WriteLineDocTaskTest extend
// same as JustDate just that this one is treated as legal
public static final class LegalJustDateDocMaker extends DocMaker {
@Override
- public Document makeDocument() throws Exception {
- Document doc = new Document();
- doc.add(new StringField(DATE_FIELD, "date", Field.Store.NO));
+ public Document2 makeDocument(IndexWriter w) throws Exception {
+ Document2 doc = w.newDocument();
+ doc.addAtom(DATE_FIELD, "date");
return doc;
}
}
@@ -114,8 +116,8 @@ public class WriteLineDocTaskTest extend
// class has to be public so that Class.forName.newInstance() will work
public static final class EmptyDocMaker extends DocMaker {
@Override
- public Document makeDocument() throws Exception {
- return new Document();
+ public Document2 makeDocument(IndexWriter w) throws Exception {
+ return w.newDocument();
}
}
@@ -123,15 +125,14 @@ public class WriteLineDocTaskTest extend
public static final class ThreadingDocMaker extends DocMaker {
@Override
- public Document makeDocument() throws Exception {
- Document doc = new Document();
+ public Document2 makeDocument(IndexWriter w) throws Exception {
+ Document2 doc = w.newDocument();
String name = Thread.currentThread().getName();
- doc.add(new StringField(BODY_FIELD, "body_" + name, Field.Store.NO));
- doc.add(new StringField(TITLE_FIELD, "title_" + name, Field.Store.NO));
- doc.add(new StringField(DATE_FIELD, "date_" + name, Field.Store.NO));
+ doc.addAtom(BODY_FIELD, "body_" + name);
+ doc.addAtom(TITLE_FIELD, "title_" + name);
+ doc.addAtom(DATE_FIELD, "date_" + name);
return doc;
}
-
}
private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java Thu Nov 27 11:34:43 2014
@@ -16,11 +16,15 @@
*/
package org.apache.lucene.classification;
+import java.io.IOException;
+import java.util.Random;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
@@ -32,9 +36,6 @@ import org.apache.lucene.util.TestUtil;
import org.junit.After;
import org.junit.Before;
-import java.io.IOException;
-import java.util.Random;
-
/**
* Base class for testing {@link Classifier}s
*/
@@ -98,6 +99,7 @@ public abstract class ClassificationTest
leafReader.close();
}
}
+
protected void checkOnlineClassification(Classifier<T> classifier, String inputDoc, T expectedResult, Analyzer analyzer, String textFieldName, String classFieldName) throws Exception {
checkOnlineClassification(classifier, inputDoc, expectedResult, analyzer, textFieldName, classFieldName, null);
}
@@ -113,7 +115,7 @@ public abstract class ClassificationTest
assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
double score = classificationResult.getScore();
assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0);
- updateSampleIndex(analyzer);
+ updateSampleIndex();
ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore()));
@@ -125,7 +127,8 @@ public abstract class ClassificationTest
}
private void populateSampleIndex(Analyzer analyzer) throws IOException {
- indexWriter.deleteAll();
+ indexWriter.close();
+ indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
indexWriter.commit();
String text;
@@ -138,7 +141,7 @@ public abstract class ClassificationTest
doc.add(new Field(categoryFieldName, "politics", ft));
doc.add(new Field(booleanFieldName, "true", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "Mitt Romney seeks to assure Israel and Iran, as well as Jewish voters in the United" +
@@ -146,7 +149,7 @@ public abstract class ClassificationTest
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "politics", ft));
doc.add(new Field(booleanFieldName, "true", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "And there's a threshold question that he has to answer for the American people and " +
@@ -155,7 +158,7 @@ public abstract class ClassificationTest
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "politics", ft));
doc.add(new Field(booleanFieldName, "true", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "Still, when it comes to gun policy, many congressional Democrats have \"decided to " +
@@ -164,7 +167,7 @@ public abstract class ClassificationTest
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "politics", ft));
doc.add(new Field(booleanFieldName, "true", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "Standing amongst the thousands of people at the state Capitol, Jorstad, director of " +
@@ -173,7 +176,7 @@ public abstract class ClassificationTest
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "technology", ft));
doc.add(new Field(booleanFieldName, "false", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "So, about all those experts and analysts who've spent the past year or so saying " +
@@ -181,7 +184,7 @@ public abstract class ClassificationTest
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "technology", ft));
doc.add(new Field(booleanFieldName, "false", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "More than 400 million people trust Google with their e-mail, and 50 million store files" +
@@ -190,12 +193,12 @@ public abstract class ClassificationTest
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "technology", ft));
doc.add(new Field(booleanFieldName, "false", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "unlabeled doc";
doc.add(new Field(textFieldName, text, ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
indexWriter.commit();
}
@@ -217,7 +220,8 @@ public abstract class ClassificationTest
}
private void populatePerformanceIndex(Analyzer analyzer) throws IOException {
- indexWriter.deleteAll();
+ indexWriter.close();
+ indexWriter = new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE));
indexWriter.commit();
FieldType ft = new FieldType(TextField.TYPE_STORED);
@@ -232,7 +236,7 @@ public abstract class ClassificationTest
doc.add(new Field(textFieldName, createRandomString(random), ft));
doc.add(new Field(categoryFieldName, b ? "technology" : "politics", ft));
doc.add(new Field(booleanFieldName, String.valueOf(b), ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
}
indexWriter.commit();
}
@@ -246,7 +250,7 @@ public abstract class ClassificationTest
return builder.toString();
}
- private void updateSampleIndex(Analyzer analyzer) throws Exception {
+ private void updateSampleIndex() throws Exception {
String text;
@@ -256,54 +260,54 @@ public abstract class ClassificationTest
doc.add(new Field(categoryFieldName, "politics", ft));
doc.add(new Field(booleanFieldName, "true", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "Julian Zelizer says Bill Clinton is still trying to shape his party, years after the White House, while George W. Bush opts for a much more passive role.";
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "politics", ft));
doc.add(new Field(booleanFieldName, "true", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "Crossfire: Sen. Tim Scott passes on Sen. Lindsey Graham endorsement";
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "politics", ft));
doc.add(new Field(booleanFieldName, "true", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "Illinois becomes 16th state to allow same-sex marriage.";
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "politics", ft));
doc.add(new Field(booleanFieldName, "true", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "Apple is developing iPhones with curved-glass screens and enhanced sensors that detect different levels of pressure, according to a new report.";
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "technology", ft));
doc.add(new Field(booleanFieldName, "false", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "The Xbox One is Microsoft's first new gaming console in eight years. It's a quality piece of hardware but it's also noteworthy because Microsoft is using it to make a statement.";
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "technology", ft));
doc.add(new Field(booleanFieldName, "false", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "Google says it will replace a Google Maps image after a California father complained it shows the body of his teen-age son, who was shot to death in 2009.";
doc.add(new Field(textFieldName, text, ft));
doc.add(new Field(categoryFieldName, "technology", ft));
doc.add(new Field(booleanFieldName, "false", ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
doc = new Document();
text = "second unlabeled doc";
doc.add(new Field(textFieldName, text, ft));
- indexWriter.addDocument(doc, analyzer);
+ indexWriter.addDocument(doc);
indexWriter.commit();
}
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/utils/DataSplitterTest.java Thu Nov 27 11:34:43 2014
@@ -17,28 +17,30 @@ package org.apache.lucene.classification
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Random;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document2;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-import java.io.IOException;
-import java.util.Random;
-
/**
* Testcase for {@link org.apache.lucene.classification.utils.DatasetSplitter}
*/
@@ -59,21 +61,26 @@ public class DataSplitterTest extends Lu
dir = newDirectory();
indexWriter = new RandomIndexWriter(random(), dir);
- FieldType ft = new FieldType(TextField.TYPE_STORED);
- ft.setStoreTermVectors(true);
- ft.setStoreTermVectorOffsets(true);
- ft.setStoreTermVectorPositions(true);
-
- Analyzer analyzer = new MockAnalyzer(random());
+ FieldTypes fieldTypes = indexWriter.getFieldTypes();
+ fieldTypes.enableTermVectors(idFieldName);
+ fieldTypes.enableTermVectorPositions(idFieldName);
+ fieldTypes.enableTermVectorOffsets(idFieldName);
+
+ fieldTypes.enableTermVectors(textFieldName);
+ fieldTypes.enableTermVectorPositions(textFieldName);
+ fieldTypes.enableTermVectorOffsets(textFieldName);
+
+ fieldTypes.enableTermVectors(classFieldName);
+ fieldTypes.enableTermVectorPositions(classFieldName);
+ fieldTypes.enableTermVectorOffsets(classFieldName);
- Document doc;
Random rnd = random();
for (int i = 0; i < 100; i++) {
- doc = new Document();
- doc.add(new Field(idFieldName, Integer.toString(i), ft));
- doc.add(new Field(textFieldName, TestUtil.randomUnicodeString(rnd, 1024), ft));
- doc.add(new Field(classFieldName, TestUtil.randomUnicodeString(rnd, 10), ft));
- indexWriter.addDocument(doc, analyzer);
+ Document2 doc = indexWriter.newDocument();
+ doc.addAtom(idFieldName, Integer.toString(i));
+ doc.addLargeText(textFieldName, TestUtil.randomUnicodeString(rnd, 1024));
+ doc.addLargeText(classFieldName, TestUtil.randomUnicodeString(rnd, 10));
+ indexWriter.addDocument(doc);
}
indexWriter.commit();
Modified: lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/utils/DocToDoubleVectorUtilsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/utils/DocToDoubleVectorUtilsTest.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/utils/DocToDoubleVectorUtilsTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/utils/DocToDoubleVectorUtilsTest.java Thu Nov 27 11:34:43 2014
@@ -18,9 +18,11 @@ package org.apache.lucene.classification
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document2;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
@@ -49,20 +51,20 @@ public class DocToDoubleVectorUtilsTest
super.setUp();
dir = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir);
+ FieldTypes fieldTypes = indexWriter.getFieldTypes();
+ fieldTypes.enableTermVectors("id");
+ fieldTypes.enableTermVectorPositions("id");
+ fieldTypes.enableTermVectorOffsets("id");
+
+ fieldTypes.enableTermVectors("text");
+ fieldTypes.enableTermVectorPositions("text");
+ fieldTypes.enableTermVectorOffsets("text");
- FieldType ft = new FieldType(TextField.TYPE_STORED);
- ft.setStoreTermVectors(true);
- ft.setStoreTermVectorOffsets(true);
- ft.setStoreTermVectorPositions(true);
-
- Analyzer analyzer = new MockAnalyzer(random());
-
- Document doc;
for (int i = 0; i < 10; i++) {
- doc = new Document();
- doc.add(new Field("id", Integer.toString(i), ft));
- doc.add(new Field("text", random().nextInt(10) + " " + random().nextInt(10) + " " + random().nextInt(10), ft));
- indexWriter.addDocument(doc, analyzer);
+ Document2 doc = indexWriter.newDocument();
+ doc.addAtom("id", Integer.toString(i));
+ doc.addLargeText("text", random().nextInt(10) + " " + random().nextInt(10) + " " + random().nextInt(10));
+ indexWriter.addDocument(doc);
}
indexWriter.commit();
Modified: lucene/dev/branches/lucene6005/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java (original)
+++ lucene/dev/branches/lucene6005/lucene/codecs/src/test/org/apache/lucene/codecs/blocktreeords/TestOrdsBlockTree.java Thu Nov 27 11:34:43 2014
@@ -24,8 +24,10 @@ import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.Document2;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
@@ -49,8 +51,8 @@ public class TestOrdsBlockTree extends B
public void testBasic() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
- Document doc = new Document();
- doc.add(newTextField("field", "a b c", Field.Store.NO));
+ Document2 doc = w.newDocument();
+ doc.addLargeText("field", "a b c");
w.addDocument(doc);
IndexReader r = w.getReader();
TermsEnum te = MultiFields.getTerms(r, "field").iterator(null);
@@ -90,23 +92,23 @@ public class TestOrdsBlockTree extends B
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
List<String> terms = new ArrayList<>();
for(int i=0;i<36;i++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "" + (char) (97+i);
terms.add(term);
if (VERBOSE) {
System.out.println("i=" + i + " term=" + term);
}
- doc.add(newTextField("field", term, Field.Store.NO));
+ doc.addLargeText("field", term);
w.addDocument(doc);
}
for(int i=0;i<36;i++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "m" + (char) (97+i);
terms.add(term);
if (VERBOSE) {
System.out.println("i=" + i + " term=" + term);
}
- doc.add(newTextField("field", term, Field.Store.NO));
+ doc.addLargeText("field", term);
w.addDocument(doc);
}
if (VERBOSE) {
@@ -159,33 +161,33 @@ public class TestOrdsBlockTree extends B
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
List<String> terms = new ArrayList<>();
for(int i=0;i<36;i++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "" + (char) (97+i);
terms.add(term);
if (VERBOSE) {
System.out.println("i=" + i + " term=" + term);
}
- doc.add(newTextField("field", term, Field.Store.NO));
+ doc.addLargeText("field", term);
w.addDocument(doc);
}
for(int i=0;i<36;i++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "m" + (char) (97+i);
terms.add(term);
if (VERBOSE) {
System.out.println("i=" + i + " term=" + term);
}
- doc.add(newTextField("field", term, Field.Store.NO));
+ doc.addLargeText("field", term);
w.addDocument(doc);
}
for(int i=0;i<36;i++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "mo" + (char) (97+i);
terms.add(term);
if (VERBOSE) {
System.out.println("i=" + i + " term=" + term);
}
- doc.add(newTextField("field", term, Field.Store.NO));
+ doc.addLargeText("field", term);
w.addDocument(doc);
}
w.forceMerge(1);
@@ -215,7 +217,7 @@ public class TestOrdsBlockTree extends B
Collections.sort(terms);
for(int i=terms.size()-1;i>=0;i--) {
if (VERBOSE) {
- System.out.println("TEST: seek to ord=" + i);
+ System.out.println("TEST: seek to ord=" + i + " term=" + terms.get(i));
}
te.seekExact(i);
assertEquals(i, te.ord());
@@ -240,12 +242,12 @@ public class TestOrdsBlockTree extends B
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
IndexWriter w = new IndexWriter(dir, iwc);
for(int i=0;i<128;i++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "" + (char) i;
if (VERBOSE) {
System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term));
}
- doc.add(newStringField("field", term, Field.Store.NO));
+ doc.addAtom("field", term);
w.addDocument(doc);
}
w.forceMerge(1);
@@ -279,23 +281,23 @@ public class TestOrdsBlockTree extends B
IndexWriter w = new IndexWriter(dir, iwc);
List<String> terms = new ArrayList<>();
for(int i=0;i<36;i++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "" + (char) (97+i);
terms.add(term);
if (VERBOSE) {
System.out.println("i=" + i + " term=" + term);
}
- doc.add(newTextField("field", term, Field.Store.NO));
+ doc.addAtom("field", term);
w.addDocument(doc);
}
for(int i=0;i<128;i++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "m" + (char) i;
terms.add(term);
if (VERBOSE) {
System.out.println("i=" + i + " term=" + term + " bytes=" + new BytesRef(term));
}
- doc.add(newStringField("field", term, Field.Store.NO));
+ doc.addAtom("field", term);
w.addDocument(doc);
}
w.forceMerge(1);
@@ -326,13 +328,13 @@ public class TestOrdsBlockTree extends B
List<String> terms = new ArrayList<>();
for(int i=0;i<30;i++) {
for(int j=0;j<30;j++) {
- Document doc = new Document();
+ Document2 doc = w.newDocument();
String term = "" + (char) (97+i) + (char) (97+j);
terms.add(term);
if (VERBOSE) {
System.out.println("term=" + term);
}
- doc.add(newTextField("body", term, Field.Store.NO));
+ doc.addLargeText("body", term);
w.addDocument(doc);
}
}
@@ -364,16 +366,19 @@ public class TestOrdsBlockTree extends B
public void testSeekCeilNotFound() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
- Document doc = new Document();
+ FieldTypes fieldTypes = w.getFieldTypes();
+ fieldTypes.setMultiValued("field");
+ Document2 doc = w.newDocument();
// Get empty string in there!
- doc.add(newStringField("field", "", Field.Store.NO));
+ doc.addAtom("field", "");
w.addDocument(doc);
for(int i=0;i<36;i++) {
- doc = new Document();
+ doc = w.newDocument();
String term = "" + (char) (97+i);
String term2 = "a" + (char) (97+i);
- doc.add(newTextField("field", term + " " + term2, Field.Store.NO));
+ doc.addAtom("field", term);
+ doc.addAtom("field", term2);
w.addDocument(doc);
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java Thu Nov 27 11:34:43 2014
@@ -21,10 +21,6 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.document.DoubleField; // for javadocs
-import org.apache.lucene.document.FloatField; // for javadocs
-import org.apache.lucene.document.IntField; // for javadocs
-import org.apache.lucene.document.LongField; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.util.Attribute;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java Thu Nov 27 11:34:43 2014
@@ -129,7 +129,6 @@ public abstract class PerFieldPostingsFo
FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
final PostingsFormat format = getPostingsFormatForField(field);
-
if (format == null) {
throw new IllegalStateException("invalid null PostingsFormat for field=\"" + field + "\"");
}
@@ -192,7 +191,6 @@ public abstract class PerFieldPostingsFo
return group.fields.iterator();
}
};
-
FieldsConsumer consumer = format.fieldsConsumer(group.state);
toClose.add(consumer);
consumer.write(maskedFields);
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/BinaryTokenStream.java Thu Nov 27 11:34:43 2014
@@ -14,7 +14,7 @@ package org.apache.lucene.document;
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License.
+ * limitations under the License.
*/
import java.io.IOException;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/Document2.java Thu Nov 27 11:34:43 2014
@@ -22,9 +22,11 @@ import java.io.Reader;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Date;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
+import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -46,8 +48,9 @@ public class Document2 implements Iterab
private static final float DEFAULT_BOOST = 1.0f;
private final FieldTypes fieldTypes;
- private final List<FieldValue> fields = new ArrayList<>();
+ private final List<IndexableField> fields = new ArrayList<>();
private final boolean changeSchema;
+ private final Set<String> seenFields;
// nocommit make private again and somehow deal w/ generics
public class FieldValue implements IndexableField {
@@ -85,6 +88,10 @@ public class Document2 implements Iterab
curFieldType = fieldTypes.getFieldType(fieldName);
}
this.fieldType = curFieldType;
+ if (seenFields != null && seenFields.add(fieldName) == false && fieldType.multiValued != Boolean.TRUE) {
+ // nocommit testme
+ throw new IllegalArgumentException("field=\"" + fieldName + "\": this field is added more than once but is not multiValued");
+ }
}
@Override
@@ -142,16 +149,17 @@ public class Document2 implements Iterab
assert fieldTypes.getIndexOptions(fieldName) != IndexOptions.NONE;
FieldTypes.FieldType fieldType = fieldTypes.getFieldType(fieldName);
+ // nocommit should we be using Double.doubleToRawLongBits / Float.floatToRawIntBits?
switch (fieldType.valueType) {
case INT:
return getReusedBinaryTokenStream(intToBytes(((Number) value).intValue()), reuse);
case FLOAT:
- return getReusedBinaryTokenStream(intToBytes(sortableFloatBits(Float.floatToIntBits(((Number) value).floatValue()))), reuse);
+ return getReusedBinaryTokenStream(floatToBytes(((Number) value).floatValue()), reuse);
case LONG:
return getReusedBinaryTokenStream(longToBytes(((Number) value).longValue()), reuse);
case DOUBLE:
- return getReusedBinaryTokenStream(longToBytes(sortableDoubleBits(Double.doubleToLongBits(((Number) value).doubleValue()))), reuse);
+ return getReusedBinaryTokenStream(doubleToBytes(((Number) value).doubleValue()), reuse);
case DATE:
return getReusedBinaryTokenStream(longToBytes(((Date) value).getTime()), reuse);
case ATOM:
@@ -213,6 +221,8 @@ public class Document2 implements Iterab
case TEXT:
if (value instanceof TokenStream) {
return (TokenStream) value;
+ } else if (value instanceof StringAndTokenStream) {
+ return ((StringAndTokenStream) value).tokens;
} else if (value instanceof Reader) {
return analyzer.tokenStream(name(), (Reader) value);
} else {
@@ -274,10 +284,22 @@ public class Document2 implements Iterab
return (Number) value;
case FLOAT:
// nocommit i shouldn't do sortableFloatBits? but why does TestSortedNumericSortField.testFloat fail?
- return Integer.valueOf(sortableFloatBits(Float.floatToIntBits((Float) value)));
+ int intBits = Float.floatToIntBits((Float) value);
+ if (fieldType.multiValued) {
+ // nocommit this is weird?
+ intBits = sortableFloatBits(intBits);
+ }
+ return Integer.valueOf(intBits);
+ //return Integer.valueOf(Float.floatToRawIntBits((Float) value));
case DOUBLE:
// nocommit i shouldn't do sortableDoubleBits?
- return Long.valueOf(sortableDoubleBits(Double.doubleToLongBits((Double) value)));
+ long longBits = Double.doubleToLongBits((Double) value);
+ if (fieldType.multiValued) {
+ // nocommit this is weird?
+ longBits = sortableDoubleBits(longBits);
+ }
+ return Long.valueOf(longBits);
+ //return Long.valueOf(Double.doubleToRawLongBits((Double) value));
case DATE:
return Long.valueOf(((Date) value).getTime());
case BOOLEAN:
@@ -306,6 +328,8 @@ public class Document2 implements Iterab
case TEXT:
if (value instanceof String) {
return (String) value;
+ } else if (value instanceof StringAndTokenStream) {
+ return ((StringAndTokenStream) value).value;
} else {
return null;
}
@@ -362,13 +386,38 @@ public class Document2 implements Iterab
}
}
+ private static class StringAndTokenStream {
+ public final String value;
+ public final TokenStream tokens;
+ public StringAndTokenStream(String value, TokenStream tokens) {
+ this.value = value;
+ this.tokens = tokens;
+ }
+ }
+
public Document2(FieldTypes fieldTypes) {
this(fieldTypes, true);
}
+ public Document2(Document2 other) {
+ this.fieldTypes = other.fieldTypes;
+ this.changeSchema = other.changeSchema;
+ if (changeSchema) {
+ seenFields = new HashSet<>();
+ } else {
+ seenFields = null;
+ }
+ addAll(other);
+ }
+
Document2(FieldTypes fieldTypes, boolean changeSchema) {
this.fieldTypes = fieldTypes;
this.changeSchema = changeSchema;
+ if (changeSchema) {
+ seenFields = new HashSet<>();
+ } else {
+ seenFields = null;
+ }
}
private boolean enableExistsField = true;
@@ -402,7 +451,7 @@ public class Document2 implements Iterab
} else if (enableExistsField && fieldTypes != null && changeSchema && fieldTypes.enableExistsFilters && fieldNamesIndex < fields.size()) {
// nocommit make a more efficient version? e.g. a single field that takes a list and iterates each via TokenStream. maybe we
// should addAtom(String...)?
- return new FieldValue(FieldTypes.FIELD_NAMES_FIELD, fields.get(fieldNamesIndex++).fieldName);
+ return new FieldValue(FieldTypes.FIELD_NAMES_FIELD, fields.get(fieldNamesIndex++).name());
} else {
throw new NoSuchElementException();
}
@@ -410,18 +459,18 @@ public class Document2 implements Iterab
};
}
+ /*
public List<FieldValue> getFieldValues() {
return fields;
}
+ */
public List<IndexableField> getFields() {
- List<IndexableField> result = new ArrayList<>();
- result.addAll(fields);
- return result;
+ return fields;
}
public IndexableField getField(String name) {
- for (FieldValue field : fields) {
+ for (IndexableField field : fields) {
if (field.name().equals(name)) {
return field;
}
@@ -431,7 +480,7 @@ public class Document2 implements Iterab
public List<IndexableField> getFields(String name) {
List<IndexableField> result = new ArrayList<>();
- for (FieldValue field : fields) {
+ for (IndexableField field : fields) {
if (field.name().equals(name)) {
result.add(field);
}
@@ -490,6 +539,7 @@ public class Document2 implements Iterab
fields.add(new FieldValue(fieldName, value));
}
+ // nocommit throw exc if this field was already indexed/dvd?
/** Default: store this value. */
public void addStored(String fieldName, BytesRef value) {
// nocommit awkward we inferred binary here?
@@ -499,11 +549,13 @@ public class Document2 implements Iterab
fields.add(new FieldValue(fieldName, value));
}
+ // nocommit throw exc if this field was already indexed/dvd?
/** Default: store this value. */
public void addStored(String fieldName, byte[] value) {
addStored(fieldName, new BytesRef(value));
}
+ // nocommit throw exc if this field was already indexed/dvd?
/** Default: store this value. */
public void addStored(String fieldName, String value) {
// nocommit awkward we inferred large_text here?
@@ -513,6 +565,17 @@ public class Document2 implements Iterab
fields.add(new FieldValue(fieldName, value));
}
+ // nocommit throw exc if this field was already indexed/dvd?
+ /** Default: store this value. */
+ // nocommit testme, or remove?
+ public void addStoredInt(String fieldName, int value) {
+ // nocommit awkward we inferred large_text here?
+ if (changeSchema) {
+ fieldTypes.recordStoredValueType(fieldName, FieldTypes.ValueType.INT);
+ }
+ fields.add(new FieldValue(fieldName, value));
+ }
+
/** Default: store & DV this value. */
public void addBinary(String fieldName, BytesRef value) {
if (changeSchema) {
@@ -552,6 +615,13 @@ public class Document2 implements Iterab
fields.add(new FieldValue(fieldName, value, boost));
}
+ public void addLargeText(String fieldName, String value, TokenStream tokens, float boost) {
+ if (changeSchema) {
+ fieldTypes.recordLargeTextType(fieldName, true, true);
+ }
+ fields.add(new FieldValue(fieldName, new StringAndTokenStream(value, tokens), boost));
+ }
+
/** E.g. a "body" field. Default: indexes this value as multiple tokens from analyzer. */
public void addLargeText(String fieldName, Reader reader) {
addLargeText(fieldName, reader, DEFAULT_BOOST);
@@ -638,6 +708,11 @@ public class Document2 implements Iterab
fields.add(new FieldValue(fieldName, value));
}
+ // nocommit mmmmmm
+ public void add(IndexableField field) {
+ fields.add(field);
+ }
+
static {
// nocommit is there a cleaner/general way to detect missing enum value in case switch statically? must we use ecj?
assert FieldTypes.ValueType.values().length == 12: "missing case for switch statement below";
@@ -646,61 +721,66 @@ public class Document2 implements Iterab
/** Note: this FieldTypes must already know about all the fields in the incoming doc. */
public void addAll(Document2 other) {
// nocommit should we insist other.fieldTypes == this.fieldTypes? or, that they are "congruent"?
- for (FieldValue field : other.fields) {
- String fieldName = field.name();
- FieldType fieldType = fieldTypes.getFieldType(fieldName);
- // nocommit need more checking here ... but then, we should somehow remove StoredDocument, sicne w/ FieldTypes we can now fully
- // reconstruct (as long as all fields were stored) what was indexed:
- switch (fieldType.valueType) {
- case TEXT:
- addLargeText(fieldName, field.stringValue());
- break;
- case SHORT_TEXT:
- addShortText(fieldName, field.stringValue());
- break;
- case ATOM:
- if (field.value instanceof BytesRef) {
- addAtom(fieldName, (BytesRef) field.value);
- } else {
- addAtom(fieldName, (String) field.value);
+ for (IndexableField indexableField : other.fields) {
+ String fieldName = indexableField.name();
+ if (indexableField instanceof FieldValue) {
+ FieldValue field = (FieldValue) indexableField;
+ FieldType fieldType = fieldTypes.getFieldType(fieldName);
+ // nocommit need more checking here ... but then, we should somehow remove StoredDocument, since w/ FieldTypes we can now fully
+ // reconstruct (as long as all fields were stored) what was indexed:
+ switch (fieldType.valueType) {
+ case TEXT:
+ addLargeText(fieldName, field.stringValue());
+ break;
+ case SHORT_TEXT:
+ addShortText(fieldName, field.stringValue());
+ break;
+ case ATOM:
+ if (field.value instanceof BytesRef) {
+ addAtom(fieldName, (BytesRef) field.value);
+ } else {
+ addAtom(fieldName, (String) field.value);
+ }
+ break;
+ case INT:
+ addInt(fieldName, field.numericValue().intValue());
+ break;
+ case FLOAT:
+ addFloat(fieldName, field.numericValue().floatValue());
+ break;
+ case LONG:
+ addLong(fieldName, field.numericValue().longValue());
+ break;
+ case DOUBLE:
+ addDouble(fieldName, field.numericValue().doubleValue());
+ break;
+ case BINARY:
+ addStored(fieldName, field.binaryValue());
+ break;
+ case BOOLEAN:
+ addBoolean(fieldName, ((Boolean) field.value).booleanValue());
+ break;
+ case DATE:
+ addDate(fieldName, (Date) field.value);
+ break;
+ case INET_ADDRESS:
+ addInetAddress(fieldName, (InetAddress) field.value);
+ break;
+ default:
+ // BUG:
+ throw new AssertionError("missing valueType=" + fieldType.valueType + " in switch");
}
- break;
- case INT:
- addInt(fieldName, field.numericValue().intValue());
- break;
- case FLOAT:
- addFloat(fieldName, field.numericValue().floatValue());
- break;
- case LONG:
- addLong(fieldName, field.numericValue().longValue());
- break;
- case DOUBLE:
- addDouble(fieldName, field.numericValue().doubleValue());
- break;
- case BINARY:
- addStored(fieldName, field.binaryValue());
- break;
- case BOOLEAN:
- addBoolean(fieldName, ((Boolean) field.value).booleanValue());
- break;
- case DATE:
- addDate(fieldName, (Date) field.value);
- break;
- case INET_ADDRESS:
- addInetAddress(fieldName, (InetAddress) field.value);
- break;
- default:
- // BUG:
- throw new AssertionError("missing valueType=" + fieldType.valueType + " in switch");
+ } else {
+ add(indexableField);
}
}
}
// nocommit i don't like that we have this ... it's linear cost, and this class is not supposed to be a generic container
public void removeField(String name) {
- Iterator<FieldValue> it = fields.iterator();
+ Iterator<IndexableField> it = fields.iterator();
while (it.hasNext()) {
- FieldValue field = it.next();
+ IndexableField field = it.next();
if (field.name().equals(name)) {
it.remove();
return;
@@ -708,7 +788,8 @@ public class Document2 implements Iterab
}
}
- static BytesRef intToBytes(int v) {
+ // nocommit public just for TestBlockJoin ...
+ public static BytesRef intToBytes(int v) {
int sortableBits = v ^ 0x80000000;
BytesRef token = new BytesRef(4);
token.length = 4;
@@ -721,6 +802,30 @@ public class Document2 implements Iterab
return token;
}
+ public static BytesRef floatToBytes(float value) {
+ return intToBytes(sortableFloatBits(Float.floatToIntBits(value)));
+ }
+
+ /** Converts numeric DV field back to double. */
+ public static double sortableLongToDouble(long v) {
+ return Double.longBitsToDouble(sortableDoubleBits(v));
+ }
+
+ /** Converts numeric DV field back to double. */
+ public static double longToDouble(long v) {
+ return Double.longBitsToDouble(v);
+ }
+
+ /** Converts numeric DV field back to float. */
+ public static float sortableIntToFloat(int v) {
+ return Float.intBitsToFloat(sortableFloatBits(v));
+ }
+
+ /** Converts numeric DV field back to float. */
+ public static float intToFloat(int v) {
+ return Float.intBitsToFloat(v);
+ }
+
// nocommit move elsewhere?
public static int bytesToInt(BytesRef bytes) {
if (bytes.length != 4) {
@@ -734,7 +839,7 @@ public class Document2 implements Iterab
return sortableBits ^ 0x80000000;
}
- static BytesRef longToBytes(long v) {
+ public static BytesRef longToBytes(long v) {
long sortableBits = v ^ 0x8000000000000000L;
BytesRef token = new BytesRef(8);
token.length = 8;
@@ -747,6 +852,10 @@ public class Document2 implements Iterab
return token;
}
+ public static BytesRef doubleToBytes(double value) {
+ return longToBytes(sortableDoubleBits(Double.doubleToLongBits(value)));
+ }
+
// nocommit move elsewhere?
public static long bytesToLong(BytesRef bytes) {
if (bytes.length != 8) {
@@ -782,56 +891,52 @@ public class Document2 implements Iterab
public Boolean getBoolean(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- return (Boolean) fieldValue.value;
- }
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (Boolean) fieldValue.value;
}
-
- return null;
}
// nocommit getFloat, getDouble, getLong
public Date getDate(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- return (Date) fieldValue.value;
- }
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (Date) fieldValue.value;
}
-
- return null;
}
public InetAddress getInetAddress(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- return (InetAddress) fieldValue.value;
- }
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (InetAddress) fieldValue.value;
}
-
- return null;
}
public String getString(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- return fieldValue.value.toString();
- }
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (String) fieldValue.value;
}
-
- return null;
}
public String[] getStrings(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
List<String> values = new ArrayList<>();
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- values.add(fieldValue.value.toString());
+ for(IndexableField fieldValue : fields) {
+ if (fieldValue.name().equals(fieldName) && fieldValue instanceof FieldValue) {
+ values.add((String) ((FieldValue) fieldValue).value);
}
}
@@ -840,55 +945,85 @@ public class Document2 implements Iterab
public BytesRef getBinary(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- return (BytesRef) fieldValue.value;
- }
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (BytesRef) fieldValue.value;
}
-
- return null;
}
public Integer getInt(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- return (Integer) fieldValue.value;
- }
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (Integer) fieldValue.value;
}
+ }
- return null;
+ public Long getLong(String fieldName) {
+ // nocommit can we assert this is a known field and that its type is correct?
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (Long) fieldValue.value;
+ }
}
- public Double getDouble(String fieldName) {
+ public Float getFloat(String fieldName) {
// nocommit can we assert this is a known field and that its type is correct?
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- return (Double) fieldValue.value;
- }
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (Float) fieldValue.value;
}
+ }
- return null;
+ public Double getDouble(String fieldName) {
+ // nocommit can we assert this is a known field and that its type is correct?
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return (Double) fieldValue.value;
+ }
}
public Object get(String fieldName) {
- for(FieldValue fieldValue : fields) {
- if (fieldValue.fieldName.equals(fieldName)) {
- return fieldValue.value;
- }
+ FieldValue fieldValue = getFirstFieldValue(fieldName);
+ if (fieldValue == null) {
+ return null;
+ } else {
+ return fieldValue.value;
}
+ }
+ private FieldValue getFirstFieldValue(String name) {
+ for(IndexableField fieldValue : fields) {
+ if (fieldValue.name().equals(name) && fieldValue instanceof FieldValue) {
+ return (FieldValue) fieldValue;
+ }
+ }
return null;
}
@Override
public String toString() {
StringBuilder b = new StringBuilder();
- for(FieldValue fieldValue : fields) {
+ for(IndexableField field : fields) {
b.append("\n ");
- b.append(fieldValue.fieldName);
+ b.append(field.name());
b.append(": ");
- String s = fieldValue.value.toString();
+ String s;
+ if (field instanceof FieldValue) {
+ s = ((FieldValue) field).value.toString();
+ } else {
+ s = field.toString();
+ }
if (s.length() > 20) {
b.append(s.substring(0, 20));
b.append("...");
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/document/FieldTypes.java Thu Nov 27 11:34:43 2014
@@ -90,17 +90,6 @@ import org.apache.lucene.util.Version;
// - move analyzer out of IW/IWC into Field/FieldType/s only?
// - why does STS fill offset...
-// Lucene's secret schemas
-// StoredFieldsVisitor
-// FieldInfos/GlobalFieldNumbers
-// SortField.type
-// DocValuesType
-// subclassing QueryParsers
-// PerFieldPF/DVF
-// PerFieldSimilarityWrapper
-// PerFieldAnalyzerWrapper
-// oal.document
-
// tie into query parser
// default operators?
// default search field
@@ -109,6 +98,12 @@ import org.apache.lucene.util.Version;
// tie into faceting
// tie into index sorting
+// nocommit a segment should store the field type as of when it was written? on upgrade/reindex we can use that?
+
+// nocommit addStored should take numbers too?
+
+// nocommit should we detect if we are used to change schema after the IW holding us is closed?
+
// nocommit run all monster tests
// nocommit cutover AnalyzingInfixSuggester to binary atom for contexts
@@ -166,6 +161,8 @@ import org.apache.lucene.util.Version;
// nocommit sort proxy field?
+// nocommit highlight proxy field (LUCENE-6061)
+
// nocommit controlling compression of stored fields, norms
// nocommit can we somehow detect at search time if the field types you are using doesn't match the searcher you are now searching against?
@@ -202,8 +199,6 @@ import org.apache.lucene.util.Version;
// nocommit how to randomize IWC? RIW?
-// nocommit unique/primary key ?
-
// nocommit fix all change methods to call validate / rollback
// nocommit float16?
@@ -367,7 +362,7 @@ public class FieldTypes {
private volatile Boolean fastRanges;
// Whether this field may appear more than once per document:
- private volatile Boolean multiValued;
+ volatile Boolean multiValued;
// Whether this field's norms are indexed:
private volatile Boolean indexNorms;
@@ -713,14 +708,16 @@ public class FieldTypes {
b.append(" sortable: ");
if (sortable != null) {
b.append(sortable);
- if (sortReversed != null) {
- b.append(" reversed=");
- b.append(sortReversed);
- }
- if (sortMissingLast == Boolean.TRUE) {
- b.append(" (missing: last)");
- } else if (sortMissingLast == Boolean.FALSE) {
- b.append(" (missing: first)");
+ if (sortable == Boolean.TRUE) {
+ if (sortReversed != null) {
+ b.append(" reversed=");
+ b.append(sortReversed);
+ }
+ if (sortMissingLast == Boolean.TRUE) {
+ b.append(" (missing: last)");
+ } else if (sortMissingLast == Boolean.FALSE) {
+ b.append(" (missing: first)");
+ }
}
} else {
b.append("unset");
@@ -776,7 +773,7 @@ public class FieldTypes {
b.append("\n termVectors: yes");
if (storeTermVectorPositions == Boolean.TRUE) {
b.append(" positions");
- if (storeTermVectorPayloads) {
+ if (storeTermVectorPayloads == Boolean.TRUE) {
b.append(" payloads");
}
}
@@ -1176,6 +1173,8 @@ public class FieldTypes {
}
}
+ // nocommit need test that you cannot .addStored after already .addLargeText(TokenStream)?
+
// nocommit move to oal.index and remove these ctors, so you must ask IW or IR for the FieldTypes
/** Only invoked by IndexWriter directly.
@@ -1312,6 +1311,10 @@ public class FieldTypes {
}
}
+ public IndexableFieldType getIndexableFieldType(String fieldName) {
+ return getFieldType(fieldName);
+ }
+
public String getFieldTypeString(String fieldName) {
return getFieldType(fieldName).toString();
}
@@ -2438,6 +2441,55 @@ public class FieldTypes {
}
}
+ synchronized void recordStoredValueType(String fieldName, ValueType valueType) {
+ ensureWritable();
+ indexedDocs = true;
+ FieldType current = fields.get(fieldName);
+ if (current == null) {
+ current = newFieldType(fieldName);
+ current.valueType = valueType;
+ current.isUnique = Boolean.FALSE;
+ current.indexOptionsSet = true;
+ current.indexOptions = IndexOptions.NONE;
+ current.docValuesTypeSet = true;
+ current.docValuesType = DocValuesType.NONE;
+ fields.put(fieldName, current);
+ setDefaults(current);
+ changed();
+ } else {
+
+ if (current.indexOptionsSet && current.indexOptions != IndexOptions.NONE) {
+ // nocommit testme
+ illegalState(fieldName, "cannot addStored: field is already indexed with indexOptions=" + current.indexOptions);
+ }
+
+ if (current.docValuesTypeSet && current.docValuesType != DocValuesType.NONE) {
+ // nocommit testme
+ illegalState(fieldName, "cannot addStored: field already has docValuesType=" + current.docValuesType);
+ }
+
+ if (current.valueType == ValueType.NONE) {
+ FieldType sav = new FieldType(current);
+ boolean success = false;
+ try {
+ current.valueType = valueType;
+ current.indexOptions = IndexOptions.NONE;
+ current.docValuesType = DocValuesType.NONE;
+ current.validate();
+ success = true;
+ } finally {
+ if (success == false) {
+ fields.put(fieldName, sav);
+ }
+ }
+ setDefaults(current);
+ changed();
+ } else if (current.valueType != valueType) {
+ illegalState(fieldName, "cannot change from value type " + current.valueType + " to " + valueType);
+ }
+ }
+ }
+
synchronized void recordLargeTextType(String fieldName, boolean allowStored, boolean indexed) {
ensureWritable();
indexedDocs = true;
@@ -2962,6 +3014,9 @@ public class FieldTypes {
return new TermQuery(new Term(fieldName, new BytesRef(token.getAddress())));
}
+ // nocommit split to newInt/Float/etc./Range
+
+ // nocommit not great that the toString of the filter returned here is ... not easy to understand
public Filter newRangeFilter(String fieldName, Number min, boolean minInclusive, Number max, boolean maxInclusive) {
// Field must exist:
@@ -2990,8 +3045,8 @@ public class FieldTypes {
break;
case FLOAT:
- minTerm = min == null ? null : Document2.intToBytes(Float.floatToIntBits(min.floatValue()));
- maxTerm = max == null ? null : Document2.intToBytes(Float.floatToIntBits(max.floatValue()));
+ minTerm = min == null ? null : Document2.intToBytes(Document2.sortableFloatBits(Float.floatToIntBits(min.floatValue())));
+ maxTerm = max == null ? null : Document2.intToBytes(Document2.sortableFloatBits(Float.floatToIntBits(max.floatValue())));
break;
case LONG:
@@ -3000,8 +3055,8 @@ public class FieldTypes {
break;
case DOUBLE:
- minTerm = min == null ? null : Document2.longToBytes(Double.doubleToLongBits(min.doubleValue()));
- maxTerm = max == null ? null : Document2.longToBytes(Double.doubleToLongBits(max.doubleValue()));
+ minTerm = min == null ? null : Document2.longToBytes(Document2.sortableDoubleBits(Double.doubleToLongBits(min.doubleValue())));
+ maxTerm = max == null ? null : Document2.longToBytes(Document2.sortableDoubleBits(Double.doubleToLongBits(max.doubleValue())));
break;
default:
@@ -3010,8 +3065,24 @@ public class FieldTypes {
// Dead code but javac disagrees:
return null;
}
+ StringBuilder sb = new StringBuilder();
+ sb.append(fieldType.valueType);
+ sb.append(':');
+ if (min != null) {
+ sb.append(min);
+ sb.append(" (");
+ sb.append(minInclusive ? "incl" : "excl");
+ sb.append(')');
+ }
+ sb.append(" to ");
+ if (max != null) {
+ sb.append(max);
+ sb.append(" (");
+ sb.append(maxInclusive ? "incl" : "excl");
+ sb.append(')');
+ }
- return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive);
+ return new TermRangeFilter(fieldName, minTerm, maxTerm, minInclusive, maxInclusive, sb.toString());
}
public Filter newRangeFilter(String fieldName, byte[] minTerm, boolean minInclusive, byte[] maxTerm, boolean maxInclusive) {
@@ -3466,4 +3537,6 @@ public class FieldTypes {
indexedDocs = false;
addFieldNamesField();
}
+
+ // nocommit add sugar to wrap long NDVs as float/double?
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Thu Nov 27 11:34:43 2014
@@ -1982,58 +1982,55 @@ public class CheckIndex implements Close
case SORTED:
status.totalSortedFields++;
checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name), docsWithField);
- if (reader.getBinaryDocValues(fi.name) != null ||
- reader.getNumericDocValues(fi.name) != null ||
- reader.getSortedNumericDocValues(fi.name) != null ||
- reader.getSortedSetDocValues(fi.name) != null) {
- throw new RuntimeException(fi.name + " returns multiple docvalues types!");
- }
+ checkMoreThanOneDocValuesType(fi.name, reader);
break;
case SORTED_NUMERIC:
status.totalSortedNumericFields++;
checkSortedNumericDocValues(fi.name, reader, reader.getSortedNumericDocValues(fi.name), docsWithField);
- if (reader.getBinaryDocValues(fi.name) != null ||
- reader.getNumericDocValues(fi.name) != null ||
- reader.getSortedSetDocValues(fi.name) != null ||
- reader.getSortedDocValues(fi.name) != null) {
- throw new RuntimeException(fi.name + " returns multiple docvalues types!");
- }
+ checkMoreThanOneDocValuesType(fi.name, reader);
break;
case SORTED_SET:
status.totalSortedSetFields++;
checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name), docsWithField);
- if (reader.getBinaryDocValues(fi.name) != null ||
- reader.getNumericDocValues(fi.name) != null ||
- reader.getSortedNumericDocValues(fi.name) != null ||
- reader.getSortedDocValues(fi.name) != null) {
- throw new RuntimeException(fi.name + " returns multiple docvalues types!");
- }
+ checkMoreThanOneDocValuesType(fi.name, reader);
break;
case BINARY:
status.totalBinaryFields++;
checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name), docsWithField);
- if (reader.getNumericDocValues(fi.name) != null ||
- reader.getSortedDocValues(fi.name) != null ||
- reader.getSortedNumericDocValues(fi.name) != null ||
- reader.getSortedSetDocValues(fi.name) != null) {
- throw new RuntimeException(fi.name + " returns multiple docvalues types!");
- }
+ checkMoreThanOneDocValuesType(fi.name, reader);
break;
case NUMERIC:
status.totalNumericFields++;
checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name), docsWithField);
- if (reader.getBinaryDocValues(fi.name) != null ||
- reader.getSortedDocValues(fi.name) != null ||
- reader.getSortedNumericDocValues(fi.name) != null ||
- reader.getSortedSetDocValues(fi.name) != null) {
- throw new RuntimeException(fi.name + " returns multiple docvalues types!");
- }
+ checkMoreThanOneDocValuesType(fi.name, reader);
break;
default:
throw new AssertionError();
}
}
+ private static void checkMoreThanOneDocValuesType(String fieldName, LeafReader reader) throws IOException {
+ List<String> docValues = new ArrayList<>();
+ if (reader.getBinaryDocValues(fieldName) != null) {
+ docValues.add("BINARY");
+ }
+ if (reader.getNumericDocValues(fieldName) != null) {
+ docValues.add("NUMERIC");
+ }
+ if (reader.getSortedDocValues(fieldName) != null) {
+ docValues.add("SORTED");
+ }
+ if (reader.getSortedSetDocValues(fieldName) != null) {
+ docValues.add("SORTED_SET");
+ }
+ if (reader.getSortedNumericDocValues(fieldName) != null) {
+ docValues.add("SORTED_NUMERIC");
+ }
+ if (docValues.size() > 1) {
+ throw new RuntimeException("field=\"" + fieldName + "\" returns multiple docvalues types: " + docValues);
+ }
+ }
+
private static void checkNorms(FieldInfo fi, LeafReader reader, PrintStream infoStream) throws IOException {
if (fi.hasNorms()) {
checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name), new Bits.MatchAllBits(reader.maxDoc()));
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java Thu Nov 27 11:34:43 2014
@@ -23,7 +23,6 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.NormsConsumer;
@@ -646,7 +645,6 @@ final class DefaultIndexingChain extends
// will be marked as deleted, but still
// consume a docID since we will have already
// written some if its postings into our RAM buffer.
-
int posIncr = invertState.posIncrAttribute.getPositionIncrement();
invertState.position += posIncr;
if (invertState.position < invertState.lastPosition) {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FilterDirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FilterDirectoryReader.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FilterDirectoryReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FilterDirectoryReader.java Thu Nov 27 11:34:43 2014
@@ -20,6 +20,8 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.List;
+import org.apache.lucene.document.FieldTypes;
+
/**
* A FilterDirectoryReader wraps another DirectoryReader, allowing implementations
* to transform or extend it.
@@ -125,4 +127,8 @@ public abstract class FilterDirectoryRea
in.doClose();
}
+ @Override
+ public FieldTypes getFieldTypes() {
+ return in.getFieldTypes();
+ }
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java Thu Nov 27 11:34:43 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Iterator;
+import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
@@ -468,4 +469,10 @@ public class FilterLeafReader extends Le
ensureOpen();
in.checkIntegrity();
}
+
+ @Override
+ public FieldTypes getFieldTypes() {
+ ensureOpen();
+ return in.getFieldTypes();
+ }
}
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Thu Nov 27 11:34:43 2014
@@ -1150,23 +1150,7 @@ public class IndexWriter implements Clos
* @throws IOException if there is a low-level IO error
*/
public void addDocument(Iterable<? extends IndexableField> doc) throws IOException {
- addDocument(doc, analyzer);
- }
-
- /**
- * Adds a document to this index, using the provided analyzer instead of the
- * value of {@link #getAnalyzer()}.
- *
- * <p>See {@link #addDocument(IndexDocument)} for details on
- * index and IndexWriter state after an Exception, and
- * flushing/merging temporary free space requirements.</p>
- *
- * @throws CorruptIndexException if the index is corrupt
- * @throws IOException if there is a low-level IO error
- */
- // nocommit remove
- public void addDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer) throws IOException {
- updateDocument(null, doc, analyzer);
+ updateDocument(null, doc);
}
/**
@@ -1207,23 +1191,7 @@ public class IndexWriter implements Clos
* @lucene.experimental
*/
public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
- addDocuments(docs, analyzer);
- }
-
- /**
- * Atomically adds a block of documents, analyzed using the
- * provided analyzer, with sequentially assigned document
- * IDs, such that an external reader will see all or none
- * of the documents.
- *
- * @throws CorruptIndexException if the index is corrupt
- * @throws IOException if there is a low-level IO error
- *
- * @lucene.experimental
- */
- // nocommit remove
- public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer) throws IOException {
- updateDocuments(null, docs, analyzer);
+ updateDocuments(null, docs);
}
/**
@@ -1240,25 +1208,6 @@ public class IndexWriter implements Clos
* @lucene.experimental
*/
public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
- updateDocuments(delTerm, docs, analyzer);
- }
-
- /**
- * Atomically deletes documents matching the provided
- * delTerm and adds a block of documents, analyzed using
- * the provided analyzer, with sequentially
- * assigned document IDs, such that an external reader
- * will see all or none of the documents.
- *
- * See {@link #addDocuments(Iterable)}.
- *
- * @throws CorruptIndexException if the index is corrupt
- * @throws IOException if there is a low-level IO error
- *
- * @lucene.experimental
- */
- // nocommit remove
- public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer) throws IOException {
ensureOpen();
try {
boolean success = false;
@@ -1410,27 +1359,6 @@ public class IndexWriter implements Clos
*/
public void updateDocument(Term term, Iterable<? extends IndexableField> doc) throws IOException {
ensureOpen();
- updateDocument(term, doc, analyzer);
- }
-
- /**
- * Updates a document by first deleting the document(s)
- * containing <code>term</code> and then adding the new
- * document. The delete and then add are atomic as seen
- * by a reader on the same index (flush may happen only after
- * the add).
- *
- * @param term the term to identify the document(s) to be
- * deleted
- * @param doc the document to be added
- * @param analyzer the analyzer to use when analyzing the document
- * @throws CorruptIndexException if the index is corrupt
- * @throws IOException if there is a low-level IO error
- */
- // nocommit remove
- public void updateDocument(Term term, Iterable<? extends IndexableField> doc, Analyzer analyzer)
- throws IOException {
- ensureOpen();
try {
boolean success = false;
try {
@@ -2567,6 +2495,11 @@ public class IndexWriter implements Clos
String mergedName = newSegmentName();
final List<LeafReader> mergeReaders = new ArrayList<>();
for (IndexReader indexReader : readers) {
+ // nocommit how to undo this on exc?
+ FieldTypes ft = indexReader.getFieldTypes();
+ if (ft != null) {
+ fieldTypes.addAll(ft);
+ }
numDocs += indexReader.numDocs();
for (LeafReaderContext ctx : indexReader.leaves()) {
mergeReaders.add(ctx.reader());
@@ -2576,7 +2509,7 @@ public class IndexWriter implements Clos
// Make sure adding the new documents to this index won't
// exceed the limit:
reserveDocs(numDocs);
-
+
final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));
// TODO: somehow we should fix this merge so it's
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiFields.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiFields.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/MultiFields.java Thu Nov 27 11:34:43 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/TrackingIndexWriter.java Thu Nov 27 11:34:43 2014
@@ -58,15 +58,6 @@ public class TrackingIndexWriter {
}
/** Calls {@link
- * IndexWriter#updateDocument(Term,Iterable,Analyzer)}
- * and returns the generation that reflects this change. */
- public long updateDocument(Term t, Iterable<? extends IndexableField> d, Analyzer a) throws IOException {
- writer.updateDocument(t, d, a);
- // Return gen as of when indexing finished:
- return indexingGen.get();
- }
-
- /** Calls {@link
* IndexWriter#updateDocument(Term,Iterable)} and
* returns the generation that reflects this change. */
public long updateDocument(Term t, Iterable<? extends IndexableField> d) throws IOException {
@@ -76,15 +67,6 @@ public class TrackingIndexWriter {
}
/** Calls {@link
- * IndexWriter#updateDocuments(Term,Iterable,Analyzer)}
- * and returns the generation that reflects this change. */
- public long updateDocuments(Term t, Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer a) throws IOException {
- writer.updateDocuments(t, docs, a);
- // Return gen as of when indexing finished:
- return indexingGen.get();
- }
-
- /** Calls {@link
* IndexWriter#updateDocuments(Term,Iterable)} and returns
* the generation that reflects this change. */
public long updateDocuments(Term t, Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
@@ -133,24 +115,6 @@ public class TrackingIndexWriter {
return indexingGen.get();
}
- /** Calls {@link
- * IndexWriter#addDocument(Iterable,Analyzer)} and
- * returns the generation that reflects this change. */
- public long addDocument(Iterable<? extends IndexableField> d, Analyzer a) throws IOException {
- writer.addDocument(d, a);
- // Return gen as of when indexing finished:
- return indexingGen.get();
- }
-
- /** Calls {@link
- * IndexWriter#addDocuments(Iterable,Analyzer)} and
- * returns the generation that reflects this change. */
- public long addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer a) throws IOException {
- writer.addDocuments(docs, a);
- // Return gen as of when indexing finished:
- return indexingGen.get();
- }
-
/** Calls {@link IndexWriter#addDocument(Iterable)}
* and returns the generation that reflects this change. */
public long addDocument(Iterable<? extends IndexableField> d) throws IOException {
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/DocValuesRangeFilter.java Thu Nov 27 11:34:43 2014
@@ -18,10 +18,6 @@ package org.apache.lucene.search;
import java.io.IOException;
-import org.apache.lucene.document.DoubleField; // for javadocs
-import org.apache.lucene.document.FloatField; // for javadocs
-import org.apache.lucene.document.IntField; // for javadocs
-import org.apache.lucene.document.LongField; // for javadocs
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1642110&r1=1642109&r2=1642110&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java Thu Nov 27 11:34:43 2014
@@ -181,6 +181,10 @@ public class IndexSearcher {
return reader;
}
+ public FieldTypes getFieldTypes() {
+ return reader.getFieldTypes();
+ }
+
/**
* Sugar for <code>.getIndexReader().document2(docID)</code>
* @see IndexReader#document2(int)