You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/11/15 18:33:46 UTC
svn commit: r1409892 - in /lucene/dev/branches/lucene4547/lucene:
codecs/src/java/org/apache/lucene/codecs/simpletext/
core/src/java/org/apache/lucene/codecs/ core/src/test/org/apache/lucene/
Author: mikemccand
Date: Thu Nov 15 17:33:45 2012
New Revision: 1409892
URL: http://svn.apache.org/viewvc?rev=1409892&view=rev
Log:
get basic sorted bytes example working
Modified:
lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1409892&r1=1409891&r2=1409892&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Thu Nov 15 17:33:45 2012
@@ -67,8 +67,8 @@ public class SimpleTextSimpleDocValuesFo
final static BytesRef MAXLENGTH = new BytesRef(" maxlength ");
final static BytesRef LENGTH = new BytesRef("length ");
// used for sorted bytes
- final static BytesRef NUMVALUES = new BytesRef(" numvalues");
- final static BytesRef ORDPATTERN = new BytesRef(" ordpattern");
+ final static BytesRef NUMVALUES = new BytesRef(" numvalues ");
+ final static BytesRef ORDPATTERN = new BytesRef(" ordpattern ");
@Override
public SimpleDVConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
@@ -287,7 +287,7 @@ public class SimpleTextSimpleDocValuesFo
@Override
public void addDoc(int ord) throws IOException {
- SimpleTextUtil.write(data, encoder.format(ord), scratch);
+ SimpleTextUtil.write(data, ordEncoder.format(ord), scratch);
SimpleTextUtil.writeNewline(data);
}
};
@@ -397,6 +397,7 @@ public class SimpleTextSimpleDocValuesFo
assert startsWith(ORDPATTERN);
field.ordPattern = stripPrefix(ORDPATTERN);
field.dataStartFilePointer = data.getFilePointer();
+ data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * field.numValues + (1+field.ordPattern.length())*maxDoc);
// nocommit: we need to seek past the data section!!!!
} else if (DocValues.isFloat(dvType)) {
// nocommit
@@ -424,7 +425,6 @@ public class SimpleTextSimpleDocValuesFo
DocValues.Type dvType = field.fieldInfo.getDocValuesType();
if (DocValues.isNumber(dvType)) {
Source source = loadDirectSource();
- System.out.println(maxDoc);
long[] values = new long[maxDoc];
for(int docID=0;docID<maxDoc;docID++) {
values[docID] = source.getInt(docID);
@@ -522,7 +522,7 @@ public class SimpleTextSimpleDocValuesFo
// value from the wrong field ...
in.seek(field.dataStartFilePointer + (1+field.pattern.length())*docID);
SimpleTextUtil.readLine(in, scratch);
- System.out.println("parsing delta: " + scratch.utf8ToString());
+ //System.out.println("parsing delta: " + scratch.utf8ToString());
BigDecimal bd = (BigDecimal) decoder.parse(scratch.utf8ToString(), new ParsePosition(0));
return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
} catch (IOException ioe) {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java?rev=1409892&r1=1409891&r2=1409892&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java Thu Nov 15 17:33:45 2012
@@ -57,6 +57,11 @@ public abstract class SortedDocValuesCon
int ord = -1;
SortedSource source;
BytesRef scratch = new BytesRef();
+
+ // nocommit: can we factor out the compression of the
+ // compressed fields? I.e., we have a good idea "roughly" what
+ // the ord should be (linear projection), so we only
+ // need to encode the delta from that:
int[] segOrdToMergedOrd;
public BytesRef nextTerm() {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java?rev=1409892&r1=1409891&r2=1409892&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java Thu Nov 15 17:33:45 2012
@@ -24,15 +24,16 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.PackedLongDocValuesField;
+import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -196,4 +197,48 @@ public class TestDemoDocValue extends Lu
ireader.close();
directory.close();
}
+
+ public void testDemoSortedBytes() throws IOException { // indexes one doc with a sorted-bytes doc value and reads it back
+ Analyzer analyzer = new MockAnalyzer(random());
+
+ // Store the index in memory:
+ Directory directory = newDirectory();
+ // To store an index on disk, use this instead:
+ // Directory directory = FSDirectory.open(new File("/tmp/testindex"));
+ // We use a plain IndexWriter rather than RandomIndexWriter, because the latter might add more doc values than this test expects.
+ IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+ Document doc = new Document();
+ String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
+ String text = "This is the text to be indexed. " + longTerm;
+ doc.add(newTextField("fieldname", text, Field.Store.YES));
+ doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world")));
+ iwriter.addDocument(doc);
+ iwriter.close();
+
+ // Now search the index:
+ IndexReader ireader = DirectoryReader.open(directory); // read-only=true
+ IndexSearcher isearcher = new IndexSearcher(ireader);
+
+ assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
+ Query query = new TermQuery(new Term("fieldname", "text"));
+ TopDocs hits = isearcher.search(query, null, 1);
+ assertEquals(1, hits.totalHits);
+ // Iterate through the results, checking each hit's stored text and its "dv" doc value:
+ for (int i = 0; i < hits.scoreDocs.length; i++) {
+ StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
+ assertEquals(text, hitDoc.get("fieldname"));
+ assert ireader.leaves().size() == 1; // the lookup below reads only the first leaf
+ DocValues dv = ireader.leaves().get(0).reader().docValues("dv");
+ assertEquals(new BytesRef("hello world"), dv.getSource().getBytes(hits.scoreDocs[i].doc, new BytesRef()));
+ }
+
+ // Test a simple phrase query: the phrase "to be" must match the single indexed document
+ PhraseQuery phraseQuery = new PhraseQuery();
+ phraseQuery.add(new Term("fieldname", "to"));
+ phraseQuery.add(new Term("fieldname", "be"));
+ assertEquals(1, isearcher.search(phraseQuery, null, 1).totalHits);
+
+ ireader.close();
+ directory.close();
+ }
}