You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/11/15 18:33:46 UTC
svn commit: r1409892 - in /lucene/dev/branches/lucene4547/lucene: codecs/src/java/org/apache/lucene/codecs/simpletext/ core/src/java/org/apache/lucene/codecs/ core/src/test/org/apache/lucene/

Author: mikemccand
Date: Thu Nov 15 17:33:45 2012
New Revision: 1409892

URL: http://svn.apache.org/viewvc?rev=1409892&view=rev
Log:
get basic sorted bytes example working

Modified:
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1409892&r1=1409891&r2=1409892&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Thu Nov 15 17:33:45 2012
@@ -67,8 +67,8 @@ public class SimpleTextSimpleDocValuesFo
   final static BytesRef MAXLENGTH = new BytesRef("  maxlength ");
   final static BytesRef LENGTH = new BytesRef("length ");
   // used for sorted bytes
-  final static BytesRef NUMVALUES = new BytesRef("  numvalues");
-  final static BytesRef ORDPATTERN = new BytesRef("  ordpattern");
+  final static BytesRef NUMVALUES = new BytesRef("  numvalues ");
+  final static BytesRef ORDPATTERN = new BytesRef("  ordpattern ");
 
   @Override
   public SimpleDVConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
@@ -287,7 +287,7 @@ public class SimpleTextSimpleDocValuesFo
 
         @Override
         public void addDoc(int ord) throws IOException {
-          SimpleTextUtil.write(data, encoder.format(ord), scratch);
+          SimpleTextUtil.write(data, ordEncoder.format(ord), scratch);
           SimpleTextUtil.writeNewline(data);
         }
       };
@@ -397,6 +397,7 @@ public class SimpleTextSimpleDocValuesFo
           assert startsWith(ORDPATTERN);
           field.ordPattern = stripPrefix(ORDPATTERN);
           field.dataStartFilePointer = data.getFilePointer();
+          data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * field.numValues + (1+field.ordPattern.length())*maxDoc);
           // nocommit: we need to seek past the data section!!!!
         } else if (DocValues.isFloat(dvType)) {
           // nocommit
@@ -424,7 +425,6 @@ public class SimpleTextSimpleDocValuesFo
         DocValues.Type dvType = field.fieldInfo.getDocValuesType();
         if (DocValues.isNumber(dvType)) {
           Source source = loadDirectSource();
-          System.out.println(maxDoc);
           long[] values = new long[maxDoc];
           for(int docID=0;docID<maxDoc;docID++) {
             values[docID] = source.getInt(docID);
@@ -522,7 +522,7 @@ public class SimpleTextSimpleDocValuesFo
                 // value from the wrong field ...
                 in.seek(field.dataStartFilePointer + (1+field.pattern.length())*docID);
                 SimpleTextUtil.readLine(in, scratch);
-                System.out.println("parsing delta: " + scratch.utf8ToString());
+                //System.out.println("parsing delta: " + scratch.utf8ToString());
                 BigDecimal bd = (BigDecimal) decoder.parse(scratch.utf8ToString(), new ParsePosition(0));
                 return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
               } catch (IOException ioe) {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java?rev=1409892&r1=1409891&r2=1409892&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java Thu Nov 15 17:33:45 2012
@@ -57,6 +57,11 @@ public abstract class SortedDocValuesCon
       int ord = -1;
       SortedSource source;
       BytesRef scratch = new BytesRef();
+
+      // nocommit can we factor out the compressed fields
+      // compression?  ie we have a good idea "roughly" what
+      // the ord should be (linear projection) so we only
+      // need to encode the delta from that ...:        
       int[] segOrdToMergedOrd;
 
       public BytesRef nextTerm() {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java?rev=1409892&r1=1409891&r2=1409892&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java Thu Nov 15 17:33:45 2012
@@ -24,15 +24,16 @@ import org.apache.lucene.analysis.MockAn
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.PackedLongDocValuesField;
+import org.apache.lucene.document.SortedBytesDocValuesField;
 import org.apache.lucene.document.StraightBytesDocValuesField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -196,4 +197,48 @@ public class TestDemoDocValue extends Lu
     ireader.close();
     directory.close();
   }
+
+  public void testDemoSortedBytes() throws IOException {
+    Analyzer analyzer = new MockAnalyzer(random());
+
+    // Store the index in memory:
+    Directory directory = newDirectory();
+    // To store an index on disk, use this instead:
+    // Directory directory = FSDirectory.open(new File("/tmp/testindex"));
+    // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
+    IndexWriter iwriter = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    Document doc = new Document();
+    String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
+    String text = "This is the text to be indexed. " + longTerm;
+    doc.add(newTextField("fieldname", text, Field.Store.YES));
+    doc.add(new SortedBytesDocValuesField("dv", new BytesRef("hello world")));
+    iwriter.addDocument(doc);
+    iwriter.close();
+    
+    // Now search the index:
+    IndexReader ireader = DirectoryReader.open(directory); // read-only=true
+    IndexSearcher isearcher = new IndexSearcher(ireader);
+
+    assertEquals(1, isearcher.search(new TermQuery(new Term("fieldname", longTerm)), 1).totalHits);
+    Query query = new TermQuery(new Term("fieldname", "text"));
+    TopDocs hits = isearcher.search(query, null, 1);
+    assertEquals(1, hits.totalHits);
+    // Iterate through the results:
+    for (int i = 0; i < hits.scoreDocs.length; i++) {
+      StoredDocument hitDoc = isearcher.doc(hits.scoreDocs[i].doc);
+      assertEquals(text, hitDoc.get("fieldname"));
+      assert ireader.leaves().size() == 1;
+      DocValues dv = ireader.leaves().get(0).reader().docValues("dv");
+      assertEquals(new BytesRef("hello world"), dv.getSource().getBytes(hits.scoreDocs[i].doc, new BytesRef()));
+    }
+
+    // Test simple phrase query
+    PhraseQuery phraseQuery = new PhraseQuery();
+    phraseQuery.add(new Term("fieldname", "to"));
+    phraseQuery.add(new Term("fieldname", "be"));
+    assertEquals(1, isearcher.search(phraseQuery, null, 1).totalHits);
+
+    ireader.close();
+    directory.close();
+  }
 }