You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/11/10 18:28:56 UTC

svn commit: r1407855 - in /lucene/dev/branches/lucene4547/lucene: codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java core/src/java/org/apache/lucene/index/DocValues.java

Author: mikemccand
Date: Sat Nov 10 17:28:56 2012
New Revision: 1407855

URL: http://svn.apache.org/viewvc?rev=1407855&view=rev
Log:
ST sorted source producer

Modified:
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1407855&r1=1407854&r2=1407855&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Sat Nov 10 17:28:56 2012
@@ -46,6 +46,7 @@ import org.apache.lucene.store.IndexOutp
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.packed.PackedInts;
 
 
 /**
@@ -102,7 +103,7 @@ public class SimpleTextSimpleDocValuesFo
    *  baz[space][space][space][space][space]
    *  ...
    *  </pre>
-   *  so a document's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*docid
+   *  so an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
    *  the extra 9 is 2 newlines, plus "length " itself.
    *  
    *  for sorted bytes this is a fixed-width file, for example:
@@ -315,6 +316,12 @@ public class SimpleTextSimpleDocValuesFo
     }
   };
 
+  // nocommit once we do "in ram cache of direct source"
+  // ... and hopefully under SCR control ... then if app
+  // asks for direct source but it was already cached in ram
+  // ... we should use the ram cached one!  we don't do this
+  // correctly today ...
+
   // nocommit make sure we test "all docs have 0 value",
   // "all docs have empty BytesREf"
 
@@ -324,8 +331,10 @@ public class SimpleTextSimpleDocValuesFo
       FieldInfo fieldInfo;
       long dataStartFilePointer;
       String pattern;
+      String ordPattern;
       int maxLength;
       int minValue;
+      int numValues;
     };
 
     final int maxDoc;
@@ -353,9 +362,15 @@ public class SimpleTextSimpleDocValuesFo
         
         DocValues.Type dvType = fieldInfo.getDocValuesType();
         assert dvType != null;
-        switch(dvType) {
-        case BYTES_VAR_STRAIGHT:
-        case BYTES_FIXED_STRAIGHT:
+        if (DocValues.isNumber(dvType)) {
+          readLine();
+          assert startsWith(MINVALUE);
+          field.minValue = Integer.parseInt(stripPrefix(MINVALUE));
+          readLine();
+          assert startsWith(PATTERN);
+          field.pattern = stripPrefix(PATTERN);
+          data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
+        } else if (DocValues.isBytes(dvType)) {
           readLine();
           assert startsWith(MAXLENGTH);
           field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
@@ -364,28 +379,22 @@ public class SimpleTextSimpleDocValuesFo
           field.pattern = stripPrefix(PATTERN);
           data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * maxDoc);
           break;
-        case BYTES_VAR_SORTED:
-        case BYTES_FIXED_SORTED:
-        case BYTES_VAR_DEREF:
-        case BYTES_FIXED_DEREF:
-          // nocommit TODO
-          break;
-        case VAR_INTS:
-        case FIXED_INTS_8:
-        case FIXED_INTS_16:
-        case FIXED_INTS_32:
-        case FIXED_INTS_64:
-        case FLOAT_64:
-        case FLOAT_32:
+        } else if (DocValues.isSortedBytes(dvType)) {
           readLine();
-          assert startsWith(MINVALUE);
-          field.minValue = Integer.parseInt(stripPrefix(MINVALUE));
+          assert startsWith(NUMVALUES);
+          field.numValues = Integer.parseInt(stripPrefix(NUMVALUES));
+          readLine();
+          assert startsWith(MAXLENGTH);
+          field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
           readLine();
           assert startsWith(PATTERN);
           field.pattern = stripPrefix(PATTERN);
-          data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
-          break;
-        default:
+          readLine();
+          assert startsWith(ORDPATTERN);
+          field.ordPattern = stripPrefix(ORDPATTERN);
+        } else if (DocValues.isFloat(dvType)) {
+          // nocommit
+        } else {
           throw new AssertionError();
         }
         field.dataStartFilePointer = data.getFilePointer();
@@ -399,6 +408,11 @@ public class SimpleTextSimpleDocValuesFo
         this.field = field;
       }
 
+      // nocommit provide a simple default Source impl that
+      // loads DirectSource and pulls things into RAM; we
+      // need producer API to provide the min/max value,
+      // fixed/max length, etc.
+
       @Override
       public Source loadSource() throws IOException {
         // nocommit todo
@@ -422,17 +436,60 @@ public class SimpleTextSimpleDocValuesFo
 
           return new Source(dvType) {
             @Override
-            public BytesRef getBytes(int docID, BytesRef bytesIn) {
-              return new BytesRef(values[docID]);
+            public BytesRef getBytes(int docID, BytesRef result) {
+              result.bytes = values[docID];
+              result.offset = 0;
+              result.length = result.bytes.length;
+              return result;
             }
           };
 
         } else if (DocValues.isSortedBytes(dvType)) {
+          SortedSource source = (SortedSource) loadDirectSource();
+          final byte[][] values = new byte[field.numValues][];
+          BytesRef scratch = new BytesRef();
+          for(int ord=0;ord<field.numValues;ord++) {
+            source.getByOrd(ord, scratch);
+            values[ord] = new byte[scratch.length];
+            System.arraycopy(scratch.bytes, scratch.offset, values[ord], 0, scratch.length);
+          }
+
+          final int[] ords = new int[maxDoc];
+          for(int docID=0;docID<maxDoc;docID++) {
+            ords[docID] = source.ord(docID);
+          }
+
+          return new SortedSource(dvType, BytesRef.getUTF8SortedAsUnicodeComparator()) {
+            @Override
+            public int ord(int docID) {
+              return ords[docID];
+            }
+
+            @Override
+            public BytesRef getByOrd(int ord, BytesRef result) {
+              result.bytes = values[ord];
+              result.offset = 0;
+              result.length = result.bytes.length;
+              return result;
+            }
+
+            @Override
+            public int getValueCount() {
+              return field.numValues;
+            }
+
+            @Override
+            public PackedInts.Reader getDocToOrd() {
+              return null;
+            }
+          };
+
+        } else if (DocValues.isFloat(dvType)) {
           // nocommit
           return null;
+        } else {
+          throw new AssertionError();
         }
-        // nocommit
-        return null;
       }
 
       @Override
@@ -467,7 +524,7 @@ public class SimpleTextSimpleDocValuesFo
         } else if (DocValues.isBytes(dvType)) {
           return new Source(dvType) {
             @Override
-            public BytesRef getBytes(int docID, BytesRef bytesIn) {
+            public BytesRef getBytes(int docID, BytesRef result) {
               try {
                 // nocommit bounds check docID?  spooky
                 // because if we don't you can maybe get
@@ -481,22 +538,73 @@ public class SimpleTextSimpleDocValuesFo
                 } catch (ParseException pe) {
                   throw new RuntimeException(pe);
                 }
-                byte[] bytes = new byte[len];
-                in.readBytes(bytes, 0, bytes.length);
-                // nocommit MUST i reuse the incoming
-                // arg....?  we should clarify semantics
-                return new BytesRef(bytes);
+                result.bytes = new byte[len];
+                result.offset = 0;
+                result.length = len;
+                in.readBytes(result.bytes, 0, len);
+                return result;
               } catch (IOException ioe) {
+                // nocommit should .get() just throw IOE...
                 throw new RuntimeException(ioe);
               }
             }
           };
         } else if (DocValues.isSortedBytes(dvType)) {
+
+          final DecimalFormat ordDecoder = new DecimalFormat(field.ordPattern, new DecimalFormatSymbols(Locale.ROOT));
+
+          return new SortedSource(dvType, BytesRef.getUTF8SortedAsUnicodeComparator()) {
+            @Override
+            public int ord(int docID) {
+              try {
+                in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + (1 + field.ordPattern.length()) * docID);
+                SimpleTextUtil.readLine(in, scratch);
+                return ordDecoder.parse(scratch.utf8ToString(), pos).intValue();
+              } catch (IOException ioe) {
+                // nocommit should .get() just throw IOE...
+                throw new RuntimeException(ioe);
+              }
+            }
+
+            @Override
+            public BytesRef getByOrd(int ord, BytesRef result) {
+              try {
+                in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength));
+                SimpleTextUtil.readLine(in, scratch);
+                assert StringHelper.startsWith(scratch, LENGTH);
+                int len;
+                try {
+                  len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue();
+                } catch (ParseException pe) {
+                  throw new RuntimeException(pe);
+                }
+                result.bytes = new byte[len];
+                result.offset = 0;
+                result.length = len;
+                in.readBytes(result.bytes, 0, len);
+                return result;
+              } catch (IOException ioe) {
+                // nocommit should .get() just throw IOE...
+                throw new RuntimeException(ioe);
+              }
+            }
+
+            @Override
+            public int getValueCount() {
+              return field.numValues;
+            }
+
+            @Override
+            public PackedInts.Reader getDocToOrd() {
+              return null;
+            }
+          };
+        } else if (DocValues.isFloat(dvType)) {
           // nocommit
           return null;
+        } else {
+          throw new AssertionError();
         }
-        // nocommit
-        return null;
       }
     }
 

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java?rev=1407855&r1=1407854&r2=1407855&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java Sat Nov 10 17:28:56 2012
@@ -344,6 +344,7 @@ public abstract class DocValues implemen
     /**
      * Returns the PackedInts.Reader impl that maps document to ord.
      */
+    // nocommit make non-abstract returning null?
     public abstract PackedInts.Reader getDocToOrd();
     
     /**
@@ -549,6 +550,16 @@ public abstract class DocValues implemen
       return false;
     }
   }
+
+  public static boolean isFloat(Type type) {
+    switch(type) {
+    case FLOAT_64:
+    case FLOAT_32:
+      return true;
+    default:
+      return false;
+    }
+  }
   
   /**
    * <code>Type</code> specifies the {@link DocValues} type for a