You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/11/10 18:28:56 UTC
svn commit: r1407855 - in /lucene/dev/branches/lucene4547/lucene:
codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
core/src/java/org/apache/lucene/index/DocValues.java
Author: mikemccand
Date: Sat Nov 10 17:28:56 2012
New Revision: 1407855
URL: http://svn.apache.org/viewvc?rev=1407855&view=rev
Log:
ST sorted source producer
Modified:
lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java
Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1407855&r1=1407854&r2=1407855&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Sat Nov 10 17:28:56 2012
@@ -46,6 +46,7 @@ import org.apache.lucene.store.IndexOutp
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.packed.PackedInts;
/**
@@ -102,7 +103,7 @@ public class SimpleTextSimpleDocValuesFo
* baz[space][space][space][space][space]
* ...
* </pre>
- * so a document's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*docid
+ * so an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
* the extra 9 is 2 newlines, plus "length " itself.
*
* for sorted bytes this is a fixed-width file, for example:
@@ -315,6 +316,12 @@ public class SimpleTextSimpleDocValuesFo
}
};
+ // nocommit once we do "in ram cache of direct source"
+ // ... and hopefully under SCR control ... then if app
+ // asks for direct source but it was already cached in ram
+ // ... we should use the ram cached one! we don't do this
+ // correctly today ...
+
// nocommit make sure we test "all docs have 0 value",
// "all docs have empty BytesREf"
@@ -324,8 +331,10 @@ public class SimpleTextSimpleDocValuesFo
FieldInfo fieldInfo;
long dataStartFilePointer;
String pattern;
+ String ordPattern;
int maxLength;
int minValue;
+ int numValues;
};
final int maxDoc;
@@ -353,9 +362,15 @@ public class SimpleTextSimpleDocValuesFo
DocValues.Type dvType = fieldInfo.getDocValuesType();
assert dvType != null;
- switch(dvType) {
- case BYTES_VAR_STRAIGHT:
- case BYTES_FIXED_STRAIGHT:
+ if (DocValues.isNumber(dvType)) {
+ readLine();
+ assert startsWith(MINVALUE);
+ field.minValue = Integer.parseInt(stripPrefix(MINVALUE));
+ readLine();
+ assert startsWith(PATTERN);
+ field.pattern = stripPrefix(PATTERN);
+ data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
+ } else if (DocValues.isBytes(dvType)) {
readLine();
assert startsWith(MAXLENGTH);
field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
@@ -364,28 +379,22 @@ public class SimpleTextSimpleDocValuesFo
field.pattern = stripPrefix(PATTERN);
data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * maxDoc);
break;
- case BYTES_VAR_SORTED:
- case BYTES_FIXED_SORTED:
- case BYTES_VAR_DEREF:
- case BYTES_FIXED_DEREF:
- // nocommit TODO
- break;
- case VAR_INTS:
- case FIXED_INTS_8:
- case FIXED_INTS_16:
- case FIXED_INTS_32:
- case FIXED_INTS_64:
- case FLOAT_64:
- case FLOAT_32:
+ } else if (DocValues.isSortedBytes(dvType)) {
readLine();
- assert startsWith(MINVALUE);
- field.minValue = Integer.parseInt(stripPrefix(MINVALUE));
+ assert startsWith(NUMVALUES);
+ field.numValues = Integer.parseInt(stripPrefix(NUMVALUES));
+ readLine();
+ assert startsWith(MAXLENGTH);
+ field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
readLine();
assert startsWith(PATTERN);
field.pattern = stripPrefix(PATTERN);
- data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
- break;
- default:
+ readLine();
+ assert startsWith(ORDPATTERN);
+ field.ordPattern = stripPrefix(ORDPATTERN);
+ } else if (DocValues.isFloat(dvType)) {
+ // nocommit
+ } else {
throw new AssertionError();
}
field.dataStartFilePointer = data.getFilePointer();
@@ -399,6 +408,11 @@ public class SimpleTextSimpleDocValuesFo
this.field = field;
}
+ // nocommit provide a simple default Source impl that
+ // loads DirectSource and pulls things into RAM; we
+ // need producer API to provide the min/max value,
+ // fixed/max length, etc.
+
@Override
public Source loadSource() throws IOException {
// nocommit todo
@@ -422,17 +436,60 @@ public class SimpleTextSimpleDocValuesFo
return new Source(dvType) {
@Override
- public BytesRef getBytes(int docID, BytesRef bytesIn) {
- return new BytesRef(values[docID]);
+ public BytesRef getBytes(int docID, BytesRef result) {
+ result.bytes = values[docID];
+ result.offset = 0;
+ result.length = result.bytes.length;
+ return result;
}
};
} else if (DocValues.isSortedBytes(dvType)) {
+ SortedSource source = (SortedSource) loadDirectSource();
+ final byte[][] values = new byte[field.numValues][];
+ BytesRef scratch = new BytesRef();
+ for(int ord=0;ord<field.numValues;ord++) {
+ source.getByOrd(ord, scratch);
+ values[ord] = new byte[scratch.length];
+ System.arraycopy(scratch.bytes, scratch.offset, values[ord], 0, scratch.length);
+ }
+
+ final int[] ords = new int[maxDoc];
+ for(int docID=0;docID<maxDoc;docID++) {
+ ords[docID] = source.ord(docID);
+ }
+
+ return new SortedSource(dvType, BytesRef.getUTF8SortedAsUnicodeComparator()) {
+ @Override
+ public int ord(int docID) {
+ return ords[docID];
+ }
+
+ @Override
+ public BytesRef getByOrd(int ord, BytesRef result) {
+ result.bytes = values[ord];
+ result.offset = 0;
+ result.length = result.bytes.length;
+ return result;
+ }
+
+ @Override
+ public int getValueCount() {
+ return field.numValues;
+ }
+
+ @Override
+ public PackedInts.Reader getDocToOrd() {
+ return null;
+ }
+ };
+
+ } else if (DocValues.isFloat(dvType)) {
// nocommit
return null;
+ } else {
+ throw new AssertionError();
}
- // nocommit
- return null;
}
@Override
@@ -467,7 +524,7 @@ public class SimpleTextSimpleDocValuesFo
} else if (DocValues.isBytes(dvType)) {
return new Source(dvType) {
@Override
- public BytesRef getBytes(int docID, BytesRef bytesIn) {
+ public BytesRef getBytes(int docID, BytesRef result) {
try {
// nocommit bounds check docID? spooky
// because if we don't you can maybe get
@@ -481,22 +538,73 @@ public class SimpleTextSimpleDocValuesFo
} catch (ParseException pe) {
throw new RuntimeException(pe);
}
- byte[] bytes = new byte[len];
- in.readBytes(bytes, 0, bytes.length);
- // nocommit MUST i reuse the incoming
- // arg....? we should clarify semantics
- return new BytesRef(bytes);
+ result.bytes = new byte[len];
+ result.offset = 0;
+ result.length = len;
+ in.readBytes(result.bytes, 0, len);
+ return result;
} catch (IOException ioe) {
+ // nocommit should .get() just throw IOE...
throw new RuntimeException(ioe);
}
}
};
} else if (DocValues.isSortedBytes(dvType)) {
+
+ final DecimalFormat ordDecoder = new DecimalFormat(field.ordPattern, new DecimalFormatSymbols(Locale.ROOT));
+
+ return new SortedSource(dvType, BytesRef.getUTF8SortedAsUnicodeComparator()) {
+ @Override
+ public int ord(int docID) {
+ try {
+ in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + (1 + field.ordPattern.length()) * docID);
+ SimpleTextUtil.readLine(in, scratch);
+ return ordDecoder.parse(scratch.utf8ToString(), pos).intValue();
+ } catch (IOException ioe) {
+ // nocommit should .get() just throw IOE...
+ throw new RuntimeException(ioe);
+ }
+ }
+
+ @Override
+ public BytesRef getByOrd(int ord, BytesRef result) {
+ try {
+ in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength));
+ SimpleTextUtil.readLine(in, scratch);
+ assert StringHelper.startsWith(scratch, LENGTH);
+ int len;
+ try {
+ len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue();
+ } catch (ParseException pe) {
+ throw new RuntimeException(pe);
+ }
+ result.bytes = new byte[len];
+ result.offset = 0;
+ result.length = len;
+ in.readBytes(result.bytes, 0, len);
+ return result;
+ } catch (IOException ioe) {
+ // nocommit should .get() just throw IOE...
+ throw new RuntimeException(ioe);
+ }
+ }
+
+ @Override
+ public int getValueCount() {
+ return field.numValues;
+ }
+
+ @Override
+ public PackedInts.Reader getDocToOrd() {
+ return null;
+ }
+ };
+ } else if (DocValues.isFloat(dvType)) {
// nocommit
return null;
+ } else {
+ throw new AssertionError();
}
- // nocommit
- return null;
}
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java?rev=1407855&r1=1407854&r2=1407855&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/DocValues.java Sat Nov 10 17:28:56 2012
@@ -344,6 +344,7 @@ public abstract class DocValues implemen
/**
* Returns the PackedInts.Reader impl that maps document to ord.
*/
+ // nocommit make non-abstract returning null?
public abstract PackedInts.Reader getDocToOrd();
/**
@@ -549,6 +550,16 @@ public abstract class DocValues implemen
return false;
}
}
+
+ public static boolean isFloat(Type type) {
+ switch(type) {
+ case FLOAT_64:
+ case FLOAT_32:
+ return true;
+ default:
+ return false;
+ }
+ }
/**
* <code>Type</code> specifies the {@link DocValues} type for a