You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/11/08 19:21:00 UTC

svn commit: r1407203 - in /lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs: BinaryDocValuesConsumer.java NumericDocValuesConsumer.java SimpleDVConsumer.java SortedDocValuesConsumer.java

Author: rmuir
Date: Thu Nov  8 18:21:00 2012
New Revision: 1407203

URL: http://svn.apache.org/viewvc?rev=1407203&view=rev
Log:
really rough prototype of 'streaming codec api'

Added:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java   (with props)
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java   (with props)
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java   (with props)
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java   (with props)

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java?rev=1407203&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java Thu Nov  8 18:21:00 2012
@@ -0,0 +1,51 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Prototype streaming consumer for the per-document binary values of a
+ * single field.
+ * <p>
+ * Values are pushed one at a time through {@link #add(BytesRef)}, after which
+ * {@link #finish(FieldInfos, int)} is called once.
+ */
+public abstract class BinaryDocValuesConsumer {
+  /**
+   * Receives the value for the next document.
+   *
+   * @param value the document's value; during {@link #merge(MergeState)} a
+   *        single scratch {@link BytesRef} is handed to the source for every
+   *        document, so implementations presumably must not hold on to the
+   *        instance (TODO confirm against {@code Source.getBytes})
+   * @throws IOException if the value cannot be written
+   */
+  public abstract void add(BytesRef value) throws IOException;
+
+  /**
+   * Called once after the last {@link #add(BytesRef)} call.
+   *
+   * @param fis field infos of the segment being written
+   * @param numDocs number of documents for which a value was added
+   * @throws IOException if the output cannot be completed
+   */
+  public abstract void finish(FieldInfos fis, int numDocs) throws IOException;
+
+  /**
+   * Default merge: walks every reader in {@code mergeState.readers}, adds the
+   * value of {@code mergeState.fieldInfo} for each live (non-deleted)
+   * document, then calls {@link #finish(FieldInfos, int)}.
+   *
+   * @param mergeState merge state; {@code mergeState.fieldInfo} selects the
+   *        field to merge
+   * @return the number of documents that were added
+   * @throws IOException if reading a source or writing a value fails
+   */
+  public int merge(MergeState mergeState) throws IOException {
+    int docCount = 0;
+    final BytesRef bytes = new BytesRef();
+    for (AtomicReader reader : mergeState.readers) {
+      final int maxDoc = reader.maxDoc();
+      final Bits liveDocs = reader.getLiveDocs();
+      // NOTE(review): docValues(...) is dereferenced without a null check; confirm
+      // it cannot return null for a segment that lacks this field.
+      final Source source = reader.docValues(mergeState.fieldInfo.name).getDirectSource();
+      for (int i = 0; i < maxDoc; i++) {
+        if (liveDocs == null || liveDocs.get(i)) {
+          add(source.getBytes(i, bytes));
+          // Count only documents that were actually added, so the total passed
+          // to finish() matches the number of add() calls.
+          docCount++;
+        }
+        mergeState.checkAbort.work(300);
+      }
+    }
+    finish(mergeState.fieldInfos, docCount);
+    return docCount;
+  }
+}

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java?rev=1407203&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java Thu Nov  8 18:21:00 2012
@@ -0,0 +1,49 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.util.Bits;
+
+/**
+ * Prototype streaming consumer for the per-document numeric values of a
+ * single field.
+ * <p>
+ * Values are pushed one at a time through {@link #add(long)}, after which
+ * {@link #finish(FieldInfos, int)} is called once.
+ */
+public abstract class NumericDocValuesConsumer {
+  /**
+   * Receives the value for the next document.
+   *
+   * @param value the document's value
+   * @throws IOException if the value cannot be written
+   */
+  public abstract void add(long value) throws IOException;
+
+  /**
+   * Called once after the last {@link #add(long)} call.
+   *
+   * @param fieldInfos field infos of the segment being written
+   * @param numDocs number of documents for which a value was added
+   * @throws IOException if the output cannot be completed
+   */
+  public abstract void finish(FieldInfos fieldInfos, int numDocs) throws IOException;
+
+  /**
+   * Default merge: walks every reader in {@code mergeState.readers}, adds the
+   * value of {@code mergeState.fieldInfo} for each live (non-deleted)
+   * document, then calls {@link #finish(FieldInfos, int)}.
+   *
+   * @param mergeState merge state; {@code mergeState.fieldInfo} selects the
+   *        field to merge
+   * @return the number of documents that were added
+   * @throws IOException if reading a source or writing a value fails
+   */
+  public int merge(MergeState mergeState) throws IOException {
+    int docCount = 0;
+    for (AtomicReader reader : mergeState.readers) {
+      final int maxDoc = reader.maxDoc();
+      final Bits liveDocs = reader.getLiveDocs();
+      // NOTE(review): docValues(...) is dereferenced without a null check; confirm
+      // it cannot return null for a segment that lacks this field.
+      final Source source = reader.docValues(mergeState.fieldInfo.name).getDirectSource();
+      for (int i = 0; i < maxDoc; i++) {
+        if (liveDocs == null || liveDocs.get(i)) {
+          add(source.getInt(i));
+          // Count only documents that were actually added, so the total passed
+          // to finish() matches the number of add() calls.
+          docCount++;
+        }
+        mergeState.checkAbort.work(300);
+      }
+    }
+    finish(mergeState.fieldInfos, docCount);
+    return docCount;
+  }
+}

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java?rev=1407203&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java Thu Nov  8 18:21:00 2012
@@ -0,0 +1,113 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Prototype streaming doc-values API.
+ * <p>
+ * A codec implements the {@code addXxxField} factory methods; each returns a
+ * per-field consumer that then receives that field's values. The default
+ * {@link #merge(MergeState)} makes one pass over the live documents to compute
+ * the statistics the factory method needs, then lets the per-field consumer
+ * perform the merge itself.
+ */
+public abstract class SimpleDVConsumer implements Closeable {
+  // TODO: are any of these params too "infringing" on codec?
+  // we want codec to get necessary stuff from IW, but trading off against merge complexity.
+
+  /**
+   * Starts a numeric field.
+   *
+   * @param field the field being written
+   * @param minValue smallest value that will be added
+   * @param maxValue largest value that will be added
+   * @return the consumer that receives the field's values
+   * @throws IOException if the field cannot be started
+   */
+  public abstract NumericDocValuesConsumer addNumericField(FieldInfo field, long minValue, long maxValue) throws IOException;
+
+  /**
+   * Starts a binary field.
+   *
+   * @param field the field being written
+   * @param fixedLength {@code true} if every value has the same length
+   * @param maxLength length in bytes of the longest value
+   * @return the consumer that receives the field's values
+   * @throws IOException if the field cannot be started
+   */
+  public abstract BinaryDocValuesConsumer addBinaryField(FieldInfo field, boolean fixedLength, int maxLength) throws IOException;
+
+  // nocommit: figure out whats fair here.
+  /**
+   * Starts a sorted field.
+   *
+   * @param field the field being written
+   * @return the consumer that receives the field's values
+   * @throws IOException if the field cannot be started
+   */
+  public abstract SortedDocValuesConsumer addSortedField(FieldInfo field) throws IOException;
+
+  /**
+   * Merges every doc-values field listed in {@code mergeState.fieldInfos}.
+   * <p>
+   * {@code mergeState.fieldInfo} is set to the current field before the
+   * per-type merge method runs. Only {@code VAR_INTS},
+   * {@code BYTES_VAR_STRAIGHT} and {@code BYTES_VAR_SORTED} are dispatched;
+   * fields of any other doc-values type are currently skipped silently.
+   *
+   * @param mergeState state of the merge in progress
+   * @throws IOException if a per-field merge fails
+   */
+  public void merge(MergeState mergeState) throws IOException {
+    for (FieldInfo field : mergeState.fieldInfos) {
+      if (field.hasDocValues()) {
+        mergeState.fieldInfo = field;
+        // nocommit: switch on 3 types: NUMBER, BYTES, SORTED
+        DocValues.Type type = field.getDocValuesType();
+        if (type == DocValues.Type.VAR_INTS) {
+          mergeNumericField(mergeState);
+        } else if (type == DocValues.Type.BYTES_VAR_STRAIGHT) {
+          mergeBinaryField(mergeState);
+        } else if (type == DocValues.Type.BYTES_VAR_SORTED) {
+          mergeSortedField(mergeState);
+        }
+      }
+    }
+  }
+
+  /**
+   * Merges the numeric field {@code mergeState.fieldInfo}.
+   * <p>
+   * Dead simple impl (codec can optimize): a first pass over the live
+   * documents computes the minimum and maximum value, then the consumer
+   * returned by {@link #addNumericField(FieldInfo, long, long)} reads the
+   * sources a second time to merge them. If there are no live documents,
+   * {@code minValue} stays {@code Long.MAX_VALUE} and {@code maxValue} stays
+   * {@code Long.MIN_VALUE}.
+   *
+   * @param mergeState state of the merge in progress
+   * @throws IOException if reading a source or writing the field fails
+   */
+  protected void mergeNumericField(MergeState mergeState) throws IOException {
+    // first compute min and max value of live ones to be merged.
+    long minValue = Long.MAX_VALUE;
+    long maxValue = Long.MIN_VALUE;
+    for (AtomicReader reader : mergeState.readers) {
+      final int maxDoc = reader.maxDoc();
+      final Bits liveDocs = reader.getLiveDocs();
+      final Source source = reader.docValues(mergeState.fieldInfo.name).getDirectSource();
+      for (int i = 0; i < maxDoc; i++) {
+        if (liveDocs == null || liveDocs.get(i)) {
+          long val = source.getInt(i);
+          minValue = Math.min(val, minValue);
+          // Must be max(): min() here would leave maxValue at or below the
+          // smallest value seen, giving the codec a wrong value range.
+          maxValue = Math.max(val, maxValue);
+        }
+        mergeState.checkAbort.work(300);
+      }
+    }
+    // now we can merge
+    NumericDocValuesConsumer field = addNumericField(mergeState.fieldInfo, minValue, maxValue);
+    field.merge(mergeState);
+  }
+
+  /**
+   * Merges the binary field {@code mergeState.fieldInfo}.
+   * <p>
+   * Dead simple impl (codec can optimize): a first pass over the live
+   * documents determines whether all values share one length and what the
+   * longest length is, then the consumer returned by
+   * {@link #addBinaryField(FieldInfo, boolean, int)} reads the sources a
+   * second time to merge them.
+   *
+   * @param mergeState state of the merge in progress
+   * @throws IOException if reading a source or writing the field fails
+   */
+  protected void mergeBinaryField(MergeState mergeState) throws IOException {
+    // first compute fixedLength and maxLength of live ones to be merged.
+    boolean fixedLength = true;
+    int maxLength = -1; // -1 means "no live value seen yet"
+    BytesRef bytes = new BytesRef();
+    for (AtomicReader reader : mergeState.readers) {
+      final int maxDoc = reader.maxDoc();
+      final Bits liveDocs = reader.getLiveDocs();
+      final Source source = reader.docValues(mergeState.fieldInfo.name).getDirectSource();
+      for (int i = 0; i < maxDoc; i++) {
+        if (liveDocs == null || liveDocs.get(i)) {
+          source.getBytes(i, bytes);
+          if (maxLength == -1) {
+            maxLength = bytes.length;
+          } else {
+            // Checked before maxLength is updated: any length that differs from
+            // the running maximum means the values are not fixed-length.
+            fixedLength &= bytes.length == maxLength;
+            maxLength = Math.max(bytes.length, maxLength);
+          }
+        }
+        mergeState.checkAbort.work(300);
+      }
+    }
+    // now we can merge
+    assert maxLength >= 0; // could this happen (nothing to do?)
+    BinaryDocValuesConsumer field = addBinaryField(mergeState.fieldInfo, fixedLength, maxLength);
+    field.merge(mergeState);
+  }
+
+  /**
+   * Merges the sorted field {@code mergeState.fieldInfo}.
+   * <p>
+   * Not implemented yet: this method currently does nothing, so sorted fields
+   * are not written by the default merge.
+   *
+   * @param mergeState state of the merge in progress
+   * @throws IOException declared for implementations; never thrown here
+   */
+  protected void mergeSortedField(MergeState mergeState) throws IOException {
+
+  }
+}

Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java?rev=1407203&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java Thu Nov  8 18:21:00 2012
@@ -0,0 +1,23 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Placeholder for the streaming consumer of sorted doc values.
+ * <p>
+ * The class declares no members yet.
+ */
+// TODO: define the sorted-values consumer API
+public class SortedDocValuesConsumer {
+  
+}