You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/11/08 19:21:00 UTC
svn commit: r1407203 - in
/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs:
BinaryDocValuesConsumer.java NumericDocValuesConsumer.java
SimpleDVConsumer.java SortedDocValuesConsumer.java
Author: rmuir
Date: Thu Nov 8 18:21:00 2012
New Revision: 1407203
URL: http://svn.apache.org/viewvc?rev=1407203&view=rev
Log:
really rough prototype of 'streaming codec api'
Added:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java (with props)
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java (with props)
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java (with props)
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java (with props)
Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java?rev=1407203&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java Thu Nov 8 18:21:00 2012
@@ -0,0 +1,51 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Streaming consumer for the binary doc values of a single field: values are
+ * pushed one at a time through {@link #add(BytesRef)}, followed by a single
+ * call to {@link #finish(FieldInfos, int)}.
+ */
+public abstract class BinaryDocValuesConsumer {
+
+  /**
+   * Consumes the next value.
+   *
+   * @param value the value to write; the default {@link #merge(MergeState)}
+   *        passes the same scratch {@link BytesRef} on every call, so
+   *        implementations presumably must not hold on to it (TODO confirm)
+   * @throws IOException if the value cannot be written
+   */
+  public abstract void add(BytesRef value) throws IOException;
+
+  /**
+   * Called once after the last {@link #add(BytesRef)}.
+   *
+   * @param fis field infos of the segment being written
+   * @param numDocs number of values that were passed to {@link #add(BytesRef)}
+   * @throws IOException if finishing fails
+   */
+  public abstract void finish(FieldInfos fis, int numDocs) throws IOException;
+
+  /**
+   * Default merge: walks every reader in {@code mergeState.readers}, forwards
+   * the value of each live document of field {@code mergeState.fieldInfo} to
+   * {@link #add(BytesRef)}, then calls {@link #finish(FieldInfos, int)}.
+   *
+   * @param mergeState merge state; {@code fieldInfo} must already point at the
+   *        field being merged
+   * @return the number of (live) documents whose value was added
+   * @throws IOException if reading a source or writing a value fails
+   */
+  public int merge(MergeState mergeState) throws IOException {
+    int docCount = 0;
+    // single scratch instance, refilled by getBytes for every document
+    final BytesRef bytes = new BytesRef();
+    for (AtomicReader reader : mergeState.readers) {
+      final int maxDoc = reader.maxDoc();
+      final Bits liveDocs = reader.getLiveDocs();
+      // NOTE(review): assumes every reader has doc values for this field;
+      // a reader without them would presumably NPE here - confirm.
+      final Source source = reader.docValues(mergeState.fieldInfo.name).getDirectSource();
+      for (int i = 0; i < maxDoc; i++) {
+        // liveDocs == null means the reader has no deletions
+        if (liveDocs == null || liveDocs.get(i)) {
+          add(source.getBytes(i, bytes));
+          // count only documents that were actually added, so the total
+          // handed to finish() matches the number of add() calls
+          docCount++;
+        }
+        mergeState.checkAbort.work(300);
+      }
+    }
+    finish(mergeState.fieldInfos, docCount);
+    return docCount;
+  }
+}
Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java?rev=1407203&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/NumericDocValuesConsumer.java Thu Nov 8 18:21:00 2012
@@ -0,0 +1,49 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.util.Bits;
+
+/**
+ * Streaming consumer for the numeric doc values of a single field: values are
+ * pushed one at a time through {@link #add(long)}, followed by a single call
+ * to {@link #finish(FieldInfos, int)}.
+ */
+public abstract class NumericDocValuesConsumer {
+
+  /**
+   * Consumes the next value.
+   *
+   * @param value the value to write
+   * @throws IOException if the value cannot be written
+   */
+  public abstract void add(long value) throws IOException;
+
+  /**
+   * Called once after the last {@link #add(long)}.
+   *
+   * @param fieldInfos field infos of the segment being written
+   * @param numDocs number of values that were passed to {@link #add(long)}
+   * @throws IOException if finishing fails
+   */
+  public abstract void finish(FieldInfos fieldInfos, int numDocs) throws IOException;
+
+  /**
+   * Default merge: walks every reader in {@code mergeState.readers}, forwards
+   * the value of each live document of field {@code mergeState.fieldInfo} to
+   * {@link #add(long)}, then calls {@link #finish(FieldInfos, int)}.
+   *
+   * @param mergeState merge state; {@code fieldInfo} must already point at the
+   *        field being merged
+   * @return the number of (live) documents whose value was added
+   * @throws IOException if reading a source or writing a value fails
+   */
+  public int merge(MergeState mergeState) throws IOException {
+    int docCount = 0;
+    for (AtomicReader reader : mergeState.readers) {
+      final int maxDoc = reader.maxDoc();
+      final Bits liveDocs = reader.getLiveDocs();
+      // NOTE(review): assumes every reader has doc values for this field;
+      // a reader without them would presumably NPE here - confirm.
+      final Source source = reader.docValues(mergeState.fieldInfo.name).getDirectSource();
+      for (int i = 0; i < maxDoc; i++) {
+        // liveDocs == null means the reader has no deletions
+        if (liveDocs == null || liveDocs.get(i)) {
+          add(source.getInt(i));
+          // count only documents that were actually added, so the total
+          // handed to finish() matches the number of add() calls
+          docCount++;
+        }
+        mergeState.checkAbort.work(300);
+      }
+    }
+    finish(mergeState.fieldInfos, docCount);
+    return docCount;
+  }
+}
Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java?rev=1407203&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java Thu Nov 8 18:21:00 2012
@@ -0,0 +1,113 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Prototype streaming doc-values API: hands out one per-field consumer for
+ * each doc-values field and provides simple default merge implementations
+ * that a codec can override with something smarter.
+ */
+public abstract class SimpleDVConsumer implements Closeable {
+  // TODO: are any of these params too "infringing" on codec?
+  // we want codec to get necessary stuff from IW, but trading off against merge complexity.
+
+  /**
+   * Starts a numeric field.
+   *
+   * @param field the field being written
+   * @param minValue smallest value that will be added
+   * @param maxValue largest value that will be added
+   * @return consumer that receives the field's values
+   * @throws IOException if the field cannot be started
+   */
+  public abstract NumericDocValuesConsumer addNumericField(FieldInfo field, long minValue, long maxValue) throws IOException;
+
+  /**
+   * Starts a binary field.
+   *
+   * @param field the field being written
+   * @param fixedLength true if every value that will be added has the same length
+   * @param maxLength length of the longest value that will be added
+   * @return consumer that receives the field's values
+   * @throws IOException if the field cannot be started
+   */
+  public abstract BinaryDocValuesConsumer addBinaryField(FieldInfo field, boolean fixedLength, int maxLength) throws IOException;
+
+  // nocommit: figure out whats fair here.
+  /**
+   * Starts a sorted field.
+   *
+   * @param field the field being written
+   * @return consumer for the field
+   * @throws IOException if the field cannot be started
+   */
+  public abstract SortedDocValuesConsumer addSortedField(FieldInfo field) throws IOException;
+
+  /**
+   * Merges every doc-values field listed in {@code mergeState.fieldInfos}.
+   * For each such field, {@code mergeState.fieldInfo} is set to it and the
+   * matching per-type merge method is invoked. Only VAR_INTS,
+   * BYTES_VAR_STRAIGHT and BYTES_VAR_SORTED are dispatched; fields of any
+   * other doc-values type are currently skipped.
+   *
+   * @param mergeState merge state describing the readers and fields to merge
+   * @throws IOException if a per-field merge fails
+   */
+  public void merge(MergeState mergeState) throws IOException {
+    for (FieldInfo field : mergeState.fieldInfos) {
+      if (field.hasDocValues()) {
+        // the per-field merge methods read the current field from mergeState
+        mergeState.fieldInfo = field;
+        // nocommit: switch on 3 types: NUMBER, BYTES, SORTED
+        DocValues.Type type = field.getDocValuesType();
+        if (type == DocValues.Type.VAR_INTS) {
+          mergeNumericField(mergeState);
+        } else if (type == DocValues.Type.BYTES_VAR_STRAIGHT) {
+          mergeBinaryField(mergeState);
+        } else if (type == DocValues.Type.BYTES_VAR_SORTED) {
+          mergeSortedField(mergeState);
+        }
+      }
+    }
+  }
+
+  /**
+   * Dead simple numeric merge (a codec can optimize): a first pass over all
+   * live documents computes the minimum and maximum value, then the field is
+   * started with that range and the values are streamed in by
+   * {@link NumericDocValuesConsumer#merge(MergeState)}.
+   * <p>
+   * If there is no live document at all, the range passed on is the untouched
+   * initial one ({@code Long.MAX_VALUE} as min, {@code Long.MIN_VALUE} as max).
+   *
+   * @param mergeState merge state; {@code fieldInfo} is the field to merge
+   * @throws IOException if reading a source or writing the field fails
+   */
+  protected void mergeNumericField(MergeState mergeState) throws IOException {
+    // first compute min and max value of live ones to be merged.
+    long minValue = Long.MAX_VALUE;
+    long maxValue = Long.MIN_VALUE;
+    for (AtomicReader reader : mergeState.readers) {
+      final int maxDoc = reader.maxDoc();
+      final Bits liveDocs = reader.getLiveDocs();
+      final Source source = reader.docValues(mergeState.fieldInfo.name).getDirectSource();
+      for (int i = 0; i < maxDoc; i++) {
+        // liveDocs == null means the reader has no deletions
+        if (liveDocs == null || liveDocs.get(i)) {
+          long val = source.getInt(i);
+          minValue = Math.min(val, minValue);
+          // must be max, not min: otherwise maxValue never leaves Long.MIN_VALUE
+          maxValue = Math.max(val, maxValue);
+        }
+        mergeState.checkAbort.work(300);
+      }
+    }
+    // now we can merge
+    NumericDocValuesConsumer field = addNumericField(mergeState.fieldInfo, minValue, maxValue);
+    field.merge(mergeState);
+  }
+
+  /**
+   * Dead simple binary merge (a codec can optimize): a first pass over all
+   * live documents determines the longest value and whether all values share
+   * one length, then the field is started with those stats and the values are
+   * streamed in by {@link BinaryDocValuesConsumer#merge(MergeState)}.
+   *
+   * @param mergeState merge state; {@code fieldInfo} is the field to merge
+   * @throws IOException if reading a source or writing the field fails
+   */
+  protected void mergeBinaryField(MergeState mergeState) throws IOException {
+    // first compute fixedLength and maxLength of live ones to be merged.
+    boolean fixedLength = true;
+    int maxLength = -1; // -1 == no live value seen yet
+    BytesRef bytes = new BytesRef();
+    for (AtomicReader reader : mergeState.readers) {
+      final int maxDoc = reader.maxDoc();
+      final Bits liveDocs = reader.getLiveDocs();
+      final Source source = reader.docValues(mergeState.fieldInfo.name).getDirectSource();
+      for (int i = 0; i < maxDoc; i++) {
+        if (liveDocs == null || liveDocs.get(i)) {
+          source.getBytes(i, bytes);
+          if (maxLength == -1) {
+            // first live value establishes the reference length
+            maxLength = bytes.length;
+          } else {
+            // compare against the running max BEFORE updating it: once any
+            // length differs, fixedLength stays false
+            fixedLength &= bytes.length == maxLength;
+            maxLength = Math.max(bytes.length, maxLength);
+          }
+        }
+        mergeState.checkAbort.work(300);
+      }
+    }
+    // now we can merge
+    assert maxLength >= 0; // could this happen (nothing to do?)
+    BinaryDocValuesConsumer field = addBinaryField(mergeState.fieldInfo, fixedLength, maxLength);
+    field.merge(mergeState);
+  }
+
+  /**
+   * Merge hook for sorted fields. Not implemented yet in this prototype: the
+   * body is empty, so sorted fields are not written during merge.
+   *
+   * @param mergeState merge state; {@code fieldInfo} is the field to merge
+   * @throws IOException declared for future implementations
+   */
+  protected void mergeSortedField(MergeState mergeState) throws IOException {
+
+  }
+}
Added: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java?rev=1407203&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java (added)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java Thu Nov 8 18:21:00 2012
@@ -0,0 +1,23 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO!
+/**
+ * Placeholder for the consumer of sorted doc values. It declares no members
+ * yet; it only exists so that SimpleDVConsumer.addSortedField has a return
+ * type to refer to.
+ */
+public class SortedDocValuesConsumer {
+
+}