You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/09/06 21:36:18 UTC
[07/51] [abbrv] [partial] Cleaning up and getting ready to
development towards v4.8
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
new file mode 100644
index 0000000..85c711a
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexInput.cs
@@ -0,0 +1,198 @@
+package org.apache.lucene.codecs.intblock;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Naive int block API that writes vInts. This is
+ * expected to give poor performance; it's really only for
+ * testing the pluggability. One should typically use pfor instead. */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.sep.IntIndexInput;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IndexInput;
+
+// TODO: much of this can be shared code w/ the fixed case
+
+/** Abstract base class that reads variable-size blocks of ints
+ * from an IndexInput. While this is a simple approach, a
+ * more performant approach would directly create an impl
+ * of IntIndexInput inside Directory. Wrapping a generic
+ * IndexInput will likely cost performance.
+ *
+ * @lucene.experimental
+ */
+public abstract class VariableIntBlockIndexInput extends IntIndexInput {
+ // NOTE(review): Java syntax mixed with C# fragments (`bool`, an unbalanced `Debug.Assert( ...;`) - presumably a port in progress; not valid in either language as written.
+ protected final IndexInput in; // underlying input; cloned for each Reader, closed by close()
+ protected final int maxBlockSize; // read from `in` by the constructor; length of each Reader's buffer
+
+ protected VariableIntBlockIndexInput(final IndexInput in) {
+ this.in = in;
+ maxBlockSize = in.readInt(); // counterpart of out.writeInt(maxBlockSize) in VariableIntBlockIndexOutput's constructor
+ }
+
+ @Override
+ public IntIndexInput.Reader reader() {
+ final int[] buffer = new int[maxBlockSize]; // shared: the Reader reads it as `pending`, the BlockReader receives it too
+ final IndexInput clone = in.clone(); // each Reader works on its own clone of the input
+ // TODO: can this be simplified?
+ return new Reader(clone, buffer, this.getBlockReader(clone, buffer));
+ }
+
+ @Override
+ public void close() {
+ in.close(); // closes the original input only; clones handed to Readers are not tracked here
+ }
+
+ @Override
+ public IntIndexInput.Index index() {
+ return new Index(); // fresh index with fp = 0 and upto = 0
+ }
+
+ protected abstract BlockReader getBlockReader(IndexInput in, int[] buffer) ; // subclass hook: called once per reader() with the cloned input and the new buffer
+
+ /**
+ * Interface for variable-size block decoders.
+ * <p>
+ * Implementations should decode into the buffer in {@link #readBlock}.
+ */
+ public interface BlockReader {
+ public int readBlock() ; // Reader stores the return value as the size of the block just read
+ public void seek(long pos) ; // Reader calls this right after in.seek(pos) when moving to a different block
+ }
+
+ private static class Reader extends IntIndexInput.Reader {
+ private final IndexInput in; // the clone created in reader()
+
+ public final int[] pending; // buffer the values are returned from
+ int upto; // index in pending of the next value to return
+
+ private bool seekPending; // true between seek() and the next maybeSeek()
+ private long pendingFP; // block file pointer requested by seek()
+ private int pendingUpto; // offset requested by seek()
+ private long lastBlockFP; // file pointer at which the current block was read
+ private int blockSize; // size of the current block, as returned by readBlock()
+ private final BlockReader blockReader;
+
+ public Reader(final IndexInput in, final int[] pending, final BlockReader blockReader) {
+ this.in = in;
+ this.pending = pending;
+ this.blockReader = blockReader;
+ }
+
+ void seek(final long fp, final int upto) {
+ // TODO: should we do this in real-time, not lazy?
+ pendingFP = fp; // only recorded here; applied by maybeSeek() on the next call to next()
+ pendingUpto = upto;
+ Debug.Assert( pendingUpto >= 0: "pendingUpto=" + pendingUpto; // NOTE(review): unbalanced `(` and Java-style `cond : message` - looks like a rewritten Java `assert`; must be fixed before this compiles
+ seekPending = true;
+ }
+
+ private final void maybeSeek() { // applies the position recorded by seek(), if one is outstanding
+ if (seekPending) {
+ if (pendingFP != lastBlockFP) { // requested block is not the one currently loaded
+ // need new block
+ in.seek(pendingFP);
+ blockReader.seek(pendingFP);
+ lastBlockFP = pendingFP;
+ blockSize = blockReader.readBlock();
+ }
+ upto = pendingUpto;
+
+ // TODO: if we were more clever when writing the
+ // index, such that a seek point wouldn't be written
+ // until the int encoder "committed", we could avoid
+ // this (likely minor) inefficiency:
+
+ // This is necessary for int encoders that are
+ // non-causal, ie must see future int values to
+ // encode the current ones.
+ while(upto >= blockSize) { // offset points past the loaded block: advance one block at a time
+ upto -= blockSize;
+ lastBlockFP = in.getFilePointer(); // remember where the block about to be read starts
+ blockSize = blockReader.readBlock();
+ }
+ seekPending = false;
+ }
+ }
+
+ @Override
+ public int next() {
+ this.maybeSeek();
+ if (upto == blockSize) { // current block fully consumed: read the following one
+ lastBlockFP = in.getFilePointer();
+ blockSize = blockReader.readBlock();
+ upto = 0;
+ }
+
+ return pending[upto++];
+ }
+ }
+
+ private class Index extends IntIndexInput.Index { // non-static: toString() reads the outer maxBlockSize
+ private long fp; // file pointer handed to Reader.seek()
+ private int upto; // offset handed to Reader.seek(); may exceed one block (see the disabled assert in read())
+
+ @Override
+ public void read(final DataInput indexIn, final bool absolute) {
+ if (absolute) {
+ upto = indexIn.readVInt(); // absolute form: vInt upto followed by vLong fp
+ fp = indexIn.readVLong();
+ } else {
+ final int uptoDelta = indexIn.readVInt(); // low bit is a flag, the remaining bits carry the value
+ if ((uptoDelta & 1) == 1) {
+ // same block
+ upto += uptoDelta >>> 1; // value is a delta on the previous upto; fp is unchanged
+ } else {
+ // new block
+ upto = uptoDelta >>> 1; // value is the upto itself
+ fp += indexIn.readVLong(); // fp is stored as a delta on the previous fp
+ }
+ }
+ // TODO: we can't do this Debug.Assert( because non-causal
+ // int encoders can have upto over the buffer size
+ //Debug.Assert( upto < maxBlockSize: "upto=" + upto + " max=" + maxBlockSize;
+ }
+
+ @Override
+ public String toString() {
+ return "VarIntBlock.Index fp=" + fp + " upto=" + upto + " maxBlock=" + maxBlockSize;
+ }
+
+ @Override
+ public void seek(final IntIndexInput.Reader other) {
+ ((Reader) other).seek(fp, upto); // cast assumes the reader came from this class's reader(); the seek itself is lazy
+ }
+
+ @Override
+ public void copyFrom(final IntIndexInput.Index other) {
+ final Index idx = (Index) other; // cast assumes `other` is this class's Index type
+ fp = idx.fp;
+ upto = idx.upto;
+ }
+
+ @Override
+ public Index clone() {
+ Index other = new Index(); // built directly and fields copied; super.clone() is not used
+ other.fp = fp;
+ other.upto = upto;
+ return other;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
new file mode 100644
index 0000000..574b7f4
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Intblock/VariableIntBlockIndexOutput.cs
@@ -0,0 +1,136 @@
+package org.apache.lucene.codecs.intblock;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Naive int block API that writes vInts. This is
+ * expected to give poor performance; it's really only for
+ * testing the pluggability. One should typically use pfor instead. */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.sep.IntIndexOutput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+
+// TODO: much of this can be shared code w/ the fixed case
+
+/** Abstract base class that writes variable-size blocks of ints
+ * to an IndexOutput. While this is a simple approach, a
+ * more performant approach would directly create an impl
+ * of IntIndexOutput inside Directory. Wrapping a generic
+ * IndexOutput will likely cost performance.
+ *
+ * @lucene.experimental
+ */
+public abstract class VariableIntBlockIndexOutput extends IntIndexOutput {
+ // NOTE(review): Java syntax mixed with C# fragments (`bool`, unbalanced `Debug.Assert( ...;`) - presumably a port in progress; not valid in either language as written.
+ protected final IndexOutput out; // destination; closed by close()
+
+ private int upto; // values passed to add() that add() has not yet reported as written
+ private bool hitExcDuringWrite; // stays true when add() throws inside write(); close() then skips the padding
+
+ // TODO what Var-Var codecs exist in practice... and what are their block sizes like?
+ // if it's less than 128 we should set that as max and use byte?
+
+ /** NOTE: maxBlockSize must be the maximum block size
+ * plus the max non-causal lookahead of your codec. EG Simple9
+ * requires lookahead=1 because on seeing the Nth value
+ * it knows it must now encode the N-1 values before it. */
+ protected VariableIntBlockIndexOutput(IndexOutput out, int maxBlockSize) {
+ this.out = out;
+ out.writeInt(maxBlockSize); // only written to the output; this class keeps no copy of it
+ }
+
+ /** Called one value at a time. Return the number of
+ * buffered input values that have been written to out. */
+ protected abstract int add(int value) ;
+
+ @Override
+ public IntIndexOutput.Index index() {
+ return new Index();
+ }
+
+ private class Index extends IntIndexOutput.Index {
+ long fp; // out's file pointer captured by mark()
+ int upto; // outer upto captured by mark()
+ long lastFP; // fp as of the previous write(); base for the delta form
+ int lastUpto; // upto as of the previous write(); base for the delta form
+
+ @Override
+ public void mark() {
+ fp = out.getFilePointer();
+ upto = VariableIntBlockIndexOutput.this.upto;
+ }
+
+ @Override
+ public void copyFrom(IntIndexOutput.Index other, bool copyLast) {
+ Index idx = (Index) other; // cast assumes `other` is this class's Index type
+ fp = idx.fp;
+ upto = idx.upto;
+ if (copyLast) { // also reset the delta base to the copied position
+ lastFP = fp;
+ lastUpto = upto;
+ }
+ }
+
+ @Override
+ public void write(DataOutput indexOut, bool absolute) {
+ Debug.Assert( upto >= 0; // NOTE(review): unbalanced `(` - looks like a rewritten Java `assert`; must be fixed before this compiles
+ if (absolute) {
+ indexOut.writeVInt(upto); // absolute form: vInt upto followed by vLong fp
+ indexOut.writeVLong(fp);
+ } else if (fp == lastFP) {
+ // same block
+ Debug.Assert( upto >= lastUpto; // NOTE(review): same malformed assert as above
+ int uptoDelta = upto - lastUpto;
+ indexOut.writeVInt(uptoDelta << 1 | 1); // parsed as (uptoDelta << 1) | 1: low bit 1 flags "same block"
+ } else {
+ // new block
+ indexOut.writeVInt(upto << 1); // low bit 0 flags "new block"; upto is written in full
+ indexOut.writeVLong(fp - lastFP); // fp is written as a delta on the previous fp
+ }
+ lastUpto = upto; // this position becomes the base for the next delta write
+ lastFP = fp;
+ }
+ }
+
+ @Override
+ public void write(int v) {
+ hitExcDuringWrite = true; // cleared below only if add() returns normally
+ upto -= add(v)-1; // net effect: +1 for v, minus the count add() reports as written
+ hitExcDuringWrite = false;
+ Debug.Assert( upto >= 0; // NOTE(review): same malformed assert as in Index.write()
+ }
+
+ @Override
+ public void close() {
+ try {
+ if (!hitExcDuringWrite) {
+ // stuff 0s in until the "real" data is flushed:
+ int stuffed = 0; // number of padding zeros added so far
+ while(upto > stuffed) { // more values pending than padding added, so non-padding values are still pending
+ upto -= add(0)-1;
+ Debug.Assert( upto >= 0; // NOTE(review): same malformed assert as in Index.write()
+ stuffed += 1;
+ }
+ }
+ } finally {
+ out.close(); // runs even if the padding above throws
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
new file mode 100644
index 0000000..3f014ce
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.csproj
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{3F79B6D4-4359-4F83-B64F-07F4F6262425}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Codecs</RootNamespace>
+ <AssemblyName>Lucene.Net.Codecs</AssemblyName>
+ <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ <TargetFrameworkProfile />
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="Appending\AppendingCodec.cs" />
+ <Compile Include="Appending\AppendingPostingsFormat.cs" />
+ <Compile Include="Appending\AppendingTermsReader.cs" />
+ <Compile Include="BlockTerms\BlockTermsFieldAndTerm.cs" />
+ <Compile Include="BlockTerms\BlockTermsReader.cs" />
+ <Compile Include="BlockTerms\BlockTermsWriter.cs" />
+ <Compile Include="BlockTerms\FixedGapTermsIndexReader.cs" />
+ <Compile Include="BlockTerms\FixedGapTermsIndexWriter.cs" />
+ <Compile Include="BlockTerms\TermsIndexReaderBase.cs" />
+ <Compile Include="BlockTerms\TermsIndexWriterBase.cs" />
+ <Compile Include="BlockTerms\VariableGapTermsIndexReader.cs" />
+ <Compile Include="BlockTerms\VariableGapTermsIndexWriter.cs" />
+ <Compile Include="Bloom\BloomFilterFactory.cs" />
+ <Compile Include="Bloom\BloomFilteringPostingsFormat.cs" />
+ <Compile Include="Bloom\DefaultBloomFilterFactory.cs" />
+ <Compile Include="Bloom\FuzzySet.cs" />
+ <Compile Include="Bloom\HashFunction.cs" />
+ <Compile Include="Bloom\MurmurHash2.cs" />
+ <Compile Include="DiskDV\DiskDocValuesFormat.cs" />
+ <Compile Include="DiskDV\DiskDocValuesProducer.cs" />
+ <Compile Include="DiskDV\DiskNormsFormat.cs" />
+ <Compile Include="Intblock\FixedIntBlockIndexInput.cs" />
+ <Compile Include="Intblock\FixedIntBlockIndexOutput.cs" />
+ <Compile Include="Intblock\IBlockReader.cs" />
+ <Compile Include="Intblock\Index.cs" />
+ <Compile Include="Intblock\Reader.cs" />
+ <Compile Include="Intblock\VariableIntBlockIndexInput.cs" />
+ <Compile Include="Intblock\VariableIntBlockIndexOutput.cs" />
+ <Compile Include="Memory\DirectDocValuesConsumer.cs" />
+ <Compile Include="Memory\DirectDocValuesFormat.cs" />
+ <Compile Include="Memory\DirectDocValuesProducer.cs" />
+ <Compile Include="Memory\DirectPostingsFormat.cs" />
+ <Compile Include="Memory\FSTOrdPostingsFormat.cs" />
+ <Compile Include="Memory\FSTOrdPulsing41PostingsFormat.cs" />
+ <Compile Include="Memory\FSTOrdTermsReader.cs" />
+ <Compile Include="Memory\FSTOrdTermsWriter.cs" />
+ <Compile Include="Memory\FSTPostingsFormat.cs" />
+ <Compile Include="Memory\FSTPulsing41PostingsFormat.cs" />
+ <Compile Include="Memory\FSTTermOutputs.cs" />
+ <Compile Include="Memory\FSTTermsReader.cs" />
+ <Compile Include="Memory\FSTTermsWriter.cs" />
+ <Compile Include="Memory\MemoryDocValuesConsumer.cs" />
+ <Compile Include="Memory\MemoryDocValuesFormat.cs" />
+ <Compile Include="Memory\MemoryDocValuesProducer.cs" />
+ <Compile Include="Memory\MemoryPostingsFormat.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="Pulsing\Pulsing41PostingsFormat.cs" />
+ <Compile Include="Pulsing\PulsingPostingsFormat.cs" />
+ <Compile Include="Pulsing\PulsingPostingsReader.cs" />
+ <Compile Include="Pulsing\PulsingPostingsWriter.cs" />
+ <Compile Include="Sep\IntIndexInput.cs" />
+ <Compile Include="Sep\IntIndexOutput.cs" />
+ <Compile Include="Sep\IntStreamFactory.cs" />
+ <Compile Include="Sep\SepPostingsReader.cs" />
+ <Compile Include="Sep\SepPostingsWriter.cs" />
+ <Compile Include="Sep\SepSkipListReader.cs" />
+ <Compile Include="Sep\SepSkipListWriter.cs" />
+ <Compile Include="SimpleText\SimpleTextCodec.cs" />
+ <Compile Include="SimpleText\SimpleTextDocValuesFormat.cs" />
+ <Compile Include="SimpleText\SimpleTextDocValuesReader.cs" />
+ <Compile Include="SimpleText\SimpleTextDocValuesWriter.cs" />
+ <Compile Include="SimpleText\SimpleTextFieldInfosFormat.cs" />
+ <Compile Include="SimpleText\SimpleTextFieldInfosReader.cs" />
+ <Compile Include="SimpleText\SimpleTextFieldInfosWriter.cs" />
+ <Compile Include="SimpleText\SimpleTextFieldsReader.cs" />
+ <Compile Include="SimpleText\SimpleTextFieldsWriter.cs" />
+ <Compile Include="SimpleText\SimpleTextLiveDocsFormat.cs" />
+ <Compile Include="SimpleText\SimpleTextNormsFormat.cs" />
+ <Compile Include="SimpleText\SimpleTextPostingsFormat.cs" />
+ <Compile Include="SimpleText\SimpleTextSegmentInfoFormat.cs" />
+ <Compile Include="SimpleText\SimpleTextSegmentInfoReader.cs" />
+ <Compile Include="SimpleText\SimpleTextSegmentInfoWriter.cs" />
+ <Compile Include="SimpleText\SimpleTextStoredFieldsFormat.cs" />
+ <Compile Include="SimpleText\SimpleTextStoredFieldsReader.cs" />
+ <Compile Include="SimpleText\SimpleTextStoredFieldsWriter.cs" />
+ <Compile Include="SimpleText\SimpleTextTermVectorsFormat.cs" />
+ <Compile Include="SimpleText\SimpleTextTermVectorsReader.cs" />
+ <Compile Include="SimpleText\SimpleTextTermVectorsWriter.cs" />
+ <Compile Include="SimpleText\SimpleTextUtil.cs" />
+ </ItemGroup>
+ <ItemGroup />
+ <ItemGroup>
+ <ProjectReference Include="..\core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.sln
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Lucene.Net.Codecs.sln b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.sln
new file mode 100644
index 0000000..3cf5780
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Lucene.Net.Codecs.sln
@@ -0,0 +1,26 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 2013
+VisualStudioVersion = 12.0.30110.0
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net.Codecs", "Lucene.Net.Codecs.csproj", "{3F79B6D4-4359-4F83-B64F-07F4F6262425}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lucene.Net", "..\Lucene.Net.Core\Lucene.Net.csproj", "{5D4AD9BE-1FFB-41AB-9943-25737971BF57}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Release|Any CPU = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {3F79B6D4-4359-4F83-B64F-07F4F6262425}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {3F79B6D4-4359-4F83-B64F-07F4F6262425}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {3F79B6D4-4359-4F83-B64F-07F4F6262425}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {3F79B6D4-4359-4F83-B64F-07F4F6262425}.Release|Any CPU.Build.0 = Release|Any CPU
+ {5D4AD9BE-1FFB-41AB-9943-25737971BF57}.Debug|Any CPU.ActiveCfg = Debug|x86
+ {5D4AD9BE-1FFB-41AB-9943-25737971BF57}.Release|Any CPU.ActiveCfg = Release|x86
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs b/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
new file mode 100644
index 0000000..2e7e013
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Memory/DirectDocValuesConsumer.cs
@@ -0,0 +1,304 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.VERSION_CURRENT;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.BYTES;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET;
+import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER;
+
+/**
+ * Writer for {@link DirectDocValuesFormat}
+ */
+
+class DirectDocValuesConsumer extends DocValuesConsumer {
+ IndexOutput data, meta; // data: the values themselves; meta: per-field entries describing them. Both set to null by close()
+ final int maxDoc; // segment doc count; not read again within this class
+ // NOTE(review): Java syntax mixed with C# fragments (`bool`, unbalanced `Debug.Assert( ...;`) - presumably a port in progress; not valid in either language as written.
+ DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) {
+ maxDoc = state.segmentInfo.getDocCount();
+ bool success = false;
+ try {
+ String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+ data = state.directory.createOutput(dataName, state.context);
+ CodecUtil.writeHeader(data, dataCodec, VERSION_CURRENT);
+ String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+ meta = state.directory.createOutput(metaName, state.context);
+ CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
+ success = true;
+ } finally {
+ if (!success) { // something above threw: release whatever was opened
+ IOUtils.closeWhileHandlingException(this);
+ }
+ }
+ }
+
+ @Override
+ public void addNumericField(FieldInfo field, Iterable<Number> values) {
+ meta.writeVInt(field.number); // every meta entry starts with the field number and a type byte
+ meta.writeByte(NUMBER);
+ addNumericFieldValues(field, values);
+ }
+ // Writes one numeric block. `values` is iterated up to three times (stats, optional missing bitset, values).
+ private void addNumericFieldValues(FieldInfo field, Iterable<Number> values) {
+ meta.writeLong(data.getFilePointer()); // meta: position in data at the time of this call
+ long minValue = Long.MAX_VALUE;
+ long maxValue = Long.MIN_VALUE;
+ bool missing = false; // set when at least one value is null
+
+ long count = 0;
+ for (Number nv : values) { // pass 1: value range, null detection, count
+ if (nv != null) {
+ long v = nv.longValue();
+ minValue = Math.min(minValue, v);
+ maxValue = Math.max(maxValue, v);
+ } else {
+ missing = true;
+ }
+ count++;
+ if (count >= DirectDocValuesFormat.MAX_SORTED_SET_ORDS) { // NOTE(review): rejects count == MAX although the message below says "must be <=" - confirm which is intended
+ throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + DirectDocValuesFormat.MAX_SORTED_SET_ORDS + " values/total ords");
+ }
+ }
+ meta.writeInt((int) count); // cast is safe: count stays below the int-valued limit checked above
+
+ if (missing) {
+ long start = data.getFilePointer();
+ writeMissingBitset(values);
+ meta.writeLong(start); // meta: bitset start and length in bytes
+ meta.writeLong(data.getFilePointer() - start);
+ } else {
+ meta.writeLong(-1L); // meta: -1 is written when no value is null
+ }
+
+ byte byteWidth; // smallest of 1/2/4/8 bytes whose signed range covers [minValue, maxValue]
+ if (minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
+ byteWidth = 1;
+ } else if (minValue >= Short.MIN_VALUE && maxValue <= Short.MAX_VALUE) {
+ byteWidth = 2;
+ } else if (minValue >= Integer.MIN_VALUE && maxValue <= Integer.MAX_VALUE) {
+ byteWidth = 4;
+ } else {
+ byteWidth = 8;
+ }
+ meta.writeByte(byteWidth);
+
+ for (Number nv : values) { // final pass: write each value at the chosen width
+ long v;
+ if (nv != null) {
+ v = nv.longValue();
+ } else {
+ v = 0; // null is stored as 0
+ }
+
+ switch(byteWidth) {
+ case 1:
+ data.writeByte((byte) v);
+ break;
+ case 2:
+ data.writeShort((short) v);
+ break;
+ case 4:
+ data.writeInt((int) v);
+ break;
+ case 8:
+ data.writeLong(v);
+ break;
+ }
+ }
+ }
+
+ @Override
+ public void close() {
+ bool success = false;
+ try {
+ if (meta != null) {
+ meta.writeVInt(-1); // write EOF marker
+ CodecUtil.writeFooter(meta); // write checksum
+ }
+ if (data != null) {
+ CodecUtil.writeFooter(data);
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(data, meta);
+ } else {
+ IOUtils.closeWhileHandlingException(data, meta); // footer writing failed: close without masking that exception
+ }
+ data = meta = null; // on any later call the null checks above skip the marker/footer writes
+ }
+ }
+
+ @Override
+ public void addBinaryField(FieldInfo field, final Iterable<BytesRef> values) {
+ meta.writeVInt(field.number);
+ meta.writeByte(BYTES);
+ addBinaryFieldValues(field, values);
+ }
+ // Writes one binary block: concatenated bytes, optional missing bitset, then count+1 int addresses.
+ private void addBinaryFieldValues(FieldInfo field, final Iterable<BytesRef> values) {
+ // write the byte[] data
+ final long startFP = data.getFilePointer();
+ bool missing = false; // set when at least one value is null
+ long totalBytes = 0;
+ int count = 0;
+ for(BytesRef v : values) {
+ if (v != null) {
+ data.writeBytes(v.bytes, v.offset, v.length);
+ totalBytes += v.length;
+ if (totalBytes > DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH) { // checked after the bytes have already been written to data
+ throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, cannot have more than DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH (" + DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH + ") bytes");
+ }
+ } else {
+ missing = true;
+ }
+ count++;
+ }
+
+ meta.writeLong(startFP);
+ meta.writeInt((int) totalBytes); // cast is safe: totalBytes was checked against an int-valued limit above
+ meta.writeInt(count);
+ if (missing) {
+ long start = data.getFilePointer();
+ writeMissingBitset(values);
+ meta.writeLong(start); // meta: bitset start and length in bytes
+ meta.writeLong(data.getFilePointer() - start);
+ } else {
+ meta.writeLong(-1L); // meta: -1 is written when no value is null
+ }
+
+ int addr = 0; // running offset of each value within the concatenated bytes
+ for (BytesRef v : values) {
+ data.writeInt(addr); // start offset of this value; a null value adds no length
+ if (v != null) {
+ addr += v.length;
+ }
+ }
+ data.writeInt(addr); // one extra trailing entry: the total length
+ }
+
+ // TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
+ // but this is very simple, and algorithms only check this for values of 0 anyway (doesn't slow down normal decode)
+ void writeMissingBitset(Iterable<?> values) {
+ long bits = 0; // word being filled; a bit is set when the value is NOT null
+ int count = 0; // bits used in the current word
+ for (Object v : values) {
+ if (count == 64) { // word is full: write it and start a new one
+ data.writeLong(bits);
+ count = 0;
+ bits = 0;
+ }
+ if (v != null) {
+ bits |= 1L << (count & 0x3f); // count is always < 64 here, so the mask does not change it
+ }
+ count++;
+ }
+ if (count > 0) { // write the last word, full or partial; nothing is written when values is empty
+ data.writeLong(bits);
+ }
+ }
+
+ @Override
+ public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) {
+ meta.writeVInt(field.number);
+ meta.writeByte(SORTED);
+
+ // write the ordinals as numerics
+ addNumericFieldValues(field, docToOrd);
+
+ // write the values as binary
+ addBinaryFieldValues(field, values);
+ }
+
+ // note: this might not be the most efficient... but it's fairly simple
+ @Override
+ public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, final Iterable<Number> docToOrdCount, final Iterable<Number> ords) {
+ meta.writeVInt(field.number);
+ meta.writeByte(SORTED_SET);
+
+ // First write docToOrdCounts, except we "aggregate" the
+ // counts so they turn into addresses, and add a final
+ // value = the total aggregate:
+ addNumericFieldValues(field, new Iterable<Number>() {
+
+ // Just aggregates the count values so they become
+ // "addresses", and adds one more value in the end
+ // (the final sum):
+
+ @Override
+ public Iterator<Number> iterator() {
+ final Iterator<Number> iter = docToOrdCount.iterator(); // a fresh source iterator per call, so the Iterable can be traversed repeatedly
+
+ return new Iterator<Number>() {
+
+ long sum; // running total of the counts consumed so far
+ bool ended; // true once the extra final value has been returned
+
+ @Override
+ public bool hasNext() {
+ return iter.hasNext() || !ended; // one more element than the source: the final total
+ }
+
+ @Override
+ public Number next() {
+ long toReturn = sum; // the sum BEFORE adding the current count is what gets returned
+
+ if (iter.hasNext()) {
+ Number n = iter.next();
+ if (n != null) { // a null count adds nothing to the sum
+ sum += n.longValue();
+ }
+ } else if (!ended) {
+ ended = true; // this call returns the grand total
+ } else {
+ Debug.Assert( false; // NOTE(review): unbalanced `(` - looks like a rewritten Java `assert false`; reached only if next() is called after hasNext() is false
+ }
+
+ return toReturn;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ });
+
+ // Write ordinals for all docs, appended into one big
+ // numerics:
+ addNumericFieldValues(field, ords);
+
+ // write the values as binary
+ addBinaryFieldValues(field, values);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Memory/DirectDocValuesFormat.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/DirectDocValuesFormat.cs b/src/Lucene.Net.Codecs/Memory/DirectDocValuesFormat.cs
new file mode 100644
index 0000000..1f89e43
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Memory/DirectDocValuesFormat.cs
@@ -0,0 +1,83 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.ArrayUtil;
+
+/** In-memory docvalues format that does no (or very little)
+ * compression. Indexed values are stored on disk, but
+ * then at search time all values are loaded into memory as
+ * simple java arrays. For numeric values, it uses
+ * byte[], short[], int[], long[] as necessary to fit the
+ * range of the values. For binary values, there is an int
+ * (4 bytes) overhead per value.
+ *
+ * <p>Limitations:
+ * <ul>
+ * <li>For binary and sorted fields the total space
+ * required for all binary values cannot exceed about
+ * 2.1 GB (see #MAX_TOTAL_BYTES_LENGTH).</li>
+ *
+ * <li>For sorted set fields, the sum of the size of each
+ * document's set of values cannot exceed about 2.1 B
+ * values (see #MAX_SORTED_SET_ORDS). For example,
+ * if every document has 10 values (10 instances of
+ * {@link SortedSetDocValuesField}) added, then no
+ * more than ~210 M documents can be added to one
+ * segment. </li>
+ * </ul> */
+
+public class DirectDocValuesFormat extends DocValuesFormat {
+
+ /** The sum of all byte lengths for binary field, or for
+ * the unique values in sorted or sorted set fields, cannot
+ * exceed this. */
+ public final static int MAX_TOTAL_BYTES_LENGTH = ArrayUtil.MAX_ARRAY_LENGTH; // DirectDocValuesConsumer throws IllegalArgumentException once a field's total bytes exceed this
+
+ /** The sum of the number of values across all documents
+ * in a sorted set field cannot exceed this. */
+ public final static int MAX_SORTED_SET_ORDS = ArrayUtil.MAX_ARRAY_LENGTH; // DirectDocValuesConsumer checks the value count of every numeric block against this
+
+ /** Sole constructor. */
+ public DirectDocValuesFormat() {
+ super("Direct"); // the name string passed up to DocValuesFormat
+ }
+
+ @Override
+ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) {
+ return new DirectDocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION); // same four constants as fieldsProducer below
+ }
+
+ @Override
+ public DocValuesProducer fieldsProducer(SegmentReadState state) {
+ return new DirectDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
+ }
+
+ static final String DATA_CODEC = "DirectDocValuesData"; // the consumer passes this to CodecUtil.writeHeader for the data file
+ static final String DATA_EXTENSION = "dvdd"; // the consumer passes this to IndexFileNames.segmentFileName for the data file
+ static final String METADATA_CODEC = "DirectDocValuesMetadata"; // the consumer passes this to CodecUtil.writeHeader for the meta file
+ static final String METADATA_EXTENSION = "dvdm"; // the consumer passes this to IndexFileNames.segmentFileName for the meta file
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1da1cb5b/src/Lucene.Net.Codecs/Memory/DirectDocValuesProducer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Codecs/Memory/DirectDocValuesProducer.cs b/src/Lucene.Net.Codecs/Memory/DirectDocValuesProducer.cs
new file mode 100644
index 0000000..a95f384
--- /dev/null
+++ b/src/Lucene.Net.Codecs/Memory/DirectDocValuesProducer.cs
@@ -0,0 +1,511 @@
+package org.apache.lucene.codecs.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.RandomAccessOrds;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Reader for {@link DirectDocValuesFormat}
+ */
+
+class DirectDocValuesProducer extends DocValuesProducer {
+ // Per-field type tags read from the metadata file (see readFields).
+ static final byte NUMBER = 0;
+ static final byte BYTES = 1;
+ static final byte SORTED = 2;
+ static final byte SORTED_SET = 3;
+
+ // On-disk format versions accepted by the header checks in the constructor.
+ static final int VERSION_START = 0;
+ static final int VERSION_CHECKSUM = 1;
+ static final int VERSION_CURRENT = VERSION_CHECKSUM;
+
+ // Values captured once in the constructor.
+ private final int maxDoc;
+ private final int version;
+ private final IndexInput data;
+
+ // Running byte estimate: seeded with the shallow instance size and grown
+ // by loadNumeric/loadBinary as arrays are materialized.
+ private final AtomicLong ramBytesUsed;
+
+ // Metadata entries keyed by field number (file pointers and sizes only).
+ private final Map<Integer,NumericEntry> numerics = new HashMap<>();
+ private final Map<Integer,BinaryEntry> binaries = new HashMap<>();
+ private final Map<Integer,SortedEntry> sorteds = new HashMap<>();
+ private final Map<Integer,SortedSetEntry> sortedSets = new HashMap<>();
+
+ // In-RAM instances that have already been loaded, keyed by field number.
+ private final Map<Integer,NumericDocValues> numericInstances = new HashMap<>();
+ private final Map<Integer,BinaryDocValues> binaryInstances = new HashMap<>();
+ private final Map<Integer,SortedDocValues> sortedInstances = new HashMap<>();
+ private final Map<Integer,SortedSetRawValues> sortedSetInstances = new HashMap<>();
+ private final Map<Integer,Bits> docsWithFieldInstances = new HashMap<>();
+
+ /**
+  * Opens the doc values of one segment.
+  *
+  * <p>The metadata file is read completely (header, all field entries, then
+  * footer or EOF check depending on the version) and closed again. The data
+  * file is then opened, its header is checked, and it is kept open in
+  * {@code data} for the lazy loaders.
+  *
+  * @param state         segment to read; supplies directory, segment name,
+  *                      suffix, IO context and document count
+  * @param dataCodec     codec name expected in the data file header
+  * @param dataExtension file extension of the data file
+  * @param metaCodec     codec name expected in the metadata file header
+  * @param metaExtension file extension of the metadata file
+  * @throws CorruptIndexException if the data file's version differs from the
+  *         metadata file's version
+  */
+ DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) {
+   maxDoc = state.segmentInfo.getDocCount();
+   String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
+   // read in the entries from the metadata file.
+   ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context);
+   ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
+   // Fixed: was the C# keyword "bool", which is not a Java type.
+   boolean success = false;
+   try {
+     version = CodecUtil.checkHeader(in, metaCodec,
+                                     VERSION_START,
+                                     VERSION_CURRENT);
+     readFields(in);
+
+     // Only files written at VERSION_CHECKSUM or later carry a footer.
+     if (version >= VERSION_CHECKSUM) {
+       CodecUtil.checkFooter(in);
+     } else {
+       CodecUtil.checkEOF(in);
+     }
+     success = true;
+   } finally {
+     // On failure, close quietly so the original exception is the one that propagates.
+     if (success) {
+       IOUtils.close(in);
+     } else {
+       IOUtils.closeWhileHandlingException(in);
+     }
+   }
+
+   success = false;
+   try {
+     String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+     data = state.directory.openInput(dataName, state.context);
+     final int version2 = CodecUtil.checkHeader(data, dataCodec,
+                                                VERSION_START,
+                                                VERSION_CURRENT);
+     // Both files of a segment must have been written with the same format version.
+     if (version != version2) {
+       throw new CorruptIndexException("Format versions mismatch");
+     }
+
+     success = true;
+   } finally {
+     if (!success) {
+       IOUtils.closeWhileHandlingException(this.data);
+     }
+   }
+ }
+
+ private NumericEntry readNumericEntry(IndexInput meta) {
+ NumericEntry entry = new NumericEntry();
+ entry.offset = meta.readLong();
+ entry.count = meta.readInt();
+ entry.missingOffset = meta.readLong();
+ if (entry.missingOffset != -1) {
+ entry.missingBytes = meta.readLong();
+ } else {
+ entry.missingBytes = 0;
+ }
+ entry.byteWidth = meta.readByte();
+
+ return entry;
+ }
+
+ private BinaryEntry readBinaryEntry(IndexInput meta) {
+ BinaryEntry entry = new BinaryEntry();
+ entry.offset = meta.readLong();
+ entry.numBytes = meta.readInt();
+ entry.count = meta.readInt();
+ entry.missingOffset = meta.readLong();
+ if (entry.missingOffset != -1) {
+ entry.missingBytes = meta.readLong();
+ } else {
+ entry.missingBytes = 0;
+ }
+
+ return entry;
+ }
+
+ private SortedEntry readSortedEntry(IndexInput meta) {
+ SortedEntry entry = new SortedEntry();
+ entry.docToOrd = readNumericEntry(meta);
+ entry.values = readBinaryEntry(meta);
+ return entry;
+ }
+
+ private SortedSetEntry readSortedSetEntry(IndexInput meta) {
+ SortedSetEntry entry = new SortedSetEntry();
+ entry.docToOrdAddress = readNumericEntry(meta);
+ entry.ords = readNumericEntry(meta);
+ entry.values = readBinaryEntry(meta);
+ return entry;
+ }
+
+ private void readFields(IndexInput meta) {
+ int fieldNumber = meta.readVInt();
+ while (fieldNumber != -1) {
+ int fieldType = meta.readByte();
+ if (fieldType == NUMBER) {
+ numerics.put(fieldNumber, readNumericEntry(meta));
+ } else if (fieldType == BYTES) {
+ binaries.put(fieldNumber, readBinaryEntry(meta));
+ } else if (fieldType == SORTED) {
+ sorteds.put(fieldNumber, readSortedEntry(meta));
+ } else if (fieldType == SORTED_SET) {
+ sortedSets.put(fieldNumber, readSortedSetEntry(meta));
+ } else {
+ throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
+ }
+ fieldNumber = meta.readVInt();
+ }
+ }
+
+ /**
+  * Returns the current value of the byte counter, which starts at the
+  * shallow size of this instance and grows as loadNumeric and loadBinary
+  * materialize arrays.
+  */
+ @Override
+ public long ramBytesUsed() {
+   final long total = this.ramBytesUsed.get();
+   return total;
+ }
+
+ /**
+  * Verifies the checksum of the whole data file. Does nothing for files
+  * written before {@code VERSION_CHECKSUM}.
+  */
+ @Override
+ public void checkIntegrity() {
+   if (version < VERSION_CHECKSUM) {
+     return;
+   }
+   CodecUtil.checksumEntireFile(data);
+ }
+
+ /**
+  * Returns the numeric values of {@code field}, loading them from the data
+  * file on the first request and serving the cached instance afterwards.
+  */
+ @Override
+ public synchronized NumericDocValues getNumeric(FieldInfo field) {
+   final NumericDocValues cached = numericInstances.get(field.number);
+   if (cached != null) {
+     return cached;
+   }
+   // First request for this field: load and remember the result.
+   final NumericDocValues loaded = loadNumeric(numerics.get(field.number));
+   numericInstances.put(field.number, loaded);
+   return loaded;
+ }
+
+ /**
+  * Reads all values of a numeric entry into a primitive array and wraps the
+  * array in a {@link NumericDocValues}.
+  *
+  * <p>Reading starts at {@code entry.offset + entry.missingBytes}. The
+  * element type of the array follows {@code entry.byteWidth} (1, 2, 4 or 8),
+  * and the array's size is added to {@code ramBytesUsed}.
+  *
+  * @param entry metadata describing where the values are and how wide they are
+  * @return array-backed values; {@code get(idx)} indexes the array directly
+  * @throws AssertionError if {@code entry.byteWidth} is not 1, 2, 4 or 8
+  */
+ private NumericDocValues loadNumeric(NumericEntry entry) {
+   data.seek(entry.offset + entry.missingBytes);
+   switch (entry.byteWidth) {
+   case 1:
+     {
+       final byte[] values = new byte[entry.count];
+       data.readBytes(values, 0, entry.count);
+       ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
+       return new NumericDocValues() {
+         @Override
+         public long get(int idx) {
+           return values[idx];
+         }
+       };
+     }
+
+   case 2:
+     {
+       final short[] values = new short[entry.count];
+       for(int i=0;i<entry.count;i++) {
+         values[i] = data.readShort();
+       }
+       ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
+       return new NumericDocValues() {
+         @Override
+         public long get(int idx) {
+           return values[idx];
+         }
+       };
+     }
+
+   case 4:
+     {
+       final int[] values = new int[entry.count];
+       for(int i=0;i<entry.count;i++) {
+         values[i] = data.readInt();
+       }
+       ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
+       return new NumericDocValues() {
+         @Override
+         public long get(int idx) {
+           return values[idx];
+         }
+       };
+     }
+
+   case 8:
+     {
+       final long[] values = new long[entry.count];
+       for(int i=0;i<entry.count;i++) {
+         values[i] = data.readLong();
+       }
+       ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values));
+       return new NumericDocValues() {
+         @Override
+         public long get(int idx) {
+           return values[idx];
+         }
+       };
+     }
+
+   default:
+     // Fixed: this line was mangled into "throw new Debug.Assert(ionError();".
+     throw new AssertionError();
+   }
+ }
+
+ /**
+  * Returns the binary values of {@code field}, loading them from the data
+  * file on the first request and serving the cached instance afterwards.
+  */
+ @Override
+ public synchronized BinaryDocValues getBinary(FieldInfo field) {
+   final BinaryDocValues cached = binaryInstances.get(field.number);
+   if (cached != null) {
+     return cached;
+   }
+   // First request for this field: load and remember the result.
+   final BinaryDocValues loaded = loadBinary(binaries.get(field.number));
+   binaryInstances.put(field.number, loaded);
+   return loaded;
+ }
+
+ /**
+  * Reads a binary entry into two arrays: one holding all value bytes
+  * back to back, and one holding {@code count + 1} start addresses into it.
+  *
+  * <p>The bytes are read from {@code entry.offset}; the addresses are read
+  * from {@code entry.offset + entry.numBytes + entry.missingBytes}. The
+  * size of both arrays is added to {@code ramBytesUsed}.
+  *
+  * @param entry metadata describing where the bytes and addresses are
+  * @return values whose {@code get} points the supplied {@link BytesRef}
+  *         at the shared byte array (no copy is made)
+  */
+ private BinaryDocValues loadBinary(BinaryEntry entry) {
+   // All value bytes, concatenated.
+   final byte[] blob = new byte[entry.numBytes];
+   data.seek(entry.offset);
+   data.readBytes(blob, 0, entry.numBytes);
+
+   // One start address per value, plus a final one marking the end of the last value.
+   final int[] starts = new int[entry.count + 1];
+   data.seek(entry.offset + entry.numBytes + entry.missingBytes);
+   int slot = 0;
+   while (slot < entry.count) {
+     starts[slot] = data.readInt();
+     slot++;
+   }
+   starts[entry.count] = data.readInt();
+
+   final long loadedBytes = RamUsageEstimator.sizeOf(blob) + RamUsageEstimator.sizeOf(starts);
+   ramBytesUsed.addAndGet(loadedBytes);
+
+   return new BinaryDocValues() {
+     @Override
+     public void get(int docID, BytesRef result) {
+       result.bytes = blob;
+       result.offset = starts[docID];
+       // The value ends where the next one starts.
+       result.length = starts[docID + 1] - result.offset;
+     }
+   };
+ }
+
+ /**
+  * Returns the sorted values of {@code field}, loading them from the data
+  * file on the first request and serving the cached instance afterwards.
+  */
+ @Override
+ public synchronized SortedDocValues getSorted(FieldInfo field) {
+   final SortedDocValues cached = sortedInstances.get(field.number);
+   if (cached != null) {
+     return cached;
+   }
+   // First request for this field: load and remember the result.
+   final SortedDocValues loaded = loadSorted(field);
+   sortedInstances.put(field.number, loaded);
+   return loaded;
+ }
+
+ /**
+  * Builds the sorted values of {@code field} from its two parts: a numeric
+  * doc-to-ord mapping and the binary values addressed by ord.
+  *
+  * @param field field whose sorted entry is looked up by number
+  * @return a view over the two loaded parts; lookupTerm and termsEnum are
+  *         left to the superclass
+  */
+ private SortedDocValues loadSorted(FieldInfo field) {
+   final SortedEntry sortedEntry = sorteds.get(field.number);
+   // Load order is kept: doc-to-ord first, then the values.
+   final NumericDocValues ordByDoc = loadNumeric(sortedEntry.docToOrd);
+   final BinaryDocValues valueByOrd = loadBinary(sortedEntry.values);
+
+   return new SortedDocValues() {
+
+     @Override
+     public int getOrd(int docID) {
+       final long ord = ordByDoc.get(docID);
+       return (int) ord;
+     }
+
+     @Override
+     public void lookupOrd(int ord, BytesRef result) {
+       valueByOrd.get(ord, result);
+     }
+
+     @Override
+     public int getValueCount() {
+       return sortedEntry.values.count;
+     }
+
+     // lookupTerm: inherited binary search is used.
+
+     // termsEnum: inherited implementation is used.
+   };
+ }
+
+ /**
+  * Returns a fresh sorted set view for {@code field}.
+  *
+  * <p>The underlying raw data (addresses, ords, values) is loaded once per
+  * field and cached, but the returned object is new on every call because
+  * it carries per-document iteration state.
+  */
+ @Override
+ public synchronized SortedSetDocValues getSortedSet(FieldInfo field) {
+   final SortedSetEntry entry = sortedSets.get(field.number);
+   SortedSetRawValues raw = sortedSetInstances.get(field.number);
+   if (raw == null) {
+     // First request for this field: load and remember the raw data.
+     raw = loadSortedSet(entry);
+     sortedSetInstances.put(field.number, raw);
+   }
+
+   final NumericDocValues addressByDoc = raw.docToOrdAddress;
+   final NumericDocValues ordList = raw.ords;
+   final BinaryDocValues valueByOrd = raw.values;
+
+   // A new iterator per caller, since the fields below are mutable state.
+   return new RandomAccessOrds() {
+     int ordStart;  // index of the current document's first ord
+     int ordUpto;   // index of the next ord nextOrd() will return
+     int ordLimit;  // index one past the current document's last ord
+
+     @Override
+     public long nextOrd() {
+       if (ordUpto == ordLimit) {
+         return NO_MORE_ORDS;
+       }
+       return ordList.get(ordUpto++);
+     }
+
+     @Override
+     public void setDocument(int docID) {
+       final int first = (int) addressByDoc.get(docID);
+       ordStart = first;
+       ordUpto = first;
+       // The next document's start address is this document's end.
+       ordLimit = (int) addressByDoc.get(docID + 1);
+     }
+
+     @Override
+     public void lookupOrd(long ord, BytesRef result) {
+       valueByOrd.get((int) ord, result);
+     }
+
+     @Override
+     public long getValueCount() {
+       return entry.values.count;
+     }
+
+     @Override
+     public long ordAt(int index) {
+       return ordList.get(ordStart + index);
+     }
+
+     @Override
+     public int cardinality() {
+       return ordLimit - ordStart;
+     }
+
+     // lookupTerm: inherited binary search is used.
+
+     // termsEnum: inherited implementation is used.
+   };
+ }
+
+ private SortedSetRawValues loadSortedSet(SortedSetEntry entry) {
+ SortedSetRawValues instance = new SortedSetRawValues();
+ instance.docToOrdAddress = loadNumeric(entry.docToOrdAddress);
+ instance.ords = loadNumeric(entry.ords);
+ instance.values = loadBinary(entry.values);
+ return instance;
+ }
+
+ /**
+  * Returns the docs-with-field bits of one field.
+  *
+  * <p>An {@code offset} of -1 means no bitset was written, so every document
+  * matches. Otherwise the bitset is read as {@code length / 8} longs from a
+  * clone of the data input, wrapped in a {@link FixedBitSet} of
+  * {@code maxDoc} bits, and cached by field number.
+  *
+  * @param fieldNumber cache key
+  * @param offset      position of the bitset in the data file, or -1
+  * @param length      size of the bitset in bytes; must be a multiple of 8
+  * @return the cached or newly loaded bits
+  */
+ private Bits getMissingBits(int fieldNumber, final long offset, final long length) {
+   if (offset == -1) {
+     return new Bits.MatchAllBits(maxDoc);
+   } else {
+     Bits instance;
+     synchronized(this) {
+       instance = docsWithFieldInstances.get(fieldNumber);
+       if (instance == null) {
+         // Read through a clone so the shared input's position is left untouched.
+         IndexInput data = this.data.clone();
+         data.seek(offset);
+         // Fixed: this line was mangled into "Debug.Assert( length % 8 == 0;".
+         assert length % 8 == 0;
+         // Fixed: shift the long first, then narrow. "(int) length >> 3"
+         // narrowed before shifting.
+         long bits[] = new long[(int) (length >> 3)];
+         for (int i = 0; i < bits.length; i++) {
+           bits[i] = data.readLong();
+         }
+         instance = new FixedBitSet(bits, maxDoc);
+         docsWithFieldInstances.put(fieldNumber, instance);
+       }
+     }
+     return instance;
+   }
+ }
+
+ /**
+  * Returns the bits marking which documents have a value for {@code field}.
+  *
+  * <p>For sorted and sorted set fields the bits are derived from the values
+  * via {@code DocValues.docsWithValue}. For binary and numeric fields they
+  * come from getMissingBits, using the offset and length stored in the
+  * field's metadata entry.
+  *
+  * @throws AssertionError if the field's doc values type is none of the four
+  *         handled here
+  */
+ @Override
+ public Bits getDocsWithField(FieldInfo field) {
+   switch(field.getDocValuesType()) {
+     case SORTED_SET:
+       return DocValues.docsWithValue(getSortedSet(field), maxDoc);
+     case SORTED:
+       return DocValues.docsWithValue(getSorted(field), maxDoc);
+     case BINARY:
+       BinaryEntry be = binaries.get(field.number);
+       return getMissingBits(field.number, be.missingOffset, be.missingBytes);
+     case NUMERIC:
+       NumericEntry ne = numerics.get(field.number);
+       return getMissingBits(field.number, ne.missingOffset, ne.missingBytes);
+     default:
+       // Fixed: this line was mangled into "throw new Debug.Assert(ionError();".
+       throw new AssertionError();
+   }
+ }
+
+ /** Closes the data file input that the constructor opened. */
+ @Override
+ public void close() {
+   this.data.close();
+ }
+
+ /** The three loaded parts of a sorted set field, cached per field number. */
+ static class SortedSetRawValues {
+   /** For document d, the index of its first ord; d+1 gives the end. */
+   NumericDocValues docToOrdAddress;
+
+   /** All ords of all documents, addressed through docToOrdAddress. */
+   NumericDocValues ords;
+
+   /** The values, looked up by ord. */
+   BinaryDocValues values;
+ }
+
+ /** Metadata of one numeric value array, as read by readNumericEntry. */
+ static class NumericEntry {
+   // Location in the data file: values are read starting at offset + missingBytes.
+   long offset;
+   long missingOffset;  // position of the docs-with-field bitset, or -1 if none
+   long missingBytes;   // bitset length in bytes; 0 when missingOffset is -1
+
+   // Shape of the value array.
+   int count;           // number of values
+   byte byteWidth;      // bytes per value; loadNumeric accepts 1, 2, 4 or 8
+
+   // Not read or written anywhere in this class.
+   int packedIntsVersion;
+ }
+
+ /** Metadata of one binary value block, as read by readBinaryEntry. */
+ static class BinaryEntry {
+   // Location in the data file: value bytes start at offset; the address
+   // table is read from offset + numBytes + missingBytes.
+   long offset;
+   long missingOffset;  // position of the docs-with-field bitset, or -1 if none
+   long missingBytes;   // bitset length in bytes; 0 when missingOffset is -1
+
+   // Sizes.
+   int count;           // number of values (the address table has count + 1 ints)
+   int numBytes;        // total bytes of all values
+
+   // Not read or written anywhere in this class.
+   int minLength;
+   int maxLength;
+   int packedIntsVersion;
+   int blockSize;
+ }
+
+ /** Metadata of a sorted field: one numeric part and one binary part. */
+ static class SortedEntry {
+   /** Per-document ord array. */
+   NumericEntry docToOrd;
+
+   /** The values, addressed by ord. */
+   BinaryEntry values;
+ }
+
+ /** Metadata of a sorted set field: two numeric parts and one binary part. */
+ static class SortedSetEntry {
+   /** Per-document start index into the ord list. */
+   NumericEntry docToOrdAddress;
+
+   /** The ord list of all documents. */
+   NumericEntry ords;
+
+   /** The values, addressed by ord. */
+   BinaryEntry values;
+ }
+
+ /** Offset/ord-count pair. NOTE(review): not referenced anywhere in this class. */
+ static class FSTEntry {
+   long offset;
+
+   long numOrds;
+ }
+}