You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/09/16 00:47:08 UTC
[10/11] git commit: Skeleton porting of Lucene.Net.Misc
Skeleton porting of Lucene.Net.Misc
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/674f0cb9
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/674f0cb9
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/674f0cb9
Branch: refs/heads/master
Commit: 674f0cb97dfae0961d5f9622f49f17d891be08bc
Parents: 882f487
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Tue Sep 16 01:39:08 2014 +0300
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Tue Sep 16 01:39:08 2014 +0300
----------------------------------------------------------------------
src/Lucene.Net.Misc/ByteBuffer.cs | 325 ++++++
src/Lucene.Net.Misc/Document/LazyDocument.cs | 226 ++++
.../Index/CompoundFileExtractor.cs | 165 +++
src/Lucene.Net.Misc/Index/IndexSplitter.cs | 200 ++++
.../Index/MultiPassIndexSplitter.cs | 329 ++++++
src/Lucene.Net.Misc/Index/PKIndexSplitter.cs | 220 ++++
.../Index/Sorter/BlockJoinComparatorSource.cs | 321 ++++++
.../Sorter/EarlyTerminatingSortingCollector.cs | 147 +++
src/Lucene.Net.Misc/Index/Sorter/Sorter.cs | 404 +++++++
.../Index/Sorter/SortingAtomicReader.cs | 1081 ++++++++++++++++++
.../Index/Sorter/SortingMergePolicy.cs | 309 +++++
src/Lucene.Net.Misc/Lucene.Net.Misc.csproj | 73 ++
src/Lucene.Net.Misc/Misc/GetTermInfo.cs | 74 ++
src/Lucene.Net.Misc/Misc/HighFreqTerms.cs | 230 ++++
src/Lucene.Net.Misc/Misc/IndexMergeTool.cs | 66 ++
src/Lucene.Net.Misc/Misc/SweetSpotSimilarity.cs | 238 ++++
src/Lucene.Net.Misc/Misc/TermStats.cs | 55 +
src/Lucene.Net.Misc/Properties/AssemblyInfo.cs | 35 +
src/Lucene.Net.Misc/Store/NativePosixUtil.cs | 64 ++
.../Store/NativeUnixDirectory.cs | 527 +++++++++
src/Lucene.Net.Misc/Store/WindowsDirectory.cs | 181 +++
src/Lucene.Net.Misc/Util/Fst/ListOfOutputs.cs | 246 ++++
.../Util/Fst/UpToTwoPositiveIntOutputs.cs | 328 ++++++
23 files changed, 5844 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/ByteBuffer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/ByteBuffer.cs b/src/Lucene.Net.Misc/ByteBuffer.cs
new file mode 100644
index 0000000..204f7de
--- /dev/null
+++ b/src/Lucene.Net.Misc/ByteBuffer.cs
@@ -0,0 +1,325 @@
+//-------------------------------------------------------------------------------------------
+// Copyright © 2007 - 2014 Tangible Software Solutions Inc.
+// This class can be used by anyone provided that the copyright notice remains intact.
+//
+// This class is used to simulate the java.nio.ByteBuffer class in C#.
+//
+// Instances are only obtainable via the static 'allocate' method.
+//
+// Some methods are not available:
+// Methods which create shared views of the buffer, such as: array,
+// asCharBuffer, asDoubleBuffer, asFloatBuffer, asIntBuffer, asLongBuffer,
+// asReadOnlyBuffer, asShortBuffer, duplicate, slice, & wrap.
+//
+// Methods mark, reset, isReadOnly, order, compareTo, arrayOffset, & limit (setter).
+//-------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------
// Copyright © 2007 - 2014 Tangible Software Solutions Inc.
// This class can be used by anyone provided that the copyright notice remains intact.
//
// This class is used to simulate the java.nio.ByteBuffer class in C#.
//
// Instances are only obtainable via the static 'allocate' method.
//
// Some methods are not available:
// Methods which create shared views of the buffer, such as: array,
// asCharBuffer, asDoubleBuffer, asFloatBuffer, asIntBuffer, asLongBuffer,
// asReadOnlyBuffer, asShortBuffer, duplicate, slice, & wrap.
//
// Methods mark, reset, isReadOnly, order, compareTo, arrayOffset, & limit (setter).
//-------------------------------------------------------------------------------------------
public class ByteBuffer : System.IDisposable
{
    //'Mode' is only used to determine whether to return data length or capacity from the 'limit' method:
    private enum Mode
    {
        Read,
        Write
    }
    private Mode mode;

    private System.IO.MemoryStream stream;
    private System.IO.BinaryReader reader;
    private System.IO.BinaryWriter writer;

    private ByteBuffer()
    {
        stream = new System.IO.MemoryStream();
        reader = new System.IO.BinaryReader(stream);
        writer = new System.IO.BinaryWriter(stream);
    }

    // BUG FIX: cleanup previously happened in a finalizer, but a finalizer must not
    // touch other managed objects (they may already have been finalized), and
    // MemoryStream holds no unmanaged resources anyway. Deterministic IDisposable
    // (with no finalizer) is the correct pattern here.
    public void Dispose()
    {
        writer.Dispose();
        reader.Dispose();
        stream.Dispose();
        System.GC.SuppressFinalize(this);
    }

    /// <summary>Creates a new write-mode buffer backed by a MemoryStream of the given capacity.</summary>
    public static ByteBuffer allocate(int capacity)
    {
        ByteBuffer buffer = new ByteBuffer();
        buffer.stream.Capacity = capacity;
        buffer.mode = Mode.Write;
        return buffer;
    }

    /// <summary>This wrapper class makes no distinction between 'allocate' and 'allocateDirect'.</summary>
    public static ByteBuffer allocateDirect(int capacity)
    {
        return allocate(capacity);
    }

    public int capacity()
    {
        return stream.Capacity;
    }

    /// <summary>Switches from writing to reading: limit becomes the bytes written, position resets to 0.</summary>
    public ByteBuffer flip()
    {
        mode = Mode.Read;
        stream.SetLength(stream.Position);
        stream.Position = 0;
        return this;
    }

    /// <summary>Switches back to write mode from the start of the buffer (data is not erased).</summary>
    public ByteBuffer clear()
    {
        mode = Mode.Write;
        stream.Position = 0;
        return this;
    }

    /// <summary>Discards bytes already read and prepares the buffer for writing again.</summary>
    public ByteBuffer compact()
    {
        mode = Mode.Write;
        System.IO.MemoryStream newStream = new System.IO.MemoryStream(stream.Capacity);
        stream.CopyTo(newStream);
        stream = newStream;
        // BUG FIX: the reader/writer previously kept pointing at the OLD stream after a
        // compact(), so every subsequent get/put silently operated on stale data.
        // Rebind them to the replacement stream.
        reader = new System.IO.BinaryReader(stream);
        writer = new System.IO.BinaryWriter(stream);
        return this;
    }

    public ByteBuffer rewind()
    {
        stream.Position = 0;
        return this;
    }

    /// <summary>In write mode: the capacity. In read mode: the amount of data available.</summary>
    public long limit()
    {
        if (mode == Mode.Write)
            return stream.Capacity;
        else
            return stream.Length;
    }

    public long position()
    {
        return stream.Position;
    }

    public ByteBuffer position(long newPosition)
    {
        stream.Position = newPosition;
        return this;
    }

    public long remaining()
    {
        return this.limit() - this.position();
    }

    public bool hasRemaining()
    {
        return this.remaining() > 0;
    }

    /// <summary>Reads one byte; returns -1 at end of data (unlike java.nio, no exception is thrown).</summary>
    public int get()
    {
        return stream.ReadByte();
    }

    public ByteBuffer get(byte[] dst, int offset, int length)
    {
        // BUG FIX: the return value of Read was ignored, silently leaving 'dst'
        // partially filled on a short read. Mirror java.nio's underflow contract.
        int read = stream.Read(dst, offset, length);
        if (read < length)
        {
            throw new System.IO.EndOfStreamException("Buffer underflow: requested " + length + " bytes but only " + read + " were available.");
        }
        return this;
    }

    public ByteBuffer put(byte b)
    {
        stream.WriteByte(b);
        return this;
    }

    public ByteBuffer put(byte[] src, int offset, int length)
    {
        stream.Write(src, offset, length);
        return this;
    }

    /// <summary>Content equality over the remaining bytes; both buffers' positions are restored.</summary>
    public bool Equals(ByteBuffer other)
    {
        if (other == null || this.remaining() != other.remaining())
        {
            return false;
        }

        long thisOriginalPosition = this.position();
        long otherOriginalPosition = other.position();

        bool differenceFound = false;
        // BUG FIX: the original loop bound was 'stream.Position < stream.Length', which
        // in write mode disagrees with remaining() and never consulted the other buffer.
        // Iterate a fixed count so the loop also terminates in write mode.
        long count = this.remaining();
        for (long i = 0; i < count; i++)
        {
            int a = this.get();
            int b = other.get();
            if (a != b)
            {
                differenceFound = true;
                break;
            }
            if (a == -1)
            {
                break; // both underlying streams exhausted early (write mode)
            }
        }

        this.position(thisOriginalPosition);
        other.position(otherOriginalPosition);

        return !differenceFound;
    }

    // Keep Equals(object) and GetHashCode consistent with Equals(ByteBuffer);
    // defining one without the others is a classic C# pitfall (CA guidance).
    public override bool Equals(object obj)
    {
        return Equals(obj as ByteBuffer);
    }

    public override int GetHashCode()
    {
        // Content-based hash over the remaining bytes (java.nio.ByteBuffer semantics);
        // the read position is restored afterwards.
        long originalPosition = position();
        int h = 1;
        long count = remaining();
        for (long i = 0; i < count; i++)
        {
            int b = get();
            if (b == -1)
            {
                break;
            }
            h = 31 * h + b;
        }
        position(originalPosition);
        return h;
    }

    //methods using the internal BinaryReader (little-endian, matching the BinaryWriter):
    public char getChar()
    {
        return reader.ReadChar();
    }
    /// <summary>Absolute read: fetches at 'index' without moving the current position.</summary>
    public char getChar(int index)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        char value = reader.ReadChar();
        stream.Position = originalPosition;
        return value;
    }
    public double getDouble()
    {
        return reader.ReadDouble();
    }
    public double getDouble(int index)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        double value = reader.ReadDouble();
        stream.Position = originalPosition;
        return value;
    }
    public float getFloat()
    {
        return reader.ReadSingle();
    }
    public float getFloat(int index)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        float value = reader.ReadSingle();
        stream.Position = originalPosition;
        return value;
    }
    public int getInt()
    {
        return reader.ReadInt32();
    }
    public int getInt(int index)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        int value = reader.ReadInt32();
        stream.Position = originalPosition;
        return value;
    }
    public long getLong()
    {
        return reader.ReadInt64();
    }
    public long getLong(int index)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        long value = reader.ReadInt64();
        stream.Position = originalPosition;
        return value;
    }
    public short getShort()
    {
        return reader.ReadInt16();
    }
    public short getShort(int index)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        short value = reader.ReadInt16();
        stream.Position = originalPosition;
        return value;
    }

    //methods using the internal BinaryWriter:
    public ByteBuffer putChar(char value)
    {
        writer.Write(value);
        return this;
    }
    /// <summary>Absolute write: stores at 'index' without moving the current position.</summary>
    public ByteBuffer putChar(int index, char value)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        writer.Write(value);
        stream.Position = originalPosition;
        return this;
    }
    public ByteBuffer putDouble(double value)
    {
        writer.Write(value);
        return this;
    }
    public ByteBuffer putDouble(int index, double value)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        writer.Write(value);
        stream.Position = originalPosition;
        return this;
    }
    public ByteBuffer putFloat(float value)
    {
        writer.Write(value);
        return this;
    }
    public ByteBuffer putFloat(int index, float value)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        writer.Write(value);
        stream.Position = originalPosition;
        return this;
    }
    public ByteBuffer putInt(int value)
    {
        writer.Write(value);
        return this;
    }
    public ByteBuffer putInt(int index, int value)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        writer.Write(value);
        stream.Position = originalPosition;
        return this;
    }
    public ByteBuffer putLong(long value)
    {
        writer.Write(value);
        return this;
    }
    public ByteBuffer putLong(int index, long value)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        writer.Write(value);
        stream.Position = originalPosition;
        return this;
    }
    public ByteBuffer putShort(short value)
    {
        writer.Write(value);
        return this;
    }
    public ByteBuffer putShort(int index, short value)
    {
        long originalPosition = stream.Position;
        stream.Position = index;
        writer.Write(value);
        stream.Position = originalPosition;
        return this;
    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Document/LazyDocument.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Document/LazyDocument.cs b/src/Lucene.Net.Misc/Document/LazyDocument.cs
new file mode 100644
index 0000000..6faed94
--- /dev/null
+++ b/src/Lucene.Net.Misc/Document/LazyDocument.cs
@@ -0,0 +1,226 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.document
+{
+
+ /// <summary>
+ /// Copyright 2004 The Apache Software Foundation
+ ///
+ /// Licensed under the Apache License, Version 2.0 (the "License");
+ /// you may not use this file except in compliance with the License.
+ /// You may obtain a copy of the License at
+ ///
+ /// http://www.apache.org/licenses/LICENSE-2.0
+ ///
+ /// Unless required by applicable law or agreed to in writing, software
+ /// distributed under the License is distributed on an "AS IS" BASIS,
+ /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ /// See the License for the specific language governing permissions and
+ /// limitations under the License.
+ /// </summary>
+
+ using Analyzer = org.apache.lucene.analysis.Analyzer;
+ using TokenStream = org.apache.lucene.analysis.TokenStream;
+ using FieldInfo = org.apache.lucene.index.FieldInfo;
+ using IndexReader = org.apache.lucene.index.IndexReader;
+ using IndexableField = org.apache.lucene.index.IndexableField;
+ using IndexableFieldType = org.apache.lucene.index.IndexableFieldType;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+
+ /// <summary>
+ /// Defers actually loading a field's value until you ask
+ /// for it. You must not use the returned Field instances
+ /// after the provided reader has been closed. </summary>
+ /// <seealso cref= #getField </seealso>
+ public class LazyDocument
+ {
+ private readonly IndexReader reader;
+ private readonly int docID;
+
+ // null until first field is loaded
+ private Document doc;
+
+ private IDictionary<int?, IList<LazyField>> fields = new Dictionary<int?, IList<LazyField>>();
+ private HashSet<string> fieldNames = new HashSet<string>();
+
+ public LazyDocument(IndexReader reader, int docID)
+ {
+ this.reader = reader;
+ this.docID = docID;
+ }
+
+ /// <summary>
+ /// Creates an IndexableField whose value will be lazy loaded if and
+ /// when it is used.
+ /// <para>
+ /// <b>NOTE:</b> This method must be called once for each value of the field
+ /// name specified in sequence that the values exist. This method may not be
+ /// used to generate multiple, lazy, IndexableField instances refering to
+ /// the same underlying IndexableField instance.
+ /// </para>
+ /// <para>
+ /// The lazy loading of field values from all instances of IndexableField
+ /// objects returned by this method are all backed by a single Document
+ /// per LazyDocument instance.
+ /// </para>
+ /// </summary>
+ public virtual IndexableField getField(FieldInfo fieldInfo)
+ {
+
+ fieldNames.Add(fieldInfo.name);
+ IList<LazyField> values = fields[fieldInfo.number];
+ if (null == values)
+ {
+ values = new List<>();
+ fields[fieldInfo.number] = values;
+ }
+
+ LazyField value = new LazyField(this, fieldInfo.name, fieldInfo.number);
+ values.Add(value);
+
+ lock (this)
+ {
+ // edge case: if someone asks this LazyDoc for more LazyFields
+ // after other LazyFields from the same LazyDoc have been
+ // actuallized, we need to force the doc to be re-fetched
+ // so the new LazyFields are also populated.
+ doc = null;
+ }
+ return value;
+ }
+
+ /// <summary>
+ /// non-private for test only access
+ /// @lucene.internal
+ /// </summary>
+ internal virtual Document Document
+ {
+ get
+ {
+ lock (this)
+ {
+ if (doc == null)
+ {
+ try
+ {
+ doc = reader.document(docID, fieldNames);
+ }
+ catch (IOException ioe)
+ {
+ throw new IllegalStateException("unable to load document", ioe);
+ }
+ }
+ return doc;
+ }
+ }
+ }
+
+ // :TODO: synchronize to prevent redundent copying? (sync per field name?)
+ private void fetchRealValues(string name, int fieldNum)
+ {
+ Document d = Document;
+
+ IList<LazyField> lazyValues = fields[fieldNum];
+ IndexableField[] realValues = d.getFields(name);
+
+ Debug.Assert(realValues.Length <= lazyValues.Count, "More lazy values then real values for field: " + name);
+
+ for (int i = 0; i < lazyValues.Count; i++)
+ {
+ LazyField f = lazyValues[i];
+ if (null != f)
+ {
+ f.realValue = realValues[i];
+ }
+ }
+ }
+
+
+ /// <summary>
+ /// @lucene.internal
+ /// </summary>
+ public class LazyField : IndexableField
+ {
+ private readonly LazyDocument outerInstance;
+
+ internal string name_Renamed;
+ internal int fieldNum;
+ internal volatile IndexableField realValue = null;
+
+ internal LazyField(LazyDocument outerInstance, string name, int fieldNum)
+ {
+ this.outerInstance = outerInstance;
+ this.name_Renamed = name;
+ this.fieldNum = fieldNum;
+ }
+
+ /// <summary>
+ /// non-private for test only access
+ /// @lucene.internal
+ /// </summary>
+ public virtual bool hasBeenLoaded()
+ {
+ return null != realValue;
+ }
+
+ internal virtual IndexableField RealValue
+ {
+ get
+ {
+ if (null == realValue)
+ {
+ outerInstance.fetchRealValues(name_Renamed, fieldNum);
+ }
+ Debug.Assert(hasBeenLoaded(), "field value was not lazy loaded");
+ Debug.Assert(realValue.name().Equals(name()), "realvalue name != name: " + realValue.name() + " != " + name());
+
+ return realValue;
+ }
+ }
+
+ public override string name()
+ {
+ return name_Renamed;
+ }
+
+ public override float boost()
+ {
+ return 1.0f;
+ }
+
+ public override BytesRef binaryValue()
+ {
+ return RealValue.binaryValue();
+ }
+
+ public override string stringValue()
+ {
+ return RealValue.stringValue();
+ }
+
+ public override Reader readerValue()
+ {
+ return RealValue.readerValue();
+ }
+
+ public override Number numericValue()
+ {
+ return RealValue.numericValue();
+ }
+
+ public override IndexableFieldType fieldType()
+ {
+ return RealValue.fieldType();
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public org.apache.lucene.analysis.TokenStream tokenStream(org.apache.lucene.analysis.Analyzer analyzer) throws java.io.IOException
+ public override TokenStream tokenStream(Analyzer analyzer)
+ {
+ return RealValue.tokenStream(analyzer);
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs b/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs
new file mode 100644
index 0000000..855b6f3
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs
@@ -0,0 +1,165 @@
+using System;
+
+namespace org.apache.lucene.index
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Prints the filename and size of each file within a given compound file.
+ /// Add the -extract flag to extract files to the current working directory.
+ /// In order to make the extracted version of the index work, you have to copy
+ /// the segments file from the compound index into the directory where the extracted files are stored. </summary>
+ /// <param name="args"> Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile> </param>
+
+
+ using CompoundFileDirectory = org.apache.lucene.store.CompoundFileDirectory;
+ using Directory = org.apache.lucene.store.Directory;
+ using FSDirectory = org.apache.lucene.store.FSDirectory;
+ using IOContext = org.apache.lucene.store.IOContext;
+ using IndexInput = org.apache.lucene.store.IndexInput;
+ using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+ using CommandLineUtil = org.apache.lucene.util.CommandLineUtil;
+
+ /// <summary>
+ /// Command-line tool for extracting sub-files out of a compound file.
+ /// </summary>
+ public class CompoundFileExtractor
+ {
+
+ public static void Main(string[] args)
+ {
+ string filename = null;
+ bool extract = false;
+ string dirImpl = null;
+
+ int j = 0;
+ while (j < args.Length)
+ {
+ string arg = args[j];
+ if ("-extract".Equals(arg))
+ {
+ extract = true;
+ }
+ else if ("-dir-impl".Equals(arg))
+ {
+ if (j == args.Length - 1)
+ {
+ Console.WriteLine("ERROR: missing value for -dir-impl option");
+ Environment.Exit(1);
+ }
+ j++;
+ dirImpl = args[j];
+ }
+ else if (filename == null)
+ {
+ filename = arg;
+ }
+ j++;
+ }
+
+ if (filename == null)
+ {
+ Console.WriteLine("Usage: org.apache.lucene.index.CompoundFileExtractor [-extract] [-dir-impl X] <cfsfile>");
+ return;
+ }
+
+ Directory dir = null;
+ CompoundFileDirectory cfr = null;
+ IOContext context = IOContext.READ;
+
+ try
+ {
+ File file = new File(filename);
+ string dirname = file.AbsoluteFile.Parent;
+ filename = file.Name;
+ if (dirImpl == null)
+ {
+ dir = FSDirectory.open(new File(dirname));
+ }
+ else
+ {
+ dir = CommandLineUtil.newFSDirectory(dirImpl, new File(dirname));
+ }
+
+ cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false);
+
+ string[] files = cfr.listAll();
+ ArrayUtil.timSort(files); // sort the array of filename so that the output is more readable
+
+ for (int i = 0; i < files.Length; ++i)
+ {
+ long len = cfr.fileLength(files[i]);
+
+ if (extract)
+ {
+ Console.WriteLine("extract " + files[i] + " with " + len + " bytes to local directory...");
+ IndexInput ii = cfr.openInput(files[i], context);
+
+ FileOutputStream f = new FileOutputStream(files[i]);
+
+ // read and write with a small buffer, which is more effective than reading byte by byte
+ sbyte[] buffer = new sbyte[1024];
+ int chunk = buffer.Length;
+ while (len > 0)
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int bufLen = (int) Math.min(chunk, len);
+ int bufLen = (int) Math.Min(chunk, len);
+ ii.readBytes(buffer, 0, bufLen);
+ f.write(buffer, 0, bufLen);
+ len -= bufLen;
+ }
+
+ f.close();
+ ii.close();
+ }
+ else
+ {
+ Console.WriteLine(files[i] + ": " + len + " bytes");
+ }
+ }
+ }
+ catch (IOException ioe)
+ {
+ Console.WriteLine(ioe.ToString());
+ Console.Write(ioe.StackTrace);
+ }
+ finally
+ {
+ try
+ {
+ if (dir != null)
+ {
+ dir.close();
+ }
+ if (cfr != null)
+ {
+ cfr.close();
+ }
+ }
+ catch (IOException ioe)
+ {
+ Console.WriteLine(ioe.ToString());
+ Console.Write(ioe.StackTrace);
+ }
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/IndexSplitter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/IndexSplitter.cs b/src/Lucene.Net.Misc/Index/IndexSplitter.cs
new file mode 100644
index 0000000..a0e9946
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/IndexSplitter.cs
@@ -0,0 +1,200 @@
+using System;
+using System.Collections.Generic;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+namespace org.apache.lucene.index
+{
+
+
+ using FSDirectory = org.apache.lucene.store.FSDirectory;
+
+ /// <summary>
+ /// Command-line tool that enables listing segments in an
+ /// index, copying specific segments to another index, and
+ /// deleting segments from an index.
+ ///
+ /// <para>This tool does file-level copying of segments files.
+ /// This means it's unable to split apart a single segment
+ /// into multiple segments. For example if your index is a
+ /// single segment, this tool won't help. Also, it does basic
+ /// file-level copying (using simple
+ /// File{In,Out}putStream) so it will not work with non
+ /// FSDirectory Directory impls.</para>
+ ///
+ /// @lucene.experimental You can easily
+ /// accidentally remove segments from your index so be
+ /// careful!
+ /// </summary>
+ public class IndexSplitter
+ {
+ public SegmentInfos infos;
+
+ internal FSDirectory fsDir;
+
+ internal File dir;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static void main(String[] args) throws Exception
+ public static void Main(string[] args)
+ {
+ if (args.Length < 2)
+ {
+ Console.Error.WriteLine("Usage: IndexSplitter <srcDir> -l (list the segments and their sizes)");
+ Console.Error.WriteLine("IndexSplitter <srcDir> <destDir> <segments>+");
+ Console.Error.WriteLine("IndexSplitter <srcDir> -d (delete the following segments)");
+ return;
+ }
+ File srcDir = new File(args[0]);
+ IndexSplitter @is = new IndexSplitter(srcDir);
+ if (!srcDir.exists())
+ {
+ throw new Exception("srcdir:" + srcDir.AbsolutePath + " doesn't exist");
+ }
+ if (args[1].Equals("-l"))
+ {
+ @is.listSegments();
+ }
+ else if (args[1].Equals("-d"))
+ {
+ IList<string> segs = new List<string>();
+ for (int x = 2; x < args.Length; x++)
+ {
+ segs.Add(args[x]);
+ }
+ @is.remove(segs.ToArray());
+ }
+ else
+ {
+ File targetDir = new File(args[1]);
+ IList<string> segs = new List<string>();
+ for (int x = 2; x < args.Length; x++)
+ {
+ segs.Add(args[x]);
+ }
+ @is.Split(targetDir, segs.ToArray());
+ }
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public IndexSplitter(java.io.File dir) throws java.io.IOException
+ public IndexSplitter(File dir)
+ {
+ this.dir = dir;
+ fsDir = FSDirectory.open(dir);
+ infos = new SegmentInfos();
+ infos.read(fsDir);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void listSegments() throws java.io.IOException
+ public virtual void listSegments()
+ {
+ DecimalFormat formatter = new DecimalFormat("###,###.###", DecimalFormatSymbols.getInstance(Locale.ROOT));
+ for (int x = 0; x < infos.size(); x++)
+ {
+ SegmentCommitInfo info = infos.info(x);
+ string sizeStr = formatter.format(info.sizeInBytes());
+ Console.WriteLine(info.info.name + " " + sizeStr);
+ }
+ }
+
+ private int getIdx(string name)
+ {
+ for (int x = 0; x < infos.size(); x++)
+ {
+ if (name.Equals(infos.info(x).info.name))
+ {
+ return x;
+ }
+ }
+ return -1;
+ }
+
+ private SegmentCommitInfo getInfo(string name)
+ {
+ for (int x = 0; x < infos.size(); x++)
+ {
+ if (name.Equals(infos.info(x).info.name))
+ {
+ return infos.info(x);
+ }
+ }
+ return null;
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void remove(String[] segs) throws java.io.IOException
+ public virtual void remove(string[] segs)
+ {
+ foreach (string n in segs)
+ {
+ int idx = getIdx(n);
+ infos.remove(idx);
+ }
+ infos.changed();
+ infos.commit(fsDir);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void split(java.io.File destDir, String[] segs) throws java.io.IOException
+ public virtual void Split(File destDir, string[] segs)
+ {
+ destDir.mkdirs();
+ FSDirectory destFSDir = FSDirectory.open(destDir);
+ SegmentInfos destInfos = new SegmentInfos();
+ destInfos.counter = infos.counter;
+ foreach (string n in segs)
+ {
+ SegmentCommitInfo infoPerCommit = getInfo(n);
+ SegmentInfo info = infoPerCommit.info;
+ // Same info just changing the dir:
+ SegmentInfo newInfo = new SegmentInfo(destFSDir, info.Version, info.name, info.DocCount, info.UseCompoundFile, info.Codec, info.Diagnostics);
+ destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.DelCount, infoPerCommit.DelGen, infoPerCommit.FieldInfosGen));
+ // now copy files over
+ ICollection<string> files = infoPerCommit.files();
+ foreach (String srcName in files)
+ {
+ File srcFile = new File(dir, srcName);
+ File destFile = new File(destDir, srcName);
+ copyFile(srcFile, destFile);
+ }
+ }
+ destInfos.changed();
+ destInfos.commit(destFSDir);
+ // System.out.println("destDir:"+destDir.getAbsolutePath());
+ }
+
+ private static readonly sbyte[] copyBuffer = new sbyte[32 * 1024];
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private static void copyFile(java.io.File src, java.io.File dst) throws java.io.IOException
+ private static void copyFile(File src, File dst)
+ {
+ InputStream @in = new FileInputStream(src);
+ OutputStream @out = new FileOutputStream(dst);
+ int len;
+ while ((len = @in.read(copyBuffer)) > 0)
+ {
+ @out.write(copyBuffer, 0, len);
+ }
+ @in.close();
+ @out.close();
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs b/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs
new file mode 100644
index 0000000..1e03fed
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/MultiPassIndexSplitter.cs
@@ -0,0 +1,329 @@
+using System;
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.index
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using OpenMode = org.apache.lucene.index.IndexWriterConfig.OpenMode;
+ using Directory = org.apache.lucene.store.Directory;
+ using FSDirectory = org.apache.lucene.store.FSDirectory;
+ using FixedBitSet = org.apache.lucene.util.FixedBitSet;
+ using Bits = org.apache.lucene.util.Bits;
+ using Version = org.apache.lucene.util.Version;
+
+ /// <summary>
+ /// This tool splits input index into multiple equal parts. The method employed
+ /// here uses <seealso cref="IndexWriter#addIndexes(IndexReader[])"/> where the input data
+ /// comes from the input index with artificially applied deletes to the document
+ /// id-s that fall outside the selected partition.
+ /// <para>Note 1: Deletes are only applied to a buffered list of deleted docs and
+ /// don't affect the source index - this tool works also with read-only indexes.
+ /// </para>
+ /// <para>Note 2: the disadvantage of this tool is that source index needs to be
+ /// read as many times as there are parts to be created, hence the name of this
+ /// tool.
+ ///
+ /// </para>
+ /// <para><b>NOTE</b>: this tool is unaware of documents added
+ /// atomically via <seealso cref="IndexWriter#addDocuments"/> or {@link
+ /// IndexWriter#updateDocuments}, which means it can easily
+ /// break up such document groups.
+ /// </para>
+ /// </summary>
+ public class MultiPassIndexSplitter
+ {
+
+ /// <summary>
+ /// Split source index into multiple parts. For every output part the whole
+ /// source is re-read once ("multi pass"): documents outside the part are
+ /// marked deleted on a wrapper reader, and the remaining live docs are added
+ /// to the part's IndexWriter via addIndexes. </summary>
+ /// <param name="in"> source index, can have deletions, can have
+ /// multiple segments (or multiple readers). </param>
+ /// <param name="outputs"> list of directories where the output parts will be stored. </param>
+ /// <param name="seq"> if true, then the source index will be split into equal
+ /// increasing ranges of document id-s. If false, source document id-s will be
+ /// assigned in a deterministic round-robin fashion to one of the output splits. </param>
+ /// <exception cref="IOException"> If there is a low-level I/O error </exception>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void split(org.apache.lucene.util.Version version, IndexReader in, org.apache.lucene.store.Directory[] outputs, boolean seq) throws java.io.IOException
+ public virtual void Split(Version version, IndexReader @in, Directory[] outputs, bool seq)
+ {
+ if (outputs == null || outputs.Length < 2)
+ {
+ throw new IOException("Invalid number of outputs.");
+ }
+ if (@in == null || @in.numDocs() < 2)
+ {
+ throw new IOException("Not enough documents for splitting");
+ }
+ int numParts = outputs.Length;
+ // wrap a potentially read-only input
+ // this way we don't have to preserve original deletions because neither
+ // deleteDocument(int) or undeleteAll() is applied to the wrapped input index.
+ FakeDeleteIndexReader input = new FakeDeleteIndexReader(@in);
+ int maxDoc = input.maxDoc();
+ int partLen = maxDoc / numParts;
+ for (int i = 0; i < numParts; i++)
+ {
+ // reset the fake deletions before carving out the next part
+ input.undeleteAll();
+ if (seq) // sequential range
+ {
+ int lo = partLen * i;
+ int hi = lo + partLen;
+ // below range
+ for (int j = 0; j < lo; j++)
+ {
+ input.deleteDocument(j);
+ }
+ // above range - last part collects all id-s that remained due to
+ // integer rounding errors
+ if (i < numParts - 1)
+ {
+ for (int j = hi; j < maxDoc; j++)
+ {
+ input.deleteDocument(j);
+ }
+ }
+ }
+ else
+ {
+ // round-robin: doc j goes to part (j % numParts); delete all others
+ for (int j = 0; j < maxDoc; j++)
+ {
+ if ((j + numParts - i) % numParts != 0)
+ {
+ input.deleteDocument(j);
+ }
+ }
+ }
+ IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(version, null)
+ .setOpenMode(OpenMode.CREATE));
+ Console.Error.WriteLine("Writing part " + (i + 1) + " ...");
+ // pass the subreaders directly, as our wrapper's numDocs/hasDeletions are not up-to-date
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.util.List<? extends FakeDeleteAtomicIndexReader> sr = input.getSequentialSubReaders();
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+ // NOTE(review): IList<?> is not valid C# - needs a concrete element type
+ // (e.g. IList<FakeDeleteAtomicIndexReader>) before this file compiles.
+ IList<?> sr = input.SequentialSubReaders;
+ w.addIndexes(sr.ToArray()); // TODO: maybe take List<IR> here?
+ w.close();
+ }
+ Console.Error.WriteLine("Done.");
+ }
+
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @SuppressWarnings("deprecation") public static void main(String[] args) throws Exception
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+ /// <summary>
+ /// Command-line entry point: parses -out/-num/-seq plus one or more input
+ /// index paths, opens the inputs (wrapping several in a MultiReader), creates
+ /// "part-N" subdirectories under the output dir and runs Split.
+ /// </summary>
+ public static void Main(string[] args)
+ {
+ if (args.Length < 5)
+ {
+ // NOTE(review): "ouputDir" typo in the usage text below - runtime string,
+ // deliberately left unchanged in this documentation-only pass.
+ Console.Error.WriteLine("Usage: MultiPassIndexSplitter -out <outputDir> -num <numParts> [-seq] <inputIndex1> [<inputIndex2 ...]");
+ Console.Error.WriteLine("\tinputIndex\tpath to input index, multiple values are ok");
+ Console.Error.WriteLine("\t-out ouputDir\tpath to output directory to contain partial indexes");
+ Console.Error.WriteLine("\t-num numParts\tnumber of parts to produce");
+ Console.Error.WriteLine("\t-seq\tsequential docid-range split (default is round-robin)");
+ Environment.Exit(-1);
+ }
+ List<IndexReader> indexes = new List<IndexReader>();
+ string outDir = null;
+ int numParts = -1;
+ bool seq = false;
+ for (int i = 0; i < args.Length; i++)
+ {
+ if (args[i].Equals("-out"))
+ {
+ outDir = args[++i];
+ }
+ else if (args[i].Equals("-num"))
+ {
+ numParts = Convert.ToInt32(args[++i]);
+ }
+ else if (args[i].Equals("-seq"))
+ {
+ seq = true;
+ }
+ else
+ {
+ // anything that is not a recognized flag is an input index path
+ // NOTE(review): java.io.File and "file.Directory" (from isDirectory())
+ // survive from the conversion - needs System.IO.Directory.Exists etc.
+ File file = new File(args[i]);
+ if (!file.exists() || !file.Directory)
+ {
+ Console.Error.WriteLine("Invalid input path - skipping: " + file);
+ continue;
+ }
+ Directory dir = FSDirectory.open(new File(args[i]));
+ try
+ {
+ if (!DirectoryReader.indexExists(dir))
+ {
+ Console.Error.WriteLine("Invalid input index - skipping: " + file);
+ continue;
+ }
+ }
+ catch (Exception)
+ {
+ // best-effort probe: treat any failure as "not an index" and move on
+ Console.Error.WriteLine("Invalid input index - skipping: " + file);
+ continue;
+ }
+ indexes.Add(DirectoryReader.open(dir));
+ }
+ }
+ if (outDir == null)
+ {
+ throw new Exception("Required argument missing: -out outputDir");
+ }
+ if (numParts < 2)
+ {
+ throw new Exception("Invalid value of required argument: -num numParts");
+ }
+ if (indexes.Count == 0)
+ {
+ throw new Exception("No input indexes to process");
+ }
+ File @out = new File(outDir);
+ if (!@out.mkdirs())
+ {
+ throw new Exception("Can't create output directory: " + @out);
+ }
+ Directory[] dirs = new Directory[numParts];
+ for (int i = 0; i < numParts; i++)
+ {
+ dirs[i] = FSDirectory.open(new File(@out, "part-" + i));
+ }
+ MultiPassIndexSplitter splitter = new MultiPassIndexSplitter();
+ IndexReader input;
+ if (indexes.Count == 1)
+ {
+ input = indexes[0];
+ }
+ else
+ {
+ input = new MultiReader(indexes.ToArray());
+ }
+ splitter.Split(Version.LUCENE_CURRENT, input, dirs, seq);
+ }
+
+ /// <summary>
+ /// This class emulates deletions on the underlying index: a composite reader
+ /// whose leaves are FakeDeleteAtomicIndexReader wrappers, so documents can be
+ /// "deleted"/"undeleted" without touching the (possibly read-only) source.
+ /// </summary>
+ private sealed class FakeDeleteIndexReader : BaseCompositeReader<FakeDeleteAtomicIndexReader>
+ {
+
+ public FakeDeleteIndexReader(IndexReader reader) : base(initSubReaders(reader))
+ {
+ }
+
+ // Wraps every leaf of the source reader in a FakeDeleteAtomicIndexReader.
+ internal static FakeDeleteAtomicIndexReader[] initSubReaders(IndexReader reader)
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.util.List<AtomicReaderContext> leaves = reader.leaves();
+ IList<AtomicReaderContext> leaves = reader.leaves();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final FakeDeleteAtomicIndexReader[] subs = new FakeDeleteAtomicIndexReader[leaves.size()];
+ FakeDeleteAtomicIndexReader[] subs = new FakeDeleteAtomicIndexReader[leaves.Count];
+ int i = 0;
+ foreach (AtomicReaderContext ctx in leaves)
+ {
+ subs[i++] = new FakeDeleteAtomicIndexReader(ctx.reader());
+ }
+ return subs;
+ }
+
+ // Routes a composite-level docID to the owning subreader's local docID.
+ // NOTE(review): ".get(i)" is the Java List API; a .NET IList needs "[i]".
+ public void deleteDocument(int docID)
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int i = readerIndex(docID);
+ int i = readerIndex(docID);
+ SequentialSubReaders.get(i).deleteDocument(docID - readerBase(i));
+ }
+
+ // Clears the emulated deletions on every subreader.
+ public void undeleteAll()
+ {
+ foreach (FakeDeleteAtomicIndexReader r in SequentialSubReaders)
+ {
+ r.undeleteAll();
+ }
+ }
+
+ protected internal override void doClose()
+ {
+ }
+
+ // no need to override numDocs/hasDeletions,
+ // as we pass the subreaders directly to IW.addIndexes().
+ }
+
+ /// <summary>
+ /// Leaf-level wrapper that overlays its own liveDocs bitset on the wrapped
+ /// reader, so deletions can be simulated and reverted cheaply.
+ /// </summary>
+ private sealed class FakeDeleteAtomicIndexReader : FilterAtomicReader
+ {
+ // overlay bitset: a set bit means the doc is live in the current "pass"
+ internal FixedBitSet liveDocs;
+
+ public FakeDeleteAtomicIndexReader(AtomicReader reader) : base(reader)
+ {
+ undeleteAll(); // initialize main bitset
+ }
+
+ public override int numDocs()
+ {
+ return liveDocs.cardinality();
+ }
+
+ // Rebuilds liveDocs from the wrapped reader's real deletions (or all-live
+ // if the source has none), discarding any emulated deletions.
+ public void undeleteAll()
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int maxDoc = in.maxDoc();
+ int maxDoc = @in.maxDoc();
+ liveDocs = new FixedBitSet(@in.maxDoc());
+ if (@in.hasDeletions())
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.Bits oldLiveDocs = in.getLiveDocs();
+ Bits oldLiveDocs = @in.LiveDocs;
+ Debug.Assert(oldLiveDocs != null);
+ // this loop is a little bit ineffective, as Bits has no nextSetBit():
+ for (int i = 0; i < maxDoc; i++)
+ {
+ if (oldLiveDocs.get(i))
+ {
+ liveDocs.set(i);
+ }
+ }
+ }
+ else
+ {
+ // mark all docs as valid
+ liveDocs.set(0, maxDoc);
+ }
+ }
+
+ public void deleteDocument(int n)
+ {
+ liveDocs.clear(n);
+ }
+
+ public override Bits LiveDocs
+ {
+ get
+ {
+ return liveDocs;
+ }
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs b/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs
new file mode 100644
index 0000000..f3e7ed4
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/PKIndexSplitter.cs
@@ -0,0 +1,220 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.index
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using OpenMode = org.apache.lucene.index.IndexWriterConfig.OpenMode;
+ using DocIdSet = org.apache.lucene.search.DocIdSet;
+ using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+ using Filter = org.apache.lucene.search.Filter;
+ using TermRangeFilter = org.apache.lucene.search.TermRangeFilter;
+ using Directory = org.apache.lucene.store.Directory;
+ using Bits = org.apache.lucene.util.Bits;
+ using FixedBitSet = org.apache.lucene.util.FixedBitSet;
+ using IOUtils = org.apache.lucene.util.IOUtils;
+ using Version = org.apache.lucene.util.Version;
+
+ /// <summary>
+ /// Split an index based on a <seealso cref="Filter"/>.
+ /// </summary>
+
+ public class PKIndexSplitter
+ {
+ // filter selecting the documents that go to dir1; the complement goes to dir2
+ private readonly Filter docsInFirstIndex;
+ private readonly Directory input;
+ private readonly Directory dir1;
+ private readonly Directory dir2;
+ // one IndexWriterConfig per output - a config instance cannot be reused
+ private readonly IndexWriterConfig config1;
+ private readonly IndexWriterConfig config2;
+
+ /// <summary>
+ /// Split an index based on a <seealso cref="Filter"/>. All documents that match the filter
+ /// are sent to dir1, remaining ones to dir2.
+ /// </summary>
+ public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex) : this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(version), newDefaultConfig(version))
+ {
+ }
+
+ // Fresh CREATE-mode config; called once per output directory.
+ private static IndexWriterConfig newDefaultConfig(Version version)
+ {
+ return (new IndexWriterConfig(version, null)).setOpenMode(OpenMode.CREATE);
+ }
+
+ /// <summary>
+ /// Split an index based on a <seealso cref="Filter"/>, with caller-supplied writer
+ /// configs for the two output indexes.
+ /// </summary>
+ public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2)
+ {
+ this.input = input;
+ this.dir1 = dir1;
+ this.dir2 = dir2;
+ this.docsInFirstIndex = docsInFirstIndex;
+ this.config1 = config1;
+ this.config2 = config2;
+ }
+
+ /// <summary>
+ /// Split an index based on a given primary key term
+ /// and a 'middle' term. If the middle term is present, it's
+ /// sent to dir2.
+ /// </summary>
+ public PKIndexSplitter(Version version, Directory input, Directory dir1, Directory dir2, Term midTerm) : this(version, input, dir1, dir2, new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false))
+ {
+ }
+
+ public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) : this(input, dir1, dir2, new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2)
+ {
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public void split() throws java.io.IOException
+ /// <summary>
+ /// Performs the split: opens the input once and writes the filtered docs to
+ /// dir1 and the negated set to dir2. On failure the reader is closed while
+ /// suppressing secondary exceptions (closeWhileHandlingException).
+ /// </summary>
+ public virtual void Split()
+ {
+ bool success = false;
+ DirectoryReader reader = DirectoryReader.open(input);
+ try
+ {
+ // pass an individual config in here since one config can not be reused!
+ createIndex(config1, dir1, reader, docsInFirstIndex, false);
+ createIndex(config2, dir2, reader, docsInFirstIndex, true);
+ success = true;
+ }
+ finally
+ {
+ if (success)
+ {
+ IOUtils.close(reader);
+ }
+ else
+ {
+ IOUtils.closeWhileHandlingException(reader);
+ }
+ }
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: private void createIndex(IndexWriterConfig config, org.apache.lucene.store.Directory target, IndexReader reader, org.apache.lucene.search.Filter preserveFilter, boolean negateFilter) throws java.io.IOException
+ // Builds one output index: wraps each leaf of the source in a
+ // DocumentFilteredAtomicIndexReader (optionally negating the filter) and
+ // feeds all wrappers to a single addIndexes call.
+ private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, bool negateFilter)
+ {
+ bool success = false;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final IndexWriter w = new IndexWriter(target, config);
+ IndexWriter w = new IndexWriter(target, config);
+ try
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final java.util.List<AtomicReaderContext> leaves = reader.leaves();
+ IList<AtomicReaderContext> leaves = reader.leaves();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final IndexReader[] subReaders = new IndexReader[leaves.size()];
+ IndexReader[] subReaders = new IndexReader[leaves.Count];
+ int i = 0;
+ foreach (AtomicReaderContext ctx in leaves)
+ {
+ subReaders[i++] = new DocumentFilteredAtomicIndexReader(ctx, preserveFilter, negateFilter);
+ }
+ w.addIndexes(subReaders);
+ success = true;
+ }
+ finally
+ {
+ if (success)
+ {
+ IOUtils.close(w);
+ }
+ else
+ {
+ IOUtils.closeWhileHandlingException(w);
+ }
+ }
+ }
+
+ // Leaf reader that presents only the docs accepted by the filter
+ // (optionally negated), intersected with the source's real live docs.
+ private class DocumentFilteredAtomicIndexReader : FilterAtomicReader
+ {
+ internal readonly Bits liveDocs;
+ internal readonly int numDocs_Renamed;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, org.apache.lucene.search.Filter preserveFilter, boolean negateFilter) throws java.io.IOException
+ public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter) : base(context.reader())
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int maxDoc = in.maxDoc();
+ int maxDoc = @in.maxDoc();
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.FixedBitSet bits = new org.apache.lucene.util.FixedBitSet(maxDoc);
+ FixedBitSet bits = new FixedBitSet(maxDoc);
+ // ignore livedocs here, as we filter them later:
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.search.DocIdSet docs = preserveFilter.getDocIdSet(context, null);
+ DocIdSet docs = preserveFilter.getDocIdSet(context, null);
+ if (docs != null)
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.search.DocIdSetIterator it = docs.iterator();
+ // NOTE(review): Java's DocIdSet.iterator() was mechanically converted
+ // to GetEnumerator() - DocIdSet exposes no such member; confirm the
+ // intended .NET API name before relying on this.
+ DocIdSetIterator it = docs.GetEnumerator();
+ if (it != null)
+ {
+ bits.or(it);
+ }
+ }
+ if (negateFilter)
+ {
+ bits.flip(0, maxDoc);
+ }
+
+ if (@in.hasDeletions())
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.util.Bits oldLiveDocs = in.getLiveDocs();
+ Bits oldLiveDocs = @in.LiveDocs;
+ Debug.Assert(oldLiveDocs != null);
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.search.DocIdSetIterator it = bits.iterator();
+ DocIdSetIterator it = bits.GetEnumerator();
+ for (int i = it.nextDoc(); i < maxDoc; i = it.nextDoc())
+ {
+ if (!oldLiveDocs.get(i))
+ {
+ // we can safely modify the current bit, as the iterator already stepped over it:
+ bits.clear(i);
+ }
+ }
+ }
+
+ this.liveDocs = bits;
+ this.numDocs_Renamed = bits.cardinality();
+ }
+
+ public override int numDocs()
+ {
+ return numDocs_Renamed;
+ }
+
+ public override Bits LiveDocs
+ {
+ get
+ {
+ return liveDocs;
+ }
+ }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs b/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs
new file mode 100644
index 0000000..70ad20a
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/Sorter/BlockJoinComparatorSource.cs
@@ -0,0 +1,321 @@
+using System;
+
+namespace org.apache.lucene.index.sorter
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using DocIdSet = org.apache.lucene.search.DocIdSet;
+ using FieldComparator = org.apache.lucene.search.FieldComparator;
+ using FieldComparatorSource = org.apache.lucene.search.FieldComparatorSource;
+ using Filter = org.apache.lucene.search.Filter;
+ using IndexSearcher = org.apache.lucene.search.IndexSearcher; // javadocs
+ using Query = org.apache.lucene.search.Query; // javadocs
+ using ScoreDoc = org.apache.lucene.search.ScoreDoc; // javadocs
+ using Scorer = org.apache.lucene.search.Scorer;
+ using Sort = org.apache.lucene.search.Sort;
+ using SortField = org.apache.lucene.search.SortField;
+ using FixedBitSet = org.apache.lucene.util.FixedBitSet;
+
+ /// <summary>
+ /// Helper class to sort readers that contain blocks of documents.
+ /// <para>
+ /// Note that this class is intended to used with <seealso cref="SortingMergePolicy"/>,
+ /// and for other purposes has some limitations:
+ /// <ul>
+ /// <li>Cannot yet be used with <seealso cref="IndexSearcher#searchAfter(ScoreDoc, Query, int, Sort) IndexSearcher.searchAfter"/>
+ /// <li>Filling sort field values is not yet supported.
+ /// </ul>
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ // TODO: can/should we clean this thing up (e.g. return a proper sort value)
+ // and move to the join/ module?
+ public class BlockJoinComparatorSource : FieldComparatorSource
+ {
+ // identifies the parent (last) document of each block
+ internal readonly Filter parentsFilter;
+ internal readonly Sort parentSort;
+ internal readonly Sort childSort;
+
+ /// <summary>
+ /// Create a new BlockJoinComparatorSource, sorting only blocks of documents
+ /// with {@code parentSort} and not reordering children with a block.
+ /// </summary>
+ /// <param name="parentsFilter"> Filter identifying parent documents </param>
+ /// <param name="parentSort"> Sort for parent documents </param>
+ public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort) : this(parentsFilter, parentSort, new Sort(SortField.FIELD_DOC))
+ {
+ }
+
+ /// <summary>
+ /// Create a new BlockJoinComparatorSource, specifying the sort order for both
+ /// blocks of documents and children within a block.
+ /// </summary>
+ /// <param name="parentsFilter"> Filter identifying parent documents </param>
+ /// <param name="parentSort"> Sort for parent documents </param>
+ /// <param name="childSort"> Sort for child documents in the same block </param>
+ public BlockJoinComparatorSource(Filter parentsFilter, Sort parentSort, Sort childSort)
+ {
+ this.parentsFilter = parentsFilter;
+ this.parentSort = parentSort;
+ this.childSort = childSort;
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public org.apache.lucene.search.FieldComparator<Integer> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws java.io.IOException
+ /// <summary>
+ /// Builds the per-hit comparator: one sub-comparator per parent/child sort
+ /// field, each sized for a single slot, plus parallel parent/child docid
+ /// slot arrays. NOTE(review): FieldComparator&lt;?&gt; below is unconverted
+ /// Java wildcard syntax - this method will not compile until a concrete or
+ /// non-generic comparator type is chosen.
+ /// </summary>
+ public override FieldComparator<int?> newComparator(string fieldname, int numHits, int sortPos, bool reversed)
+ {
+ // we keep parallel slots: the parent ids and the child ids
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int parentSlots[] = new int[numHits];
+ int[] parentSlots = new int[numHits];
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int childSlots[] = new int[numHits];
+ int[] childSlots = new int[numHits];
+
+ SortField[] parentFields = parentSort.Sort;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int parentReverseMul[] = new int[parentFields.length];
+ int[] parentReverseMul = new int[parentFields.Length];
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.search.FieldComparator<?> parentComparators[] = new org.apache.lucene.search.FieldComparator[parentFields.length];
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+ FieldComparator<?>[] parentComparators = new FieldComparator[parentFields.Length];
+ for (int i = 0; i < parentFields.Length; i++)
+ {
+ parentReverseMul[i] = parentFields[i].Reverse ? - 1 : 1;
+ parentComparators[i] = parentFields[i].getComparator(1, i);
+ }
+
+ SortField[] childFields = childSort.Sort;
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final int childReverseMul[] = new int[childFields.length];
+ int[] childReverseMul = new int[childFields.Length];
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.search.FieldComparator<?> childComparators[] = new org.apache.lucene.search.FieldComparator[childFields.length];
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+ FieldComparator<?>[] childComparators = new FieldComparator[childFields.Length];
+ for (int i = 0; i < childFields.Length; i++)
+ {
+ childReverseMul[i] = childFields[i].Reverse ? - 1 : 1;
+ childComparators[i] = childFields[i].getComparator(1, i);
+ }
+
+ // NOTE: we could return parent ID as value but really our sort "value" is more complex...
+ // So we throw UOE for now. At the moment you really should only use this at indexing time.
+ return new FieldComparatorAnonymousInnerClassHelper(this, parentSlots, childSlots, parentReverseMul, parentComparators, childReverseMul, childComparators);
+ }
+
+ // Converter-materialized form of the Java anonymous FieldComparator:
+ // compares hits first by parent block order, then by child order inside a block.
+ private class FieldComparatorAnonymousInnerClassHelper : FieldComparator<int?>
+ {
+ private readonly BlockJoinComparatorSource outerInstance;
+
+ private int[] parentSlots;
+ private int[] childSlots;
+ private int[] parentReverseMul;
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+//ORIGINAL LINE: private org.apache.lucene.search.FieldComparator<JavaToDotNetGenericWildcard>[] parentComparators;
+ private FieldComparator<?>[] parentComparators;
+ private int[] childReverseMul;
+//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+//ORIGINAL LINE: private org.apache.lucene.search.FieldComparator<JavaToDotNetGenericWildcard>[] childComparators;
+ private FieldComparator<?>[] childComparators;
+
+ // NOTE(review): a C# constructor cannot declare type parameters (<T1, T2>);
+ // the generics must move to the class, or the arrays must lose their
+ // element type parameter, before this compiles.
+ public FieldComparatorAnonymousInnerClassHelper<T1, T2>(BlockJoinComparatorSource outerInstance, int[] parentSlots, int[] childSlots, int[] parentReverseMul, FieldComparator<T1>[] parentComparators, int[] childReverseMul, FieldComparator<T2>[] childComparators)
+ {
+ this.outerInstance = outerInstance;
+ this.parentSlots = parentSlots;
+ this.childSlots = childSlots;
+ this.parentReverseMul = parentReverseMul;
+ this.parentComparators = parentComparators;
+ this.childReverseMul = childReverseMul;
+ this.childComparators = childComparators;
+ }
+
+ internal int bottomParent;
+ internal int bottomChild;
+ // parent bitset of the current segment; set in setNextReader
+ internal FixedBitSet parentBits;
+
+ public override int compare(int slot1, int slot2)
+ {
+ try
+ {
+ return compare(childSlots[slot1], parentSlots[slot1], childSlots[slot2], parentSlots[slot2]);
+ }
+ catch (IOException e)
+ {
+ // NOTE(review): Exception has no (Exception) constructor overload that
+ // preserves the cause - should be new Exception(e.Message, e) or a
+ // more specific wrapper, otherwise the inner exception is lost.
+ throw new Exception(e);
+ }
+ }
+
+ public override int Bottom
+ {
+ set
+ {
+ bottomParent = parentSlots[value];
+ bottomChild = childSlots[value];
+ }
+ }
+
+ public override int? TopValue
+ {
+ set
+ {
+ // we dont have enough information (the docid is needed)
+ throw new System.NotSupportedException("this comparator cannot be used with deep paging");
+ }
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int compareBottom(int doc) throws java.io.IOException
+ public override int compareBottom(int doc)
+ {
+ return compare(bottomChild, bottomParent, doc, parent(doc));
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public int compareTop(int doc) throws java.io.IOException
+ public override int compareTop(int doc)
+ {
+ // we dont have enough information (the docid is needed)
+ throw new System.NotSupportedException("this comparator cannot be used with deep paging");
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public void copy(int slot, int doc) throws java.io.IOException
+ public override void copy(int slot, int doc)
+ {
+ // record both the child docid and its enclosing parent docid for the slot
+ childSlots[slot] = doc;
+ parentSlots[slot] = parent(doc);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public org.apache.lucene.search.FieldComparator<Integer> setNextReader(org.apache.lucene.index.AtomicReaderContext context) throws java.io.IOException
+ // NOTE(review): IllegalStateException is a Java type - the .NET equivalent
+ // is InvalidOperationException.
+ public override FieldComparator<int?> setNextReader(AtomicReaderContext context)
+ {
+//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
+//ORIGINAL LINE: final org.apache.lucene.search.DocIdSet parents = parentsFilter.getDocIdSet(context, null);
+ DocIdSet parents = outerInstance.parentsFilter.getDocIdSet(context, null);
+ if (parents == null)
+ {
+ throw new IllegalStateException("AtomicReader " + context.reader() + " contains no parents!");
+ }
+ if (!(parents is FixedBitSet))
+ {
+ throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
+ }
+ parentBits = (FixedBitSet) parents;
+ for (int i = 0; i < parentComparators.Length; i++)
+ {
+ parentComparators[i] = parentComparators[i].setNextReader(context);
+ }
+ for (int i = 0; i < childComparators.Length; i++)
+ {
+ childComparators[i] = childComparators[i].setNextReader(context);
+ }
+ return this;
+ }
+
+ public override int? value(int slot)
+ {
+ // really our sort "value" is more complex...
+ throw new System.NotSupportedException("filling sort field values is not yet supported");
+ }
+
+ public override Scorer Scorer
+ {
+ set
+ {
+ base.Scorer = value;
+ //JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+ //ORIGINAL LINE: for (org.apache.lucene.search.FieldComparator<?> comp : parentComparators)
+ foreach (FieldComparator<?> comp in parentComparators)
+ {
+ comp.Scorer = value;
+ }
+ //JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
+ //ORIGINAL LINE: for (org.apache.lucene.search.FieldComparator<?> comp : childComparators)
+ foreach (FieldComparator<?> comp in childComparators)
+ {
+ comp.Scorer = value;
+ }
+ }
+ }
+
+ // Maps a docid to its parent docid via the next set bit in the parent bitset.
+ internal virtual int parent(int doc)
+ {
+ return parentBits.nextSetBit(doc);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: int compare(int docID1, int parent1, int docID2, int parent2) throws java.io.IOException
+ // Orders two (child, parent) pairs: same block -> child order (parents sink
+ // to the end of their block); different blocks -> parent order with docid
+ // tiebreak.
+ internal virtual int compare(int docID1, int parent1, int docID2, int parent2)
+ {
+ if (parent1 == parent2) // both are in the same block
+ {
+ if (docID1 == parent1 || docID2 == parent2)
+ {
+ // keep parents at the end of blocks
+ return docID1 - docID2;
+ }
+ else
+ {
+ return compare(docID1, docID2, childComparators, childReverseMul);
+ }
+ }
+ else
+ {
+ int cmp = compare(parent1, parent2, parentComparators, parentReverseMul);
+ if (cmp == 0)
+ {
+ return parent1 - parent2;
+ }
+ else
+ {
+ return cmp;
+ }
+ }
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: int compare(int docID1, int docID2, org.apache.lucene.search.FieldComparator<?> comparators[] , int reverseMul[]) throws java.io.IOException
+ // Runs docID1 vs docID2 through each field comparator in turn (slot 0 is
+ // used as scratch), returning the first non-zero result.
+ internal virtual int compare<T1>(int docID1, int docID2, FieldComparator<T1>[] comparators, int[] reverseMul)
+ {
+ for (int i = 0; i < comparators.Length; i++)
+ {
+ // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
+ // the segments are always the same here...
+ comparators[i].copy(0, docID1);
+ comparators[i].Bottom = 0;
+ int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
+ if (comp != 0)
+ {
+ return comp;
+ }
+ }
+ return 0; // no need to docid tiebreak
+ }
+ }
+
+ public override string ToString()
+ {
+ return "blockJoin(parentSort=" + parentSort + ",childSort=" + childSort + ")";
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs b/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs
new file mode 100644
index 0000000..654ba85
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/Sorter/EarlyTerminatingSortingCollector.cs
@@ -0,0 +1,147 @@
+namespace org.apache.lucene.index.sorter
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CollectionTerminatedException = org.apache.lucene.search.CollectionTerminatedException;
+ using Collector = org.apache.lucene.search.Collector;
+ using Scorer = org.apache.lucene.search.Scorer;
+ using Sort = org.apache.lucene.search.Sort;
+ using TopDocsCollector = org.apache.lucene.search.TopDocsCollector;
+ using TotalHitCountCollector = org.apache.lucene.search.TotalHitCountCollector;
+
+ /// <summary>
+ /// A <seealso cref="Collector"/> that early terminates collection of documents on a
+ /// per-segment basis, if the segment was sorted according to the given
+ /// <seealso cref="Sort"/>.
+ ///
+ /// <para>
+ /// <b>NOTE:</b> the {@code Collector} detects sorted segments according to
+ /// <seealso cref="SortingMergePolicy"/>, so it's best used in conjunction with it. Also,
+ /// it collects up to a specified {@code numDocsToCollect} from each segment,
+ /// and therefore is mostly suitable for use in conjunction with collectors such as
+ /// <seealso cref="TopDocsCollector"/>, and not e.g. <seealso cref="TotalHitCountCollector"/>.
+ /// </para>
+ /// <para>
+ /// <b>NOTE</b>: If you wrap a {@code TopDocsCollector} that sorts in the same
+ /// order as the index order, the returned <seealso cref="TopDocsCollector#topDocs() TopDocs"/>
+ /// will be correct. However the total of {@link TopDocsCollector#getTotalHits()
+ /// hit count} will be underestimated since not all matching documents will have
+ /// been collected.
+ /// </para>
+ /// <para>
+ /// <b>NOTE</b>: This {@code Collector} uses <seealso cref="Sort#toString()"/> to detect
+ /// whether a segment was sorted with the same {@code Sort}. This has
+ /// two implications:
+ /// <ul>
+ /// <li>if a custom comparator is not implemented correctly and returns
+ /// different identifiers for equivalent instances, this collector will not
+ /// detect sorted segments,</li>
+ /// <li>if you suddenly change the <seealso cref="IndexWriter"/>'s
+ /// {@code SortingMergePolicy} to sort according to another criterion and if both
+ /// the old and the new {@code Sort}s have the same identifier, this
+ /// {@code Collector} will incorrectly detect sorted segments.</li>
+ /// </ul>
+ ///
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public class EarlyTerminatingSortingCollector : Collector
+ {
+   /// <summary>
+   /// The wrapped Collector </summary>
+   protected internal readonly Collector @in;
+   /// <summary>
+   /// Sort used to sort the search results </summary>
+   protected internal readonly Sort sort;
+   /// <summary>
+   /// Number of documents to collect in each segment </summary>
+   protected internal readonly int numDocsToCollect;
+   /// <summary>
+   /// Number of documents to collect in the current segment being processed </summary>
+   protected internal int segmentTotalCollect;
+   /// <summary>
+   /// True if the current segment being processed is sorted by <seealso cref="#sort"/> </summary>
+   protected internal bool segmentSorted;
+
+   private int numCollected;
+
+   /// <summary>
+   /// Create a new <seealso cref="EarlyTerminatingSortingCollector"/> instance.
+   /// </summary>
+   /// <param name="in">
+   ///   the collector to wrap </param>
+   /// <param name="sort">
+   ///   the sort you are sorting the search results on </param>
+   /// <param name="numDocsToCollect">
+   ///   the number of documents to collect on each segment. When wrapping
+   ///   a <seealso cref="TopDocsCollector"/>, this number should be the number of
+   ///   hits. </param>
+   /// <exception cref="System.ArgumentOutOfRangeException">
+   ///   if <paramref name="numDocsToCollect"/> is not positive </exception>
+   public EarlyTerminatingSortingCollector(Collector @in, Sort sort, int numDocsToCollect)
+   {
+     if (numDocsToCollect <= 0)
+     {
+       // BUGFIX: the converted code threw IllegalStateException (which does not
+       // exist in .NET) and reported the still-uninitialized field
+       // segmentTotalCollect instead of the offending argument value.
+       throw new System.ArgumentOutOfRangeException("numDocsToCollect", "numDocsToCollect must always be > 0, got " + numDocsToCollect);
+     }
+     this.@in = @in;
+     this.sort = sort;
+     this.numDocsToCollect = numDocsToCollect;
+   }
+
+   /// <summary>
+   /// Forwards the scorer to the wrapped collector. </summary>
+   public override Scorer Scorer
+   {
+     set
+     {
+       @in.Scorer = value;
+     }
+   }
+
+   /// <summary>
+   /// Collects the document in the wrapped collector, then terminates this
+   /// segment's collection (by throwing <seealso cref="CollectionTerminatedException"/>)
+   /// once the per-segment budget has been reached. </summary>
+   public override void collect(int doc)
+   {
+     @in.collect(doc);
+     if (++numCollected >= segmentTotalCollect)
+     {
+       throw new CollectionTerminatedException();
+     }
+   }
+
+   /// <summary>
+   /// Resets the per-segment state: detects via <seealso cref="SortingMergePolicy"/>
+   /// whether the new segment is sorted by <seealso cref="#sort"/> and, if so, limits
+   /// collection to numDocsToCollect documents; otherwise collects everything. </summary>
+   public override AtomicReaderContext NextReader
+   {
+     set
+     {
+       @in.NextReader = value;
+       segmentSorted = SortingMergePolicy.isSorted(value.reader(), sort);
+       segmentTotalCollect = segmentSorted ? numDocsToCollect : int.MaxValue;
+       numCollected = 0;
+     }
+   }
+
+   /// <summary>
+   /// Out-of-order collection is only acceptable while the current segment is
+   /// not sorted, since early termination relies on in-order collection. </summary>
+   public override bool acceptsDocsOutOfOrder()
+   {
+     return !segmentSorted && @in.acceptsDocsOutOfOrder();
+   }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs b/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs
new file mode 100644
index 0000000..f315b9c
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/Sorter/Sorter.cs
@@ -0,0 +1,404 @@
+using System;
+using System.Diagnostics;
+using System.IO;
+
+namespace org.apache.lucene.index.sorter
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using FieldComparator = org.apache.lucene.search.FieldComparator;
+ using Scorer = org.apache.lucene.search.Scorer;
+ using Sort = org.apache.lucene.search.Sort;
+ using SortField = org.apache.lucene.search.SortField;
+ using TimSorter = org.apache.lucene.util.TimSorter;
+ using MonotonicAppendingLongBuffer = org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+
+ /// <summary>
+ /// Sorts documents of a given index by returning a permutation on the document
+ /// IDs.
+ /// @lucene.experimental
+ /// </summary>
+ internal sealed class Sorter
+ {
+   internal readonly Sort sort_Renamed;
+
+   /// <summary>
+   /// Creates a new Sorter to sort the index with <c>sort</c>. </summary>
+   /// <exception cref="System.ArgumentException"> if the sort needs relevance
+   ///   scores, which are not available when sorting an index offline </exception>
+   internal Sorter(Sort sort)
+   {
+     if (sort.needsScores())
+     {
+       throw new System.ArgumentException("Cannot sort an index with a Sort that refers to the relevance score");
+     }
+     this.sort_Renamed = sort;
+   }
+
+   /// <summary>
+   /// A permutation of doc IDs. For every document ID between <tt>0</tt> and
+   /// <seealso cref="IndexReader#maxDoc()"/>, <code>oldToNew(newToOld(docID))</code> must
+   /// return <code>docID</code>.
+   /// </summary>
+   internal abstract class DocMap
+   {
+     /// <summary>
+     /// Given a doc ID from the original index, return its ordinal in the
+     /// sorted index.
+     /// </summary>
+     internal abstract int oldToNew(int docID);
+
+     /// <summary>
+     /// Given the ordinal of a doc ID, return its doc ID in the original index. </summary>
+     internal abstract int newToOld(int docID);
+
+     /// <summary>
+     /// Return the number of documents in this map. This must be equal to the
+     /// <seealso cref="AtomicReader#maxDoc() number of documents"/> of the
+     /// <seealso cref="AtomicReader"/> which is sorted.
+     /// </summary>
+     internal abstract int size();
+   }
+
+   /// <summary>
+   /// Check consistency of a <seealso cref="DocMap"/>, useful for assertions. </summary>
+   internal static bool isConsistent(DocMap docMap)
+   {
+     int maxDoc = docMap.size();
+     for (int i = 0; i < maxDoc; ++i)
+     {
+       int newID = docMap.oldToNew(i);
+       int oldID = docMap.newToOld(newID);
+       Debug.Assert(newID >= 0 && newID < maxDoc, "doc IDs must be in [0-" + maxDoc + "[, got " + newID);
+       Debug.Assert(i == oldID, "mapping is inconsistent: " + i + " --oldToNew--> " + newID + " --newToOld--> " + oldID);
+       if (i != oldID || newID < 0 || newID >= maxDoc)
+       {
+         return false;
+       }
+     }
+     return true;
+   }
+
+   /// <summary>
+   /// A comparator of doc IDs. </summary>
+   internal abstract class DocComparator
+   {
+     /// <summary>
+     /// Compare docID1 against docID2. The contract for the return value is the
+     /// same as <seealso cref="Comparator#compare(Object, Object)"/>.
+     /// </summary>
+     public abstract int compare(int docID1, int docID2);
+   }
+
+   /// <summary>
+   /// A TimSorter that sorts an array of doc IDs according to a DocComparator. </summary>
+   private sealed class DocValueSorter : TimSorter
+   {
+     internal readonly int[] docs;
+     internal readonly Sorter.DocComparator comparator;
+     internal readonly int[] tmp; // scratch slots used by save/restore during merges
+
+     internal DocValueSorter(int[] docs, Sorter.DocComparator comparator) : base(docs.Length / 64)
+     {
+       this.docs = docs;
+       this.comparator = comparator;
+       tmp = new int[docs.Length / 64];
+     }
+
+     protected internal override int compare(int i, int j)
+     {
+       return comparator.compare(docs[i], docs[j]);
+     }
+
+     protected internal override void swap(int i, int j)
+     {
+       int tmpDoc = docs[i];
+       docs[i] = docs[j];
+       docs[j] = tmpDoc;
+     }
+
+     protected internal override void copy(int src, int dest)
+     {
+       docs[dest] = docs[src];
+     }
+
+     protected internal override void save(int i, int len)
+     {
+       Array.Copy(docs, i, tmp, 0, len);
+     }
+
+     protected internal override void restore(int i, int j)
+     {
+       docs[j] = tmp[i];
+     }
+
+     protected internal override int compareSaved(int i, int j)
+     {
+       return comparator.compare(tmp[i], docs[j]);
+     }
+   }
+
+   /// <summary>
+   /// Computes the old-to-new permutation over the given comparator, or returns
+   /// <c>null</c> when the index is already sorted. </summary>
+   private static Sorter.DocMap sort(int maxDoc, DocComparator comparator)
+   {
+     // check if the index is sorted
+     bool sorted = true;
+     for (int i = 1; i < maxDoc; ++i)
+     {
+       if (comparator.compare(i - 1, i) > 0)
+       {
+         sorted = false;
+         break;
+       }
+     }
+     if (sorted)
+     {
+       return null;
+     }
+
+     // sort doc IDs
+     int[] docs = new int[maxDoc];
+     for (int i = 0; i < maxDoc; i++)
+     {
+       docs[i] = i;
+     }
+
+     DocValueSorter sorter = new DocValueSorter(docs, comparator);
+     // It can be common to sort a reader, add docs, sort it again, ... and in
+     // that case timSort can save a lot of time
+     sorter.sort(0, docs.Length); // docs is now the newToOld mapping
+
+     // The reason why we use MonotonicAppendingLongBuffer here is that it
+     // wastes very little memory if the index is in random order but can save
+     // a lot of memory if the index is already "almost" sorted
+     MonotonicAppendingLongBuffer newToOld = new MonotonicAppendingLongBuffer();
+     for (int i = 0; i < maxDoc; ++i)
+     {
+       newToOld.add(docs[i]);
+     }
+     newToOld.freeze();
+
+     // invert the permutation in place: docs becomes the oldToNew mapping
+     for (int i = 0; i < maxDoc; ++i)
+     {
+       docs[(int) newToOld.get(i)] = i;
+     }
+
+     MonotonicAppendingLongBuffer oldToNew = new MonotonicAppendingLongBuffer();
+     for (int i = 0; i < maxDoc; ++i)
+     {
+       oldToNew.add(docs[i]);
+     }
+     oldToNew.freeze();
+
+     return new DocMapAnonymousInnerClassHelper(maxDoc, newToOld, oldToNew);
+   }
+
+   private class DocMapAnonymousInnerClassHelper : Sorter.DocMap
+   {
+     private readonly int maxDoc;
+     // BUGFIX: fields renamed from newToOld/oldToNew — a C# type cannot contain
+     // a field and a method with the same name (legal in the converted Java).
+     private readonly MonotonicAppendingLongBuffer newToOldBuffer;
+     private readonly MonotonicAppendingLongBuffer oldToNewBuffer;
+
+     public DocMapAnonymousInnerClassHelper(int maxDoc, MonotonicAppendingLongBuffer newToOld, MonotonicAppendingLongBuffer oldToNew)
+     {
+       this.maxDoc = maxDoc;
+       this.newToOldBuffer = newToOld;
+       this.oldToNewBuffer = oldToNew;
+     }
+
+     // BUGFIX: overrides must keep the 'internal' accessibility of the abstract
+     // members they override; the converter emitted 'public override'.
+     internal override int oldToNew(int docID)
+     {
+       return (int) oldToNewBuffer.get(docID);
+     }
+
+     internal override int newToOld(int docID)
+     {
+       return (int) newToOldBuffer.get(docID);
+     }
+
+     internal override int size()
+     {
+       return maxDoc;
+     }
+   }
+
+   /// <summary>
+   /// Returns a mapping from the old document ID to its new location in the
+   /// sorted index. Implementations can use the auxiliary
+   /// <seealso cref="#sort(int, DocComparator)"/> to compute the old-to-new permutation
+   /// given a list of documents and their corresponding values.
+   /// <para>
+   /// A return value of <tt>null</tt> is allowed and means that
+   /// <code>reader</code> is already sorted.
+   /// </para>
+   /// <para>
+   /// <b>NOTE:</b> deleted documents are expected to appear in the mapping as
+   /// well, they will however be marked as deleted in the sorted view.
+   /// </para>
+   /// </summary>
+   internal DocMap sort(AtomicReader reader)
+   {
+     SortField[] fields = sort_Renamed.Sort;
+     int[] reverseMul = new int[fields.Length];
+     // BUGFIX: the converter emitted the illegal C# type 'FieldComparator<?>[]';
+     // use the non-generic FieldComparator base type instead.
+     FieldComparator[] comparators = new FieldComparator[fields.Length];
+
+     for (int i = 0; i < fields.Length; i++)
+     {
+       reverseMul[i] = fields[i].Reverse ? -1 : 1;
+       // one value slot is enough: slot 0 is reused as scratch space per comparison
+       comparators[i] = fields[i].getComparator(1, i);
+       comparators[i].NextReader = reader.Context;
+       comparators[i].Scorer = FAKESCORER;
+     }
+     DocComparator comparator = new DocComparatorAnonymousInnerClassHelper(this, reverseMul, comparators);
+     return sort(reader.maxDoc(), comparator);
+   }
+
+   private class DocComparatorAnonymousInnerClassHelper : DocComparator
+   {
+     private readonly Sorter outerInstance;
+     private readonly int[] reverseMul;
+     private readonly FieldComparator[] comparators;
+
+     // BUGFIX: the converter emitted a generic constructor
+     // ('DocMapAnonymousInnerClassHelper<T1>(...)'), which C# does not allow;
+     // the non-generic FieldComparator base type makes it unnecessary.
+     public DocComparatorAnonymousInnerClassHelper(Sorter outerInstance, int[] reverseMul, FieldComparator[] comparators)
+     {
+       this.outerInstance = outerInstance;
+       this.reverseMul = reverseMul;
+       this.comparators = comparators;
+     }
+
+     public override int compare(int docID1, int docID2)
+     {
+       try
+       {
+         for (int i = 0; i < comparators.Length; i++)
+         {
+           // TODO: would be better if copy() didnt cause a term lookup in TermOrdVal & co,
+           // the segments are always the same here...
+           comparators[i].copy(0, docID1);
+           comparators[i].Bottom = 0;
+           int comp = reverseMul[i] * comparators[i].compareBottom(docID2);
+           if (comp != 0)
+           {
+             return comp;
+           }
+         }
+         // BUGFIX: 'int.compare' does not exist in C#; CompareTo is the
+         // equivalent of Java's Integer.compare for the docid order tiebreak.
+         return docID1.CompareTo(docID2);
+       }
+       catch (IOException e)
+       {
+         // BUGFIX: System.Exception has no (Exception) constructor; keep the
+         // cause as the inner exception (mirrors Java's 'new RuntimeException(e)').
+         throw new Exception(e.Message, e);
+       }
+     }
+   }
+
+   /// <summary>
+   /// Returns the identifier of this <seealso cref="Sorter"/>.
+   /// <para>This identifier is similar to <seealso cref="Object#hashCode()"/> and should be
+   /// chosen so that two instances of this class that sort documents likewise
+   /// will have the same identifier. On the contrary, this identifier should be
+   /// different on different <seealso cref="Sort sorts"/>.
+   /// </para>
+   /// </summary>
+   public string ID
+   {
+     get
+     {
+       return sort_Renamed.ToString();
+     }
+   }
+
+   public override string ToString()
+   {
+     return ID;
+   }
+
+   // A scorer that throws on every call: comparators require a Scorer to be
+   // set, but score-based sorts are rejected in the constructor, so this is
+   // never actually invoked.
+   internal static readonly Scorer FAKESCORER = new ScorerAnonymousInnerClassHelper();
+
+   private class ScorerAnonymousInnerClassHelper : Scorer
+   {
+     public ScorerAnonymousInnerClassHelper() : base(null)
+     {
+     }
+
+     public override float score()
+     {
+       throw new System.NotSupportedException();
+     }
+
+     public override int freq()
+     {
+       throw new System.NotSupportedException();
+     }
+
+     public override int docID()
+     {
+       throw new System.NotSupportedException();
+     }
+
+     public override int nextDoc()
+     {
+       throw new System.NotSupportedException();
+     }
+
+     public override int advance(int target)
+     {
+       throw new System.NotSupportedException();
+     }
+
+     public override long cost()
+     {
+       throw new System.NotSupportedException();
+     }
+   }
+ }
+
+}
\ No newline at end of file