You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/09/16 00:47:07 UTC
[09/11] Skeleton porting of Lucene.Net.Misc
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/SortingAtomicReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/Sorter/SortingAtomicReader.cs b/src/Lucene.Net.Misc/Index/Sorter/SortingAtomicReader.cs
new file mode 100644
index 0000000..eb87148
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/Sorter/SortingAtomicReader.cs
@@ -0,0 +1,1081 @@
+using System;
+using System.Diagnostics;
+
+namespace org.apache.lucene.index.sorter
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using IndexOptions = org.apache.lucene.index.FieldInfo.IndexOptions;
+ using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+ using Sort = org.apache.lucene.search.Sort;
+ using IndexInput = org.apache.lucene.store.IndexInput;
+ using IndexOutput = org.apache.lucene.store.IndexOutput;
+ using RAMFile = org.apache.lucene.store.RAMFile;
+ using RAMInputStream = org.apache.lucene.store.RAMInputStream;
+ using RAMOutputStream = org.apache.lucene.store.RAMOutputStream;
+ using ArrayUtil = org.apache.lucene.util.ArrayUtil;
+ using Bits = org.apache.lucene.util.Bits;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+ using TimSorter = org.apache.lucene.util.TimSorter;
+ using CompiledAutomaton = org.apache.lucene.util.automaton.CompiledAutomaton;
+
+ /// <summary>
+ /// An <seealso cref="AtomicReader"/> which supports sorting documents by a given
+ /// <seealso cref="Sort"/>. You can use this class to sort an index as follows:
+ ///
+ /// <pre class="prettyprint">
+ /// IndexWriter writer; // writer to which the sorted index will be added
+ /// DirectoryReader reader; // reader on the input index
+ /// Sort sort; // determines how the documents are sorted
+ /// AtomicReader sortingReader = SortingAtomicReader.wrap(SlowCompositeReaderWrapper.wrap(reader), sort);
+ /// writer.addIndexes(sortingReader);
+ /// writer.close();
+ /// reader.close();
+ /// </pre>
+ ///
+ /// @lucene.experimental
+ /// </summary>
+ public class SortingAtomicReader : FilterAtomicReader
+ {
+
+ /// <summary>
+ /// <seealso cref="FilterFields"/> whose per-field <seealso cref="Terms"/> are handed out
+ /// wrapped in <seealso cref="SortingTerms"/>.
+ /// </summary>
+ private class SortingFields : FilterFields
+ {
+ internal readonly Sorter.DocMap docMap;
+ internal readonly FieldInfos infos;
+
+ /// <summary>
+ /// Wraps <paramref name="in"/>, keeping the field infos and doc map for later use. </summary>
+ public SortingFields(Fields @in, FieldInfos infos, Sorter.DocMap docMap) : base(@in)
+ {
+ this.infos = infos;
+ this.docMap = docMap;
+ }
+
+ /// <summary>
+ /// Returns the wrapped terms of <paramref name="field"/>, or <c>null</c> when
+ /// the underlying fields return <c>null</c> for it. </summary>
+ public override Terms terms(string field)
+ {
+ Terms wrapped = @in.terms(field);
+ if (wrapped != null)
+ {
+ return new SortingTerms(wrapped, infos.fieldInfo(field).IndexOptions, docMap);
+ }
+ return null;
+ }
+
+ }
+
+ /// <summary>
+ /// <seealso cref="FilterTerms"/> that hands out <seealso cref="SortingTermsEnum"/> instances. </summary>
+ private class SortingTerms : FilterTerms
+ {
+ internal readonly Sorter.DocMap docMap;
+ internal readonly IndexOptions indexOptions;
+ /// <summary>
+ /// Wraps <paramref name="in"/>, remembering the index options and the doc map. </summary>
+ public SortingTerms(Terms @in, IndexOptions indexOptions, Sorter.DocMap docMap) : base(@in)
+ {
+ this.indexOptions = indexOptions;
+ this.docMap = docMap;
+ }
+
+ /// <summary>
+ /// Wraps the enum obtained from the underlying terms, passing <paramref name="reuse"/> through. </summary>
+ public override TermsEnum iterator(TermsEnum reuse)
+ {
+ TermsEnum inner = @in.iterator(reuse);
+ return new SortingTermsEnum(inner, docMap, indexOptions);
+ }
+
+ /// <summary>
+ /// Wraps the intersecting enum obtained from the underlying terms. </summary>
+ public override TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm)
+ {
+ TermsEnum inner = @in.intersect(compiled, startTerm);
+ return new SortingTermsEnum(inner, docMap, indexOptions);
+ }
+ }
+
+ private class SortingTermsEnum : FilterTermsEnum
+ {
+
+ internal readonly Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
+ internal readonly IndexOptions indexOptions;
+
+ /// <summary>
+ /// Wraps <paramref name="in"/>; <paramref name="docMap"/> and <paramref name="indexOptions"/> are kept for building the sorting enums. </summary>
+ public SortingTermsEnum(TermsEnum @in, Sorter.DocMap docMap, IndexOptions indexOptions) : base(@in)
+ {
+ this.indexOptions = indexOptions;
+ this.docMap = docMap;
+ }
+
+ /// <summary>
+ /// Adapts <paramref name="liveDocs"/> so it can be handed to the wrapped enum: bit
+ /// <c>i</c> of the result is bit <c>docMap.oldToNew(i)</c> of the argument.
+ /// A <c>null</c> argument yields <c>null</c>.
+ /// </summary>
+ internal virtual Bits newToOld(Bits liveDocs)
+ {
+ // null liveDocs is passed through unchanged
+ return liveDocs == null ? null : new BitsAnonymousInnerClassHelper(this, liveDocs);
+ }
+
+ /// <summary>
+ /// <seealso cref="Bits"/> view whose bit <c>index</c> is the wrapped bit <c>docMap.oldToNew(index)</c>. </summary>
+ private class BitsAnonymousInnerClassHelper : Bits
+ {
+ private readonly SortingTermsEnum outerInstance;
+ private Bits liveDocs;
+
+ public BitsAnonymousInnerClassHelper(SortingTermsEnum outerInstance, Bits liveDocs)
+ {
+ this.liveDocs = liveDocs;
+ this.outerInstance = outerInstance;
+ }
+
+ public override bool get(int index)
+ {
+ int mapped = outerInstance.docMap.oldToNew(index);
+ return liveDocs.get(mapped);
+ }
+
+ public override int length()
+ {
+ return liveDocs.length();
+ }
+ }
+
+ /// <summary>
+ /// Returns a <seealso cref="SortingDocsEnum"/> built from the wrapped enum's docs.
+ /// </summary>
+ /// <param name="liveDocs"> live docs, adapted with <c>newToOld</c> before being passed down </param>
+ /// <param name="reuse"> enum to reuse; a <seealso cref="SortingDocsEnum"/> is unwrapped first </param>
+ /// <param name="flags"> passed down unchanged; also consulted for <c>FLAG_FREQS</c> </param>
+ /// <returns> a new enum over the doc IDs mapped through <c>docMap</c> </returns>
+ public override DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
+ {
+ // if we're asked to reuse the given DocsEnum and it is Sorting, pass down
+ // the wrapped one, since some Codecs expect it.
+ SortingDocsEnum wrapReuse = reuse as SortingDocsEnum;
+ DocsEnum inReuse;
+ if (wrapReuse == null)
+ {
+ inReuse = reuse;
+ }
+ else
+ {
+ inReuse = wrapReuse.Wrapped;
+ }
+
+ Bits oldLiveDocs = newToOld(liveDocs);
+ DocsEnum inDocs = @in.docs(oldLiveDocs, inReuse, flags);
+
+ // freqs are kept only if the field indexes them and the caller asked for them
+ bool freqsIndexed = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ bool freqsRequested = (flags & DocsEnum.FLAG_FREQS) != 0;
+ bool withFreqs = freqsIndexed && freqsRequested;
+ int maxDoc = docMap.size();
+ return new SortingDocsEnum(maxDoc, wrapReuse, inDocs, withFreqs, docMap);
+ }
+
+ /// <summary>
+ /// Returns a <seealso cref="SortingDocsAndPositionsEnum"/> built from the wrapped enum's
+ /// postings, or <c>null</c> when the wrapped enum returns <c>null</c>.
+ /// </summary>
+ /// <param name="liveDocs"> live docs, adapted with <c>newToOld</c> before being passed down </param>
+ /// <param name="reuse"> enum to reuse; a <seealso cref="SortingDocsAndPositionsEnum"/> is
+ /// unwrapped first and its buffers are offered to the new enum </param>
+ /// <param name="flags"> passed down unchanged </param>
+ /// <returns> the sorting enum, or <c>null</c> </returns>
+ public override DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
+ {
+ // if we're asked to reuse the given DocsEnum and it is Sorting, pass down
+ // the wrapped one, since some Codecs expect it.
+ SortingDocsAndPositionsEnum wrapReuse = reuse as SortingDocsAndPositionsEnum;
+ DocsAndPositionsEnum inReuse;
+ if (wrapReuse == null)
+ {
+ inReuse = reuse;
+ }
+ else
+ {
+ inReuse = wrapReuse.Wrapped;
+ }
+
+ Bits oldLiveDocs = newToOld(liveDocs);
+ DocsAndPositionsEnum inDocsAndPositions = @in.docsAndPositions(oldLiveDocs, inReuse, flags);
+ if (inDocsAndPositions == null)
+ {
+ // nothing to wrap
+ return null;
+ }
+
+ // we ignore the fact that offsets may be stored but not asked for,
+ // since this code is expected to be used during addIndexes which will
+ // ask for everything. if that assumption changes in the future, we can
+ // factor in whether 'flags' says offsets are not required.
+ bool storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+
+ int maxDoc = docMap.size();
+ return new SortingDocsAndPositionsEnum(maxDoc, wrapReuse, inDocsAndPositions, docMap, storeOffsets);
+ }
+
+ }
+
+ /// <summary> <seealso cref="BinaryDocValues"/> view that reads values at <c>docMap.newToOld(docID)</c>. </summary>
+ private class SortingBinaryDocValues : BinaryDocValues
+ {
+ internal readonly BinaryDocValues @in;
+ internal readonly Sorter.DocMap docMap;
+ internal SortingBinaryDocValues(BinaryDocValues @in, Sorter.DocMap docMap)
+ {
+ this.docMap = docMap;
+ this.@in = @in;
+ }
+
+ public override void get(int docID, BytesRef result)
+ {
+ int oldDocID = docMap.newToOld(docID);
+ @in.get(oldDocID, result);
+ }
+ }
+
+ /// <summary>
+ /// <seealso cref="NumericDocValues"/> view that reads values at <c>docMap.newToOld(docID)</c>. </summary>
+ private class SortingNumericDocValues : NumericDocValues
+ {
+ internal readonly NumericDocValues @in;
+ internal readonly Sorter.DocMap docMap;
+
+ public SortingNumericDocValues(NumericDocValues @in, Sorter.DocMap docMap)
+ {
+ this.docMap = docMap;
+ this.@in = @in;
+ }
+
+ public override long get(int docID)
+ {
+ int oldDocID = docMap.newToOld(docID);
+ return @in.get(oldDocID);
+ }
+ }
+
+ /// <summary>
+ /// <seealso cref="Bits"/> view whose bit <c>index</c> is the wrapped bit <c>docMap.newToOld(index)</c>; same length as the wrapped bits. </summary>
+ private class SortingBits : Bits
+ {
+ internal readonly Bits @in;
+ internal readonly Sorter.DocMap docMap;
+
+ public SortingBits(Bits @in, Sorter.DocMap docMap)
+ {
+ this.docMap = docMap;
+ this.@in = @in;
+ }
+
+ public override bool get(int index)
+ {
+ int oldIndex = docMap.newToOld(index);
+ return @in.get(oldIndex);
+ }
+
+ public override int length()
+ {
+ return @in.length();
+ }
+ }
+
+ /// <summary>
+ /// <seealso cref="SortedDocValues"/> view: per-document lookups go through <c>docMap.newToOld</c>, ordinal and term lookups are passed straight through. </summary>
+ private class SortingSortedDocValues : SortedDocValues
+ {
+ internal readonly SortedDocValues @in;
+ internal readonly Sorter.DocMap docMap;
+
+ internal SortingSortedDocValues(SortedDocValues @in, Sorter.DocMap docMap)
+ {
+ this.docMap = docMap;
+ this.@in = @in;
+ }
+
+ public override int getOrd(int docID)
+ {
+ int oldDocID = docMap.newToOld(docID);
+ return @in.getOrd(oldDocID);
+ }
+
+ public override void lookupOrd(int ord, BytesRef result)
+ {
+ @in.lookupOrd(ord, result);
+ }
+
+ public override int ValueCount
+ {
+ get { return @in.ValueCount; }
+ }
+
+ public override void get(int docID, BytesRef result)
+ {
+ int oldDocID = docMap.newToOld(docID);
+ @in.get(oldDocID, result);
+ }
+
+ public override int lookupTerm(BytesRef key)
+ {
+ return @in.lookupTerm(key);
+ }
+ }
+
+ /// <summary>
+ /// <seealso cref="SortedSetDocValues"/> view: the current document is set on the wrapped instance as
+ /// <c>docMap.newToOld(value)</c>; all ordinal and term operations are passed straight through. </summary>
+ private class SortingSortedSetDocValues : SortedSetDocValues
+ {
+ internal readonly SortedSetDocValues @in;
+ internal readonly Sorter.DocMap docMap;
+
+ internal SortingSortedSetDocValues(SortedSetDocValues @in, Sorter.DocMap docMap)
+ {
+ this.docMap = docMap;
+ this.@in = @in;
+ }
+
+ public override long nextOrd()
+ {
+ return @in.nextOrd();
+ }
+
+ public override int Document
+ {
+ set
+ {
+ int oldDocID = docMap.newToOld(value);
+ @in.Document = oldDocID;
+ }
+ }
+
+ public override void lookupOrd(long ord, BytesRef result)
+ {
+ @in.lookupOrd(ord, result);
+ }
+
+ public override long ValueCount
+ {
+ get { return @in.ValueCount; }
+ }
+
+ public override long lookupTerm(BytesRef key)
+ {
+ return @in.lookupTerm(key);
+ }
+ }
+
+ internal class SortingDocsEnum : FilterDocsEnum
+ {
+
+ private sealed class DocFreqSorter : TimSorter
+ {
+ // Sorts docs (and freqs, when non-null) as parallel arrays keyed by doc ID.
+ internal int[] docs; // doc IDs being sorted; set by reset()
+ internal int[] freqs; // parallel freqs, or null when freqs are not tracked
+ internal readonly int[] tmpDocs; // scratch buffer filled by save()
+ internal int[] tmpFreqs; // scratch buffer for freqs, allocated lazily in reset()
+
+ public DocFreqSorter(int maxDoc) : base(maxDoc / 64)
+ {
+ this.tmpDocs = new int[maxDoc / 64]; // same size as the value handed to the TimSorter base constructor
+ }
+
+ public void reset(int[] docs, int[] freqs)
+ {
+ this.docs = docs;
+ this.freqs = freqs;
+ if (freqs != null && tmpFreqs == null) // first time freqs are supplied to this sorter
+ {
+ tmpFreqs = new int[tmpDocs.Length];
+ }
+ }
+
+ protected internal override int compare(int i, int j)
+ {
+ return docs[i] - docs[j]; // NOTE(review): assumes doc IDs are non-negative so the subtraction cannot overflow - confirm
+ }
+
+ protected internal override void swap(int i, int j)
+ {
+ int tmpDoc = docs[i];
+ docs[i] = docs[j];
+ docs[j] = tmpDoc;
+
+ if (freqs != null) // keep the freq attached to its doc
+ {
+ int tmpFreq = freqs[i];
+ freqs[i] = freqs[j];
+ freqs[j] = tmpFreq;
+ }
+ }
+
+ protected internal override void copy(int src, int dest)
+ {
+ docs[dest] = docs[src];
+ if (freqs != null)
+ {
+ freqs[dest] = freqs[src];
+ }
+ }
+
+ protected internal override void save(int i, int len)
+ {
+ Array.Copy(docs, i, tmpDocs, 0, len); // stash docs[i .. i+len) at the start of the scratch buffer
+ if (freqs != null)
+ {
+ Array.Copy(freqs, i, tmpFreqs, 0, len);
+ }
+ }
+
+ protected internal override void restore(int i, int j)
+ {
+ docs[j] = tmpDocs[i]; // i indexes the scratch buffer, j the live array
+ if (freqs != null)
+ {
+ freqs[j] = tmpFreqs[i];
+ }
+ }
+
+ protected internal override int compareSaved(int i, int j)
+ {
+ return tmpDocs[i] - docs[j]; // saved slot i against live slot j
+ }
+ }
+
+ internal readonly int maxDoc;
+ internal readonly DocFreqSorter sorter;
+ internal int[] docs;
+ internal int[] freqs;
+ internal int docIt = -1;
+ internal readonly int upto;
+ internal readonly bool withFreqs;
+
+ /// <summary>
+ /// Drains <paramref name="in"/>, maps every doc ID through <c>docMap.oldToNew</c> and
+ /// sorts the buffered doc IDs (together with their freqs when <paramref name="withFreqs"/>). </summary>
+ /// <param name="maxDoc"> kept for later reuse checks and used to size a new sorter </param>
+ /// <param name="reuse"> previous instance whose buffers (and sorter, if maxDoc matches) are taken over; may be null </param>
+ /// <param name="in"> the enum to read docs from </param>
+ /// <param name="withFreqs"> whether freqs are read and kept alongside the docs </param>
+ /// <param name="docMap"> supplies the old-to-new doc ID mapping </param>
+ internal SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, DocsEnum @in, bool withFreqs, Sorter.DocMap docMap) : base(@in)
+ {
+ this.maxDoc = maxDoc;
+ this.withFreqs = withFreqs;
+ if (reuse == null)
+ {
+ docs = new int[64];
+ sorter = new DocFreqSorter(maxDoc);
+ }
+ else
+ {
+ // the sorter's scratch space is sized from maxDoc, so it is only shared when maxDoc matches
+ sorter = reuse.maxDoc == maxDoc ? reuse.sorter : new DocFreqSorter(maxDoc);
+ docs = reuse.docs;
+ freqs = reuse.freqs; // maybe null
+ }
+ docIt = -1;
+ int count = 0;
+ int doc;
+ if (!withFreqs)
+ {
+ freqs = null;
+ while ((doc = @in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ if (count >= docs.Length)
+ {
+ docs = ArrayUtil.grow(docs, docs.Length + 1);
+ }
+ docs[count] = docMap.oldToNew(doc);
+ count++;
+ }
+ }
+ else
+ {
+ if (freqs == null || freqs.Length < docs.Length)
+ {
+ freqs = new int[docs.Length];
+ }
+ while ((doc = @in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ if (count >= docs.Length)
+ {
+ docs = ArrayUtil.grow(docs, docs.Length + 1);
+ freqs = ArrayUtil.grow(freqs, freqs.Length + 1);
+ }
+ docs[count] = docMap.oldToNew(doc);
+ freqs[count] = @in.freq();
+ count++;
+ }
+ }
+ // TimSort can save much time compared to other sorts in case of
+ // reverse sorting, or when sorting a concatenation of sorted readers
+ sorter.reset(docs, freqs);
+ sorter.sort(0, count);
+ upto = count;
+ }
+
+ /// <summary>
+ /// For testing: whether <paramref name="other"/> is a <seealso cref="SortingDocsEnum"/>
+ /// that shares this instance's <c>docs</c> array. </summary>
+ internal virtual bool reused(DocsEnum other)
+ {
+ SortingDocsEnum sorting = other as SortingDocsEnum;
+ // reference comparison: reuse means the very same array was taken over
+ return sorting != null && docs == sorting.docs;
+ }
+
+ /// <summary>
+ /// Delegates to <c>slowAdvance</c>; see the note in the body for why. </summary>
+ public override int advance(int target)
+ {
+ // need to support it for checkIndex, but in practice it won't be called, so
+ // don't bother to implement efficiently for now.
+ int reached = slowAdvance(target);
+ return reached;
+ }
+
+ public override int docID()
+ {
+ return docIt >= 0 ? (docIt < upto ? docs[docIt] : NO_MORE_DOCS) : -1; // -1 while docIt is still negative
+ }
+
+ /// <summary>
+ /// Freq of the current doc when freqs were kept and the enum is not exhausted, otherwise 1. </summary>
+ public override int freq()
+ {
+ return (!withFreqs || docIt >= upto) ? 1 : freqs[docIt];
+ }
+
+ /// <summary>
+ /// Steps to the next buffered doc ID.
+ /// </summary>
+ /// <returns> that doc ID, or <c>NO_MORE_DOCS</c> once all buffered docs are consumed </returns>
+ public override int nextDoc()
+ {
+ docIt++;
+ // docs[0 .. upto) holds the sorted doc IDs
+ return docIt < upto ? docs[docIt] : NO_MORE_DOCS;
+ }
+
+ /// <summary>
+ /// Returns the wrapped <seealso cref="DocsEnum"/>, i.e. the enum this instance was
+ /// built from. <c>SortingTermsEnum.docs</c> hands it back to the wrapped terms
+ /// enum when this instance is offered for reuse.
+ /// </summary>
+ internal virtual DocsEnum Wrapped
+ {
+ get { return @in; }
+ }
+ }
+
+ internal class SortingDocsAndPositionsEnum : FilterDocsAndPositionsEnum
+ {
+
+ /// <summary>
+ /// A <seealso cref="TimSorter"/> which sorts two parallel arrays of doc IDs and
+ /// offsets in one go. Every time a doc ID is 'swapped', its corresponding offset
+ /// is swapped too.
+ /// </summary>
+ private sealed class DocOffsetSorter : TimSorter
+ {
+
+ internal int[] docs; // doc IDs being sorted; set by reset()
+ internal long[] offsets; // parallel offsets; set by reset()
+ internal readonly int[] tmpDocs; // scratch buffer filled by save()
+ internal readonly long[] tmpOffsets; // scratch buffer filled by save()
+
+ public DocOffsetSorter(int maxDoc) : base(maxDoc / 64)
+ {
+ this.tmpDocs = new int[maxDoc / 64]; // same size as the value handed to the TimSorter base constructor
+ this.tmpOffsets = new long[maxDoc / 64];
+ }
+
+ public void reset(int[] docs, long[] offsets)
+ {
+ this.docs = docs;
+ this.offsets = offsets;
+ }
+
+ protected internal override int compare(int i, int j)
+ {
+ return docs[i] - docs[j]; // NOTE(review): assumes doc IDs are non-negative so the subtraction cannot overflow - confirm
+ }
+
+ protected internal override void swap(int i, int j)
+ {
+ int tmpDoc = docs[i];
+ docs[i] = docs[j];
+ docs[j] = tmpDoc;
+
+ long tmpOffset = offsets[i];
+ offsets[i] = offsets[j];
+ offsets[j] = tmpOffset;
+ }
+
+ protected internal override void copy(int src, int dest)
+ {
+ docs[dest] = docs[src];
+ offsets[dest] = offsets[src];
+ }
+
+ protected internal override void save(int i, int len)
+ {
+ Array.Copy(docs, i, tmpDocs, 0, len); // stash docs[i .. i+len) at the start of the scratch buffer
+ Array.Copy(offsets, i, tmpOffsets, 0, len);
+ }
+
+ protected internal override void restore(int i, int j)
+ {
+ docs[j] = tmpDocs[i]; // i indexes the scratch buffers, j the live arrays
+ offsets[j] = tmpOffsets[i];
+ }
+
+ protected internal override int compareSaved(int i, int j)
+ {
+ return tmpDocs[i] - docs[j]; // saved slot i against live slot j
+ }
+ }
+
+ internal readonly int maxDoc;
+ internal readonly DocOffsetSorter sorter;
+ internal int[] docs;
+ internal long[] offsets;
+ internal readonly int upto;
+
+ internal readonly IndexInput postingInput;
+ internal readonly bool storeOffsets;
+
+ internal int docIt = -1;
+ internal int pos;
+ internal int startOffset_Renamed = -1;
+ internal int endOffset_Renamed = -1;
+ internal readonly BytesRef payload;
+ internal int currFreq;
+
+ internal readonly RAMFile file;
+
+ /// <summary>
+ /// Drains <paramref name="in"/> into an in-memory file, recording for every doc its mapped
+ /// doc ID and the file offset of its positions, then sorts both arrays by doc ID. </summary>
+ /// <param name="reuse"> previous instance whose buffers, file (and sorter, if maxDoc matches) are taken over; may be null </param>
+ /// <param name="storeOffsets"> whether start/end offsets are written for every position </param>
+ internal SortingDocsAndPositionsEnum(int maxDoc, SortingDocsAndPositionsEnum reuse, DocsAndPositionsEnum @in, Sorter.DocMap docMap, bool storeOffsets) : base(@in)
+ {
+ this.maxDoc = maxDoc;
+ this.storeOffsets = storeOffsets;
+ if (reuse != null)
+ {
+ docs = reuse.docs;
+ offsets = reuse.offsets;
+ payload = reuse.payload;
+ file = reuse.file;
+ if (reuse.maxDoc == maxDoc)
+ {
+ sorter = reuse.sorter;
+ }
+ else
+ {
+ sorter = new DocOffsetSorter(maxDoc);
+ }
+ }
+ else
+ {
+ docs = new int[32];
+ offsets = new long[32];
+ payload = new BytesRef(32);
+ file = new RAMFile();
+ sorter = new DocOffsetSorter(maxDoc);
+ }
+ IndexOutput @out = new RAMOutputStream(file);
+ int doc;
+ int i = 0;
+ while ((doc = @in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+ {
+ if (i == docs.Length)
+ {
+ // grow both parallel arrays together; Array.Resize is the .NET counterpart of
+ // Java's Arrays.copyOf (new array, existing elements copied, remainder zeroed)
+ int newLength = ArrayUtil.oversize(i + 1, 4);
+ Array.Resize(ref docs, newLength);
+ Array.Resize(ref offsets, newLength);
+ }
+ docs[i] = docMap.oldToNew(doc);
+ offsets[i] = @out.FilePointer;
+ addPositions(@in, @out);
+ i++;
+ }
+ upto = i;
+ sorter.reset(docs, offsets);
+ sorter.sort(0, upto);
+ @out.close();
+ this.postingInput = new RAMInputStream("", file);
+ }
+
+ /// <summary>
+ /// For testing: whether <paramref name="other"/> is a <seealso cref="SortingDocsAndPositionsEnum"/>
+ /// that shares this instance's <c>docs</c> array. </summary>
+ internal virtual bool reused(DocsAndPositionsEnum other)
+ {
+ SortingDocsAndPositionsEnum sorting = other as SortingDocsAndPositionsEnum;
+ // reference comparison: reuse means the very same array was taken over
+ return sorting != null && docs == sorting.docs;
+ }
+
+ /// <summary>
+ /// Serializes the positions of the current doc of <paramref name="in"/> to <paramref name="out"/>:
+ /// the freq, then per position a token (delta-encoded position, low bit set when a payload
+ /// follows), optionally the delta-encoded offsets, and the payload length and bytes if any.
+ /// This is the layout <c>nextDoc()</c> and <c>nextPosition()</c> read back.
+ /// </summary>
+ /// <param name="in"> enum positioned on the doc whose positions are written </param>
+ /// <param name="out"> output the encoded positions are appended to </param>
+ internal virtual void addPositions(DocsAndPositionsEnum @in, IndexOutput @out)
+ {
+ int positionCount = @in.freq();
+ @out.writeVInt(positionCount);
+
+ int lastPosition = 0;
+ int lastEndOffset = 0;
+ for (int remaining = positionCount; remaining > 0; remaining--)
+ {
+ int position = @in.nextPosition();
+ BytesRef currentPayload = @in.Payload;
+ bool hasPayload = currentPayload != null;
+ // The low-order bit of token is set only if there is a payload, the
+ // previous bits are the delta-encoded position.
+ int token = ((position - lastPosition) << 1) | (hasPayload ? 1 : 0);
+ @out.writeVInt(token);
+ lastPosition = position;
+
+ if (storeOffsets) // don't encode offsets if they are not stored
+ {
+ // start is written relative to the previous end, end relative to start
+ int start = @in.startOffset();
+ int end = @in.endOffset();
+ @out.writeVInt(start - lastEndOffset);
+ @out.writeVInt(end - start);
+ lastEndOffset = end;
+ }
+
+ if (hasPayload)
+ {
+ @out.writeVInt(currentPayload.length);
+ @out.writeBytes(currentPayload.bytes, currentPayload.offset, currentPayload.length);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Delegates to <c>slowAdvance</c>; see the note in the body for why. </summary>
+ public override int advance(int target)
+ {
+ // need to support it for checkIndex, but in practice it won't be called, so
+ // don't bother to implement efficiently for now.
+ int reached = slowAdvance(target);
+ return reached;
+ }
+
+ public override int docID()
+ {
+ return docIt >= 0 ? (docIt < upto ? docs[docIt] : NO_MORE_DOCS) : -1; // -1 while docIt is still negative
+ }
+
+ /// <summary>
+ /// Returns the end offset last computed by <c>nextPosition()</c> (only updated when offsets are stored); <c>nextDoc()</c> resets it to 0. </summary>
+ public override int endOffset()
+ {
+ return endOffset_Renamed;
+ }
+
+ /// <summary>
+ /// Returns the freq that the last successful <c>nextDoc()</c> read from the posting input. </summary>
+ public override int freq()
+ {
+ return currFreq;
+ }
+
+ /// <summary>
+ /// The payload buffer filled by the last <c>nextPosition()</c>, or <c>null</c> when its length is 0. </summary>
+ public override BytesRef Payload
+ {
+ get
+ {
+ return payload.length != 0 ? payload : null;
+ }
+ }
+
+ /// <summary>
+ /// Moves to the next buffered doc, seeks the posting input to its entry and reads its freq. </summary>
+ public override int nextDoc()
+ {
+ if (++docIt >= upto)
+ {
+ return DocIdSetIterator.NO_MORE_DOCS;
+ }
+ long entryStart = offsets[docIt];
+ postingInput.seek(entryStart);
+ currFreq = postingInput.readVInt();
+ // reset variables used in nextPosition
+ pos = endOffset_Renamed = 0;
+ return docs[docIt];
+ }
+
+ /// <summary>
+ /// Decodes the next position of the current doc from the posting input and returns it: the
+ /// token's upper bits are added to the running position, offsets are decoded when stored, and
+ /// the payload (if the token's low bit is set) is read into the shared <c>payload</c> buffer.
+ /// </summary>
+ public override int nextPosition()
+ {
+ int token = postingInput.readVInt();
+ bool hasPayload = (token & 1) != 0;
+ pos += (int)((uint)token >> 1);
+ if (storeOffsets)
+ {
+ startOffset_Renamed = endOffset_Renamed + postingInput.readVInt();
+ endOffset_Renamed = startOffset_Renamed + postingInput.readVInt();
+ }
+ if (!hasPayload)
+ {
+ payload.length = 0; // the Payload getter reports null for length 0
+ return pos;
+ }
+ payload.offset = 0;
+ payload.length = postingInput.readVInt();
+ if (payload.length > payload.bytes.length)
+ {
+ payload.bytes = new sbyte[ArrayUtil.oversize(payload.length, 1)];
+ }
+ postingInput.readBytes(payload.bytes, 0, payload.length);
+ return pos;
+ }
+
+ /// <summary>
+ /// Returns the start offset last computed by <c>nextPosition()</c> (only updated when offsets are stored); initially -1. </summary>
+ public override int startOffset()
+ {
+ return startOffset_Renamed;
+ }
+
+ /// <summary>
+ /// Returns the wrapped <seealso cref="DocsAndPositionsEnum"/>, i.e. the enum this
+ /// instance was built from. <c>SortingTermsEnum.docsAndPositions</c> hands it back
+ /// to the wrapped terms enum when this instance is offered for reuse.
+ /// </summary>
+ internal virtual DocsAndPositionsEnum Wrapped
+ {
+ get { return @in; }
+ }
+ }
+
+ /// <summary>
+ /// Return a sorted view of <paramref name="reader"/> according to the order
+ /// defined by <paramref name="sort"/>. If the reader is already sorted, this
+ /// method might return the reader as-is.
+ /// </summary>
+ public static AtomicReader wrap(AtomicReader reader, Sort sort)
+ {
+ Sorter sorter = new Sorter(sort);
+ Sorter.DocMap docMap = sorter.sort(reader);
+ return wrap(reader, docMap);
+ }
+
+ /// <summary>
+ /// Expert: same as <seealso cref="wrap(AtomicReader, Sort)"/> but operates directly on a <seealso cref="Sorter.DocMap"/>; a <c>null</c> map returns <paramref name="reader"/> itself, and <seealso cref="System.ArgumentException"/> is thrown when <c>reader.maxDoc() != docMap.size()</c>. </summary>
+ internal static AtomicReader wrap(AtomicReader reader, Sorter.DocMap docMap)
+ {
+ if (docMap == null)
+ {
+ // the reader is already sorted
+ return reader;
+ }
+ if (reader.maxDoc() != docMap.size())
+ {
+ throw new System.ArgumentException("reader.maxDoc() should be equal to docMap.size(), got " + reader.maxDoc() + " != " + docMap.size());
+ }
+ Debug.Assert(Sorter.isConsistent(docMap));
+ return new SortingAtomicReader(reader, docMap);
+ }
+
+ internal readonly Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
+
+ /// <summary>
+ /// Wraps <paramref name="in"/> and keeps <paramref name="docMap"/>; private, so instances are only created inside this class (see <c>wrap</c>). </summary>
+ private SortingAtomicReader(AtomicReader @in, Sorter.DocMap docMap) : base(@in)
+ {
+ this.docMap = docMap;
+ }
+
+ /// <summary>
+ /// Visits the stored fields of wrapped-reader document <c>docMap.newToOld(docID)</c>. </summary>
+ public override void document(int docID, StoredFieldVisitor visitor)
+ {
+ int oldDocID = docMap.newToOld(docID);
+ @in.document(oldDocID, visitor);
+ }
+
+ /// <summary>
+ /// Returns the wrapped reader's fields behind a <seealso cref="SortingFields"/>,
+ /// or <c>null</c> when the wrapped reader returns <c>null</c>.
+ /// </summary>
+ public override Fields fields()
+ {
+ Fields inFields = @in.fields();
+ if (inFields != null)
+ {
+ // the field infos let SortingFields look up each field's index options
+ return new SortingFields(inFields, @in.FieldInfos, docMap);
+ }
+ return null;
+ }
+
+ /// <summary>
+ /// Returns the wrapped reader's binary doc values for <paramref name="field"/>, addressed through the doc map.
+ /// </summary>
+ /// <param name="field"> field name, passed to the wrapped reader unchanged </param>
+ /// <returns> a <seealso cref="SortingBinaryDocValues"/>, or <c>null</c> when the wrapped reader returns <c>null</c> </returns>
+ public override BinaryDocValues getBinaryDocValues(string field)
+ {
+ BinaryDocValues inner = @in.getBinaryDocValues(field);
+ if (inner != null)
+ {
+ return new SortingBinaryDocValues(inner, docMap);
+ }
+ return null;
+ }
+
+ /// <summary>
+ /// The wrapped reader's live docs behind a <seealso cref="SortingBits"/> view,
+ /// or <c>null</c> when the wrapped reader's <c>LiveDocs</c> is <c>null</c>.
+ /// </summary>
+ public override Bits LiveDocs
+ {
+ get
+ {
+ Bits inLiveDocs = @in.LiveDocs;
+ if (inLiveDocs != null)
+ {
+ return new SortingBits(inLiveDocs, docMap);
+ }
+ // null is passed through unchanged
+ return null;
+ }
+ }
+
+ /// <summary>
+ /// Returns the wrapped reader's norm values for <paramref name="field"/>,
+ /// addressed through the doc map.
+ /// </summary>
+ /// <param name="field"> field name, passed to the wrapped reader unchanged </param>
+ /// <returns> a <seealso cref="SortingNumericDocValues"/>, or <c>null</c> when the wrapped reader returns <c>null</c> </returns>
+ public override NumericDocValues getNormValues(string field)
+ {
+ NumericDocValues norm = @in.getNormValues(field);
+ if (norm != null)
+ {
+ return new SortingNumericDocValues(norm, docMap);
+ }
+ // null is passed through unchanged
+ return null;
+ }
+
+ /// <summary>
+ /// Returns the wrapped reader's numeric doc values for <paramref name="field"/> behind a
+ /// <seealso cref="SortingNumericDocValues"/>, or <c>null</c> when the wrapped reader returns <c>null</c>.
+ /// </summary>
+ public override NumericDocValues getNumericDocValues(string field)
+ {
+ NumericDocValues inner = @in.getNumericDocValues(field);
+ if (inner != null)
+ {
+ return new SortingNumericDocValues(inner, docMap);
+ }
+ return null;
+ }
+
+ /// <summary>
+ /// Returns the wrapped reader's sorted doc values for <paramref name="field"/>, addressed through the doc map.
+ /// </summary>
+ /// <param name="field"> field name, passed to the wrapped reader unchanged </param>
+ /// <returns> a <seealso cref="SortingSortedDocValues"/>, or <c>null</c> when the wrapped reader returns <c>null</c> </returns>
+ public override SortedDocValues getSortedDocValues(string field)
+ {
+ SortedDocValues inner = @in.getSortedDocValues(field);
+ if (inner != null)
+ {
+ return new SortingSortedDocValues(inner, docMap);
+ }
+ return null;
+ }
+
+ /// <summary>
+ /// Returns the wrapped reader's sorted-set doc values for <paramref name="field"/>, addressed through the doc map.
+ /// </summary>
+ /// <param name="field"> field name, passed to the wrapped reader unchanged </param>
+ /// <returns> a <seealso cref="SortingSortedSetDocValues"/>, or <c>null</c> when the wrapped reader returns <c>null</c> </returns>
+ public override SortedSetDocValues getSortedSetDocValues(string field)
+ {
+ SortedSetDocValues inner = @in.getSortedSetDocValues(field);
+ if (inner != null)
+ {
+ return new SortingSortedSetDocValues(inner, docMap);
+ }
+ return null;
+ }
+
+ /// <summary>
+ /// Returns the wrapped reader's <c>getDocsWithField</c> result for <paramref name="field"/>. A <c>null</c>,
+ /// <c>MatchAllBits</c> or <c>MatchNoBits</c> result is returned as-is (presumably because its answer does
+ /// not depend on the doc ID - confirm); anything else is wrapped in <seealso cref="SortingBits"/>. </summary>
+ public override Bits getDocsWithField(string field)
+ {
+ Bits bits = @in.getDocsWithField(field);
+ bool needsRemap = bits != null && !(bits is Bits.MatchAllBits) && !(bits is Bits.MatchNoBits);
+ if (needsRemap)
+ {
+ return new SortingBits(bits, docMap);
+ }
+ return bits;
+ }
+
+ /// <summary>
+ /// Term vectors of wrapped-reader document <c>docMap.newToOld(docID)</c>, returned without further wrapping. </summary>
+ public override Fields getTermVectors(int docID)
+ {
+ int oldDocID = docMap.newToOld(docID);
+ return @in.getTermVectors(oldDocID);
+ }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Index/Sorter/SortingMergePolicy.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Index/Sorter/SortingMergePolicy.cs b/src/Lucene.Net.Misc/Index/Sorter/SortingMergePolicy.cs
new file mode 100644
index 0000000..c455d2a
--- /dev/null
+++ b/src/Lucene.Net.Misc/Index/Sorter/SortingMergePolicy.cs
@@ -0,0 +1,309 @@
+using System.Diagnostics;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.index.sorter
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using Analyzer = org.apache.lucene.analysis.Analyzer; // javadocs
+ using Sort = org.apache.lucene.search.Sort;
+ using Directory = org.apache.lucene.store.Directory;
+ using Bits = org.apache.lucene.util.Bits;
+ using MonotonicAppendingLongBuffer = org.apache.lucene.util.packed.MonotonicAppendingLongBuffer;
+
+ /// <summary>
+ /// A <seealso cref="MergePolicy"/> that reorders documents according to a <seealso cref="Sort"/>
+ /// before merging them. As a consequence, all segments resulting from a merge
+ /// will be sorted while segments resulting from a flush will be in the order
+ /// in which documents have been added.
+ /// <para><b>NOTE</b>: Never use this policy if you rely on
+ /// <seealso cref="IndexWriter#addDocuments(Iterable, Analyzer) IndexWriter.addDocuments"/>
+ /// to have sequentially-assigned doc IDs, this policy will scatter doc IDs.
+ /// </para>
+ /// <para><b>NOTE</b>: This policy should only be used with idempotent {@code Sort}s
+ /// so that the order of segments is predictable. For example, using
+ /// <seealso cref="Sort#INDEXORDER"/> in reverse (which is not idempotent) will make
+ /// the order of documents in a segment depend on the number of times the segment
+ /// has been merged.
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public sealed class SortingMergePolicy : MergePolicy
+ {
+
+ /// <summary>
+ /// Put in the <seealso cref="SegmentInfo#getDiagnostics() diagnostics"/> to denote that
+ /// this segment is sorted.
+ /// </summary>
+ public const string SORTER_ID_PROP = "sorter";
+
+ internal class SortingOneMerge : OneMerge
+ {
+ private readonly SortingMergePolicy outerInstance;
+
+
+ internal IList<AtomicReader> unsortedReaders;
+ internal Sorter.DocMap docMap;
+ internal AtomicReader sortedView;
+
+ /// <summary>
+ /// Creates a merge over <paramref name="segments"/> and remembers the owning
+ /// policy, whose sorter is used later by this merge.
+ /// </summary>
+ internal SortingOneMerge(SortingMergePolicy outerInstance, IList<SegmentCommitInfo> segments) : base(segments)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public java.util.List<org.apache.lucene.index.AtomicReader> getMergeReaders() throws java.io.IOException
+ /// <summary>
+ /// Readers to merge. On first access the base readers are fetched, viewed as a
+ /// single atomic reader and sorted; the results are cached in
+ /// <c>unsortedReaders</c>, <c>docMap</c> and <c>sortedView</c>.
+ /// </summary>
+ public override IList<AtomicReader> MergeReaders
+ {
+     get
+     {
+         if (unsortedReaders == null)
+         {
+             unsortedReaders = base.MergeReaders;
+             AtomicReader atomicView;
+             if (unsortedReaders.Count != 1)
+             {
+                 // several readers: expose them as one atomic reader before sorting
+                 IndexReader multiReader = new MultiReader(unsortedReaders.ToArray());
+                 atomicView = SlowCompositeReaderWrapper.wrap(multiReader);
+             }
+             else
+             {
+                 atomicView = unsortedReaders[0];
+             }
+             docMap = outerInstance.sorter.sort(atomicView);
+             sortedView = SortingAtomicReader.wrap(atomicView, docMap);
+         }
+         // a null doc map means that the readers are already sorted
+         if (docMap == null)
+         {
+             return unsortedReaders;
+         }
+         return Collections.singletonList(sortedView);
+     }
+ }
+
+ /// <summary>
+ /// Records the sorter ID in the segment's diagnostics under
+ /// <c>SORTER_ID_PROP</c>, then forwards the value to the base class.
+ /// </summary>
+ public override SegmentCommitInfo Info
+ {
+     set
+     {
+         // tag the segment before the base class sees it
+         value.info.Diagnostics[SORTER_ID_PROP] = outerInstance.sorter.ID;
+         base.Info = value;
+     }
+ }
+
+ /// <summary>
+ /// Walks all documents of <paramref name="readers"/> in order and, for every
+ /// live document, appends the number of deleted documents seen before it.
+ /// The returned buffer is frozen.
+ /// </summary>
+ internal virtual MonotonicAppendingLongBuffer getDeletes(IList<AtomicReader> readers)
+ {
+     MonotonicAppendingLongBuffer buffer = new MonotonicAppendingLongBuffer();
+     int deletedSoFar = 0;
+     foreach (AtomicReader reader in readers)
+     {
+         int limit = reader.maxDoc();
+         Bits liveDocs = reader.LiveDocs;
+         for (int doc = 0; doc < limit; doc++)
+         {
+             // a null liveDocs is treated as "every document is live"
+             bool isLive = liveDocs == null || liveDocs.get(doc);
+             if (isLive)
+             {
+                 buffer.add(deletedSoFar);
+             }
+             else
+             {
+                 deletedSoFar++;
+             }
+         }
+     }
+     buffer.freeze();
+     return buffer;
+ }
+
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: @Override public org.apache.lucene.index.MergePolicy.DocMap getDocMap(final org.apache.lucene.index.MergeState mergeState)
+ /// <summary>
+ /// Returns the doc map for this merge. Throws if <c>MergeReaders</c> has not been
+ /// read yet; defers to the base class when no sorting was needed
+ /// (<c>docMap</c> is null); otherwise returns a map that accounts for deletes,
+ /// the sort permutation and the merge state's own doc map.
+ /// </summary>
+ public override MergePolicy.DocMap getDocMap(MergeState mergeState)
+ {
+     if (unsortedReaders == null)
+     {
+         throw new IllegalStateException();
+     }
+     if (docMap != null)
+     {
+         Debug.Assert(mergeState.docMaps.length == 1); // we returned a singleton reader
+         MonotonicAppendingLongBuffer deletedBefore = getDeletes(unsortedReaders);
+         return new DocMapAnonymousInnerClassHelper(this, mergeState, deletedBefore);
+     }
+     // readers were already sorted: nothing to remap beyond the default
+     return base.getDocMap(mergeState);
+ }
+
+ /// <summary>
+ /// Doc map used when the merged readers had to be sorted: adds back the deletes
+ /// preceding a document, applies the owning merge's sort map, then applies the
+ /// first doc map of the merge state.
+ /// </summary>
+ private class DocMapAnonymousInnerClassHelper : MergePolicy.DocMap
+ {
+     private readonly SortingOneMerge outerInstance;
+
+     private MergeState mergeState;
+     private MonotonicAppendingLongBuffer deletes;
+
+     public DocMapAnonymousInnerClassHelper(SortingOneMerge outerInstance, MergeState mergeState, MonotonicAppendingLongBuffer deletes)
+     {
+         this.deletes = deletes;
+         this.mergeState = mergeState;
+         this.outerInstance = outerInstance;
+     }
+
+     public override int map(int old)
+     {
+         // number of deleted documents recorded for this position
+         int precedingDeletes = (int) deletes.get(old);
+         int sortedPosition = outerInstance.docMap.oldToNew(old + precedingDeletes);
+         return mergeState.docMaps[0].get(sortedPosition);
+     }
+ }
+
+ }
+
+ /// <summary>
+ /// Merge specification that re-wraps every added merge as a
+ /// <c>SortingOneMerge</c> bound to the owning policy.
+ /// </summary>
+ internal class SortingMergeSpecification : MergeSpecification
+ {
+     private readonly SortingMergePolicy outerInstance;
+
+     public SortingMergeSpecification(SortingMergePolicy outerInstance)
+     {
+         this.outerInstance = outerInstance;
+     }
+
+     /// <summary>
+     /// Adds a sorting merge over the segments of <paramref name="merge"/>.
+     /// </summary>
+     public override void add(OneMerge merge)
+     {
+         SortingOneMerge sortingMerge = new SortingOneMerge(outerInstance, merge.segments);
+         base.add(sortingMerge);
+     }
+
+     /// <summary>
+     /// Base description decorated with the policy's sorter.
+     /// </summary>
+     public override string segString(Directory dir)
+     {
+         string inner = base.segString(dir);
+         return string.Concat("SortingMergeSpec(", inner, ", sorter=", outerInstance.sorter, ")");
+     }
+ }
+
+ /// <summary>
+ /// Returns {@code true} if the given {@code reader} is sorted by the specified {@code sort}. </summary>
+ public static bool isSorted(AtomicReader reader, Sort sort)
+ {
+     // Only segment readers expose the segment diagnostics this check reads.
+     SegmentReader segReader = reader as SegmentReader;
+     if (segReader == null)
+     {
+         return false;
+     }
+
+     IDictionary<string, string> diagnostics = segReader.SegmentInfo.info.Diagnostics;
+     if (diagnostics == null)
+     {
+         return false;
+     }
+
+     // The Java original used Map.get, which yields null for a missing key.
+     // The C# dictionary indexer throws KeyNotFoundException instead, so a
+     // segment without the sorter entry must be probed with TryGetValue.
+     string sorterId;
+     return diagnostics.TryGetValue(SORTER_ID_PROP, out sorterId)
+         && sort.ToString().Equals(sorterId);
+ }
+
+ /// <summary>
+ /// Copies the merges of <paramref name="specification"/> into a new
+ /// <c>SortingMergeSpecification</c>; a null input yields null.
+ /// </summary>
+ private MergeSpecification sortedMergeSpecification(MergeSpecification specification)
+ {
+     MergeSpecification wrapped = null;
+     if (specification != null)
+     {
+         wrapped = new SortingMergeSpecification(this);
+         foreach (OneMerge candidate in specification.merges)
+         {
+             wrapped.add(candidate);
+         }
+     }
+     return wrapped;
+ }
+
+ internal readonly MergePolicy @in;
+ internal readonly Sorter sorter;
+ internal readonly Sort sort;
+
+ /// <summary>
+ /// Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. </summary>
+ public SortingMergePolicy(MergePolicy @in, Sort sort)
+ {
+     // keep the sort itself (used by clone) alongside the sorter built from it
+     this.sort = sort;
+     this.sorter = new Sorter(sort);
+     this.@in = @in;
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public MergeSpecification findMerges(org.apache.lucene.index.MergeTrigger mergeTrigger, org.apache.lucene.index.SegmentInfos segmentInfos) throws java.io.IOException
+ /// <summary>
+ /// Asks the wrapped policy for merges and re-wraps them as sorting merges.
+ /// </summary>
+ public override MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos)
+ {
+     MergeSpecification proposed = @in.findMerges(mergeTrigger, segmentInfos);
+     return sortedMergeSpecification(proposed);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public MergeSpecification findForcedMerges(org.apache.lucene.index.SegmentInfos segmentInfos, int maxSegmentCount, java.util.Map<org.apache.lucene.index.SegmentCommitInfo,Boolean> segmentsToMerge) throws java.io.IOException
+ /// <summary>
+ /// Asks the wrapped policy for forced merges and re-wraps them as sorting merges.
+ /// </summary>
+ public override MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge)
+ {
+     MergeSpecification proposed = @in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge);
+     return sortedMergeSpecification(proposed);
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public MergeSpecification findForcedDeletesMerges(org.apache.lucene.index.SegmentInfos segmentInfos) throws java.io.IOException
+ /// <summary>
+ /// Asks the wrapped policy for forced-deletes merges and re-wraps them as sorting merges.
+ /// </summary>
+ public override MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos)
+ {
+     MergeSpecification proposed = @in.findForcedDeletesMerges(segmentInfos);
+     return sortedMergeSpecification(proposed);
+ }
+
+ /// <summary>
+ /// Returns a new policy around a clone of the wrapped policy, using the same sort.
+ /// </summary>
+ public override MergePolicy clone()
+ {
+     MergePolicy innerCopy = @in.clone();
+     return new SortingMergePolicy(innerCopy, sort);
+ }
+
+ /// <summary>
+ /// Closes the wrapped merge policy.
+ /// </summary>
+ public override void close()
+ {
+ @in.close();
+ }
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: @Override public boolean useCompoundFile(org.apache.lucene.index.SegmentInfos segments, org.apache.lucene.index.SegmentCommitInfo newSegment) throws java.io.IOException
+ /// <summary>
+ /// Delegates the compound-file decision to the wrapped policy.
+ /// </summary>
+ public override bool useCompoundFile(SegmentInfos segments, SegmentCommitInfo newSegment)
+ {
+     bool decision = @in.useCompoundFile(segments, newSegment);
+     return decision;
+ }
+
+ /// <summary>
+ /// Forwards the index writer to the wrapped merge policy.
+ /// </summary>
+ public override IndexWriter IndexWriter
+ {
+ set
+ {
+ @in.IndexWriter = value;
+ }
+ }
+
+ /// <summary>
+ /// Describes this policy as the wrapped policy plus the sorter in use.
+ /// </summary>
+ public override string ToString()
+ {
+     return string.Concat("SortingMergePolicy(", @in, ", sorter=", sorter, ")");
+ }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Lucene.Net.Misc.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Lucene.Net.Misc.csproj b/src/Lucene.Net.Misc/Lucene.Net.Misc.csproj
new file mode 100644
index 0000000..e4bc070
--- /dev/null
+++ b/src/Lucene.Net.Misc/Lucene.Net.Misc.csproj
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{A3A0D943-B91A-4B7A-9FCB-6160EA575D95}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Misc</RootNamespace>
+ <AssemblyName>Lucene.Net.Misc</AssemblyName>
+ <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="ByteBuffer.cs" />
+ <Compile Include="Document\LazyDocument.cs" />
+ <Compile Include="Index\CompoundFileExtractor.cs" />
+ <Compile Include="Index\IndexSplitter.cs" />
+ <Compile Include="Index\MultiPassIndexSplitter.cs" />
+ <Compile Include="Index\PKIndexSplitter.cs" />
+ <Compile Include="Index\Sorter\BlockJoinComparatorSource.cs" />
+ <Compile Include="Index\Sorter\EarlyTerminatingSortingCollector.cs" />
+ <Compile Include="Index\Sorter\Sorter.cs" />
+ <Compile Include="Index\Sorter\SortingAtomicReader.cs" />
+ <Compile Include="Index\Sorter\SortingMergePolicy.cs" />
+ <Compile Include="Misc\GetTermInfo.cs" />
+ <Compile Include="Misc\HighFreqTerms.cs" />
+ <Compile Include="Misc\IndexMergeTool.cs" />
+ <Compile Include="Misc\SweetSpotSimilarity.cs" />
+ <Compile Include="Misc\TermStats.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="Store\NativePosixUtil.cs" />
+ <Compile Include="Store\NativeUnixDirectory.cs" />
+ <Compile Include="Store\WindowsDirectory.cs" />
+ <Compile Include="Util\Fst\ListOfOutputs.cs" />
+ <Compile Include="Util\Fst\UpToTwoPositiveIntOutputs.cs" />
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Misc/GetTermInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Misc/GetTermInfo.cs b/src/Lucene.Net.Misc/Misc/GetTermInfo.cs
new file mode 100644
index 0000000..9d56821
--- /dev/null
+++ b/src/Lucene.Net.Misc/Misc/GetTermInfo.cs
@@ -0,0 +1,74 @@
+using System;
+
+namespace org.apache.lucene.misc
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Directory = org.apache.lucene.store.Directory;
+ using FSDirectory = org.apache.lucene.store.FSDirectory;
+ using DirectoryReader = org.apache.lucene.index.DirectoryReader;
+ using IndexReader = org.apache.lucene.index.IndexReader;
+ using Term = org.apache.lucene.index.Term;
+
+ /// <summary>
+ /// Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term.
+ /// </summary>
+ public class GetTermInfo
+ {
+
+     /// <summary>
+     /// Command-line entry point. Expects exactly three arguments: the index
+     /// directory, the field name and the term text. With any other argument
+     /// count the usage text is printed and the process exits with code 1.
+     /// </summary>
+     public static void Main(string[] args)
+     {
+
+         FSDirectory dir = null;
+         string inputStr = null;
+         string field = null;
+
+         if (args.Length == 3)
+         {
+             // NOTE(review): 'File' is carried over from the Java source (java.io.File);
+             // confirm which path type FSDirectory.open expects in the port.
+             dir = FSDirectory.open(new File(args[0]));
+             field = args[1];
+             inputStr = args[2];
+         }
+         else
+         {
+             usage();
+             Environment.Exit(1);
+         }
+
+         getTermInfo(dir, new Term(field, inputStr));
+     }
+
+     /// <summary>
+     /// Opens a reader on <paramref name="dir"/> and writes the total term frequency
+     /// and document frequency of <paramref name="term"/> to standard output.
+     /// The reader is closed before returning, including when a lookup throws.
+     /// </summary>
+     public static void getTermInfo(Directory dir, Term term)
+     {
+         IndexReader reader = DirectoryReader.open(dir);
+         try
+         {
+             // Replaces the untranslated Java System.out.printf(Locale.ROOT, ...):
+             // "N0" under the invariant culture gives the same comma-grouped integers
+             // as Java's "%,d" under Locale.ROOT.
+             string line = string.Format(
+                 System.Globalization.CultureInfo.InvariantCulture,
+                 "{0}:{1} \t totalTF = {2:N0} \t doc freq = {3:N0} \n",
+                 term.field(), term.text(), reader.totalTermFreq(term), reader.docFreq(term));
+             Console.Write(line);
+         }
+         finally
+         {
+             reader.close();
+         }
+     }
+
+     /// <summary>
+     /// Prints the command-line usage text to standard output.
+     /// </summary>
+     private static void usage()
+     {
+         // Type.FullName is not guaranteed to match what Java's Class.getName would print.
+         Console.WriteLine("\n\nusage:\n\t" + "java " + typeof(GetTermInfo).FullName + " <index dir> field term \n\n");
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Misc/HighFreqTerms.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Misc/HighFreqTerms.cs b/src/Lucene.Net.Misc/Misc/HighFreqTerms.cs
new file mode 100644
index 0000000..dccb173
--- /dev/null
+++ b/src/Lucene.Net.Misc/Misc/HighFreqTerms.cs
@@ -0,0 +1,230 @@
+using System;
+using System.Collections.Generic;
+
+namespace org.apache.lucene.misc
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using DirectoryReader = org.apache.lucene.index.DirectoryReader;
+ using IndexReader = org.apache.lucene.index.IndexReader;
+ using MultiFields = org.apache.lucene.index.MultiFields;
+ using Fields = org.apache.lucene.index.Fields;
+ using TermsEnum = org.apache.lucene.index.TermsEnum;
+ using Terms = org.apache.lucene.index.Terms;
+ using Directory = org.apache.lucene.store.Directory;
+ using FSDirectory = org.apache.lucene.store.FSDirectory;
+ using PriorityQueue = org.apache.lucene.util.PriorityQueue;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+ /// <summary>
+ /// <code>HighFreqTerms</code> class extracts the top n most frequent terms
+ /// (by document frequency) from an existing Lucene index and reports their
+ /// document frequency.
+ /// <para>
+ /// If the -t flag is given, both document frequency and total tf (total
+ /// number of occurrences) are reported, ordered by descending total tf.
+ ///
+ /// </para>
+ /// </summary>
+ public class HighFreqTerms
+ {
+
+ // The top numTerms will be displayed
+ public const int DEFAULT_NUMTERMS = 100;
+
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static void main(String[] args) throws Exception
+ /// <summary>
+ /// Command-line entry point: <c>&lt;index dir&gt; [-t] [number_terms] [field]</c>.
+ /// Each optional argument is interpreted as <c>-t</c>, else as the number of terms
+ /// if it parses as an integer, else as the field name.
+ /// </summary>
+ public static void Main(string[] args)
+ {
+     string field = null;
+     int numTerms = DEFAULT_NUMTERMS;
+
+     if (args.Length == 0 || args.Length > 4)
+     {
+         usage();
+         Environment.Exit(1);
+     }
+
+     // NOTE(review): 'File' is carried over from the Java source (java.io.File);
+     // confirm which path type FSDirectory.open expects in the port.
+     Directory dir = FSDirectory.open(new File(args[0]));
+
+     IComparer<TermStats> comparator = new DocFreqComparator();
+
+     for (int i = 1; i < args.Length; i++)
+     {
+         if (args[i].Equals("-t"))
+         {
+             comparator = new TotalTermFreqComparator();
+         }
+         else
+         {
+             // Convert.ToInt32 signals bad input with FormatException/OverflowException,
+             // which the old catch (NumberFormatException) never matched, so a field
+             // name crashed the tool. TryParse reports failure without throwing.
+             int parsed;
+             if (int.TryParse(args[i], System.Globalization.NumberStyles.AllowLeadingSign, System.Globalization.CultureInfo.InvariantCulture, out parsed))
+             {
+                 numTerms = parsed;
+             }
+             else
+             {
+                 field = args[i];
+             }
+         }
+     }
+
+     IndexReader reader = DirectoryReader.open(dir);
+     try
+     {
+         TermStats[] terms = getHighFreqTerms(reader, numTerms, field, comparator);
+
+         for (int i = 0; i < terms.Length; i++)
+         {
+             // Replaces the untranslated Java System.out.printf(Locale.ROOT, ...):
+             // "N0" under the invariant culture matches "%,d" under Locale.ROOT.
+             Console.Write(string.Format(
+                 System.Globalization.CultureInfo.InvariantCulture,
+                 "{0}:{1} \t totalTF = {2:N0} \t docFreq = {3:N0} \n",
+                 terms[i].field, terms[i].termtext.utf8ToString(), terms[i].totalTermFreq, terms[i].docFreq));
+         }
+     }
+     finally
+     {
+         // close the reader even when collecting or printing the terms fails
+         reader.close();
+     }
+ }
+
+ /// <summary>
+ /// Prints the command-line usage text to standard output.
+ /// </summary>
+ private static void usage()
+ {
+     string banner = "\n\n" + "java org.apache.lucene.misc.HighFreqTerms <index dir> [-t] [number_terms] [field]\n\t -t: order by totalTermFreq\n\n";
+     Console.WriteLine(banner);
+ }
+
+ /// <summary>
+ /// Returns TermStats[] ordered by the specified comparator
+ /// </summary>
+//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
+//ORIGINAL LINE: public static TermStats[] getHighFreqTerms(org.apache.lucene.index.IndexReader reader, int numTerms, String field, java.util.Comparator<TermStats> comparator) throws Exception
+ public static TermStats[] getHighFreqTerms(IndexReader reader, int numTerms, string field, IComparer<TermStats> comparator)
+ {
+     TermStatsQueue tiq = null;
+
+     if (field != null)
+     {
+         // a single field was requested
+         Fields fields = MultiFields.getFields(reader);
+         if (fields == null)
+         {
+             throw new Exception("field " + field + " not found");
+         }
+         Terms terms = fields.terms(field);
+         if (terms == null)
+         {
+             // Previously this case left the queue null and failed below with a
+             // NullReferenceException; report it like the missing-fields case.
+             throw new Exception("field " + field + " not found");
+         }
+         TermsEnum termsEnum = terms.iterator(null);
+         tiq = new TermStatsQueue(numTerms, comparator);
+         tiq.fill(field, termsEnum);
+     }
+     else
+     {
+         // no field given: collect over every field of the index
+         Fields fields = MultiFields.getFields(reader);
+         if (fields == null)
+         {
+             throw new Exception("no fields found for this index");
+         }
+         tiq = new TermStatsQueue(numTerms, comparator);
+         foreach (string fieldName in fields)
+         {
+             Terms terms = fields.terms(fieldName);
+             if (terms != null)
+             {
+                 tiq.fill(fieldName, terms.iterator(null));
+             }
+         }
+     }
+
+     TermStats[] result = new TermStats[tiq.size()];
+     // we want highest first so we read the queue and populate the array
+     // starting at the end and work backwards
+     int count = tiq.size() - 1;
+     while (tiq.size() != 0)
+     {
+         result[count] = tiq.pop();
+         count--;
+     }
+     return result;
+ }
+
+ /// <summary>
+ /// Compares terms by docTermFreq
+ /// </summary>
+ public sealed class DocFreqComparator : IComparer<TermStats>
+ {
+
+     /// <summary>
+     /// Orders by <c>docFreq</c>, then by field name, then by term text.
+     /// </summary>
+     public int Compare(TermStats a, TermStats b)
+     {
+         // 'long.compare' was an untranslated Java Long.compare; CompareTo is the
+         // C# equivalent and yields a result with the same sign.
+         int res = a.docFreq.CompareTo(b.docFreq);
+         if (res == 0)
+         {
+             // Java's String.compareTo is an ordinal comparison, whereas C#'s
+             // string.CompareTo depends on the current culture; compare ordinally
+             // so the ordering does not vary between machines.
+             res = string.CompareOrdinal(a.field, b.field);
+             if (res == 0)
+             {
+                 res = a.termtext.compareTo(b.termtext);
+             }
+         }
+         return res;
+     }
+ }
+
+ /// <summary>
+ /// Compares terms by totalTermFreq
+ /// </summary>
+ public sealed class TotalTermFreqComparator : IComparer<TermStats>
+ {
+
+     /// <summary>
+     /// Orders by <c>totalTermFreq</c>, then by field name, then by term text.
+     /// </summary>
+     public int Compare(TermStats a, TermStats b)
+     {
+         // 'long.compare' was an untranslated Java Long.compare; CompareTo is the
+         // C# equivalent and yields a result with the same sign.
+         int res = a.totalTermFreq.CompareTo(b.totalTermFreq);
+         if (res == 0)
+         {
+             // Java's String.compareTo is an ordinal comparison, whereas C#'s
+             // string.CompareTo depends on the current culture; compare ordinally
+             // so the ordering does not vary between machines.
+             res = string.CompareOrdinal(a.field, b.field);
+             if (res == 0)
+             {
+                 res = a.termtext.compareTo(b.termtext);
+             }
+         }
+         return res;
+     }
+ }
+
+ /// <summary>
+ /// Priority queue for TermStats objects
+ ///
+ /// </summary>
+ internal sealed class TermStatsQueue : PriorityQueue<TermStats>
+ {
+     internal readonly IComparer<TermStats> comparator;
+
+     /// <summary>
+     /// Creates a queue of the given size ordered by <paramref name="comparator"/>.
+     /// </summary>
+     internal TermStatsQueue(int size, IComparer<TermStats> comparator) : base(size)
+     {
+         this.comparator = comparator;
+     }
+
+     /// <summary>
+     /// True when the comparator orders <paramref name="termInfoA"/> before <paramref name="termInfoB"/>.
+     /// </summary>
+     protected internal override bool lessThan(TermStats termInfoA, TermStats termInfoB)
+     {
+         int order = comparator.Compare(termInfoA, termInfoB);
+         return order < 0;
+     }
+
+     /// <summary>
+     /// Offers every term of <paramref name="termsEnum"/> to the queue, tagged with
+     /// <paramref name="field"/> and its document / total term frequency.
+     /// </summary>
+     protected internal void fill(string field, TermsEnum termsEnum)
+     {
+         // next() returning null ends the enumeration
+         for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next())
+         {
+             TermStats stats = new TermStats(field, term, termsEnum.docFreq(), termsEnum.totalTermFreq());
+             insertWithOverflow(stats);
+         }
+     }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Misc/IndexMergeTool.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Misc/IndexMergeTool.cs b/src/Lucene.Net.Misc/Misc/IndexMergeTool.cs
new file mode 100644
index 0000000..bba8704
--- /dev/null
+++ b/src/Lucene.Net.Misc/Misc/IndexMergeTool.cs
@@ -0,0 +1,66 @@
+using System;
+
+namespace org.apache.lucene.misc
+{
+
+ /// <summary>
+ /// Copyright 2005 The Apache Software Foundation
+ ///
+ /// Licensed under the Apache License, Version 2.0 (the "License");
+ /// you may not use this file except in compliance with the License.
+ /// You may obtain a copy of the License at
+ ///
+ /// http://www.apache.org/licenses/LICENSE-2.0
+ ///
+ /// Unless required by applicable law or agreed to in writing, software
+ /// distributed under the License is distributed on an "AS IS" BASIS,
+ /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ /// See the License for the specific language governing permissions and
+ /// limitations under the License.
+ /// </summary>
+
+ using IndexWriter = org.apache.lucene.index.IndexWriter;
+ using IndexWriterConfig = org.apache.lucene.index.IndexWriterConfig;
+ using OpenMode = org.apache.lucene.index.IndexWriterConfig.OpenMode;
+ using Directory = org.apache.lucene.store.Directory;
+ using FSDirectory = org.apache.lucene.store.FSDirectory;
+ using Version = org.apache.lucene.util.Version;
+
+
+ /// <summary>
+ /// Merges indices specified on the command line into the index
+ /// specified as the first command line argument.
+ /// </summary>
+ public class IndexMergeTool
+ {
+     /// <summary>
+     /// Command-line entry point. The first argument is the target index; every
+     /// following argument is an index to add to it. After adding, the target is
+     /// force-merged down to one segment and the writer is closed.
+     /// </summary>
+     public static void Main(string[] args)
+     {
+         bool tooFewArguments = args.Length < 3;
+         if (tooFewArguments)
+         {
+             Console.Error.WriteLine("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
+             Environment.Exit(1);
+         }
+         FSDirectory mergedIndex = FSDirectory.open(new File(args[0]));
+
+         IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(Version.LUCENE_CURRENT, null)
+             .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
+
+         // args[1..] are the source indexes; slot k holds args[k + 1]
+         int sourceCount = args.Length - 1;
+         Directory[] indexes = new Directory[sourceCount];
+         for (int slot = 0; slot < sourceCount; slot++)
+         {
+             indexes[slot] = FSDirectory.open(new File(args[slot + 1]));
+         }
+
+         Console.WriteLine("Merging...");
+         writer.addIndexes(indexes);
+
+         Console.WriteLine("Full merge...");
+         writer.forceMerge(1);
+         writer.close();
+         Console.WriteLine("Done.");
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Misc/SweetSpotSimilarity.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Misc/SweetSpotSimilarity.cs b/src/Lucene.Net.Misc/Misc/SweetSpotSimilarity.cs
new file mode 100644
index 0000000..bc0386c
--- /dev/null
+++ b/src/Lucene.Net.Misc/Misc/SweetSpotSimilarity.cs
@@ -0,0 +1,238 @@
+using System;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.misc
+{
+
+ using DefaultSimilarity = org.apache.lucene.search.similarities.DefaultSimilarity;
+ using FieldInvertState = org.apache.lucene.index.FieldInvertState;
+
+ /// <summary>
+ /// <para>
+ /// A similarity whose lengthNorm offers a "plateau" of equally good lengths,
+ /// together with tf helper functions.
+ /// </para>
+ /// <para>
+ /// For lengthNorm, a min/max can be specified to define the plateau of lengths
+ /// that should all have a norm of 1.0. Below the min and above the max, the
+ /// lengthNorm drops off following a sqrt function.
+ /// </para>
+ /// <para>
+ /// For tf, the baselineTf and hyperbolicTf functions are provided, and
+ /// subclasses can choose between them.
+ /// </para>
+ /// </summary>
+ /// <remarks>
+ /// The Gnuplot file <c>doc-files/ss.gnuplot</c> was used to generate some of the
+ /// visualizations referenced from each function.
+ /// </remarks>
+ public class SweetSpotSimilarity : DefaultSimilarity
+ {
+     // Length-norm settings: plateau bounds and steepness.
+     private int ln_min = 1;
+     private int ln_max = 1;
+     private float ln_steep = 0.5f;
+
+     // baselineTf settings.
+     private float tf_base = 0.0f;
+     private float tf_min = 0.0f;
+
+     // hyperbolicTf settings.
+     private float tf_hyper_min = 0.0f;
+     private float tf_hyper_max = 2.0f;
+     private double tf_hyper_base = 1.3d;
+     private float tf_hyper_xoffset = 10.0f;
+
+     /// <summary>
+     /// Creates an instance that uses the default factor values declared on the fields.
+     /// </summary>
+     public SweetSpotSimilarity()
+     {
+     }
+
+     /// <summary>
+     /// Sets the baseline and minimum function variables used by <see cref="baselineTf"/>.
+     /// </summary>
+     /// <param name="base"> the value returned for frequencies at or below <paramref name="min"/> </param>
+     /// <param name="min"> the frequency threshold </param>
+     /// <seealso cref="baselineTf"/>
+     public virtual void setBaselineTfFactors(float @base, float min)
+     {
+         tf_base = @base;
+         tf_min = min;
+     }
+
+     /// <summary>
+     /// Sets the function variables used by <see cref="hyperbolicTf"/>.
+     /// </summary>
+     /// <param name="min"> the minimum tf value to ever be returned (default: 0.0) </param>
+     /// <param name="max"> the maximum tf value to ever be returned (default: 2.0) </param>
+     /// <param name="base"> the base value to be used in the exponential for the hyperbolic function (default: 1.3) </param>
+     /// <param name="xoffset"> the midpoint of the hyperbolic function (default: 10.0) </param>
+     /// <seealso cref="hyperbolicTf"/>
+     public virtual void setHyperbolicTfFactors(float min, float max, double @base, float xoffset)
+     {
+         tf_hyper_xoffset = xoffset;
+         tf_hyper_base = @base;
+         tf_hyper_max = max;
+         tf_hyper_min = min;
+     }
+
+     /// <summary>
+     /// Sets the function variables used by <see cref="computeLengthNorm"/>, and the
+     /// discountOverlaps flag consulted by <see cref="lengthNorm"/>.
+     /// </summary>
+     /// <param name="min"> lower bound of the plateau </param>
+     /// <param name="max"> upper bound of the plateau </param>
+     /// <param name="steepness"> multiplier applied to the distance outside the plateau </param>
+     /// <param name="discountOverlaps"> whether overlap tokens are left out of the token count </param>
+     /// <seealso cref="computeLengthNorm"/>
+     public virtual void setLengthNormFactors(int min, int max, float steepness, bool discountOverlaps)
+     {
+         ln_min = min;
+         ln_max = max;
+         ln_steep = steepness;
+         // "this." is required here: the parameter hides the member of the same name.
+         this.discountOverlaps = discountOverlaps;
+     }
+
+     /// <summary>
+     /// Implemented as <c>state.Boost * computeLengthNorm(numTokens)</c>, where numTokens
+     /// is <c>state.Length - state.NumOverlap</c> when discountOverlaps is set and
+     /// <c>state.Length</c> otherwise.
+     /// </summary>
+     public override float lengthNorm(FieldInvertState state)
+     {
+         int numTokens = discountOverlaps
+             ? state.Length - state.NumOverlap
+             : state.Length;
+
+         return state.Boost * computeLengthNorm(numTokens);
+     }
+
+     /// <summary>
+     /// Implemented as:
+     /// <code>
+     /// 1/sqrt( steepness * (abs(x-min) + abs(x-max) - (max-min)) + 1 )
+     /// </code>
+     /// <para>
+     /// This degrades to <c>1/sqrt(x)</c> when min and max are both 1 and
+     /// steepness is 0.5.
+     /// </para>
+     /// <para>
+     /// TODO: a potential optimization is to just flat out return 1.0f if numTerms
+     /// is between min and max.
+     /// </para>
+     /// <para>
+     /// An SVG visualization of this function is in <c>doc-files/ss.computeLengthNorm.svg</c>.
+     /// </para>
+     /// </summary>
+     /// <seealso cref="setLengthNormFactors"/>
+     public virtual float computeLengthNorm(int numTerms)
+     {
+         int low = ln_min;
+         int high = ln_max;
+
+         // Integer part of the formula: abs(x-min) + abs(x-max) - (max-min).
+         int spread = Math.Abs(numTerms - low) + Math.Abs(numTerms - high) - (high - low);
+
+         return (float)(1.0f / Math.Sqrt((ln_steep * (float)spread) + 1.0f));
+     }
+
+     /// <summary>
+     /// Delegates to <see cref="baselineTf"/>.
+     /// </summary>
+     /// <seealso cref="baselineTf"/>
+     public override float tf(float freq)
+     {
+         return baselineTf(freq);
+     }
+
+     /// <summary>
+     /// Implemented as:
+     /// <code>
+     /// (x &lt;= min) ? base : sqrt(x+(base**2)-min)
+     /// </code>
+     /// ...but with a special case check for 0.
+     /// <para>
+     /// This degrades to <c>sqrt(x)</c> when min and base are both 0.
+     /// </para>
+     /// <para>
+     /// An SVG visualization of this function is in <c>doc-files/ss.baselineTf.svg</c>.
+     /// </para>
+     /// </summary>
+     /// <seealso cref="setBaselineTfFactors"/>
+     public virtual float baselineTf(float freq)
+     {
+         // Special case: a frequency of zero always scores zero.
+         if (0.0f == freq)
+         {
+             return 0.0f;
+         }
+
+         if (freq <= tf_min)
+         {
+             return tf_base;
+         }
+
+         return (float)Math.Sqrt(freq + (tf_base * tf_base) - tf_min);
+     }
+
+     /// <summary>
+     /// Uses a hyperbolic tangent function that allows for a hard max...
+     /// <code>
+     /// tf(x)=min+(max-min)/2*(((base**(x-xoffset)-base**-(x-xoffset))/(base**(x-xoffset)+base**-(x-xoffset)))+1)
+     /// </code>
+     /// <para>
+     /// This code is provided as a convenience for subclasses that want
+     /// to use a hyperbolic tf function.
+     /// </para>
+     /// <para>
+     /// An SVG visualization of this function is in <c>doc-files/ss.hyperbolicTf.svg</c>.
+     /// </para>
+     /// </summary>
+     /// <seealso cref="setHyperbolicTfFactors"/>
+     public virtual float hyperbolicTf(float freq)
+     {
+         // Special case: a frequency of zero always scores zero.
+         if (0.0f == freq)
+         {
+             return 0.0f;
+         }
+
+         float lowest = tf_hyper_min;
+         float highest = tf_hyper_max;
+         double radix = tf_hyper_base;
+         double shifted = (double)(freq - tf_hyper_xoffset);
+
+         // base**x and base**-x each appear twice in the formula; compute them once.
+         double rising = Math.Pow(radix, shifted);
+         double falling = Math.Pow(radix, -shifted);
+         double ratio = (rising - falling) / (rising + falling);
+
+         float result = lowest + (float)((highest - lowest) / 2.0f * (ratio + 1.0d));
+
+         // A NaN result is replaced by the configured maximum.
+         return float.IsNaN(result) ? highest : result;
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Misc/TermStats.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Misc/TermStats.cs b/src/Lucene.Net.Misc/Misc/TermStats.cs
new file mode 100644
index 0000000..061cca3
--- /dev/null
+++ b/src/Lucene.Net.Misc/Misc/TermStats.cs
@@ -0,0 +1,55 @@
+namespace org.apache.lucene.misc
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using BytesRef = org.apache.lucene.util.BytesRef;
+
+ /// <summary>
+ /// Holder for a term along with its statistics
+ /// (<see cref="docFreq"/> and <see cref="totalTermFreq"/>).
+ /// </summary>
+ public sealed class TermStats
+ {
+     public BytesRef termtext;
+     public string field;
+     public int docFreq;
+     public long totalTermFreq;
+
+     /// <summary>
+     /// Captures the given field name and statistics, and stores a deep copy of
+     /// <paramref name="termtext"/> (made with <c>BytesRef.deepCopyOf</c>).
+     /// </summary>
+     /// <param name="field"> value stored in <see cref="field"/> </param>
+     /// <param name="termtext"> term bytes; copied, not referenced </param>
+     /// <param name="df"> value stored in <see cref="docFreq"/> </param>
+     /// <param name="tf"> value stored in <see cref="totalTermFreq"/> </param>
+     internal TermStats(string field, BytesRef termtext, int df, long tf)
+     {
+         this.field = field;
+         docFreq = df;
+         totalTermFreq = tf;
+         this.termtext = BytesRef.deepCopyOf(termtext);
+     }
+
+     /// <summary>
+     /// The term decoded with <c>utf8ToString()</c>.
+     /// </summary>
+     internal string TermText
+     {
+         get
+         {
+             return termtext.utf8ToString();
+         }
+     }
+
+     /// <summary>
+     /// Formats the term text, document frequency and total term frequency on one line.
+     /// </summary>
+     public override string ToString()
+     {
+         string term = TermText;
+         return "TermStats: term=" + term
+             + " docFreq=" + docFreq
+             + " totalTermFreq=" + totalTermFreq;
+     }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Properties/AssemblyInfo.cs b/src/Lucene.Net.Misc/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..39cfea7
--- /dev/null
+++ b/src/Lucene.Net.Misc/Properties/AssemblyInfo.cs
@@ -0,0 +1,35 @@
+using System.Reflection;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Misc")]
+[assembly: AssemblyDescription("Index tools and other miscellaneous code")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")]
+[assembly: AssemblyProduct("Lucene.Net.Misc")]
+[assembly: AssemblyCopyright("Copyright © 2014")]
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("b2a3a8e1-a92a-4bd6-9f87-7747470615c3")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values or you can default the Build and Revision Numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/674f0cb9/src/Lucene.Net.Misc/Store/NativePosixUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Misc/Store/NativePosixUtil.cs b/src/Lucene.Net.Misc/Store/NativePosixUtil.cs
new file mode 100644
index 0000000..3bb51d5
--- /dev/null
+++ b/src/Lucene.Net.Misc/Store/NativePosixUtil.cs
@@ -0,0 +1,64 @@
+using System;
+
+namespace org.apache.lucene.store
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ ignore
+
+ /// <summary>
+ /// Provides access to native methods such as madvise() for
+ /// <see cref="NativeUnixDirectory"/>.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): the Java original bound these entry points through JNI
+ /// (<c>System.loadLibrary("NativePosixUtil")</c>). Here they are declared as P/Invoke
+ /// externs against a library of the same name. Confirm that the native library exposes
+ /// matching plain C exports, and decide how the <c>FileDescriptor</c> and <c>ByteBuffer</c>
+ /// parameters are to be represented and marshalled, before relying on these declarations.
+ /// </remarks>
+ public sealed class NativePosixUtil
+ {
+     // Library name taken from the original System.loadLibrary("NativePosixUtil") call;
+     // in .NET the library is named on the DllImport attribute instead of in a static initializer.
+     private const string NativeLibraryName = "NativePosixUtil";
+
+     // Advice codes accepted by the "advise" arguments below.
+     // NOTE(review): presumably these mirror the native fadvise/madvise hints - confirm against the native side.
+     public const int NORMAL = 0;
+     public const int SEQUENTIAL = 1;
+     public const int RANDOM = 2;
+     public const int WILLNEED = 3;
+     public const int DONTNEED = 4;
+     public const int NOREUSE = 5;
+
+     [System.Runtime.InteropServices.DllImport(NativeLibraryName)]
+     private static extern int posix_fadvise(FileDescriptor fd, long offset, long len, int advise);
+
+     [System.Runtime.InteropServices.DllImport(NativeLibraryName)]
+     public static extern int posix_madvise(ByteBuffer buf, int advise);
+
+     [System.Runtime.InteropServices.DllImport(NativeLibraryName)]
+     public static extern int madvise(ByteBuffer buf, int advise);
+
+     [System.Runtime.InteropServices.DllImport(NativeLibraryName)]
+     public static extern FileDescriptor open_direct(string filename, bool read);
+
+     [System.Runtime.InteropServices.DllImport(NativeLibraryName)]
+     public static extern long pread(FileDescriptor fd, long pos, ByteBuffer byteBuf);
+
+     /// <summary>
+     /// Calls the native <c>posix_fadvise</c> with the given arguments and turns a
+     /// non-zero return code into an exception.
+     /// </summary>
+     /// <param name="fd"> descriptor handed to the native call </param>
+     /// <param name="offset"> offset handed to the native call </param>
+     /// <param name="len"> length handed to the native call </param>
+     /// <param name="advise"> one of the advice codes declared on this class </param>
+     /// <exception cref="System.IO.IOException"> if the native call returns a non-zero code </exception>
+     public static void advise(FileDescriptor fd, long offset, long len, int advise)
+     {
+         int code = posix_fadvise(fd, offset, len, advise);
+         if (code != 0)
+         {
+             // A specific exception type instead of the bare System.Exception the converter emitted;
+             // the Java signature already advertised IOException for this method.
+             throw new System.IO.IOException("posix_fadvise failed code=" + code);
+         }
+     }
+ }
+
+
+}
\ No newline at end of file