You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/09/16 00:47:01 UTC
[03/11] Skeleton porting of Lucene.Net.Queries
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Function/ValueSource/SumFloatFunction.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSource/SumFloatFunction.cs b/src/Lucene.Net.Queries/Function/ValueSource/SumFloatFunction.cs
new file mode 100644
index 0000000..fcd3e41
--- /dev/null
+++ b/src/Lucene.Net.Queries/Function/ValueSource/SumFloatFunction.cs
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.queries.function.valuesource
+{
+
+
+    /// <summary>
+    /// <code>SumFloatFunction</code> returns the sum of its components.
+    /// </summary>
+    public class SumFloatFunction : MultiFloatFunction
+    {
+        /// <summary>
+        /// Creates a sum over the given component sources; the array is handed
+        /// unchanged to the <code>MultiFloatFunction</code> base constructor.
+        /// </summary>
+        public SumFloatFunction(ValueSource[] sources)
+            : base(sources)
+        {
+        }
+
+        /// <summary>
+        /// Returns the fixed function name <code>"sum"</code>.
+        /// </summary>
+        protected internal override string name()
+        {
+            return "sum";
+        }
+
+        /// <summary>
+        /// Adds up <code>floatVal(doc)</code> of every entry in <paramref name="valsArr"/>,
+        /// in array order, starting from 0. An empty array yields 0.
+        /// </summary>
+        protected internal override float func(int doc, FunctionValues[] valsArr)
+        {
+            float total = 0.0f;
+            for (int i = 0; i < valsArr.Length; i++)
+            {
+                total += valsArr[i].floatVal(doc);
+            }
+            return total;
+        }
+    }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Function/ValueSource/SumTotalTermFreqValueSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSource/SumTotalTermFreqValueSource.cs b/src/Lucene.Net.Queries/Function/ValueSource/SumTotalTermFreqValueSource.cs
new file mode 100644
index 0000000..48a9600
--- /dev/null
+++ b/src/Lucene.Net.Queries/Function/ValueSource/SumTotalTermFreqValueSource.cs
@@ -0,0 +1,132 @@
+using System.Collections;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.queries.function.valuesource
+{
+
+ using AtomicReaderContext = org.apache.lucene.index.AtomicReaderContext;
+ using Fields = org.apache.lucene.index.Fields;
+ using Terms = org.apache.lucene.index.Terms;
+ using LongDocValues = org.apache.lucene.queries.function.docvalues.LongDocValues;
+ using IndexSearcher = org.apache.lucene.search.IndexSearcher;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+    /// <summary>
+    /// <code>SumTotalTermFreqValueSource</code> returns the number of tokens
+    /// (sum of term freqs across all documents, across all terms).
+    /// Returns -1 if frequencies were omitted for the field, or if
+    /// the codec doesn't support this statistic.
+    /// @lucene.internal
+    /// </summary>
+    public class SumTotalTermFreqValueSource : ValueSource
+    {
+        protected internal readonly string indexedField;
+
+        /// <summary>
+        /// Creates a source that reports the statistic for <paramref name="indexedField"/>.
+        /// </summary>
+        public SumTotalTermFreqValueSource(string indexedField)
+        {
+            this.indexedField = indexedField;
+        }
+
+        /// <summary>
+        /// Returns the fixed function name <code>"sumtotaltermfreq"</code>.
+        /// </summary>
+        public virtual string name()
+        {
+            return "sumtotaltermfreq";
+        }
+
+        /// <summary>
+        /// Returns <code>name()</code> followed by the indexed field in parentheses.
+        /// </summary>
+        public override string description()
+        {
+            return name() + '(' + indexedField + ')';
+        }
+
+        /// <summary>
+        /// Returns whatever <see cref="createWeight"/> stored in <paramref name="context"/>
+        /// under this instance as key. <paramref name="readerContext"/> is not consulted.
+        /// (The Java original declared <code>throws IOException</code>.)
+        /// </summary>
+        public override FunctionValues getValues(IDictionary context, AtomicReaderContext readerContext)
+        {
+            return (FunctionValues)context[this];
+        }
+
+        /// <summary>
+        /// Sums <code>Terms.SumTotalTermFreq</code> of the indexed field over every leaf of
+        /// the searcher's top reader context and stores a constant-valued
+        /// <code>LongDocValues</code> in <paramref name="context"/> keyed by this instance.
+        /// Leaves without fields or without terms for the field are skipped. As soon as
+        /// one leaf reports -1 the total becomes -1 and the scan stops.
+        /// (The Java original declared <code>throws IOException</code>.)
+        /// </summary>
+        public override void createWeight(IDictionary context, IndexSearcher searcher)
+        {
+            long sumTotalTermFreq = 0;
+            foreach (AtomicReaderContext readerContext in searcher.TopReaderContext.leaves())
+            {
+                Fields fields = readerContext.reader().fields();
+                if (fields == null)
+                {
+                    continue;
+                }
+
+                Terms terms = fields.terms(indexedField);
+                if (terms == null)
+                {
+                    continue;
+                }
+
+                long v = terms.SumTotalTermFreq;
+                if (v == -1)
+                {
+                    // A single leaf without the statistic makes the total unknown.
+                    sumTotalTermFreq = -1;
+                    break;
+                }
+                sumTotalTermFreq += v;
+            }
+
+            context[this] = new LongDocValuesAnonymousInnerClassHelper(this, sumTotalTermFreq);
+        }
+
+        /// <summary>
+        /// Replacement for the Java anonymous <code>LongDocValues</code> subclass:
+        /// returns the same precomputed total for every document.
+        /// </summary>
+        private class LongDocValuesAnonymousInnerClassHelper : LongDocValues
+        {
+            private readonly long ttf;
+
+            // The converter emitted a second parameter literally named "this" and passed
+            // "this" to the base constructor; neither is legal C#. The owning value source
+            // is what the base class needs, so it is forwarded directly.
+            public LongDocValuesAnonymousInnerClassHelper(SumTotalTermFreqValueSource outerInstance, long ttf)
+                : base(outerInstance)
+            {
+                this.ttf = ttf;
+            }
+
+            public override long longVal(int doc)
+            {
+                return ttf;
+            }
+        }
+
+        /// <summary>
+        /// Combines the hash of the runtime type with the hash of the indexed field.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            return this.GetType().GetHashCode() + indexedField.GetHashCode();
+        }
+
+        /// <summary>
+        /// Two sources are equal when they have the same runtime type and the same
+        /// indexed field. Returns false for <c>null</c>.
+        /// </summary>
+        public override bool Equals(object o)
+        {
+            // Object.Equals must return false for null; calling o.GetType() directly would throw.
+            if (o == null || this.GetType() != o.GetType())
+            {
+                return false;
+            }
+            SumTotalTermFreqValueSource other = (SumTotalTermFreqValueSource)o;
+            return this.indexedField.Equals(other.indexedField);
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Function/ValueSource/TFValueSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSource/TFValueSource.cs b/src/Lucene.Net.Queries/Function/ValueSource/TFValueSource.cs
new file mode 100644
index 0000000..e957cc8
--- /dev/null
+++ b/src/Lucene.Net.Queries/Function/ValueSource/TFValueSource.cs
@@ -0,0 +1,197 @@
+using System;
+using System.Collections;
+
+namespace org.apache.lucene.queries.function.valuesource
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using org.apache.lucene.index;
+ using FloatDocValues = org.apache.lucene.queries.function.docvalues.FloatDocValues;
+ using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+ using IndexSearcher = org.apache.lucene.search.IndexSearcher;
+ using TFIDFSimilarity = org.apache.lucene.search.similarities.TFIDFSimilarity;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+    /// <summary>
+    /// Function that returns <see cref="TFIDFSimilarity.tf(float)"/>
+    /// for every document.
+    /// <para>
+    /// Note that the configured Similarity for the field must be
+    /// a subclass of <see cref="TFIDFSimilarity"/>
+    /// @lucene.internal
+    /// </para>
+    /// </summary>
+    public class TFValueSource : TermFreqValueSource
+    {
+        /// <summary>
+        /// Forwards all four arguments unchanged to the <code>TermFreqValueSource</code> constructor.
+        /// </summary>
+        public TFValueSource(string field, string val, string indexedField, BytesRef indexedBytes)
+            : base(field, val, indexedField, indexedBytes)
+        {
+        }
+
+        /// <summary>
+        /// Returns the fixed function name <code>"tf"</code>.
+        /// </summary>
+        public override string name()
+        {
+            return "tf";
+        }
+
+        /// <summary>
+        /// Builds per-document values that apply the searcher's TF-IDF similarity to the
+        /// frequency of the configured term. The searcher is read from
+        /// <paramref name="context"/> under the key <code>"searcher"</code>.
+        /// (The Java original declared <code>throws IOException</code>.)
+        /// </summary>
+        /// <exception cref="System.NotSupportedException">
+        /// <code>IDFValueSource.asTFIDF</code> returned null for the searcher's similarity.
+        /// </exception>
+        public override FunctionValues getValues(IDictionary context, AtomicReaderContext readerContext)
+        {
+            Fields fields = readerContext.reader().fields();
+            // NOTE(review): fields() is presumed to return null for a reader without
+            // postings - confirm against AtomicReader. Treating that as "no terms"
+            // lets reset() fall back to the empty enumerator instead of throwing here.
+            Terms terms = fields == null ? null : fields.terms(indexedField);
+            IndexSearcher searcher = (IndexSearcher)context["searcher"];
+            TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.Similarity, indexedField);
+            if (similarity == null)
+            {
+                throw new System.NotSupportedException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
+            }
+
+            return new FloatDocValuesAnonymousInnerClassHelper(this, terms, similarity);
+        }
+
+        /// <summary>
+        /// Replacement for the Java anonymous <code>FloatDocValues</code> subclass. Keeps a
+        /// forward-only postings cursor for the term and restarts it when documents
+        /// are requested out of order.
+        /// </summary>
+        private class FloatDocValuesAnonymousInnerClassHelper : FloatDocValues
+        {
+            private readonly TFValueSource outerInstance;
+            private readonly Terms terms;
+            private readonly TFIDFSimilarity similarity;
+
+            internal DocsEnum docs;
+            internal int atDoc;
+            internal int lastDocRequested;
+
+            // The converter emitted a parameter literally named "this" and passed "this"
+            // to the base constructor; neither is legal C#. The owning value source is
+            // forwarded instead.
+            public FloatDocValuesAnonymousInnerClassHelper(TFValueSource outerInstance, Terms terms, TFIDFSimilarity similarity)
+                : base(outerInstance)
+            {
+                this.outerInstance = outerInstance;
+                this.terms = terms;
+                this.similarity = similarity;
+                lastDocRequested = -1;
+
+                // The Java anonymous class ran reset() from an instance initializer, which
+                // the converter dropped. Without it docs stays null and the first
+                // floatVal() call fails on docs.advance().
+                reset();
+            }
+
+            /// <summary>
+            /// Positions a fresh postings enumerator on the configured term, or installs an
+            /// always-exhausted enumerator when the field or the term is missing, and
+            /// rewinds <code>atDoc</code> to -1.
+            /// (The Java original declared <code>throws IOException</code>.)
+            /// </summary>
+            public void reset()
+            {
+                // no one should call us for deleted docs?
+
+                DocsEnum found = null;
+                if (terms != null)
+                {
+                    TermsEnum termsEnum = terms.iterator(null);
+                    if (termsEnum.seekExact(outerInstance.indexedBytes))
+                    {
+                        found = termsEnum.docs(null, null);
+                    }
+                }
+
+                docs = found ?? new DocsEnumAnonymousInnerClassHelper();
+                atDoc = -1;
+            }
+
+            /// <summary>
+            /// Enumerator used when the term has no postings: it is exhausted from the start
+            /// and reports a frequency and cost of 0.
+            /// </summary>
+            private class DocsEnumAnonymousInnerClassHelper : DocsEnum
+            {
+                public override int freq()
+                {
+                    return 0;
+                }
+
+                public override int docID()
+                {
+                    return DocIdSetIterator.NO_MORE_DOCS;
+                }
+
+                public override int nextDoc()
+                {
+                    return DocIdSetIterator.NO_MORE_DOCS;
+                }
+
+                public override int advance(int target)
+                {
+                    return DocIdSetIterator.NO_MORE_DOCS;
+                }
+
+                public override long cost()
+                {
+                    return 0;
+                }
+            }
+
+            /// <summary>
+            /// Returns <code>similarity.tf(freq)</code> for the term in <paramref name="doc"/>,
+            /// or <code>similarity.tf(0)</code> when the term does not occur in that document.
+            /// </summary>
+            public override float floatVal(int doc)
+            {
+                try
+                {
+                    if (doc < lastDocRequested)
+                    {
+                        // out-of-order access.... reset
+                        reset();
+                    }
+                    lastDocRequested = doc;
+
+                    if (atDoc < doc)
+                    {
+                        atDoc = docs.advance(doc);
+                    }
+
+                    if (atDoc > doc)
+                    {
+                        // term doesn't match this document... either because we hit the
+                        // end, or because the next doc is after this doc.
+                        return similarity.tf(0);
+                    }
+
+                    // a match!
+                    return similarity.tf(docs.freq());
+                }
+                // Fully qualified: the file imports System but not System.IO.
+                catch (System.IO.IOException e)
+                {
+                    throw new Exception("caught exception in function " + outerInstance.description() + " : doc=" + doc, e);
+                }
+            }
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Function/ValueSource/TermFreqValueSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSource/TermFreqValueSource.cs b/src/Lucene.Net.Queries/Function/ValueSource/TermFreqValueSource.cs
new file mode 100644
index 0000000..7e03f2b
--- /dev/null
+++ b/src/Lucene.Net.Queries/Function/ValueSource/TermFreqValueSource.cs
@@ -0,0 +1,186 @@
+using System;
+using System.Collections;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.queries.function.valuesource
+{
+
+ using org.apache.lucene.index;
+ using IntDocValues = org.apache.lucene.queries.function.docvalues.IntDocValues;
+ using DocIdSetIterator = org.apache.lucene.search.DocIdSetIterator;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+    /// <summary>
+    /// Function that returns <see cref="DocsEnum.freq()"/> for the
+    /// supplied term in every document.
+    /// <para>
+    /// If the term does not exist in the document, returns 0.
+    /// If frequencies are omitted, returns 1.
+    /// </para>
+    /// </summary>
+    public class TermFreqValueSource : DocFreqValueSource
+    {
+        /// <summary>
+        /// Forwards all four arguments unchanged to the <code>DocFreqValueSource</code> constructor.
+        /// </summary>
+        public TermFreqValueSource(string field, string val, string indexedField, BytesRef indexedBytes)
+            : base(field, val, indexedField, indexedBytes)
+        {
+        }
+
+        /// <summary>
+        /// Returns the fixed function name <code>"termfreq"</code>.
+        /// </summary>
+        public override string name()
+        {
+            return "termfreq";
+        }
+
+        /// <summary>
+        /// Builds per-document values that report the frequency of the configured term.
+        /// <paramref name="context"/> is not consulted.
+        /// (The Java original declared <code>throws IOException</code>.)
+        /// </summary>
+        public override FunctionValues getValues(IDictionary context, AtomicReaderContext readerContext)
+        {
+            Fields fields = readerContext.reader().fields();
+            // NOTE(review): fields() is presumed to return null for a reader without
+            // postings - confirm against AtomicReader. Treating that as "no terms"
+            // lets reset() fall back to the empty enumerator instead of throwing here.
+            Terms terms = fields == null ? null : fields.terms(indexedField);
+
+            return new IntDocValuesAnonymousInnerClassHelper(this, terms);
+        }
+
+        /// <summary>
+        /// Replacement for the Java anonymous <code>IntDocValues</code> subclass. Keeps a
+        /// forward-only postings cursor for the term and restarts it when documents
+        /// are requested out of order.
+        /// </summary>
+        private class IntDocValuesAnonymousInnerClassHelper : IntDocValues
+        {
+            private readonly TermFreqValueSource outerInstance;
+            private readonly Terms terms;
+
+            internal DocsEnum docs;
+            internal int atDoc;
+            internal int lastDocRequested;
+
+            // The converter emitted a parameter literally named "this" and passed "this"
+            // to the base constructor; neither is legal C#. The owning value source is
+            // forwarded instead.
+            public IntDocValuesAnonymousInnerClassHelper(TermFreqValueSource outerInstance, Terms terms)
+                : base(outerInstance)
+            {
+                this.outerInstance = outerInstance;
+                this.terms = terms;
+                lastDocRequested = -1;
+
+                // The Java anonymous class ran reset() from an instance initializer, which
+                // the converter dropped. Without it docs stays null and the first
+                // intVal() call fails on docs.advance().
+                reset();
+            }
+
+            /// <summary>
+            /// Positions a fresh postings enumerator on the configured term, or installs an
+            /// always-exhausted enumerator when the field or the term is missing, and
+            /// rewinds <code>atDoc</code> to -1.
+            /// (The Java original declared <code>throws IOException</code>.)
+            /// </summary>
+            public void reset()
+            {
+                // no one should call us for deleted docs?
+
+                DocsEnum found = null;
+                if (terms != null)
+                {
+                    TermsEnum termsEnum = terms.iterator(null);
+                    if (termsEnum.seekExact(outerInstance.indexedBytes))
+                    {
+                        found = termsEnum.docs(null, null);
+                    }
+                }
+
+                docs = found ?? new DocsEnumAnonymousInnerClassHelper();
+                atDoc = -1;
+            }
+
+            /// <summary>
+            /// Enumerator used when the term has no postings: it is exhausted from the start
+            /// and reports a frequency and cost of 0.
+            /// </summary>
+            private class DocsEnumAnonymousInnerClassHelper : DocsEnum
+            {
+                public override int freq()
+                {
+                    return 0;
+                }
+
+                public override int docID()
+                {
+                    return DocIdSetIterator.NO_MORE_DOCS;
+                }
+
+                public override int nextDoc()
+                {
+                    return DocIdSetIterator.NO_MORE_DOCS;
+                }
+
+                public override int advance(int target)
+                {
+                    return DocIdSetIterator.NO_MORE_DOCS;
+                }
+
+                public override long cost()
+                {
+                    return 0;
+                }
+            }
+
+            /// <summary>
+            /// Returns the enumerator's frequency for the term in <paramref name="doc"/>,
+            /// or 0 when the term does not occur in that document.
+            /// </summary>
+            public override int intVal(int doc)
+            {
+                try
+                {
+                    if (doc < lastDocRequested)
+                    {
+                        // out-of-order access.... reset
+                        reset();
+                    }
+                    lastDocRequested = doc;
+
+                    if (atDoc < doc)
+                    {
+                        atDoc = docs.advance(doc);
+                    }
+
+                    if (atDoc > doc)
+                    {
+                        // term doesn't match this document... either because we hit the
+                        // end, or because the next doc is after this doc.
+                        return 0;
+                    }
+
+                    // a match!
+                    return docs.freq();
+                }
+                // Fully qualified: the file imports System but not System.IO.
+                catch (System.IO.IOException e)
+                {
+                    throw new Exception("caught exception in function " + outerInstance.description() + " : doc=" + doc, e);
+                }
+            }
+        }
+    }
+
+
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Function/ValueSource/TotalTermFreqValueSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSource/TotalTermFreqValueSource.cs b/src/Lucene.Net.Queries/Function/ValueSource/TotalTermFreqValueSource.cs
new file mode 100644
index 0000000..629d6c6
--- /dev/null
+++ b/src/Lucene.Net.Queries/Function/ValueSource/TotalTermFreqValueSource.cs
@@ -0,0 +1,127 @@
+using System.Collections;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+namespace org.apache.lucene.queries.function.valuesource
+{
+
+ using AtomicReaderContext = org.apache.lucene.index.AtomicReaderContext;
+ using Term = org.apache.lucene.index.Term;
+ using LongDocValues = org.apache.lucene.queries.function.docvalues.LongDocValues;
+ using IndexSearcher = org.apache.lucene.search.IndexSearcher;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+
+
+    /// <summary>
+    /// <code>TotalTermFreqValueSource</code> returns the total term freq
+    /// (sum of term freqs across all documents).
+    /// Returns -1 if frequencies were omitted for the field, or if
+    /// the codec doesn't support this statistic.
+    /// @lucene.internal
+    /// </summary>
+    public class TotalTermFreqValueSource : ValueSource
+    {
+        protected internal readonly string field;
+        protected internal readonly string indexedField;
+        protected internal readonly string val;
+        protected internal readonly BytesRef indexedBytes;
+
+        /// <summary>
+        /// Stores the four arguments. <paramref name="field"/> and <paramref name="val"/>
+        /// are used only by <see cref="description"/>; <paramref name="indexedField"/> and
+        /// <paramref name="indexedBytes"/> identify the term that is looked up and take
+        /// part in equality.
+        /// </summary>
+        public TotalTermFreqValueSource(string field, string val, string indexedField, BytesRef indexedBytes)
+        {
+            this.field = field;
+            this.val = val;
+            this.indexedField = indexedField;
+            this.indexedBytes = indexedBytes;
+        }
+
+        /// <summary>
+        /// Returns the fixed function name <code>"totaltermfreq"</code>.
+        /// </summary>
+        public virtual string name()
+        {
+            return "totaltermfreq";
+        }
+
+        /// <summary>
+        /// Returns <code>name()</code> followed by the field and value in parentheses.
+        /// </summary>
+        public override string description()
+        {
+            return name() + '(' + field + ',' + val + ')';
+        }
+
+        /// <summary>
+        /// Returns whatever <see cref="createWeight"/> stored in <paramref name="context"/>
+        /// under this instance as key. <paramref name="readerContext"/> is not consulted.
+        /// (The Java original declared <code>throws IOException</code>.)
+        /// </summary>
+        public override FunctionValues getValues(IDictionary context, AtomicReaderContext readerContext)
+        {
+            return (FunctionValues)context[this];
+        }
+
+        /// <summary>
+        /// Sums the reader's <code>totalTermFreq</code> for the configured term over every
+        /// leaf of the searcher's top reader context and stores a constant-valued
+        /// <code>LongDocValues</code> in <paramref name="context"/> keyed by this instance.
+        /// As soon as one leaf reports -1 the total becomes -1 and the scan stops.
+        /// (The Java original declared <code>throws IOException</code>.)
+        /// </summary>
+        public override void createWeight(IDictionary context, IndexSearcher searcher)
+        {
+            // The term is the same for every leaf, so build it once.
+            Term term = new Term(indexedField, indexedBytes);
+
+            long totalTermFreq = 0;
+            foreach (AtomicReaderContext readerContext in searcher.TopReaderContext.leaves())
+            {
+                // Named leafFreq rather than "val" so it does not hide the val field.
+                long leafFreq = readerContext.reader().totalTermFreq(term);
+                if (leafFreq == -1)
+                {
+                    // A single leaf without the statistic makes the total unknown.
+                    totalTermFreq = -1;
+                    break;
+                }
+                totalTermFreq += leafFreq;
+            }
+
+            context[this] = new LongDocValuesAnonymousInnerClassHelper(this, totalTermFreq);
+        }
+
+        /// <summary>
+        /// Replacement for the Java anonymous <code>LongDocValues</code> subclass:
+        /// returns the same precomputed total for every document.
+        /// </summary>
+        private class LongDocValuesAnonymousInnerClassHelper : LongDocValues
+        {
+            private readonly long ttf;
+
+            // The converter emitted a second parameter literally named "this" and passed
+            // "this" to the base constructor; neither is legal C#. The owning value source
+            // is what the base class needs, so it is forwarded directly.
+            public LongDocValuesAnonymousInnerClassHelper(TotalTermFreqValueSource outerInstance, long ttf)
+                : base(outerInstance)
+            {
+                this.ttf = ttf;
+            }
+
+            public override long longVal(int doc)
+            {
+                return ttf;
+            }
+        }
+
+        /// <summary>
+        /// Combines the hashes of the runtime type, the indexed field and the indexed bytes.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            return this.GetType().GetHashCode() + indexedField.GetHashCode() * 29 + indexedBytes.GetHashCode();
+        }
+
+        /// <summary>
+        /// Two sources are equal when they have the same runtime type, indexed field and
+        /// indexed bytes. Returns false for <c>null</c>.
+        /// </summary>
+        public override bool Equals(object o)
+        {
+            // Object.Equals must return false for null; calling o.GetType() directly would throw.
+            if (o == null || this.GetType() != o.GetType())
+            {
+                return false;
+            }
+            TotalTermFreqValueSource other = (TotalTermFreqValueSource)o;
+            return this.indexedField.Equals(other.indexedField) && this.indexedBytes.Equals(other.indexedBytes);
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Function/ValueSource/VectorValueSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSource/VectorValueSource.cs b/src/Lucene.Net.Queries/Function/ValueSource/VectorValueSource.cs
new file mode 100644
index 0000000..2668ac2
--- /dev/null
+++ b/src/Lucene.Net.Queries/Function/ValueSource/VectorValueSource.cs
@@ -0,0 +1,293 @@
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.queries.function.valuesource
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using AtomicReaderContext = org.apache.lucene.index.AtomicReaderContext;
+ using IndexSearcher = org.apache.lucene.search.IndexSearcher;
+
+
+
+    /// <summary>
+    /// Converts individual ValueSource instances to leverage the FunctionValues *Val functions that work with multiple values,
+    /// i.e. <see cref="org.apache.lucene.queries.function.FunctionValues.doubleVal(int, double[])"/>
+    /// </summary>
+    //Not crazy about the name, but...
+    public class VectorValueSource : MultiValueSource
+    {
+        protected internal readonly IList<ValueSource> sources;
+
+        /// <summary>
+        /// Wraps the given component sources. The list is stored as-is, not copied.
+        /// </summary>
+        public VectorValueSource(IList<ValueSource> sources)
+        {
+            this.sources = sources;
+        }
+
+        /// <summary>
+        /// The component sources, in vector order.
+        /// </summary>
+        public virtual IList<ValueSource> Sources
+        {
+            get
+            {
+                return sources;
+            }
+        }
+
+        /// <summary>
+        /// Number of component sources.
+        /// </summary>
+        public override int dimension()
+        {
+            return sources.Count;
+        }
+
+        /// <summary>
+        /// Returns the fixed function name <code>"vector"</code>.
+        /// </summary>
+        public virtual string name()
+        {
+            return "vector";
+        }
+
+        /// <summary>
+        /// Resolves every component source against <paramref name="readerContext"/> and
+        /// returns a <code>FunctionValues</code> whose array-filling accessors write one slot per
+        /// component. Two-component vectors get a dedicated implementation without loops.
+        /// (The Java original declared <code>throws IOException</code>.)
+        /// </summary>
+        public override FunctionValues getValues(IDictionary context, AtomicReaderContext readerContext)
+        {
+            int size = sources.Count;
+
+            // special-case x,y and lat,lon since it's so common
+            if (size == 2)
+            {
+                FunctionValues x = sources[0].getValues(context, readerContext);
+                FunctionValues y = sources[1].getValues(context, readerContext);
+                return new FunctionValuesAnonymousInnerClassHelper(this, x, y);
+            }
+
+            FunctionValues[] valsArr = new FunctionValues[size];
+            for (int i = 0; i < size; i++)
+            {
+                valsArr[i] = sources[i].getValues(context, readerContext);
+            }
+
+            return new FunctionValuesAnonymousInnerClassHelper2(this, valsArr);
+        }
+
+        /// <summary>
+        /// Two-component values: slot 0 comes from <code>x</code>, slot 1 from <code>y</code>.
+        /// </summary>
+        private class FunctionValuesAnonymousInnerClassHelper : FunctionValues
+        {
+            private readonly VectorValueSource outerInstance;
+            private readonly FunctionValues x;
+            private readonly FunctionValues y;
+
+            public FunctionValuesAnonymousInnerClassHelper(VectorValueSource outerInstance, FunctionValues x, FunctionValues y)
+            {
+                this.outerInstance = outerInstance;
+                this.x = x;
+                this.y = y;
+            }
+
+            public override void byteVal(int doc, sbyte[] vals)
+            {
+                vals[0] = x.byteVal(doc);
+                vals[1] = y.byteVal(doc);
+            }
+
+            public override void shortVal(int doc, short[] vals)
+            {
+                vals[0] = x.shortVal(doc);
+                vals[1] = y.shortVal(doc);
+            }
+
+            public override void intVal(int doc, int[] vals)
+            {
+                vals[0] = x.intVal(doc);
+                vals[1] = y.intVal(doc);
+            }
+
+            public override void longVal(int doc, long[] vals)
+            {
+                vals[0] = x.longVal(doc);
+                vals[1] = y.longVal(doc);
+            }
+
+            public override void floatVal(int doc, float[] vals)
+            {
+                vals[0] = x.floatVal(doc);
+                vals[1] = y.floatVal(doc);
+            }
+
+            public override void doubleVal(int doc, double[] vals)
+            {
+                vals[0] = x.doubleVal(doc);
+                vals[1] = y.doubleVal(doc);
+            }
+
+            public override void strVal(int doc, string[] vals)
+            {
+                vals[0] = x.strVal(doc);
+                vals[1] = y.strVal(doc);
+            }
+
+            public override string ToString(int doc)
+            {
+                return outerInstance.name() + "(" + x.ToString(doc) + "," + y.ToString(doc) + ")";
+            }
+        }
+
+        /// <summary>
+        /// General-case values: slot <code>i</code> comes from <code>valsArr[i]</code>.
+        /// </summary>
+        private class FunctionValuesAnonymousInnerClassHelper2 : FunctionValues
+        {
+            private readonly VectorValueSource outerInstance;
+            private readonly FunctionValues[] valsArr;
+
+            public FunctionValuesAnonymousInnerClassHelper2(VectorValueSource outerInstance, FunctionValues[] valsArr)
+            {
+                this.outerInstance = outerInstance;
+                this.valsArr = valsArr;
+            }
+
+            public override void byteVal(int doc, sbyte[] vals)
+            {
+                for (int i = 0; i < valsArr.Length; i++)
+                {
+                    vals[i] = valsArr[i].byteVal(doc);
+                }
+            }
+
+            public override void shortVal(int doc, short[] vals)
+            {
+                for (int i = 0; i < valsArr.Length; i++)
+                {
+                    vals[i] = valsArr[i].shortVal(doc);
+                }
+            }
+
+            public override void floatVal(int doc, float[] vals)
+            {
+                for (int i = 0; i < valsArr.Length; i++)
+                {
+                    vals[i] = valsArr[i].floatVal(doc);
+                }
+            }
+
+            public override void intVal(int doc, int[] vals)
+            {
+                for (int i = 0; i < valsArr.Length; i++)
+                {
+                    vals[i] = valsArr[i].intVal(doc);
+                }
+            }
+
+            public override void longVal(int doc, long[] vals)
+            {
+                for (int i = 0; i < valsArr.Length; i++)
+                {
+                    vals[i] = valsArr[i].longVal(doc);
+                }
+            }
+
+            public override void doubleVal(int doc, double[] vals)
+            {
+                for (int i = 0; i < valsArr.Length; i++)
+                {
+                    vals[i] = valsArr[i].doubleVal(doc);
+                }
+            }
+
+            public override void strVal(int doc, string[] vals)
+            {
+                for (int i = 0; i < valsArr.Length; i++)
+                {
+                    vals[i] = valsArr[i].strVal(doc);
+                }
+            }
+
+            public override string ToString(int doc)
+            {
+                StringBuilder sb = new StringBuilder();
+                sb.Append(outerInstance.name()).Append('(');
+                bool firstTime = true;
+                foreach (FunctionValues vals in valsArr)
+                {
+                    if (firstTime)
+                    {
+                        firstTime = false;
+                    }
+                    else
+                    {
+                        sb.Append(',');
+                    }
+                    sb.Append(vals.ToString(doc));
+                }
+                sb.Append(')');
+                return sb.ToString();
+            }
+        }
+
+        /// <summary>
+        /// Forwards <code>createWeight</code> to every component source, in order.
+        /// (The Java original declared <code>throws IOException</code>.)
+        /// </summary>
+        public override void createWeight(IDictionary context, IndexSearcher searcher)
+        {
+            foreach (ValueSource source in sources)
+            {
+                source.createWeight(context, searcher);
+            }
+        }
+
+        /// <summary>
+        /// Returns <code>name()</code> followed by the comma-separated string forms of the
+        /// component sources in parentheses.
+        /// </summary>
+        public override string description()
+        {
+            StringBuilder sb = new StringBuilder();
+            sb.Append(name()).Append('(');
+            bool firstTime = true;
+            foreach (ValueSource source in sources)
+            {
+                if (firstTime)
+                {
+                    firstTime = false;
+                }
+                else
+                {
+                    sb.Append(',');
+                }
+                sb.Append(source);
+            }
+            sb.Append(")");
+            return sb.ToString();
+        }
+
+        /// <summary>
+        /// Two vector sources are equal when their component lists have the same length
+        /// and pairwise-equal elements.
+        /// </summary>
+        public override bool Equals(object o)
+        {
+            if (object.ReferenceEquals(this, o))
+            {
+                return true;
+            }
+            if (!(o is VectorValueSource))
+            {
+                return false;
+            }
+
+            VectorValueSource that = (VectorValueSource)o;
+
+            // The Java original relied on List.equals, which compares element by element.
+            // .NET collections inherit reference equality from object, so the lists
+            // have to be walked explicitly to keep the same meaning.
+            IList<ValueSource> mine = sources;
+            IList<ValueSource> theirs = that.sources;
+            if (object.ReferenceEquals(mine, theirs))
+            {
+                return true;
+            }
+            if (theirs == null || mine.Count != theirs.Count)
+            {
+                return false;
+            }
+            for (int i = 0; i < mine.Count; i++)
+            {
+                if (!object.Equals(mine[i], theirs[i]))
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Order-sensitive hash over the component sources, consistent with
+        /// <see cref="Equals(object)"/>.
+        /// </summary>
+        public override int GetHashCode()
+        {
+            // Same recurrence as Java's List.hashCode (seed 1, multiplier 31); wrap-around
+            // on overflow is intended.
+            unchecked
+            {
+                int hash = 1;
+                foreach (ValueSource source in sources)
+                {
+                    hash = 31 * hash + (source == null ? 0 : source.GetHashCode());
+                }
+                return hash;
+            }
+        }
+    }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Function/ValueSourceScorer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Function/ValueSourceScorer.cs b/src/Lucene.Net.Queries/Function/ValueSourceScorer.cs
new file mode 100644
index 0000000..fa79903
--- /dev/null
+++ b/src/Lucene.Net.Queries/Function/ValueSourceScorer.cs
@@ -0,0 +1,126 @@
+namespace org.apache.lucene.queries.function
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using IndexReader = org.apache.lucene.index.IndexReader;
+ using MultiFields = org.apache.lucene.index.MultiFields;
+ using Scorer = org.apache.lucene.search.Scorer;
+ using Bits = org.apache.lucene.util.Bits;
+
+ /// <summary>
+ /// <seealso cref="Scorer"/> that steps through the document ids of a reader and reports
+ /// <c>FunctionValues.floatVal(int)</c> of the current document as the score.
+ /// </summary>
+ public class ValueSourceScorer : Scorer
+ {
+     protected internal readonly IndexReader reader;
+     protected internal readonly int maxDoc;
+     protected internal readonly FunctionValues values;
+     protected internal bool checkDeletes;
+     private readonly Bits liveDocs;
+     private int current = -1; // id the scorer is positioned on; -1 until nextDoc()/advance() is called
+
+     /// <summary> Binds the scorer to a reader and its values; delete checking starts out requested. </summary>
+     protected internal ValueSourceScorer(IndexReader reader, FunctionValues values) : base(null)
+     {
+         this.reader = reader;
+         maxDoc = reader.maxDoc();
+         this.values = values;
+         CheckDeletes = true;
+         liveDocs = MultiFields.getLiveDocs(reader);
+     }
+
+     /// <summary> The reader passed to the constructor. </summary>
+     public virtual IndexReader Reader
+     {
+         get { return reader; }
+     }
+
+     /// <summary> Requests (true) or disables (false) skipping of deleted documents. </summary>
+     /// <remarks> The stored flag only becomes true when <c>reader.hasDeletions()</c> is also true. </remarks>
+     public virtual bool CheckDeletes
+     {
+         set { checkDeletes = value ? reader.hasDeletions() : false; }
+     }
+
+     /// <summary> True when <paramref name="doc"/> passes the live-docs check (if enabled) and <c>matchesValue</c>. </summary>
+     /// <remarks> <c>matchesValue</c> is not consulted for documents rejected by the live-docs check. </remarks>
+     public virtual bool matches(int doc)
+     {
+         if (checkDeletes && !liveDocs.get(doc))
+         {
+             return false;
+         }
+         return matchesValue(doc);
+     }
+
+     /// <summary> Per-document filter hook; this base implementation accepts every document. </summary>
+     public virtual bool matchesValue(int doc)
+     {
+         return true;
+     }
+
+     /// <summary> The id of the document the scorer is currently positioned on. </summary>
+     public override int docID()
+     {
+         return current;
+     }
+
+     /// <summary> Moves to the next id below <c>maxDoc</c> accepted by <c>matches</c>, or to <c>NO_MORE_DOCS</c>. </summary>
+     public override int nextDoc()
+     {
+         while (++current < maxDoc)
+         {
+             if (matches(current))
+             {
+                 return current;
+             }
+         }
+         current = NO_MORE_DOCS;
+         return current;
+     }
+
+     /// <summary> Positions on the first accepted id that is not below <paramref name="target"/>. </summary>
+     public override int advance(int target)
+     {
+         // nextDoc() increments first, so start one below; also works fine when target==NO_MORE_DOCS
+         current = target - 1;
+         return nextDoc();
+     }
+
+     /// <summary> <c>values.floatVal</c> of the current document. </summary>
+     public override float score()
+     {
+         return values.floatVal(current);
+     }
+
+     /// <summary> Always 1. </summary>
+     public override int freq()
+     {
+         return 1;
+     }
+
+     /// <summary> Returns <c>maxDoc</c>. </summary>
+     public override long cost()
+     {
+         return maxDoc;
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Lucene.Net.Queries.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Lucene.Net.Queries.csproj b/src/Lucene.Net.Queries/Lucene.Net.Queries.csproj
new file mode 100644
index 0000000..6856c74
--- /dev/null
+++ b/src/Lucene.Net.Queries/Lucene.Net.Queries.csproj
@@ -0,0 +1,129 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{69D7956C-C2CC-4708-B399-A188FEC384C4}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Queries</RootNamespace>
+ <AssemblyName>Lucene.Net.Queries</AssemblyName>
+ <TargetFrameworkVersion>v4.0</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="System.Xml.Linq" />
+ <Reference Include="System.Data.DataSetExtensions" />
+ <Reference Include="Microsoft.CSharp" />
+ <Reference Include="System.Data" />
+ <Reference Include="System.Xml" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="BooleanFilter.cs" />
+ <Compile Include="BoostingQuery.cs" />
+ <Compile Include="ChainedFilter.cs" />
+ <Compile Include="CommonTermsQuery.cs" />
+ <Compile Include="CustomScoreProvider.cs" />
+ <Compile Include="CustomScoreQuery.cs" />
+ <Compile Include="FilterClause.cs" />
+ <Compile Include="Function\BoostedQuery.cs" />
+ <Compile Include="Function\DocValues\BoolDocValues.cs" />
+ <Compile Include="Function\DocValues\DocTermsIndexDocValues.cs" />
+ <Compile Include="Function\DocValues\DoubleDocValues.cs" />
+ <Compile Include="Function\DocValues\FloatDocValues.cs" />
+ <Compile Include="Function\DocValues\IntDocValues.cs" />
+ <Compile Include="Function\DocValues\LongDocValues.cs" />
+ <Compile Include="Function\DocValues\StrDocValues.cs" />
+ <Compile Include="Function\FunctionQuery.cs" />
+ <Compile Include="Function\FunctionValues.cs" />
+ <Compile Include="Function\ValueSource.cs" />
+ <Compile Include="Function\ValueSourceScorer.cs" />
+ <Compile Include="Function\ValueSource\BoolFunction.cs" />
+ <Compile Include="Function\ValueSource\ByteFieldSource.cs" />
+ <Compile Include="Function\ValueSource\BytesRefFieldSource.cs" />
+ <Compile Include="Function\ValueSource\ConstNumberSource.cs" />
+ <Compile Include="Function\ValueSource\ConstValueSource.cs" />
+ <Compile Include="Function\ValueSource\DefFunction.cs" />
+ <Compile Include="Function\ValueSource\DivFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\DocFreqValueSource.cs" />
+ <Compile Include="Function\ValueSource\DoubleConstValueSource.cs" />
+ <Compile Include="Function\ValueSource\DoubleFieldSource.cs" />
+ <Compile Include="Function\ValueSource\DualFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\EnumFieldSource.cs" />
+ <Compile Include="Function\ValueSource\FieldCacheSource.cs" />
+ <Compile Include="Function\ValueSource\FloatFieldSource.cs" />
+ <Compile Include="Function\ValueSource\IDFValueSource.cs" />
+ <Compile Include="Function\ValueSource\IfFunction.cs" />
+ <Compile Include="Function\ValueSource\IntFieldSource.cs" />
+ <Compile Include="Function\ValueSource\JoinDocFreqValueSource.cs" />
+ <Compile Include="Function\ValueSource\LinearFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\LiteralValueSource.cs" />
+ <Compile Include="Function\ValueSource\LongFieldSource.cs" />
+ <Compile Include="Function\ValueSource\MaxDocValueSource.cs" />
+ <Compile Include="Function\ValueSource\MaxFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\MinFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\MultiBoolFunction.cs" />
+ <Compile Include="Function\ValueSource\MultiFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\MultiFunction.cs" />
+ <Compile Include="Function\ValueSource\MultiValueSource.cs" />
+ <Compile Include="Function\ValueSource\NormValueSource.cs" />
+ <Compile Include="Function\ValueSource\NumDocsValueSource.cs" />
+ <Compile Include="Function\ValueSource\OrdFieldSource.cs" />
+ <Compile Include="Function\ValueSource\PowFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\ProductFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\QueryValueSource.cs" />
+ <Compile Include="Function\ValueSource\RangeMapFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\ReciprocalFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\ReverseOrdFieldSource.cs" />
+ <Compile Include="Function\ValueSource\ScaleFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\ShortFieldSource.cs" />
+ <Compile Include="Function\ValueSource\SimpleBoolFunction.cs" />
+ <Compile Include="Function\ValueSource\SimpleFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\SingleFunction.cs" />
+ <Compile Include="Function\ValueSource\SumFloatFunction.cs" />
+ <Compile Include="Function\ValueSource\SumTotalTermFreqValueSource.cs" />
+ <Compile Include="Function\ValueSource\TermFreqValueSource.cs" />
+ <Compile Include="Function\ValueSource\TFValueSource.cs" />
+ <Compile Include="Function\ValueSource\TotalTermFreqValueSource.cs" />
+ <Compile Include="Function\ValueSource\VectorValueSource.cs" />
+ <Compile Include="Mlt\MoreLikeThis.cs" />
+ <Compile Include="Mlt\MoreLikeThisQuery.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="TermFilter.cs" />
+ <Compile Include="TermsFilter.cs" />
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Core\Lucene.Net.csproj">
+ <Project>{5d4ad9be-1ffb-41ab-9943-25737971bf57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Mlt/MoreLikeThis.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Mlt/MoreLikeThis.cs b/src/Lucene.Net.Queries/Mlt/MoreLikeThis.cs
new file mode 100644
index 0000000..1a25108
--- /dev/null
+++ b/src/Lucene.Net.Queries/Mlt/MoreLikeThis.cs
@@ -0,0 +1,981 @@
+using System.Collections.Generic;
+using System.Text;
+
+/// <summary>
+/// Copyright 2004-2005 The Apache Software Foundation.
+///
+/// Licensed under the Apache License, Version 2.0 (the "License");
+/// you may not use this file except in compliance with the License.
+/// You may obtain a copy of the License at
+///
+/// http://www.apache.org/licenses/LICENSE-2.0
+///
+/// Unless required by applicable law or agreed to in writing, software
+/// distributed under the License is distributed on an "AS IS" BASIS,
+/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+/// See the License for the specific language governing permissions and
+/// limitations under the License.
+/// </summary>
+namespace org.apache.lucene.queries.mlt
+{
+
+
+ using Analyzer = org.apache.lucene.analysis.Analyzer;
+ using TokenStream = org.apache.lucene.analysis.TokenStream;
+ using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+ using Document = org.apache.lucene.document.Document;
+ using Fields = org.apache.lucene.index.Fields;
+ using IndexReader = org.apache.lucene.index.IndexReader;
+ using IndexableField = org.apache.lucene.index.IndexableField;
+ using MultiFields = org.apache.lucene.index.MultiFields;
+ using Term = org.apache.lucene.index.Term;
+ using Terms = org.apache.lucene.index.Terms;
+ using TermsEnum = org.apache.lucene.index.TermsEnum;
+ using org.apache.lucene.search;
+ using DefaultSimilarity = org.apache.lucene.search.similarities.DefaultSimilarity;
+ using TFIDFSimilarity = org.apache.lucene.search.similarities.TFIDFSimilarity;
+ using BytesRef = org.apache.lucene.util.BytesRef;
+ using CharsRef = org.apache.lucene.util.CharsRef;
+ using IOUtils = org.apache.lucene.util.IOUtils;
+ using PriorityQueue = org.apache.lucene.util.PriorityQueue;
+ using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
+
+
+ /// <summary>
+ /// Generate "more like this" similarity queries.
+ /// Based on this mail:
+ /// <code><pre>
+ /// Lucene does let you access the document frequency of terms, with IndexReader.docFreq().
+ /// Term frequencies can be computed by re-tokenizing the text, which, for a single document,
+ /// is usually fast enough. But looking up the docFreq() of every term in the document is
+ /// probably too slow.
+ /// <p/>
+ /// You can use some heuristics to prune the set of terms, to avoid calling docFreq() too much,
+ /// or at all. Since you're trying to maximize a tf*idf score, you're probably most interested
+ /// in terms with a high tf. Choosing a tf threshold even as low as two or three will radically
+ /// reduce the number of terms under consideration. Another heuristic is that terms with a
+ /// high idf (i.e., a low df) tend to be longer. So you could threshold the terms by the
+ /// number of characters, not selecting anything less than, e.g., six or seven characters.
+ /// With these sorts of heuristics you can usually find small set of, e.g., ten or fewer terms
+ /// that do a pretty good job of characterizing a document.
+ /// <p/>
+ /// It all depends on what you're trying to do. If you're trying to eek out that last percent
+ /// of precision and recall regardless of computational difficulty so that you can win a TREC
+ /// competition, then the techniques I mention above are useless. But if you're trying to
+ /// provide a "more like this" button on a search results page that does a decent job and has
+ /// good performance, such techniques might be useful.
+ /// <p/>
+ /// An efficient, effective "more-like-this" query generator would be a great contribution, if
+ /// anyone's interested. I'd imagine that it would take a Reader or a String (the document's
+ /// text), analyzer Analyzer, and return a set of representative terms using heuristics like those
+ /// above. The frequency and length thresholds could be parameters, etc.
+ /// <p/>
+ /// Doug
+ /// </pre></code>
+ /// <p/>
+ /// <p/>
+ /// <p/>
+ /// <h3>Initial Usage</h3>
+ /// <p/>
+ /// This class has lots of options to try to make it efficient and flexible.
+ /// The simplest possible usage is as follows. The bold
+ /// fragment is specific to this class.
+ /// <p/>
+ /// <pre class="prettyprint">
+ /// <p/>
+ /// IndexReader ir = ...
+ /// IndexSearcher is = ...
+ /// <p/>
+ /// MoreLikeThis mlt = new MoreLikeThis(ir);
+ /// Reader target = ... // orig source of doc you want to find similarities to
+ /// Query query = mlt.like( target);
+ /// <p/>
+ /// Hits hits = is.search(query);
+ /// // now the usual iteration thru 'hits' - the only thing to watch for is to make sure
+ /// //you ignore the doc if it matches your 'target' document, as it should be similar to itself
+ /// <p/>
+ /// </pre>
+ /// <p/>
+ /// Thus you:
+ /// <ol>
+ /// <li> do your normal, Lucene setup for searching, </li>
+ /// <li> create a MoreLikeThis, </li>
+ /// <li> get the text of the doc you want to find similarities to </li>
+ /// <li> then call one of the like() calls to generate a similarity query </li>
+ /// <li> call the searcher to find the similar docs </li>
+ /// </ol>
+ /// <p/>
+ /// <h3>More Advanced Usage</h3>
+ /// <p/>
+ /// You may want to set <see cref="FieldNames"/> so you can examine
+ /// multiple fields (e.g. body and title) for similarity.
+ /// <p/>
+ /// <p/>
+ /// Depending on the size of your index and the size and makeup of your documents you
+ /// may want to call the other set methods to control how the similarity queries are
+ /// generated:
+ /// <ul>
+ /// <li> <see cref="MinTermFreq"/> </li>
+ /// <li> <see cref="MinDocFreq"/> </li>
+ /// <li> <see cref="MaxDocFreq"/> </li>
+ /// <li> <see cref="MaxDocFreqPct"/> </li>
+ /// <li> <see cref="MinWordLen"/> </li>
+ /// <li> <see cref="MaxWordLen"/> </li>
+ /// <li> <see cref="MaxQueryTerms"/> </li>
+ /// <li> <see cref="MaxNumTokensParsed"/> </li>
+ /// <li> <see cref="StopWords"/> </li>
+ /// </ul>
+ /// <p/>
+ /// <hr/>
+ /// <pre>
+ /// Changes: Mark Harwood 29/02/04
+ /// Some bugfixing, some refactoring, some optimisation.
+ /// - bugfix: retrieveTerms(int docNum) was not working for indexes without a termvector -added missing code
+ /// - bugfix: No significant terms being created for fields with a termvector - because
+ /// was only counting one occurrence per term/field pair in calculations(ie not including frequency info from TermVector)
+ /// - refactor: moved common code into isNoiseWord()
+ /// - optimise: when no termvector support available - used maxNumTermsParsed to limit amount of tokenization
+ /// </pre>
+ /// </summary>
+ public sealed class MoreLikeThis
+ {
+
+ /// <summary>
+ /// Default maximum number of tokens to parse in each example doc field that is not stored with TermVector support.
+ /// </summary>
+ /// <seealso cref="MaxNumTokensParsed"/>
+ public const int DEFAULT_MAX_NUM_TOKENS_PARSED = 5000;
+
+ /// <summary>
+ /// Ignore terms with less than this frequency in the source doc.
+ /// </summary>
+ /// <remarks> Initial value of <see cref="MinTermFreq"/>. </remarks>
+ /// <seealso cref="MinTermFreq"/>
+ public const int DEFAULT_MIN_TERM_FREQ = 2;
+
+ /// <summary>
+ /// Ignore words which do not occur in at least this many docs.
+ /// </summary>
+ /// <remarks> Initial value of <see cref="MinDocFreq"/>. </remarks>
+ /// <seealso cref="MinDocFreq"/>
+ public const int DEFAULT_MIN_DOC_FREQ = 5;
+
+ /// <summary>
+ /// Ignore words which occur in more than this many docs.
+ /// </summary>
+ /// <remarks> Initial value of <see cref="MaxDocFreq"/>. </remarks>
+ /// <seealso cref="MaxDocFreq"/>
+ /// <seealso cref="MaxDocFreqPct"/>
+ public static readonly int DEFAULT_MAX_DOC_FREQ = int.MaxValue;
+
+ /// <summary>
+ /// Boost terms in query based on score.
+ /// </summary>
+ /// <remarks> Initial value of <see cref="Boost"/>. </remarks>
+ /// <seealso cref="Boost"/>
+ public const bool DEFAULT_BOOST = false;
+
+ /// <summary>
+ /// Default field names. Null is used to specify that the field names should be looked
+ /// up at runtime from the provided reader.
+ /// </summary>
+ public static readonly string[] DEFAULT_FIELD_NAMES = new string[]{"contents"};
+
+ /// <summary>
+ /// Ignore words less than this length or if 0 then this has no effect.
+ /// </summary>
+ /// <remarks> Initial value of <see cref="MinWordLen"/>. </remarks>
+ /// <seealso cref="MinWordLen"/>
+ public const int DEFAULT_MIN_WORD_LENGTH = 0;
+
+ /// <summary>
+ /// Ignore words greater than this length or if 0 then this has no effect.
+ /// </summary>
+ /// <remarks> Initial value of <see cref="MaxWordLen"/>. </remarks>
+ /// <seealso cref="MaxWordLen"/>
+ public const int DEFAULT_MAX_WORD_LENGTH = 0;
+
+ /// <summary>
+ /// Default set of stopwords.
+ /// If null means to allow stop words.
+ /// </summary>
+ /// <remarks> Initial value of the current stop-word set. </remarks>
+ /// <seealso cref="StopWords"/>
+ // Java declared this as Set<?>; C# has no wildcard generics, so the element type is fixed to
+ // string here (assumed: the set is only ever probed with term text - TODO confirm).
+ public const HashSet<string> DEFAULT_STOP_WORDS = null;
+
+ /// <summary>
+ /// Current set of stop words.
+ /// </summary>
+ // Java declared this as Set<?>; typed to match DEFAULT_STOP_WORDS because C# cannot express
+ // a wildcard type argument.
+ private HashSet<string> stopWords = DEFAULT_STOP_WORDS;
+
+ /// <summary>
+ /// Return a Query with no more than this many terms.
+ /// </summary>
+ /// <remarks> Initial value of <see cref="MaxQueryTerms"/>; the Java documentation also
+ /// points to <c>BooleanQuery#getMaxClauseCount</c>. </remarks>
+ /// <seealso cref="MaxQueryTerms"/>
+ public const int DEFAULT_MAX_QUERY_TERMS = 25;
+
+ /// <summary>
+ /// Analyzer that will be used to parse the doc.
+ /// </summary>
+ private Analyzer analyzer = null;
+
+ /// <summary>
+ /// Ignore words less frequent than this.
+ /// </summary>
+ private int minTermFreq = DEFAULT_MIN_TERM_FREQ;
+
+ /// <summary>
+ /// Ignore words which do not occur in at least this many docs.
+ /// </summary>
+ private int minDocFreq = DEFAULT_MIN_DOC_FREQ;
+
+ /// <summary>
+ /// Ignore words which occur in more than this many docs.
+ /// </summary>
+ private int maxDocFreq = DEFAULT_MAX_DOC_FREQ;
+
+ /// <summary>
+ /// Should we apply a boost to the Query based on the scores?
+ /// </summary>
+ private bool boost = DEFAULT_BOOST;
+
+ /// <summary>
+ /// Field names we'll analyze.
+ /// </summary>
+ private string[] fieldNames = DEFAULT_FIELD_NAMES;
+
+ /// <summary>
+ /// The maximum number of tokens to parse in each example doc field that is not stored with TermVector support
+ /// </summary>
+ private int maxNumTokensParsed = DEFAULT_MAX_NUM_TOKENS_PARSED;
+
+ /// <summary>
+ /// Ignore words if less than this len.
+ /// </summary>
+ private int minWordLen = DEFAULT_MIN_WORD_LENGTH;
+
+ /// <summary>
+ /// Ignore words if greater than this len.
+ /// </summary>
+ private int maxWordLen = DEFAULT_MAX_WORD_LENGTH;
+
+ /// <summary>
+ /// Don't return a query longer than this.
+ /// </summary>
+ private int maxQueryTerms = DEFAULT_MAX_QUERY_TERMS;
+
+ /// <summary>
+ /// For idf() calculations.
+ /// </summary>
+ private TFIDFSimilarity similarity; // assigned by the constructor
+
+ /// <summary>
+ /// IndexReader to use
+ /// </summary>
+ private readonly IndexReader ir;
+
+ /// <summary>
+ /// Boost factor to use when boosting the terms
+ /// </summary>
+ private float boostFactor = 1;
+
+ /// <summary>
+ /// Gets or sets the boost factor used when boosting terms.
+ /// </summary>
+ /// <value> the boost factor used when boosting terms; the backing field starts at 1 </value>
+ /// <seealso cref="Boost"/>
+ public float BoostFactor
+ {
+     get { return boostFactor; }
+     set { boostFactor = value; }
+ }
+
+
+ /// <summary>
+ /// Constructor requiring an IndexReader; a new <c>DefaultSimilarity</c> is used as the similarity.
+ /// </summary>
+ public MoreLikeThis(IndexReader ir) : this(ir, new DefaultSimilarity())
+ {
+ }
+
+ /// <summary>
+ /// Creates an instance over <paramref name="ir"/> that uses <paramref name="sim"/> as its similarity.
+ /// </summary>
+ /// <param name="ir"> the reader that document counts and frequencies are read from </param>
+ /// <param name="sim"> the similarity used for idf() calculations </param>
+ public MoreLikeThis(IndexReader ir, TFIDFSimilarity sim)
+ {
+     similarity = sim;
+     this.ir = ir;
+ }
+
+
+ /// <summary>
+ /// Gets or sets the similarity used for idf() calculations.
+ /// </summary>
+ /// <value> the instance supplied to the constructor, until a different one is assigned </value>
+ public TFIDFSimilarity Similarity
+ {
+     get { return similarity; }
+     set { similarity = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the analyzer that will be used to parse the source doc. The default analyzer
+ /// is not set.
+ /// </summary>
+ /// <value> the analyzer that will be used to parse source doc with. </value>
+ public Analyzer Analyzer
+ {
+     get { return analyzer; }
+     set { analyzer = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the frequency below which terms will be ignored in the source doc. The default
+ /// frequency is <see cref="DEFAULT_MIN_TERM_FREQ"/>.
+ /// </summary>
+ /// <value> the frequency below which terms will be ignored in the source doc. </value>
+ public int MinTermFreq
+ {
+     get { return minTermFreq; }
+     set { minTermFreq = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the document frequency below which words will be ignored: words which do not
+ /// occur in at least this many docs are skipped. The default is <see cref="DEFAULT_MIN_DOC_FREQ"/>.
+ /// </summary>
+ /// <value> the frequency at which words will be ignored which do not occur in at least this
+ /// many docs. </value>
+ public int MinDocFreq
+ {
+     get { return minDocFreq; }
+     set { minDocFreq = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the maximum frequency in which words may still appear.
+ /// Words that appear in more than this many docs will be ignored. The default frequency is
+ /// <see cref="DEFAULT_MAX_DOC_FREQ"/>.
+ /// </summary>
+ /// <value> the maximum frequency at which words are still allowed,
+ /// words which occur in more docs than this are ignored. </value>
+ public int MaxDocFreq
+ {
+     get { return maxDocFreq; }
+     set { maxDocFreq = value; }
+ }
+
+
+ /// <summary>
+ /// Set the maximum percentage in which words may still appear. Words that appear
+ /// in more than this many percent of all docs will be ignored.
+ /// </summary>
+ /// <value> the maximum percentage of documents (0-100) that a term may appear
+ /// in to be still considered relevant </value>
+ public int MaxDocFreqPct
+ {
+     set
+     {
+         // Multiply in 64-bit: value * numDocs overflows a 32-bit int as soon as the index holds
+         // more than int.MaxValue / 100 (roughly 21 million) documents at 100 percent.
+         this.maxDocFreq = (int) ((long) value * ir.numDocs() / 100);
+     }
+ }
+
+
+ /// <summary>
+ /// Gets or sets whether to boost terms in query based on "score" or not. The default is
+ /// <see cref="DEFAULT_BOOST"/>.
+ /// </summary>
+ /// <value> whether to boost terms in query based on "score" or not. </value>
+ /// <seealso cref="BoostFactor"/>
+ public bool Boost
+ {
+     get { return boost; }
+     set { boost = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the field names that will be used when generating the 'More Like This' query.
+ /// The default field names that will be used is <see cref="DEFAULT_FIELD_NAMES"/>.
+ /// </summary>
+ /// <value> the field names that will be used when generating the 'More Like This' query. </value>
+ public string[] FieldNames
+ {
+     get { return fieldNames; }
+     set { fieldNames = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the minimum word length below which words will be ignored. Set this to 0 for no
+ /// minimum word length. The default is <see cref="DEFAULT_MIN_WORD_LENGTH"/>.
+ /// </summary>
+ /// <value> the minimum word length below which words will be ignored. </value>
+ public int MinWordLen
+ {
+     get { return minWordLen; }
+     set { minWordLen = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the maximum word length above which words will be ignored. Set this to 0 for no
+ /// maximum word length. The default is <see cref="DEFAULT_MAX_WORD_LENGTH"/>.
+ /// </summary>
+ /// <value> the maximum word length above which words will be ignored. </value>
+ public int MaxWordLen
+ {
+     get { return maxWordLen; }
+     set { maxWordLen = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the set of stopwords.
+ /// Any word in this set is considered "uninteresting" and ignored.
+ /// Even if your Analyzer allows stopwords, you might want to tell the MoreLikeThis code to ignore them, as
+ /// for the purposes of document similarity it seems reasonable to assume that "a stop word is never interesting".
+ /// </summary>
+ /// <value> set of stopwords, if null it means to allow stop words </value>
+ // C# properties cannot declare type parameters, so the converter's StopWords<T1> form is not
+ // valid; the element type is fixed to string (assumed to be what callers pass - TODO confirm).
+ public HashSet<string> StopWords
+ {
+     get { return stopWords; }
+     set { stopWords = value; }
+ }
+
+
+
+ /// <summary>
+ /// Gets or sets the maximum number of query terms that will be included in any generated query.
+ /// The default is <see cref="DEFAULT_MAX_QUERY_TERMS"/>.
+ /// </summary>
+ /// <value> the maximum number of query terms that will be included in any generated query. </value>
+ public int MaxQueryTerms
+ {
+     get { return maxQueryTerms; }
+     set { maxQueryTerms = value; }
+ }
+
+
+ /// <summary>
+ /// Gets or sets the maximum number of tokens to parse in each example doc field that is not
+ /// stored with TermVector support.
+ /// </summary>
+ /// <value> the token limit; the backing field starts at <see cref="DEFAULT_MAX_NUM_TOKENS_PARSED"/> </value>
+ public int MaxNumTokensParsed
+ {
+     get { return maxNumTokensParsed; }
+     set { maxNumTokensParsed = value; }
+ }
+
+
+
+ /// <summary>
+ /// Return a query that will return docs like the passed lucene document ID.
+ /// </summary>
+ /// <param name="docNum"> the documentID of the lucene doc to generate the 'More Like This" query for. </param>
+ /// <returns> a query that will return docs like the passed lucene document ID. </returns>
+ /// <remarks>
+ /// When the field names are null they are first replaced by the indexed fields reported for the
+ /// reader. The Java original declared <c>throws IOException</c>.
+ /// </remarks>
+ public Query like(int docNum)
+ {
+     if (fieldNames == null)
+     {
+         // gather list of valid fields from lucene
+         ICollection<string> fields = MultiFields.getIndexedFields(ir);
+         // ICollection<T> has no Java-style toArray(T[]); copy into a right-sized array instead
+         string[] names = new string[fields.Count];
+         fields.CopyTo(names, 0);
+         fieldNames = names;
+     }
+
+     return createQuery(retrieveTerms(docNum));
+ }
+
+ /// <summary>
+ /// Builds a query that will return docs like the text supplied by the passed Reader.
+ /// </summary>
+ /// <param name="r"> the Reader handed to <c>retrieveTerms</c> </param>
+ /// <param name="fieldName"> passed through to <c>retrieveTerms</c> together with the Reader </param>
+ /// <returns> a query that will return docs like the passed Reader. </returns>
+ public Query like(Reader r, string fieldName)
+ {
+     PriorityQueue<object[]> terms = retrieveTerms(r, fieldName);
+     return createQuery(terms);
+ }
+
+ /// <summary>
+ /// Create the More like query from a PriorityQueue
+ /// </summary>
+ /// <param name="q"> queue of term records; each record holds the word at index 0, its field at
+ /// index 1 and its score (a boxed float) at index 2 </param>
+ /// <returns> a BooleanQuery with one SHOULD clause per popped term, limited by the maximum
+ /// number of query terms (when positive) and by the BooleanQuery clause limit </returns>
+ private Query createQuery(PriorityQueue<object[]> q)
+ {
+     BooleanQuery query = new BooleanQuery();
+     object[] ar;
+     int qterms = 0;
+     float bestScore = 0;
+
+     while ((ar = q.pop()) != null)
+     {
+         TermQuery tq = new TermQuery(new Term((string) ar[1], (string) ar[0]));
+
+         if (boost)
+         {
+             // ar[2] is a boxed float: unbox it with (float). The converter's (float?) cast yields
+             // a nullable that cannot be assigned to a float variable.
+             float myScore = (float) ar[2];
+             if (qterms == 0)
+             {
+                 // the first popped term provides the reference score the others are scaled by
+                 bestScore = myScore;
+             }
+
+             tq.Boost = boostFactor * myScore / bestScore;
+         }
+
+         try
+         {
+             query.add(tq, BooleanClause.Occur.SHOULD);
+         }
+         catch (BooleanQuery.TooManyClauses)
+         {
+             // the query is full: keep what has been added so far
+             break;
+         }
+
+         qterms++;
+         if (maxQueryTerms > 0 && qterms >= maxQueryTerms)
+         {
+             break;
+         }
+     }
+
+     return query;
+ }
+
+ /// <summary>
+ /// Create a PriorityQueue from a word->tf map.
+ /// </summary>
+ /// <param name="words"> a map of words keyed on the word(String) with Int objects as the values. </param>
+ /// <returns> a queue of records <c>{word, topField, score, idf, docFreq, tf}</c>, one per word that
+ /// passes the term-frequency and document-frequency filters </returns>
+ /// <remarks> The Java original declared <c>throws IOException</c>. </remarks>
+ private PriorityQueue<object[]> createQueue(IDictionary<string, Int> words)
+ {
+     // have collected all words in doc and their freqs
+     int numDocs = ir.numDocs();
+     FreqQ queue = new FreqQ(words.Count); // will order words by score
+
+     foreach (KeyValuePair<string, Int> entry in words)
+     {
+         string word = entry.Key;
+         int tf = entry.Value.x; // term freq in the source doc
+         if (minTermFreq > 0 && tf < minTermFreq)
+         {
+             continue; // filter out words that don't occur enough times in the source
+         }
+
+         // go through all the fields and find the largest document frequency
+         string topField = fieldNames[0];
+         int docFreq = 0;
+         foreach (string fieldName in fieldNames)
+         {
+             int freq = ir.docFreq(new Term(fieldName, word));
+             if (freq > docFreq)
+             {
+                 topField = fieldName;
+                 docFreq = freq;
+             }
+         }
+
+         bool inTooFewDocs = minDocFreq > 0 && docFreq < minDocFreq;
+         bool inTooManyDocs = docFreq > maxDocFreq;
+         if (inTooFewDocs || inTooManyDocs || docFreq == 0) // docFreq == 0: index update problem?
+         {
+             continue;
+         }
+
+         float idf = similarity.idf(docFreq, numDocs);
+         float score = tf * idf;
+
+         // only really need 1st 3 entries, other ones are for troubleshooting
+         queue.insertWithOverflow(new object[]{word, topField, score, idf, docFreq, tf});
+     }
+     return queue;
+ }
+
+ /// <summary>
+ /// Builds a human-readable listing of the parameters that control how the "more like this" query is formed.
+ /// </summary>
+ /// <returns> one tab-indented, newline-terminated <c>name : value</c> line per reported parameter </returns>
+ public string describeParams()
+ {
+     StringBuilder text = new StringBuilder();
+
+     text.Append("\t").Append("maxQueryTerms : ");
+     text.Append(maxQueryTerms).Append("\n");
+
+     text.Append("\t").Append("minWordLen : ");
+     text.Append(minWordLen).Append("\n");
+
+     text.Append("\t").Append("maxWordLen : ");
+     text.Append(maxWordLen).Append("\n");
+
+     // Field names are written on one line, separated by ", ".
+     text.Append("\t").Append("fieldNames : ");
+     bool first = true;
+     foreach (string fieldName in fieldNames)
+     {
+         if (!first)
+         {
+             text.Append(", ");
+         }
+         text.Append(fieldName);
+         first = false;
+     }
+     text.Append("\n");
+
+     text.Append("\t").Append("boost : ");
+     text.Append(boost).Append("\n");
+
+     text.Append("\t").Append("minTermFreq : ");
+     text.Append(minTermFreq).Append("\n");
+
+     text.Append("\t").Append("minDocFreq : ");
+     text.Append(minDocFreq).Append("\n");
+
+     return text.ToString();
+ }
+
+ /// <summary>
+ /// Finds the candidate terms for a more-like-this query from a document already in the index.
+ /// </summary>
+ /// <remarks>
+ /// For every configured field the document's term vector is used when one is available; otherwise
+ /// the document's values for that field are fetched and every non-null string value is
+ /// re-analyzed. The collected frequencies are then handed to <c>createQueue</c>.
+ /// Ported from Java, where this method declared <c>throws IOException</c>.
+ /// </remarks>
+ /// <param name="docNum"> the id of the lucene document from which to find terms </param>
+ /// <returns> the priority queue built by <c>createQueue</c> from the collected term frequencies </returns>
+ public PriorityQueue<object[]> retrieveTerms(int docNum)
+ {
+     IDictionary<string, Int> termFreqMap = new Dictionary<string, Int>();
+
+     // Both lookups depend only on docNum, so they are done at most once instead of once per field.
+     Fields vectors = ir.getTermVectors(docNum);
+     Document storedDoc = null; // fetched lazily: only needed for a field without a term vector
+
+     foreach (string fieldName in fieldNames)
+     {
+         Terms vector = (vectors != null) ? vectors.terms(fieldName) : null;
+         if (vector != null)
+         {
+             addTermFrequencies(termFreqMap, vector);
+             continue;
+         }
+
+         // The field does not store term vector info: fall back to analyzing its string values.
+         if (storedDoc == null)
+         {
+             storedDoc = ir.document(docNum);
+         }
+         foreach (IndexableField field in storedDoc.getFields(fieldName))
+         {
+             string stringValue = field.stringValue();
+             if (stringValue != null)
+             {
+                 addTermFrequencies(new StringReader(stringValue), termFreqMap, fieldName);
+             }
+         }
+     }
+
+     return createQueue(termFreqMap);
+ }
+
+ /// <summary>
+ /// Adds the terms and frequencies found in <paramref name="vector"/> to <paramref name="termFreqMap"/>.
+ /// </summary>
+ /// <remarks>
+ /// Terms rejected by <c>isNoiseWord</c> are skipped. A term seen for the first time is stored with
+ /// the frequency reported by the enumerator; a term already present has that frequency added.
+ /// Ported from Java, where this method declared <c>throws IOException</c>.
+ /// </remarks>
+ /// <param name="termFreqMap"> map of terms to their accumulated frequencies; updated in place </param>
+ /// <param name="vector"> terms and their frequencies for one document field </param>
+ private void addTermFrequencies(IDictionary<string, Int> termFreqMap, Terms vector)
+ {
+     TermsEnum termsEnum = vector.iterator(null);
+     CharsRef spare = new CharsRef(); // one buffer reused for decoding every term
+     BytesRef text;
+     while ((text = termsEnum.next()) != null)
+     {
+         UnicodeUtil.UTF8toUTF16(text, spare);
+         string term = spare.ToString();
+         if (isNoiseWord(term))
+         {
+             continue;
+         }
+
+         // NOTE(review): narrowing cast inherited from the Java original — confirm the value always fits in an int.
+         int freq = (int) termsEnum.totalTermFreq();
+
+         // The dictionary indexer throws KeyNotFoundException for an unknown key (unlike Java's
+         // Map.get, which returns null), so the lookup has to go through TryGetValue.
+         Int cnt;
+         if (termFreqMap.TryGetValue(term, out cnt) && cnt != null)
+         {
+             cnt.x += freq;
+         }
+         else
+         {
+             cnt = new Int();
+             cnt.x = freq;
+             termFreqMap[term] = cnt;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Tokenizes the text supplied by <paramref name="r"/> and adds the frequency of every token to
+ /// <paramref name="termFreqMap"/>.
+ /// </summary>
+ /// <remarks>
+ /// Tokenizing stops once more than <c>maxNumTokensParsed</c> tokens have been read. Tokens rejected
+ /// by <c>isNoiseWord</c> still count towards that limit but are not recorded. The token stream is
+ /// always closed, even when tokenizing fails.
+ /// Ported from Java, where this method declared <c>throws IOException</c>.
+ /// </remarks>
+ /// <param name="r"> a source of text to be tokenized </param>
+ /// <param name="termFreqMap"> map of terms to their accumulated frequencies; updated in place </param>
+ /// <param name="fieldName"> passed to the analyzer when creating the token stream </param>
+ /// <exception cref="System.NotSupportedException"> no analyzer has been configured </exception>
+ private void addTermFrequencies(Reader r, IDictionary<string, Int> termFreqMap, string fieldName)
+ {
+     if (analyzer == null)
+     {
+         throw new System.NotSupportedException("To use MoreLikeThis without " + "term vectors, you must provide an Analyzer");
+     }
+     TokenStream ts = analyzer.tokenStream(fieldName, r);
+     try
+     {
+         int tokenCount = 0;
+         CharTermAttribute termAtt = ts.addAttribute(typeof(CharTermAttribute));
+         ts.reset();
+         while (ts.incrementToken())
+         {
+             string word = termAtt.ToString();
+             tokenCount++;
+             if (tokenCount > maxNumTokensParsed)
+             {
+                 break;
+             }
+             if (isNoiseWord(word))
+             {
+                 continue;
+             }
+
+             // The dictionary indexer throws KeyNotFoundException for an unknown key (unlike Java's
+             // Map.get, which returns null), so the lookup has to go through TryGetValue.
+             Int cnt;
+             if (termFreqMap.TryGetValue(word, out cnt) && cnt != null)
+             {
+                 cnt.x++;
+             }
+             else
+             {
+                 termFreqMap[word] = new Int(); // a new Int already starts at 1
+             }
+         }
+         ts.end();
+     }
+     finally
+     {
+         IOUtils.closeWhileHandlingException(ts);
+     }
+ }
+
+
+ /// <summary>
+ /// Decides whether a term should be left out of "more like" comparisons.
+ /// </summary>
+ /// <remarks>
+ /// A term is noise when it is shorter than <c>minWordLen</c> or longer than <c>maxWordLen</c>
+ /// (each limit applies only when it is greater than zero), or when it is contained in
+ /// <c>stopWords</c>.
+ /// </remarks>
+ /// <param name="term"> The word being considered </param>
+ /// <returns> true if the term should be ignored, false if it should be used in further analysis </returns>
+ private bool isNoiseWord(string term)
+ {
+     int length = term.Length;
+     bool tooShort = minWordLen > 0 && length < minWordLen;
+     bool tooLong = maxWordLen > 0 && length > maxWordLen;
+     if (tooShort || tooLong)
+     {
+         return true;
+     }
+
+     if (stopWords == null)
+     {
+         return false; // no stop-word set configured
+     }
+     return stopWords.Contains(term);
+ }
+
+
+ /// <summary>
+ /// Finds the candidate terms for a more-like-this query from arbitrary text.
+ /// </summary>
+ /// <remarks>
+ /// The text is tokenized, the token frequencies are collected, and the result of
+ /// <c>createQueue</c> is returned: a priority queue of arrays, one per retained word.
+ /// Each array has 6 elements:
+ /// <list type="number">
+ /// <item><description>The word (string)</description></item>
+ /// <item><description>The top field that this word comes from (string)</description></item>
+ /// <item><description>The score for this word (float)</description></item>
+ /// <item><description>The IDF value (float)</description></item>
+ /// <item><description>The frequency of this word in the index (int)</description></item>
+ /// <item><description>The frequency of this word in the source document (int)</description></item>
+ /// </list>
+ /// This is a somewhat "advanced" routine, and in general only the 1st entry in the array is of interest.
+ /// It is exposed so that you can identify the "interesting words" in a document.
+ /// For an easier method to call see <see cref="retrieveInterestingTerms(Reader, string)"/>.
+ /// Ported from Java, where this method declared <c>throws IOException</c>.
+ /// </remarks>
+ /// <param name="r"> the reader that has the content of the document </param>
+ /// <param name="fieldName"> field passed to the analyzer to use when analyzing the content </param>
+ /// <returns> the most interesting words in the document ordered by score, with the highest scoring, or best entry, first </returns>
+ /// <seealso cref="retrieveInterestingTerms(Reader, string)"/>
+ public PriorityQueue<object[]> retrieveTerms(Reader r, string fieldName)
+ {
+     IDictionary<string, Int> termFreqMap = new Dictionary<string, Int>();
+     addTermFrequencies(r, termFreqMap, fieldName);
+
+     PriorityQueue<object[]> queue = createQueue(termFreqMap);
+     return queue;
+ }
+
+ /// <summary>
+ /// Returns the most interesting words of a document already in the index.
+ /// </summary>
+ /// <remarks>
+ /// <c>retrieveTerms</c> returns every retained word, so only the first <c>maxQueryTerms</c>
+ /// entries popped from its queue are reported.
+ /// Ported from Java, where this method declared <c>throws IOException</c>.
+ /// </remarks>
+ /// <param name="docNum"> the id of the lucene document from which to find terms </param>
+ /// <returns> at most <c>maxQueryTerms</c> words, in the order the queue hands them out </returns>
+ /// <seealso cref="retrieveInterestingTerms(Reader, string)"/>
+ public string[] retrieveInterestingTerms(int docNum)
+ {
+     // Collect strings directly: List<T>.ToArray() takes no argument and returns T[], so the
+     // Java-style "toArray(new String[n])" on a list of objects cannot be expressed in .NET.
+     List<string> terms = new List<string>(maxQueryTerms);
+     PriorityQueue<object[]> pq = retrieveTerms(docNum);
+     int lim = maxQueryTerms;
+     object[] cur;
+     while (((cur = pq.pop()) != null) && lim-- > 0)
+     {
+         terms.Add((string) cur[0]); // the 1st entry is the interesting word
+     }
+     return terms.ToArray();
+ }
+
+ /// <summary>
+ /// Convenience routine to make it easy to return the most interesting words in a document.
+ /// More advanced users will call <see cref="retrieveTerms(Reader, string)"/> directly.
+ /// </summary>
+ /// <remarks>
+ /// <c>retrieveTerms</c> returns every retained word, so only the first <c>maxQueryTerms</c>
+ /// entries popped from its queue are reported.
+ /// Ported from Java, where this method declared <c>throws IOException</c>.
+ /// </remarks>
+ /// <param name="r"> the source document </param>
+ /// <param name="fieldName"> field passed to analyzer to use when analyzing the content </param>
+ /// <returns> at most <c>maxQueryTerms</c> of the most interesting words in the document </returns>
+ /// <seealso cref="retrieveTerms(Reader, string)"/>
+ public string[] retrieveInterestingTerms(Reader r, string fieldName)
+ {
+     // Collect strings directly: List<T>.ToArray() takes no argument and returns T[], so the
+     // Java-style "toArray(new String[n])" on a list of objects cannot be expressed in .NET.
+     List<string> terms = new List<string>(maxQueryTerms);
+     PriorityQueue<object[]> pq = retrieveTerms(r, fieldName);
+     int lim = maxQueryTerms;
+     object[] cur;
+     while (((cur = pq.pop()) != null) && lim-- > 0)
+     {
+         terms.Add((string) cur[0]); // the 1st entry is the interesting word
+     }
+     return terms.ToArray();
+ }
+
+ /// <summary>
+ /// PriorityQueue of word entries that compares them by the score stored in slot 2 of each array.
+ /// </summary>
+ private class FreqQ : PriorityQueue<object[]>
+ {
+     /// <param name="s"> size passed through to the base queue </param>
+     internal FreqQ(int s) : base(s)
+     {
+     }
+
+     /// <summary>
+     /// Reports <paramref name="aa"/> as "less than" <paramref name="bb"/> when its score is the
+     /// larger of the two; the comparison is deliberately inverted.
+     /// </summary>
+     /// <returns> true when the score of <paramref name="aa"/> is greater than that of <paramref name="bb"/> </returns>
+     protected internal override bool lessThan(object[] aa, object[] bb)
+     {
+         return (float?) aa[2] > (float?) bb[2];
+     }
+ }
+
+ /// <summary>
+ /// Mutable counter used for term frequencies, so a count can be updated in place instead of
+ /// being replaced by a new boxed value.
+ /// </summary>
+ private class Int
+ {
+     /// <summary> The current count; a new instance starts at 1. </summary>
+     internal int x = 1;
+
+     internal Int()
+     {
+     }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs b/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs
new file mode 100644
index 0000000..d397720
--- /dev/null
+++ b/src/Lucene.Net.Queries/Mlt/MoreLikeThisQuery.cs
@@ -0,0 +1,297 @@
+using System.Collections.Generic;
+
+/*
+ * Created on 25-Jan-2006
+ */
+namespace org.apache.lucene.queries.mlt
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using Analyzer = org.apache.lucene.analysis.Analyzer;
+ using IndexReader = org.apache.lucene.index.IndexReader;
+ using BooleanClause = org.apache.lucene.search.BooleanClause;
+ using BooleanQuery = org.apache.lucene.search.BooleanQuery;
+ using Query = org.apache.lucene.search.Query;
+
+
+ /// <summary>
+ /// A simple wrapper for MoreLikeThis for use in scenarios where a Query object is required, e.g.
+ /// in custom QueryParser extensions. At rewrite time the reader is used to construct the
+ /// actual MoreLikeThis object and obtain the real Query object.
+ /// </summary>
+ public class MoreLikeThisQuery : Query
+ {
+     private string likeText;
+     private string[] moreLikeFields;
+     private Analyzer analyzer;
+     private readonly string fieldName;
+     private float percentTermsToMatch = 0.3f;
+     private int minTermFrequency = 1;
+     private int maxQueryTerms = 5;
+     // The Java original declared Set<?>; C# has no wildcard generics, so the element type is spelled out.
+     private HashSet<string> stopWords = null;
+     private int minDocFreq = -1; // negative: do not override the MoreLikeThis setting
+
+     /// <summary>
+     /// Creates a query that looks for documents similar to <paramref name="likeText"/>.
+     /// </summary>
+     /// <param name="likeText"> the text to find similar documents for </param>
+     /// <param name="moreLikeFields"> fields used for similarity measure </param>
+     /// <param name="analyzer"> analyzer handed to MoreLikeThis when the query is rewritten </param>
+     /// <param name="fieldName"> field name passed to MoreLikeThis together with the text </param>
+     public MoreLikeThisQuery(string likeText, string[] moreLikeFields, Analyzer analyzer, string fieldName)
+     {
+         this.likeText = likeText;
+         this.moreLikeFields = moreLikeFields;
+         this.analyzer = analyzer;
+         this.fieldName = fieldName;
+     }
+
+     /// <summary>
+     /// Builds the real query: configures a MoreLikeThis for <paramref name="reader"/>, lets it
+     /// generate a BooleanQuery from the like-text, and sets how many clauses have to match.
+     /// Ported from Java, where this method declared <c>throws IOException</c>.
+     /// </summary>
+     /// <param name="reader"> the index reader handed to MoreLikeThis </param>
+     /// <returns> the generated BooleanQuery </returns>
+     public override Query rewrite(IndexReader reader)
+     {
+         MoreLikeThis mlt = new MoreLikeThis(reader);
+
+         mlt.FieldNames = moreLikeFields;
+         mlt.Analyzer = analyzer;
+         mlt.MinTermFreq = minTermFrequency;
+         if (minDocFreq >= 0)
+         {
+             mlt.MinDocFreq = minDocFreq;
+         }
+         mlt.MaxQueryTerms = maxQueryTerms;
+         mlt.StopWords = stopWords;
+         BooleanQuery bq = (BooleanQuery) mlt.like(new System.IO.StringReader(likeText), fieldName);
+         BooleanClause[] clauses = bq.Clauses;
+         // Require the configured fraction of the generated clauses to match (truncated to an int).
+         bq.MinimumNumberShouldMatch = (int)(clauses.Length * percentTermsToMatch);
+         return bq;
+     }
+
+     /// <summary>
+     /// Returns <c>"like:"</c> followed by the like-text; <paramref name="field"/> is not used.
+     /// </summary>
+     public override string ToString(string field)
+     {
+         return "like:" + likeText;
+     }
+
+     /// <summary>
+     /// Fraction of the generated clauses that must match; multiplied by the clause count in
+     /// <c>rewrite</c>. Defaults to 0.3.
+     /// </summary>
+     public virtual float PercentTermsToMatch
+     {
+         get { return percentTermsToMatch; }
+         set { this.percentTermsToMatch = value; }
+     }
+
+     /// <summary> Analyzer handed to MoreLikeThis in <c>rewrite</c>. </summary>
+     public virtual Analyzer Analyzer
+     {
+         get { return analyzer; }
+         set { this.analyzer = value; }
+     }
+
+     /// <summary> The text for which similar documents are searched. </summary>
+     public virtual string LikeText
+     {
+         get { return likeText; }
+         set { this.likeText = value; }
+     }
+
+     /// <summary> Value assigned to <c>MoreLikeThis.MaxQueryTerms</c> in <c>rewrite</c>. Defaults to 5. </summary>
+     public virtual int MaxQueryTerms
+     {
+         get { return maxQueryTerms; }
+         set { this.maxQueryTerms = value; }
+     }
+
+     /// <summary> Value assigned to <c>MoreLikeThis.MinTermFreq</c> in <c>rewrite</c>. Defaults to 1. </summary>
+     public virtual int MinTermFrequency
+     {
+         get { return minTermFrequency; }
+         set { this.minTermFrequency = value; }
+     }
+
+     /// <summary> Fields used for the similarity measure. </summary>
+     public virtual string[] MoreLikeFields
+     {
+         get { return moreLikeFields; }
+         set { this.moreLikeFields = value; }
+     }
+
+     /// <summary> Stop words handed to MoreLikeThis in <c>rewrite</c>; may be null. </summary>
+     public virtual HashSet<string> StopWords
+     {
+         get { return stopWords; }
+         set { this.stopWords = value; }
+     }
+
+     /// <summary>
+     /// Value assigned to <c>MoreLikeThis.MinDocFreq</c> in <c>rewrite</c>, but only when it is
+     /// not negative. Defaults to -1.
+     /// </summary>
+     public virtual int MinDocFreq
+     {
+         get { return minDocFreq; }
+         set { this.minDocFreq = value; }
+     }
+
+     /// <summary>
+     /// Combines the base hash with every field that <see cref="Equals(object)"/> compares.
+     /// </summary>
+     public override int GetHashCode()
+     {
+         const int prime = 31;
+         unchecked // hash mixing is expected to wrap around, also in a checked build
+         {
+             int result = base.GetHashCode();
+             result = prime * result + ((analyzer == null) ? 0 : analyzer.GetHashCode());
+             result = prime * result + ((fieldName == null) ? 0 : fieldName.GetHashCode());
+             result = prime * result + ((likeText == null) ? 0 : likeText.GetHashCode());
+             result = prime * result + maxQueryTerms;
+             result = prime * result + minDocFreq;
+             result = prime * result + minTermFrequency;
+             result = prime * result + ArrayHashCode(moreLikeFields);
+             result = prime * result + FloatToIntBits(percentTermsToMatch);
+             result = prime * result + StopWordsHashCode(stopWords);
+             return result;
+         }
+     }
+
+     /// <summary>
+     /// Two queries are equal when the base class considers them equal, they have the same runtime
+     /// type, and all their settings match; field arrays and stop-word sets are compared by content.
+     /// </summary>
+     public override bool Equals(object obj)
+     {
+         if (object.ReferenceEquals(this, obj))
+         {
+             return true;
+         }
+         if (obj == null || !base.Equals(obj) || this.GetType() != obj.GetType())
+         {
+             return false;
+         }
+
+         MoreLikeThisQuery other = (MoreLikeThisQuery) obj;
+         return object.Equals(analyzer, other.analyzer)
+             && string.Equals(fieldName, other.fieldName)
+             && string.Equals(likeText, other.likeText)
+             && maxQueryTerms == other.maxQueryTerms
+             && minDocFreq == other.minDocFreq
+             && minTermFrequency == other.minTermFrequency
+             && ArrayEquals(moreLikeFields, other.moreLikeFields)
+             && FloatToIntBits(percentTermsToMatch) == FloatToIntBits(other.percentTermsToMatch)
+             && StopWordsEqual(stopWords, other.stopWords);
+     }
+
+     // Bit pattern of a float with every NaN mapped to one value, mirroring Java's Float.floatToIntBits.
+     private static int FloatToIntBits(float value)
+     {
+         if (float.IsNaN(value))
+         {
+             return 0x7fc00000;
+         }
+         return System.BitConverter.ToInt32(System.BitConverter.GetBytes(value), 0);
+     }
+
+     // Order-sensitive content hash of a string array (0 for null), mirroring Java's Arrays.hashCode.
+     private static int ArrayHashCode(string[] items)
+     {
+         if (items == null)
+         {
+             return 0;
+         }
+         int hash = 1;
+         unchecked
+         {
+             foreach (string item in items)
+             {
+                 hash = 31 * hash + ((item == null) ? 0 : item.GetHashCode());
+             }
+         }
+         return hash;
+     }
+
+     // Element-wise comparison of two string arrays; two nulls are equal.
+     private static bool ArrayEquals(string[] left, string[] right)
+     {
+         if (object.ReferenceEquals(left, right))
+         {
+             return true;
+         }
+         if (left == null || right == null || left.Length != right.Length)
+         {
+             return false;
+         }
+         for (int i = 0; i < left.Length; i++)
+         {
+             if (!string.Equals(left[i], right[i]))
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+
+     // Order-independent content hash of a stop-word set (0 for null), consistent with StopWordsEqual.
+     private static int StopWordsHashCode(HashSet<string> words)
+     {
+         if (words == null)
+         {
+             return 0;
+         }
+         int hash = 0;
+         unchecked
+         {
+             foreach (string word in words)
+             {
+                 hash += (word == null) ? 0 : word.GetHashCode();
+             }
+         }
+         return hash;
+     }
+
+     // Content comparison of two stop-word sets; HashSet.Equals only compares references.
+     private static bool StopWordsEqual(HashSet<string> left, HashSet<string> right)
+     {
+         if (object.ReferenceEquals(left, right))
+         {
+             return true;
+         }
+         if (left == null || right == null)
+         {
+             return false;
+         }
+         return left.SetEquals(right);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/882f487d/src/Lucene.Net.Queries/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Queries/Properties/AssemblyInfo.cs b/src/Lucene.Net.Queries/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..d8e310e
--- /dev/null
+++ b/src/Lucene.Net.Queries/Properties/AssemblyInfo.cs
@@ -0,0 +1,35 @@
+using System.Reflection;
+using System.Runtime.InteropServices;
+
+// General information about this assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with the assembly.
+[assembly: AssemblyTitle("Lucene.Net.Queries")]
+[assembly: AssemblyDescription("Filters and Queries that add to core Lucene")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyCompany("")] // NOTE(review): empty - confirm whether an organization name belongs here
+[assembly: AssemblyProduct("Lucene.Net.Queries")]
+[assembly: AssemblyCopyright("Copyright © 2014")] // NOTE(review): no copyright holder is named - confirm
+[assembly: AssemblyTrademark("")]
+[assembly: AssemblyCulture("")]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is the ID of the typelib if this project is exposed to COM.
+[assembly: Guid("76a14bc5-8f0f-4de4-b501-423d4c1a20a5")]
+
+// Version information for an assembly consists of the following four values:
+//
+// Major Version
+// Minor Version
+// Build Number
+// Revision
+//
+// You can specify all the values, or you can default the Build and Revision numbers
+// by using the '*' as shown below:
+// [assembly: AssemblyVersion("1.0.*")]
+[assembly: AssemblyVersion("1.0.0.0")]
+[assembly: AssemblyFileVersion("1.0.0.0")]