You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/08/06 17:59:03 UTC
[05/33] lucenenet git commit: Ported Lucene.Net.Benchmark + tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Utils/Config.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Utils/Config.cs b/src/Lucene.Net.Benchmark/ByTask/Utils/Config.cs
new file mode 100644
index 0000000..e3190f9
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Utils/Config.cs
@@ -0,0 +1,559 @@
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.ByTask.Utils
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Perf run configuration properties.
+ /// </summary>
+ /// <remarks>
+ /// A numeric property containing ":", e.g. "10:100:5", is interpreted
+ /// as an array of numeric values. It is extracted once, on first use, and
+ /// the current round number selects which value is returned.
+ /// <para/>
+ /// The config property "work.dir" specifies the root of the
+ /// docs data dirs and index dirs. It is set to the first of:
+ /// <list type="bullet">
+ /// <item><description>value supplied for it in the alg file;</description></item>
+ /// <item><description>otherwise, value of environment variable "benchmark.work.dir";</description></item>
+ /// <item><description>otherwise, "work".</description></item>
+ /// </list>
+ /// </remarks>
+ public class Config
+ {
+ // For tests, if verbose is not turned on, don't print the props.
+ private static readonly bool DEFAULT_PRINT_PROPS = SystemProperties.GetPropertyAsBoolean("tests.verbose", true);
+ private static readonly string NEW_LINE = Environment.NewLine;
+
+ private int roundNumber = 0;
+ private IDictionary<string, string> props;
+ private IDictionary<string, object> valByRound = new Dictionary<string, object>();
+ private IDictionary<string, string> colForValByRound = new Dictionary<string, string>();
+ private string algorithmText;
+
+        /// <summary>
+        /// Read both algorithm and config properties.
+        /// </summary>
+        /// <remarks>
+        /// Every line up to and including the last line that contains '=' (at an index greater
+        /// than 0) is loaded as a property line; the remaining lines become the algorithm text.
+        /// The reader is always disposed, even when reading fails.
+        /// </remarks>
+        /// <param name="algReader">From where to read algorithm and config properties.</param>
+        /// <exception cref="ArgumentNullException">If <paramref name="algReader"/> is <c>null</c>.</exception>
+        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
+        public Config(TextReader algReader)
+        {
+            if (algReader == null)
+            {
+                throw new ArgumentNullException("algReader");
+            }
+
+            // read alg file to array of lines
+            IList<string> lines = new List<string>();
+            int lastConfigLine = 0;
+            try
+            {
+                string line;
+                while ((line = algReader.ReadLine()) != null)
+                {
+                    lines.Add(line);
+                    if (line.IndexOf('=') > 0)
+                    {
+                        lastConfigLine = lines.Count;
+                    }
+                }
+            }
+            finally
+            {
+                // release the reader on the failure path as well
+                algReader.Dispose();
+            }
+
+            // copy props lines to an in-memory stream and read the props back from it
+            this.props = new Dictionary<string, string>();
+            using (MemoryStream ms = new MemoryStream())
+            using (TextWriter writer = new StreamWriter(ms))
+            {
+                for (int i = 0; i < lastConfigLine; i++)
+                {
+                    writer.WriteLine(lines[i]);
+                }
+                writer.Flush();
+                ms.Position = 0;
+                props.Load(ms);
+            }
+
+            // make sure work dir is set properly
+            string temp;
+            if (!props.TryGetValue("work.dir", out temp) || temp == null)
+            {
+                props["work.dir"] = SystemProperties.GetProperty("benchmark.work.dir", "work");
+            }
+
+            if (props.TryGetValue("print.props", out temp))
+            {
+                // static Equals tolerates a null value instead of throwing
+                if (string.Equals(temp, "true", StringComparison.OrdinalIgnoreCase))
+                {
+                    PrintProps();
+                }
+            }
+            else if (DEFAULT_PRINT_PROPS)
+            {
+                PrintProps();
+            }
+
+            // copy algorithm lines
+            var sb = new StringBuilder();
+            for (int i = lastConfigLine; i < lines.Count; i++)
+            {
+                sb.Append(lines[i]);
+                sb.Append(NEW_LINE);
+            }
+            algorithmText = sb.ToString();
+        }
+
+        /// <summary>
+        /// Create config without algorithm - useful for a programmatic perf test.
+        /// </summary>
+        /// <remarks>
+        /// The dictionary is stored as-is (not copied). The properties are printed when
+        /// "print.props" equals "true" (ignoring case) or, when that key is absent, when the
+        /// default print setting is on.
+        /// </remarks>
+        /// <param name="props">Configuration properties.</param>
+        /// <exception cref="ArgumentNullException">If <paramref name="props"/> is <c>null</c>.</exception>
+        public Config(IDictionary<string, string> props)
+        {
+            if (props == null)
+            {
+                throw new ArgumentNullException("props");
+            }
+
+            this.props = props;
+            string temp;
+            if (props.TryGetValue("print.props", out temp))
+            {
+                // a caller-supplied dictionary may hold a null value; treat it as "not true"
+                if (string.Equals(temp, "true", StringComparison.OrdinalIgnoreCase))
+                {
+                    PrintProps();
+                }
+            }
+            else if (DEFAULT_PRINT_PROPS)
+            {
+                PrintProps();
+            }
+        }
+
+        // Writes every property to the console as "name = value", ordered by property name,
+        // between a header line and a footer line.
+        private void PrintProps()
+        {
+            SystemConsole.WriteLine("------------> config properties:");
+
+            string[] sortedNames = new string[props.Count];
+            props.Keys.CopyTo(sortedNames, 0);
+            Array.Sort(sortedNames);
+
+            for (int i = 0; i < sortedNames.Length; i++)
+            {
+                string propName = sortedNames[i];
+                SystemConsole.WriteLine(propName + " = " + props[propName]);
+            }
+
+            SystemConsole.WriteLine("-------------------------------");
+        }
+
+        /// <summary>
+        /// Return a string property.
+        /// <para/>
+        /// A value containing ":" (but neither ":\" nor ":/") is split on first use: the text
+        /// before the first ":" becomes the report column name and the remainder becomes the
+        /// list of per-round values, from which the value for the current round is returned.
+        /// </summary>
+        /// <param name="name">Name of property.</param>
+        /// <param name="dflt">Default value, used when the property is not set.</param>
+        /// <returns>The property value, or <c>null</c> if the resolved value is <c>null</c>.</returns>
+        public virtual string Get(string name, string dflt)
+        {
+            // a value that was already split by round wins
+            object cached;
+            if (valByRound.TryGetValue(name, out cached) && cached != null)
+            {
+                string[] known = (string[])cached;
+                return known[roundNumber % known.Length];
+            }
+
+            string raw;
+            bool found = props.TryGetValue(name, out raw);
+            if (!found)
+            {
+                raw = dflt;
+            }
+            if (raw == null)
+            {
+                return null;
+            }
+
+            int colon = raw.IndexOf(":");
+            // ":\" and ":/" mark absolute paths and URLs, which must not be split.
+            // Assumes no real per-round value starts with \ or /.
+            if (colon < 0 || raw.IndexOf(":\\") >= 0 || raw.IndexOf(":/") >= 0)
+            {
+                return raw;
+            }
+
+            // first time this prop is extracted by round
+            colForValByRound[name] = raw.Substring(0, colon);
+            string[] perRound = PropToStringArray(raw.Substring(colon + 1));
+            valByRound[name] = perRound;
+            return perRound[roundNumber % perRound.Length];
+        }
+
+        /// <summary>
+        /// Set a property.
+        /// <para/>
+        /// Note: once a multiple values property has been read (and therefore split by round),
+        /// it can no longer be modified.
+        /// </summary>
+        /// <param name="name">Name of property.</param>
+        /// <param name="value">Either single or multiple property value (multiple values are separated by ":")</param>
+        /// <exception cref="InvalidOperationException">If the property already has by-round values.</exception>
+        public virtual void Set(string name, string value)
+        {
+            object temp;
+            if (valByRound.TryGetValue(name, out temp) && temp != null)
+            {
+                // InvalidOperationException derives from Exception, so existing handlers still catch it
+                throw new InvalidOperationException("Cannot modify a multi value property!");
+            }
+            props[name] = value;
+        }
+
+        /// <summary>
+        /// Return an <see cref="int"/> property.
+        /// <para/>
+        /// If the property contains ":", the text before the first ":" is taken as the report
+        /// column name and the remainder, e.g. "10:100:5", is interpreted as an array of ints.
+        /// It is extracted once, on first call to Get() it, and a by-round-value is returned.
+        /// </summary>
+        /// <param name="name">Name of property.</param>
+        /// <param name="dflt">Default value, returned when the property is not set (or is <c>null</c>).</param>
+        /// <returns>An <see cref="int"/> property.</returns>
+        /// <exception cref="FormatException">If the property text is not a valid integer.</exception>
+        public virtual int Get(string name, int dflt)
+        {
+            // use value by round if already parsed
+            object temp;
+            if (valByRound.TryGetValue(name, out temp) && temp != null)
+            {
+                int[] cached = (int[])temp;
+                return cached[roundNumber % cached.Length];
+            }
+
+            string sval;
+            if (!props.TryGetValue(name, out sval) || sval == null)
+            {
+                // Return the default directly: formatting it with the current culture and
+                // re-parsing it with the invariant culture is not guaranteed to round-trip.
+                return dflt;
+            }
+
+            // done if not by round
+            int k = sval.IndexOf(':');
+            if (k < 0)
+            {
+                return int.Parse(sval, CultureInfo.InvariantCulture);
+            }
+
+            // First time this prop is extracted by round. Parse before registering anything,
+            // so a malformed value cannot leave a column name behind without its values.
+            string colName = sval.Substring(0, k);
+            int[] vals = PropToInt32Array(sval.Substring(k + 1));
+            colForValByRound[name] = colName;
+            valByRound[name] = vals;
+            return vals[roundNumber % vals.Length];
+        }
+
+        /// <summary>
+        /// Return a <see cref="double"/> property.
+        /// <para/>
+        /// If the property contains ":", the text before the first ":" is taken as the report
+        /// column name and the remainder, e.g. "10:100:5", is interpreted as an array of doubles.
+        /// It is extracted once, on first call to Get() it, and a by-round-value is returned.
+        /// </summary>
+        /// <param name="name">Name of property.</param>
+        /// <param name="dflt">Default value, returned when the property is not set (or is <c>null</c>).</param>
+        /// <returns>A <see cref="double"/> property.</returns>
+        /// <exception cref="FormatException">If the property text is not a valid number.</exception>
+        public virtual double Get(string name, double dflt)
+        {
+            // use value by round if already parsed
+            object temp;
+            if (valByRound.TryGetValue(name, out temp) && temp != null)
+            {
+                double[] cached = (double[])temp;
+                return cached[roundNumber % cached.Length];
+            }
+
+            string sval;
+            if (!props.TryGetValue(name, out sval) || sval == null)
+            {
+                // Return the default directly. Converting it to text uses the current culture
+                // (e.g. "1,5" for 1.5 under de-DE), which the invariant-culture parse below
+                // would read back as a different number.
+                return dflt;
+            }
+
+            // done if not by round
+            int k = sval.IndexOf(':');
+            if (k < 0)
+            {
+                return double.Parse(sval, CultureInfo.InvariantCulture);
+            }
+
+            // First time this prop is extracted by round. Parse before registering anything,
+            // so a malformed value cannot leave a column name behind without its values.
+            string colName = sval.Substring(0, k);
+            double[] vals = PropToDoubleArray(sval.Substring(k + 1));
+            colForValByRound[name] = colName;
+            valByRound[name] = vals;
+            return vals[roundNumber % vals.Length];
+        }
+
+        /// <summary>
+        /// Return a boolean property.
+        /// <para/>
+        /// If the property contains ":", the text before the first ":" is taken as the report
+        /// column name and the remainder, e.g. "true:true:false", is interpreted as an array of
+        /// booleans. It is extracted once, on first call to Get() it, and a by-round-value is returned.
+        /// </summary>
+        /// <param name="name">Name of property.</param>
+        /// <param name="dflt">Default value, returned when the property is not set (or is <c>null</c>).</param>
+        /// <returns>A <see cref="bool"/> property.</returns>
+        /// <exception cref="FormatException">If the property text is not a valid boolean.</exception>
+        public virtual bool Get(string name, bool dflt)
+        {
+            // use value by round if already parsed
+            object temp;
+            if (valByRound.TryGetValue(name, out temp) && temp != null)
+            {
+                bool[] cached = (bool[])temp;
+                return cached[roundNumber % cached.Length];
+            }
+
+            string sval;
+            if (!props.TryGetValue(name, out sval) || sval == null)
+            {
+                // no need to convert the default to text and parse it back
+                return dflt;
+            }
+
+            // done if not by round
+            int k = sval.IndexOf(':');
+            if (k < 0)
+            {
+                return bool.Parse(sval);
+            }
+
+            // First time this prop is extracted by round. Parse before registering anything,
+            // so a malformed value cannot leave a column name behind without its values.
+            string colName = sval.Substring(0, k);
+            bool[] vals = PropToBooleanArray(sval.Substring(k + 1));
+            colForValByRound[name] = colName;
+            valByRound[name] = vals;
+            return vals[roundNumber % vals.Length];
+        }
+
+        /// <summary>
+        /// Advance to the next round and write a console message showing, for every by-round
+        /// property, its value in the previous round and in the new round.
+        /// </summary>
+        /// <returns>The new round number.</returns>
+        public virtual int NewRound()
+        {
+            int previous = roundNumber;
+            roundNumber++;
+            int current = roundNumber;
+
+            StringBuilder message = new StringBuilder("--> Round ");
+            message.Append(previous).Append("-->").Append(current);
+
+            // log changes in values
+            if (valByRound.Count > 0)
+            {
+                message.Append(": ");
+                foreach (KeyValuePair<string, object> entry in valByRound)
+                {
+                    string name = entry.Key;
+                    int[] ints = entry.Value as int[];
+                    double[] doubles = entry.Value as double[];
+                    string[] strings = entry.Value as string[];
+
+                    if (ints != null)
+                    {
+                        int from = ints[previous % ints.Length];
+                        int to = ints[current % ints.Length];
+                        message.Append(" ").Append(name).Append(":").Append(from).Append("-->").Append(to);
+                    }
+                    else if (doubles != null)
+                    {
+                        double from = doubles[previous % doubles.Length];
+                        double to = doubles[current % doubles.Length];
+                        message.Append(" ").Append(name).Append(":").Append(from).Append("-->").Append(to);
+                    }
+                    else if (strings != null)
+                    {
+                        string from = strings[previous % strings.Length];
+                        string to = strings[current % strings.Length];
+                        message.Append(" ").Append(name).Append(":").Append(from).Append("-->").Append(to);
+                    }
+                    else
+                    {
+                        // anything else is expected to be a bool array
+                        bool[] bools = (bool[])entry.Value;
+                        bool from = bools[previous % bools.Length];
+                        bool to = bools[current % bools.Length];
+                        message.Append(" ").Append(name).Append(":").Append(from).Append("-->").Append(to);
+                    }
+                }
+            }
+
+            SystemConsole.WriteLine();
+            SystemConsole.WriteLine(message.ToString());
+            SystemConsole.WriteLine();
+
+            return current;
+        }
+
+        // Splits a ":"-separated list into its tokens, e.g. "a:b:c" gives string[]{"a","b","c"}.
+        // A text without ":" is returned unchanged as a single-element array.
+        private string[] PropToStringArray(string s)
+        {
+            bool isSingleValue = s.IndexOf(":") < 0;
+            if (isSingleValue)
+            {
+                return new string[] { s };
+            }
+
+            List<string> tokens = new List<string>();
+            for (StringTokenizer tokenizer = new StringTokenizer(s, ":"); tokenizer.HasMoreTokens();)
+            {
+                tokens.Add(tokenizer.NextToken());
+            }
+            return tokens.ToArray();
+        }
+
+        // Parses a ":"-separated list of ints with the invariant culture,
+        // e.g. "10:100:5" gives int[]{10,100,5}.
+        private int[] PropToInt32Array(string s)
+        {
+            bool isSingleValue = s.IndexOf(":") < 0;
+            if (isSingleValue)
+            {
+                return new int[] { int.Parse(s, CultureInfo.InvariantCulture) };
+            }
+
+            List<int> parsed = new List<int>();
+            for (StringTokenizer tokenizer = new StringTokenizer(s, ":"); tokenizer.HasMoreTokens();)
+            {
+                string token = tokenizer.NextToken();
+                parsed.Add(int.Parse(token, CultureInfo.InvariantCulture));
+            }
+            return parsed.ToArray();
+        }
+
+        // Parses a ":"-separated list of doubles with the invariant culture,
+        // e.g. "10.7:100.4:-2.3" gives double[]{10.7,100.4,-2.3}.
+        private double[] PropToDoubleArray(string s)
+        {
+            bool isSingleValue = s.IndexOf(":") < 0;
+            if (isSingleValue)
+            {
+                return new double[] { double.Parse(s, CultureInfo.InvariantCulture) };
+            }
+
+            List<double> parsed = new List<double>();
+            for (StringTokenizer tokenizer = new StringTokenizer(s, ":"); tokenizer.HasMoreTokens();)
+            {
+                string token = tokenizer.NextToken();
+                parsed.Add(double.Parse(token, CultureInfo.InvariantCulture));
+            }
+            return parsed.ToArray();
+        }
+
+        // Parses a ":"-separated list of booleans,
+        // e.g. "true:true:false" gives bool[]{true,true,false}.
+        private bool[] PropToBooleanArray(string s)
+        {
+            bool isSingleValue = s.IndexOf(":") < 0;
+            if (isSingleValue)
+            {
+                return new bool[] { bool.Parse(s) };
+            }
+
+            List<bool> parsed = new List<bool>();
+            for (StringTokenizer tokenizer = new StringTokenizer(s, ":"); tokenizer.HasMoreTokens();)
+            {
+                string token = tokenizer.NextToken();
+                parsed.Add(bool.Parse(token));
+            }
+            return parsed.ToArray();
+        }
+
+        /// <summary>
+        /// Gets the column names of all by-round properties, each preceded by a separator,
+        /// for use in report titles. Returns an empty string when there are none.
+        /// </summary>
+        public virtual string GetColsNamesForValsByRound()
+        {
+            StringBuilder title = new StringBuilder();
+            if (colForValByRound.Count != 0)
+            {
+                foreach (string colName in colForValByRound.Values)
+                {
+                    title.Append(" ");
+                    title.Append(colName);
+                }
+            }
+            return title.ToString();
+        }
+
+        /// <summary>
+        /// Gets the values of all by-round properties for the given round, for report lines.
+        /// </summary>
+        /// <param name="roundNum">Round whose values are wanted; a negative number writes a "-" placeholder in every column.</param>
+        /// <returns>The concatenated column values, or an empty string when there are no by-round properties.</returns>
+        public virtual string GetColsValuesForValsByRound(int roundNum)
+        {
+            StringBuilder line = new StringBuilder();
+            foreach (KeyValuePair<string, string> column in colForValByRound)
+            {
+                // the column name, with its leading separator, decides the width
+                string template = " " + column.Value;
+
+                if (roundNum < 0)
+                {
+                    // just append blanks
+                    line.Append(Formatter.FormatPaddLeft("-", template));
+                    continue;
+                }
+
+                // append actual values, for that round
+                object values;
+                valByRound.TryGetValue(column.Key, out values);
+
+                int[] ints = values as int[];
+                double[] doubles = values as double[];
+                string[] strings = values as string[];
+
+                if (ints != null)
+                {
+                    line.Append(Formatter.Format(ints[roundNum % ints.Length], template));
+                }
+                else if (doubles != null)
+                {
+                    line.Append(Formatter.Format(2, doubles[roundNum % doubles.Length], template));
+                }
+                else if (strings != null)
+                {
+                    // string values are appended as they are, without padding
+                    line.Append(strings[roundNum % strings.Length]);
+                }
+                else
+                {
+                    // anything else is expected to be a bool array
+                    bool[] bools = (bool[])values;
+                    line.Append(Formatter.FormatPaddLeft("" + bools[roundNum % bools.Length], template));
+                }
+            }
+            return line.ToString();
+        }
+
+        /// <summary>
+        /// Gets the current round number. It starts at 0 and is incremented by <see cref="NewRound()"/>.
+        /// </summary>
+        public virtual int RoundNumber
+        {
+            get
+            {
+                return this.roundNumber;
+            }
+        }
+
+        /// <summary>
+        /// Gets the algorithm text read by the <see cref="TextReader"/> constructor;
+        /// <c>null</c> when the config was created from a dictionary.
+        /// </summary>
+        public virtual string AlgorithmText
+        {
+            get
+            {
+                return this.algorithmText;
+            }
+        }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs b/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs
new file mode 100644
index 0000000..3d05db8
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs
@@ -0,0 +1,46 @@
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.ByTask.Utils
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// File utilities.
+    /// </summary>
+    public class FileUtils
+    {
+        /// <summary>
+        /// Delete a directory and everything below it, even if non-empty.
+        /// </summary>
+        /// <remarks>
+        /// This is a best-effort operation: failures while deleting are not propagated but
+        /// reported through the return value.
+        /// </remarks>
+        /// <param name="dir">Directory to delete.</param>
+        /// <returns>
+        /// <c>true</c> if the delete succeeded or nothing exists at the path afterwards;
+        /// <c>false</c> if a directory or file is still present at the path.
+        /// </returns>
+        /// <exception cref="System.ArgumentNullException">If <paramref name="dir"/> is <c>null</c>.</exception>
+        public static bool FullyDelete(DirectoryInfo dir)
+        {
+            if (dir == null)
+            {
+                throw new System.ArgumentNullException("dir");
+            }
+
+            string path = dir.FullName;
+            try
+            {
+                Directory.Delete(path, true);
+                return true;
+            }
+            catch (System.Exception)
+            {
+                // Deliberately swallowed (best effort). Success means nothing is left at the
+                // path, which also rules out a plain file that Directory.Delete cannot remove.
+                return !Directory.Exists(path) && !File.Exists(path);
+            }
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Utils/Format.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Utils/Format.cs b/src/Lucene.Net.Benchmark/ByTask/Utils/Format.cs
new file mode 100644
index 0000000..85f1fdd
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Utils/Format.cs
@@ -0,0 +1,109 @@
+using System;
+
+namespace Lucene.Net.Benchmarks.ByTask.Utils
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Formatting utilities (for reports).
+    /// </summary>
+    /// <remarks>
+    /// Every method sizes its result to the length of its <c>col</c> argument: shorter text is
+    /// padded with spaces and longer text is truncated. Left-padding methods keep the rightmost
+    /// characters when truncating; right-padding methods keep the leftmost characters.
+    /// Numbers are formatted with the invariant culture.
+    /// </remarks>
+    public class Formatter // LUCENENET specific - renamed from Format because of method name collision
+    {
+        // Standard numeric format strings for 0, 1 and 2 fraction digits.
+        private static readonly string[] numFormat = {
+            "N0",
+            "N1",
+            "N2"
+        };
+
+        /// <summary>
+        /// Pad a number from left.
+        /// </summary>
+        /// <param name="numFracDigits">Number of digits in fraction part - must be 0 or 1 or 2.</param>
+        /// <param name="f">Number to be formatted.</param>
+        /// <param name="col">Column name (used for deciding on length).</param>
+        /// <returns>Formatted string.</returns>
+        public static string Format(int numFracDigits, float f, string col)
+        {
+            // ToString applies the format string; string.Format would treat "N2" as literal text
+            return PadLeftToColumn(f.ToString(numFormat[numFracDigits], System.Globalization.CultureInfo.InvariantCulture), col);
+        }
+
+        /// <summary>
+        /// Pad a number from left.
+        /// </summary>
+        /// <param name="numFracDigits">Number of digits in fraction part - must be 0 or 1 or 2.</param>
+        /// <param name="f">Number to be formatted.</param>
+        /// <param name="col">Column name (used for deciding on length).</param>
+        /// <returns>Formatted string.</returns>
+        public static string Format(int numFracDigits, double f, string col)
+        {
+            return PadLeftToColumn(f.ToString(numFormat[numFracDigits], System.Globalization.CultureInfo.InvariantCulture), col);
+        }
+
+        /// <summary>
+        /// Pad a number from right.
+        /// </summary>
+        /// <param name="numFracDigits">Number of digits in fraction part - must be 0 or 1 or 2.</param>
+        /// <param name="f">Number to be formatted.</param>
+        /// <param name="col">Column name (used for deciding on length).</param>
+        /// <returns>Formatted string.</returns>
+        public static string FormatPaddRight(int numFracDigits, float f, string col)
+        {
+            return PadRightToColumn(f.ToString(numFormat[numFracDigits], System.Globalization.CultureInfo.InvariantCulture), col);
+        }
+
+        /// <summary>
+        /// Pad a number from right.
+        /// </summary>
+        /// <param name="numFracDigits">Number of digits in fraction part - must be 0 or 1 or 2.</param>
+        /// <param name="f">Number to be formatted.</param>
+        /// <param name="col">Column name (used for deciding on length).</param>
+        /// <returns>Formatted string.</returns>
+        public static string FormatPaddRight(int numFracDigits, double f, string col)
+        {
+            return PadRightToColumn(f.ToString(numFormat[numFracDigits], System.Globalization.CultureInfo.InvariantCulture), col);
+        }
+
+        /// <summary>
+        /// Pad a number from left.
+        /// </summary>
+        /// <param name="n">Number to be formatted.</param>
+        /// <param name="col">Column name (used for deciding on length).</param>
+        /// <returns>Formatted string.</returns>
+        public static string Format(int n, string col)
+        {
+            return PadLeftToColumn(n.ToString(System.Globalization.CultureInfo.InvariantCulture), col);
+        }
+
+        /// <summary>
+        /// Pad a string from right.
+        /// </summary>
+        /// <param name="s">String to be formatted; <c>null</c> is treated as empty.</param>
+        /// <param name="col">Column name (used for deciding on length).</param>
+        /// <returns>Formatted string.</returns>
+        public static string Format(string s, string col)
+        {
+            return PadRightToColumn(s ?? string.Empty, col);
+        }
+
+        /// <summary>
+        /// Pad a string from left.
+        /// </summary>
+        /// <param name="s">String to be formatted; <c>null</c> is treated as empty.</param>
+        /// <param name="col">Column name (used for deciding on length).</param>
+        /// <returns>Formatted string.</returns>
+        public static string FormatPaddLeft(string s, string col)
+        {
+            return PadLeftToColumn(s ?? string.Empty, col);
+        }
+
+        // Left-pads text to the width of col; text that is too long keeps its rightmost characters.
+        private static string PadLeftToColumn(string text, string col)
+        {
+            int width = col.Length;
+            string padded = text.PadLeft(width);
+            return padded.Substring(padded.Length - width);
+        }
+
+        // Right-pads text to the width of col; text that is too long keeps its leftmost characters.
+        private static string PadRightToColumn(string text, string col)
+        {
+            int width = col.Length;
+            return text.PadRight(width).Substring(0, width);
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs b/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs
new file mode 100644
index 0000000..cf57512
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs
@@ -0,0 +1,132 @@
+using ICSharpCode.SharpZipLib.BZip2;
+using System.Collections.Generic;
+using System.IO;
+using System.IO.Compression;
+
+namespace Lucene.Net.Benchmarks.ByTask.Utils
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Stream utilities.
+    /// </summary>
+    public class StreamUtils
+    {
+        /// <summary>Buffer size used across the benchmark package</summary>
+        public static readonly int BUFFER_SIZE = 1 << 16; // 64K
+
+        // LUCENENET specific - de-nested Type and renamed FileType
+
+        // Maps a lower-case file extension (including the dot) to its file type.
+        private static readonly IDictionary<string, FileType?> extensionToType = new Dictionary<string, FileType?>();
+        static StreamUtils()
+        {
+            // these are in lower case, we will lower case at the test as well
+            extensionToType[".bz2"] = FileType.BZIP2;
+            extensionToType[".bzip"] = FileType.BZIP2;
+            extensionToType[".gz"] = FileType.GZIP;
+            extensionToType[".gzip"] = FileType.GZIP;
+        }
+
+        /// <summary>
+        /// Returns an <see cref="Stream"/> over the requested file. This method
+        /// attempts to identify the appropriate <see cref="Stream"/> instance to return
+        /// based on the file name (e.g., if it ends with .bz2 or .bzip, return a
+        /// 'bzip' <see cref="Stream"/>).
+        /// </summary>
+        /// <param name="file">File to open for reading.</param>
+        /// <returns>A readable stream; the caller is responsible for disposing it.</returns>
+        public static Stream GetInputStream(FileInfo file)
+        {
+            // First, open the file itself, as this is required by all types.
+            // The file stream buffers with BUFFER_SIZE for better performance.
+            Stream @in = new FileStream(file.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, BUFFER_SIZE);
+            try
+            {
+                return GetFileType(file).GetInputStream(@in);
+            }
+            catch
+            {
+                // do not leak the file handle when the wrapping stream cannot be created
+                @in.Dispose();
+                throw;
+            }
+        }
+
+        /// <summary>
+        /// Return the type of the file based on its extension (compared in lower case),
+        /// or <see cref="FileType.PLAIN"/> if the extension is missing or unknown.
+        /// </summary>
+        private static FileType GetFileType(FileInfo file)
+        {
+            FileType? type = null;
+            string fileName = file.Name;
+            int idx = fileName.LastIndexOf('.');
+            if (idx != -1)
+            {
+                extensionToType.TryGetValue(fileName.Substring(idx).ToLowerInvariant(), out type);
+            }
+            return type ?? FileType.PLAIN;
+        }
+
+        /// <summary>
+        /// Returns an <see cref="Stream"/> for writing the requested file, identifying
+        /// the appropriate <see cref="Stream"/> instance similar to <see cref="GetInputStream(FileInfo)"/>.
+        /// An existing file is overwritten.
+        /// </summary>
+        /// <param name="file">File to create.</param>
+        /// <returns>A writable stream; the caller is responsible for disposing it.</returns>
+        public static Stream GetOutputStream(FileInfo file)
+        {
+            // First, create the file itself, as this is required by all types.
+            // The file stream buffers with BUFFER_SIZE for better performance.
+            Stream os = new FileStream(file.FullName, FileMode.Create, FileAccess.Write, FileShare.Read, BUFFER_SIZE);
+            try
+            {
+                return GetFileType(file).GetOutputStream(os);
+            }
+            catch
+            {
+                // do not leak the file handle when the wrapping stream cannot be created
+                os.Dispose();
+                throw;
+            }
+        }
+    }
+
+ /// <summary>File format type, selected from a file name's extension by <c>StreamUtils</c>.</summary>
+ public enum FileType
+ {
+ /// <summary>
+ /// BZIP2 is automatically used for <b>.bz2</b> and <b>.bzip</b> extensions.
+ /// </summary>
+ BZIP2,
+
+ /// <summary>
+ /// GZIP is automatically used for <b>.gz</b> and <b>.gzip</b> extensions.
+ /// </summary>
+ GZIP,
+
+ /// <summary>
+ /// Plain text is used for anything which is not GZIP or BZIP2.
+ /// </summary>
+ PLAIN
+ }
+
+    /// <summary>
+    /// Extension methods that wrap a raw stream in the (de)compressing stream matching a <see cref="FileType"/>.
+    /// </summary>
+    internal static class FileTypeExtensions
+    {
+        /// <summary>
+        /// Wraps <paramref name="input"/> in a decompressing stream for BZIP2 and GZIP;
+        /// any other type returns <paramref name="input"/> itself.
+        /// </summary>
+        public static Stream GetInputStream(this FileType fileType, Stream input)
+        {
+            if (fileType == FileType.BZIP2)
+            {
+                return new BZip2InputStream(input);
+            }
+            if (fileType == FileType.GZIP)
+            {
+                return new GZipStream(input, CompressionMode.Decompress);
+            }
+            return input;
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="output"/> in a compressing stream for BZIP2 and GZIP;
+        /// any other type returns <paramref name="output"/> itself.
+        /// </summary>
+        public static Stream GetOutputStream(this FileType fileType, Stream output)
+        {
+            if (fileType == FileType.BZIP2)
+            {
+                return new BZip2OutputStream(output);
+            }
+            if (fileType == FileType.GZIP)
+            {
+                return new GZipStream(output, CompressionMode.Compress);
+            }
+            return output;
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Constants.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Constants.cs b/src/Lucene.Net.Benchmark/Constants.cs
new file mode 100644
index 0000000..72bdbdc
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Constants.cs
@@ -0,0 +1,33 @@
+namespace Lucene.Net.Benchmarks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Various benchmarking constants (mostly defaults)
+ /// </summary>
+ public class Constants
+ {
+ // NOTE(review): the meanings below are inferred from the field names only - confirm against the callers.
+ // Default run count: 5.
+ public static readonly int DEFAULT_RUN_COUNT = 5;
+ // Default scale-up value: 5.
+ public static readonly int DEFAULT_SCALE_UP = 5;
+ // Default log step: 1000 (presumably the number of items between progress messages - TODO confirm).
+ public static readonly int DEFAULT_LOG_STEP = 1000;
+
+ // Both boolean values, false first. The field is not readonly and the array is mutable,
+ // so any caller can replace or modify it.
+ public static bool[] BOOLEANS = new bool[] { false, true };
+
+ // Default maximum number of documents: int.MaxValue.
+ public static readonly int DEFAULT_MAXIMUM_DOCUMENTS = int.MaxValue;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
new file mode 100644
index 0000000..0241099
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
@@ -0,0 +1,214 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
+ <PropertyGroup>
+ <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+ <ProjectGuid>{EDC77CB4-597F-4818-8C83-3C006D12C384}</ProjectGuid>
+ <OutputType>Library</OutputType>
+ <AppDesignerFolder>Properties</AppDesignerFolder>
+ <RootNamespace>Lucene.Net.Benchmarks</RootNamespace>
+ <AssemblyName>Lucene.Net.Benchmark</AssemblyName>
+ <TargetFrameworkVersion>v4.5.1</TargetFrameworkVersion>
+ <FileAlignment>512</FileAlignment>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+ <DebugSymbols>true</DebugSymbols>
+ <DebugType>full</DebugType>
+ <Optimize>false</Optimize>
+ <OutputPath>bin\Debug\</OutputPath>
+ <DefineConstants>DEBUG;TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ <DocumentationFile>
+ </DocumentationFile>
+ </PropertyGroup>
+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+ <DebugType>pdbonly</DebugType>
+ <Optimize>true</Optimize>
+ <OutputPath>bin\Release\</OutputPath>
+ <DefineConstants>TRACE</DefineConstants>
+ <ErrorReport>prompt</ErrorReport>
+ <WarningLevel>4</WarningLevel>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DefineConstants>$(DefineConstants);FEATURE_SERIALIZABLE</DefineConstants>
+ </PropertyGroup>
+ <ItemGroup>
+ <Reference Include="System" />
+ <Reference Include="System.Core" />
+ <Reference Include="Microsoft.CSharp" />
+ </ItemGroup>
+ <ItemGroup>
+ <Compile Include="ByTask\Benchmark.cs" />
+ <Compile Include="ByTask\Feeds\AbstractQueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\ContentItemsSource.cs" />
+ <Compile Include="ByTask\Feeds\ContentSource.cs" />
+ <Compile Include="ByTask\Feeds\DemoHTMLParser.cs" />
+ <Compile Include="ByTask\Feeds\DirContentSource.cs" />
+ <Compile Include="ByTask\Feeds\DocData.cs" />
+ <Compile Include="ByTask\Feeds\DocMaker.cs" />
+ <Compile Include="ByTask\Feeds\EnwikiContentSource.cs" />
+ <Compile Include="ByTask\Feeds\EnwikiQueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\FacetSource.cs" />
+ <Compile Include="ByTask\Feeds\FileBasedQueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\GeonamesLineParser.cs" />
+ <Compile Include="ByTask\Feeds\HTMLParser.cs" />
+ <Compile Include="ByTask\Feeds\LineDocSource.cs" />
+ <Compile Include="ByTask\Feeds\LongToEnglishContentSource.cs" />
+ <Compile Include="ByTask\Feeds\LongToEnglishQueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\NoMoreDataException.cs" />
+ <Compile Include="ByTask\Feeds\QueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\RandomFacetSource.cs" />
+ <Compile Include="ByTask\Feeds\ReutersContentSource.cs" />
+ <Compile Include="ByTask\Feeds\ReutersQueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\SimpleQueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\SimpleSloppyPhraseQueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\SingleDocSource.cs" />
+ <Compile Include="ByTask\Feeds\SortableSingleDocSource.cs" />
+ <Compile Include="ByTask\Feeds\SpatialDocMaker.cs" />
+ <Compile Include="ByTask\Feeds\SpatialFileQueryMaker.cs" />
+ <Compile Include="ByTask\Feeds\TrecContentSource.cs" />
+ <Compile Include="ByTask\Feeds\TrecDocParser.cs" />
+ <Compile Include="ByTask\Feeds\TrecFBISParser.cs" />
+ <Compile Include="ByTask\Feeds\TrecFR94Parser.cs" />
+ <Compile Include="ByTask\Feeds\TrecFTParser.cs" />
+ <Compile Include="ByTask\Feeds\TrecGov2Parser.cs" />
+ <Compile Include="ByTask\Feeds\TrecLATimesParser.cs" />
+ <Compile Include="ByTask\Feeds\TrecParserByPath.cs" />
+ <Compile Include="ByTask\PerfRunData.cs" />
+ <Compile Include="ByTask\Programmatic\Sample.cs" />
+ <Compile Include="ByTask\Stats\Points.cs" />
+ <Compile Include="ByTask\Stats\Report.cs" />
+ <Compile Include="ByTask\Stats\TaskStats.cs" />
+ <Compile Include="ByTask\Tasks\AddDocTask.cs" />
+ <Compile Include="ByTask\Tasks\AddFacetedDocTask.cs" />
+ <Compile Include="ByTask\Tasks\AddIndexesTask.cs" />
+ <Compile Include="ByTask\Tasks\AnalyzerFactoryTask.cs" />
+ <Compile Include="ByTask\Tasks\BenchmarkHighlighter.cs" />
+ <Compile Include="ByTask\Tasks\ClearStatsTask.cs" />
+ <Compile Include="ByTask\Tasks\CloseIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\CloseReaderTask.cs" />
+ <Compile Include="ByTask\Tasks\CloseTaxonomyIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\CloseTaxonomyReaderTask.cs" />
+ <Compile Include="ByTask\Tasks\CommitIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\CommitTaxonomyIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\ConsumeContentSourceTask.cs" />
+ <Compile Include="ByTask\Tasks\CreateIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\CreateTaxonomyIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\ForceMergeTask.cs" />
+ <Compile Include="ByTask\Tasks\NearRealtimeReaderTask.cs" />
+ <Compile Include="ByTask\Tasks\NewAnalyzerTask.cs" />
+ <Compile Include="ByTask\Tasks\NewCollationAnalyzerTask.cs" />
+ <Compile Include="ByTask\Tasks\NewLocaleTask.cs" />
+ <Compile Include="ByTask\Tasks\NewRoundTask.cs" />
+ <Compile Include="ByTask\Tasks\OpenIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\OpenReaderTask.cs" />
+ <Compile Include="ByTask\Tasks\OpenTaxonomyIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\OpenTaxonomyReaderTask.cs" />
+ <Compile Include="ByTask\Tasks\PerfTask.cs" />
+ <Compile Include="ByTask\Tasks\PrintReaderTask.cs" />
+ <Compile Include="ByTask\Tasks\ReadTask.cs" />
+ <Compile Include="ByTask\Tasks\ReadTokensTask.cs" />
+ <Compile Include="ByTask\Tasks\ReopenReaderTask.cs" />
+ <Compile Include="ByTask\Tasks\RepAllTask.cs" />
+ <Compile Include="ByTask\Tasks\ReportTask.cs" />
+ <Compile Include="ByTask\Tasks\RepSelectByPrefTask.cs" />
+ <Compile Include="ByTask\Tasks\RepSumByNameRoundTask.cs" />
+ <Compile Include="ByTask\Tasks\RepSumByNameTask.cs" />
+ <Compile Include="ByTask\Tasks\RepSumByPrefRoundTask.cs" />
+ <Compile Include="ByTask\Tasks\RepSumByPrefTask.cs" />
+ <Compile Include="ByTask\Tasks\ResetInputsTask.cs" />
+ <Compile Include="ByTask\Tasks\ResetSystemEraseTask.cs" />
+ <Compile Include="ByTask\Tasks\ResetSystemSoftTask.cs" />
+ <Compile Include="ByTask\Tasks\RollbackIndexTask.cs" />
+ <Compile Include="ByTask\Tasks\SearchTask.cs" />
+ <Compile Include="ByTask\Tasks\SearchTravRetHighlightTask.cs" />
+ <Compile Include="ByTask\Tasks\SearchTravRetLoadFieldSelectorTask.cs" />
+ <Compile Include="ByTask\Tasks\SearchTravRetTask.cs" />
+ <Compile Include="ByTask\Tasks\SearchTravRetVectorHighlightTask.cs" />
+ <Compile Include="ByTask\Tasks\SearchTravTask.cs" />
+ <Compile Include="ByTask\Tasks\SearchWithCollectorTask.cs" />
+ <Compile Include="ByTask\Tasks\SearchWithSortTask.cs" />
+ <Compile Include="ByTask\Tasks\SetPropTask.cs" />
+ <Compile Include="ByTask\Tasks\TaskSequence.cs" />
+ <Compile Include="ByTask\Tasks\UpdateDocTask.cs" />
+ <Compile Include="ByTask\Tasks\WaitForMergesTask.cs" />
+ <Compile Include="ByTask\Tasks\WaitTask.cs" />
+ <Compile Include="ByTask\Tasks\WarmTask.cs" />
+ <Compile Include="ByTask\Tasks\WriteEnwikiLineDocTask.cs" />
+ <Compile Include="ByTask\Tasks\WriteLineDocTask.cs" />
+ <Compile Include="ByTask\Utils\Algorithm.cs" />
+ <Compile Include="ByTask\Utils\AnalyzerFactory.cs" />
+ <Compile Include="ByTask\Utils\Config.cs" />
+ <Compile Include="ByTask\Utils\FileUtils.cs" />
+ <Compile Include="ByTask\Utils\Format.cs" />
+ <Compile Include="ByTask\Utils\StreamUtils.cs" />
+ <Compile Include="Constants.cs" />
+ <Compile Include="Properties\AssemblyInfo.cs" />
+ <Compile Include="Quality\Judge.cs" />
+ <Compile Include="Quality\QualityBenchmark.cs" />
+ <Compile Include="Quality\QualityQuery.cs" />
+ <Compile Include="Quality\QualityQueryParser.cs" />
+ <Compile Include="Quality\QualityStats.cs" />
+ <Compile Include="Quality\Trec\QueryDriver.cs" />
+ <Compile Include="Quality\Trec\Trec1MQReader.cs" />
+ <Compile Include="Quality\Trec\TrecJudge.cs" />
+ <Compile Include="Quality\Trec\TrecTopicsReader.cs" />
+ <Compile Include="Quality\Utils\DocNameExtractor.cs" />
+ <Compile Include="Quality\Utils\QualityQueriesFinder.cs" />
+ <Compile Include="Quality\Utils\SimpleQQParser.cs" />
+ <Compile Include="Quality\Utils\SubmissionReport.cs" />
+ <Compile Include="Utils\ExtractReuters.cs" />
+ <Compile Include="Utils\ExtractWikipedia.cs" />
+ <Compile Include="..\CommonAssemblyInfo.cs">
+ <Link>Properties\CommonAssemblyInfo.cs</Link>
+ </Compile>
+ </ItemGroup>
+ <ItemGroup>
+ <ProjectReference Include="..\Lucene.Net.Analysis.Common\Lucene.Net.Analysis.Common.csproj">
+ <Project>{4ADD0BBC-B900-4715-9526-D871DE8EEA64}</Project>
+ <Name>Lucene.Net.Analysis.Common</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Facet\Lucene.Net.Facet.csproj">
+ <Project>{48F7884A-9454-4E88-8413-9D35992CB440}</Project>
+ <Name>Lucene.Net.Facet</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Highlighter\Lucene.Net.Highlighter.csproj">
+ <Project>{E9E769EA-8504-44BC-8DC9-CCF958765F8F}</Project>
+ <Name>Lucene.Net.Highlighter</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.ICU\Lucene.Net.ICU.csproj">
+ <Project>{349cb7c9-7534-4e1d-9b0a-5521441af0ae}</Project>
+ <Name>Lucene.Net.ICU</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Queries\Lucene.Net.Queries.csproj">
+ <Project>{69D7956C-C2CC-4708-B399-A188FEC384C4}</Project>
+ <Name>Lucene.Net.Queries</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.QueryParser\Lucene.Net.QueryParser.csproj">
+ <Project>{949BA34B-6AE6-4CE3-B578-61E13E4D76BF}</Project>
+ <Name>Lucene.Net.QueryParser</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net.Spatial\Lucene.Net.Spatial.csproj">
+ <Project>{35C347F4-24B2-4BE5-8117-A0E3001551CE}</Project>
+ <Name>Lucene.Net.Spatial</Name>
+ </ProjectReference>
+ <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj">
+ <Project>{5D4AD9BE-1FFB-41AB-9943-25737971BF57}</Project>
+ <Name>Lucene.Net</Name>
+ </ProjectReference>
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="Lucene.Net.Benchmark.project.json" />
+ </ItemGroup>
+ <ItemGroup />
+ <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+ <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+ Other similar extension points exist, see Microsoft.Common.targets.
+ <Target Name="BeforeBuild">
+ </Target>
+ <Target Name="AfterBuild">
+ </Target>
+ -->
+</Project>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json
new file mode 100644
index 0000000..0a83392
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.project.json
@@ -0,0 +1,15 @@
+{
+ "runtimes": {
+ "win": {}
+ },
+ "dependencies": {
+ "icu.net": "54.1.1-alpha",
+ "Sax.Net": "2.0.2",
+ "SharpZipLib": "0.86.0",
+ "Spatial4n.Core": "0.4.1-beta00003",
+ "TagSoup.Net": "1.2.1.1"
+ },
+ "frameworks": {
+ "net451": {}
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Properties/AssemblyInfo.cs b/src/Lucene.Net.Benchmark/Properties/AssemblyInfo.cs
new file mode 100644
index 0000000..8060798
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Properties/AssemblyInfo.cs
@@ -0,0 +1,30 @@
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+// General Information about an assembly is controlled through the following
+// set of attributes. Change these attribute values to modify the information
+// associated with an assembly.
+[assembly: AssemblyTitle("Lucene.Net.Benchmark")]
+[assembly: AssemblyDescription(
+ "System for benchmarking " +
+ "for the Lucene.Net full-text search engine library from The Apache Software Foundation.")]
+[assembly: AssemblyConfiguration("")]
+[assembly: AssemblyDefaultAlias("Lucene.Net.Benchmark")]
+[assembly: AssemblyCulture("")]
+
+// Declare the whole assembly as CLS compliant.
+[assembly: CLSCompliant(true)]
+
+// Setting ComVisible to false makes the types in this assembly not visible
+// to COM components. If you need to access a type in this assembly from
+// COM, set the ComVisible attribute to true on that type.
+[assembly: ComVisible(false)]
+
+// The following GUID is for the ID of the typelib if this project is exposed to COM
+[assembly: Guid("edc77cb4-597f-4818-8c83-3c006d12c384")]
+
+// Expose internal members to the Lucene.Net.Tests.Benchmark assembly (for testing).
+[assembly: InternalsVisibleTo("Lucene.Net.Tests.Benchmark")]
+
+// NOTE: Version information is in CommonAssemblyInfo.cs
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Judge.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Judge.cs b/src/Lucene.Net.Benchmark/Quality/Judge.cs
new file mode 100644
index 0000000..7cd2089
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Judge.cs
@@ -0,0 +1,55 @@
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.Quality
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Judge if a document is relevant for a quality query.
+ /// </summary>
+ public interface IJudge
+ {
+ /// <summary>
+ /// Judge if document <paramref name="docName"/> is relevant for the given quality query.
+ /// </summary>
+ /// <param name="docName">Name of doc tested for relevancy.</param>
+ /// <param name="query">Tested quality query.</param>
+ /// <returns><c>true</c> if relevant, <c>false</c> if not.</returns>
+ bool IsRelevant(string docName, QualityQuery query);
+
+ /// <summary>
+ /// Validate that queries and this <see cref="IJudge"/> match each other.
+ /// To be perfectly valid, this Judge must have some data for each and every
+ /// input quality query, and must not have any data on any other quality query.
+ /// <b>Note</b>: the quality benchmark run would not fail in case of imperfect
+ /// validity, just a warning message would be logged.
+ /// </summary>
+ /// <param name="qq">Quality queries to be validated.</param>
+ /// <param name="logger">If not <c>null</c>, validation issues are logged.</param>
+ /// <returns><c>true</c> if perfectly valid, <c>false</c> if not.</returns>
+ bool ValidateData(QualityQuery[] qq, TextWriter logger);
+
+ /// <summary>
+ /// Return the maximal recall for the input quality query.
+ /// It is the number of relevant docs this <see cref="IJudge"/> "knows" for the query.
+ /// </summary>
+ /// <param name="query">The query whose maximal recall is needed.</param>
+ /// <returns>The number of relevant docs this <see cref="IJudge"/> knows for <paramref name="query"/>.</returns>
+ int MaxRecall(QualityQuery query);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/QualityBenchmark.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/QualityBenchmark.cs b/src/Lucene.Net.Benchmark/Quality/QualityBenchmark.cs
new file mode 100644
index 0000000..ef53e25
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/QualityBenchmark.cs
@@ -0,0 +1,159 @@
+using Lucene.Net.Benchmarks.Quality.Utils;
+using Lucene.Net.Search;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.Quality
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Main entry point for running a quality benchmark.
+ /// <para/>
+ /// There are two main configurations for running a quality benchmark:
+ /// <list type="bullet">
+ ///     <item><description>Against existing judgements.</description></item>
+ ///     <item><description>For submission (e.g. for a contest).</description></item>
+ /// </list>
+ /// The first configuration requires a non null <see cref="IJudge"/>.
+ /// The second configuration requires a non null <see cref="Utils.SubmissionReport"/>.
+ /// </summary>
+ public class QualityBenchmark
+ {
+     /// <summary>Quality Queries that this quality benchmark would execute.</summary>
+     protected QualityQuery[] m_qualityQueries;
+
+     /// <summary>Parser for turning QualityQueries into Lucene Queries.</summary>
+     protected IQualityQueryParser m_qqParser;
+
+     /// <summary>Index to be searched.</summary>
+     protected IndexSearcher m_searcher;
+
+     /// <summary>Index field to extract doc name for each search result; used for judging the results.</summary>
+     protected string m_docNameField;
+
+     /// <summary>Maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging.</summary>
+     private int maxQueries = int.MaxValue;
+
+     /// <summary>Maximal number of results to collect for each query. Default: 1000.</summary>
+     private int maxResults = 1000;
+
+     /// <summary>
+     /// Create a <see cref="QualityBenchmark"/>.
+     /// </summary>
+     /// <param name="qqs">Quality queries to run.</param>
+     /// <param name="qqParser">Parser for turning QualityQueries into Lucene Queries.</param>
+     /// <param name="searcher">Index to be searched.</param>
+     /// <param name="docNameField">
+     /// Name of field containing the document name.
+     /// This allows to extract the doc name for search results,
+     /// and is important for judging the results.
+     /// </param>
+     public QualityBenchmark(QualityQuery[] qqs, IQualityQueryParser qqParser,
+         IndexSearcher searcher, string docNameField)
+     {
+         this.m_qualityQueries = qqs;
+         this.m_qqParser = qqParser;
+         this.m_searcher = searcher;
+         this.m_docNameField = docNameField;
+     }
+
+     /// <summary>
+     /// Run the quality benchmark.
+     /// </summary>
+     /// <param name="judge">
+     /// The judge that can tell if a certain result doc is relevant for a certain quality query.
+     /// If null, no judgements would be made. Usually null for a submission run.
+     /// </param>
+     /// <param name="submitRep">Submission report is created if non null.</param>
+     /// <param name="qualityLog">If not null, quality run data would be printed for each query.</param>
+     /// <returns>
+     /// <see cref="QualityStats"/> of each quality query that was executed.
+     /// Entries are only filled in when <paramref name="judge"/> is not null.
+     /// </returns>
+     /// <exception cref="Exception">If quality benchmark failed to run.</exception>
+     public virtual QualityStats[] Execute(IJudge judge, SubmissionReport submitRep,
+         TextWriter qualityLog)
+     {
+         // never run more queries than were supplied
+         int nQueries = Math.Min(maxQueries, m_qualityQueries.Length);
+         QualityStats[] stats = new QualityStats[nQueries];
+         for (int i = 0; i < nQueries; i++)
+         {
+             QualityQuery qq = m_qualityQueries[i];
+             // generate query
+             Query q = m_qqParser.Parse(qq);
+             // search with this query
+             long t1 = Support.Time.CurrentTimeMilliseconds();
+             TopDocs td = m_searcher.Search(q, null, maxResults);
+             long searchTime = Support.Time.CurrentTimeMilliseconds() - t1;
+             // most likely we either submit or judge, but check both
+             if (judge != null)
+             {
+                 stats[i] = AnalyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
+             }
+             if (submitRep != null)
+             {
+                 submitRep.Report(qq, td, m_docNameField, m_searcher);
+             }
+         }
+         if (submitRep != null)
+         {
+             submitRep.Flush();
+         }
+         return stats;
+     }
+
+     /// <summary>Analyze/judge results for a single quality query; optionally log them.</summary>
+     private QualityStats AnalyzeQueryResults(QualityQuery qq, Query q, TopDocs td, IJudge judge, TextWriter logger, long searchTime)
+     {
+         QualityStats stts = new QualityStats(judge.MaxRecall(qq), searchTime);
+         ScoreDoc[] sd = td.ScoreDocs;
+         // For the first doc name the measured time also covers construction of the
+         // doc name extractor, just in case.
+         long t1 = Support.Time.CurrentTimeMilliseconds();
+         DocNameExtractor xt = new DocNameExtractor(m_docNameField);
+         for (int i = 0; i < sd.Length; i++)
+         {
+             string docName = xt.DocName(m_searcher, sd[i].Doc);
+             long docNameExtractTime = Support.Time.CurrentTimeMilliseconds() - t1;
+             bool isRelevant = judge.IsRelevant(docName, qq);
+             stts.AddResult(i + 1, isRelevant, docNameExtractTime);
+             // Restart the clock only after judging and recording this result, so that
+             // the next extraction time does not include the time spent in the judge.
+             t1 = Support.Time.CurrentTimeMilliseconds();
+         }
+         if (logger != null)
+         {
+             logger.WriteLine(qq.QueryID + " - " + q);
+             stts.Log(qq.QueryID + " Stats:", 1, logger, " ");
+         }
+         return stts;
+     }
+
+     /// <summary>
+     /// The maximum number of quality queries to run. Useful at debugging.
+     /// </summary>
+     public virtual int MaxQueries
+     {
+         get { return maxQueries; }
+         set { maxQueries = value; }
+     }
+
+     /// <summary>
+     /// The maximum number of results to collect for each quality query.
+     /// </summary>
+     public virtual int MaxResults
+     {
+         get { return maxResults; }
+         set { maxResults = value; }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/QualityQuery.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/QualityQuery.cs b/src/Lucene.Net.Benchmark/Quality/QualityQuery.cs
new file mode 100644
index 0000000..de4a945
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/QualityQuery.cs
@@ -0,0 +1,107 @@
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+
+namespace Lucene.Net.Benchmarks.Quality
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A QualityQuery has an ID and some name-value pairs.
+ /// <para/>
+ /// The ID allows to map the quality query with its judgements.
+ /// <para/>
+ /// The name-value pairs are used by a
+ /// <see cref="IQualityQueryParser"/>
+ /// to create a Lucene <see cref="Search.Query"/>.
+ /// <para/>
+ /// It is very likely that name-value-pairs would be mapped into fields in a Lucene query,
+ /// but it is up to the QualityQueryParser how to map - e.g. all values in a single field,
+ /// or each pair as its own field, etc., - and this of course must match the way the
+ /// searched index was constructed.
+ /// </summary>
+ public class QualityQuery : IComparable<QualityQuery>
+ {
+     private string queryID;
+     private IDictionary<string, string> nameValPairs;
+
+     /// <summary>
+     /// Create a <see cref="QualityQuery"/> with given ID and name-value pairs.
+     /// </summary>
+     /// <param name="queryID">ID of this quality query.</param>
+     /// <param name="nameValPairs">The contents of this quality query.</param>
+     public QualityQuery(string queryID, IDictionary<string, string> nameValPairs)
+     {
+         this.queryID = queryID;
+         this.nameValPairs = nameValPairs;
+     }
+
+     /// <summary>
+     /// Return all the names of name-value-pairs in this <see cref="QualityQuery"/>.
+     /// </summary>
+     public virtual string[] GetNames()
+     {
+         return nameValPairs.Keys.ToArray();
+     }
+
+     /// <summary>
+     /// Return the value of a certain name-value pair.
+     /// </summary>
+     /// <param name="name">The name whose value should be returned.</param>
+     /// <returns>The value stored under <paramref name="name"/>, or <c>null</c> if there is no such name.</returns>
+     public virtual string GetValue(string name)
+     {
+         string result;
+         nameValPairs.TryGetValue(name, out result);
+         return result;
+     }
+
+     /// <summary>
+     /// Gets the ID of this query.
+     /// The ID allows to map the quality query with its judgements.
+     /// </summary>
+     public virtual string QueryID
+     {
+         get { return queryID; }
+     }
+
+     /// <summary>
+     /// For a nicer sort of input queries before running them.
+     /// Try first as ints, fall back to string if not int.
+     /// </summary>
+     /// <param name="other">The quality query to compare this one with.</param>
+     /// <returns>
+     /// A negative value, zero or a positive value if this query sorts before,
+     /// equal to or after <paramref name="other"/>. Any instance sorts after <c>null</c>.
+     /// </returns>
+     public virtual int CompareTo(QualityQuery other)
+     {
+         // IComparable convention: every instance is greater than null.
+         if (other == null)
+         {
+             return 1;
+         }
+
+         // Compare as ints when both ids are ints. TryParse (rather than Parse plus
+         // catching FormatException) also covers ids that are numeric but do not fit
+         // in an int, which would otherwise escape as an OverflowException.
+         int n, nOther;
+         if (int.TryParse(queryID, NumberStyles.Integer, CultureInfo.InvariantCulture, out n)
+             && int.TryParse(other.queryID, NumberStyles.Integer, CultureInfo.InvariantCulture, out nOther))
+         {
+             // CompareTo instead of subtraction: n - nOther can wrap around
+             // and yield the wrong sign for operands of opposite sign.
+             return n.CompareTo(nOther);
+         }
+
+         // fall back to string comparison
+         return queryID.CompareToOrdinal(other.queryID);
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/QualityQueryParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/QualityQueryParser.cs b/src/Lucene.Net.Benchmark/Quality/QualityQueryParser.cs
new file mode 100644
index 0000000..a62d472
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/QualityQueryParser.cs
@@ -0,0 +1,35 @@
+using Lucene.Net.Search;
+using System;
+
+namespace Lucene.Net.Benchmarks.Quality
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Parse a <see cref="QualityQuery"/> into a Lucene query.
+ /// </summary>
+ public interface IQualityQueryParser
+ {
+ /// <summary>
+ /// Parse a given <see cref="QualityQuery"/> into a Lucene query.
+ /// </summary>
+ /// <param name="qq">The quality query to be parsed.</param>
+ /// <returns>The Lucene query created from <paramref name="qq"/>.</returns>
+ /// <exception cref="FormatException">If parsing failed.</exception>
+ Query Parse(QualityQuery qq);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/QualityStats.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/QualityStats.cs b/src/Lucene.Net.Benchmark/Quality/QualityStats.cs
new file mode 100644
index 0000000..2098085
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/QualityStats.cs
@@ -0,0 +1,339 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+
+namespace Lucene.Net.Benchmarks.Quality
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Results of quality benchmark run for a single query or for a set of queries.
+ /// </summary>
+ public class QualityStats
+ {
+     /// <summary>Number of points for which precision is computed.</summary>
+     public static readonly int MAX_POINTS = 20;
+
+     /// <summary>Width (in characters) that labels are padded to in <see cref="Log"/>.</summary>
+     private const int LabelWidth = 19;
+
+     /// <summary>Minimum width (in characters) of the integer part of a logged number.</summary>
+     private const int IntegerPartWidth = 6;
+
+     private double maxGoodPoints;
+     private double recall;
+     private readonly double[] pAt; // pAt[n] is the precision at rank n; pAt[0] is unused.
+     private double pRelevantSum = 0; // sum of the precision values taken at each relevant result.
+     private double numPoints = 0;
+     private double numGoodPoints = 0;
+     private double mrr = 0;
+     private long searchTime;
+     private long docNamesExtractTime;
+
+     /// <summary>
+     /// A certain rank in which a relevant doc was found.
+     /// </summary>
+     public class RecallPoint
+     {
+         private readonly int rank;
+         private readonly double recall;
+
+         internal RecallPoint(int rank, double recall)
+         {
+             this.rank = rank;
+             this.recall = recall;
+         }
+
+         /// <summary>Returns the rank: where on the list of returned docs this relevant doc appeared.</summary>
+         public virtual int Rank
+         {
+             get { return rank; }
+         }
+
+         /// <summary>Returns the recall: how many relevant docs were returned up to this point, inclusive.</summary>
+         public virtual double Recall
+         {
+             get { return recall; }
+         }
+     }
+
+     private readonly IList<RecallPoint> recallPoints;
+
+     /// <summary>
+     /// Construct a QualityStats object with anticipated maximal number of relevant hits.
+     /// </summary>
+     /// <param name="maxGoodPoints">maximal possible relevant hits.</param>
+     /// <param name="searchTime">search time for the measured query, as later returned by <see cref="SearchTime"/>.</param>
+     public QualityStats(double maxGoodPoints, long searchTime)
+     {
+         this.maxGoodPoints = maxGoodPoints;
+         this.searchTime = searchTime;
+         this.recallPoints = new List<RecallPoint>();
+         pAt = new double[MAX_POINTS + 1]; // pAt[0] unused.
+     }
+
+     /// <summary>
+     /// Add a (possibly relevant) doc.
+     /// </summary>
+     /// <param name="n">rank of the added doc (its ordinal position within the query results).
+     /// Ranks must be added consecutively, starting at 1.</param>
+     /// <param name="isRelevant"><c>true</c> if the added doc is relevant, <c>false</c> otherwise.</param>
+     /// <param name="docNameExtractTime">time spent extracting the doc name for this result;
+     /// it is accumulated into <see cref="DocNamesExtractTime"/>.</param>
+     /// <exception cref="ArgumentException">If <paramref name="n"/> is not exactly one more than
+     /// the number of points added so far.</exception>
+     public virtual void AddResult(int n, bool isRelevant, long docNameExtractTime)
+     {
+         if (Math.Abs(numPoints + 1 - n) > 1E-6)
+         {
+             throw new ArgumentException("point " + n + " illegal after " + numPoints + " points!");
+         }
+         if (isRelevant)
+         {
+             numGoodPoints += 1;
+             recallPoints.Add(new RecallPoint(n, numGoodPoints));
+             if (recallPoints.Count == 1 && n <= 5)
+             { // first point, but only within 5 top scores.
+                 mrr = 1.0 / n;
+             }
+         }
+         numPoints = n;
+         double p = numGoodPoints / numPoints;
+         if (isRelevant)
+         {
+             pRelevantSum += p;
+         }
+         if (n < pAt.Length)
+         {
+             pAt[n] = p;
+         }
+         // Without a known number of relevant docs, fall back to the current precision.
+         recall = maxGoodPoints <= 0 ? p : numGoodPoints / maxGoodPoints;
+         docNamesExtractTime += docNameExtractTime;
+     }
+
+     /// <summary>
+     /// Return the precision at rank n:
+     /// |{relevant hits within first <c>n</c> hits}| / <c>n</c>.
+     /// </summary>
+     /// <param name="n">requested precision point, must be at least 1 and at most <see cref="MAX_POINTS"/>.</param>
+     /// <returns>The precision at rank <paramref name="n"/>. When fewer than <paramref name="n"/>
+     /// points were added, the precision at the last added point is rescaled to <paramref name="n"/>.</returns>
+     /// <exception cref="ArgumentException">If <paramref name="n"/> is outside [1, <see cref="MAX_POINTS"/>].</exception>
+     public virtual double GetPrecisionAt(int n)
+     {
+         if (n < 1 || n > MAX_POINTS)
+         {
+             throw new ArgumentException("n=" + n + " - but it must be in [1," + MAX_POINTS + "] range!");
+         }
+         if (n > numPoints)
+         {
+             // numPoints < n <= MAX_POINTS here, so the index is within pAt.
+             return (numPoints * pAt[(int)numPoints]) / n;
+         }
+         return pAt[n];
+     }
+
+     /// <summary>
+     /// Return the average precision at recall points.
+     /// </summary>
+     /// <returns>The sum of precisions at relevant results divided by the maximal number of
+     /// good points, or 0 when that maximum is 0.</returns>
+     public virtual double GetAvp()
+     {
+         return maxGoodPoints == 0 ? 0 : pRelevantSum / maxGoodPoints;
+     }
+
+     /// <summary>
+     /// Return the recall: |{relevant hits found}| / |{relevant hits existing}|.
+     /// </summary>
+     public virtual double Recall
+     {
+         get { return recall; }
+     }
+
+     /// <summary>
+     /// Log information on this <see cref="QualityStats"/> object.
+     /// </summary>
+     /// <param name="title">Optional title line; skipped when <c>null</c> or whitespace only.</param>
+     /// <param name="paddLines">Number of empty lines written before and after the report.</param>
+     /// <param name="logger">Logger.</param>
+     /// <param name="prefix">prefix before each log line; <c>null</c> is treated as empty.</param>
+     public virtual void Log(string title, int paddLines, TextWriter logger, string prefix)
+     {
+         for (int i = 0; i < paddLines; i++)
+         {
+             logger.WriteLine();
+         }
+         if (!string.IsNullOrWhiteSpace(title))
+         {
+             logger.WriteLine(title);
+         }
+         prefix = prefix ?? "";
+         WriteStat(logger, prefix, "Search Seconds: ", (double)searchTime / 1000);
+         WriteStat(logger, prefix, "DocName Seconds: ", (double)docNamesExtractTime / 1000);
+         WriteStat(logger, prefix, "Num Points: ", numPoints);
+         WriteStat(logger, prefix, "Num Good Points: ", numGoodPoints);
+         WriteStat(logger, prefix, "Max Good Points: ", maxGoodPoints);
+         WriteStat(logger, prefix, "Average Precision: ", GetAvp());
+         WriteStat(logger, prefix, "MRR: ", MRR);
+         WriteStat(logger, prefix, "Recall: ", Recall);
+         // Note: the loop stops before numPoints, so the precision at the final rank is not printed.
+         for (int i = 1; i < (int)numPoints && i < pAt.Length; i++)
+         {
+             WriteStat(logger, prefix, "Precision At " + i + ": ", GetPrecisionAt(i));
+         }
+         for (int i = 0; i < paddLines; i++)
+         {
+             logger.WriteLine();
+         }
+     }
+
+     /// <summary>
+     /// Writes a single "label value" line: the label padded to a fixed width, the value
+     /// formatted with three decimals using the invariant culture.
+     /// </summary>
+     private static void WriteStat(TextWriter logger, string prefix, string label, double value)
+     {
+         string number = string.Format(CultureInfo.InvariantCulture, "{0:F3}", value);
+         logger.WriteLine(prefix + Format(label, LabelWidth) + FracFormat(number));
+     }
+
+     /// <summary>
+     /// Right-pads <paramref name="s"/> with spaces to at least <paramref name="minLen"/>
+     /// characters; longer strings are returned unchanged and <c>null</c> is treated as empty.
+     /// </summary>
+     private static string Format(string s, int minLen)
+     {
+         // PadRight does not depend on the length of a pre-built padding string,
+         // so it cannot run out of padding for short labels.
+         return (s ?? "").PadRight(Math.Max(minLen, 0));
+     }
+
+     /// <summary>
+     /// Left-pads the integer part of a formatted number with spaces so that decimal
+     /// separators line up in the log output.
+     /// </summary>
+     private static string FracFormat(string frac)
+     {
+         int k = frac.IndexOf('.');
+         if (k < 0)
+         {
+             // No decimal separator (e.g. "NaN"): treat the whole text as the integer part.
+             k = frac.Length;
+         }
+         return frac.Substring(0, k).PadLeft(IntegerPartWidth) + frac.Substring(k);
+     }
+
+     /// <summary>
+     /// Create a <see cref="QualityStats"/> object that is the average of the input <see cref="QualityStats"/> objects.
+     /// </summary>
+     /// <remarks>
+     /// Times are averaged over all inputs; every other measure is averaged only over the
+     /// inputs whose maximal number of good points is positive.
+     /// </remarks>
+     /// <param name="stats">array of input stats to be averaged.</param>
+     /// <returns>an average over the input stats.</returns>
+     /// <exception cref="ArgumentNullException">If <paramref name="stats"/> is <c>null</c>.</exception>
+     public static QualityStats Average(QualityStats[] stats)
+     {
+         if (stats == null)
+         {
+             throw new ArgumentNullException("stats");
+         }
+         QualityStats avg = new QualityStats(0, 0);
+         if (stats.Length == 0)
+         {
+             // weird, no stats to average!
+             return avg;
+         }
+         int m = 0; // queries with positive judgements
+         // aggregate
+         foreach (QualityStats current in stats)
+         {
+             avg.searchTime += current.searchTime;
+             avg.docNamesExtractTime += current.docNamesExtractTime;
+             if (current.maxGoodPoints > 0)
+             {
+                 m++;
+                 avg.numGoodPoints += current.numGoodPoints;
+                 avg.numPoints += current.numPoints;
+                 avg.pRelevantSum += current.GetAvp();
+                 avg.recall += current.recall;
+                 avg.mrr += current.MRR;
+                 avg.maxGoodPoints += current.maxGoodPoints;
+                 for (int j = 1; j < avg.pAt.Length; j++)
+                 {
+                     avg.pAt[j] += current.GetPrecisionAt(j);
+                 }
+             }
+         }
+         Debug.Assert(m > 0, "Fishy: no \"good\" queries!");
+         // take average: times go by all queries, other measures go by "good" queries only.
+         avg.searchTime /= stats.Length;
+         avg.docNamesExtractTime /= stats.Length;
+         if (m == 0)
+         {
+             // Nothing was aggregated; dividing by zero would turn every measure into NaN.
+             return avg;
+         }
+         avg.numGoodPoints /= m;
+         avg.numPoints /= m;
+         avg.recall /= m;
+         avg.mrr /= m;
+         avg.maxGoodPoints /= m;
+         for (int j = 1; j < avg.pAt.Length; j++)
+         {
+             avg.pAt[j] /= m;
+         }
+         avg.pRelevantSum /= m; // this is actually the average precision now
+         avg.pRelevantSum *= avg.maxGoodPoints; // so that GetAvp() would be correct
+
+         return avg;
+     }
+
+     /// <summary>
+     /// Returns the time it took to extract doc names for judging the measured query, in milliseconds.
+     /// </summary>
+     public virtual long DocNamesExtractTime
+     {
+         get { return docNamesExtractTime; }
+     }
+
+     /// <summary>
+     /// Returns the maximal number of good points.
+     /// This is the number of relevant docs known by the judge for the measured query.
+     /// </summary>
+     public virtual double MaxGoodPoints
+     {
+         get { return maxGoodPoints; }
+     }
+
+     /// <summary>
+     /// Returns the number of good points (only relevant points).
+     /// </summary>
+     public virtual double NumGoodPoints
+     {
+         get { return numGoodPoints; }
+     }
+
+     /// <summary>
+     /// Returns the number of points (both relevant and irrelevant points).
+     /// </summary>
+     public virtual double NumPoints
+     {
+         get { return numPoints; }
+     }
+
+     /// <summary>
+     /// Returns the recallPoints.
+     /// </summary>
+     /// <returns>A new array holding the recall points recorded so far.</returns>
+     public virtual RecallPoint[] GetRecallPoints()
+     {
+         return recallPoints.ToArray();
+     }
+
+     /// <summary>
+     /// Returns the Mean reciprocal rank over the queries or RR for a single query.
+     /// </summary>
+     /// <remarks>
+     /// Reciprocal rank is defined as <c>1/r</c> where <c>r</c> is the
+     /// rank of the first correct result, or <c>0</c> if there are no correct
+     /// results within the top 5 results.
+     /// <para/>
+     /// This follows the definition in
+     /// <a href="http://www.cnlp.org/publications/02cnlptrec10.pdf">
+     /// Question Answering - CNLP at the TREC-10 Question Answering Track</a>.
+     /// </remarks>
+     public virtual double MRR
+     {
+         get { return mrr; }
+     }
+
+     /// <summary>
+     /// Returns the search time in milliseconds for the measured query.
+     /// </summary>
+     public virtual long SearchTime
+     {
+         get { return searchTime; }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
new file mode 100644
index 0000000..0540e62
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
@@ -0,0 +1,93 @@
+using Lucene.Net.Benchmarks.Quality.Utils;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.Quality.Trec
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Command-line tool for doing a TREC evaluation run.
+ /// </summary>
+ public class QueryDriver
+ {
+     /// <summary>
+     /// Runs the quality benchmark over an existing index and writes a submission file,
+     /// then logs a summary of the averaged statistics to the console.
+     /// </summary>
+     /// <param name="args">
+     /// <c>&lt;topicsFile&gt; &lt;qrelsFile&gt; &lt;submissionFile&gt; &lt;indexDir&gt; [querySpec]</c>.
+     /// With any other argument count the usage text is printed and the process exits with code 1.
+     /// </param>
+     public static void Main(string[] args)
+     {
+         if (args.Length < 4 || args.Length > 5)
+         {
+             SystemConsole.Error.WriteLine("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
+             SystemConsole.Error.WriteLine("topicsFile: input file containing queries");
+             SystemConsole.Error.WriteLine("qrelsFile: input file containing relevance judgements");
+             SystemConsole.Error.WriteLine("submissionFile: output submission file for trec_eval");
+             SystemConsole.Error.WriteLine("indexDir: index directory");
+             SystemConsole.Error.WriteLine("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
+             SystemConsole.Error.WriteLine("\texample: TD (query on Title + Description). The default is T (title only)");
+             Environment.Exit(1);
+         }
+
+         FileInfo topicsFile = new FileInfo(args[0]);
+         FileInfo qrelsFile = new FileInfo(args[1]);
+
+         // The submission writer is disposed with the other resources so that buffered output
+         // is flushed and the file handle is released when the run ends (or fails).
+         // NOTE(review): Encoding.UTF8 makes StreamWriter emit a byte-order mark at the start
+         // of the file - confirm that consumers of the submission file tolerate it.
+         using (StreamWriter submissionWriter = new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), Encoding.UTF8 /* huh, no nio.Charset ctor? */))
+         using (Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3])))
+         using (IndexReader reader = DirectoryReader.Open(dir))
+         {
+             SubmissionReport submitLog = new SubmissionReport(submissionWriter, "lucene");
+             string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified.
+             IndexSearcher searcher = new IndexSearcher(reader);
+
+             int maxResults = 1000;
+             string docNameField = "docname";
+
+             TextWriter logger = SystemConsole.Out;
+
+             // use trec utilities to read trec topics into quality queries
+             TrecTopicsReader qReader = new TrecTopicsReader();
+             QualityQuery[] qqs = qReader.ReadQueries(IOUtils.GetDecodingReader(topicsFile, Encoding.UTF8));
+
+             // prepare judge, with trec utilities that read from a QRels file
+             IJudge judge = new TrecJudge(IOUtils.GetDecodingReader(qrelsFile, Encoding.UTF8));
+
+             // validate topics & judgments match each other
+             judge.ValidateData(qqs, logger);
+
+             // translate the query spec letters into the topic fields to query on
+             ISet<string> fieldSet = new HashSet<string>();
+             if (fieldSpec.IndexOf('T') >= 0) fieldSet.Add("title");
+             if (fieldSpec.IndexOf('D') >= 0) fieldSet.Add("description");
+             if (fieldSpec.IndexOf('N') >= 0) fieldSet.Add("narrative");
+
+             // set the parsing of quality queries into Lucene queries.
+             IQualityQueryParser qqParser = new SimpleQQParser(fieldSet.ToArray(), "body");
+
+             // run the benchmark
+             QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+             qrun.MaxResults = maxResults;
+             QualityStats[] stats = qrun.Execute(judge, submitLog, logger);
+
+             // print an average sum of the results
+             QualityStats avg = QualityStats.Average(stats);
+             avg.Log("SUMMARY", 2, logger, " ");
+         }
+     }
+ }
+}