You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/08/06 17:59:06 UTC
[08/33] lucenenet git commit: Ported Lucene.Net.Benchmark + tests
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs
new file mode 100644
index 0000000..046ed25
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs
@@ -0,0 +1,225 @@
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Codecs;
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Creates an index.
+ /// </summary>
+ /// <remarks>
+ /// Other side effects: index writer object in perfRunData is set.
+ /// <para/>
+ /// Relevant properties:
+ /// <list type="bullet">
+ /// <item><term>merge.factor</term><description>(default 10)</description></item>
+ /// <item><term>max.buffered</term><description>(default no flush)</description></item>
+ /// <item><term>compound</term><description>(default true)</description></item>
+ /// <item><term>ram.flush.mb</term><description>[default 0]</description></item>
+ /// <item><term>merge.policy</term><description>(default Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net)</description></item>
+ /// <item><term>merge.scheduler</term><description>(default Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net)</description></item>
+ /// <item><term>concurrent.merge.scheduler.max.thread.count</term><description>(defaults per ConcurrentMergeScheduler)</description></item>
+ /// <item><term>concurrent.merge.scheduler.max.merge.count</term><description>(defaults per ConcurrentMergeScheduler)</description></item>
+ /// <item><term>default.codec</term><description></description></item>
+ /// </list>
+ /// <para/>
+ /// This task also supports a "writer.info.stream" property with the following
+ /// values:
+ /// <list type="bullet">
+ /// <item><term>SystemOut</term><description>Sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/> to <see cref="SystemConsole.Out"/>.</description></item>
+ /// <item><term>SystemErr</term><description>Sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/> to <see cref="SystemConsole.Error"/></description></item>
+ /// <item><term>&lt;file_name&gt;</term><description>
+ /// Attempts to create a file given that name and sets <see cref="IndexWriterConfig.SetInfoStream(InfoStream)"/>
+ /// to that file. If this denotes an invalid file name, or some error occurs, an exception will be thrown.
+ /// </description></item>
+ /// </list>
+ /// </remarks>
+ public class CreateIndexTask : PerfTask
+ {
+     /// <summary>
+     /// Initializes a new <see cref="CreateIndexTask"/>, forwarding
+     /// <paramref name="runData"/> to the <see cref="PerfTask"/> base constructor.
+     /// </summary>
+     /// <param name="runData">Run data handed to the base class.</param>
+     public CreateIndexTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Builds the <see cref="IndexDeletionPolicy"/> named by the <c>deletion.policy</c>
+     /// property (default <c>Lucene.Net.Index.KeepOnlyLastCommitDeletionPolicy, Lucene.Net</c>).
+     /// </summary>
+     /// <param name="config">Configuration the property is read from.</param>
+     /// <returns>
+     /// <see cref="NoDeletionPolicy.INSTANCE"/> when the property names <see cref="NoDeletionPolicy"/>;
+     /// otherwise a new instance created with <see cref="Activator.CreateInstance(Type)"/>.
+     /// </returns>
+     /// <exception cref="Exception">The type name cannot be resolved, or the type cannot be instantiated.</exception>
+     public static IndexDeletionPolicy GetIndexDeletionPolicy(Config config)
+     {
+         string deletionPolicyName = config.Get("deletion.policy", "Lucene.Net.Index.KeepOnlyLastCommitDeletionPolicy, Lucene.Net");
+         Type deletionPolicyType = Type.GetType(deletionPolicyName);
+         if (deletionPolicyType == null)
+         {
+             throw new Exception("Unrecognized deletion policy type '" + deletionPolicyName + "'");
+         }
+
+         // NoDeletionPolicy is handed out through its INSTANCE field rather than constructed.
+         if (deletionPolicyType.Equals(typeof(NoDeletionPolicy)))
+         {
+             return NoDeletionPolicy.INSTANCE;
+         }
+
+         try
+         {
+             return (IndexDeletionPolicy)Activator.CreateInstance(deletionPolicyType);
+         }
+         catch (Exception e)
+         {
+             throw new Exception("unable to instantiate class '" + deletionPolicyName + "' as IndexDeletionPolicy", e);
+         }
+     }
+
+     /// <summary>
+     /// Opens an <see cref="IndexWriter"/> in <see cref="OpenMode.CREATE"/> mode via
+     /// <see cref="ConfigureWriter(Config, PerfRunData, OpenMode, IndexCommit)"/> and stores it
+     /// in the run data.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     public override int DoLogic()
+     {
+         PerfRunData runData = RunData;
+         Config config = runData.Config;
+         runData.IndexWriter = ConfigureWriter(config, runData, OpenMode.CREATE, null);
+         return 1;
+     }
+
+     /// <summary>
+     /// Creates an <see cref="IndexWriterConfig"/> from the benchmark properties
+     /// (<c>writer.version</c>, <c>deletion.policy</c>, <c>merge.scheduler</c>,
+     /// <c>concurrent.merge.scheduler.max.thread.count</c>,
+     /// <c>concurrent.merge.scheduler.max.merge.count</c>, <c>default.codec</c>,
+     /// <c>merge.policy</c>, <c>compound</c>, <c>merge.factor</c>, <c>ram.flush.mb</c>
+     /// and <c>max.buffered</c>).
+     /// </summary>
+     /// <param name="config">Configuration the properties are read from.</param>
+     /// <param name="runData">Run data supplying the analyzer.</param>
+     /// <param name="mode">Open mode assigned to the returned configuration.</param>
+     /// <param name="commit">Commit to open, or <c>null</c> to leave the configuration's commit untouched.</param>
+     /// <returns>The populated configuration.</returns>
+     /// <exception cref="Exception">
+     /// A configured type name cannot be resolved, or a configured type cannot be instantiated.
+     /// </exception>
+     public static IndexWriterConfig CreateWriterConfig(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit)
+     {
+         // :Post-Release-Update-Version.LUCENE_XY:
+         LuceneVersion version = (LuceneVersion)Enum.Parse(typeof(LuceneVersion), config.Get("writer.version", LuceneVersion.LUCENE_48.ToString()));
+         IndexWriterConfig iwConf = new IndexWriterConfig(version, runData.Analyzer);
+         iwConf.OpenMode = mode;
+         IndexDeletionPolicy indexDeletionPolicy = GetIndexDeletionPolicy(config);
+         iwConf.IndexDeletionPolicy = indexDeletionPolicy;
+         if (commit != null)
+         {
+             iwConf.IndexCommit = commit;
+         }
+
+         string mergeScheduler = config.Get("merge.scheduler",
+             "Lucene.Net.Index.ConcurrentMergeScheduler, Lucene.Net");
+         Type mergeSchedulerType = Type.GetType(mergeScheduler);
+         if (mergeSchedulerType == null)
+         {
+             throw new Exception("Unrecognized merge scheduler type '" + mergeScheduler + "'");
+         }
+         else if (mergeSchedulerType.Equals(typeof(NoMergeScheduler)))
+         {
+             iwConf.MergeScheduler = NoMergeScheduler.INSTANCE;
+         }
+         else
+         {
+             try
+             {
+                 iwConf.MergeScheduler = (IMergeScheduler)Activator.CreateInstance(mergeSchedulerType);
+             }
+             catch (Exception e)
+             {
+                 throw new Exception("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
+             }
+
+             // Compare the resolved type, not the configured string: the configured name is
+             // assembly-qualified (e.g. "..., Lucene.Net"), so a comparison against the bare
+             // type name never matched and the thread/merge count settings were ignored.
+             if (mergeSchedulerType.Equals(typeof(ConcurrentMergeScheduler)))
+             {
+                 ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler)iwConf.MergeScheduler;
+                 int maxThreadCount = config.Get("concurrent.merge.scheduler.max.thread.count", ConcurrentMergeScheduler.DEFAULT_MAX_THREAD_COUNT);
+                 int maxMergeCount = config.Get("concurrent.merge.scheduler.max.merge.count", ConcurrentMergeScheduler.DEFAULT_MAX_MERGE_COUNT);
+                 cms.SetMaxMergesAndThreads(maxMergeCount, maxThreadCount);
+             }
+         }
+
+         string defaultCodec = config.Get("default.codec", null);
+         if (defaultCodec != null)
+         {
+             try
+             {
+                 // An unresolvable name yields a null Type; the resulting failure is wrapped below.
+                 Type clazz = Type.GetType(defaultCodec);
+                 iwConf.Codec = (Codec)Activator.CreateInstance(clazz);
+             }
+             catch (Exception e)
+             {
+                 throw new Exception("Couldn't instantiate Codec: " + defaultCodec, e);
+             }
+         }
+
+         string mergePolicy = config.Get("merge.policy",
+             "Lucene.Net.Index.LogByteSizeMergePolicy, Lucene.Net");
+         bool isCompound = config.Get("compound", true);
+         Type mergePolicyType = Type.GetType(mergePolicy);
+         if (mergePolicyType == null)
+         {
+             throw new Exception("Unrecognized merge policy type '" + mergePolicy + "'");
+         }
+         else if (mergePolicyType.Equals(typeof(NoMergePolicy)))
+         {
+             iwConf.MergePolicy = isCompound ? NoMergePolicy.COMPOUND_FILES : NoMergePolicy.NO_COMPOUND_FILES;
+         }
+         else
+         {
+             try
+             {
+                 iwConf.MergePolicy = (MergePolicy)Activator.CreateInstance(mergePolicyType);
+             }
+             catch (Exception e)
+             {
+                 throw new Exception("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
+             }
+             iwConf.MergePolicy.NoCFSRatio = isCompound ? 1.0 : 0.0;
+             LogMergePolicy logMergePolicy = iwConf.MergePolicy as LogMergePolicy;
+             if (logMergePolicy != null)
+             {
+                 logMergePolicy.MergeFactor = config.Get("merge.factor", OpenIndexTask.DEFAULT_MERGE_PFACTOR);
+             }
+         }
+
+         double ramBuffer = config.Get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
+         int maxBuffered = config.Get("max.buffered", OpenIndexTask.DEFAULT_MAX_BUFFERED);
+         // The two branches assign the same values in opposite order.
+         // NOTE(review): presumably so that both flush triggers are never disabled at the
+         // same time while the setters validate -- confirm against IndexWriterConfig.
+         if (maxBuffered == IndexWriterConfig.DISABLE_AUTO_FLUSH)
+         {
+             iwConf.RAMBufferSizeMB = ramBuffer;
+             iwConf.MaxBufferedDocs = maxBuffered;
+         }
+         else
+         {
+             iwConf.MaxBufferedDocs = maxBuffered;
+             iwConf.RAMBufferSizeMB = ramBuffer;
+         }
+
+         return iwConf;
+     }
+
+     /// <summary>
+     /// Creates an <see cref="IndexWriter"/> over the run data's directory, using the
+     /// configuration from
+     /// <see cref="CreateWriterConfig(Config, PerfRunData, OpenMode, IndexCommit)"/> and the
+     /// optional <c>writer.info.stream</c> property (<c>SystemOut</c>, <c>SystemErr</c>, or a file name).
+     /// </summary>
+     /// <param name="config">Configuration the properties are read from.</param>
+     /// <param name="runData">Run data supplying the analyzer and directory.</param>
+     /// <param name="mode">Open mode for the writer.</param>
+     /// <param name="commit">Commit to open, or <c>null</c>.</param>
+     /// <returns>The new writer.</returns>
+     public static IndexWriter ConfigureWriter(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit)
+     {
+         IndexWriterConfig iwc = CreateWriterConfig(config, runData, mode, commit);
+         string infoStreamVal = config.Get("writer.info.stream", null);
+         if (infoStreamVal != null)
+         {
+             if (infoStreamVal.Equals("SystemOut", StringComparison.Ordinal))
+             {
+                 iwc.SetInfoStream(SystemConsole.Out);
+             }
+             else if (infoStreamVal.Equals("SystemErr", StringComparison.Ordinal))
+             {
+                 iwc.SetInfoStream(SystemConsole.Error);
+             }
+             else
+             {
+                 // Any other value is treated as a file name; the file is created or overwritten.
+                 FileInfo f = new FileInfo(infoStreamVal);
+                 iwc.SetInfoStream(new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.GetEncoding(0)));
+             }
+         }
+         IndexWriter writer = new IndexWriter(runData.Directory, iwc);
+         return writer;
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateTaxonomyIndexTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateTaxonomyIndexTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateTaxonomyIndexTask.cs
new file mode 100644
index 0000000..15ec2ee
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateTaxonomyIndexTask.cs
@@ -0,0 +1,42 @@
+using Lucene.Net.Facet.Taxonomy.Directory;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Create a taxonomy index.
+ /// <para/>
+ /// Other side effects: taxonomy writer object in perfRunData is set.
+ /// </summary>
+ public class CreateTaxonomyIndexTask : PerfTask
+ {
+     /// <summary>
+     /// Initializes the task, forwarding <paramref name="runData"/> to the
+     /// <see cref="PerfTask"/> base constructor.
+     /// </summary>
+     /// <param name="runData">Run data handed to the base class.</param>
+     public CreateTaxonomyIndexTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Creates a <see cref="DirectoryTaxonomyWriter"/> over the run data's taxonomy
+     /// directory in <see cref="OpenMode.CREATE"/> mode and stores it in the run data.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     public override int DoLogic()
+     {
+         PerfRunData data = RunData;
+         DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(data.TaxonomyDir, OpenMode.CREATE);
+         data.TaxonomyWriter = taxonomyWriter;
+         return 1;
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/ForceMergeTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/ForceMergeTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/ForceMergeTask.cs
new file mode 100644
index 0000000..3e29e4d
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/ForceMergeTask.cs
@@ -0,0 +1,61 @@
+using Lucene.Net.Index;
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Runs forceMerge on the index.
+ /// <para/>
+ /// Other side effects: none.
+ /// </summary>
+ public class ForceMergeTask : PerfTask
+ {
+     // Segment count passed to ForceMerge; DoLogic treats -1 as "not specified".
+     private int maxNumSegments = -1;
+
+     /// <summary>
+     /// Initializes the task, forwarding <paramref name="runData"/> to the
+     /// <see cref="PerfTask"/> base constructor.
+     /// </summary>
+     /// <param name="runData">Run data handed to the base class.</param>
+     public ForceMergeTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Calls <see cref="IndexWriter.ForceMerge(int)"/> on the run data's writer with the
+     /// segment count supplied through <see cref="SetParams(string)"/>.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     /// <exception cref="InvalidOperationException">No segment count has been supplied.</exception>
+     public override int DoLogic()
+     {
+         if (maxNumSegments != -1)
+         {
+             IndexWriter writer = RunData.IndexWriter;
+             writer.ForceMerge(maxNumSegments);
+             return 1;
+         }
+
+         throw new InvalidOperationException("required argument (maxNumSegments) was not specified");
+     }
+
+     /// <summary>
+     /// Parses <paramref name="params"/> as an invariant-culture number and keeps its
+     /// value, truncated to <see cref="int"/>, as the segment count.
+     /// </summary>
+     /// <param name="params">Numeric text, e.g. <c>1</c>.</param>
+     public override void SetParams(string @params)
+     {
+         base.SetParams(@params);
+         double parsed = double.Parse(@params, CultureInfo.InvariantCulture);
+         maxNumSegments = (int)parsed;
+     }
+
+     /// <summary>Always <c>true</c>: this task accepts parameters.</summary>
+     public override bool SupportsParams
+     {
+         get { return true; }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/NearRealtimeReaderTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/NearRealtimeReaderTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/NearRealtimeReaderTask.cs
new file mode 100644
index 0000000..411f285
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/NearRealtimeReaderTask.cs
@@ -0,0 +1,132 @@
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Globalization;
+using System.Threading;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Spawns a BG thread that periodically (defaults to 3.0
+ /// seconds, but accepts param in seconds) wakes up and asks
+ /// IndexWriter for a near real-time reader. Then runs a
+ /// single query (body: 1) sorted by docdate, and prints
+ /// time to reopen and time to run the search.
+ /// <para/>
+ /// @lucene.experimental It's also not generally usable, eg
+ /// you cannot change which query is executed.
+ /// </summary>
+ public class NearRealtimeReaderTask : PerfTask
+ {
+     // Pause between reopen attempts in milliseconds; SetParams overrides the 3 second default.
+     internal long pauseMSec = 3000L;
+
+     // Number of reopens recorded by the most recent DoLogic run.
+     internal int reopenCount;
+     // Reopen latencies in milliseconds; only the first reopenCount entries are meaningful.
+     internal int[] reopenTimes = new int[1];
+
+     /// <summary>
+     /// Initializes the task, forwarding <paramref name="runData"/> to the
+     /// <see cref="PerfTask"/> base constructor.
+     /// </summary>
+     /// <param name="runData">Run data handed to the base class.</param>
+     public NearRealtimeReaderTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Opens a near-real-time reader on the run data's writer and, until <c>Stop</c> is set,
+     /// reopens it once per pause interval, recording how long each successful reopen took.
+     /// </summary>
+     /// <returns>The number of reopens that produced a new reader.</returns>
+     /// <exception cref="Exception">
+     /// The run data has no open writer, or already holds an index reader.
+     /// </exception>
+     public override int DoLogic()
+     {
+         PerfRunData data = RunData;
+
+         // A writer must be open and no reader may be registered yet.
+         IndexWriter writer = data.IndexWriter;
+         if (writer == null)
+         {
+             throw new Exception("please open the writer before invoking NearRealtimeReader");
+         }
+         if (data.GetIndexReader() != null)
+         {
+             throw new Exception("please close the existing reader before invoking NearRealtimeReader");
+         }
+
+         long cycleStart = Support.Time.CurrentTimeMilliseconds();
+         DirectoryReader currentReader = DirectoryReader.Open(writer, true);
+         data.SetIndexReader(currentReader);
+         // Our reference is handed over to the run data.
+         currentReader.DecRef();
+
+         // TODO: gather basic metrics for reporting -- eg mean,
+         // stddev, min/max reopen latencies
+
+         // Stop is set from outside (the parent sequence) to end the loop.
+         reopenCount = 0;
+         while (!Stop)
+         {
+             long remainingMSec = pauseMSec - (Support.Time.CurrentTimeMilliseconds() - cycleStart);
+             if (remainingMSec > 0)
+             {
+                 Thread.Sleep((int)remainingMSec);
+             }
+
+             cycleStart = Support.Time.CurrentTimeMilliseconds();
+             DirectoryReader reopened = DirectoryReader.OpenIfChanged(currentReader);
+             if (reopened == null)
+             {
+                 // Nothing changed since the last reopen.
+                 continue;
+             }
+
+             int elapsedMSec = (int)(Support.Time.CurrentTimeMilliseconds() - cycleStart);
+             if (reopenCount == reopenTimes.Length)
+             {
+                 reopenTimes = ArrayUtil.Grow(reopenTimes, 1 + reopenCount);
+             }
+             reopenTimes[reopenCount] = elapsedMSec;
+             reopenCount++;
+
+             // TODO: somehow we need to enable warming, here
+             data.SetIndexReader(reopened);
+             // Our reference is handed over to the run data.
+             reopened.DecRef();
+             currentReader = reopened;
+         }
+         Stop = false;
+
+         return reopenCount;
+     }
+
+     /// <summary>
+     /// Sets the pause between reopens from <paramref name="params"/>, interpreted as
+     /// invariant-culture seconds.
+     /// </summary>
+     /// <param name="params">Pause in seconds, e.g. <c>3.0</c>.</param>
+     public override void SetParams(string @params)
+     {
+         base.SetParams(@params);
+         float seconds = float.Parse(@params, CultureInfo.InvariantCulture);
+         pauseMSec = (long)(1000.0 * seconds);
+     }
+
+     /// <summary>
+     /// When <paramref name="disposing"/> is <c>true</c>, writes the recorded reopen times
+     /// to the console.
+     /// </summary>
+     /// <param name="disposing"><c>true</c> to print the report; <c>false</c> to do nothing.</param>
+     protected override void Dispose(bool disposing)
+     {
+         if (!disposing)
+         {
+             return;
+         }
+
+         SystemConsole.WriteLine("NRT reopen times:");
+         for (int index = 0; index < reopenCount; index++)
+         {
+             SystemConsole.Write(" " + reopenTimes[index]);
+         }
+         SystemConsole.WriteLine();
+     }
+
+     /// <summary>Always <c>true</c>: this task accepts parameters.</summary>
+     public override bool SupportsParams
+     {
+         get { return true; }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/NewAnalyzerTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/NewAnalyzerTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewAnalyzerTask.cs
new file mode 100644
index 0000000..aae5abb
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewAnalyzerTask.cs
@@ -0,0 +1,189 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Support.IO;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Creates a new <see cref="Analyzer"/> and sets it in the RunData for use by all future tasks.
+ /// </summary>
+ public class NewAnalyzerTask : PerfTask
+ {
+     // Analyzer names collected by SetParams; DoLogic cycles through them.
+     private IList<string> analyzerNames;
+     // Index into analyzerNames of the name the next DoLogic call will use.
+     private int current;
+
+     /// <summary>
+     /// Initializes the task with an empty list of analyzer names.
+     /// </summary>
+     /// <param name="runData">Run data handed to the <see cref="PerfTask"/> base constructor.</param>
+     public NewAnalyzerTask(PerfRunData runData)
+         : base(runData)
+     {
+         analyzerNames = new List<string>();
+     }
+
+     /// <summary>
+     /// Instantiates the analyzer type named by <paramref name="className"/>, preferring a
+     /// constructor that takes a <see cref="LuceneVersion"/> and falling back to the
+     /// parameterless constructor.
+     /// </summary>
+     /// <param name="className">Type name as accepted by <see cref="Type.GetType(string)"/>.</param>
+     /// <returns>The new analyzer.</returns>
+     /// <exception cref="TypeLoadException"><paramref name="className"/> cannot be resolved to a type.</exception>
+     public static Analyzer CreateAnalyzer(string className)
+     {
+         // Type.GetType(string) returns null for an unknown name instead of throwing.
+         // Raise TypeLoadException explicitly so callers (see DoLogic) can fall back to
+         // another namespace rather than failing inside Activator with a null type.
+         Type clazz = Type.GetType(className);
+         if (clazz == null)
+         {
+             throw new TypeLoadException("Could not resolve analyzer type '" + className + "'");
+         }
+
+         try
+         {
+             // first try to use a ctor with version parameter (needed for many new Analyzers that have no default one anymore
+             return (Analyzer)Activator.CreateInstance(clazz,
+#pragma warning disable 612, 618
+                 LuceneVersion.LUCENE_CURRENT);
+#pragma warning restore 612, 618
+         }
+         catch (MissingMethodException /*nsme*/)
+         {
+             // otherwise use default ctor
+             return (Analyzer)Activator.CreateInstance(clazz);
+         }
+     }
+
+     /// <summary>
+     /// Creates the next analyzer from the configured name list (wrapping around at the end)
+     /// and assigns it to the run data.
+     /// </summary>
+     /// <remarks>
+     /// The name is first looked up among the run data's analyzer factories. Otherwise a
+     /// name containing a dot is treated as a type name (a leading <c>Standard.</c> is
+     /// prefixed with <c>Lucene.Net.Analysis.</c>), and a name without a dot is tried in
+     /// <c>Lucene.Net.Analysis.Core</c> and then in <c>Lucene.Net.Analysis</c>.
+     /// </remarks>
+     /// <returns>Always 1.</returns>
+     /// <exception cref="Exception">Any failure, wrapped with the analyzer name.</exception>
+     public override int DoLogic()
+     {
+         string analyzerName = null;
+         try
+         {
+             if (current >= analyzerNames.Count)
+             {
+                 current = 0;
+             }
+             analyzerName = analyzerNames[current++];
+             Analyzer analyzer = null;
+             if (string.IsNullOrEmpty(analyzerName))
+             {
+                 analyzerName = "Lucene.Net.Analysis.Standard.StandardAnalyzer, Lucene.Net.Analysis.Common";
+             }
+             // First, lookup analyzerName as a named analyzer factory
+             AnalyzerFactory factory;
+             if (RunData.AnalyzerFactories.TryGetValue(analyzerName, out factory) && null != factory)
+             {
+                 analyzer = factory.Create();
+             }
+             else
+             {
+                 if (analyzerName.Contains("."))
+                 {
+                     if (analyzerName.StartsWith("Standard.", StringComparison.Ordinal))
+                     {
+                         analyzerName = "Lucene.Net.Analysis." + analyzerName;
+                     }
+                     analyzer = CreateAnalyzer(analyzerName);
+                 }
+                 else
+                 { // No package
+                     try
+                     {
+                         // Attempt to instantiate a core analyzer
+                         string coreClassName = "Lucene.Net.Analysis.Core." + analyzerName;
+                         analyzer = CreateAnalyzer(coreClassName);
+                         analyzerName = coreClassName;
+                     }
+                     catch (TypeLoadException /*e*/)
+                     {
+                         // If not a core analyzer, try the base analysis package
+                         analyzerName = "Lucene.Net.Analysis." + analyzerName;
+                         analyzer = CreateAnalyzer(analyzerName);
+                     }
+                 }
+             }
+             RunData.Analyzer = analyzer;
+         }
+         catch (Exception e)
+         {
+             throw new Exception("Error creating Analyzer: " + analyzerName, e);
+         }
+         return 1;
+     }
+
+     /// <summary>
+     /// Set the params (analyzerName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
+     /// Lucene.Net.Analysis, the name can be shortened by dropping the Lucene.Net.Analysis part of the Fully Qualified Class Name.
+     /// <para/>
+     /// Analyzer names may also refer to previously defined AnalyzerFactory's.
+     /// <para/>
+     /// Example Declaration:
+     /// <code>
+     /// {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, Standard.StandardAnalyzer) >
+     /// </code>
+     /// <para/>
+     /// Example AnalyzerFactory usage:
+     /// <code>
+     /// -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
+     /// -NewAnalyzer('whitespace tokenized')
+     /// </code>
+     /// </summary>
+     /// <param name="params">analyzerClassName, or empty for the StandardAnalyzer</param>
+     /// <exception cref="Exception">
+     /// An unexpected token was found; the message is prefixed with <c>Line #</c> and a line number.
+     /// </exception>
+     public override void SetParams(string @params)
+     {
+         base.SetParams(@params);
+         StreamTokenizer stok = new StreamTokenizer(new StringReader(@params));
+         stok.QuoteChar('"');
+         stok.QuoteChar('\'');
+         stok.IsEOLSignificant = false;
+         stok.OrdinaryChar(',');
+         try
+         {
+             while (stok.NextToken() != StreamTokenizer.TT_EOF)
+             {
+                 switch (stok.TokenType)
+                 {
+                     case ',':
+                         {
+                             // Do nothing
+                             break;
+                         }
+                     case '\'':
+                     case '\"':
+                     case StreamTokenizer.TT_WORD:
+                         {
+                             analyzerNames.Add(stok.StringValue);
+                             break;
+                         }
+                     default:
+                         {
+                             throw new Exception("Unexpected token: " + stok.ToString());
+                         }
+                 }
+             }
+         }
+         catch (Exception e)
+         {
+             if (e.Message.StartsWith("Line #", StringComparison.Ordinal))
+             {
+                 // Already carries line information: rethrow without resetting the stack trace.
+                 throw;
+             }
+             else
+             {
+                 throw new Exception("Line #" + (stok.LineNumber + AlgLineNum) + ": ", e);
+             }
+         }
+     }
+
+     /// <seealso cref="PerfTask.SupportsParams"/>
+     public override bool SupportsParams
+     {
+         get { return true; }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/NewCollationAnalyzerTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/NewCollationAnalyzerTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewCollationAnalyzerTask.cs
new file mode 100644
index 0000000..4ff00c1
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewCollationAnalyzerTask.cs
@@ -0,0 +1,149 @@
+using Icu.Collation;
+using Lucene.Net.Analysis;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// LUCENENET specific extension methods for the <see cref="NewCollationAnalyzerTask.Implementation"/> enumeration.
+ /// </summary>
+ public static class ImplementationExtensions
+ {
+     /// <summary>
+     /// Returns the analyzer type used for <paramref name="impl"/>.
+     /// </summary>
+     /// <param name="impl">The collation implementation.</param>
+     /// <returns><see cref="Lucene.Net.Collation.ICUCollationKeyAnalyzer"/> for every value.</returns>
+     public static Type GetAnalyzerType(this NewCollationAnalyzerTask.Implementation impl)
+     {
+         // LUCENENET: the JDK implementation (CollationKeyAnalyzer) is not supported,
+         // so ICU and any other value resolve to the ICU analyzer.
+         return typeof(Lucene.Net.Collation.ICUCollationKeyAnalyzer);
+     }
+
+     /// <summary>
+     /// Returns the collator type used for <paramref name="impl"/>.
+     /// </summary>
+     /// <param name="impl">The collation implementation.</param>
+     /// <returns><see cref="Icu.Collation.Collator"/> for every value.</returns>
+     public static Type GetCollatorType(this NewCollationAnalyzerTask.Implementation impl)
+     {
+         // LUCENENET: the JDK implementation is not supported, so ICU and any other
+         // value resolve to the ICU collator.
+         return typeof(Icu.Collation.Collator);
+     }
+ }
+
+ public class NewCollationAnalyzerTask : PerfTask
+ {
+     /// <summary>
+     /// Different Collation implementations: currently
+     /// limited to what is provided in ICU.
+     /// <para/>
+     /// See <a href="http://site.icu-project.org/charts/collation-icu4j-sun">Comparison of implementations</a>
+     /// </summary>
+     public enum Implementation
+     {
+         //JDK, // LUCENENET: Not supported
+         ICU
+     }
+
+     // Implementation selected through SetParams; ICU unless overridden.
+     private Implementation impl = Implementation.ICU; //Implementation.JDK;
+
+     /// <summary>
+     /// Initializes the task, forwarding <paramref name="runData"/> to the
+     /// <see cref="PerfTask"/> base constructor.
+     /// </summary>
+     /// <param name="runData">Run data handed to the base class.</param>
+     public NewCollationAnalyzerTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Creates a collator for <paramref name="locale"/> and instantiates the analyzer
+     /// type of <paramref name="impl"/> with it.
+     /// </summary>
+     /// <param name="locale">Culture the collator is created for.</param>
+     /// <param name="impl">Implementation whose analyzer type is instantiated.</param>
+     /// <returns>The new analyzer.</returns>
+     internal static Analyzer CreateAnalyzer(CultureInfo locale, Implementation impl)
+     {
+         Type analyzerType = impl.GetAnalyzerType();
+
+         // LUCENENET specific - senseless to use reflection here because we only have one
+         // collator.
+         object collator = Collator.Create(locale, Collator.Fallback.FallbackAllowed);
+
+         object[] constructorArgs = new object[]
+         {
+#pragma warning disable 612, 618
+             LuceneVersion.LUCENE_CURRENT,
+#pragma warning restore 612, 618
+             collator
+         };
+         return (Analyzer)Activator.CreateInstance(analyzerType, constructorArgs);
+     }
+
+     /// <summary>
+     /// Creates a collation analyzer for the run data's locale, assigns it to the run
+     /// data and reports the change on the console.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     /// <exception cref="Exception">
+     /// Any failure, including a missing locale, wrapped with the selected implementation.
+     /// </exception>
+     public override int DoLogic()
+     {
+         try
+         {
+             CultureInfo culture = RunData.Locale;
+             if (culture == null)
+             {
+                 throw new Exception("Locale must be set with the NewLocale task!");
+             }
+
+             Analyzer created = CreateAnalyzer(culture, impl);
+             RunData.Analyzer = created;
+             SystemConsole.WriteLine("Changed Analyzer to: " + created.GetType().Name + "(" + culture + ")");
+         }
+         catch (Exception e)
+         {
+             throw new Exception("Error creating Analyzer: impl=" + impl, e);
+         }
+         return 1;
+     }
+
+     /// <summary>
+     /// Parses a comma-separated list of <c>key:value</c> pairs. Only <c>impl:icu</c>
+     /// (value compared case-insensitively) is accepted.
+     /// </summary>
+     /// <param name="params">Parameter text, e.g. <c>impl:icu</c>.</param>
+     /// <exception cref="Exception">A key or value is not recognized.</exception>
+     public override void SetParams(string @params)
+     {
+         base.SetParams(@params);
+
+         StringTokenizer pairs = new StringTokenizer(@params, ",");
+         while (pairs.HasMoreTokens())
+         {
+             string param = pairs.NextToken();
+             StringTokenizer parts = new StringTokenizer(param, ":");
+             string key = parts.NextToken();
+             string value = parts.NextToken();
+
+             // for now we only support the "impl" parameter.
+             // TODO: add strength, decomposition, etc
+             if (!key.Equals("impl", StringComparison.Ordinal))
+             {
+                 throw new Exception("Unknown parameter " + param);
+             }
+
+             // "jdk" is not supported in LUCENENET, so "icu" is the only accepted value.
+             if (!value.Equals("icu", StringComparison.OrdinalIgnoreCase))
+             {
+                 throw new Exception("Unknown parameter " + param);
+             }
+
+             impl = Implementation.ICU;
+         }
+     }
+
+     /// <summary>Always <c>true</c>: this task accepts parameters.</summary>
+     public override bool SupportsParams
+     {
+         get { return true; }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/NewLocaleTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/NewLocaleTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewLocaleTask.cs
new file mode 100644
index 0000000..135d203
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewLocaleTask.cs
@@ -0,0 +1,97 @@
+using Lucene.Net.Support;
+using System;
+using System.Globalization;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Set a <see cref="CultureInfo"/> for use in benchmarking.
+ /// </summary>
+ /// <remarks>
+ /// Locales can be specified in the following ways:
+ /// <list type="bullet">
+ /// <item><description><c>de</c>: Language "de"</description></item>
+ /// <item><description><code>en,US</code>: Language "en", country "US"</description></item>
+ /// <item><description><code>no-NO</code>: Language "no", country "NO"</description></item>
+ /// <item><description><code>ROOT</code>: The <see cref="CultureInfo.InvariantCulture"/></description></item>
+ /// </list>
+ /// </remarks>
+ public class NewLocaleTask : PerfTask
+ {
+     // Culture name assembled by SetParams, e.g. "de" or "en-US"; "ROOT" selects the invariant culture.
+     private string culture;
+     //private string language;
+     //private string country;
+     //private string variant;
+
+     /// <summary>
+     /// Create a new <see cref="CultureInfo"/> and set it in the RunData for
+     /// use by all future tasks.
+     /// </summary>
+     /// <param name="runData">Run data handed to the <see cref="PerfTask"/> base constructor.</param>
+     public NewLocaleTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Converts a culture name into a <see cref="CultureInfo"/>.
+     /// </summary>
+     /// <param name="culture">Culture name, <c>ROOT</c> (any casing), <c>null</c> or empty.</param>
+     /// <returns>
+     /// <c>null</c> for a <c>null</c> or empty name, <see cref="CultureInfo.InvariantCulture"/>
+     /// for <c>ROOT</c>, otherwise <c>new CultureInfo(culture)</c>.
+     /// </returns>
+     internal static CultureInfo CreateLocale(string culture /*String language, String country, String variant*/)
+     {
+         if (string.IsNullOrEmpty(culture))
+         {
+             return null;
+         }
+
+         if (culture.Equals("ROOT", StringComparison.OrdinalIgnoreCase))
+         {
+             // The JDK root locale (empty language) corresponds to the invariant culture.
+             return CultureInfo.InvariantCulture;
+         }
+
+         return new CultureInfo(culture);
+     }
+
+     /// <summary>
+     /// Creates the configured locale, assigns it to the run data and reports the change
+     /// on the console.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     public override int DoLogic()
+     {
+         CultureInfo locale = CreateLocale(culture /*language, country, variant*/);
+         RunData.Locale = locale;
+
+         string description;
+         if (locale == null)
+         {
+             description = "null";
+         }
+         else if (locale.Equals(CultureInfo.InvariantCulture))
+         {
+             // The invariant culture has an empty Name (so ToString() is empty) but a
+             // non-empty EnglishName; compare against InvariantCulture to detect the root.
+             description = "root locale";
+         }
+         else
+         {
+             description = locale.ToString();
+         }
+
+         SystemConsole.WriteLine("Changed Locale to: " + description);
+         return 1;
+     }
+
+     /// <summary>
+     /// Builds the culture name from a comma-separated <paramref name="params"/>: the first
+     /// token is the language and an optional second token is appended after a hyphen.
+     /// Any further tokens are ignored.
+     /// </summary>
+     /// <param name="params">For example <c>de</c>, <c>en,US</c>, <c>no-NO</c> or <c>ROOT</c>.</param>
+     public override void SetParams(string @params)
+     {
+         base.SetParams(@params);
+         //language = country = variant = "";
+         culture = "";
+         StringTokenizer st = new StringTokenizer(@params, ",");
+         if (st.HasMoreTokens())
+         {
+             culture = st.NextToken();
+         }
+         if (st.HasMoreTokens())
+         {
+             culture += "-" + st.NextToken();
+         }
+         // A third token (the JDK "variant") has no CultureInfo counterpart and is not read.
+     }
+
+     /// <summary>Always <c>true</c>: this task accepts parameters.</summary>
+     public override bool SupportsParams
+     {
+         get { return true; }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/NewRoundTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/NewRoundTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewRoundTask.cs
new file mode 100644
index 0000000..66fc685
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/NewRoundTask.cs
@@ -0,0 +1,44 @@
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Increment the counter for properties maintained by Round Number.
+ /// <para/>
+ /// Other side effects: if there are props by round number, log value change.
+ /// </summary>
+ public class NewRoundTask : PerfTask
+ {
+ /// <summary>
+ /// Creates the task for the given run data.
+ /// </summary>
+ /// <param name="runData">The benchmark run data; passed through to the base task constructor.</param>
+ public NewRoundTask(PerfRunData runData)
+ : base(runData)
+ {
+ }
+
+ /// <summary>
+ /// Calls <c>NewRound()</c> on the run's config.
+ /// </summary>
+ /// <returns>Always 0.</returns>
+ public override int DoLogic()
+ {
+ RunData.Config.NewRound();
+ return 0;
+ }
+
+ /// <summary>
+ /// Always <c>true</c>: this task opts out of statistics recording.
+ /// </summary>
+ /// <seealso cref="PerfTask.ShouldNotRecordStats"/>
+ protected override bool ShouldNotRecordStats
+ {
+ get { return true; }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenIndexTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenIndexTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenIndexTask.cs
new file mode 100644
index 0000000..73ec96a
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenIndexTask.cs
@@ -0,0 +1,88 @@
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Index;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Open an index writer.
+ /// </summary>
+ /// <remarks>
+ /// Other side effects: index writer object in perfRunData is set.
+ /// <para/>
+ /// Relevant properties:
+ /// <list type="bullet">
+ /// <item><term>merge.factor</term><description></description></item>
+ /// <item><term>max.buffered</term><description></description></item>
+ /// <item><term>max.field.length</term><description></description></item>
+ /// <item><term>ram.flush.mb</term><description>[default 0]</description></item>
+ /// </list>
+ /// <para/>
+ /// Accepts a param specifying the commit point as
+ /// previously saved with <see cref="CommitIndexTask"/>. If you specify
+ /// this, it rolls the index back to that commit on opening
+ /// the <see cref="IndexWriter"/>.
+ /// </remarks>
+ public class OpenIndexTask : PerfTask
+ {
+     public static readonly int DEFAULT_MAX_BUFFERED = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
+     public static readonly int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
+     public static readonly double DEFAULT_RAM_FLUSH_MB = (int)IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
+
+     // User data of the commit point to open; null means "open the latest commit".
+     private string commitUserData;
+
+     public OpenIndexTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Opens an <see cref="IndexWriter"/> in append mode (optionally on a specific
+     /// commit point) and stores it in the run data.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     public override int DoLogic()
+     {
+         PerfRunData data = RunData;
+         Config config = data.Config;
+
+         // Resolve the requested commit point, if one was given as a parameter.
+         IndexCommit commit = commitUserData == null
+             ? null
+             : OpenReaderTask.FindIndexCommit(data.Directory, commitUserData);
+
+         data.IndexWriter = CreateIndexTask.ConfigureWriter(config, data, OpenMode.APPEND, commit);
+         return 1;
+     }
+
+     /// <summary>
+     /// Stores the parameter as the user data of the commit point to open.
+     /// A <c>null</c> parameter leaves any previously set value untouched.
+     /// </summary>
+     public override void SetParams(string @params)
+     {
+         base.SetParams(@params);
+
+         // specifies which commit point to open
+         commitUserData = @params ?? commitUserData;
+     }
+
+     public override bool SupportsParams
+     {
+         get { return true; }
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenReaderTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenReaderTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenReaderTask.cs
new file mode 100644
index 0000000..81adf4c
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenReaderTask.cs
@@ -0,0 +1,100 @@
+using Lucene.Net.Index;
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Open an index reader.
+ /// <para/>
+ /// Other side effects: index reader object in perfRunData is set.
+ /// <para/>
+ /// Optional params commitUserData eg. OpenReader(false,commit1)
+ /// </summary>
+ public class OpenReaderTask : PerfTask
+ {
+     public static readonly string USER_DATA = "userData";
+
+     // User data of the commit to open; null opens the directory's current state.
+     private string commitUserData = null;
+
+     public OpenReaderTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Opens a <see cref="DirectoryReader"/> (on a specific commit when one was
+     /// requested) and hands it to the run data.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     public override int DoLogic()
+     {
+         Store.Directory directory = RunData.Directory;
+
+         DirectoryReader reader = commitUserData == null
+             ? DirectoryReader.Open(directory)
+             : DirectoryReader.Open(OpenReaderTask.FindIndexCommit(directory, commitUserData));
+
+         RunData.SetIndexReader(reader);
+         // We transfer reference to the run data
+         reader.DecRef();
+         return 1;
+     }
+
+     /// <summary>
+     /// Takes the first comma separated value of the parameters as the commit user data.
+     /// </summary>
+     public override void SetParams(string @params)
+     {
+         base.SetParams(@params);
+         if (@params == null)
+         {
+             return;
+         }
+
+         string[] parts = @params.Split(new char[] { ',' }).TrimEnd();
+         if (parts.Length > 0)
+         {
+             commitUserData = parts[0];
+         }
+     }
+
+     public override bool SupportsParams
+     {
+         get { return true; }
+     }
+
+     /// <summary>
+     /// Searches the commits of <paramref name="dir"/> for the first one whose user data
+     /// map holds <paramref name="userData"/> under the <see cref="USER_DATA"/> key.
+     /// </summary>
+     /// <param name="dir">The directory whose commits are listed.</param>
+     /// <param name="userData">The value to look for (ordinal comparison).</param>
+     /// <returns>The matching <see cref="IndexCommit"/>.</returns>
+     /// <exception cref="IOException">If no commit carries the requested user data.</exception>
+     public static IndexCommit FindIndexCommit(Store.Directory dir, string userData)
+     {
+         foreach (IndexCommit candidate in DirectoryReader.ListCommits(dir))
+         {
+             IDictionary<string, string> commitData = candidate.UserData;
+             string stored = null;
+             if (commitData != null)
+             {
+                 //ud = map.get(USER_DATA);
+                 commitData.TryGetValue(USER_DATA, out stored);
+             }
+
+             // Commits without the key (or without any user data) can never match.
+             if (stored == null)
+             {
+                 continue;
+             }
+
+             if (stored.Equals(userData, StringComparison.Ordinal))
+             {
+                 return candidate;
+             }
+         }
+
+         throw new IOException("index does not contain commit with userData: " + userData);
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenTaxonomyIndexTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenTaxonomyIndexTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenTaxonomyIndexTask.cs
new file mode 100644
index 0000000..5a1f38b
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenTaxonomyIndexTask.cs
@@ -0,0 +1,41 @@
+using Lucene.Net.Facet.Taxonomy.Directory;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Open a taxonomy index.
+ /// <para/>
+ /// Other side effects: taxonomy writer object in perfRunData is set.
+ /// </summary>
+ public class OpenTaxonomyIndexTask : PerfTask
+ {
+     public OpenTaxonomyIndexTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Creates a <see cref="DirectoryTaxonomyWriter"/> over the run's taxonomy
+     /// directory and stores it in the run data.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     public override int DoLogic()
+     {
+         PerfRunData data = RunData;
+         DirectoryTaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(data.TaxonomyDir);
+         data.TaxonomyWriter = taxonomyWriter;
+         return 1;
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenTaxonomyReaderTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenTaxonomyReaderTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenTaxonomyReaderTask.cs
new file mode 100644
index 0000000..e53738f
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/OpenTaxonomyReaderTask.cs
@@ -0,0 +1,44 @@
+using Lucene.Net.Facet.Taxonomy.Directory;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Open a taxonomy index reader.
+ /// <para/>
+ /// Other side effects: taxonomy reader object in perfRunData is set.
+ /// </summary>
+ public class OpenTaxonomyReaderTask : PerfTask
+ {
+     public OpenTaxonomyReaderTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Opens a <see cref="DirectoryTaxonomyReader"/> over the run's taxonomy
+     /// directory and hands it to the run data.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     public override int DoLogic()
+     {
+         PerfRunData data = RunData;
+         DirectoryTaxonomyReader reader = new DirectoryTaxonomyReader(data.TaxonomyDir);
+         data.SetTaxonomyReader(reader);
+
+         // We transfer reference to the run data
+         reader.DecRef();
+         return 1;
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/PerfTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/PerfTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/PerfTask.cs
new file mode 100644
index 0000000..0ae9dac
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/PerfTask.cs
@@ -0,0 +1,380 @@
+using Lucene.Net.Benchmarks.ByTask.Stats;
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Support;
+using System;
+using System.Diagnostics;
+using System.Globalization;
+using System.Text;
+using System.Threading;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// An abstract task to be tested for performance.
+ /// </summary>
+ /// <remarks>
+ /// Every performance task extends this class, and provides its own
+ /// <see cref="DoLogic()"/> method, which performs the actual task.
+ /// <para/>
+ /// Tasks performing some work that should not be measured for the task can override
+ /// <see cref="Setup()"/> and/or <see cref="TearDown()"/> and place that work there.
+ /// <para/>
+ /// Relevant properties:
+ /// <list type="bullet">
+ /// <item><term>task.max.depth.log</term><description></description></item>
+ /// </list>
+ /// <para/>
+ /// Also supports the following logging attributes:
+ /// <list type="bullet">
+ /// <item><term>log.step</term><description>
+ /// specifies how often to log messages about the current running
+ /// task. Default is 1000 <see cref="DoLogic()"/> invocations. Set to -1 to disable
+ /// logging.
+ /// </description></item>
+ /// <item><term>log.step.[class Task Name]</term><description>
+ /// specifies the same as 'log.step', only for a
+ /// particular task name. For example, log.step.AddDoc will be applied only for
+ /// <see cref="AddDocTask"/>. It's a way to control
+ /// per task logging settings. If you want to omit logging for any other task,
+ /// include log.step=-1. The syntax is "log.step." together with the Task's
+ /// 'short' name (i.e., without the 'Task' part).
+ /// </description></item>
+ /// </list>
+ /// </remarks>
+ public abstract class PerfTask
+ {
+ internal static readonly int DEFAULT_LOG_STEP = 1000;
+
+ private PerfRunData runData;
+
+ // properties that all tasks have
+ private string name;
+ private int depth = 0;
+ protected int m_logStep;
+ private int logStepCount = 0;
+ private int maxDepthLogStart = 0;
+ private bool disableCounting = false;
+ protected string m_params = null;
+
+ private bool runInBackground;
+ private int deltaPri;
+
+ // The first line of this task's definition in the alg file
+ private int algLineNum = 0;
+
+ protected static readonly string NEW_LINE = Environment.NewLine;
+
+ /// <summary>
+ /// Should not be used externally.
+ /// Derives the task's default name from its runtime type name,
+ /// dropping a trailing "Task" when present.
+ /// </summary>
+ private PerfTask()
+ {
+     const string suffix = "Task";
+     string typeName = GetType().Name;
+
+     name = typeName.EndsWith(suffix, StringComparison.Ordinal)
+         ? typeName.Substring(0, typeName.Length - suffix.Length)
+         : typeName;
+ }
+
+ /// <summary>
+ /// Marks this task as a background task and records the requested priority delta.
+ /// </summary>
+ /// <param name="deltaPri">Value subsequently returned by <see cref="BackgroundDeltaPriority"/>.</param>
+ public virtual void SetRunInBackground(int deltaPri)
+ {
+ runInBackground = true;
+ this.deltaPri = deltaPri;
+ }
+
+ /// <summary>
+ /// Gets whether <see cref="SetRunInBackground(int)"/> has been called on this task.
+ /// </summary>
+ public virtual bool RunInBackground
+ {
+ get { return runInBackground; }
+ }
+
+ /// <summary>
+ /// Gets the priority delta last passed to <see cref="SetRunInBackground(int)"/>.
+ /// </summary>
+ public virtual int BackgroundDeltaPriority
+ {
+ get { return deltaPri; }
+ }
+
+ // LUCENENET specific - made private and
+ // added Stop property because volatile
+ // fields cannot be protected.
+ private volatile bool stopNow;
+
+ /// <summary>
+ /// Gets or sets the stop flag; gives subclasses access to the private volatile field.
+ /// </summary>
+ protected bool Stop
+ {
+ get { return stopNow; }
+ set { stopNow = value; }
+ }
+ /// <summary>
+ /// Raises the stop flag. Nothing in this class reads the flag itself;
+ /// subclasses are presumably expected to poll <see cref="Stop"/> - confirm against callers.
+ /// </summary>
+ public virtual void StopNow()
+ {
+ stopNow = true;
+ }
+
+ /// <summary>
+ /// Creates a task bound to the given run data and reads its logging settings
+ /// (<c>task.max.depth.log</c>, <c>log.step</c>, <c>log.step.[name]</c>) from the config.
+ /// </summary>
+ /// <param name="runData">The benchmark run data this task operates on.</param>
+ public PerfTask(PerfRunData runData)
+     : this()
+ {
+     this.runData = runData;
+     Config config = runData.Config;
+     this.maxDepthLogStart = config.Get("task.max.depth.log", 0);
+
+     // A task-specific "log.step.<name>" setting, when present, wins over the global "log.step".
+     string taskLogStepAtt = "log.step." + name;
+     string logStepAtt = config.Get(taskLogStepAtt, null) != null ? taskLogStepAtt : "log.step";
+
+     // It's important to read this from Config, to support vals-by-round.
+     int configuredStep = config.Get(logStepAtt, DEFAULT_LOG_STEP);
+
+     // To avoid the check 'if (logStep > 0)' in tearDown(). This effectively
+     // turns logging off.
+     m_logStep = configuredStep <= 0 ? int.MaxValue : configuredStep;
+ }
+
+ /// <summary>
+ /// Returns a shallow (memberwise) copy of this task.
+ /// </summary>
+ /// <remarks>
+ /// Tasks having non primitive data structures should override this;
+ /// otherwise parallel running of a task sequence might not run correctly.
+ /// </remarks>
+ public virtual object Clone()
+ {
+     object copy = MemberwiseClone();
+     return copy;
+ }
+
+ /// <summary>
+ /// Releases resources held by this task by calling <see cref="Dispose(bool)"/>
+ /// with <c>true</c>, then suppresses finalization.
+ /// </summary>
+ // NOTE(review): the class header visible here does not list IDisposable, so this
+ // method would not be picked up by a using statement - confirm whether that is intended.
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ /// <summary>
+ /// Override point for releasing resources; the base implementation does nothing.
+ /// </summary>
+ /// <param name="disposing"><c>true</c> when called from <see cref="Dispose()"/>.</param>
+ protected virtual void Dispose(bool disposing)
+ {
+ }
+
+ /// <summary>
+ /// Runs <see cref="Setup()"/>, <see cref="DoLogic()"/> and <see cref="TearDown()"/> in that
+ /// order, recording a statistics point around <see cref="DoLogic()"/> when requested.
+ /// </summary>
+ /// <param name="reportStats">
+ /// <c>true</c> to record statistics (still skipped when <see cref="ShouldNotRecordStats"/> is <c>true</c>).
+ /// </param>
+ /// <returns>Number of work items done by this task (0 when counting is disabled).</returns>
+ public int RunAndMaybeStats(bool reportStats)
+ {
+     bool recordStats = reportStats && !ShouldNotRecordStats;
+
+     // Announce the task start only for recorded tasks that are shallow enough.
+     if (recordStats && depth <= maxDepthLogStart && !ShouldNeverLogAtStart)
+     {
+         SystemConsole.WriteLine("------------> starting task: " + GetName());
+     }
+
+     Setup();
+
+     Points points = null;
+     TaskStats stats = null;
+     if (recordStats)
+     {
+         points = runData.Points;
+         stats = points.MarkTaskStart(this, runData.Config.RoundNumber);
+     }
+
+     int workItems = DoLogic();
+     if (disableCounting)
+     {
+         workItems = 0;
+     }
+
+     if (recordStats)
+     {
+         points.MarkTaskEnd(stats, workItems);
+     }
+
+     TearDown();
+     return workItems;
+ }
+
+ /// <summary>
+ /// Perform the task once (ignoring repetitions specification).
+ /// Return number of work items done by this task.
+ /// For indexing that can be number of docs added.
+ /// For warming that can be number of scanned items, etc.
+ /// </summary>
+ /// <remarks>
+ /// <see cref="RunAndMaybeStats(bool)"/> invokes this between <see cref="Setup()"/> and
+ /// <see cref="TearDown()"/>; when statistics are recorded, only this call is bracketed
+ /// by the start/end marks.
+ /// </remarks>
+ /// <returns>Number of work items done by this task.</returns>
+ public abstract int DoLogic();
+
+ /// <summary>
+ /// Returns the task name; when parameters were set they are appended
+ /// in parentheses, e.g. <c>Name(params)</c>.
+ /// </summary>
+ public virtual string GetName()
+ {
+     return m_params == null
+         ? name
+         : name + "(" + m_params + ")";
+ }
+
+ /// <summary>
+ /// Sets the name, replacing the default derived from the type name in the constructor.
+ /// </summary>
+ /// <param name="name">The name to set.</param>
+ protected virtual void SetName(string name)
+ {
+ this.name = name;
+ }
+
+ /// <summary>
+ /// Gets the run data this task was constructed with.
+ /// </summary>
+ public virtual PerfRunData RunData
+ {
+ get { return runData; }
+ }
+
+ /// <summary>
+ /// Gets or Sets the depth. The depth determines the indentation produced by
+ /// <see cref="GetPadding()"/> (4 spaces per level) and is compared against
+ /// <see cref="MaxDepthLogStart"/> when deciding whether to log the task start.
+ /// </summary>
+ public virtual int Depth
+ {
+ get { return depth; }
+ set { depth = value; }
+ }
+
+ /// <summary>
+ /// Computes a blank string padding for printing this task indented by its depth
+ /// (4 spaces per depth level).
+ /// </summary>
+ /// <returns>A string of <c>4 * Depth</c> space characters.</returns>
+ internal string GetPadding()
+ {
+     // The string(char, int) constructor replaces the hand-rolled char[] fill loop.
+     return new string(' ', 4 * Depth);
+ }
+
+ /// <summary>
+ /// Renders the task as: depth padding, a '-' when counting is disabled, the task name,
+ /// and for background tasks " &amp;" followed by the priority delta when it is non-zero.
+ /// </summary>
+ public override string ToString()
+ {
+     StringBuilder text = new StringBuilder(GetPadding());
+     if (disableCounting)
+     {
+         text.Append('-');
+     }
+     text.Append(GetName());
+
+     // Foreground tasks have nothing more to show.
+     if (!RunInBackground)
+     {
+         return text.ToString();
+     }
+
+     text.Append(" &");
+     int priorityDelta = BackgroundDeltaPriority;
+     if (priorityDelta != 0)
+     {
+         text.Append(priorityDelta);
+     }
+     return text.ToString();
+ }
+
+ /// <summary>
+ /// Returns the maxDepthLogStart, read from the <c>task.max.depth.log</c> config
+ /// property (default 0) in the constructor.
+ /// </summary>
+ internal int MaxDepthLogStart
+ {
+ get { return maxDepthLogStart; }
+ }
+
+ /// <summary>
+ /// Builds the progress message printed by <see cref="TearDown()"/>.
+ /// Subclasses can override this to report task-specific details.
+ /// </summary>
+ /// <param name="recsCount">The number of <see cref="TearDown()"/> invocations so far.</param>
+ /// <returns>The message text.</returns>
+ protected virtual string GetLogMessage(int recsCount)
+ {
+ return "processed " + recsCount + " records";
+ }
+
+ /// <summary>
+ /// Tasks that should never log at start can override this.
+ /// Returns <c>true</c> if this task should never log when it starts.
+ /// The base implementation returns <c>false</c>.
+ /// </summary>
+ protected virtual bool ShouldNeverLogAtStart
+ {
+ get { return false; }
+ }
+
+ /// <summary>
+ /// Tasks that should not record statistics can override this.
+ /// Returns <c>true</c> if this task should never record its statistics.
+ /// The base implementation returns <c>false</c>.
+ /// </summary>
+ protected virtual bool ShouldNotRecordStats
+ {
+ get { return false; }
+ }
+
+ /// <summary>
+ /// Task setup work that should not be measured for that specific task. By
+ /// default it does nothing, but tasks can implement this, moving work from
+ /// <see cref="DoLogic()"/> to this method. Only the work done in <see cref="DoLogic()"/>
+ /// is measured for this task. Notice that higher level (sequence) tasks
+ /// containing this task would then measure larger time than the sum of their
+ /// contained tasks.
+ /// </summary>
+ /// <remarks>
+ /// Called by <see cref="RunAndMaybeStats(bool)"/> before <see cref="DoLogic()"/>.
+ /// </remarks>
+ public virtual void Setup()
+ {
+ }
+
+ /// <summary>
+ /// Task teardown work that should not be measured for that specific task.
+ /// Tasks can override this, moving work from <see cref="DoLogic()"/> to this method.
+ /// Only the work done in <see cref="DoLogic()"/> is measured for this task. Notice that
+ /// higher level (sequence) tasks containing this task would then measure larger time
+ /// than the sum of their contained tasks.
+ /// </summary>
+ /// <remarks>
+ /// The base implementation counts invocations and, on every <c>m_logStep</c>-th one,
+ /// prints the elapsed seconds since the run start, the current thread name and
+ /// <see cref="GetLogMessage(int)"/>. Overrides should call the base method to keep
+ /// that progress logging.
+ /// </remarks>
+ public virtual void TearDown()
+ {
+     if (++logStepCount % m_logStep == 0)
+     {
+         // Convert the timestamp to milliseconds in floating point. Dividing the two
+         // long values first truncated the current time to whole seconds.
+         // Assumes StartTimeMillis is on the same Stopwatch-based millisecond scale,
+         // as the original expression implied - TODO confirm in PerfRunData.
+         double nowMillis = Stopwatch.GetTimestamp() * 1000.0 / Stopwatch.Frequency;
+         double time = (nowMillis - runData.StartTimeMillis) / 1000.0;
+         SystemConsole.WriteLine(string.Format(CultureInfo.InvariantCulture, "{0:0000000.00}", time) + " sec --> "
+             + Thread.CurrentThread.Name + " " + GetLogMessage(logStepCount));
+     }
+ }
+
+ /// <summary>
+ /// Sub classes that support parameters must override this method to return
+ /// <c>true</c> if this task supports command line params.
+ /// The base implementation returns <c>false</c>.
+ /// </summary>
+ public virtual bool SupportsParams
+ {
+ get { return false; }
+ }
+
+ /// <summary>
+ /// Set the params of this task.
+ /// </summary>
+ /// <param name="params">The raw parameter string; stored as-is and shown by <see cref="GetName()"/>.</param>
+ /// <exception cref="NotSupportedException">For tasks not supporting command line parameters
+ /// (<see cref="SupportsParams"/> is <c>false</c>).</exception>
+ public virtual void SetParams(string @params)
+ {
+ if (!SupportsParams)
+ {
+ throw new NotSupportedException(GetName() + " does not support command line parameters.");
+ }
+ this.m_params = @params;
+ }
+
+ /// <summary>
+ /// Gets the Params as last stored by <see cref="SetParams(string)"/> (<c>null</c> if never set).
+ /// </summary>
+ public virtual string Params
+ {
+ get { return m_params; }
+ }
+
+ /// <summary>
+ /// Return <c>true</c> if counting is disabled for this task.
+ /// When disabled, <see cref="RunAndMaybeStats(bool)"/> reports 0 work items
+ /// regardless of what <see cref="DoLogic()"/> returned.
+ /// </summary>
+ public virtual bool DisableCounting
+ {
+ get { return disableCounting; }
+ set { disableCounting = value; }
+ }
+
+ /// <summary>
+ /// Gets or sets the first line of this task's definition in the alg file.
+ /// </summary>
+ public virtual int AlgLineNum
+ {
+ get { return algLineNum; }
+ set { algLineNum = value; }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/PrintReaderTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/PrintReaderTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/PrintReaderTask.cs
new file mode 100644
index 0000000..3453fc5
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/PrintReaderTask.cs
@@ -0,0 +1,60 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Opens a reader and prints basic statistics.
+ /// </summary>
+ /// <remarks>
+ /// Accepts an optional parameter: the user data of the commit point to open.
+ /// Without it the reader is opened on the directory directly.
+ /// </remarks>
+ public class PrintReaderTask : PerfTask
+ {
+     // User data identifying the commit to open; null means "no specific commit".
+     private string userData = null;
+
+     public PrintReaderTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     /// <summary>
+     /// Stores the parameter as the commit user data to look up in <see cref="DoLogic()"/>.
+     /// </summary>
+     public override void SetParams(string @params)
+     {
+         base.SetParams(@params);
+         userData = @params;
+     }
+
+     public override bool SupportsParams
+     {
+         get { return true; }
+     }
+
+     /// <summary>
+     /// Opens a reader, prints its document and deletion counts, and disposes it.
+     /// </summary>
+     /// <returns>Always 1.</returns>
+     public override int DoLogic()
+     {
+         Directory dir = RunData.Directory;
+
+         // The using block guarantees the reader is disposed even if printing fails.
+         using (IndexReader r = userData == null
+             ? DirectoryReader.Open(dir)
+             : DirectoryReader.Open(OpenReaderTask.FindIndexCommit(dir, userData)))
+         {
+             SystemConsole.WriteLine("--> numDocs:" + r.NumDocs + " dels:" + r.NumDeletedDocs);
+         }
+         return 1;
+     }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/ReadTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/ReadTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/ReadTask.cs
new file mode 100644
index 0000000..3eeda9b
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/ReadTask.cs
@@ -0,0 +1,339 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Benchmarks.ByTask.Feeds;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using System;
+using System.Collections.Generic;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Read index (abstract) task.
+ /// Sub classes implement <see cref="WithSearch"/>, <see cref="WithWarm"/>, <see cref="WithTraverse"/> and <see cref="WithRetrieve"/>
+ /// </summary>
+ /// <remarks>
+ /// Note: All ReadTasks reuse the reader if it is already open.
+ /// Otherwise a reader is opened at start and closed at the end.
+ /// <para/>
+ /// The <c>search.num.hits</c> config parameter sets
+ /// the top number of hits to collect during searching. If
+ /// <c>print.hits.field</c> is set, then each hit is
+ /// printed along with the value of that field.
+ /// <para/>
+ /// Other side effects: none.
+ /// </remarks>
+ public abstract class ReadTask : PerfTask
+ {
+ private readonly IQueryMaker queryMaker;
+
+ /// <summary>
+ /// Creates the task and, when <see cref="WithSearch"/> is <c>true</c>, obtains the
+ /// query maker via <see cref="GetQueryMaker()"/>.
+ /// </summary>
+ /// <param name="runData">The benchmark run data; passed through to the base task constructor.</param>
+ public ReadTask(PerfRunData runData)
+     : base(runData)
+ {
+     // Only searching tasks need a query maker; the others keep it null.
+     queryMaker = WithSearch ? GetQueryMaker() : null;
+ }
+
+ /// <summary>
+ /// Runs the read workload: optional warming (loading every live document), optional
+ /// search, and optional traversal / retrieval / highlighting of the search hits.
+ /// </summary>
+ /// <remarks>
+ /// Reuses the searcher from the run data when there is one; otherwise opens its own
+ /// reader. Either way the reader is released in a <c>finally</c> block, so a failure
+ /// while warming or searching does not leak the reader or its extra reference.
+ /// </remarks>
+ /// <returns>
+ /// The number of work items: documents loaded while warming, plus 1 for the search,
+ /// plus traversed hits, retrieved documents and whatever the highlighter reports.
+ /// </returns>
+ public override int DoLogic()
+ {
+     int res = 0;
+
+     // open reader or use existing one
+     IndexSearcher searcher = RunData.GetIndexSearcher();
+
+     IndexReader reader;
+
+     bool closeSearcher;
+     if (searcher == null)
+     {
+         // open our own reader
+         Directory dir = RunData.Directory;
+         reader = DirectoryReader.Open(dir);
+         searcher = new IndexSearcher(reader);
+         closeSearcher = true;
+     }
+     else
+     {
+         // use existing one; this passes +1 ref to us
+         reader = searcher.IndexReader;
+         closeSearcher = false;
+     }
+
+     try
+     {
+         // optionally warm and add num docs traversed to count
+         if (WithWarm)
+         {
+             IBits liveDocs = MultiFields.GetLiveDocs(reader);
+             for (int m = 0; m < reader.MaxDoc; m++)
+             {
+                 if (null == liveDocs || liveDocs.Get(m))
+                 {
+                     Document warmed = reader.Document(m);
+                     res += (warmed == null ? 0 : 1);
+                 }
+             }
+         }
+
+         if (WithSearch)
+         {
+             res++;
+             Query q = queryMaker.MakeQuery();
+             Sort sort = Sort;
+             TopDocs hits = null;
+             int numHits = NumHits;
+             if (numHits > 0)
+             {
+                 if (WithCollector == false)
+                 {
+                     if (sort != null)
+                     {
+                         // TODO: instead of always passing false we
+                         // should detect based on the query; if we make
+                         // the IndexSearcher search methods that take
+                         // Weight public again, we can go back to
+                         // pulling the Weight ourselves:
+                         TopFieldCollector collector = TopFieldCollector.Create(sort, numHits,
+                                                                                true, WithScore,
+                                                                                WithMaxScore,
+                                                                                false);
+                         searcher.Search(q, null, collector);
+                         hits = collector.GetTopDocs();
+                     }
+                     else
+                     {
+                         hits = searcher.Search(q, numHits);
+                     }
+                 }
+                 else
+                 {
+                     // A custom collector does not expose its hits here, so 'hits' stays null.
+                     ICollector collector = CreateCollector();
+                     searcher.Search(q, null, collector);
+                     //hits = collector.topDocs();
+                 }
+
+                 string printHitsField = RunData.Config.Get("print.hits.field", null);
+                 if (hits != null && printHitsField != null && printHitsField.Length > 0)
+                 {
+                     SystemConsole.WriteLine("totalHits = " + hits.TotalHits);
+                     SystemConsole.WriteLine("maxDoc() = " + reader.MaxDoc);
+                     SystemConsole.WriteLine("numDocs() = " + reader.NumDocs);
+                     for (int i = 0; i < hits.ScoreDocs.Length; i++)
+                     {
+                         int docID = hits.ScoreDocs[i].Doc;
+                         Document doc = reader.Document(docID);
+                         SystemConsole.WriteLine(" " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));
+                     }
+                 }
+
+                 // Traversal needs the collected hits; on the custom-collector path there
+                 // are none, and dereferencing 'hits' threw a NullReferenceException.
+                 if (WithTraverse && hits != null)
+                 {
+                     ScoreDoc[] scoreDocs = hits.ScoreDocs;
+                     int traversalSize = Math.Min(scoreDocs.Length, TraversalSize);
+
+                     if (traversalSize > 0)
+                     {
+                         bool retrieve = WithRetrieve;
+                         int numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
+                         Analyzer analyzer = RunData.Analyzer;
+                         BenchmarkHighlighter highlighter = null;
+                         if (numHighlight > 0)
+                         {
+                             highlighter = GetBenchmarkHighlighter(q);
+                         }
+                         for (int m = 0; m < traversalSize; m++)
+                         {
+                             int id = scoreDocs[m].Doc;
+                             res++;
+                             if (retrieve)
+                             {
+                                 Document document = RetrieveDoc(reader, id);
+                                 res += document != null ? 1 : 0;
+                                 if (numHighlight > 0 && m < numHighlight)
+                                 {
+                                     ICollection<string> fieldsToHighlight = GetFieldsToHighlight(document);
+                                     foreach (string field in fieldsToHighlight)
+                                     {
+                                         string text = document.Get(field);
+                                         res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);
+                                     }
+                                 }
+                             }
+                         }
+                     }
+                 }
+             }
+         }
+     }
+     finally
+     {
+         if (closeSearcher)
+         {
+             // We opened the reader ourselves, so we dispose it.
+             reader.Dispose();
+         }
+         else
+         {
+             // Release our +1 ref from above
+             reader.DecRef();
+         }
+     }
+     return res;
+ }
+
+ /// <summary>
+ /// Creates the collector used by <see cref="DoLogic()"/> when <see cref="WithCollector"/>
+ /// is <c>true</c>. The base implementation creates a <see cref="TopScoreDocCollector"/>
+ /// sized by <see cref="NumHits"/>.
+ /// </summary>
+ protected virtual ICollector CreateCollector()
+ {
+ return TopScoreDocCollector.Create(NumHits, true);
+ }
+
+
+ /// <summary>
+ /// Loads the document with the given id from the reader. Subclasses can override
+ /// this to change how documents are retrieved during traversal.
+ /// </summary>
+ /// <param name="ir">The reader to load from.</param>
+ /// <param name="id">The document id.</param>
+ /// <returns>The loaded <see cref="Document"/>.</returns>
+ protected virtual Document RetrieveDoc(IndexReader ir, int id)
+ {
+ return ir.Document(id);
+ }
+
+ /// <summary>
+ /// Return query maker used for this task.
+ /// Called from the constructor when <see cref="WithSearch"/> is <c>true</c>.
+ /// </summary>
+ public abstract IQueryMaker GetQueryMaker();
+
+ /// <summary>
+ /// Return <c>true</c> if search should be performed.
+ /// </summary>
+ public abstract bool WithSearch { get; }
+
+ /// <summary>
+ /// Return <c>true</c> if the search should run through the collector from
+ /// <see cref="CreateCollector()"/> instead of the built-in top-docs search.
+ /// The base implementation returns <c>false</c>.
+ /// </summary>
+ public virtual bool WithCollector
+ {
+ get { return false; }
+ }
+
+
+ /// <summary>
+ /// Return <c>true</c> if warming should be performed.
+ /// Warming loads every live document of the reader.
+ /// </summary>
+ public abstract bool WithWarm { get; }
+
+ /// <summary>
+ /// Return <c>true</c> if, with search, results should be traversed.
+ /// </summary>
+ public abstract bool WithTraverse { get; }
+
+ /// <summary>
+ /// Whether scores should be computed (only useful with
+ /// field sort). The base implementation returns <c>true</c>.
+ /// </summary>
+ public virtual bool WithScore
+ {
+ get { return true; }
+ }
+
+ /// <summary>
+ /// Whether maxScores should be computed (only useful with
+ /// field sort). The base implementation returns <c>true</c>.
+ /// </summary>
+ public virtual bool WithMaxScore
+ {
+ get { return true; }
+ }
+
+ /// <summary>
+ /// Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
+ /// of hits that are traversed when <see cref="WithTraverse"/> is <c>true</c>. Must be greater than 0.
+ /// <para/>
+ /// Read task calculates the traversal as: <c>Math.Min(hits.Length, TraversalSize)</c>
+ /// </summary>
+ /// <remarks>
+ /// Unless overridden, the return value is <see cref="int.MaxValue"/>.
+ /// </remarks>
+ public virtual int TraversalSize
+ {
+ get { return int.MaxValue; }
+ }
+
+ // Fallback for the "search.num.hits" config property.
+ internal static readonly int DEFAULT_SEARCH_NUM_HITS = 10;
+ // Refreshed from the config on every Setup() call.
+ private int numHits;
+
+ /// <summary>
+ /// Re-reads <c>search.num.hits</c> from the config before each run of the task.
+ /// </summary>
+ public override void Setup()
+ {
+ base.Setup();
+ numHits = RunData.Config.Get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
+ }
+
+ /// <summary>
+ /// Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number
+ /// of hits that are collected during searching. Must be greater than 0.
+ /// <para/>
+ /// Returns 10 by default, or <c>search.num.hits</c> config if set.
+ /// The value is only populated once <see cref="Setup()"/> has run.
+ /// </summary>
+ public virtual int NumHits
+ {
+ get { return numHits; }
+ }
+
+ /// <summary>
+ /// Return <c>true</c> if, with search and results traversing, docs should be retrieved.
+ /// </summary>
+ public abstract bool WithRetrieve { get; }
+
+ /// <summary>
+ /// The number of documents to highlight. 0 means no docs will be highlighted.
+ /// The base implementation returns 0.
+ /// </summary>
+ public virtual int NumToHighlight
+ {
+ get { return 0; }
+ }
+
+ /// <summary>
+ /// Return an appropriate highlighter to be used with
+ /// highlighting tasks.
+ /// </summary>
+ /// <remarks>
+ /// The base implementation returns <c>null</c>. A subclass that makes
+ /// <see cref="NumToHighlight"/> positive must also override this, because
+ /// <see cref="DoLogic()"/> calls the returned highlighter without a null check.
+ /// </remarks>
+ /// <param name="q">The query whose hits are being highlighted.</param>
+ /// <returns>The highlighter, or <c>null</c> in the base implementation.</returns>
+ protected virtual BenchmarkHighlighter GetBenchmarkHighlighter(Query q)
+ {
+ return null;
+ }
+
+ /// <summary>
+ /// The sort to apply to searches. The base implementation returns <c>null</c>,
+ /// in which case <see cref="DoLogic()"/> uses the plain top-hits search.
+ /// </summary>
+ public virtual Sort Sort
+ {
+ get { return null; }
+ }
+
+ /// <summary>
+ /// Chooses which fields of a document get highlighted.
+ /// The base implementation selects every field, each name listed once.
+ /// </summary>
+ /// <param name="document">The <see cref="Document"/>.</param>
+ /// <returns>An <see cref="T:ICollection{string}"/> of <see cref="Field"/> names.</returns>
+ protected virtual ICollection<string> GetFieldsToHighlight(Document document)
+ {
+     // A set collapses fields that appear more than once under the same name.
+     ISet<string> fieldNames = new HashSet<string>(/*fields.Count*/);
+     IList<IIndexableField> allFields = document.Fields;
+     foreach (IIndexableField field in allFields)
+     {
+         fieldNames.Add(field.Name);
+     }
+     return fieldNames;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/ByTask/Tasks/ReadTokensTask.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/ReadTokensTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/ReadTokensTask.cs
new file mode 100644
index 0000000..1a8125d
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/ReadTokensTask.cs
@@ -0,0 +1,160 @@
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Benchmarks.ByTask.Feeds;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using System.Collections.Generic;
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.ByTask.Tasks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// Simple task to test performance of tokenizers. It just
+ /// creates a token stream for each field of the document and
+ /// reads all tokens out of that stream.
+ /// </summary>
+ public class ReadTokensTask : PerfTask
+ {
+     public ReadTokensTask(PerfRunData runData)
+         : base(runData)
+     {
+     }
+
+     // Running total of the tokens counted by all DoLogic() calls on this instance;
+     // reported by GetLogMessage().
+     private int totalTokenCount = 0;
+
+     // Transient state: assigned in Setup(), consumed by DoLogic(), cleared in TearDown().
+     private Document doc = null;
+
+     /// <summary>
+     /// Runs the base setup and then asks the run data's <see cref="DocMaker"/>
+     /// for the document that <see cref="DoLogic()"/> will tokenize.
+     /// </summary>
+     public override void Setup()
+     {
+         base.Setup();
+         DocMaker docMaker = RunData.DocMaker;
+         doc = docMaker.MakeDocument();
+     }
+
+     /// <summary>
+     /// Builds the log line, reporting the record count together with the
+     /// accumulated token count.
+     /// </summary>
+     /// <param name="recsCount">Number of records (documents) to report.</param>
+     /// <returns>The formatted log message.</returns>
+     protected override string GetLogMessage(int recsCount)
+     {
+         return "read " + recsCount + " docs; " + totalTokenCount + " tokens";
+     }
+
+     /// <summary>
+     /// Drops the reference to the current document and runs the base tear down.
+     /// </summary>
+     public override void TearDown()
+     {
+         doc = null;
+         base.TearDown();
+     }
+
+     /// <summary>
+     /// Reads every token of every eligible field of the current document.
+     /// Fields that are not tokenized, and the numeric field types, are skipped.
+     /// </summary>
+     /// <returns>The number of tokens read during this call.</returns>
+     public override int DoLogic()
+     {
+         IList<IIndexableField> fields = doc.Fields;
+         Analyzer analyzer = RunData.Analyzer;
+         int tokenCount = 0;
+         foreach (IIndexableField field in fields)
+         {
+             if (!field.FieldType.IsTokenized ||
+                 field is Int32Field ||
+                 field is Int64Field ||
+                 field is SingleField ||
+                 field is DoubleField)
+             {
+                 continue;
+             }
+
+             using (TokenStream stream = field.GetTokenStream(analyzer))
+             {
+                 // reset the TokenStream to the first token
+                 stream.Reset();
+
+                 ITermToBytesRefAttribute termAtt = stream.GetAttribute<ITermToBytesRefAttribute>();
+                 while (stream.IncrementToken())
+                 {
+                     // Fill the term bytes for each token; the result itself is not used here.
+                     termAtt.FillBytesRef();
+                     tokenCount++;
+                 }
+                 stream.End();
+             }
+         }
+         totalTokenCount += tokenCount;
+         return tokenCount;
+     }
+
+     /// <summary>
+     /// Simple StringReader that can be reset to a new string;
+     /// we use this when tokenizing the string value from a
+     /// Field.
+     /// </summary>
+     internal ReusableStringReader stringReader = new ReusableStringReader();
+
+     /// <summary>
+     /// A <see cref="TextReader"/> over a string that can be pointed at a new
+     /// string via <see cref="Init(string)"/> instead of being re-created.
+     /// </summary>
+     internal sealed class ReusableStringReader : TextReader
+     {
+         // Index in s of the next character to hand out.
+         private int upto;
+         // Number of characters of s that have not been read yet.
+         private int left;
+         // The string currently being read; null until Init() is called.
+         private string s;
+
+         /// <summary>
+         /// Points this reader at the start of <paramref name="s"/>.
+         /// </summary>
+         /// <param name="s">The string to read from.</param>
+         internal void Init(string s)
+         {
+             this.s = s;
+             left = s.Length;
+             this.upto = 0;
+         }
+
+         /// <summary>
+         /// Reads the next character.
+         /// </summary>
+         /// <returns>The character as an <see cref="int"/>, or -1 when no characters remain.</returns>
+         public override int Read()
+         {
+             if (0 == left)
+             {
+                 return -1;
+             }
+
+             left--;
+             return s[upto++];
+         }
+
+         /// <summary>
+         /// Copies up to <paramref name="len"/> of the remaining characters into
+         /// <paramref name="c"/>, starting at index <paramref name="off"/>.
+         /// </summary>
+         /// <param name="c">Destination buffer.</param>
+         /// <param name="off">Index in <paramref name="c"/> at which to start writing.</param>
+         /// <param name="len">Maximum number of characters to copy.</param>
+         /// <returns>The number of characters copied; 0 when no characters remain (.NET semantics).</returns>
+         public override int Read(char[] c, int off, int len)
+         {
+             if (0 == left)
+             {
+                 return 0; // .NET semantics
+             }
+
+             int count = left > len ? len : left;
+
+             // The last argument of string.CopyTo is a character COUNT, not an end
+             // index (unlike Java's String.getChars), so pass the count itself.
+             s.CopyTo(upto, c, off, count);
+             upto += count;
+             left -= count;
+             return count;
+         }
+
+         // Deliberately empty: this reader holds nothing that needs releasing.
+         protected override void Dispose(bool disposing) { }
+     }
+ }
+}