You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/08/06 17:59:02 UTC

[04/33] lucenenet git commit: Ported Lucene.Net.Benchmark + tests

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Trec/Trec1MQReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/Trec1MQReader.cs b/src/Lucene.Net.Benchmark/Quality/Trec/Trec1MQReader.cs
new file mode 100644
index 0000000..85dceda
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Trec/Trec1MQReader.cs
@@ -0,0 +1,92 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+
+namespace Lucene.Net.Benchmarks.Quality.Trec
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Read topics of TREC 1MQ track.
+    /// <para/>
+    /// Expects this topic format -
+    /// <code>
+    ///     qnum:qtext
+    /// </code>
+    /// Comment lines starting with '#' are ignored.
+    /// <para/>
+    /// All topics will have a single name value pair.
+    /// </summary>
+    public class Trec1MQReader
+    {
+        private readonly string name; // key under which each topic's query text is stored
+
+        /// <summary>
+        /// Constructor for Trec's 1MQ TopicsReader
+        /// </summary>
+        /// <param name="name">Name of name-value pair to set for all queries.</param>
+        public Trec1MQReader(string name)
+        {
+            this.name = name;
+        }
+
+        /// <summary>
+        /// Read quality queries from trec 1MQ format topics file.
+        /// Blank lines and comment lines starting with '#' are skipped.
+        /// The <paramref name="reader"/> is disposed before this method returns.
+        /// </summary>
+        /// <param name="reader">where queries are read from.</param>
+        /// <returns>the result quality queries.</returns>
+        /// <exception cref="IOException">if cannot read the queries.</exception>
+        /// <exception cref="ArgumentOutOfRangeException">if a topic line contains no ':' separator.</exception>
+        public virtual QualityQuery[] ReadQueries(TextReader reader)
+        {
+            List<QualityQuery> res = new List<QualityQuery>();
+            try
+            {
+                string line;
+                while ((line = reader.ReadLine()) != null)
+                {
+                    line = line.Trim();
+                    // An empty line has no "qnum:qtext" pair to split, so skip it like a comment.
+                    if (line.Length == 0 || line[0] == '#')
+                    {
+                        continue;
+                    }
+                    // id is the text before the first ':', qtext is everything after it
+                    int k = line.IndexOf(':');
+                    string id = line.Substring(0, k).Trim();
+                    string qtext = line.Substring(k + 1).Trim();
+                    // we got a topic!
+                    IDictionary<string, string> fields = new Dictionary<string, string>();
+                    fields[name] = qtext;
+                    res.Add(new QualityQuery(id, fields));
+                }
+            }
+            finally
+            {
+                reader.Dispose();
+            }
+            // sort result array (by ID)
+            QualityQuery[] qq = res.ToArray();
+            Array.Sort(qq);
+            return qq;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Trec/TrecJudge.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/TrecJudge.cs b/src/Lucene.Net.Benchmark/Quality/Trec/TrecJudge.cs
new file mode 100644
index 0000000..386b130
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Trec/TrecJudge.cs
@@ -0,0 +1,186 @@
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.Quality.Trec
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Judge if given document is relevant to given quality query, based on Trec format for judgements.
+    /// </summary>
+    public class TrecJudge : IJudge
+    {
+        IDictionary<string, QRelJudgement> judgements;
+
+        /// <summary>
+        /// Builds the judgements table by reading every line of <paramref name="reader"/>.
+        /// </summary>
+        /// <remarks>
+        /// Each non-empty line that does not start with '#' must look like:
+        /// <code>
+        ///     qnum  0   doc-name     is-relevant
+        /// </code>
+        /// for example:
+        /// <code>
+        ///     19    0   doc303       1
+        ///     19    0   doc7295      0
+        /// </code>
+        /// Only lines whose fourth column is not "0" are recorded; the reader is disposed at the end.
+        /// </remarks>
+        /// <param name="reader">Where judgments are read from.</param>
+        /// <exception cref="IOException">If there is a low-level I/O error.</exception>
+        public TrecJudge(TextReader reader)
+        {
+            judgements = new Dictionary<string, QRelJudgement>();
+            QRelJudgement current = null; // judgement used by the most recent relevant line
+
+            try
+            {
+                for (string raw = reader.ReadLine(); raw != null; raw = reader.ReadLine())
+                {
+                    string line = raw.Trim();
+                    if (line.Length == 0 || line[0] == '#')
+                    {
+                        continue;
+                    }
+                    // columns: qnum, an unused value, doc-name, is-relevant
+                    StringTokenizer tokens = new StringTokenizer(line);
+                    string queryID = tokens.NextToken();
+                    tokens.NextToken();
+                    string docName = tokens.NextToken();
+                    bool irrelevant = "0".Equals(tokens.NextToken(), StringComparison.Ordinal);
+                    // LUCENENET: don't call NextToken() unless the condition fails.
+                    Debug.Assert(!tokens.HasMoreTokens(), "wrong format: " + line + "  next: " + (tokens.HasMoreTokens() ? tokens.NextToken() : ""));
+                    if (irrelevant)
+                    {
+                        continue; // only keep relevant docs
+                    }
+                    // Reuse the previous judgement when the query ID repeats; otherwise look it up or create it.
+                    bool sameQuery = current != null && current.queryID.Equals(queryID, StringComparison.Ordinal);
+                    if (!sameQuery && (!judgements.TryGetValue(queryID, out current) || current == null))
+                    {
+                        current = new QRelJudgement(queryID);
+                        judgements[queryID] = current;
+                    }
+                    current.AddRelevantDoc(docName);
+                }
+            }
+            finally
+            {
+                reader.Dispose();
+            }
+        }
+
+        // True only when the query's ID has a recorded judgement that lists docName as relevant.
+        public virtual bool IsRelevant(string docName, QualityQuery query)
+        {
+            QRelJudgement judgement;
+            bool known = judgements.TryGetValue(query.QueryID, out judgement) && judgement != null;
+            return known && judgement.IsRelevant(docName);
+        }
+
+        /// <summary>The relevant documents recorded for one trec quality query.</summary>
+        private class QRelJudgement
+        {
+            internal readonly string queryID;
+
+            // Used as a set: every relevant doc name is stored as both key and value.
+            private readonly IDictionary<string, string> relevantDocs = new HashMap<string, string>();
+
+            internal QRelJudgement(string queryID)
+            {
+                this.queryID = queryID;
+            }
+
+            public virtual void AddRelevantDoc(string docName)
+            {
+                relevantDocs[docName] = docName;
+            }
+
+            internal virtual bool IsRelevant(string docName)
+            {
+                return relevantDocs.ContainsKey(docName);
+            }
+
+            // Number of distinct relevant doc names recorded so far.
+            public virtual int MaxRecall
+            {
+                get { return relevantDocs.Count; }
+            }
+        }
+
+        // Checks that the judged query IDs and the IDs in qq are the same set. Returns false when
+        // some query has no judgement or some judgement matches no query; when logger is non-null
+        // a WARNING line followed by the offending IDs is written for each kind of mismatch.
+        public virtual bool ValidateData(QualityQuery[] qq, TextWriter logger)
+        {
+            // Start from all judged query IDs; whatever is left after the loop matched no query.
+            IDictionary<string, QRelJudgement> missingQueries = new Dictionary<string, QRelJudgement>(judgements);
+            IList<string> missingJudgements = new List<string>();
+            foreach (QualityQuery query in qq)
+            {
+                string id = query.QueryID;
+                // Remove reports whether the key was present, so no separate ContainsKey lookup is needed.
+                if (!missingQueries.Remove(id))
+                {
+                    missingJudgements.Add(id);
+                }
+            }
+            bool isValid = true;
+            if (missingJudgements.Count > 0)
+            {
+                isValid = false;
+                if (logger != null)
+                {
+                    logger.WriteLine("WARNING: " + missingJudgements.Count + " queries have no judgments! - ");
+                    foreach (string missing in missingJudgements)
+                    {
+                        logger.WriteLine("   " + missing);
+                    }
+                }
+            }
+            if (missingQueries.Count > 0)
+            {
+                isValid = false;
+                if (logger != null)
+                {
+                    logger.WriteLine("WARNING: " + missingQueries.Count + " judgments match no query! - ");
+                    foreach (string unmatched in missingQueries.Keys)
+                    {
+                        logger.WriteLine("   " + unmatched);
+                    }
+                }
+            }
+            return isValid;
+        }
+
+        // Number of relevant docs recorded for the query's ID; 0 when the ID has no judgement.
+        public virtual int MaxRecall(QualityQuery query)
+        {
+            QRelJudgement judgement;
+            if (!judgements.TryGetValue(query.QueryID, out judgement) || judgement == null)
+            {
+                return 0;
+            }
+            return judgement.MaxRecall;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Trec/TrecTopicsReader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/TrecTopicsReader.cs b/src/Lucene.Net.Benchmark/Quality/Trec/TrecTopicsReader.cs
new file mode 100644
index 0000000..158386f
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Trec/TrecTopicsReader.cs
@@ -0,0 +1,154 @@
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.Quality.Trec
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Read TREC topics.
+    /// </summary>
+    /// <remarks>
+    /// Expects this topic format -
+    /// <code>
+    ///   &lt;top&gt;
+    ///   &lt;num&gt; Number: nnn
+    ///     
+    ///   &lt;title&gt; title of the topic
+    ///
+    ///   &lt;desc&gt; Description:
+    ///   description of the topic
+    ///
+    ///   &lt;narr&gt; Narrative:
+    ///   "story" composed by assessors.
+    ///
+    ///   &lt;/top&gt;
+    /// </code>
+    /// Comment lines starting with '#' are ignored.
+    /// </remarks>
+    public class TrecTopicsReader
+    {
+        private static readonly string newline = Environment.NewLine;
+
+        /// <summary>
+        /// Creates a reader for TREC format topics files.
+        /// This reader keeps no per-instance state.
+        /// </summary>
+        public TrecTopicsReader()
+        {
+        }
+
+        /// <summary>
+        /// Parses every <c>&lt;top&gt;</c> ... <c>&lt;/top&gt;</c> topic found in a trec format topics file.
+        /// The <paramref name="reader"/> is disposed before this method returns.
+        /// </summary>
+        /// <param name="reader">where queries are read from.</param>
+        /// <returns>the result quality queries.</returns>
+        /// <exception cref="IOException">if cannot read the queries.</exception>
+        public virtual QualityQuery[] ReadQueries(TextReader reader)
+        {
+            IList<QualityQuery> topics = new List<QualityQuery>();
+            try
+            {
+                while (Read(reader, "<top>", null, false, false) != null)
+                {
+                    // id: whatever follows the ':' on the <num> line
+                    StringBuilder numLine = Read(reader, "<num>", null, true, false);
+                    int colon = numLine.IndexOf(":");
+                    string id = numLine.ToString(colon + 1, numLine.Length - (colon + 1)).Trim();
+
+                    // title: whatever follows the first '>' on the <title> line
+                    StringBuilder titleLine = Read(reader, "<title>", null, true, false);
+                    int bracket = titleLine.IndexOf(">");
+                    string title = titleLine.ToString(bracket + 1, titleLine.Length - (bracket + 1)).Trim();
+
+                    // description: skip ahead to the <desc> line, then take the lines up to <narr>
+                    Read(reader, "<desc>", null, false, false);
+                    string description = JoinLinesUntil(reader, "<narr>");
+
+                    // narrative: the lines up to the closing </top>
+                    string narrative = JoinLinesUntil(reader, "</top>");
+
+                    // we got a topic!
+                    IDictionary<string, string> fields = new Dictionary<string, string>();
+                    fields["title"] = title;
+                    fields["description"] = description;
+                    fields["narrative"] = narrative;
+                    topics.Add(new QualityQuery(id, fields));
+                }
+            }
+            finally
+            {
+                reader.Dispose();
+            }
+            // sort result array (by ID)
+            QualityQuery[] sorted = topics.ToArray();
+            Array.Sort(sorted);
+            return sorted;
+        }
+
+        // Consumes lines through the first one starting with stopPrefix (or to the end of input)
+        // and returns the lines before it joined by single spaces and trimmed.
+        private static string JoinLinesUntil(TextReader reader, string stopPrefix)
+        {
+            StringBuilder joined = new StringBuilder();
+            string line;
+            while ((line = reader.ReadLine()) != null && !line.StartsWith(stopPrefix, StringComparison.Ordinal))
+            {
+                if (joined.Length > 0) joined.Append(' ');
+                joined.Append(line);
+            }
+            return joined.ToString().Trim();
+        }
+
+        // Scans forward to the first line that starts with the given prefix. Returns null if the
+        // input ends first. Lines before the match are appended to sb only when collectAll is set,
+        // the matching line only when collectMatchLine is set; appended lines are newline-separated.
+        private StringBuilder Read(TextReader reader, string prefix, StringBuilder sb, bool collectMatchLine, bool collectAll)
+        {
+            StringBuilder target = sb ?? new StringBuilder();
+            bool appended = false; // true once this call has appended a line
+            string line;
+
+            while ((line = reader.ReadLine()) != null)
+            {
+                bool matched = line.StartsWith(prefix, StringComparison.Ordinal);
+                // the matching line and the lines before it are collected under different flags
+                if (matched ? collectMatchLine : collectAll)
+                {
+                    if (appended)
+                    {
+                        target.Append(newline);
+                    }
+                    target.Append(line);
+                    appended = true;
+                }
+                if (matched)
+                {
+                    return target;
+                }
+            }
+            // end of input reached before any line matched
+            return null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Utils/DocNameExtractor.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Utils/DocNameExtractor.cs b/src/Lucene.Net.Benchmark/Quality/Utils/DocNameExtractor.cs
new file mode 100644
index 0000000..6e5cc0f
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Utils/DocNameExtractor.cs
@@ -0,0 +1,89 @@
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Lucene.Net.Benchmarks.Quality.Utils
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Utility: extract doc names from an index
+    /// </summary>
+    public class DocNameExtractor
+    {
+        private readonly string docNameField;
+
+        /// <summary>
+        /// Creates an extractor that reads doc names from the given stored field.
+        /// </summary>
+        /// <param name="docNameField">name of the stored field containing the doc name.</param>
+        public DocNameExtractor(string docNameField)
+        {
+            this.docNameField = docNameField;
+        }
+
+        /// <summary>
+        /// Extract the name of the input doc from the index.
+        /// </summary>
+        /// <param name="searcher">access to the index.</param>
+        /// <param name="docid">ID of doc whose name is needed.</param>
+        /// <returns>the first string value visited for the doc name field, or <c>null</c> if none was visited.</returns>
+        /// <exception cref="System.IO.IOException">if cannot extract the doc name from the index.</exception>
+        public virtual string DocName(IndexSearcher searcher, int docid)
+        {
+            FirstValueVisitor visitor = new FirstValueVisitor(docNameField);
+            searcher.IndexReader.Document(docid, visitor);
+            return visitor.Value;
+        }
+
+        // Stored-field visitor that asks only for the configured field and stops after its first string value.
+        private sealed class FirstValueVisitor : StoredFieldVisitor
+        {
+            private readonly string wantedField;
+            private bool found;
+
+            internal FirstValueVisitor(string wantedField)
+            {
+                this.wantedField = wantedField;
+            }
+
+            internal string Value { get; private set; }
+
+            public override void StringField(FieldInfo fieldInfo, string value)
+            {
+                if (!found)
+                {
+                    Value = value;
+                    found = true;
+                }
+            }
+
+            public override Status NeedsField(FieldInfo fieldInfo)
+            {
+                if (found)
+                {
+                    return Status.STOP;
+                }
+                bool wanted = fieldInfo.Name.Equals(wantedField, StringComparison.Ordinal);
+                return wanted ? Status.YES : Status.NO;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Utils/QualityQueriesFinder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Utils/QualityQueriesFinder.cs b/src/Lucene.Net.Benchmark/Quality/Utils/QualityQueriesFinder.cs
new file mode 100644
index 0000000..062263a
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Utils/QualityQueriesFinder.cs
@@ -0,0 +1,152 @@
+using Lucene.Net.Index;
+using Lucene.Net.Store;
+using Lucene.Net.Support;
+using System;
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.Quality.Utils
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Suggest Quality queries based on an index contents.
+    /// Utility class, used for making quality test benchmarks.
+    /// </summary>
+    public class QualityQueriesFinder
+    {
+        private static readonly string newline = Environment.NewLine;
+        private Store.Directory dir;
+
+        /// <summary>
+        /// Creates a finder over a directory containing the index; the directory is only stored here.
+        /// </summary>
+        /// <param name="dir">Directory containing the index we search for the quality test.</param>
+        private QualityQueriesFinder(Store.Directory dir)
+        {
+            this.dir = dir; // read later by BestTerms through DirectoryReader.Open
+        }
+
+        /// <summary>Prints up to 20 suggested queries for the "body" field of an index as TREC topics.</summary>
+        /// <param name="args">{index-dir}</param>
+        /// <exception cref="IOException">if cannot access the index.</exception>
+        public static void Main(string[] args)
+        {
+            if (args.Length < 1)
+            {
+                SystemConsole.Error.WriteLine("Usage: QualityQueriesFinder <index-dir>");
+                Environment.Exit(1);
+            }
+            using (Store.Directory indexDir = FSDirectory.Open(new DirectoryInfo(args[0]))) // released when done
+            {
+                string[] q = new QualityQueriesFinder(indexDir).BestQueries("body", 20);
+                for (int i = 0; i < q.Length; i++)
+                {
+                    SystemConsole.WriteLine(newline + FormatQueryAsTrecTopic(i, q[i], null, null));
+                }
+            }
+        }
+
+        // Builds n/4 four-word queries from the n best terms of the given field; query i combines words i, m+i, n-1-m-i and n-1-i.
+        private string[] BestQueries(string field, int numQueries)
+        {
+            string[] words = BestTerms(field, 4 * numQueries);
+            int n = words.Length;
+            int m = n / 4;
+            string[] res = new string[m];
+            for (int i = 0; i < res.Length; i++)
+            {
+                res[i] = words[i] + " " + words[m + i] + "  " + words[n - 1 - m - i] + " " + words[n - 1 - i];
+            }
+            return res;
+        }
+
+        // Renders one topic in the top/num/title/desc/narr layout; null parts become empty text.
+        private static string FormatQueryAsTrecTopic(int qnum, string title, string description, string narrative)
+        {
+            string[] lines =
+            {
+                "<top>", "<num> Number: " + qnum, "",
+                "<title> " + (title ?? ""), "",
+                "<desc> Description:", description ?? "", "",
+                "<narr> Narrative:", narrative ?? "", "", "</top>"
+            };
+            return string.Join(newline, lines);
+        }
+
+        // Offers every term of the field whose doc frequency is below a tenth of MaxDoc to a
+        // TermsDfQueue created with numTerms, then pops the queue into the returned array.
+        // Each popped term is also echoed to the console with its 1-based position.
+        private string[] BestTerms(string field, int numTerms)
+        {
+            Util.PriorityQueue<TermDf> queue = new TermsDfQueue(numTerms);
+            using (IndexReader reader = DirectoryReader.Open(dir))
+            {
+                int tooCommon = reader.MaxDoc / 10; // ignore words too common.
+                Terms fieldTerms = MultiFields.GetTerms(reader, field);
+                if (fieldTerms != null)
+                {
+                    TermsEnum it = fieldTerms.GetIterator(null);
+                    while (it.Next() != null)
+                    {
+                        int docFreq = it.DocFreq;
+                        if (docFreq >= tooCommon)
+                        {
+                            continue;
+                        }
+                        string text = it.Term.Utf8ToString();
+                        queue.InsertWithOverflow(new TermDf(text, docFreq));
+                    }
+                }
+            }
+
+            // drain the queue into the result array
+            string[] res = new string[queue.Count];
+            for (int i = 0; queue.Count > 0; i++)
+            {
+                TermDf top = queue.Pop();
+                res[i] = top.word;
+                SystemConsole.WriteLine((i + 1) + ".   word:  " + top.df + "   " + top.word);
+            }
+            return res;
+        }
+
+        private class TermDf
+        {
+            internal readonly string word; // term text
+            internal readonly int df;      // document frequency of the term
+            internal TermDf(string word, int freq)
+            {
+                this.word = word;
+                df = freq;
+            }
+        }
+
+        // Queue of TermDf entries ordered by document frequency.
+        private class TermsDfQueue : Util.PriorityQueue<TermDf>
+        {
+            internal TermsDfQueue(int maxSize) : base(maxSize)
+            {
+            }
+
+            protected override bool LessThan(TermDf a, TermDf b)
+            {
+                return b.df > a.df;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Utils/SimpleQQParser.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Utils/SimpleQQParser.cs b/src/Lucene.Net.Benchmark/Quality/Utils/SimpleQQParser.cs
new file mode 100644
index 0000000..0711e86
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Utils/SimpleQQParser.cs
@@ -0,0 +1,76 @@
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.QueryParsers.Classic;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using System.Threading;
+
+namespace Lucene.Net.Benchmarks.Quality.Utils
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Simplistic quality query parser. A Lucene query is created by passing 
+    /// the value of the specified <see cref="QualityQuery"/> name-value pair(s) into 
+    /// a Lucene's <see cref="QueryParser"/> using <see cref="StandardAnalyzer"/>.
+    /// </summary>
+    public class SimpleQQParser : IQualityQueryParser
+    {
+        private readonly string[] qqNames;
+        private readonly string indexField;
+        private readonly ThreadLocal<QueryParser> queryParser = new ThreadLocal<QueryParser>();
+
+        /// <summary>
+        /// Creates a parser that builds its query from several name-value pairs.
+        /// </summary>
+        /// <param name="qqNames">Name-value pairs of quality query to use for creating the query.</param>
+        /// <param name="indexField">Corresponding index field.</param>
+        public SimpleQQParser(string[] qqNames, string indexField)
+        {
+            this.indexField = indexField;
+            this.qqNames = qqNames;
+        }
+
+        /// <summary>
+        /// Creates a parser that builds its query from a single name-value pair.
+        /// </summary>
+        /// <param name="qqName">Name-value pair of quality query to use for creating the query.</param>
+        /// <param name="indexField">Corresponding index field.</param>
+        public SimpleQQParser(string qqName, string indexField)
+            : this(new[] { qqName }, indexField)
+        {
+        }
+
+        /// <seealso cref="IQualityQueryParser.Parse(QualityQuery)"/>
+        public virtual Query Parse(QualityQuery qq)
+        {
+            QueryParser parser = queryParser.Value; // this thread's parser, null until first use
+            if (parser == null)
+            {
+#pragma warning disable 612, 618
+                queryParser.Value = parser = new QueryParser(LuceneVersion.LUCENE_CURRENT, indexField, new StandardAnalyzer(LuceneVersion.LUCENE_CURRENT));
+#pragma warning restore 612, 618
+            }
+            BooleanQuery result = new BooleanQuery();
+            foreach (string qqName in qqNames)
+            {
+                result.Add(parser.Parse(QueryParserBase.Escape(qq.GetValue(qqName))), Occur.SHOULD);
+            }
+            return result;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Quality/Utils/SubmissionReport.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Quality/Utils/SubmissionReport.cs b/src/Lucene.Net.Benchmark/Quality/Utils/SubmissionReport.cs
new file mode 100644
index 0000000..c31eddc
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Quality/Utils/SubmissionReport.cs
@@ -0,0 +1,98 @@
+using Lucene.Net.Search;
+using System;
+using System.Globalization;
+using System.IO;
+
+namespace Lucene.Net.Benchmarks.Quality.Utils
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Create a log ready for submission.
+    /// Extend this class and override
+    /// <see cref="Report(QualityQuery, TopDocs, string, IndexSearcher)"/>
+    /// to create different reports. 
+    /// </summary>
+    public class SubmissionReport
+    {
+        // Composite format string applied to each score (fixed-point, four decimals).
+        private readonly string nf;
+        // Destination of the report lines; when null nothing is written.
+        private readonly TextWriter logger;
+        // Run name, written as the last column of every report line.
+        private readonly string name;
+
+        /// <summary>
+        /// Constructor for <see cref="SubmissionReport"/>.
+        /// </summary>
+        /// <param name="logger">If <c>null</c>, no submission data is created.</param>
+        /// <param name="name">Name of this run.</param>
+        public SubmissionReport(TextWriter logger, string name)
+        {
+            this.logger = logger;
+            this.name = name;
+            nf = "{0:F4}";
+        }
+
+        /// <summary>
+        /// Report a search result for a certain quality query.
+        /// <para/>
+        /// Writes one line per entry of <c>td.ScoreDocs</c> containing, separated by " \t ":
+        /// the query id, the literal "Q0", the doc name, the zero-based position of the
+        /// result, the score, and the run name.
+        /// </summary>
+        /// <param name="qq">quality query for which the results are reported.</param>
+        /// <param name="td">search results for the query.</param>
+        /// <param name="docNameField">stored field used for fetching the result doc name.</param>
+        /// <param name="searcher">index access for fetching doc name.</param>
+        /// <exception cref="IOException">in case of a problem.</exception>
+        public virtual void Report(QualityQuery qq, TopDocs td, string docNameField, IndexSearcher searcher)
+        {
+            if (logger == null)
+            {
+                return;
+            }
+            ScoreDoc[] sd = td.ScoreDocs;
+            string sep = " \t ";
+            DocNameExtractor xt = new DocNameExtractor(docNameField);
+            for (int i = 0; i < sd.Length; i++)
+            {
+                string docName = xt.DocName(searcher, sd[i].Doc);
+                // The format provider must be the FIRST argument of string.Format; passed
+                // last it is treated as an unused format argument and the current culture
+                // is used, which can emit a decimal comma in the score column.
+                string score = string.Format(CultureInfo.InvariantCulture, nf, sd[i].Score);
+                logger.WriteLine(
+                  qq.QueryID + sep +
+                  "Q0" + sep +
+                  Format(docName, 20) + sep +
+                  Format(i.ToString(CultureInfo.InvariantCulture), 7) + sep +
+                  score + sep +
+                  name
+                  );
+            }
+        }
+
+        /// <summary>
+        /// Flushes the underlying writer, if there is one.
+        /// </summary>
+        public virtual void Flush()
+        {
+            if (logger != null)
+            {
+                logger.Flush();
+            }
+        }
+
+        /// <summary>
+        /// Right-pads <paramref name="s"/> with spaces to at least <paramref name="minLen"/>
+        /// characters; longer strings are returned unchanged and <c>null</c> is treated as empty.
+        /// </summary>
+        private static string Format(string s, int minLen)
+        {
+            return (s ?? "").PadRight(minLen);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs b/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
new file mode 100644
index 0000000..8727fa0
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
@@ -0,0 +1,167 @@
+using Lucene.Net.Support;
+using System;
+using System.IO;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace Lucene.Net.Benchmarks.Utils
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
+    /// </summary>
+    public class ExtractReuters
+    {
+        private readonly DirectoryInfo reutersDir;
+        private readonly DirectoryInfo outputDir;
+        private static readonly string LINE_SEPARATOR = Environment.NewLine;
+
+        /// <summary>
+        /// Creates an extractor and deletes every file directly inside <paramref name="outputDir"/>.
+        /// </summary>
+        /// <param name="reutersDir">Directory that is searched for <c>*.sgm</c> files.</param>
+        /// <param name="outputDir">Directory the extracted text files are written to.</param>
+        public ExtractReuters(DirectoryInfo reutersDir, DirectoryInfo outputDir)
+        {
+            this.reutersDir = reutersDir;
+            this.outputDir = outputDir;
+            SystemConsole.WriteLine("Deleting all files in " + outputDir);
+            foreach (FileInfo f in outputDir.EnumerateFiles())
+            {
+                f.Delete();
+            }
+        }
+
+        /// <summary>
+        /// Runs <see cref="ExtractFile(FileInfo)"/> on every <c>*.sgm</c> file of the input
+        /// directory, or writes a message to the error stream when there is none.
+        /// </summary>
+        public virtual void Extract()
+        {
+            FileInfo[] sgmFiles = reutersDir.GetFiles("*.sgm");
+            if (sgmFiles != null && sgmFiles.Length > 0)
+            {
+                foreach (FileInfo sgmFile in sgmFiles)
+                {
+                    ExtractFile(sgmFile);
+                }
+            }
+            else
+            {
+                SystemConsole.Error.WriteLine("No .sgm files in " + reutersDir);
+            }
+        }
+
+        // Each match is exactly one of: a title, a date, or a body (one capture group per alternative).
+        internal Regex EXTRACTION_PATTERN = new Regex("<TITLE>(.*?)</TITLE>|<DATE>(.*?)</DATE>|<BODY>(.*?)</BODY>", RegexOptions.Compiled);
+
+        private static readonly string[] META_CHARS = { "&", "<", ">", "\"", "'" };
+
+        // Escaped forms, index-aligned with META_CHARS.
+        private static readonly string[] META_CHARS_SERIALIZATIONS = { "&amp;", "&lt;",
+            "&gt;", "&quot;", "&apos;" };
+
+        /// <summary>
+        /// Override if you wish to change what is extracted.
+        /// <para/>
+        /// Reads <paramref name="sgmFile"/> line by line, accumulating text until a line
+        /// containing <c>&lt;/REUTERS</c> is seen; the accumulated text is then matched against
+        /// <see cref="EXTRACTION_PATTERN"/>, the escape sequences are replaced, and the result is
+        /// written to <c>{sgmFile.Name}-{n}.txt</c> in the output directory.
+        /// </summary>
+        /// <param name="sgmFile">The SGML file to split.</param>
+        /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while reading or writing.</exception>
+        protected virtual void ExtractFile(FileInfo sgmFile)
+        {
+            try
+            {
+                using (TextReader reader = new StreamReader(new FileStream(sgmFile.FullName, FileMode.Open, FileAccess.Read), Encoding.UTF8))
+                {
+                    StringBuilder buffer = new StringBuilder(1024);
+                    StringBuilder outBuffer = new StringBuilder(1024);
+
+                    string line;
+                    int docNumber = 0;
+                    while ((line = reader.ReadLine()) != null)
+                    {
+                        // Ordinal search: this is a markup token, not linguistic text.
+                        if (line.IndexOf("</REUTERS", StringComparison.Ordinal) == -1)
+                        {
+                            // Not at the end of a document yet: accumulate the line and
+                            // apply the regular expression once the closing tag is seen.
+                            buffer.Append(line).Append(' ');
+                            continue;
+                        }
+
+                        // Closing tag seen: extract the relevant pieces of the accumulated document.
+                        for (Match match = EXTRACTION_PATTERN.Match(buffer.ToString()); match.Success; match = match.NextMatch())
+                        {
+                            // Groups[0] is the whole match and Groups.Count includes it, so the
+                            // capture groups are 1 .. Count - 1 (unlike Java's groupCount()).
+                            for (int i = 1; i < match.Groups.Count; i++)
+                            {
+                                Group group = match.Groups[i];
+                                if (group.Success)
+                                {
+                                    outBuffer.Append(group.Value);
+                                }
+                            }
+                            outBuffer.Append(LINE_SEPARATOR).Append(LINE_SEPARATOR);
+                        }
+
+                        string @out = outBuffer.ToString();
+                        for (int i = 0; i < META_CHARS_SERIALIZATIONS.Length; i++)
+                        {
+                            @out = @out.Replace(META_CHARS_SERIALIZATIONS[i], META_CHARS[i]);
+                        }
+
+                        string outFile = System.IO.Path.Combine(outputDir.FullName, sgmFile.Name + "-"
+                            + (docNumber++) + ".txt");
+                        // 'using' guarantees the file handle is released even if the write fails.
+                        using (StreamWriter writer = new StreamWriter(new FileStream(outFile, FileMode.Create, FileAccess.Write), Encoding.UTF8))
+                        {
+                            writer.Write(@out);
+                        }
+
+                        // Start the next document with empty buffers.
+                        outBuffer.Length = 0;
+                        buffer.Length = 0;
+                    }
+                }
+            }
+            catch (IOException e)
+            {
+                throw new Exception(e.ToString(), e);
+            }
+        }
+
+        /// <summary>
+        /// Command-line entry point. Expects two arguments: the directory with the Reuters
+        /// SGM files and the output directory.
+        /// </summary>
+        public static void Main(string[] args)
+        {
+            if (args.Length != 2)
+            {
+                Usage("Wrong number of arguments (" + args.Length + ")");
+                return;
+            }
+            DirectoryInfo reutersDir = new DirectoryInfo(args[0]);
+            if (!reutersDir.Exists)
+            {
+                Usage("Cannot find Path to Reuters SGM files (" + reutersDir + ")");
+                return;
+            }
+
+            // First, extract to a tmp directory and only if everything succeeds, rename
+            // to output directory.
+            DirectoryInfo outputDir = new DirectoryInfo(args[1]);
+            outputDir = new DirectoryInfo(outputDir.FullName + "-tmp");
+            outputDir.Create();
+            ExtractReuters extractor = new ExtractReuters(reutersDir, outputDir);
+            extractor.Extract();
+            // Now rename to requested output dir
+            outputDir.MoveTo(args[1]);
+        }
+
+        private static void Usage(string msg)
+        {
+            SystemConsole.Error.WriteLine("Usage: " + msg + " :: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters <Path to Reuters SGM files> <Output Path>");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs b/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
new file mode 100644
index 0000000..b61fbc5
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
@@ -0,0 +1,178 @@
+using Lucene.Net.Benchmarks.ByTask.Feeds;
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Documents;
+using Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.Utils
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Extract the downloaded Wikipedia dump into separate files for indexing.
+    /// </summary>
+    public class ExtractWikipedia
+    {
+        private readonly DirectoryInfo outputDir;
+
+        // Number of documents created so far; drives the directory chosen for each new file.
+        public static int count = 0;
+
+        internal static readonly int BASE = 10;
+        protected DocMaker m_docMaker;
+
+        /// <summary>
+        /// Creates an extractor and deletes every file directly inside <paramref name="outputDir"/>.
+        /// </summary>
+        /// <param name="docMaker">Source of the documents to write out.</param>
+        /// <param name="outputDir">Root directory of the extracted files.</param>
+        public ExtractWikipedia(DocMaker docMaker, DirectoryInfo outputDir)
+        {
+            this.outputDir = outputDir;
+            this.m_docMaker = docMaker;
+            SystemConsole.WriteLine("Deleting all files in " + outputDir);
+            FileInfo[] files = outputDir.GetFiles();
+            for (int i = 0; i < files.Length; i++)
+            {
+                files[i].Delete();
+            }
+        }
+
+        /// <summary>
+        /// Maps a document number to a nested directory built from its decimal digits.
+        /// Numbers below <see cref="BASE"/> map to <paramref name="directory"/> itself; otherwise
+        /// two path segments are appended (the largest power of <see cref="BASE"/> not exceeding
+        /// <paramref name="count"/>, then the leading digit) and the method recurses on the remainder.
+        /// </summary>
+        /// <param name="count">Document number.</param>
+        /// <param name="directory">Directory to start from, or <c>null</c> for the output directory.</param>
+        public virtual DirectoryInfo Directory(int count, DirectoryInfo directory)
+        {
+            if (directory == null)
+            {
+                directory = outputDir;
+            }
+            if (count < BASE)
+            {
+                return directory;
+            }
+            // Smallest power of BASE that is strictly greater than count.
+            int @base = BASE;
+            while (@base <= count)
+            {
+                @base *= BASE;
+            }
+            int magnitude = @base / BASE;
+            directory = new DirectoryInfo(System.IO.Path.Combine(directory.FullName, magnitude.ToString(CultureInfo.InvariantCulture)));
+            directory = new DirectoryInfo(System.IO.Path.Combine(directory.FullName, (count / magnitude).ToString(CultureInfo.InvariantCulture)));
+            return Directory(count % magnitude, directory);
+        }
+
+        /// <summary>
+        /// Writes one document to <c>{id}.txt</c> in the directory selected by
+        /// <see cref="Directory(int, DirectoryInfo)"/> and increments <see cref="count"/>.
+        /// The file holds the time, the title and the body, separated by blank lines.
+        /// </summary>
+        /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while writing.</exception>
+        public virtual void Create(string id, string title, string time, string body)
+        {
+            DirectoryInfo d = Directory(count++, null);
+            d.Create();
+            FileInfo f = new FileInfo(System.IO.Path.Combine(d.FullName, id + ".txt"));
+
+            StringBuilder contents = new StringBuilder();
+
+            contents.Append(time);
+            contents.Append("\n\n");
+            contents.Append(title);
+            contents.Append("\n\n");
+            contents.Append(body);
+            contents.Append("\n");
+
+            try
+            {
+                using (TextWriter writer = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8))
+                {
+                    writer.Write(contents.ToString());
+                }
+            }
+            catch (IOException ioe)
+            {
+                throw new Exception(ioe.ToString(), ioe);
+            }
+        }
+
+        /// <summary>
+        /// Pulls documents from the doc maker until it returns <c>null</c> or signals
+        /// <see cref="NoMoreDataException"/>, writing each one with
+        /// <see cref="Create(string, string, string, string)"/>, and reports the elapsed time.
+        /// </summary>
+        public virtual void Extract()
+        {
+            Document doc = null;
+            SystemConsole.WriteLine("Starting Extraction");
+            long start = Support.Time.CurrentTimeMilliseconds();
+            try
+            {
+                while ((doc = m_docMaker.MakeDocument()) != null)
+                {
+                    Create(doc.Get(DocMaker.ID_FIELD), doc.Get(DocMaker.TITLE_FIELD), doc
+                        .Get(DocMaker.DATE_FIELD), doc.Get(DocMaker.BODY_FIELD));
+                }
+            }
+            catch (NoMoreDataException /*e*/)
+            {
+                // Expected end-of-input signal; extraction is complete.
+            }
+            long finish = Support.Time.CurrentTimeMilliseconds();
+            SystemConsole.WriteLine("Extraction took " + (finish - start) + " ms");
+        }
+
+        /// <summary>
+        /// Command-line entry point. Prints the usage text and returns when the input file
+        /// is missing or an option that needs a value is the last argument.
+        /// </summary>
+        public static void Main(string[] args)
+        {
+            FileInfo wikipedia = null;
+            DirectoryInfo outputDir = new DirectoryInfo("./enwiki");
+            bool keepImageOnlyDocs = true;
+            for (int i = 0; i < args.Length; i++)
+            {
+                string arg = args[i];
+                bool isInput = arg.Equals("--input", StringComparison.Ordinal) || arg.Equals("-i", StringComparison.Ordinal);
+                bool isOutput = arg.Equals("--output", StringComparison.Ordinal) || arg.Equals("-o", StringComparison.Ordinal);
+                if (isInput || isOutput)
+                {
+                    // These options consume the following argument; make sure there is one.
+                    if (i + 1 >= args.Length)
+                    {
+                        PrintUsage();
+                        return;
+                    }
+                    string value = args[++i];
+                    if (isInput)
+                    {
+                        wikipedia = new FileInfo(value);
+                    }
+                    else
+                    {
+                        outputDir = new DirectoryInfo(value);
+                    }
+                }
+                else if (arg.Equals("--discardImageOnlyDocs", StringComparison.Ordinal) || arg.Equals("-d", StringComparison.Ordinal))
+                {
+                    keepImageOnlyDocs = false;
+                }
+            }
+
+            // Validate the input before anything is configured from it: without --input the
+            // variable is still null and reading its FullName below would throw.
+            if (wikipedia == null || !wikipedia.Exists)
+            {
+                PrintUsage();
+                return;
+            }
+
+            IDictionary<string, string> properties = new Dictionary<string, string>();
+            properties["docs.file"] = wikipedia.FullName;
+            properties["content.source.forever"] = "false";
+            properties["keep.image.only.docs"] = keepImageOnlyDocs.ToString();
+            Config config = new Config(properties);
+
+            ContentSource source = new EnwikiContentSource();
+            source.SetConfig(config);
+
+            DocMaker docMaker = new DocMaker();
+            docMaker.SetConfig(config, source);
+            docMaker.ResetInputs();
+
+            SystemConsole.WriteLine("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");
+            outputDir.Create();
+            ExtractWikipedia extractor = new ExtractWikipedia(docMaker, outputDir);
+            extractor.Extract();
+        }
+
+        private static void PrintUsage()
+        {
+            SystemConsole.Error.WriteLine("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractWikipedia --input|-i <Path to Wikipedia XML file> " +
+                    "[--output|-o <Output Path>] [--discardImageOnlyDocs|-d]");
+            SystemConsole.Error.WriteLine("--discardImageOnlyDocs tells the extractor to skip Wiki docs that contain only images");
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Benchmark/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/project.json b/src/Lucene.Net.Benchmark/project.json
new file mode 100644
index 0000000..adac6d5
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/project.json
@@ -0,0 +1,53 @@
+{
+  "version": "4.8.0",
+  "title": "Lucene.Net.Benchmark",
+  "description": "System for benchmarking the Lucene.Net full-text search engine library from The Apache Software Foundation.",
+  "authors": [ "The Apache Software Foundation" ],
+  "packOptions": {
+    "projectUrl": "http://lucenenet.apache.org/",
+    "licenseUrl": "https://github.com/apache/lucenenet/blob/master/LICENSE.txt",
+    "iconUrl": "https://github.com/apache/lucenenet/blob/master/branding/logo/lucene-net-icon-128x128.png?raw=true",
+    "owners": [ "The Apache Software Foundation" ],
+    "repository": { "url": "https://github.com/apache/lucenenet" },
+    "tags": [ "lucene.net", "core", "text", "search", "information", "retrieval", "lucene", "apache", "analysis", "index", "query" ]
+  },
+  "buildOptions": {
+    "compile": {
+      "includeFiles": [ "../CommonAssemblyInfo.cs" ]
+    },
+    "nowarn": [ "1591", "1573" ]
+  },
+  "dependencies": {
+	"icu.net": "54.1.1-alpha",
+    "Lucene.Net": "4.8.0",
+    "Lucene.Net.Analysis.Common": "4.8.0",
+	"Lucene.Net.Facet": "4.8.0",
+	"Lucene.Net.Highlighter": "4.8.0",
+	"Lucene.Net.ICU": "4.8.0",
+	"Lucene.Net.Queries": "4.8.0",
+	"Lucene.Net.QueryParser": "4.8.0",
+	"Lucene.Net.Spatial": "4.8.0",
+	"Sax.Net": "2.0.2",
+	"SharpZipLib": "0.86.0",
+    "Spatial4n.Core": "0.4.1-beta00003",
+	"TagSoup.Net": "1.2.1.1"
+  },
+  "frameworks": {
+    "netstandard1.5": {
+      "imports": "dnxcore50",
+      "buildOptions": {
+        "debugType": "portable",
+        "define": [ "NETSTANDARD" ]
+      },
+      "dependencies": {
+        "NETStandard.Library": "1.6.0"
+      }
+    },
+    "net451": {
+      "buildOptions": {
+        "debugType": "full",
+        "define": [ "FEATURE_SERIALIZABLE" ]
+      }
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.TestFramework/Util/TestUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Util/TestUtil.cs b/src/Lucene.Net.TestFramework/Util/TestUtil.cs
index e7eb247..bfc73dd 100644
--- a/src/Lucene.Net.TestFramework/Util/TestUtil.cs
+++ b/src/Lucene.Net.TestFramework/Util/TestUtil.cs
@@ -148,11 +148,20 @@ namespace Lucene.Net.Util
             {
                 foreach (var entry in zip.Entries)
                 {
+                    // Ignore internal folders - these are tacked onto the FullName anyway
+                    if (entry.FullName.EndsWith("/", StringComparison.Ordinal) || entry.FullName.EndsWith("\\", StringComparison.Ordinal))
+                    {
+                        continue;
+                    }
                     using (Stream input = entry.Open())
                     {
-                        FileInfo targetFile = new FileInfo(Path.Combine(destDir.FullName, entry.FullName));
+                        FileInfo targetFile = new FileInfo(CorrectPath(Path.Combine(destDir.FullName, entry.FullName)));
+                        if (!targetFile.Directory.Exists)
+                        {
+                            targetFile.Directory.Create();
+                        }
 
-                        using (Stream output = new FileStream(targetFile.FullName, FileMode.OpenOrCreate, FileAccess.Write))
+                        using (Stream output = new FileStream(targetFile.FullName, FileMode.Create, FileAccess.Write))
                         {
                             input.CopyTo(output);
                         }
@@ -161,6 +170,15 @@ namespace Lucene.Net.Util
             }
         }
 
+        private static string CorrectPath(string input)
+        {
+            // Rewrite every separator to the one native to the current platform.
+            bool forwardSlashNative = Path.DirectorySeparatorChar == '/';
+            char foreign = forwardSlashNative ? '\\' : '/';
+            char native = forwardSlashNative ? '/' : '\\';
+            return input.Replace(foreign, native);
+        }
+
         public static void SyncConcurrentMerges(IndexWriter writer)
         {
             SyncConcurrentMerges(writer.Config.MergeScheduler);

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/BenchmarkTestCase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Benchmark/BenchmarkTestCase.cs b/src/Lucene.Net.Tests.Benchmark/BenchmarkTestCase.cs
new file mode 100644
index 0000000..8981ee0
--- /dev/null
+++ b/src/Lucene.Net.Tests.Benchmark/BenchmarkTestCase.cs
@@ -0,0 +1,129 @@
+using Lucene.Net.Benchmarks.ByTask;
+using Lucene.Net.Util;
+using System;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Base class for all Benchmark unit tests.
+    /// </summary>
+    public abstract class BenchmarkTestCase : LuceneTestCase
+    {
+        // Temp directory shared by the tests of the current class; set in BeforeClass, cleared in AfterClass.
+        private static DirectoryInfo WORKDIR;
+
+        /// <summary>
+        /// Creates the work directory and the property lines that are prepended to every algorithm.
+        /// </summary>
+        public override void BeforeClass()
+        {
+            base.BeforeClass();
+            WORKDIR = CreateTempDir("benchmark");
+            // LUCENENET: Our directory numbers are sequential. Doing a delete
+            // here will make threads collide.
+            //WORKDIR.Delete();
+            //WORKDIR.Create();
+
+            propLines = new string[] {
+                "work.dir=" + getWorkDirPath(),
+                "directory=RAMDirectory",
+                "print.props=false",
+            };
+        }
+
+        public override void AfterClass()
+        {
+            WORKDIR = null;
+            base.AfterClass();
+        }
+
+        /// <summary>Returns the work directory created in <see cref="BeforeClass"/>.</summary>
+        public DirectoryInfo getWorkDir()
+        {
+            return WORKDIR;
+        }
+
+        /// <summary>Copy a resource into the workdir.</summary>
+        /// <param name="resourceName">Name of the resource; also used as the target file name.</param>
+        public void copyToWorkDir(string resourceName)
+        {
+            string target = System.IO.Path.Combine(getWorkDir().FullName, resourceName);
+            // 'using' releases both streams even when the copy fails part-way.
+            using (Stream resource = GetType().getResourceAsStream(resourceName))
+            using (Stream dest = new FileStream(target, FileMode.Create, FileAccess.Write))
+            {
+                resource.CopyTo(dest);
+            }
+        }
+
+        /// <summary>Return a path, suitable for a .alg config file, for a resource in the workdir.</summary>
+        public String getWorkDirResourcePath(String resourceName)
+        {
+            // Backslashes are replaced with forward slashes.
+            return System.IO.Path.Combine(getWorkDir().FullName, resourceName).Replace("\\", "/");
+        }
+
+        /// <summary>Return a path, suitable for a .alg config file, for the workdir.</summary>
+        public String getWorkDirPath()
+        {
+            return getWorkDir().FullName.Replace("\\", "/");
+        }
+
+        /// <summary>Create the benchmark from the given algorithm lines and execute it.</summary>
+        /// <param name="algLines">Algorithm lines; the common property lines are prepended.</param>
+        /// <returns>The executed benchmark.</returns>
+        public Benchmark execBenchmark(String[] algLines)
+        {
+            String algText = algLinesToText(algLines);
+            logTstLogic(algText);
+            Benchmark benchmark = new Benchmark(new StringReader(algText));
+            benchmark.Execute();
+            return benchmark;
+        }
+
+        // properties in effect in all tests here
+        String[] propLines;
+
+        static readonly String NEW_LINE = Environment.NewLine;
+
+        // Concatenate the property lines and the alg lines, each indented and newline-terminated.
+        private String algLinesToText(String[] algLines)
+        {
+            String indent = "  ";
+            StringBuilder sb = new StringBuilder();
+            foreach (String propLine in propLines)
+            {
+                sb.Append(indent).Append(propLine).Append(NEW_LINE);
+            }
+            foreach (String algLine in algLines)
+            {
+                sb.Append(indent).Append(algLine).Append(NEW_LINE);
+            }
+            return sb.ToString();
+        }
+
+        // Prints the algorithm text, but only when VERBOSE is set.
+        private static void logTstLogic(String txt)
+        {
+            if (!VERBOSE)
+            {
+                return;
+            }
+            Console.WriteLine("Test logic of:");
+            Console.WriteLine(txt);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
new file mode 100644
index 0000000..301c807
--- /dev/null
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
@@ -0,0 +1,193 @@
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Benchmarks.ByTask.Tasks;
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Support;
+using NUnit.Framework;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests the functionality of <see cref="DocMaker"/>.
+    /// </summary>
+    public class DocMakerTest : BenchmarkTestCase
+    {
+        /// <summary>
+        /// A content source that hands out exactly one document and throws
+        /// <see cref="NoMoreDataException"/> on every later request.
+        /// </summary>
+        public sealed class OneDocSource : ContentSource
+        {
+            // Becomes true once the single document has been returned.
+            private bool finish = false;
+
+            protected override void Dispose(bool disposing)
+            {
+                // Nothing to release.
+            }
+
+            public override DocData GetNextDocData(DocData docData)
+            {
+                if (finish)
+                {
+                    throw new NoMoreDataException();
+                }
+
+                docData.Body = "body";
+                docData.SetDate("date");
+                docData.Title = "title";
+                docData.Props = new Dictionary<string, string>
+                {
+                    { "key", "value" }
+                };
+
+                // Flag is raised last, only after the document was populated successfully.
+                finish = true;
+                return docData;
+            }
+        }
+
+        /// <summary>
+        /// Indexes the single document produced by <see cref="OneDocSource"/> and asserts
+        /// how many documents match the term <c>key:value</c>.
+        /// </summary>
+        /// <param name="setIndexProps">Whether the <c>doc.index.props</c> property is set at all.</param>
+        /// <param name="indexPropsVal">The value given to <c>doc.index.props</c> when it is set.</param>
+        /// <param name="numExpectedResults">The expected total number of hits.</param>
+        private void doTestIndexProperties(bool setIndexProps,
+            bool indexPropsVal, int numExpectedResults)
+        {
+            Dictionary<string, string> props = new Dictionary<string, string>();
+
+            // Indexing configuration.
+            props["analyzer"] = typeof(WhitespaceAnalyzer).AssemblyQualifiedName;
+            props["content.source"] = typeof(OneDocSource).AssemblyQualifiedName;
+            props["directory"] = "RAMDirectory";
+            if (setIndexProps)
+            {
+                props["doc.index.props"] = indexPropsVal.ToString();
+            }
+
+            // Create PerfRunData
+            Config config = new Config(props);
+            PerfRunData runData = new PerfRunData(config);
+
+            TaskSequence tasks = new TaskSequence(runData, TestName, null, false);
+            tasks.AddTask(new CreateIndexTask(runData));
+            tasks.AddTask(new AddDocTask(runData));
+            tasks.AddTask(new CloseIndexTask(runData));
+            tasks.DoLogic();
+
+            IndexReader reader = DirectoryReader.Open(runData.Directory);
+            try
+            {
+                IndexSearcher searcher = NewSearcher(reader);
+                TopDocs td = searcher.Search(new TermQuery(new Term("key", "value")), 10);
+                assertEquals(numExpectedResults, td.TotalHits);
+            }
+            finally
+            {
+                // Dispose the reader even when the search or the assertion throws,
+                // so a failing case does not leave the reader open.
+                reader.Dispose();
+            }
+        }
+
+        /// <summary>
+        /// Builds a document through a <see cref="DocMaker"/> fed by <see cref="OneDocSource"/>,
+        /// optionally setting the <c>doc.tokenized.norms</c> and
+        /// <c>doc.body.tokenized.norms</c> properties first.
+        /// </summary>
+        /// <param name="setNormsProp">Whether <c>doc.tokenized.norms</c> is set.</param>
+        /// <param name="normsPropVal">Value for <c>doc.tokenized.norms</c> when set.</param>
+        /// <param name="setBodyNormsProp">Whether <c>doc.body.tokenized.norms</c> is set.</param>
+        /// <param name="bodyNormsVal">Value for <c>doc.body.tokenized.norms</c> when set.</param>
+        /// <returns>The document returned by <c>DocMaker.MakeDocument()</c>.</returns>
+        private Document createTestNormsDocument(bool setNormsProp,
+            bool normsPropVal, bool setBodyNormsProp, bool bodyNormsVal)
+        {
+            // Indexing configuration.
+            Dictionary<string, string> settings = new Dictionary<string, string>
+            {
+                { "analyzer", typeof(WhitespaceAnalyzer).AssemblyQualifiedName },
+                { "directory", "RAMDirectory" }
+            };
+
+            if (setNormsProp)
+            {
+                settings["doc.tokenized.norms"] = normsPropVal.ToString(CultureInfo.InvariantCulture);
+            }
+
+            if (setBodyNormsProp)
+            {
+                settings["doc.body.tokenized.norms"] = bodyNormsVal.ToString(CultureInfo.InvariantCulture);
+            }
+
+            Config config = new Config(settings);
+
+            DocMaker maker = new DocMaker();
+            maker.SetConfig(config, new OneDocSource());
+            return maker.MakeDocument();
+        }
+
+        /// <summary>
+        /// Tests the <c>doc.index.props</c> property.
+        /// </summary>
+        [Test]
+        public void TestIndexProperties()
+        {
+            // Property left unset: the default is to not index properties.
+            doTestIndexProperties(setIndexProps: false, indexPropsVal: false, numExpectedResults: 0);
+
+            // doc.index.props explicitly set to false.
+            doTestIndexProperties(setIndexProps: true, indexPropsVal: false, numExpectedResults: 0);
+
+            // doc.index.props explicitly set to true.
+            doTestIndexProperties(setIndexProps: true, indexPropsVal: true, numExpectedResults: 1);
+        }
+
+        /// <summary>
+        /// Tests the <c>doc.tokenized.norms</c> and <c>doc.body.tokenized.norms</c> properties.
+        /// </summary>
+        [Test]
+        public void TestNorms()
+        {
+            // Nothing set: defaults apply.
+            Document defaults = createTestNormsDocument(
+                setNormsProp: false, normsPropVal: false, setBodyNormsProp: false, bodyNormsVal: false);
+            assertTrue(defaults.GetField(DocMaker.TITLE_FIELD).FieldType.OmitNorms);
+            assertFalse(defaults.GetField(DocMaker.BODY_FIELD).FieldType.OmitNorms);
+
+            // Norms set to false.
+            Document normsOff = createTestNormsDocument(
+                setNormsProp: true, normsPropVal: false, setBodyNormsProp: false, bodyNormsVal: false);
+            assertTrue(normsOff.GetField(DocMaker.TITLE_FIELD).FieldType.OmitNorms);
+            assertFalse(normsOff.GetField(DocMaker.BODY_FIELD).FieldType.OmitNorms);
+
+            // Norms set to true.
+            Document normsOn = createTestNormsDocument(
+                setNormsProp: true, normsPropVal: true, setBodyNormsProp: false, bodyNormsVal: false);
+            assertFalse(normsOn.GetField(DocMaker.TITLE_FIELD).FieldType.OmitNorms);
+            assertFalse(normsOn.GetField(DocMaker.BODY_FIELD).FieldType.OmitNorms);
+
+            // Body norms set to false.
+            Document bodyNormsOff = createTestNormsDocument(
+                setNormsProp: false, normsPropVal: false, setBodyNormsProp: true, bodyNormsVal: false);
+            assertTrue(bodyNormsOff.GetField(DocMaker.TITLE_FIELD).FieldType.OmitNorms);
+            assertTrue(bodyNormsOff.GetField(DocMaker.BODY_FIELD).FieldType.OmitNorms);
+
+            // Body norms set to true.
+            Document bodyNormsOn = createTestNormsDocument(
+                setNormsProp: false, normsPropVal: false, setBodyNormsProp: true, bodyNormsVal: true);
+            assertTrue(bodyNormsOn.GetField(DocMaker.TITLE_FIELD).FieldType.OmitNorms);
+            assertFalse(bodyNormsOn.GetField(DocMaker.BODY_FIELD).FieldType.OmitNorms);
+        }
+
+        /// <summary>
+        /// Regression test: DocMaker did not close its ContentSource if resetInputs was
+        /// called twice, leading to a file handle leak.
+        /// </summary>
+        [Test]
+        public void TestDocMakerLeak()
+        {
+            FileInfo f = new FileInfo(Path.Combine(getWorkDir().FullName, "docMakerLeak.txt"));
+
+            // 'using' releases the file handle even if writing the line throws.
+            using (TextWriter ps = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8))
+            {
+                ps.WriteLine("one title\t" + Time.CurrentTimeMilliseconds() + "\tsome content");
+            }
+
+            Dictionary<string, string> props = new Dictionary<string, string>();
+            props["docs.file"] = f.FullName;
+            props["content.source.forever"] = "false";
+            Config config = new Config(props);
+
+            ContentSource source = new LineDocSource();
+            source.SetConfig(config);
+
+            DocMaker dm = new DocMaker();
+            dm.SetConfig(config, source);
+            try
+            {
+                // Resetting twice is the scenario that used to leak.
+                dm.ResetInputs();
+                dm.ResetInputs();
+            }
+            finally
+            {
+                // Always dispose, so a failing reset does not itself leave the input open.
+                dm.Dispose();
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/EnwikiContentSourceTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/EnwikiContentSourceTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/EnwikiContentSourceTest.cs
new file mode 100644
index 0000000..95ded38
--- /dev/null
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/EnwikiContentSourceTest.cs
@@ -0,0 +1,194 @@
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    [Ignore("LUCENENET TODO: Never finishes")]
+    public class EnwikiContentSourceTest : LuceneTestCase
+    {
+        /// <summary>
+        /// An <see cref="EnwikiContentSource"/> which works on a String and not files.
+        /// </summary>
+        private class StringableEnwikiSource : EnwikiContentSource
+        {
+            // The XML text served as the input stream.
+            private readonly String docs;
+
+            public StringableEnwikiSource(String docs)
+            {
+                this.docs = docs;
+            }
+
+            protected override Stream OpenInputStream()
+            {
+                byte[] utf8Bytes = Encoding.UTF8.GetBytes(docs);
+                return new MemoryStream(utf8Bytes);
+            }
+        }
+
+        /// <summary>
+        /// Asserts that <paramref name="dd"/> is not null and that its Name, Title, Body
+        /// and Date equal the expected values, checked in that order.
+        /// </summary>
+        private void assertDocData(DocData dd, String expName, String expTitle, String expBody, String expDate)
+        {
+            assertNotNull(dd);
+            assertEquals(expName, dd.Name);
+            assertEquals(expTitle, dd.Title);
+            assertEquals(expBody, dd.Body);
+            assertEquals(expDate, dd.Date);
+        }
+
+        /// <summary>
+        /// Asserts that asking <paramref name="stdm"/> for another document throws
+        /// <see cref="NoMoreDataException"/>; fails the test otherwise.
+        /// </summary>
+        private void assertNoMoreDataException(EnwikiContentSource stdm)
+        {
+            try
+            {
+                stdm.GetNextDocData(null);
+            }
+            catch (NoMoreDataException)
+            {
+                // expected
+                return;
+            }
+
+            fail("Expecting NoMoreDataException");
+        }
+
+        // A single <page> element (page id 1, title "Title1") that the tests embed
+        // inside a <mediawiki> root element.
+        private readonly String PAGE1 =
+              "  <page>\r\n" +
+              "    <title>Title1</title>\r\n" +
+              "    <ns>0</ns>\r\n" +
+              "    <id>1</id>\r\n" +
+              "    <revision>\r\n" +
+              "      <id>11</id>\r\n" +
+              "      <parentid>111</parentid>\r\n" +
+              "      <timestamp>2011-09-14T11:35:09Z</timestamp>\r\n" +
+              "      <contributor>\r\n" +
+              "      <username>Mister1111</username>\r\n" +
+              "        <id>1111</id>\r\n" +
+              "      </contributor>\r\n" +
+              "      <minor />\r\n" +
+              "      <comment>/* Never mind */</comment>\r\n" +
+              "      <text>Some text 1 here</text>\r\n" +
+              "    </revision>\r\n" +
+              "  </page>\r\n";
+
+        // A second <page> element (page id 2, title "Title2") that the tests append
+        // after PAGE1 inside the <mediawiki> root element.
+        private readonly String PAGE2 =
+            "  <page>\r\n" +
+                "    <title>Title2</title>\r\n" +
+                "    <ns>0</ns>\r\n" +
+                "    <id>2</id>\r\n" +
+                "    <revision>\r\n" +
+                "      <id>22</id>\r\n" +
+                "      <parentid>222</parentid>\r\n" +
+                "      <timestamp>2022-09-14T22:35:09Z</timestamp>\r\n" +
+                "      <contributor>\r\n" +
+                "      <username>Mister2222</username>\r\n" +
+                "        <id>2222</id>\r\n" +
+                "      </contributor>\r\n" +
+                "      <minor />\r\n" +
+                "      <comment>/* Never mind */</comment>\r\n" +
+                "      <text>Some text 2 here</text>\r\n" +
+                "    </revision>\r\n" +
+                "  </page>\r\n";
+
+        /// <summary>
+        /// A dump holding one page yields one document and then reports no more data.
+        /// </summary>
+        [Test]
+        public void TestOneDocument()
+        {
+            String dump = "<mediawiki>\r\n" + PAGE1 + "</mediawiki>";
+            EnwikiContentSource contentSource = createContentSource(dump, false);
+
+            DocData first = contentSource.GetNextDocData(new DocData());
+            assertDocData(first, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
+
+            assertNoMoreDataException(contentSource);
+        }
+
+        /// <summary>
+        /// Creates a <see cref="StringableEnwikiSource"/> over <paramref name="docs"/>, configures it,
+        /// and resets its inputs through a throw-away <see cref="DocMaker"/>.
+        /// </summary>
+        /// <param name="docs">The XML text the source reads.</param>
+        /// <param name="forever">Value written to the <c>content.source.forever</c> property.</param>
+        private EnwikiContentSource createContentSource(String docs, bool forever)
+        {
+            Dictionary<string, string> settings = new Dictionary<string, string>
+            {
+                { "print.props", "false" },
+                { "content.source.forever", forever.ToString(CultureInfo.InvariantCulture) }
+            };
+            Config config = new Config(settings);
+
+            EnwikiContentSource contentSource = new StringableEnwikiSource(docs);
+            contentSource.SetConfig(config);
+
+            // doc-maker just for initiating content source inputs
+            DocMaker initializer = new DocMaker();
+            initializer.SetConfig(config, contentSource);
+            initializer.ResetInputs();
+
+            return contentSource;
+        }
+
+        /// <summary>
+        /// A dump holding two pages yields both documents in order and then reports no more data.
+        /// </summary>
+        [Test]
+        public void TestTwoDocuments()
+        {
+            String dump = "<mediawiki>\r\n" + PAGE1 + PAGE2 + "</mediawiki>";
+            EnwikiContentSource contentSource = createContentSource(dump, false);
+
+            DocData first = contentSource.GetNextDocData(new DocData());
+            assertDocData(first, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
+
+            DocData second = contentSource.GetNextDocData(new DocData());
+            assertDocData(second, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
+
+            assertNoMoreDataException(contentSource);
+        }
+
+        /// <summary>
+        /// With <c>content.source.forever</c> turned on, the same two documents are
+        /// served repeatedly instead of the source running out of data.
+        /// </summary>
+        [Test]
+        public void TestForever()
+        {
+            String docs =
+                "<mediawiki>\r\n" +
+                    PAGE1 +
+                    PAGE2 +
+                "</mediawiki>";
+
+            EnwikiContentSource source = createContentSource(docs, true);
+            try
+            {
+                // same documents several times
+                for (int i = 0; i < 3; i++)
+                {
+                    DocData dd1 = source.GetNextDocData(new DocData());
+                    assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
+
+                    DocData dd2 = source.GetNextDocData(new DocData());
+                    assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
+                    // Don't test that NoMoreDataException is thrown, since the forever flag is turned on.
+                }
+            }
+            finally
+            {
+                // Dispose even when an assertion above fails, so a failing run
+                // does not leave the source open.
+                source.Dispose();
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b515271d/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
new file mode 100644
index 0000000..7cd27f1
--- /dev/null
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
@@ -0,0 +1,271 @@
+using ICSharpCode.SharpZipLib.BZip2;
+using Lucene.Net.Analysis.Core;
+using Lucene.Net.Benchmarks.ByTask.Tasks;
+using Lucene.Net.Benchmarks.ByTask.Utils;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+
+namespace Lucene.Net.Benchmarks.ByTask.Feeds
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements.  See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    /// <summary>
+    /// Tests the functionality of <see cref="LineDocSource"/>.
+    /// </summary>
+    public class LineDocSourceTest : BenchmarkTestCase
+    {
+        //private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
+
+        /// <summary>
+        /// Writes the test document line (and optionally the header line) to
+        /// <paramref name="file"/> through a <see cref="BZip2OutputStream"/>, as UTF-8 text.
+        /// </summary>
+        /// <param name="file">The file to create or overwrite.</param>
+        /// <param name="addHeader">Whether the fields header line is written first.</param>
+        private void createBZ2LineFile(FileInfo file, bool addHeader)
+        {
+            Stream @out = new FileStream(file.FullName, FileMode.Create, FileAccess.Write);
+            @out = new BZip2OutputStream(@out); // csFactory.createCompressorOutputStream("bzip2", @out);
+
+            // 'using' disposes the writer even if writeDocsToFile throws.
+            using (TextWriter writer = new StreamWriter(@out, Encoding.UTF8))
+            {
+                writeDocsToFile(writer, addHeader, null);
+            }
+        }
+
+        /// <summary>
+        /// Writes an optional fields header line followed by one document line to
+        /// <paramref name="writer"/>. Fields are separated by <c>WriteLineDocTask.SEP</c>.
+        /// </summary>
+        /// <param name="writer">Destination for the lines.</param>
+        /// <param name="addHeader">Whether to emit the header line before the document line.</param>
+        /// <param name="otherFields">
+        /// Extra fields, or null for none: keys are appended to the header line, values to the document line.
+        /// </param>
+        private void writeDocsToFile(TextWriter writer, bool addHeader, IDictionary<string, string> otherFields)
+        {
+            bool hasExtraFields = otherFields != null;
+
+            if (addHeader)
+            {
+                writer.Write(WriteLineDocTask.FIELDS_HEADER_INDICATOR);
+                writer.Write(WriteLineDocTask.SEP);
+                writer.Write(DocMaker.TITLE_FIELD);
+                writer.Write(WriteLineDocTask.SEP);
+                writer.Write(DocMaker.DATE_FIELD);
+                writer.Write(WriteLineDocTask.SEP);
+                writer.Write(DocMaker.BODY_FIELD);
+                if (hasExtraFields)
+                {
+                    // additional field names in the header
+                    foreach (Object fieldName in otherFields.Keys)
+                    {
+                        writer.Write(WriteLineDocTask.SEP);
+                        writer.Write(fieldName.toString());
+                    }
+                }
+                writer.WriteLine();
+            }
+
+            StringBuilder line = new StringBuilder();
+            line.append("title").append(WriteLineDocTask.SEP);
+            line.append("date").append(WriteLineDocTask.SEP);
+            line.append(DocMaker.BODY_FIELD);
+            if (hasExtraFields)
+            {
+                // additional field values in the doc line
+                foreach (Object fieldValue in otherFields.Values)
+                {
+                    line.append(WriteLineDocTask.SEP).append(fieldValue.toString());
+                }
+            }
+            writer.Write(line.toString());
+            writer.WriteLine();
+        }
+
+        /// <summary>
+        /// Writes the test document line (and optionally the header line) to
+        /// <paramref name="file"/> as uncompressed UTF-8 text.
+        /// </summary>
+        /// <param name="file">The file to create or overwrite.</param>
+        /// <param name="addHeader">Whether the fields header line is written first.</param>
+        private void createRegularLineFile(FileInfo file, bool addHeader)
+        {
+            Stream @out = new FileStream(file.FullName, FileMode.Create, FileAccess.Write);
+
+            // 'using' disposes the writer (and with it the file stream) even if writeDocsToFile throws.
+            using (TextWriter writer = new StreamWriter(@out, Encoding.UTF8))
+            {
+                writeDocsToFile(writer, addHeader, null);
+            }
+        }
+
+        /// <summary>
+        /// Writes a header line and one document line to <paramref name="file"/> as
+        /// uncompressed UTF-8 text, adding each name in <paramref name="extraFields"/>
+        /// as an extra field whose value equals its name.
+        /// </summary>
+        /// <param name="file">The file to create or overwrite.</param>
+        /// <param name="extraFields">Names of the additional fields.</param>
+        private void createRegularLineFileWithMoreFields(FileInfo file, params String[] extraFields)
+        {
+            Stream @out = new FileStream(file.FullName, FileMode.Create, FileAccess.Write);
+
+            // 'using' disposes the writer (and with it the file stream) even if a later step throws.
+            using (TextWriter writer = new StreamWriter(@out, Encoding.UTF8))
+            {
+                Dictionary<string, string> p = new Dictionary<string, string>();
+                foreach (String f in extraFields)
+                {
+                    // field name doubles as field value
+                    p[f] = f;
+                }
+                writeDocsToFile(writer, true, p);
+            }
+        }
+
+        /// <summary>
+        /// Runs the index-and-search check three times, adding the document 1, 2 and
+        /// then 4 times (that is 0, 1 and 3 extra repetitions).
+        /// </summary>
+        private void doIndexAndSearchTest(FileInfo file, Type lineParserClass, String storedField)
+        {
+            foreach (int numAdds in new int[] { 1, 2, 4 })
+            {
+                doIndexAndSearchTestWithRepeats(file, lineParserClass, numAdds, storedField);
+            }
+        }
+
+        /// <summary>
+        /// Indexes <paramref name="file"/> through <see cref="LineDocSource"/> with
+        /// <paramref name="numAdds"/> add-document tasks, then asserts that a search for
+        /// <c>body:body</c> has <paramref name="numAdds"/> hits and that the stored value of
+        /// the checked field in document 0 equals the field's name.
+        /// </summary>
+        /// <param name="file">The line file to index.</param>
+        /// <param name="lineParserClass">Line parser type for the <c>line.parser</c> property, or null to leave it unset.</param>
+        /// <param name="numAdds">Number of add-document tasks to run.</param>
+        /// <param name="storedField">Field to verify, or null to use <c>DocMaker.BODY_FIELD</c>.</param>
+        private void doIndexAndSearchTestWithRepeats(FileInfo file,
+            Type lineParserClass, int numAdds, String storedField)
+        {
+            IndexReader reader = null;
+            PerfRunData runData = null;
+            try
+            {
+                // LineDocSource specific settings.
+                Dictionary<string, string> settings = new Dictionary<string, string>
+                {
+                    { "docs.file", file.FullName }
+                };
+                if (lineParserClass != null)
+                {
+                    settings["line.parser"] = lineParserClass.AssemblyQualifiedName;
+                }
+
+                // Indexing configuration.
+                settings["analyzer"] = typeof(WhitespaceAnalyzer).AssemblyQualifiedName;
+                settings["content.source"] = typeof(LineDocSource).AssemblyQualifiedName;
+                settings["directory"] = "RAMDirectory";
+                settings["doc.stored"] = "true";
+                settings["doc.index.props"] = "true";
+
+                // Create PerfRunData
+                runData = new PerfRunData(new Config(settings));
+
+                TaskSequence sequence = new TaskSequence(runData, "testBzip2", null, false);
+                sequence.AddTask(new CreateIndexTask(runData));
+                for (int added = 0; added < numAdds; added++)
+                {
+                    sequence.AddTask(new AddDocTask(runData));
+                }
+                sequence.AddTask(new CloseIndexTask(runData));
+                try
+                {
+                    sequence.DoLogic();
+                }
+                finally
+                {
+                    sequence.Dispose();
+                }
+
+                reader = DirectoryReader.Open(runData.Directory);
+                IndexSearcher searcher = NewSearcher(reader);
+                TopDocs hits = searcher.Search(new TermQuery(new Term("body", "body")), 10);
+                assertEquals(numAdds, hits.TotalHits);
+                assertNotNull(hits.ScoreDocs[0]);
+
+                // The body field is added to all docs and satisfies field-name == value.
+                String fieldToCheck = storedField ?? DocMaker.BODY_FIELD;
+                assertEquals("Wrong field value", fieldToCheck, searcher.Doc(0).Get(fieldToCheck));
+            }
+            finally
+            {
+                IOUtils.Dispose(reader, runData);
+            }
+        }
+
+        /// <summary>
+        /// Tests LineDocSource with a bzip2 input stream.
+        /// </summary>
+        [Test]
+        public void TestBZip2()
+        {
+            String path = Path.Combine(getWorkDir().FullName, "one-line.bz2");
+            FileInfo bz2File = new FileInfo(path);
+            createBZ2LineFile(bz2File, addHeader: true);
+            doIndexAndSearchTest(bz2File, lineParserClass: null, storedField: null);
+        }
+
+        /// <summary>
+        /// Same as the bzip2 test, but the file is written without the header line.
+        /// </summary>
+        [Test]
+        public void TestBZip2NoHeaderLine()
+        {
+            String path = Path.Combine(getWorkDir().FullName, "one-line.bz2");
+            FileInfo bz2File = new FileInfo(path);
+            createBZ2LineFile(bz2File, addHeader: false);
+            doIndexAndSearchTest(bz2File, lineParserClass: null, storedField: null);
+        }
+
+        /// <summary>
+        /// Indexes an uncompressed line file that starts with the header line.
+        /// </summary>
+        [Test]
+        public void TestRegularFile()
+        {
+            String path = Path.Combine(getWorkDir().FullName, "one-line");
+            FileInfo lineFile = new FileInfo(path);
+            createRegularLineFile(lineFile, addHeader: true);
+            doIndexAndSearchTest(lineFile, lineParserClass: null, storedField: null);
+        }
+
+        /// <summary>
+        /// Indexes an uncompressed line file with a header line, passing
+        /// <c>HeaderLineParser</c> as the line parser type.
+        /// </summary>
+        [Test]
+        public void TestRegularFileSpecialHeader()
+        {
+            String path = Path.Combine(getWorkDir().FullName, "one-line");
+            FileInfo lineFile = new FileInfo(path);
+            createRegularLineFile(lineFile, addHeader: true);
+            doIndexAndSearchTest(lineFile, lineParserClass: typeof(HeaderLineParser), storedField: null);
+        }
+
+        /// <summary>
+        /// Indexes an uncompressed line file that was written without the header line.
+        /// </summary>
+        [Test]
+        public void TestRegularFileNoHeaderLine()
+        {
+            String path = Path.Combine(getWorkDir().FullName, "one-line");
+            FileInfo lineFile = new FileInfo(path);
+            createRegularLineFile(lineFile, addHeader: false);
+            doIndexAndSearchTest(lineFile, lineParserClass: null, storedField: null);
+        }
+
+        /// <summary>
+        /// Verifies that indexing a line file containing a malformed line throws.
+        /// </summary>
+        [Test]
+        public void TestInvalidFormat()
+        {
+            String[]
+            testCases = new String[] {
+                "", // empty line
+                "title", // just title
+                "title" + WriteLineDocTask.SEP, // title + SEP
+                "title" + WriteLineDocTask.SEP + "body", // title + SEP + body
+                                                        // note that title + SEP + body + SEP is a valid line, which results in an
+                                                        // empty body
+            };
+
+            for (int i = 0; i < testCases.Length; i++)
+            {
+                FileInfo file = new FileInfo(Path.Combine(getWorkDir().FullName, "one-line"));
+                // 'using' releases the file handle even if a write throws.
+                using (TextWriter writer = new StreamWriter(new FileStream(file.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8))
+                {
+                    writer.Write(testCases[i]);
+                    writer.WriteLine();
+                }
+
+                bool exceptionThrown = false;
+                try
+                {
+                    doIndexAndSearchTest(file, null, null);
+                }
+                catch (NUnit.Framework.AssertionException)
+                {
+                    // An assertion failure is a genuine test failure, not the error this
+                    // test expects, so it must not be swallowed by the general catch below.
+                    throw;
+                }
+                catch (Exception)
+                {
+                    // expected.
+                    exceptionThrown = true;
+                }
+
+                // Checked outside the try block: a failure raised inside it could be
+                // caught by 'catch (Exception)' and the test could never fail.
+                if (!exceptionThrown)
+                {
+                    fail("Some exception should have been thrown for: [" + testCases[i] + "]");
+                }
+            }
+        }
+
+        /// <summary>
+        /// Doc Name is not part of the default header.
+        /// </summary>
+        [Test]
+        public void TestWithDocsName()
+        {
+            String path = Path.Combine(getWorkDir().FullName, "one-line");
+            FileInfo lineFile = new FileInfo(path);
+            createRegularLineFileWithMoreFields(lineFile, DocMaker.NAME_FIELD);
+            doIndexAndSearchTest(lineFile, lineParserClass: null, storedField: DocMaker.NAME_FIELD);
+        }
+
+        /// <summary>
+        /// Use field names that are not defined in DocMaker and so will go to Properties.
+        /// </summary>
+        [Test]
+        public void TestWithProperties()
+        {
+            const String specialField = "mySpecialField";
+            String path = Path.Combine(getWorkDir().FullName, "one-line");
+            FileInfo lineFile = new FileInfo(path);
+            createRegularLineFileWithMoreFields(lineFile, specialField);
+            doIndexAndSearchTest(lineFile, lineParserClass: null, storedField: specialField);
+        }
+    }
+}