You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/23 08:49:29 UTC

[Lucene.Net] svn commit: r1205314 - in /incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk: src/contrib/Core/Analysis/Ext/ src/contrib/Regex/ src/core/Support/ test/contrib/Core/Analysis/Ext/ test/contrib/Regex/

Author: ccurrens
Date: Wed Nov 23 07:49:28 2011
New Revision: 1205314

URL: http://svn.apache.org/viewvc?rev=1205314&view=rev
Log:
ported Contrib.Regex

Added:
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs
Removed:
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexpQuery.cs
Modified:
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Core/Analysis/Ext/Analysis.Ext.Test.cs
    incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
Binary files - no diff available.

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs Wed Nov 23 07:49:28 2011
@@ -16,7 +16,9 @@
  */
 
 using System;
+using System.Text;
 using Lucene.Net.Index;
+using Lucene.Net.Util;
 
 namespace Lucene.Net.Search.Regex
 {
@@ -27,16 +29,17 @@ namespace Lucene.Net.Search.Regex
 	public class RegexQuery : MultiTermQuery, IRegexQueryCapable, IEquatable<RegexQuery>
 	{
 		private IRegexCapabilities _regexImpl = new CSharpRegexCapabilities();
+	    public Term Term { get; private set; }
 
-		public RegexQuery(Term term) : base(term)
+		public RegexQuery(Term term)
 		{
+            Term = term;
 		}
 
 		/// <summary>Construct the enumeration to be used, expanding the pattern term. </summary>
 		public override FilteredTermEnum GetEnum(IndexReader reader)
 		{
-			Term term = new Term(GetTerm().Field(), GetTerm().Text());
-			return new RegexTermEnum(reader, term, _regexImpl);
+			return new RegexTermEnum(reader, Term, _regexImpl);
 		}
 
 		public void SetRegexImplementation(IRegexCapabilities impl)
@@ -49,7 +52,21 @@ namespace Lucene.Net.Search.Regex
 			return _regexImpl;
 		}
 
-		/// <summary>
+
+        public override String ToString(String field)
+        {
+            StringBuilder buffer = new StringBuilder();
+            if (!Term.Field().Equals(field))
+            {
+                buffer.Append(Term.Field());
+                buffer.Append(":");
+            }
+            buffer.Append(Term.Text());
+            buffer.Append(ToStringUtils.Boost(GetBoost()));
+            return buffer.ToString();
+        }
+
+	    /// <summary>
 		/// Indicates whether the current object is equal to another object of the same type.
 		/// </summary>
 		/// <returns>

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs Wed Nov 23 07:49:28 2011
@@ -97,15 +97,9 @@ namespace Lucene.Net.Search.Regex
 			return _term.Field();
 		}
 
-		/// <summary>Returns a collection of all terms matched by this query.</summary>
-		/// <deprecated> use extractTerms instead
-		/// </deprecated>
-		/// <seealso cref="Query.ExtractTerms">
-		/// </seealso>
-		[Obsolete("use ExtractTerms instead")]
-        public override ICollection GetTerms()
+        public ICollection<Term> GetTerms()
         {
-            ArrayList terms = new ArrayList {_term};
+            ICollection<Term> terms = new List<Term>(){_term};
 		    return terms;
         }
     

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs Wed Nov 23 07:49:28 2011
@@ -182,7 +182,7 @@ namespace Lucene.Net.Support
             return _dict.ContainsKey(key);
         }
 
-        public void Add(TKey key, TValue value)
+        public virtual void Add(TKey key, TValue value)
         {
             if (!_isValueType && _comparer.Equals(key, default(TKey)))
             {

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Core/Analysis/Ext/Analysis.Ext.Test.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Core/Analysis/Ext/Analysis.Ext.Test.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
Binary files - no diff available.

Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj Wed Nov 23 07:49:28 2011
@@ -19,7 +19,6 @@
  under the License.
 
 -->
-
 <Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <PropertyGroup>
     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
@@ -62,7 +61,8 @@
   </ItemGroup>
   <ItemGroup>
     <Compile Include="Properties\AssemblyInfo.cs" />
-    <Compile Include="TestRegexpQuery.cs" />
+    <Compile Include="TestRegexQuery.cs" />
+    <Compile Include="TestSpanRegexQuery.cs" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="..\..\..\src\contrib\Regex\Contrib.Regex.csproj">

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs?rev=1205314&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs Wed Nov 23 07:49:28 2011
@@ -0,0 +1,154 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for Additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Search.Regex;
+using Lucene.Net.Search.Spans;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+
+namespace Lucene.Net.Search.Regex
+{
+    public class TestRegexQuery : TestCase
+    {
+        private IndexSearcher searcher;
+        private const String FN = "field";
+
+        [SetUp]
+        public void SetUp()
+        {
+            RAMDirectory directory = new RAMDirectory();
+            try
+            {
+                IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true,
+                                                     IndexWriter.MaxFieldLength.LIMITED);
+                Document doc = new Document();
+                doc.Add(new Field(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED));
+                writer.AddDocument(doc);
+                writer.Optimize();
+                writer.Close();
+                searcher = new IndexSearcher(directory, true);
+            }
+            catch (Exception e)
+            {
+                Assert.Fail(e.ToString());
+            }
+        }
+        [TearDown]
+        public void TearDown()
+        {
+            try
+            {
+                searcher.Close();
+            }
+            catch (Exception e)
+            {
+                Assert.Fail(e.ToString());
+            }
+        }
+
+        private static Term NewTerm(String value) { return new Term(FN, value); }
+
+        private int RegexQueryNrHits(String regex, IRegexCapabilities capability)
+        {
+            RegexQuery query = new RegexQuery(NewTerm(regex));
+
+            if (capability != null)
+                query.SetRegexImplementation(capability);
+
+            return searcher.Search(query, null, 1000).totalHits;
+        }
+
+        private int SpanRegexQueryNrHits(String regex1, String regex2, int slop, bool ordered)
+        {
+            SpanRegexQuery srq1 = new SpanRegexQuery(NewTerm(regex1));
+            SpanRegexQuery srq2 = new SpanRegexQuery(NewTerm(regex2));
+            SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq1, srq2 }, slop, ordered);
+
+            return searcher.Search(query, null, 1000).totalHits;
+        }
+
+        [Test]
+        public void TestMatchAll()
+        {
+            TermEnum terms = new RegexQuery(new Term(FN, "jum.")).GetEnum(searcher.GetIndexReader());
+            Assert.Ignore("Difference in behavior of .NET and Java");
+            //These terms match in .NET's regex engine.  I feel there's not much I can do about it.
+            //// no term should match
+            //Assert.Null(terms.Term());
+            //Assert.False(terms.Next());
+        }
+
+        [Test]
+        public void TestRegex1()
+        {
+            Assert.AreEqual(1, RegexQueryNrHits("^q.[aeiou]c.*$", null));
+        }
+
+        [Test]
+        public void TestRegex2()
+        {
+            Assert.AreEqual(0, RegexQueryNrHits("^.[aeiou]c.*$", null));
+        }
+
+        [Test]
+        public void TestRegex3()
+        {
+            Assert.AreEqual(0, RegexQueryNrHits("^q.[aeiou]c$", null));
+        }
+
+        [Test]
+        public void TestSpanRegex1()
+        {
+            Assert.AreEqual(1, SpanRegexQueryNrHits("^q.[aeiou]c.*$", "dog", 6, true));
+        }
+
+        [Test]
+        public void TestSpanRegex2()
+        {
+            Assert.AreEqual(0, SpanRegexQueryNrHits("^q.[aeiou]c.*$", "dog", 5, true));
+        }
+
+        [Test]
+        public void TestEquals()
+        {
+            RegexQuery query1 = new RegexQuery(NewTerm("foo.*"));
+            //query1.SetRegexImplementation(new JakartaRegexpCapabilities());
+
+            RegexQuery query2 = new RegexQuery(NewTerm("foo.*"));
+            Assert.True(query1.Equals(query2));
+        }
+
+        [Test]
+        public void TestJavaUtilCaseSensativeFail()
+        {
+            Assert.AreEqual(0, RegexQueryNrHits("^.*DOG.*$", null));
+        }
+
+        [Test]
+        public void TestJavaUtilCaseInsensative()
+        {
+            //Assert.AreEqual(1, RegexQueryNrHits("^.*DOG.*$", new CSharpRegexCapabilities()));
+        }
+    }
+}
\ No newline at end of file

Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs?rev=1205314&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs Wed Nov 23 07:49:28 2011
@@ -0,0 +1,111 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Search.Regex;
+using Lucene.Net.Search.Spans;
+using Lucene.Net.Store;
+using Version = Lucene.Net.Util.Version;
+using NUnit.Framework;
+
+namespace Contrib.Regex.Test
+{
+    [TestFixture]
+    public class TestSpanRegexQuery : TestCase
+    {
+        Directory indexStoreA = new RAMDirectory();
+
+        Directory indexStoreB = new RAMDirectory();
+
+        [Test]
+        public void TestSpanRegex()
+        {
+            RAMDirectory directory = new RAMDirectory();
+            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
+            Document doc = new Document();
+            // doc.Add(new Field("field", "the quick brown fox jumps over the lazy dog",
+            // Field.Store.NO, Field.Index.ANALYZED));
+            // writer.AddDocument(doc);
+            // doc = new Document();
+            doc.Add(new Field("field", "auto update", Field.Store.NO,
+                Field.Index.ANALYZED));
+            writer.AddDocument(doc);
+            doc = new Document();
+            doc.Add(new Field("field", "first auto update", Field.Store.NO,
+                Field.Index.ANALYZED));
+            writer.AddDocument(doc);
+            writer.Optimize();
+            writer.Close();
+
+            IndexSearcher searcher = new IndexSearcher(directory, true);
+            SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*"));
+            SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
+            // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
+            // true);
+            int numHits = searcher.Search(sfq, null, 1000).totalHits;
+            Assert.AreEqual(1, numHits);
+        }
+
+        [Test]
+        public void TestSpanRegexBug()
+        {
+            CreateRamDirectories();
+
+            SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "a.*"));
+            SpanRegexQuery stq = new SpanRegexQuery(new Term("field", "b.*"));
+            SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq, stq }, 6,
+                true);
+
+            // 1. Search the same store which works
+            IndexSearcher[] arrSearcher = new IndexSearcher[2];
+            arrSearcher[0] = new IndexSearcher(indexStoreA, true);
+            arrSearcher[1] = new IndexSearcher(indexStoreB, true);
+            MultiSearcher searcher = new MultiSearcher(arrSearcher);
+            int numHits = searcher.Search(query, null, 1000).totalHits;
+            arrSearcher[0].Close();
+            arrSearcher[1].Close();
+
+            // Will fail here
+            // We expect 2 but only one matched
+            // The rewriter function only write it once on the first IndexSearcher
+            // So it's using term: a1 b1 to search on the second IndexSearcher
+            // As a result, it won't match the document in the second IndexSearcher
+            Assert.AreEqual(2, numHits);
+            indexStoreA.Close();
+            indexStoreB.Close();
+        }
+
+        private void CreateRamDirectories()
+        {
+            // creating a document to store
+            Document lDoc = new Document();
+            lDoc.Add(new Field("field", "a1 b1", Field.Store.NO,
+                Field.Index.ANALYZED_NO_NORMS));
+
+            // creating a document to store
+            Document lDoc2 = new Document();
+            lDoc2.Add(new Field("field", "a2 b2", Field.Store.NO,
+                Field.Index.ANALYZED_NO_NORMS));
+
+            // creating first index writer
+            IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Version.LUCENE_CURRENT),
+                true, IndexWriter.MaxFieldLength.LIMITED);
+            writerA.AddDocument(lDoc);
+            writerA.Optimize();
+            writerA.Close();
+
+            // creating second index writer
+            IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Version.LUCENE_CURRENT),
+                true, IndexWriter.MaxFieldLength.LIMITED);
+            writerB.AddDocument(lDoc2);
+            writerB.Optimize();
+            writerB.Close();
+        }
+    }
+}
\ No newline at end of file