You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2011/11/23 08:49:29 UTC
[Lucene.Net] svn commit: r1205314 - in
/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk:
src/contrib/Core/Analysis/Ext/ src/contrib/Regex/ src/core/Support/
test/contrib/Core/Analysis/Ext/ test/contrib/Regex/
Author: ccurrens
Date: Wed Nov 23 07:49:28 2011
New Revision: 1205314
URL: http://svn.apache.org/viewvc?rev=1205314&view=rev
Log:
ported Contrib.Regex
Added:
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs
Removed:
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexpQuery.cs
Modified:
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Core/Analysis/Ext/Analysis.Ext.Test.cs
incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Core/Analysis/Ext/Analysis.Ext.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
Binary files - no diff available.
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/RegexQuery.cs Wed Nov 23 07:49:28 2011
@@ -16,7 +16,9 @@
*/
using System;
+using System.Text;
using Lucene.Net.Index;
+using Lucene.Net.Util;
namespace Lucene.Net.Search.Regex
{
@@ -27,16 +29,17 @@ namespace Lucene.Net.Search.Regex
public class RegexQuery : MultiTermQuery, IRegexQueryCapable, IEquatable<RegexQuery>
{
private IRegexCapabilities _regexImpl = new CSharpRegexCapabilities();
+ public Term Term { get; private set; }
- public RegexQuery(Term term) : base(term)
+ public RegexQuery(Term term)
{
+ Term = term;
}
/// <summary>Construct the enumeration to be used, expanding the pattern term. </summary>
public override FilteredTermEnum GetEnum(IndexReader reader)
{
- Term term = new Term(GetTerm().Field(), GetTerm().Text());
- return new RegexTermEnum(reader, term, _regexImpl);
+ return new RegexTermEnum(reader, Term, _regexImpl);
}
public void SetRegexImplementation(IRegexCapabilities impl)
@@ -49,7 +52,21 @@ namespace Lucene.Net.Search.Regex
return _regexImpl;
}
- /// <summary>
+
+ public override String ToString(String field)
+ {
+ StringBuilder buffer = new StringBuilder();
+ if (!Term.Field().Equals(field))
+ {
+ buffer.Append(Term.Field());
+ buffer.Append(":");
+ }
+ buffer.Append(Term.Text());
+ buffer.Append(ToStringUtils.Boost(GetBoost()));
+ return buffer.ToString();
+ }
+
+ /// <summary>
/// Indicates whether the current object is equal to another object of the same type.
/// </summary>
/// <returns>
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/contrib/Regex/SpanRegexQuery.cs Wed Nov 23 07:49:28 2011
@@ -97,15 +97,9 @@ namespace Lucene.Net.Search.Regex
return _term.Field();
}
- /// <summary>Returns a collection of all terms matched by this query.</summary>
- /// <deprecated> use extractTerms instead
- /// </deprecated>
- /// <seealso cref="Query.ExtractTerms">
- /// </seealso>
- [Obsolete("use ExtractTerms instead")]
- public override ICollection GetTerms()
+ public ICollection<Term> GetTerms()
{
- ArrayList terms = new ArrayList {_term};
+ ICollection<Term> terms = new List<Term>(){_term};
return terms;
}
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/src/core/Support/HashMap.cs Wed Nov 23 07:49:28 2011
@@ -182,7 +182,7 @@ namespace Lucene.Net.Support
return _dict.ContainsKey(key);
}
- public void Add(TKey key, TValue value)
+ public virtual void Add(TKey key, TValue value)
{
if (!_isValueType && _comparer.Equals(key, default(TKey)))
{
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Core/Analysis/Ext/Analysis.Ext.Test.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Core/Analysis/Ext/Analysis.Ext.Test.cs?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
Binary files - no diff available.
Modified: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj?rev=1205314&r1=1205313&r2=1205314&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/Contrib.Regex.Test.csproj Wed Nov 23 07:49:28 2011
@@ -19,7 +19,6 @@
under the License.
-->
-
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
@@ -62,7 +61,8 @@
</ItemGroup>
<ItemGroup>
<Compile Include="Properties\AssemblyInfo.cs" />
- <Compile Include="TestRegexpQuery.cs" />
+ <Compile Include="TestRegexQuery.cs" />
+ <Compile Include="TestSpanRegexQuery.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\src\contrib\Regex\Contrib.Regex.csproj">
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs?rev=1205314&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestRegexQuery.cs Wed Nov 23 07:49:28 2011
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using Lucene.Net.Analysis;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Search.Regex;
+using Lucene.Net.Search.Spans;
+using Lucene.Net.Store;
+using Lucene.Net.Util;
+using NUnit.Framework;
+
+
+namespace Lucene.Net.Search.Regex
+{
+    /// <summary>
+    /// Tests for <see cref="RegexQuery"/> and <see cref="SpanRegexQuery"/> against a
+    /// single-document in-memory index.
+    /// </summary>
+    [TestFixture]
+    public class TestRegexQuery : TestCase
+    {
+        private const String FN = "field";
+
+        private RAMDirectory directory;
+        private IndexSearcher searcher;
+
+        /// <summary>
+        /// Builds a one-document index in a fresh <see cref="RAMDirectory"/> and opens a
+        /// read-only searcher over it.
+        /// </summary>
+        /// <remarks>
+        /// Exceptions are allowed to propagate so NUnit reports the original exception
+        /// type and stack trace instead of a flattened <c>Assert.Fail</c> message.
+        /// </remarks>
+        [SetUp]
+        public void SetUp()
+        {
+            directory = new RAMDirectory();
+            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true,
+                                                 IndexWriter.MaxFieldLength.LIMITED);
+            try
+            {
+                Document doc = new Document();
+                doc.Add(new Field(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED));
+                writer.AddDocument(doc);
+                writer.Optimize();
+            }
+            finally
+            {
+                // Close the writer even when indexing fails.
+                writer.Close();
+            }
+
+            searcher = new IndexSearcher(directory, true);
+        }
+
+        /// <summary>
+        /// Closes the searcher and the directory that were created during set-up.
+        /// </summary>
+        [TearDown]
+        public void TearDown()
+        {
+            // Set-up may have failed before either field was assigned.
+            if (searcher != null)
+            {
+                searcher.Close();
+                searcher = null;
+            }
+
+            if (directory != null)
+            {
+                directory.Close();
+                directory = null;
+            }
+        }
+
+        /// <summary>Creates a term on the test field with the given text.</summary>
+        private static Term NewTerm(String value)
+        {
+            return new Term(FN, value);
+        }
+
+        /// <summary>
+        /// Runs a <see cref="RegexQuery"/> for <paramref name="regex"/> and returns the hit count.
+        /// </summary>
+        /// <param name="regex">Pattern used as the term text.</param>
+        /// <param name="capability">
+        /// Regex implementation to install on the query, or <c>null</c> to keep the query's default.
+        /// </param>
+        /// <returns>Total number of matching documents.</returns>
+        private int RegexQueryNrHits(String regex, IRegexCapabilities capability)
+        {
+            RegexQuery query = new RegexQuery(NewTerm(regex));
+
+            if (capability != null)
+            {
+                query.SetRegexImplementation(capability);
+            }
+
+            return searcher.Search(query, null, 1000).totalHits;
+        }
+
+        /// <summary>
+        /// Runs a <see cref="SpanNearQuery"/> over two <see cref="SpanRegexQuery"/> clauses and
+        /// returns the hit count.
+        /// </summary>
+        /// <param name="regex1">Pattern of the first span clause.</param>
+        /// <param name="regex2">Pattern of the second span clause.</param>
+        /// <param name="slop">Slop passed to the <see cref="SpanNearQuery"/>.</param>
+        /// <param name="ordered">Ordering flag passed to the <see cref="SpanNearQuery"/>.</param>
+        /// <returns>Total number of matching documents.</returns>
+        private int SpanRegexQueryNrHits(String regex1, String regex2, int slop, bool ordered)
+        {
+            SpanRegexQuery srq1 = new SpanRegexQuery(NewTerm(regex1));
+            SpanRegexQuery srq2 = new SpanRegexQuery(NewTerm(regex2));
+            SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq1, srq2 }, slop, ordered);
+
+            return searcher.Search(query, null, 1000).totalHits;
+        }
+
+        [Test]
+        public void TestMatchAll()
+        {
+            TermEnum terms = new RegexQuery(new Term(FN, "jum.")).GetEnum(searcher.GetIndexReader());
+            try
+            {
+                Assert.Ignore("Difference in behavior of .NET and Java");
+                //These terms match in .NET's regex engine. I feel there's not much I can do about it.
+                //// no term should match
+                //Assert.Null(terms.Term());
+                //Assert.False(terms.Next());
+            }
+            finally
+            {
+                // Assert.Ignore throws, so release the enumeration here.
+                terms.Close();
+            }
+        }
+
+        [Test]
+        public void TestRegex1()
+        {
+            Assert.AreEqual(1, RegexQueryNrHits("^q.[aeiou]c.*$", null));
+        }
+
+        [Test]
+        public void TestRegex2()
+        {
+            Assert.AreEqual(0, RegexQueryNrHits("^.[aeiou]c.*$", null));
+        }
+
+        [Test]
+        public void TestRegex3()
+        {
+            Assert.AreEqual(0, RegexQueryNrHits("^q.[aeiou]c$", null));
+        }
+
+        [Test]
+        public void TestSpanRegex1()
+        {
+            Assert.AreEqual(1, SpanRegexQueryNrHits("^q.[aeiou]c.*$", "dog", 6, true));
+        }
+
+        [Test]
+        public void TestSpanRegex2()
+        {
+            Assert.AreEqual(0, SpanRegexQueryNrHits("^q.[aeiou]c.*$", "dog", 5, true));
+        }
+
+        [Test]
+        public void TestEquals()
+        {
+            RegexQuery query1 = new RegexQuery(NewTerm("foo.*"));
+            //query1.SetRegexImplementation(new JakartaRegexpCapabilities());
+
+            RegexQuery query2 = new RegexQuery(NewTerm("foo.*"));
+            Assert.True(query1.Equals(query2));
+        }
+
+        [Test]
+        public void TestJavaUtilCaseSensativeFail()
+        {
+            Assert.AreEqual(0, RegexQueryNrHits("^.*DOG.*$", null));
+        }
+
+        [Test]
+        public void TestJavaUtilCaseInsensative()
+        {
+            // Report the test as ignored rather than as a vacuous pass while its only
+            // assertion is disabled.
+            Assert.Ignore("Case-insensitive assertion is disabled");
+            //Assert.AreEqual(1, RegexQueryNrHits("^.*DOG.*$", new CSharpRegexCapabilities()));
+        }
+    }
+}
\ No newline at end of file
Added: incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs?rev=1205314&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net.3.0.3/trunk/test/contrib/Regex/TestSpanRegexQuery.cs Wed Nov 23 07:49:28 2011
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Lucene.Net;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Documents;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Search.Regex;
+using Lucene.Net.Search.Spans;
+using Lucene.Net.Store;
+using Version = Lucene.Net.Util.Version;
+using NUnit.Framework;
+
+namespace Contrib.Regex.Test
+{
+    /// <summary>
+    /// Tests for <see cref="SpanRegexQuery"/> nested inside other span queries.
+    /// </summary>
+    [TestFixture]
+    public class TestSpanRegexQuery : TestCase
+    {
+        private const String FieldName = "field";
+
+        private readonly Directory indexStoreA = new RAMDirectory();
+
+        private readonly Directory indexStoreB = new RAMDirectory();
+
+        /// <summary>
+        /// Indexes "auto update" and "first auto update" and expects a
+        /// <see cref="SpanFirstQuery"/> (end 1) over the regex "aut.*" to hit exactly one document.
+        /// </summary>
+        [Test]
+        public void TestSpanRegex()
+        {
+            RAMDirectory directory = new RAMDirectory();
+            try
+            {
+                WriteIndex(directory, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED,
+                           Field.Index.ANALYZED, "auto update", "first auto update");
+
+                IndexSearcher searcher = new IndexSearcher(directory, true);
+                try
+                {
+                    SpanRegexQuery srq = new SpanRegexQuery(new Term(FieldName, "aut.*"));
+                    SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
+                    int numHits = searcher.Search(sfq, null, 1000).totalHits;
+                    Assert.AreEqual(1, numHits);
+                }
+                finally
+                {
+                    searcher.Close();
+                }
+            }
+            finally
+            {
+                directory.Close();
+            }
+        }
+
+        /// <summary>
+        /// Searches two single-document stores through a <see cref="MultiSearcher"/> and
+        /// expects the span-near query over "a.*" and "b.*" to match in both.
+        /// </summary>
+        [Test]
+        public void TestSpanRegexBug()
+        {
+            CreateRamDirectories();
+
+            SpanRegexQuery srq = new SpanRegexQuery(new Term(FieldName, "a.*"));
+            SpanRegexQuery stq = new SpanRegexQuery(new Term(FieldName, "b.*"));
+            SpanNearQuery query = new SpanNearQuery(new SpanQuery[] { srq, stq }, 6, true);
+
+            try
+            {
+                IndexSearcher[] arrSearcher = new IndexSearcher[2];
+                int numHits;
+                try
+                {
+                    arrSearcher[0] = new IndexSearcher(indexStoreA, true);
+                    arrSearcher[1] = new IndexSearcher(indexStoreB, true);
+                    MultiSearcher searcher = new MultiSearcher(arrSearcher);
+                    numHits = searcher.Search(query, null, 1000).totalHits;
+                }
+                finally
+                {
+                    // Close whichever searchers were opened, even if the search threw.
+                    foreach (IndexSearcher opened in arrSearcher)
+                    {
+                        if (opened != null)
+                        {
+                            opened.Close();
+                        }
+                    }
+                }
+
+                // Failure mode this test documents: the query is rewritten only once,
+                // against the first IndexSearcher, so the second store is searched with the
+                // terms "a1 b1" and its document is missed (one hit instead of two).
+                Assert.AreEqual(2, numHits);
+            }
+            finally
+            {
+                indexStoreA.Close();
+                indexStoreB.Close();
+            }
+        }
+
+        /// <summary>
+        /// Fills store A with the document "a1 b1" and store B with the document "a2 b2".
+        /// </summary>
+        private void CreateRamDirectories()
+        {
+            WriteIndex(indexStoreA, new StandardAnalyzer(Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.LIMITED,
+                       Field.Index.ANALYZED_NO_NORMS, "a1 b1");
+            WriteIndex(indexStoreB, new StandardAnalyzer(Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.LIMITED,
+                       Field.Index.ANALYZED_NO_NORMS, "a2 b2");
+        }
+
+        /// <summary>
+        /// Creates a new index in <paramref name="store"/> holding one unstored document per
+        /// entry of <paramref name="texts"/>, then optimizes and closes the writer.
+        /// </summary>
+        private static void WriteIndex(Directory store, Analyzer analyzer, IndexWriter.MaxFieldLength maxFieldLength,
+                                       Field.Index indexMode, params String[] texts)
+        {
+            IndexWriter writer = new IndexWriter(store, analyzer, true, maxFieldLength);
+            try
+            {
+                foreach (String text in texts)
+                {
+                    Document doc = new Document();
+                    doc.Add(new Field(FieldName, text, Field.Store.NO, indexMode));
+                    writer.AddDocument(doc);
+                }
+
+                writer.Optimize();
+            }
+            finally
+            {
+                // Close the writer even when indexing fails.
+                writer.Close();
+            }
+        }
+    }
+}
\ No newline at end of file