You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/09 05:47:57 UTC
[1/4] lucenenet git commit: More Analysis porting fixes
Repository: lucenenet
Updated Branches:
refs/heads/master 6f6f938ef -> cd3ee1366
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
index 2571ccd..d14fb0e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ElisionFilter.cs
@@ -1,80 +1,77 @@
-namespace org.apache.lucene.analysis.util
-{
-
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+using Lucene.Net.Analysis.Tokenattributes;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
- /// <summary>
- /// Removes elisions from a <seealso cref="TokenStream"/>. For example, "l'avion" (the plane) will be
- /// tokenized as "avion" (plane).
- /// </summary>
- /// <seealso cref= <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a> </seealso>
- public sealed class ElisionFilter : TokenFilter
- {
- private readonly CharArraySet articles;
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+namespace Lucene.Net.Analysis.Util
+{
- /// <summary>
- /// Constructs an elision filter with a Set of stop words </summary>
- /// <param name="input"> the source <seealso cref="TokenStream"/> </param>
- /// <param name="articles"> a set of stopword articles </param>
- public ElisionFilter(TokenStream input, CharArraySet articles) : base(input)
- {
- this.articles = articles;
- }
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+    /// <summary>
+    /// Removes elisions from a <see cref="TokenStream"/>. For example, "l'avion" (the plane) will be
+    /// tokenized as "avion" (plane).
+    /// <para>
+    /// See <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>.
+    /// </para>
+    /// </summary>
+    public sealed class ElisionFilter : TokenFilter
+    {
+        // Prefixes (the text before the apostrophe) that get stripped from a term.
+        private readonly CharArraySet articles;
+
+        // Term text of the current token. It is registered in the constructor because a
+        // C# field initializer may not call an instance method of the object being built.
+        private readonly ICharTermAttribute termAtt;
- /// <summary>
- /// Increments the <seealso cref="TokenStream"/> with a <seealso cref="CharTermAttribute"/> without elisioned start
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- if (input.incrementToken())
- {
- char[] termBuffer = termAtt.buffer();
- int termLength = termAtt.length();
+
+        /// <summary>
+        /// Constructs an elision filter with a set of stop words.
+        /// </summary>
+        /// <param name="input"> the source <see cref="TokenStream"/> </param>
+        /// <param name="articles"> a set of stopword articles </param>
+        public ElisionFilter(TokenStream input, CharArraySet articles)
+            : base(input)
+        {
+            this.articles = articles;
+            termAtt = AddAttribute<ICharTermAttribute>();
+        }
- int index = -1;
- for (int i = 0; i < termLength; i++)
- {
- char ch = termBuffer[i];
- if (ch == '\'' || ch == '\u2019')
- {
- index = i;
- break;
- }
- }
+
+        /// <summary>
+        /// Advances the wrapped <see cref="TokenStream"/>. When the text before the first
+        /// apostrophe of the current term is one of the configured articles, the term is
+        /// replaced by the text that follows the apostrophe.
+        /// </summary>
+        /// <returns>
+        /// <c>true</c> if the wrapped stream produced a token; <c>false</c> if it returned <c>false</c>.
+        /// </returns>
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            char[] termBuffer = termAtt.Buffer();
+            int termLength = termAtt.Length;
- // An apostrophe has been found. If the prefix is an article strip it off.
- if (index >= 0 && articles.contains(termBuffer, 0, index))
- {
- termAtt.copyBuffer(termBuffer, index + 1, termLength - (index + 1));
- }
+
+            // Find the first apostrophe: either ASCII ' or the typographic U+2019.
+            int index = -1;
+            for (int i = 0; i < termLength; i++)
+            {
+                char ch = termBuffer[i];
+                if (ch == '\'' || ch == '\u2019')
+                {
+                    index = i;
+                    break;
+                }
+            }
- return true;
- }
- else
- {
- return false;
- }
- }
- }
+
+            // An apostrophe has been found. If the prefix is an article strip it off.
+            if (index >= 0 && articles.Contains(termBuffer, 0, index))
+            {
+                termAtt.CopyBuffer(termBuffer, index + 1, termLength - (index + 1));
+            }
+
+            return true;
+        }
+    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
index 598fef8..60044dd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs
@@ -1,6 +1,7 @@
using System;
+using System.IO;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
/*
@@ -80,9 +81,7 @@ namespace org.apache.lucene.analysis.util
this.@delegate = @delegate;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public java.io.InputStream openResource(String resource) throws java.io.IOException
- public InputStream openResource(string resource)
+ public Stream OpenResource(string resource)
{
try
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
index 0feb6b8..d5eb9fd 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilteringTokenFilter.cs
@@ -1,5 +1,7 @@
using System;
using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Analysis.Util
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
index 64cdb36..fda0459 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/MultiTermAwareComponent.cs
@@ -1,39 +1,36 @@
-using Lucene.Net.Analysis.Util;
-
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// Add to any analysis factory component to allow returning an
- /// analysis component factory for use with partial terms in prefix queries,
- /// wildcard queries, range query endpoints, regex queries, etc.
- ///
- /// @lucene.experimental
- /// </summary>
- public interface MultiTermAwareComponent
- {
- /// <summary>
- /// Returns an analysis component to handle analysis if multi-term queries.
- /// The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
- /// </summary>
- AbstractAnalysisFactory MultiTermComponent {get;}
- }
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Add to any analysis factory component to allow returning an
+ /// analysis component factory for use with partial terms in prefix queries,
+ /// wildcard queries, range query endpoints, regex queries, etc.
+ /// <para>
+ /// @lucene.experimental
+ /// </para>
+ /// </summary>
+ public interface MultiTermAwareComponent
+ {
+ /// <summary>
+ /// Gets an analysis component to handle analysis of multi-term queries.
+ /// The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
+ /// </summary>
+ AbstractAnalysisFactory MultiTermComponent { get; }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
index ead67a2..4081e36 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/OpenStringBuilder.cs
@@ -1,6 +1,6 @@
using System;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
/*
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
index 3e4bc1f..4d95bc0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoader.cs
@@ -1,49 +1,48 @@
using System;
+using System.IO;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- /// <summary>
- /// Abstraction for loading resources (streams, files, and classes).
- /// </summary>
- public interface ResourceLoader
- {
-
- /// <summary>
- /// Opens a named resource
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public java.io.InputStream openResource(String resource) throws java.io.IOException;
- InputStream openResource(string resource);
-
-
- /// <summary>
- /// Finds class of the name and expected type
- /// </summary>
- Type findClass<T>(string cname, Type expectedType);
-
- /// <summary>
- /// Creates an instance of the name and expected type
- /// </summary>
- // TODO: fix exception handling
- T newInstance<T>(string cname, Type expectedType);
- }
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Abstraction for loading resources (streams, files, and classes).
+ /// </summary>
+ public interface ResourceLoader
+ {
+
+ /// <summary>
+ /// Opens a named resource
+ /// </summary>
+ /// <param name="resource"> name of the resource to open </param>
+ /// <returns> a <see cref="Stream"/> over the resource </returns>
+ Stream OpenResource(string resource);
+
+
+ /// <summary>
+ /// Finds class of the name and expected type
+ /// </summary>
+ /// <param name="cname"> name of the class to find </param>
+ /// <param name="expectedType"> type the found class is expected to have </param>
+ /// <returns> the <see cref="Type"/> that was found </returns>
+ // NOTE(review): how T and expectedType relate is not visible here - confirm against implementations.
+ Type FindClass<T>(string cname, Type expectedType);
+
+ /// <summary>
+ /// Creates an instance of the name and expected type
+ /// </summary>
+ /// <param name="cname"> name of the class to instantiate </param>
+ /// <param name="expectedType"> type the created instance is expected to have </param>
+ /// <returns> the newly created instance </returns>
+ // TODO: fix exception handling
+ T NewInstance<T>(string cname, Type expectedType);
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
index 97fe682..eceb3e6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ResourceLoaderAware.cs
@@ -15,24 +15,23 @@
* limitations under the License.
*/
-namespace org.apache.lucene.analysis.util
-{
+using org.apache.lucene.analysis.util;
- /// <summary>
- /// Interface for a component that needs to be initialized by
- /// an implementation of <seealso cref="ResourceLoader"/>.
- /// </summary>
- /// <seealso cref= ResourceLoader </seealso>
- public interface ResourceLoaderAware
- {
+namespace Lucene.Net.Analysis.Util
+{
- /// <summary>
- /// Initializes this component with the provided ResourceLoader
- /// (used for loading classes, files, etc).
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: void inform(ResourceLoader loader) throws java.io.IOException;
- void inform(ResourceLoader loader);
- }
+ /// <summary>
+ /// Interface for a component that needs to be initialized by
+ /// an implementation of <see cref="ResourceLoader"/>.
+ /// </summary>
+ /// <seealso cref="ResourceLoader"/>
+ public interface ResourceLoaderAware
+ {
+ /// <summary>
+ /// Initializes this component with the provided ResourceLoader
+ /// (used for loading classes, files, etc).
+ /// </summary>
+ /// <param name="loader"> the <see cref="ResourceLoader"/> to initialize this component with </param>
+ void Inform(ResourceLoader loader);
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
index 1aae904..bac5fb6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/RollingCharBuffer.cs
@@ -1,200 +1,176 @@
using System;
using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- using ArrayUtil = org.apache.lucene.util.ArrayUtil;
- using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
-
- /// <summary>
- /// Acts like a forever growing char[] as you read
- /// characters into it from the provided reader, but
- /// internally it uses a circular buffer to only hold the
- /// characters that haven't been freed yet. This is like a
- /// PushbackReader, except you don't have to specify
- /// up-front the max size of the buffer, but you do have to
- /// periodically call <seealso cref="#freeBefore"/>.
- /// </summary>
-
- public sealed class RollingCharBuffer
- {
-
- private Reader reader;
-
- private char[] buffer = new char[512];
-
- // Next array index to write to in buffer:
- private int nextWrite;
-
- // Next absolute position to read from reader:
- private int nextPos;
-
- // How many valid chars (wrapped) are in the buffer:
- private int count;
-
- // True if we hit EOF
- private bool end;
-
- /// <summary>
- /// Clear array and switch to new reader. </summary>
- public void reset(Reader reader)
- {
- this.reader = reader;
- nextPos = 0;
- nextWrite = 0;
- count = 0;
- end = false;
- }
-
- /* Absolute position read. NOTE: pos must not jump
- * ahead by more than 1! Ie, it's OK to read arbitarily
- * far back (just not prior to the last {@link
- * #freeBefore}), but NOT ok to read arbitrarily far
- * ahead. Returns -1 if you hit EOF. */
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public int get(int pos) throws java.io.IOException
- public int get(int pos)
- {
- //System.out.println(" get pos=" + pos + " nextPos=" + nextPos + " count=" + count);
- if (pos == nextPos)
- {
- if (end)
- {
- return -1;
- }
- if (count == buffer.Length)
- {
- // Grow
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] newBuffer = new char[org.apache.lucene.util.ArrayUtil.oversize(1+count, org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR)];
- char[] newBuffer = new char[ArrayUtil.oversize(1 + count, RamUsageEstimator.NUM_BYTES_CHAR)];
- //System.out.println(Thread.currentThread().getName() + ": cb grow " + newBuffer.length);
- Array.Copy(buffer, nextWrite, newBuffer, 0, buffer.Length - nextWrite);
- Array.Copy(buffer, 0, newBuffer, buffer.Length - nextWrite, nextWrite);
- nextWrite = buffer.Length;
- buffer = newBuffer;
- }
- if (nextWrite == buffer.Length)
- {
- nextWrite = 0;
- }
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int toRead = buffer.length - Math.max(count, nextWrite);
- int toRead = buffer.Length - Math.Max(count, nextWrite);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int readCount = reader.read(buffer, nextWrite, toRead);
- int readCount = reader.read(buffer, nextWrite, toRead);
- if (readCount == -1)
- {
- end = true;
- return -1;
- }
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int ch = buffer[nextWrite];
- int ch = buffer[nextWrite];
- nextWrite += readCount;
- count += readCount;
- nextPos += readCount;
- return ch;
- }
- else
- {
- // Cannot read from future (except by 1):
- Debug.Assert(pos < nextPos);
-
- // Cannot read from already freed past:
- Debug.Assert(nextPos - pos <= count, "nextPos=" + nextPos + " pos=" + pos + " count=" + count);
-
- return buffer[getIndex(pos)];
- }
- }
-
- // For assert:
- private bool inBounds(int pos)
- {
- return pos >= 0 && pos < nextPos && pos >= nextPos - count;
- }
-
- private int getIndex(int pos)
- {
- int index = nextWrite - (nextPos - pos);
- if (index < 0)
- {
- // Wrap:
- index += buffer.Length;
- Debug.Assert(index >= 0);
- }
- return index;
- }
-
- public char[] get(int posStart, int length)
- {
- Debug.Assert(length > 0);
- Debug.Assert(inBounds(posStart), "posStart=" + posStart + " length=" + length);
- //System.out.println(" buffer.get posStart=" + posStart + " len=" + length);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int startIndex = getIndex(posStart);
- int startIndex = getIndex(posStart);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int endIndex = getIndex(posStart + length);
- int endIndex = getIndex(posStart + length);
- //System.out.println(" startIndex=" + startIndex + " endIndex=" + endIndex);
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] result = new char[length];
- char[] result = new char[length];
- if (endIndex >= startIndex && length < buffer.Length)
- {
- Array.Copy(buffer, startIndex, result, 0, endIndex - startIndex);
- }
- else
- {
- // Wrapped:
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int part1 = buffer.length-startIndex;
- int part1 = buffer.Length - startIndex;
- Array.Copy(buffer, startIndex, result, 0, part1);
- Array.Copy(buffer, 0, result, buffer.Length - startIndex, length - part1);
- }
- return result;
- }
-
- /// <summary>
- /// Call this to notify us that no chars before this
- /// absolute position are needed anymore.
- /// </summary>
- public void freeBefore(int pos)
- {
- Debug.Assert(pos >= 0);
- Debug.Assert(pos <= nextPos);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int newCount = nextPos - pos;
- int newCount = nextPos - pos;
- Debug.Assert(newCount <= count, "newCount=" + newCount + " count=" + count);
- Debug.Assert(newCount <= buffer.Length, "newCount=" + newCount + " buf.length=" + buffer.Length);
- count = newCount;
- }
- }
-
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+    /// <summary>
+    /// Acts like a forever growing char[] as you read
+    /// characters into it from the provided reader, but
+    /// internally it uses a circular buffer to only hold the
+    /// characters that haven't been freed yet. This is like a
+    /// PushbackReader, except you don't have to specify
+    /// up-front the max size of the buffer, but you do have to
+    /// periodically call <see cref="FreeBefore"/>.
+    /// </summary>
+    public sealed class RollingCharBuffer
+    {
+        private TextReader reader;
+
+        private char[] buffer = new char[512];
+
+        // Next array index to write to in buffer:
+        private int nextWrite;
+
+        // Next absolute position to read from reader:
+        private int nextPos;
+
+        // How many valid chars (wrapped) are in the buffer:
+        private int count;
+
+        // True if we hit EOF
+        private bool end;
+
+        /// <summary>
+        /// Clear array and switch to new reader.
+        /// </summary>
+        /// <param name="reader"> the reader that subsequent calls to <see cref="Get(int)"/> pull chars from </param>
+        public void Reset(TextReader reader)
+        {
+            this.reader = reader;
+            nextPos = 0;
+            nextWrite = 0;
+            count = 0;
+            end = false;
+        }
+
+        /// <summary>
+        /// Absolute position read. NOTE: pos must not jump
+        /// ahead by more than 1! Ie, it's OK to read arbitrarily
+        /// far back (just not prior to the last <see cref="FreeBefore"/>),
+        /// but NOT ok to read arbitrarily far ahead.
+        /// </summary>
+        /// <param name="pos"> absolute position of the char to return </param>
+        /// <returns> the char at <paramref name="pos"/>, or -1 if you hit EOF </returns>
+        public int Get(int pos)
+        {
+            if (pos == nextPos)
+            {
+                if (end)
+                {
+                    return -1;
+                }
+                if (count == buffer.Length)
+                {
+                    // Grow: copy the live chars, oldest first, to the start of a larger array.
+                    var newBuffer = new char[ArrayUtil.Oversize(1 + count, RamUsageEstimator.NUM_BYTES_CHAR)];
+                    Array.Copy(buffer, nextWrite, newBuffer, 0, buffer.Length - nextWrite);
+                    Array.Copy(buffer, 0, newBuffer, buffer.Length - nextWrite, nextWrite);
+                    nextWrite = buffer.Length;
+                    buffer = newBuffer;
+                }
+                if (nextWrite == buffer.Length)
+                {
+                    // Wrap the write position back to the start of the array.
+                    nextWrite = 0;
+                }
+
+                int toRead = buffer.Length - Math.Max(count, nextWrite);
+                int readCount = reader.Read(buffer, nextWrite, toRead);
+                // TextReader.Read(char[], int, int) signals end of input by returning 0
+                // (java.io.Reader returned -1). toRead is always at least 1 here, so any
+                // non-positive count means EOF.
+                if (readCount <= 0)
+                {
+                    end = true;
+                    return -1;
+                }
+                int ch = buffer[nextWrite];
+                nextWrite += readCount;
+                count += readCount;
+                nextPos += readCount;
+                return ch;
+            }
+            else
+            {
+                // Cannot read from future (except by 1):
+                Debug.Assert(pos < nextPos);
+
+                // Cannot read from already freed past:
+                Debug.Assert(nextPos - pos <= count, "nextPos=" + nextPos + " pos=" + pos + " count=" + count);
+
+                return buffer[GetIndex(pos)];
+            }
+        }
+
+        // For assert:
+        private bool InBounds(int pos)
+        {
+            return pos >= 0 && pos < nextPos && pos >= nextPos - count;
+        }
+
+        // Maps an absolute position to its index in the circular buffer.
+        private int GetIndex(int pos)
+        {
+            int index = nextWrite - (nextPos - pos);
+            if (index < 0)
+            {
+                // Wrap:
+                index += buffer.Length;
+                Debug.Assert(index >= 0);
+            }
+            return index;
+        }
+
+        /// <summary>
+        /// Copies <paramref name="length"/> buffered chars, starting at absolute position
+        /// <paramref name="posStart"/>, into a newly allocated array.
+        /// </summary>
+        /// <param name="posStart"> absolute position of the first char to copy </param>
+        /// <param name="length"> number of chars to copy; asserted to be greater than 0 </param>
+        /// <returns> a new array of <paramref name="length"/> chars </returns>
+        public char[] Get(int posStart, int length)
+        {
+            Debug.Assert(length > 0);
+            Debug.Assert(InBounds(posStart), "posStart=" + posStart + " length=" + length);
+
+            int startIndex = GetIndex(posStart);
+            int endIndex = GetIndex(posStart + length);
+
+            var result = new char[length];
+            if (endIndex >= startIndex && length < buffer.Length)
+            {
+                Array.Copy(buffer, startIndex, result, 0, endIndex - startIndex);
+            }
+            else
+            {
+                // Wrapped: copy the tail of the array, then the rest from its start.
+                int part1 = buffer.Length - startIndex;
+                Array.Copy(buffer, startIndex, result, 0, part1);
+                Array.Copy(buffer, 0, result, buffer.Length - startIndex, length - part1);
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Call this to notify us that no chars before this
+        /// absolute position are needed anymore.
+        /// </summary>
+        /// <param name="pos"> absolute position; asserted to lie between 0 and the next read position </param>
+        public void FreeBefore(int pos)
+        {
+            Debug.Assert(pos >= 0);
+            Debug.Assert(pos <= nextPos);
+            int newCount = nextPos - pos;
+            Debug.Assert(newCount <= count, "newCount=" + newCount + " count=" + count);
+            Debug.Assert(newCount <= buffer.Length, "newCount=" + newCount + " buf.length=" + buffer.Length);
+            count = newCount;
+        }
+    }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
index 873936e..231f550 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/SegmentingTokenizerBase.cs
@@ -1,7 +1,10 @@
using System;
using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Analysis.Tokenattributes;
+using org.apache.lucene.analysis.util;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
/*
@@ -22,8 +25,6 @@ namespace org.apache.lucene.analysis.util
*/
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-
/// <summary>
/// Breaks text into sentences with a <seealso cref="BreakIterator"/> and
/// allows subclasses to decompose these sentences into words.
@@ -79,15 +80,13 @@ namespace org.apache.lucene.analysis.util
this.iterator = iterator;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
+ public override bool IncrementToken()
{
- if (length == 0 || !incrementWord())
+ if (length == 0 || !IncrementWord())
{
- while (!incrementSentence())
+ while (!IncrementSentence())
{
- refill();
+ Refill();
if (length <= 0) // no more bytes to read;
{
return false;
@@ -98,30 +97,24 @@ namespace org.apache.lucene.analysis.util
return true;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
+ public override void Reset()
{
- base.reset();
+ base.Reset();
wrapper.setText(buffer, 0, 0);
iterator.Text = wrapper;
length = usableLength = offset = 0;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
- public override void end()
+ public override void End()
{
- base.end();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int finalOffset = correctOffset(length < 0 ? offset : offset + length);
- int finalOffset = correctOffset(length < 0 ? offset : offset + length);
- offsetAtt.setOffset(finalOffset, finalOffset);
+ base.End();
+ int finalOffset = CorrectOffset(length < 0 ? offset : offset + length);
+ offsetAtt.SetOffset(finalOffset, finalOffset);
}
/// <summary>
/// Returns the last unambiguous break position in the text. </summary>
- private int findSafeEnd()
+ private int FindSafeEnd()
{
for (int i = length - 1; i >= 0; i--)
{
@@ -135,9 +128,9 @@ namespace org.apache.lucene.analysis.util
/// <summary>
/// For sentence tokenization, these are the unambiguous break positions. </summary>
- protected internal virtual bool isSafeEnd(char ch)
+ protected internal virtual bool IsSafeEnd(char ch)
{
- switch (ch)
+ switch ((int)ch)
{
case 0x000D:
case 0x000A:
@@ -150,47 +143,44 @@ namespace org.apache.lucene.analysis.util
}
}
- /// <summary>
- /// Refill the buffer, accumulating the offset and setting usableLength to the
- /// last unambiguous break position
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private void refill() throws java.io.IOException
- private void refill()
- {
- offset += usableLength;
- int leftover = length - usableLength;
- Array.Copy(buffer, usableLength, buffer, 0, leftover);
- int requested = buffer.Length - leftover;
- int returned = read(input, buffer, leftover, requested);
- length = returned < 0 ? leftover : returned + leftover;
- if (returned < requested) // reader has been emptied, process the rest
- {
- usableLength = length;
- }
- else // still more data to be read, find a safe-stopping place
- {
- usableLength = findSafeEnd();
- if (usableLength < 0)
- {
- usableLength = length; /*
+ /// <summary>
+ /// Refill the buffer, accumulating the offset and setting usableLength to the
+ /// last unambiguous break position
+ /// </summary>
+ private void Refill()
+ {
+ offset += usableLength;
+ int leftover = length - usableLength;
+ Array.Copy(buffer, usableLength, buffer, 0, leftover);
+ int requested = buffer.Length - leftover;
+ int returned = read(input, buffer, leftover, requested);
+ length = returned < 0 ? leftover : returned + leftover;
+ if (returned < requested) // reader has been emptied, process the rest
+ {
+ usableLength = length;
+ }
+ else // still more data to be read, find a safe-stopping place
+ {
+ usableLength = FindSafeEnd();
+ if (usableLength < 0)
+ {
+ usableLength = length; /*
}
* more than IOBUFFER of text without breaks,
* gonna possibly truncate tokens
*/
- }
+ }
- wrapper.setText(buffer, 0, Math.Max(0, usableLength));
- iterator.Text = wrapper;
- }
+ wrapper.SetText(buffer, 0, Math.Max(0, usableLength));
+ iterator.Text = wrapper;
+ }
+ }
- // TODO: refactor to a shared readFully somewhere
+ // TODO: refactor to a shared readFully somewhere
// (NGramTokenizer does this too):
/// <summary>
/// commons-io's readFully, but without bugs if offset != 0 </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private static int read(java.io.Reader input, char[] buffer, int offset, int length) throws java.io.IOException
- private static int read(Reader input, char[] buffer, int offset, int length)
+ private static int Read(TextReader input, char[] buffer, int offset, int length)
{
Debug.Assert(length >= 0, "length must not be negative: " + length);
@@ -212,9 +202,7 @@ namespace org.apache.lucene.analysis.util
/// return true if there is a token from the buffer, or null if it is
/// exhausted.
/// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private boolean incrementSentence() throws java.io.IOException
- private bool incrementSentence()
+ private bool IncrementSentence()
{
if (length == 0) // we must refill the buffer
{
@@ -223,7 +211,7 @@ namespace org.apache.lucene.analysis.util
while (true)
{
- int start = iterator.current();
+ int start = iterator.Current();
if (start == BreakIterator.DONE)
{
@@ -248,11 +236,10 @@ namespace org.apache.lucene.analysis.util
/// <summary>
/// Provides the next input sentence for analysis </summary>
- protected internal abstract void setNextSentence(int sentenceStart, int sentenceEnd);
+ protected internal abstract void SetNextSentence(int sentenceStart, int sentenceEnd);
/// <summary>
/// Returns true if another word is available </summary>
- protected internal abstract bool incrementWord();
+ protected internal abstract bool IncrementWord();
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
index e8a1ddc..145c064 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StemmerUtil.cs
@@ -1,153 +1,146 @@
using System;
using System.Diagnostics;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
- /// <summary>
- /// Some commonly-used stemming functions
- ///
- /// @lucene.internal
- /// </summary>
- public class StemmerUtil
- {
- /// <summary>
- /// no instance </summary>
- private StemmerUtil()
- {
- }
+ /// <summary>
+ /// Some commonly-used stemming functions
+ ///
+ /// @lucene.internal
+ /// </summary>
+ public class StemmerUtil
+ {
+ /// <summary>
+ /// no instance </summary>
+ private StemmerUtil()
+ {
+ }
- /// <summary>
- /// Returns true if the character array starts with the suffix.
- /// </summary>
- /// <param name="s"> Input Buffer </param>
- /// <param name="len"> length of input buffer </param>
- /// <param name="prefix"> Prefix string to test </param>
- /// <returns> true if <code>s</code> starts with <code>prefix</code> </returns>
- public static bool StartsWith(char[] s, int len, string prefix)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int prefixLen = prefix.length();
- int prefixLen = prefix.Length;
- if (prefixLen > len)
- {
- return false;
- }
- for (int i = 0; i < prefixLen; i++)
- {
- if (s[i] != prefix[i])
- {
- return false;
- }
- }
- return true;
- }
+ /// <summary>
+ /// Returns true if the character array starts with the prefix.
+ /// </summary>
+ /// <param name="s"> Input Buffer </param>
+ /// <param name="len"> length of input buffer </param>
+ /// <param name="prefix"> Prefix string to test </param>
+ /// <returns> true if <code>s</code> starts with <code>prefix</code> </returns>
+ public static bool StartsWith(char[] s, int len, string prefix)
+ {
+ int prefixLen = prefix.Length;
+ if (prefixLen > len)
+ {
+ return false;
+ }
+ for (int i = 0; i < prefixLen; i++)
+ {
+ if (s[i] != prefix[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
- /// <summary>
- /// Returns true if the character array ends with the suffix.
- /// </summary>
- /// <param name="s"> Input Buffer </param>
- /// <param name="len"> length of input buffer </param>
- /// <param name="suffix"> Suffix string to test </param>
- /// <returns> true if <code>s</code> ends with <code>suffix</code> </returns>
- public static bool EndsWith(char[] s, int len, string suffix)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int suffixLen = suffix.length();
- int suffixLen = suffix.Length;
- if (suffixLen > len)
- {
- return false;
- }
- for (int i = suffixLen - 1; i >= 0; i--)
- {
- if (s[len - (suffixLen - i)] != suffix[i])
- {
- return false;
- }
- }
+ /// <summary>
+ /// Returns true if the character array ends with the suffix.
+ /// </summary>
+ /// <param name="s"> Input Buffer </param>
+ /// <param name="len"> length of input buffer </param>
+ /// <param name="suffix"> Suffix string to test </param>
+ /// <returns> true if <code>s</code> ends with <code>suffix</code> </returns>
+ public static bool EndsWith(char[] s, int len, string suffix)
+ {
+ int suffixLen = suffix.Length;
+ if (suffixLen > len)
+ {
+ return false;
+ }
+ for (int i = suffixLen - 1; i >= 0; i--)
+ {
+ if (s[len - (suffixLen - i)] != suffix[i])
+ {
+ return false;
+ }
+ }
- return true;
- }
+ return true;
+ }
- /// <summary>
- /// Returns true if the character array ends with the suffix.
- /// </summary>
- /// <param name="s"> Input Buffer </param>
- /// <param name="len"> length of input buffer </param>
- /// <param name="suffix"> Suffix string to test </param>
- /// <returns> true if <code>s</code> ends with <code>suffix</code> </returns>
- public static bool EndsWith(char[] s, int len, char[] suffix)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int suffixLen = suffix.length;
- int suffixLen = suffix.Length;
- if (suffixLen > len)
- {
- return false;
- }
- for (int i = suffixLen - 1; i >= 0; i--)
- {
- if (s[len - (suffixLen - i)] != suffix[i])
- {
- return false;
- }
- }
+ /// <summary>
+ /// Returns true if the character array ends with the suffix.
+ /// </summary>
+ /// <param name="s"> Input Buffer </param>
+ /// <param name="len"> length of input buffer </param>
+ /// <param name="suffix"> Suffix string to test </param>
+ /// <returns> true if <code>s</code> ends with <code>suffix</code> </returns>
+ public static bool EndsWith(char[] s, int len, char[] suffix)
+ {
+ int suffixLen = suffix.Length;
+ if (suffixLen > len)
+ {
+ return false;
+ }
+ for (int i = suffixLen - 1; i >= 0; i--)
+ {
+ if (s[len - (suffixLen - i)] != suffix[i])
+ {
+ return false;
+ }
+ }
- return true;
- }
+ return true;
+ }
- /// <summary>
- /// Delete a character in-place
- /// </summary>
- /// <param name="s"> Input Buffer </param>
- /// <param name="pos"> Position of character to delete </param>
- /// <param name="len"> length of input buffer </param>
- /// <returns> length of input buffer after deletion </returns>
- public static int delete(char[] s, int pos, int len)
- {
- Debug.Assert(pos < len);
- if (pos < len - 1) // don't arraycopy if asked to delete last character
- {
- Array.Copy(s, pos + 1, s, pos, len - pos - 1);
- }
- return len - 1;
- }
-
- /// <summary>
- /// Delete n characters in-place
- /// </summary>
- /// <param name="s"> Input Buffer </param>
- /// <param name="pos"> Position of character to delete </param>
- /// <param name="len"> Length of input buffer </param>
- /// <param name="nChars"> number of characters to delete </param>
- /// <returns> length of input buffer after deletion </returns>
- public static int deleteN(char[] s, int pos, int len, int nChars)
- {
- Debug.Assert(pos + nChars <= len);
- if (pos + nChars < len) // don't arraycopy if asked to delete the last characters
- {
- Array.Copy(s, pos + nChars, s, pos, len - pos - nChars);
- }
- return len - nChars;
- }
- }
+ /// <summary>
+ /// Delete a character in-place
+ /// </summary>
+ /// <param name="s"> Input Buffer </param>
+ /// <param name="pos"> Position of character to delete </param>
+ /// <param name="len"> length of input buffer </param>
+ /// <returns> length of input buffer after deletion </returns>
+ public static int Delete(char[] s, int pos, int len)
+ {
+ Debug.Assert(pos < len);
+ if (pos < len - 1) // don't arraycopy if asked to delete last character
+ {
+ Array.Copy(s, pos + 1, s, pos, len - pos - 1);
+ }
+ return len - 1;
+ }
+ /// <summary>
+ /// Delete n characters in-place
+ /// </summary>
+ /// <param name="s"> Input Buffer </param>
+ /// <param name="pos"> Position of character to delete </param>
+ /// <param name="len"> Length of input buffer </param>
+ /// <param name="nChars"> number of characters to delete </param>
+ /// <returns> length of input buffer after deletion </returns>
+ public static int DeleteN(char[] s, int pos, int len, int nChars)
+ {
+ Debug.Assert(pos + nChars <= len);
+ if (pos + nChars < len) // don't arraycopy if asked to delete the last characters
+ {
+ Array.Copy(s, pos + nChars, s, pos, len - pos - nChars);
+ }
+ return len - nChars;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
index 2433a83..de736a3 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs
@@ -1,6 +1,4 @@
-using System;
-
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -16,15 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+using System;
+using System.IO;
+using Lucene.Net.Util;
+using Version = System.Version;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
-
-
- using IOUtils = org.apache.lucene.util.IOUtils;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// Base class for Analyzers that need to make use of stopword sets.
///
/// </summary>
@@ -59,13 +56,11 @@ namespace org.apache.lucene.analysis.util
/// the Lucene version for cross version compatibility </param>
/// <param name="stopwords">
/// the analyzer's stopword set </param>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: protected StopwordAnalyzerBase(final org.apache.lucene.util.Version version, final CharArraySet stopwords)
protected internal StopwordAnalyzerBase(Version version, CharArraySet stopwords)
{
matchVersion = version;
// analyzers should use char array set for stopwords!
- this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet.unmodifiableSet(CharArraySet.copy(version, stopwords));
+ this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet.unmodifiableSet(CharArraySet.Copy(version, stopwords));
}
/// <summary>
@@ -73,8 +68,6 @@ namespace org.apache.lucene.analysis.util
/// </summary>
/// <param name="version">
/// the Lucene version for cross version compatibility </param>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: protected StopwordAnalyzerBase(final org.apache.lucene.util.Version version)
protected internal StopwordAnalyzerBase(Version version) : this(version, null)
{
}
@@ -96,22 +89,18 @@ namespace org.apache.lucene.analysis.util
/// file </returns>
/// <exception cref="IOException">
/// if loading the stopwords throws an <seealso cref="IOException"/> </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: protected static CharArraySet loadStopwordSet(final boolean ignoreCase, final Class aClass, final String resource, final String comment) throws java.io.IOException
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
- protected internal static CharArraySet loadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
+ protected internal static CharArraySet LoadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
{
- Reader reader = null;
+ TextReader reader = null;
try
{
- reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
- return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase));
+ reader = IOUtils.GetDecodingReader(aClass.GetResourceAsStream(resource), StandardCharsets.UTF_8);
+ return WordlistLoader.GetWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase));
}
finally
{
- IOUtils.close(reader);
+ IOUtils.Close(reader);
}
-
}
/// <summary>
@@ -126,19 +115,17 @@ namespace org.apache.lucene.analysis.util
/// file </returns>
/// <exception cref="IOException">
/// if loading the stopwords throws an <seealso cref="IOException"/> </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: protected static CharArraySet loadStopwordSet(java.io.File stopwords, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
- protected internal static CharArraySet loadStopwordSet(File stopwords, Version matchVersion)
+ protected internal static CharArraySet LoadStopwordSet(File stopwords, Version matchVersion)
{
Reader reader = null;
try
{
- reader = IOUtils.getDecodingReader(stopwords, StandardCharsets.UTF_8);
- return WordlistLoader.getWordSet(reader, matchVersion);
+ reader = IOUtils.GetDecodingReader(stopwords, StandardCharsets.UTF_8);
+ return WordlistLoader.GetWordSet(reader, matchVersion);
}
finally
{
- IOUtils.close(reader);
+ IOUtils.Close(reader);
}
}
@@ -154,17 +141,15 @@ namespace org.apache.lucene.analysis.util
/// reader </returns>
/// <exception cref="IOException">
/// if loading the stopwords throws an <seealso cref="IOException"/> </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: protected static CharArraySet loadStopwordSet(java.io.Reader stopwords, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
protected internal static CharArraySet loadStopwordSet(Reader stopwords, Version matchVersion)
{
try
{
- return WordlistLoader.getWordSet(stopwords, matchVersion);
+ return WordlistLoader.GetWordSet(stopwords, matchVersion);
}
finally
{
- IOUtils.close(stopwords);
+ IOUtils.Close(stopwords);
}
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
index baf3975..36e1877 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/WordlistLoader.cs
@@ -1,7 +1,11 @@
using System;
using System.Collections.Generic;
+using System.IO;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+using Version = System.Version;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
/*
@@ -20,12 +24,7 @@ namespace org.apache.lucene.analysis.util
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using IOUtils = org.apache.lucene.util.IOUtils;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// Loader for text files that represent a list of stopwords.
/// </summary>
/// <seealso cref= IOUtils to obtain <seealso cref="Reader"/> instances
@@ -50,9 +49,7 @@ namespace org.apache.lucene.analysis.util
/// <param name="reader"> Reader containing the wordlist </param>
/// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the readers words </param>
/// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static CharArraySet getWordSet(java.io.Reader reader, CharArraySet result) throws java.io.IOException
- public static CharArraySet getWordSet(Reader reader, CharArraySet result)
+ public static CharArraySet GetWordSet(TextReader reader, CharArraySet result)
{
BufferedReader br = null;
try
@@ -78,13 +75,11 @@ namespace org.apache.lucene.analysis.util
/// Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
/// </summary>
/// <param name="reader"> Reader containing the wordlist </param>
- /// <param name="matchVersion"> the Lucene <seealso cref="Version"/> </param>
+ /// <param name="matchVersion"> the Lucene <seealso cref="System.Version"/> </param>
/// <returns> A <seealso cref="CharArraySet"/> with the reader's words </returns>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static CharArraySet getWordSet(java.io.Reader reader, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
- public static CharArraySet getWordSet(Reader reader, Version matchVersion)
+ public static CharArraySet GetWordSet(TextReader reader, Version matchVersion)
{
- return getWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
+ return GetWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
}
/// <summary>
@@ -97,11 +92,9 @@ namespace org.apache.lucene.analysis.util
/// <param name="comment"> The string representing a comment. </param>
/// <param name="matchVersion"> the Lucene <seealso cref="Version"/> </param>
/// <returns> A CharArraySet with the reader's words </returns>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static CharArraySet getWordSet(java.io.Reader reader, String comment, org.apache.lucene.util.Version matchVersion) throws java.io.IOException
- public static CharArraySet getWordSet(Reader reader, string comment, Version matchVersion)
+ public static CharArraySet GetWordSet(TextReader reader, string comment, Version matchVersion)
{
- return getWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
+ return GetWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
}
/// <summary>
@@ -114,16 +107,14 @@ namespace org.apache.lucene.analysis.util
/// <param name="comment"> The string representing a comment. </param>
/// <param name="result"> the <seealso cref="CharArraySet"/> to fill with the readers words </param>
/// <returns> the given <seealso cref="CharArraySet"/> with the reader's words </returns>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static CharArraySet getWordSet(java.io.Reader reader, String comment, CharArraySet result) throws java.io.IOException
- public static CharArraySet getWordSet(Reader reader, string comment, CharArraySet result)
+ public static CharArraySet GetWordSet(TextReader reader, string comment, CharArraySet result)
{
BufferedReader br = null;
try
{
br = getBufferedReader(reader);
string word = null;
- while ((word = br.readLine()) != null)
+ while ((word = br.ReadLine()) != null)
{
if (word.StartsWith(comment, StringComparison.Ordinal) == false)
{
@@ -133,7 +124,7 @@ namespace org.apache.lucene.analysis.util
}
finally
{
- IOUtils.close(br);
+ IOUtils.Close(br);
}
return result;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Core/Analysis/CharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Analysis/CharFilter.cs b/src/Lucene.Net.Core/Analysis/CharFilter.cs
index 930db48..bae10b1 100644
--- a/src/Lucene.Net.Core/Analysis/CharFilter.cs
+++ b/src/Lucene.Net.Core/Analysis/CharFilter.cs
@@ -42,14 +42,14 @@ namespace Lucene.Net.Analysis
/// <summary>
/// The underlying character-input stream.
/// </summary>
- public readonly TextReader Input;
+ public readonly TextReader input;
/// <summary>
/// Create a new CharFilter wrapping the provided reader. </summary>
/// <param name="input"> a Reader, can also be a CharFilter for chaining. </param>
protected CharFilter(TextReader input)
{
- this.Input = input;
+ this.input = input;
}
/// <summary>
@@ -61,7 +61,7 @@ namespace Lucene.Net.Analysis
/// </summary>
public override void Close()
{
- Input.Close();
+ input.Close();
}
/// <summary>
@@ -78,7 +78,7 @@ namespace Lucene.Net.Analysis
public int CorrectOffset(int currentOff)
{
int corrected = Correct(currentOff);
- return (Input is CharFilter) ? ((CharFilter)Input).CorrectOffset(corrected) : corrected;
+ return (input is CharFilter) ? ((CharFilter)input).CorrectOffset(corrected) : corrected;
}
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.TestFramework/Analysis/MockCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.TestFramework/Analysis/MockCharFilter.cs b/src/Lucene.Net.TestFramework/Analysis/MockCharFilter.cs
index 698ab30..e1aece0 100644
--- a/src/Lucene.Net.TestFramework/Analysis/MockCharFilter.cs
+++ b/src/Lucene.Net.TestFramework/Analysis/MockCharFilter.cs
@@ -68,7 +68,7 @@ namespace Lucene.Net.Analysis
}
// otherwise actually read one
- int c = Input.Read();
+ int c = input.Read();
if (c < 0)
{
return c;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Tests/core/Analysis/TestCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Analysis/TestCharFilter.cs b/src/Lucene.Net.Tests/core/Analysis/TestCharFilter.cs
index c6f37fd..e04ba03 100644
--- a/src/Lucene.Net.Tests/core/Analysis/TestCharFilter.cs
+++ b/src/Lucene.Net.Tests/core/Analysis/TestCharFilter.cs
@@ -62,7 +62,7 @@ namespace Lucene.Net.Analysis
public override int Read(char[] cbuf, int off, int len)
{
- int numRead = Input.Read(cbuf, off, len);
+ int numRead = input.Read(cbuf, off, len);
return numRead == 0 ? -1 : numRead;
}
@@ -81,7 +81,7 @@ namespace Lucene.Net.Analysis
public override int Read(char[] cbuf, int off, int len)
{
- int numRead = Input.Read(cbuf, off, len);
+ int numRead = input.Read(cbuf, off, len);
return numRead == 0 ? -1 : numRead;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Tests/core/Analysis/TestMockAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/core/Analysis/TestMockAnalyzer.cs b/src/Lucene.Net.Tests/core/Analysis/TestMockAnalyzer.cs
index e185484..b784eef 100644
--- a/src/Lucene.Net.Tests/core/Analysis/TestMockAnalyzer.cs
+++ b/src/Lucene.Net.Tests/core/Analysis/TestMockAnalyzer.cs
@@ -277,7 +277,7 @@ namespace Lucene.Net.Analysis
MockCharFilter charfilter = new MockCharFilter(reader, 2);
MockAnalyzer analyzer = new MockAnalyzer(Random());
Exception priorException = null;
- TokenStream ts = analyzer.TokenStream("bogus", charfilter.Input);
+ TokenStream ts = analyzer.TokenStream("bogus", charfilter.input);
try
{
ts.Reset();
[4/4] lucenenet git commit: More Analysis porting fixes
Posted by sy...@apache.org.
More Analysis porting fixes
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/cd3ee136
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/cd3ee136
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/cd3ee136
Branch: refs/heads/master
Commit: cd3ee13667f98fb0ee64c82868ba1fed9cd1349e
Parents: 6f6f938
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Sun Nov 9 06:47:50 2014 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Sun Nov 9 06:47:50 2014 +0200
----------------------------------------------------------------------
.../Analysis/CharFilter/BaseCharFilter.cs | 161 +++---
.../Analysis/CharFilter/MappingCharFilter.cs | 36 +-
.../Analysis/CharFilter/NormalizeCharMap.cs | 2 +-
.../Analysis/CommonGrams/CommonGramsFilter.cs | 1 +
.../Compound/CompoundWordTokenFilterBase.cs | 36 +-
.../DictionaryCompoundWordTokenFilter.cs | 259 +++++-----
.../DictionaryCompoundWordTokenFilterFactory.cs | 136 +++--
.../HyphenationCompoundWordTokenFilter.cs | 497 +++++++++----------
...HyphenationCompoundWordTokenFilterFactory.cs | 25 +-
.../Analysis/Core/LowerCaseFilter.cs | 1 +
.../Analysis/Core/StopAnalyzer.cs | 5 +-
.../Analysis/Core/StopFilterFactory.cs | 286 ++++++-----
.../Analysis/Core/UpperCaseFilter.cs | 3 +-
.../Analysis/Core/UpperCaseFilterFactory.cs | 124 +++--
.../Miscellaneous/ASCIIFoldingFilter.cs | 56 +--
.../Miscellaneous/ASCIIFoldingFilterFactory.cs | 12 +-
.../Miscellaneous/CapitalizationFilter.cs | 31 +-
.../CapitalizationFilterFactory.cs | 207 ++++----
.../Analysis/Miscellaneous/KeepWordFilter.cs | 88 ++--
.../Miscellaneous/KeepWordFilterFactory.cs | 194 ++++----
.../Miscellaneous/KeywordMarkerFilterFactory.cs | 172 +++----
.../Miscellaneous/KeywordRepeatFilterFactory.cs | 87 ++--
.../Lucene47WordDelimiterFilter.cs | 3 +-
.../Analysis/Miscellaneous/PatternAnalyzer.cs | 5 +-
.../RemoveDuplicatesTokenFilter.cs | 131 +++--
.../RemoveDuplicatesTokenFilterFactory.cs | 11 +-
.../Miscellaneous/ScandinavianFoldingFilter.cs | 6 +-
.../ScandinavianNormalizationFilter.cs | 6 +-
.../Miscellaneous/SetKeywordMarkerFilter.cs | 107 ++--
.../StemmerOverrideFilterFactory.cs | 168 +++----
.../Miscellaneous/WordDelimiterFilter.cs | 3 +-
.../Miscellaneous/WordDelimiterFilterFactory.cs | 34 +-
.../Analysis/Ngram/EdgeNGramFilterFactory.cs | 105 ++--
.../Analysis/Ngram/EdgeNGramTokenFilter.cs | 97 ++--
.../Analysis/Ngram/EdgeNGramTokenizerFactory.cs | 19 +-
.../Analysis/Ngram/NGramTokenFilter.cs | 6 +-
.../Analysis/Ngram/NGramTokenizer.cs | 3 +-
.../DelimitedPayloadTokenFilterFactory.cs | 147 +++---
.../Analysis/Position/PositionFilter.cs | 25 +-
.../Analysis/Position/PositionFilterFactory.cs | 119 +++--
.../Analysis/Query/QueryAutoStopWordAnalyzer.cs | 401 +++++++--------
.../Analysis/Standard/ClassicAnalyzer.cs | 8 +-
.../Analysis/Standard/StandardAnalyzer.cs | 8 +-
.../Analysis/Standard/UAX29URLEmailAnalyzer.cs | 6 +-
.../Analysis/Synonym/FSTSynonymFilterFactory.cs | 12 +-
.../Analysis/Synonym/SlowSynonymFilter.cs | 22 +-
.../Synonym/SlowSynonymFilterFactory.cs | 1 +
.../Analysis/Synonym/SlowSynonymMap.cs | 10 +-
.../Analysis/Synonym/SynonymFilterFactory.cs | 173 ++++---
.../Analysis/Util/AnalysisSPILoader.cs | 9 +-
.../Analysis/Util/CharArrayMap.cs | 57 +--
.../Analysis/Util/CharArraySet.cs | 54 +-
.../Analysis/Util/CharacterUtils.cs | 14 +-
.../Analysis/Util/ClasspathResourceLoader.cs | 11 +-
.../Analysis/Util/ElisionFilter.cs | 141 +++---
.../Analysis/Util/FilesystemResourceLoader.cs | 7 +-
.../Analysis/Util/FilteringTokenFilter.cs | 2 +
.../Analysis/Util/MultiTermAwareComponent.cs | 67 ++-
.../Analysis/Util/OpenStringBuilder.cs | 2 +-
.../Analysis/Util/ResourceLoader.cs | 87 ++--
.../Analysis/Util/ResourceLoaderAware.cs | 33 +-
.../Analysis/Util/RollingCharBuffer.cs | 364 +++++++-------
.../Analysis/Util/SegmentingTokenizerBase.cs | 113 ++---
.../Analysis/Util/StemmerUtil.cs | 271 +++++-----
.../Analysis/Util/StopwordAnalyzerBase.cs | 53 +-
.../Analysis/Util/WordlistLoader.cs | 39 +-
src/Lucene.Net.Core/Analysis/CharFilter.cs | 8 +-
.../Analysis/MockCharFilter.cs | 2 +-
.../core/Analysis/TestCharFilter.cs | 4 +-
.../core/Analysis/TestMockAnalyzer.cs | 2 +-
70 files changed, 2532 insertions(+), 2863 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
index 1127842..064b653 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/BaseCharFilter.cs
@@ -15,91 +15,97 @@
* limitations under the License.
*/
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+
namespace Lucene.Net.Analysis.CharFilter
{
/// <summary>
- /// Base utility class for implementing a <seealso cref="CharFilter"/>.
- /// You subclass this, and then record mappings by calling
- /// <seealso cref="#addOffCorrectMap"/>, and then invoke the correct
- /// method to correct an offset.
- /// </summary>
- public abstract class BaseCharFilter : CharFilter
- {
+ /// Base utility class for implementing a <seealso cref="CharFilter"/>.
+ /// You subclass this, and then record mappings by calling
+ /// <seealso cref="#addOffCorrectMap"/>, and then invoke the correct
+ /// method to correct an offset.
+ /// </summary>
+ public abstract class BaseCharFilter : CharFilter
+ {
- private int[] offsets;
- private int[] diffs;
- private int size = 0;
+ private int[] offsets;
+ private int[] diffs;
+ private int size = 0;
- public BaseCharFilter(Reader @in) : base(@in)
- {
- }
+ protected BaseCharFilter(TextReader @in)
+ : base(@in)
+ {
+ }
- /// <summary>
- /// Retrieve the corrected offset. </summary>
- protected internal override int correct(int currentOff)
- {
- if (offsets == null || currentOff < offsets[0])
- {
- return currentOff;
- }
+ /// <summary>
+ /// Retrieve the corrected offset. </summary>
+ protected internal override int Correct(int currentOff)
+ {
+ if (offsets == null || currentOff < offsets[0])
+ {
+ return currentOff;
+ }
- int hi = size - 1;
- if (currentOff >= offsets[hi])
- {
- return currentOff + diffs[hi];
- }
+ int hi = size - 1;
+ if (currentOff >= offsets[hi])
+ {
+ return currentOff + diffs[hi];
+ }
- int lo = 0;
- int mid = -1;
+ int lo = 0;
+ int mid = -1;
- while (hi >= lo)
- {
- mid = (int)((uint)(lo + hi) >> 1);
- if (currentOff < offsets[mid])
- {
- hi = mid - 1;
- }
- else if (currentOff > offsets[mid])
- {
- lo = mid + 1;
- }
- else
- {
- return currentOff + diffs[mid];
- }
- }
+ while (hi >= lo)
+ {
+ mid = (int)((uint)(lo + hi) >> 1);
+ if (currentOff < offsets[mid])
+ {
+ hi = mid - 1;
+ }
+ else if (currentOff > offsets[mid])
+ {
+ lo = mid + 1;
+ }
+ else
+ {
+ return currentOff + diffs[mid];
+ }
+ }
- if (currentOff < offsets[mid])
- {
- return mid == 0 ? currentOff : currentOff + diffs[mid - 1];
- }
- else
- {
- return currentOff + diffs[mid];
- }
- }
+ if (currentOff < offsets[mid])
+ {
+ return mid == 0 ? currentOff : currentOff + diffs[mid - 1];
+ }
+ else
+ {
+ return currentOff + diffs[mid];
+ }
+ }
- protected internal virtual int LastCumulativeDiff
- {
- get
- {
- return offsets == null ? 0 : diffs[size-1];
- }
- }
+ protected internal virtual int LastCumulativeDiff
+ {
+ get
+ {
+ return offsets == null ? 0 : diffs[size - 1];
+ }
+ }
- /// <summary>
- /// <para>
- /// Adds an offset correction mapping at the given output stream offset.
- /// </para>
- /// <para>
- /// Assumption: the offset given with each successive call to this method
- /// will not be smaller than the offset given at the previous invocation.
- /// </para>
- /// </summary>
- /// <param name="off"> The output stream offset at which to apply the correction </param>
- /// <param name="cumulativeDiff"> The input offset is given by adding this
- /// to the output offset </param>
- protected internal virtual void addOffCorrectMap(int off, int cumulativeDiff)
+ /// <summary>
+ /// <para>
+ /// Adds an offset correction mapping at the given output stream offset.
+ /// </para>
+ /// <para>
+ /// Assumption: the offset given with each successive call to this method
+ /// will not be smaller than the offset given at the previous invocation.
+ /// </para>
+ /// </summary>
+ /// <param name="off"> The output stream offset at which to apply the correction </param>
+ /// <param name="cumulativeDiff"> The input offset is given by adding this
+ /// to the output offset </param>
+ protected internal virtual void AddOffCorrectMap(int off, int cumulativeDiff)
{
if (offsets == null)
{
@@ -108,11 +114,11 @@ namespace Lucene.Net.Analysis.CharFilter
}
else if (size == offsets.Length)
{
- offsets = ArrayUtil.grow(offsets);
- diffs = ArrayUtil.grow(diffs);
+ offsets = ArrayUtil.Grow(offsets);
+ diffs = ArrayUtil.Grow(diffs);
}
- assert(size == 0 || off >= offsets[size - 1]) : "Offset #" + size + "(" + off + ") is less than the last recorded offset " + offsets[size - 1] + "\n" + Arrays.ToString(offsets) + "\n" + Arrays.ToString(diffs);
+ Debug.Assert(size == 0 || off >= offsets[size - 1]) : "Offset #" + size + "(" + off + ") is less than the last recorded offset " + offsets[size - 1] + "\n" + Arrays.ToString(offsets) + "\n" + Arrays.ToString(diffs);
if (size == 0 || off != offsets[size - 1])
{
@@ -124,6 +130,5 @@ namespace Lucene.Net.Analysis.CharFilter
diffs[size - 1] = cumulativeDiff;
}
}
- }
-
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
index 5a148be..cd0cdc2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/MappingCharFilter.cs
@@ -1,7 +1,4 @@
-using System;
-using System.Diagnostics;
-using System.Collections.Generic;
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,19 +14,18 @@ using System.Collections.Generic;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-using Lucene.Net.Analysis.CharFilter;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using Lucene.Net.Util.Fst;
+using org.apache.lucene.analysis.charfilter;
-namespace org.apache.lucene.analysis.charfilter
+namespace Lucene.Net.Analysis.CharFilter
{
-
-
- using RollingCharBuffer = org.apache.lucene.analysis.util.RollingCharBuffer;
- using CharsRef = org.apache.lucene.util.CharsRef;
- using CharSequenceOutputs = org.apache.lucene.util.fst.CharSequenceOutputs;
- using FST = org.apache.lucene.util.fst.FST;
- using Outputs = org.apache.lucene.util.fst.Outputs;
-
- /// <summary>
+ /// <summary>
/// Simplistic <seealso cref="CharFilter"/> that applies the mappings
/// contained in a <seealso cref="NormalizeCharMap"/> to the character
/// stream, and correcting the resulting changes to the
@@ -54,9 +50,9 @@ namespace org.apache.lucene.analysis.charfilter
/// <summary>
/// Default constructor that takes a <seealso cref="Reader"/>. </summary>
- public MappingCharFilter(NormalizeCharMap normMap, Reader @in) : base(@in)
+ public MappingCharFilter(NormalizeCharMap normMap, TextReader @in) : base(@in)
{
- buffer.reset(@in);
+ buffer.Reset(@in);
map = normMap.map;
cachedRootArcs = normMap.cachedRootArcs;
@@ -71,11 +67,9 @@ namespace org.apache.lucene.analysis.charfilter
}
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
+ public override void Reset()
{
- input.reset();
+ input.Reset();
buffer.reset(input);
replacement = null;
inputOff = 0;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
index ade4318..c143455 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CharFilter/NormalizeCharMap.cs
@@ -1,7 +1,6 @@
using System;
using System.Diagnostics;
using System.Collections.Generic;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -18,6 +17,7 @@ using System.Collections.Generic;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+using Lucene.Net.Analysis.CharFilter;
namespace org.apache.lucene.analysis.charfilter
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
index 06deccf..0d083c4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilter.cs
@@ -16,6 +16,7 @@
*/
using System.Text;
using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
using org.apache.lucene.analysis.util;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
index 58b40a1..f9cdf1f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/CompoundWordTokenFilterBase.cs
@@ -1,7 +1,10 @@
-using System.Diagnostics;
-using System.Collections.Generic;
+using System.Collections.Generic;
+using System.Diagnostics;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.compound
+namespace Lucene.Net.Analysis.Compound
{
/*
@@ -20,16 +23,7 @@ namespace org.apache.lucene.analysis.compound
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using AttributeSource = org.apache.lucene.util.AttributeSource;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// Base class for decomposition token filters.
/// <para>
///
@@ -86,7 +80,7 @@ namespace org.apache.lucene.analysis.compound
protected internal CompoundWordTokenFilterBase(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(input)
{
this.matchVersion = matchVersion;
- this.tokens = new LinkedList<>();
+ this.tokens = new LinkedList<CompoundToken>();
if (minWordSize < 0)
{
throw new System.ArgumentException("minWordSize cannot be negative");
@@ -106,17 +100,15 @@ namespace org.apache.lucene.analysis.compound
this.dictionary = dictionary;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
+ public override bool IncrementToken()
{
if (tokens.Count > 0)
{
Debug.Assert(current != null);
- CompoundToken token = tokens.RemoveFirst();
- restoreState(current); // keep all other attributes untouched
- termAtt.setEmpty().append(token.txt);
- offsetAtt.setOffset(token.startOffset, token.endOffset);
+ CompoundToken token = tokens.First.Value; tokens.RemoveFirst();
+ RestoreState(current); // keep all other attributes untouched
+ termAtt.SetEmpty().Append(token.txt);
+ offsetAtt.SetOffset(token.startOffset, token.endOffset);
posIncAtt.PositionIncrement = 0;
return true;
}
@@ -165,7 +157,7 @@ namespace org.apache.lucene.analysis.compound
{
private readonly CompoundWordTokenFilterBase outerInstance;
- public readonly CharSequence txt;
+ public readonly string txt;
public readonly int startOffset, endOffset;
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
index 6b875e0..a69c35d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilter.cs
@@ -1,137 +1,134 @@
-namespace org.apache.lucene.analysis.compound
-{
-
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
- /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
- /// <para>
- /// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
- /// "Donaudampfschiff" even when you only enter "schiff".
- /// It uses a brute-force algorithm to achieve this.
- /// </para>
- /// <para>
- /// You must specify the required <seealso cref="Version"/> compatibility when creating
- /// CompoundWordTokenFilterBase:
- /// <ul>
- /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- /// supplementary characters in strings and char arrays provided as compound word
- /// dictionaries.
- /// </ul>
- /// </para>
- /// </summary>
- public class DictionaryCompoundWordTokenFilter : CompoundWordTokenFilterBase
- {
+namespace Lucene.Net.Analysis.Compound
+{
- /// <summary>
- /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/>
- /// </summary>
- /// <param name="matchVersion">
- /// Lucene version to enable correct Unicode 4.0 behavior in the
- /// dictionaries if Version > 3.0. See <a
- /// href="CompoundWordTokenFilterBase.html#version"
- /// >CompoundWordTokenFilterBase</a> for details. </param>
- /// <param name="input">
- /// the <seealso cref="TokenStream"/> to process </param>
- /// <param name="dictionary">
- /// the word dictionary to match against. </param>
- public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary) : base(matchVersion, input, dictionary)
- {
- if (dictionary == null)
- {
- throw new System.ArgumentException("dictionary cannot be null");
- }
- }
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
+ /// <para>
+ /// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
+ /// "Donaudampfschiff" even when you only enter "schiff".
+ /// It uses a brute-force algorithm to achieve this.
+ /// </para>
+ /// <para>
+ /// You must specify the required <seealso cref="Version"/> compatibility when creating
+ /// CompoundWordTokenFilterBase:
+ /// <ul>
+ /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+ /// supplementary characters in strings and char arrays provided as compound word
+ /// dictionaries.
+ /// </ul>
+ /// </para>
+ /// </summary>
+ public class DictionaryCompoundWordTokenFilter : CompoundWordTokenFilterBase
+ {
- /// <summary>
- /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/>
- /// </summary>
- /// <param name="matchVersion">
- /// Lucene version to enable correct Unicode 4.0 behavior in the
- /// dictionaries if Version > 3.0. See <a
- /// href="CompoundWordTokenFilterBase.html#version"
- /// >CompoundWordTokenFilterBase</a> for details. </param>
- /// <param name="input">
- /// the <seealso cref="TokenStream"/> to process </param>
- /// <param name="dictionary">
- /// the word dictionary to match against. </param>
- /// <param name="minWordSize">
- /// only words longer than this get processed </param>
- /// <param name="minSubwordSize">
- /// only subwords longer than this get to the output stream </param>
- /// <param name="maxSubwordSize">
- /// only subwords shorter than this get to the output stream </param>
- /// <param name="onlyLongestMatch">
- /// Add only the longest matching subword to the stream </param>
- public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
- {
- if (dictionary == null)
- {
- throw new System.ArgumentException("dictionary cannot be null");
- }
- }
+ /// <summary>
+ /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/>
+ /// </summary>
+ /// <param name="matchVersion">
+ /// Lucene version to enable correct Unicode 4.0 behavior in the
+ /// dictionaries if Version > 3.0. See <a
+ /// href="CompoundWordTokenFilterBase.html#version"
+ /// >CompoundWordTokenFilterBase</a> for details. </param>
+ /// <param name="input">
+ /// the <seealso cref="TokenStream"/> to process </param>
+ /// <param name="dictionary">
+ /// the word dictionary to match against. </param>
+ public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary)
+ : base(matchVersion, input, dictionary)
+ {
+ if (dictionary == null)
+ {
+ throw new System.ArgumentException("dictionary cannot be null");
+ }
+ }
- protected internal override void decompose()
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int len = termAtt.length();
- int len = termAtt.length();
- for (int i = 0;i <= len - this.minSubwordSize;++i)
- {
- CompoundToken longestMatchToken = null;
- for (int j = this.minSubwordSize;j <= this.maxSubwordSize;++j)
- {
- if (i + j > len)
- {
- break;
- }
- if (dictionary.contains(termAtt.buffer(), i, j))
- {
- if (this.onlyLongestMatch)
- {
- if (longestMatchToken != null)
- {
- if (longestMatchToken.txt.length() < j)
- {
- longestMatchToken = new CompoundToken(this, i,j);
- }
- }
- else
- {
- longestMatchToken = new CompoundToken(this, i,j);
- }
- }
- else
- {
- tokens.AddLast(new CompoundToken(this, i,j));
- }
- }
- }
- if (this.onlyLongestMatch && longestMatchToken != null)
- {
- tokens.AddLast(longestMatchToken);
- }
- }
- }
- }
+ /// <summary>
+ /// Creates a new <seealso cref="DictionaryCompoundWordTokenFilter"/>
+ /// </summary>
+ /// <param name="matchVersion">
+ /// Lucene version to enable correct Unicode 4.0 behavior in the
+ /// dictionaries if Version > 3.0. See <a
+ /// href="CompoundWordTokenFilterBase.html#version"
+ /// >CompoundWordTokenFilterBase</a> for details. </param>
+ /// <param name="input">
+ /// the <seealso cref="TokenStream"/> to process </param>
+ /// <param name="dictionary">
+ /// the word dictionary to match against. </param>
+ /// <param name="minWordSize">
+ /// only words longer than this get processed </param>
+ /// <param name="minSubwordSize">
+ /// only subwords longer than this get to the output stream </param>
+ /// <param name="maxSubwordSize">
+ /// only subwords shorter than this get to the output stream </param>
+ /// <param name="onlyLongestMatch">
+ /// Add only the longest matching subword to the stream </param>
+ public DictionaryCompoundWordTokenFilter(Version matchVersion, TokenStream input, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
+ : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
+ {
+ if (dictionary == null)
+ {
+ throw new System.ArgumentException("dictionary cannot be null");
+ }
+ }
+ protected internal override void Decompose()
+ {
+ int len = termAtt.Length;
+ for (int i = 0; i <= len - this.minSubwordSize; ++i)
+ {
+ CompoundToken longestMatchToken = null;
+ for (int j = this.minSubwordSize; j <= this.maxSubwordSize; ++j)
+ {
+ if (i + j > len)
+ {
+ break;
+ }
+ if (dictionary.Contains(termAtt.Buffer(), i, j))
+ {
+ if (this.onlyLongestMatch)
+ {
+ if (longestMatchToken != null)
+ {
+ if (longestMatchToken.txt.Length < j)
+ {
+ longestMatchToken = new CompoundToken(this, i, j);
+ }
+ }
+ else
+ {
+ longestMatchToken = new CompoundToken(this, i, j);
+ }
+ }
+ else
+ {
+ tokens.AddLast(new CompoundToken(this, i, j));
+ }
+ }
+ }
+ if (this.onlyLongestMatch && longestMatchToken != null)
+ {
+ tokens.AddLast(longestMatchToken);
+ }
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
index 497d89d..ef8f1dc 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/DictionaryCompoundWordTokenFilterFactory.cs
@@ -1,81 +1,71 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.compound
+namespace Lucene.Net.Analysis.Compound
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="DictionaryCompoundWordTokenFilter"/>.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
+ /// minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ /// </summary>
+ public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
+ {
+ private CharArraySet dictionary;
+ private readonly string dictFile;
+ private readonly int minWordSize;
+ private readonly int minSubwordSize;
+ private readonly int maxSubwordSize;
+ private readonly bool onlyLongestMatch;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
-
-
- /// <summary>
- /// Factory for <seealso cref="DictionaryCompoundWordTokenFilter"/>.
- /// <pre class="prettyprint">
- /// <fieldType name="text_dictcomp" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="dictionary.txt"
- /// minWordSize="5" minSubwordSize="2" maxSubwordSize="15" onlyLongestMatch="true"/>
- /// </analyzer>
- /// </fieldType></pre>
- /// </summary>
- public class DictionaryCompoundWordTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
- {
- private CharArraySet dictionary;
- private readonly string dictFile;
- private readonly int minWordSize;
- private readonly int minSubwordSize;
- private readonly int maxSubwordSize;
- private readonly bool onlyLongestMatch;
-
- /// <summary>
- /// Creates a new DictionaryCompoundWordTokenFilterFactory </summary>
- public DictionaryCompoundWordTokenFilterFactory(IDictionary<string, string> args) : base(args)
- {
- assureMatchVersion();
- dictFile = require(args, "dictionary");
- minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
- minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
- maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
- onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
- public virtual void inform(ResourceLoader loader)
- {
- dictionary = base.getWordSet(loader, dictFile, false);
- }
-
- public override TokenStream create(TokenStream input)
- {
- // if the dictionary is null, it means it was empty
- return dictionary == null ? input : new DictionaryCompoundWordTokenFilter(luceneMatchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
- }
- }
+ /// <summary>
+ /// Creates a new DictionaryCompoundWordTokenFilterFactory </summary>
+ public DictionaryCompoundWordTokenFilterFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ assureMatchVersion();
+ dictFile = require(args, "dictionary");
+ minWordSize = getInt(args, "minWordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
+ minSubwordSize = getInt(args, "minSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
+ maxSubwordSize = getInt(args, "maxSubwordSize", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
+ onlyLongestMatch = getBoolean(args, "onlyLongestMatch", true);
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+ public virtual void Inform(ResourceLoader loader)
+ {
+ dictionary = base.GetWordSet(loader, dictFile, false);
+ }
+ public override TokenStream Create(TokenStream input)
+ {
+ // if the dictionary is null, it means it was empty
+ return dictionary == null ? input : new DictionaryCompoundWordTokenFilter(luceneMatchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
index 0b5e99c..a8014ad 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
@@ -1,255 +1,248 @@
-namespace org.apache.lucene.analysis.compound
-{
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.compound;
+using org.apache.lucene.analysis.compound.hyphenation;
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- using Hyphenation = org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
- using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using Version = org.apache.lucene.util.Version;
- using InputSource = org.xml.sax.InputSource;
-
- /// <summary>
- /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
- /// <para>
- /// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
- /// "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation
- /// grammar and a word dictionary to achieve this.
- /// </para>
- /// <para>
- /// You must specify the required <seealso cref="Version"/> compatibility when creating
- /// CompoundWordTokenFilterBase:
- /// <ul>
- /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
- /// supplementary characters in strings and char arrays provided as compound word
- /// dictionaries.
- /// </ul>
- /// </para>
- /// </summary>
- public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
- {
- private HyphenationTree hyphenator;
-
- /// <summary>
- /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance.
- /// </summary>
- /// <param name="matchVersion">
- /// Lucene version to enable correct Unicode 4.0 behavior in the
- /// dictionaries if Version > 3.0. See <a
- /// href="CompoundWordTokenFilterBase.html#version"
- /// >CompoundWordTokenFilterBase</a> for details. </param>
- /// <param name="input">
- /// the <seealso cref="TokenStream"/> to process </param>
- /// <param name="hyphenator">
- /// the hyphenation pattern tree to use for hyphenation </param>
- /// <param name="dictionary">
- /// the word dictionary to match against. </param>
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary) : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
- {
- }
-
- /// <summary>
- /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance.
- /// </summary>
- /// <param name="matchVersion">
- /// Lucene version to enable correct Unicode 4.0 behavior in the
- /// dictionaries if Version > 3.0. See <a
- /// href="CompoundWordTokenFilterBase.html#version"
- /// >CompoundWordTokenFilterBase</a> for details. </param>
- /// <param name="input">
- /// the <seealso cref="TokenStream"/> to process </param>
- /// <param name="hyphenator">
- /// the hyphenation pattern tree to use for hyphenation </param>
- /// <param name="dictionary">
- /// the word dictionary to match against. </param>
- /// <param name="minWordSize">
- /// only words longer than this get processed </param>
- /// <param name="minSubwordSize">
- /// only subwords longer than this get to the output stream </param>
- /// <param name="maxSubwordSize">
- /// only subwords shorter than this get to the output stream </param>
- /// <param name="onlyLongestMatch">
- /// Add only the longest matching subword to the stream </param>
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
- {
-
- this.hyphenator = hyphenator;
- }
-
- /// <summary>
- /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
- /// <para>
- /// Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
- /// HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
- /// null, minWordSize, minSubwordSize, maxSubwordSize }
- /// </para>
- /// </summary>
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, int minWordSize, int minSubwordSize, int maxSubwordSize) : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, maxSubwordSize, false)
- {
- }
-
- /// <summary>
- /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
- /// <para>
- /// Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, int, int, int)
- /// HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
- /// DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE }
- /// </para>
- /// </summary>
- public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator) : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE)
- {
- }
-
- /// <summary>
- /// Create a hyphenator tree
- /// </summary>
- /// <param name="hyphenationFilename"> the filename of the XML grammar to load </param>
- /// <returns> An object representing the hyphenation patterns </returns>
- /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static org.apache.lucene.analysis.compound.hyphenation.HyphenationTree getHyphenationTree(String hyphenationFilename) throws java.io.IOException
- public static HyphenationTree getHyphenationTree(string hyphenationFilename)
- {
- return getHyphenationTree(new InputSource(hyphenationFilename));
- }
-
- /// <summary>
- /// Create a hyphenator tree
- /// </summary>
- /// <param name="hyphenationFile"> the file of the XML grammar to load </param>
- /// <returns> An object representing the hyphenation patterns </returns>
- /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static org.apache.lucene.analysis.compound.hyphenation.HyphenationTree getHyphenationTree(java.io.File hyphenationFile) throws java.io.IOException
- public static HyphenationTree getHyphenationTree(File hyphenationFile)
- {
- return getHyphenationTree(new InputSource(hyphenationFile.toURI().toASCIIString()));
- }
-
- /// <summary>
- /// Create a hyphenator tree
- /// </summary>
- /// <param name="hyphenationSource"> the InputSource pointing to the XML grammar </param>
- /// <returns> An object representing the hyphenation patterns </returns>
- /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public static org.apache.lucene.analysis.compound.hyphenation.HyphenationTree getHyphenationTree(org.xml.sax.InputSource hyphenationSource) throws java.io.IOException
- public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
- {
- HyphenationTree tree = new HyphenationTree();
- tree.loadPatterns(hyphenationSource);
- return tree;
- }
-
- protected internal override void decompose()
- {
- // get the hyphenation points
- Hyphenation hyphens = hyphenator.hyphenate(termAtt.buffer(), 0, termAtt.length(), 1, 1);
- // No hyphen points found -> exit
- if (hyphens == null)
- {
- return;
- }
-
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int[] hyp = hyphens.getHyphenationPoints();
- int[] hyp = hyphens.HyphenationPoints;
-
- for (int i = 0; i < hyp.Length; ++i)
- {
- int remaining = hyp.Length - i;
- int start = hyp[i];
- CompoundToken longestMatchToken = null;
- for (int j = 1; j < remaining; j++)
- {
- int partLength = hyp[i + j] - start;
-
- // if the part is longer than maxSubwordSize we
- // are done with this round
- if (partLength > this.maxSubwordSize)
- {
- break;
- }
-
- // we only put subwords to the token stream
- // that are longer than minPartSize
- if (partLength < this.minSubwordSize)
- {
- // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
- // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
- continue;
- }
-
- // check the dictionary
- if (dictionary == null || dictionary.contains(termAtt.buffer(), start, partLength))
- {
- if (this.onlyLongestMatch)
- {
- if (longestMatchToken != null)
- {
- if (longestMatchToken.txt.length() < partLength)
- {
- longestMatchToken = new CompoundToken(this, start, partLength);
- }
- }
- else
- {
- longestMatchToken = new CompoundToken(this, start, partLength);
- }
- }
- else
- {
- tokens.AddLast(new CompoundToken(this, start, partLength));
- }
- }
- else if (dictionary.contains(termAtt.buffer(), start, partLength - 1))
- {
- // check the dictionary again with a word that is one character
- // shorter
- // to avoid problems with genitive 's characters and other binding
- // characters
- if (this.onlyLongestMatch)
- {
- if (longestMatchToken != null)
- {
- if (longestMatchToken.txt.length() < partLength - 1)
- {
- longestMatchToken = new CompoundToken(this, start, partLength - 1);
- }
- }
- else
- {
- longestMatchToken = new CompoundToken(this, start, partLength - 1);
- }
- }
- else
- {
- tokens.AddLast(new CompoundToken(this, start, partLength - 1));
- }
- }
- }
- if (this.onlyLongestMatch && longestMatchToken != null)
- {
- tokens.AddLast(longestMatchToken);
- }
- }
- }
- }
+namespace Lucene.Net.Analysis.Compound
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// A <seealso cref="TokenFilter"/> that decomposes compound words found in many Germanic languages.
+ /// <para>
+ /// "Donaudampfschiff" becomes Donau, dampf, schiff so that you can find
+ /// "Donaudampfschiff" even when you only enter "schiff". It uses a hyphenation
+ /// grammar and a word dictionary to achieve this.
+ /// </para>
+ /// <para>
+ /// You must specify the required <seealso cref="Version"/> compatibility when creating
+ /// CompoundWordTokenFilterBase:
+ /// <ul>
+ /// <li>As of 3.1, CompoundWordTokenFilterBase correctly handles Unicode 4.0
+ /// supplementary characters in strings and char arrays provided as compound word
+ /// dictionaries.</li>
+ /// </ul>
+ /// </para>
+ /// </summary>
+ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
+ {
+ private readonly HyphenationTree hyphenator;
+
+ /// <summary>
+ /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance using the default
+ /// word and subword size limits and with <c>onlyLongestMatch</c> disabled.
+ /// </summary>
+ /// <param name="matchVersion">
+ /// Lucene version to enable correct Unicode 4.0 behavior in the
+ /// dictionaries if Version > 3.0. See <a
+ /// href="CompoundWordTokenFilterBase.html#version"
+ /// >CompoundWordTokenFilterBase</a> for details. </param>
+ /// <param name="input">
+ /// the <seealso cref="TokenStream"/> to process </param>
+ /// <param name="hyphenator">
+ /// the hyphenation pattern tree to use for hyphenation </param>
+ /// <param name="dictionary">
+ /// the word dictionary to match against. </param>
+ public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary)
+ : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
+ {
+ }
+
+ /// <summary>
+ /// Creates a new <seealso cref="HyphenationCompoundWordTokenFilter"/> instance.
+ /// </summary>
+ /// <param name="matchVersion">
+ /// Lucene version to enable correct Unicode 4.0 behavior in the
+ /// dictionaries if Version > 3.0. See <a
+ /// href="CompoundWordTokenFilterBase.html#version"
+ /// >CompoundWordTokenFilterBase</a> for details. </param>
+ /// <param name="input">
+ /// the <seealso cref="TokenStream"/> to process </param>
+ /// <param name="hyphenator">
+ /// the hyphenation pattern tree to use for hyphenation </param>
+ /// <param name="dictionary">
+ /// the word dictionary to match against. </param>
+ /// <param name="minWordSize">
+ /// only words longer than this get processed </param>
+ /// <param name="minSubwordSize">
+ /// only subwords longer than this get to the output stream </param>
+ /// <param name="maxSubwordSize">
+ /// only subwords shorter than this get to the output stream </param>
+ /// <param name="onlyLongestMatch">
+ /// Add only the longest matching subword to the stream </param>
+ public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch)
+ : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch)
+ {
+
+ this.hyphenator = hyphenator;
+ }
+
+ /// <summary>
+ /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
+ /// <para>
+ /// Delegates to the full constructor with a <c>null</c> dictionary and
+ /// <c>onlyLongestMatch</c> set to <c>false</c>.
+ /// </para>
+ /// </summary>
+ public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator, int minWordSize, int minSubwordSize, int maxSubwordSize)
+ : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, maxSubwordSize, false)
+ {
+ }
+
+ /// <summary>
+ /// Create a HyphenationCompoundWordTokenFilter with no dictionary.
+ /// <para>
+ /// Delegates to the dictionary-less constructor with DEFAULT_MIN_WORD_SIZE,
+ /// DEFAULT_MIN_SUBWORD_SIZE and DEFAULT_MAX_SUBWORD_SIZE.
+ /// </para>
+ /// </summary>
+ public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input, HyphenationTree hyphenator)
+ : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE)
+ {
+ }
+
+ /// <summary>
+ /// Create a hyphenator tree
+ /// </summary>
+ /// <param name="hyphenationFilename"> the filename of the XML grammar to load </param>
+ /// <returns> An object representing the hyphenation patterns </returns>
+ /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+ public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
+ {
+ return getHyphenationTree(new InputSource(hyphenationFilename));
+ }
+
+ /// <summary>
+ /// Create a hyphenator tree
+ /// </summary>
+ /// <param name="hyphenationFile"> the file of the XML grammar to load </param>
+ /// <returns> An object representing the hyphenation patterns </returns>
+ /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+ public static HyphenationTree GetHyphenationTree(File hyphenationFile)
+ {
+ // NOTE(review): ToURI()/toASCIIString() are Java-style members carried over from the port;
+ // confirm they exist on the File type that is in scope here.
+ return getHyphenationTree(new InputSource(hyphenationFile.ToURI().toASCIIString()));
+ }
+
+ /// <summary>
+ /// Create a hyphenator tree
+ /// </summary>
+ /// <param name="hyphenationSource"> the InputSource pointing to the XML grammar </param>
+ /// <returns> An object representing the hyphenation patterns </returns>
+ /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
+ public static HyphenationTree getHyphenationTree(InputSource hyphenationSource)
+ {
+ var tree = new HyphenationTree();
+ tree.loadPatterns(hyphenationSource);
+ return tree;
+ }
+
+ /// <summary>
+ /// Splits the current term at the hyphenation points reported by the hyphenator and
+ /// queues every accepted part as a <c>CompoundToken</c> in <c>tokens</c>.
+ /// <para>
+ /// A part is accepted when its length lies between minSubwordSize and maxSubwordSize and
+ /// either no dictionary is set, the dictionary contains the part, or the dictionary
+ /// contains the part shortened by one trailing character. With onlyLongestMatch enabled,
+ /// at most one token (the longest accepted one) is queued per start point.
+ /// </para>
+ /// </summary>
+ protected internal override void decompose()
+ {
+ // get the hyphenation points
+ Hyphenation hyphens = hyphenator.hyphenate(termAtt.Buffer(), 0, termAtt.Length(), 1, 1);
+ // No hyphen points found -> exit
+ if (hyphens == null)
+ {
+ return;
+ }
+
+ int[] hyp = hyphens.HyphenationPoints;
+
+ for (int i = 0; i < hyp.Length; ++i)
+ {
+ int remaining = hyp.Length - i;
+ int start = hyp[i];
+ CompoundToken longestMatchToken = null;
+ for (int j = 1; j < remaining; j++)
+ {
+ int partLength = hyp[i + j] - start;
+
+ // if the part is longer than maxSubwordSize we
+ // are done with this round
+ if (partLength > this.maxSubwordSize)
+ {
+ break;
+ }
+
+ // we only put subwords to the token stream
+ // that are at least minSubwordSize long
+ if (partLength < this.minSubwordSize)
+ {
+ // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
+ // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
+ continue;
+ }
+
+ // check the dictionary
+ if (dictionary == null || dictionary.Contains(termAtt.Buffer(), start, partLength))
+ {
+ AddCandidate(start, partLength, ref longestMatchToken);
+ }
+ else if (dictionary.Contains(termAtt.Buffer(), start, partLength - 1))
+ {
+ // check the dictionary again with a word that is one character
+ // shorter
+ // to avoid problems with genitive 's characters and other binding
+ // characters
+ AddCandidate(start, partLength - 1, ref longestMatchToken);
+ }
+ }
+ if (this.onlyLongestMatch && longestMatchToken != null)
+ {
+ tokens.AddLast(longestMatchToken);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Records one accepted subword: queues it immediately when all matches are wanted, otherwise
+ /// keeps it in <paramref name="longestMatchToken"/> only if it is longer than the current holder.
+ /// </summary>
+ private void AddCandidate(int start, int length, ref CompoundToken longestMatchToken)
+ {
+ if (!this.onlyLongestMatch)
+ {
+ tokens.AddLast(new CompoundToken(this, start, length));
+ }
+ else if (longestMatchToken == null || longestMatchToken.txt.Length() < length)
+ {
+ longestMatchToken = new CompoundToken(this, start, length);
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
index 4a51f7b..ba6e2f7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilterFactory.cs
@@ -1,7 +1,10 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.compound;
+using org.apache.lucene.analysis.compound.hyphenation;
+using org.apache.lucene.analysis.util;
-namespace org.apache.lucene.analysis.compound
+namespace Lucene.Net.Analysis.Compound
{
/*
@@ -20,17 +23,7 @@ namespace org.apache.lucene.analysis.compound
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using HyphenationTree = org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
- using IOUtils = org.apache.lucene.util.IOUtils;
-
- using InputSource = org.xml.sax.InputSource;
-
- /// <summary>
+ /// <summary>
/// Factory for <seealso cref="HyphenationCompoundWordTokenFilter"/>.
/// <para>
/// This factory accepts the following parameters:
@@ -89,9 +82,7 @@ namespace org.apache.lucene.analysis.compound
}
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
- public virtual void inform(ResourceLoader loader)
+ public virtual void Inform(ResourceLoader loader)
{
InputStream stream = null;
try
@@ -103,8 +94,6 @@ namespace org.apache.lucene.analysis.compound
// TODO: Broken, because we cannot resolve real system id
// ResourceLoader should also supply method like ClassLoader to get resource URL
stream = loader.openResource(hypFile);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final org.xml.sax.InputSource is = new org.xml.sax.InputSource(stream);
InputSource @is = new InputSource(stream);
@is.Encoding = encoding; // if it's null let xml parser decide
@is.SystemId = hypFile;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
index 097bc4b..b3c0c58 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/LowerCaseFilter.cs
@@ -1,4 +1,5 @@
using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
using org.apache.lucene.analysis.util;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
index cc5a39e..2857938 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
@@ -1,5 +1,6 @@
using System.Collections.Generic;
using System.IO;
+using Lucene.Net.Analysis.Util;
using Lucene.Net.Support;
using Lucene.Net.Util;
using org.apache.lucene.analysis.util;
@@ -90,12 +91,12 @@ namespace Lucene.Net.Analysis.Core
/// <summary>
/// Creates
/// <seealso cref="Analyzer.TokenStreamComponents"/>
- /// used to tokenize all the text in the provided <seealso cref="Reader"/>.
+ /// used to tokenize all the text in the provided <seealso cref="TextReader"/>.
/// </summary>
/// <returns> <seealso cref="Analyzer.TokenStreamComponents"/>
/// built from a <seealso cref="LowerCaseTokenizer"/> filtered with
/// <seealso cref="StopFilter"/> </returns>
- protected internal override Analyzer.TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ public override Analyzer.TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
{
Tokenizer source = new LowerCaseTokenizer(matchVersion, reader);
return new Analyzer.TokenStreamComponents(source, new StopFilter(matchVersion, source, stopwords));
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
index c74874d..4f89bae 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopFilterFactory.cs
@@ -1,162 +1,154 @@
using System;
using System.Collections.Generic;
-using Lucene.Net.Analysis.Core;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.core
+namespace Lucene.Net.Analysis.Core
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ // jdocs
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
- using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader; // jdocs
+ /// <summary>
+ /// Factory for <seealso cref="StopFilter"/>.
+ ///
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.StopFilterFactory" ignoreCase="true"
+ /// words="stopwords.txt" format="wordset" />
+ /// </analyzer>
+ /// </fieldType></pre>
+ ///
+ /// <para>
+ /// All attributes are optional:
+ /// </para>
+ /// <ul>
+ /// <li><code>ignoreCase</code> defaults to <code>false</code></li>
+ /// <li><code>words</code> should be the name of a stopwords file to parse, if not
+ /// specified the factory will use <seealso cref="StopAnalyzer#ENGLISH_STOP_WORDS_SET"/>
+ /// </li>
+ /// <li><code>format</code> defines how the <code>words</code> file will be parsed,
+ /// and defaults to <code>wordset</code>. If <code>words</code> is not specified,
+ /// then <code>format</code> must not be specified.
+ /// </li>
+ /// </ul>
+ /// <para>
+ /// The valid values for the <code>format</code> option are:
+ /// </para>
+ /// <ul>
+ /// <li><code>wordset</code> - This is the default format, which supports one word per
+ /// line (including any intra-word whitespace) and allows whole line comments
+ /// beginning with the "#" character. Blank lines are ignored. See
+ /// <seealso cref="WordlistLoader#getLines WordlistLoader.getLines"/> for details.
+ /// </li>
+ /// <li><code>snowball</code> - This format allows for multiple words specified on each
+ /// line, and trailing comments may be specified using the vertical line ("|").
+ /// Blank lines are ignored. See
+ /// <seealso cref="WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet"/>
+ /// for details.
+ /// </li>
+ /// </ul>
+ /// </summary>
+ public class StopFilterFactory : TokenFilterFactory, ResourceLoaderAware
+ {
+ public const string FORMAT_WORDSET = "wordset";
+ public const string FORMAT_SNOWBALL = "snowball";
- /// <summary>
- /// Factory for <seealso cref="StopFilter"/>.
- ///
- /// <pre class="prettyprint">
- /// <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.StopFilterFactory" ignoreCase="true"
- /// words="stopwords.txt" format="wordset" />
- /// </analyzer>
- /// </fieldType></pre>
- ///
- /// <para>
- /// All attributes are optional:
- /// </para>
- /// <ul>
- /// <li><code>ignoreCase</code> defaults to <code>false</code></li>
- /// <li><code>words</code> should be the name of a stopwords file to parse, if not
- /// specified the factory will use <seealso cref="StopAnalyzer#ENGLISH_STOP_WORDS_SET"/>
- /// </li>
- /// <li><code>format</code> defines how the <code>words</code> file will be parsed,
- /// and defaults to <code>wordset</code>. If <code>words</code> is not specified,
- /// then <code>format</code> must not be specified.
- /// </li>
- /// </ul>
- /// <para>
- /// The valid values for the <code>format</code> option are:
- /// </para>
- /// <ul>
- /// <li><code>wordset</code> - This is the default format, which supports one word per
- /// line (including any intra-word whitespace) and allows whole line comments
- /// begining with the "#" character. Blank lines are ignored. See
- /// <seealso cref="WordlistLoader#getLines WordlistLoader.getLines"/> for details.
- /// </li>
- /// <li><code>snowball</code> - This format allows for multiple words specified on each
- /// line, and trailing comments may be specified using the vertical line ("|").
- /// Blank lines are ignored. See
- /// <seealso cref="WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet"/>
- /// for details.
- /// </li>
- /// </ul>
- /// </summary>
- public class StopFilterFactory : TokenFilterFactory, ResourceLoaderAware
- {
- public const string FORMAT_WORDSET = "wordset";
- public const string FORMAT_SNOWBALL = "snowball";
+ private CharArraySet stopWords;
+ private readonly string stopWordFiles;
+ private readonly string format;
+ private readonly bool ignoreCase;
+ private readonly bool enablePositionIncrements;
- private CharArraySet stopWords;
- private readonly string stopWordFiles;
- private readonly string format;
- private readonly bool ignoreCase;
- private readonly bool enablePositionIncrements;
+ /// <summary>
+ /// Creates a new StopFilterFactory from the given configuration arguments.
+ /// </summary>
+ /// <param name="args"> factory arguments; reads "words", "format", "ignoreCase" and
+ /// "enablePositionIncrements". "format" defaults to <see cref="FORMAT_WORDSET"/> only when
+ /// "words" is present, otherwise to <c>null</c> </param>
+ /// <exception cref="System.ArgumentException"> if <paramref name="args"/> still contains entries after
+ /// the known parameters have been read </exception>
+ public StopFilterFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ assureMatchVersion();
+ stopWordFiles = get(args, "words");
+ format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
+ ignoreCase = getBoolean(args, "ignoreCase", false);
+ enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+ if (args.Count > 0)
+ {
+ // Concatenating the dictionary itself would only print its type name in .NET
+ // (unlike Java's Map.toString()), so list the leftover keys explicitly.
+ throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+ }
+ }
- /// <summary>
- /// Creates a new StopFilterFactory </summary>
- public StopFilterFactory(IDictionary<string, string> args) : base(args)
- {
- assureMatchVersion();
- stopWordFiles = get(args, "words");
- format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
- ignoreCase = getBoolean(args, "ignoreCase", false);
- enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
+ /// <summary>
+ /// Builds the stop word set. When a "words" file was configured it is loaded according to
+ /// <c>format</c> (wordset or snowball); otherwise <c>StopAnalyzer.ENGLISH_STOP_WORDS_SET</c> is used.
+ /// </summary>
+ /// <param name="loader">resource loader passed through to the word-set loading helpers</param>
+ /// <exception cref="System.ArgumentException">if <c>format</c> is not a known format name,
+ /// or if <c>format</c> was given without a "words" file</exception>
+ public virtual void Inform(ResourceLoader loader)
+ {
+     if (stopWordFiles != null)
+     {
+         // Format names are configuration keywords, not linguistic text: compare them
+         // ordinally so the result does not depend on the thread's current culture.
+         if (FORMAT_WORDSET.Equals(format, StringComparison.OrdinalIgnoreCase))
+         {
+             stopWords = GetWordSet(loader, stopWordFiles, ignoreCase);
+         }
+         else if (FORMAT_SNOWBALL.Equals(format, StringComparison.OrdinalIgnoreCase))
+         {
+             stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
+         }
+         else
+         {
+             throw new System.ArgumentException("Unknown 'format' specified for 'words' file: " + format);
+         }
+     }
+     else
+     {
+         // Without a words file there is nothing for 'format' to apply to.
+         if (null != format)
+         {
+             throw new System.ArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
+         }
+         stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
+     }
+ }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
- public virtual void inform(ResourceLoader loader)
- {
- if (stopWordFiles != null)
- {
- if (FORMAT_WORDSET.Equals(format, StringComparison.CurrentCultureIgnoreCase))
- {
- stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
- }
- else if (FORMAT_SNOWBALL.Equals(format, StringComparison.CurrentCultureIgnoreCase))
- {
- stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
- }
- else
- {
- throw new System.ArgumentException("Unknown 'format' specified for 'words' file: " + format);
- }
- }
- else
- {
- if (null != format)
- {
- throw new System.ArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
- }
- stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
- }
- }
+ /// <summary>
+ /// Gets the enablePositionIncrements setting held by this factory.
+ /// </summary>
+ public virtual bool EnablePositionIncrements
+ {
+     get { return enablePositionIncrements; }
+ }
- public virtual bool EnablePositionIncrements
- {
- get
- {
- return enablePositionIncrements;
- }
- }
+ /// <summary>
+ /// Gets the ignoreCase setting held by this factory.
+ /// </summary>
+ public virtual bool IgnoreCase
+ {
+     get { return ignoreCase; }
+ }
- public virtual bool IgnoreCase
- {
- get
- {
- return ignoreCase;
- }
- }
-
- public virtual CharArraySet StopWords
- {
- get
- {
- return stopWords;
- }
- }
-
- public override TokenStream create(TokenStream input)
- {
- StopFilter stopFilter = new StopFilter(luceneMatchVersion,input,stopWords);
- stopFilter.EnablePositionIncrements = enablePositionIncrements;
- return stopFilter;
- }
- }
+ /// <summary>
+ /// Gets the stop word set currently held by this factory.
+ /// </summary>
+ public virtual CharArraySet StopWords
+ {
+     get { return stopWords; }
+ }
+ /// <summary>
+ /// Wraps <paramref name="input"/> in a StopFilter built from this factory's match version and
+ /// stop words, with its EnablePositionIncrements set from this factory's setting.
+ /// </summary>
+ /// <param name="input">the token stream to filter</param>
+ /// <returns>the configured StopFilter</returns>
+ public override TokenStream Create(TokenStream input)
+ {
+     return new StopFilter(luceneMatchVersion, input, stopWords)
+     {
+         EnablePositionIncrements = enablePositionIncrements
+     };
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
index d5b7f10..286da3a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilter.cs
@@ -1,4 +1,5 @@
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
namespace org.apache.lucene.analysis.core
{
@@ -21,7 +22,7 @@ namespace org.apache.lucene.analysis.core
*/
using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
+ using CharacterUtils = CharacterUtils;
using Version = org.apache.lucene.util.Version;
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
index df3580f..e6b78b2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/UpperCaseFilterFactory.cs
@@ -1,74 +1,68 @@
using System.Collections.Generic;
-using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Util;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using org.apache.lucene.analysis.core;
-namespace org.apache.lucene.analysis.core
+namespace Lucene.Net.Analysis.Core
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="UpperCaseFilter"/>.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_uppercase" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.UpperCaseFilterFactory"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ ///
+ /// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
+ /// upper case character represents more than one lower case character. Use this filter
+ /// when you require uppercase tokens. Use the <seealso cref="LowerCaseFilterFactory"/> for
+ /// general search matching
+ /// </para>
+ /// </summary>
+ public class UpperCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+ {
- using AbstractAnalysisFactory = AbstractAnalysisFactory;
- using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
- using TokenFilterFactory = TokenFilterFactory;
+ /// <summary>
+ /// Creates a new UpperCaseFilterFactory. No factory-specific arguments are read here.
+ /// </summary>
+ /// <param name="args">factory arguments; presumably the base constructor and
+ /// assureMatchVersion consume the entries they understand (TODO confirm against the base class)</param>
+ /// <exception cref="System.ArgumentException">if any entries remain in <paramref name="args"/></exception>
+ public UpperCaseFilterFactory(IDictionary<string, string> args)
+     : base(args)
+ {
+     assureMatchVersion();
+     if (args.Count > 0)
+     {
+         // The standard .NET dictionary types do not override ToString(), so concatenating
+         // 'args' directly would only print a type name; list the offending keys instead.
+         throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+     }
+ }
- /// <summary>
- /// Factory for <seealso cref="UpperCaseFilter"/>.
- /// <pre class="prettyprint">
- /// <fieldType name="text_uppercase" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.UpperCaseFilterFactory"/>
- /// </analyzer>
- /// </fieldType></pre>
- ///
- /// <para><b>NOTE:</b> In Unicode, this transformation may lose information when the
- /// upper case character represents more than one lower case character. Use this filter
- /// when you require uppercase tokens. Use the <seealso cref="LowerCaseFilterFactory"/> for
- /// general search matching
- /// </para>
- /// </summary>
- public class UpperCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
- {
-
- /// <summary>
- /// Creates a new UpperCaseFilterFactory </summary>
- public UpperCaseFilterFactory(IDictionary<string, string> args) : base(args)
- {
- assureMatchVersion();
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override UpperCaseFilter create(TokenStream input)
- {
- return new UpperCaseFilter(luceneMatchVersion,input);
- }
-
- public virtual AbstractAnalysisFactory MultiTermComponent
- {
- get
- {
- return this;
- }
- }
- }
+ /// <summary>
+ /// Wraps <paramref name="input"/> in an UpperCaseFilter built with this factory's match version.
+ /// </summary>
+ /// <param name="input">the token stream to filter</param>
+ /// <returns>the new UpperCaseFilter</returns>
+ public override TokenStream Create(TokenStream input)
+ {
+     TokenStream upperCased = new UpperCaseFilter(luceneMatchVersion, input);
+     return upperCased;
+ }
+ /// <summary>
+ /// Gets the multi-term component for this factory, which is the factory itself.
+ /// </summary>
+ public virtual AbstractAnalysisFactory MultiTermComponent
+ {
+     get { return this; }
+ }
+ }
}
\ No newline at end of file
[2/4] lucenenet git commit: More Analysis porting fixes
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
index 0b6dc5a..5c1d0bb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenFilter.cs
@@ -1,6 +1,12 @@
using System;
-
-namespace org.apache.lucene.analysis.ngram
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.reverse;
+using org.apache.lucene.analysis.util;
+using Version = Lucene.Net.Util.Version;
+
+namespace Lucene.Net.Analysis.Ngram
{
/*
@@ -19,16 +25,7 @@ namespace org.apache.lucene.analysis.ngram
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using ReverseStringFilter = org.apache.lucene.analysis.reverse.ReverseStringFilter;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
- using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
- using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// Tokenizes the given token into n-grams of given size(s).
/// <para>
/// This <seealso cref="TokenFilter"/> create n-grams from the beginning edge or ending edge of a input token.
@@ -45,35 +42,21 @@ namespace org.apache.lucene.analysis.ngram
public const int DEFAULT_MAX_GRAM_SIZE = 1;
public const int DEFAULT_MIN_GRAM_SIZE = 1;
- /// <summary>
- /// Specifies which side of the input the n-gram should be generated from </summary>
- public enum Side
- {
+ /// <summary>
+ /// Specifies which side of the input the n-gram should be generated from </summary>
+ public enum Side
+ {
- /// <summary>
- /// Get the n-gram from the front of the input </summary>
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
- FRONT
- {
- public String getLabel() { return "front"
- }
- },
+ /// <summary>
+ /// Get the n-gram from the front of the input </summary>
+ FRONT,
- /// <summary>
- /// Get the n-gram from the end of the input </summary>
- [System.Obsolete]
-//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
- @Deprecated BACK
- {
- public String getLabel()
- {
- return "back";
- }
- }
-
- public =
+ /// <summary>
+ /// Get the n-gram from the end of the input </summary>
+ [System.Obsolete] BACK,
+ }
- // Get the appropriate Side from a string
+ // Get the appropriate Side from a string
public static Side getSide(String sideName)
{
//JAVA TO C# CONVERTER TODO TASK: The following line could not be converted:
@@ -88,7 +71,6 @@ namespace org.apache.lucene.analysis.ngram
}
return null;
}
- }
private readonly Version version;
private readonly CharacterUtils charUtils;
@@ -127,7 +109,7 @@ namespace org.apache.lucene.analysis.ngram
throw new System.ArgumentException("version must not be null");
}
- if (version.onOrAfter(Version.LUCENE_44) && side == Side.BACK)
+ if (version.OnOrAfter(Version.LUCENE_44) && side == Side.BACK)
{
throw new System.ArgumentException("Side.BACK is not supported anymore as of Lucene 4.4, use ReverseStringFilter up-front and afterward");
}
@@ -178,27 +160,25 @@ namespace org.apache.lucene.analysis.ngram
{
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
+ public override bool IncrementToken()
{
while (true)
{
if (curTermBuffer == null)
{
- if (!input.incrementToken())
+ if (!input.IncrementToken())
{
return false;
}
else
{
- curTermBuffer = termAtt.buffer().clone();
- curTermLength = termAtt.length();
+ curTermBuffer = termAtt.Buffer().Clone();
+ curTermLength = termAtt.Length();
curCodePointCount = charUtils.codePointCount(termAtt);
curGramSize = minGram;
- tokStart = offsetAtt.startOffset();
- tokEnd = offsetAtt.endOffset();
- if (version.onOrAfter(Version.LUCENE_44))
+ tokStart = offsetAtt.StartOffset();
+ tokEnd = offsetAtt.EndOffset();
+ if (version.OnOrAfter(Version.LUCENE_44))
{
// Never update offsets
updateOffsets = false;
@@ -218,20 +198,16 @@ namespace org.apache.lucene.analysis.ngram
if (curGramSize <= curCodePointCount) // if the remaining input is too short, we can't generate any n-grams
{
// grab gramSize chars from front or back
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int start = side == Side.FRONT ? 0 : charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
int start = side == Side.FRONT ? 0 : charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, curTermLength, -curGramSize);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
- clearAttributes();
+ ClearAttributes();
if (updateOffsets)
{
- offsetAtt.setOffset(tokStart + start, tokStart + end);
+ offsetAtt.SetOffset(tokStart + start, tokStart + end);
}
else
{
- offsetAtt.setOffset(tokStart, tokEnd);
+ offsetAtt.SetOffset(tokStart, tokEnd);
}
// first ngram gets increment, others don't
if (curGramSize == minGram)
@@ -244,7 +220,7 @@ namespace org.apache.lucene.analysis.ngram
posIncrAtt.PositionIncrement = 0;
}
posLenAtt.PositionLength = savePosLen;
- termAtt.copyBuffer(curTermBuffer, start, end - start);
+ termAtt.CopyBuffer(curTermBuffer, start, end - start);
curGramSize++;
return true;
}
@@ -253,14 +229,11 @@ namespace org.apache.lucene.analysis.ngram
}
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
+ public override void Reset()
{
- base.reset();
+ base.Reset();
curTermBuffer = null;
savePosIncr = 0;
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
index 195a6e1..23bf8c5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramTokenizerFactory.cs
@@ -1,7 +1,10 @@
using System.Collections.Generic;
-using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.ngram;
-namespace org.apache.lucene.analysis.ngram
+namespace Lucene.Net.Analysis.Ngram
{
/*
@@ -20,13 +23,7 @@ namespace org.apache.lucene.analysis.ngram
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using TokenizerFactory = TokenizerFactory;
- using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// Creates new instances of <seealso cref="EdgeNGramTokenizer"/>.
/// <pre class="prettyprint">
/// <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
@@ -54,9 +51,9 @@ namespace org.apache.lucene.analysis.ngram
}
}
- public override Tokenizer create(AttributeFactory factory, Reader input)
+ public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
- if (luceneMatchVersion.onOrAfter(Version.LUCENE_44))
+ if (luceneMatchVersion.OnOrAfter(Version.LUCENE_44))
{
if (!EdgeNGramTokenFilter.Side.FRONT.Label.Equals(side))
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
index 3e7012c..59b8dcb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenFilter.cs
@@ -1,4 +1,6 @@
-namespace org.apache.lucene.analysis.ngram
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.ngram
{
/*
@@ -23,7 +25,7 @@
using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
- using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
+ using CharacterUtils = CharacterUtils;
using Version = org.apache.lucene.util.Version;
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
index b782e94..37d6102 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/NGramTokenizer.cs
@@ -1,5 +1,6 @@
using System;
using System.Diagnostics;
+using Lucene.Net.Analysis.Util;
namespace org.apache.lucene.analysis.ngram
{
@@ -26,7 +27,7 @@ namespace org.apache.lucene.analysis.ngram
using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
using PositionLengthAttribute = org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
- using CharacterUtils = org.apache.lucene.analysis.util.CharacterUtils;
+ using CharacterUtils = CharacterUtils;
using Version = org.apache.lucene.util.Version;
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs
index 4d5dd75..ec0e412 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Payloads/DelimitedPayloadTokenFilterFactory.cs
@@ -1,85 +1,82 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.payloads;
-namespace org.apache.lucene.analysis.payloads
+namespace Lucene.Net.Analysis.Payloads
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="DelimitedPayloadTokenFilter"/>.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ /// </summary>
+ public class DelimitedPayloadTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
+ {
+ public const string ENCODER_ATTR = "encoder";
+ public const string DELIMITER_ATTR = "delimiter";
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
+ private readonly string encoderClass;
+ private readonly char delimiter;
- /// <summary>
- /// Factory for <seealso cref="DelimitedPayloadTokenFilter"/>.
- /// <pre class="prettyprint">
- /// <fieldType name="text_dlmtd" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float" delimiter="|"/>
- /// </analyzer>
- /// </fieldType></pre>
- /// </summary>
- public class DelimitedPayloadTokenFilterFactory : TokenFilterFactory, ResourceLoaderAware
- {
- public const string ENCODER_ATTR = "encoder";
- public const string DELIMITER_ATTR = "delimiter";
+ private PayloadEncoder encoder;
- private readonly string encoderClass;
- private readonly char delimiter;
+ /// <summary>
+ /// Creates a new DelimitedPayloadTokenFilterFactory.
+ /// Reads the required <c>ENCODER_ATTR</c> argument and the optional
+ /// <c>DELIMITER_ATTR</c> argument (default '|').
+ /// </summary>
+ /// <param name="args">factory arguments; presumably require/getChar remove the entries
+ /// they read, so whatever is left afterwards is unrecognized (TODO confirm against the base class)</param>
+ /// <exception cref="System.ArgumentException">if entries remain in <paramref name="args"/> after the known ones were read</exception>
+ public DelimitedPayloadTokenFilterFactory(IDictionary<string, string> args)
+     : base(args)
+ {
+     encoderClass = require(args, ENCODER_ATTR);
+     delimiter = getChar(args, DELIMITER_ATTR, '|');
+     if (args.Count > 0)
+     {
+         // The standard .NET dictionary types do not override ToString(), so concatenating
+         // 'args' directly would only print a type name; list the offending keys instead.
+         throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+     }
+ }
- private PayloadEncoder encoder;
+ /// <summary>
+ /// Wraps <paramref name="input"/> in a DelimitedPayloadTokenFilter that uses this factory's
+ /// delimiter and encoder.
+ /// </summary>
+ /// <param name="input">the token stream to filter</param>
+ /// <returns>the new DelimitedPayloadTokenFilter</returns>
+ public override TokenStream Create(TokenStream input)
+ {
+     TokenStream payloadFilter = new DelimitedPayloadTokenFilter(input, delimiter, encoder);
+     return payloadFilter;
+ }
- /// <summary>
- /// Creates a new DelimitedPayloadTokenFilterFactory </summary>
- public DelimitedPayloadTokenFilterFactory(IDictionary<string, string> args) : base(args)
- {
- encoderClass = require(args, ENCODER_ATTR);
- delimiter = getChar(args, DELIMITER_ATTR, '|');
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override DelimitedPayloadTokenFilter create(TokenStream input)
- {
- return new DelimitedPayloadTokenFilter(input, delimiter, encoder);
- }
-
- public virtual void inform(ResourceLoader loader)
- {
- if (encoderClass.Equals("float"))
- {
- encoder = new FloatEncoder();
- }
- else if (encoderClass.Equals("integer"))
- {
- encoder = new IntegerEncoder();
- }
- else if (encoderClass.Equals("identity"))
- {
- encoder = new IdentityEncoder();
- }
- else
- {
- encoder = loader.newInstance(encoderClass, typeof(PayloadEncoder));
- }
- }
- }
+ /// <summary>
+ /// Resolves the configured encoder name to a PayloadEncoder instance. The names "float",
+ /// "integer" and "identity" map to the built-in encoders; any other value is handed to
+ /// <paramref name="loader"/> to instantiate.
+ /// </summary>
+ /// <param name="loader">resource loader used to instantiate a non-built-in encoder</param>
+ public virtual void Inform(ResourceLoader loader)
+ {
+     // Built-in short names are checked first, in the same order as before.
+     if (encoderClass.Equals("float"))
+     {
+         encoder = new FloatEncoder();
+         return;
+     }
+
+     if (encoderClass.Equals("integer"))
+     {
+         encoder = new IntegerEncoder();
+         return;
+     }
+
+     if (encoderClass.Equals("identity"))
+     {
+         encoder = new IdentityEncoder();
+         return;
+     }
+
+     // Anything else is passed to the loader as the encoder to instantiate.
+     encoder = loader.NewInstance(encoderClass, typeof(PayloadEncoder));
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
index 92f73bc..104422b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
@@ -1,6 +1,7 @@
using System;
+using Lucene.Net.Analysis.Tokenattributes;
-namespace org.apache.lucene.analysis.position
+namespace Lucene.Net.Analysis.Position
{
/*
@@ -19,10 +20,7 @@ namespace org.apache.lucene.analysis.position
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
- /// <summary>
+ /// <summary>
/// Set the positionIncrement of all tokens to the "positionIncrement",
/// except the first return token which retains its original positionIncrement value.
/// The default positionIncrement value is zero. </summary>
@@ -51,8 +49,6 @@ namespace org.apache.lucene.analysis.position
/// all but the first token from the given input stream.
/// </summary>
/// <param name="input"> the input stream </param>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: public PositionFilter(final org.apache.lucene.analysis.TokenStream input)
public PositionFilter(TokenStream input) : this(input, 0)
{
}
@@ -64,8 +60,6 @@ namespace org.apache.lucene.analysis.position
/// <param name="input"> the input stream </param>
/// <param name="positionIncrement"> position increment to assign to all but the first
/// token from the input stream </param>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: public PositionFilter(final org.apache.lucene.analysis.TokenStream input, final int positionIncrement)
public PositionFilter(TokenStream input, int positionIncrement) : base(input)
{
if (positionIncrement < 0)
@@ -75,11 +69,9 @@ namespace org.apache.lucene.analysis.position
this.positionIncrement = positionIncrement;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
+ public override bool IncrementToken()
{
- if (input.incrementToken())
+ if (input.IncrementToken())
{
if (firstTokenPositioned)
{
@@ -97,13 +89,10 @@ namespace org.apache.lucene.analysis.position
}
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
+ public override void Reset()
{
- base.reset();
+ base.Reset();
firstTokenPositioned = false;
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
index 74bf1e4..cc65164 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
@@ -1,70 +1,65 @@
using System;
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.position
+namespace Lucene.Net.Analysis.Position
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- using TokenFilterFactory = TokenFilterFactory;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
- /// Factory for <seealso cref="PositionFilter"/>.
- /// Set the positionIncrement of all tokens to the "positionIncrement", except the first return token which retains its
- /// original positionIncrement value. The default positionIncrement value is zero.
- /// <pre class="prettyprint">
- /// <fieldType name="text_position" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.PositionFilterFactory" positionIncrement="0"/>
- /// </analyzer>
- /// </fieldType></pre>
- /// </summary>
- /// <seealso cref= org.apache.lucene.analysis.position.PositionFilter
- /// @since solr 1.4 </seealso>
- /// @deprecated (4.4)
- [Obsolete("(4.4)")]
- public class PositionFilterFactory : TokenFilterFactory
- {
- private readonly int positionIncrement;
-
- /// <summary>
- /// Creates a new PositionFilterFactory </summary>
- public PositionFilterFactory(IDictionary<string, string> args) : base(args)
- {
- positionIncrement = getInt(args, "positionIncrement", 0);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_44))
- {
- throw new System.ArgumentException("PositionFilter is deprecated as of Lucene 4.4. You should either fix your code to not use it or use Lucene 4.3 version compatibility");
- }
- }
-
- public override PositionFilter create(TokenStream input)
- {
- return new PositionFilter(input, positionIncrement);
- }
- }
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="PositionFilter"/>.
+ /// Set the positionIncrement of all tokens to the "positionIncrement", except the first return token which retains its
+ /// original positionIncrement value. The default positionIncrement value is zero.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_position" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.PositionFilterFactory" positionIncrement="0"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ /// </summary>
+ /// <seealso cref= org.apache.lucene.analysis.position.PositionFilter
+ /// @since solr 1.4 </seealso>
+ /// @deprecated (4.4)
+ [Obsolete("(4.4)")]
+ public class PositionFilterFactory : TokenFilterFactory
+ {
+ private readonly int positionIncrement;
+ /// <summary>
+ /// Creates a new PositionFilterFactory </summary>
+ public PositionFilterFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ positionIncrement = getInt(args, "positionIncrement", 0);
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_44))
+ {
+ throw new System.ArgumentException("PositionFilter is deprecated as of Lucene 4.4. You should either fix your code to not use it or use Lucene 4.3 version compatibility");
+ }
+ }
+ public override TokenStream Create(TokenStream input)
+ {
+ return new PositionFilter(input, positionIncrement);
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
index 2daf790..be73228 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
@@ -1,213 +1,198 @@
using System.Collections.Generic;
+using System.IO;
+using System.Linq;
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.query
+namespace Lucene.Net.Analysis.Query
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- using StopFilter = StopFilter;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using IndexReader = org.apache.lucene.index.IndexReader;
- using MultiFields = org.apache.lucene.index.MultiFields;
- using Term = org.apache.lucene.index.Term;
- using Terms = org.apache.lucene.index.Terms;
- using TermsEnum = org.apache.lucene.index.TermsEnum;
- using BytesRef = org.apache.lucene.util.BytesRef;
- using CharsRef = org.apache.lucene.util.CharsRef;
- using UnicodeUtil = org.apache.lucene.util.UnicodeUtil;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
- /// An <seealso cref="Analyzer"/> used primarily at query time to wrap another analyzer and provide a layer of protection
- /// which prevents very common words from being passed into queries.
- /// <para>
- /// For very large indexes the cost
- /// of reading TermDocs for a very common word can be high. This analyzer was created after experience with
- /// a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for
- /// this term to take 2 seconds.
- /// </para>
- /// </summary>
- public sealed class QueryAutoStopWordAnalyzer : AnalyzerWrapper
- {
-
- private readonly Analyzer @delegate;
- private readonly IDictionary<string, HashSet<string>> stopWordsPerField = new Dictionary<string, HashSet<string>>();
- //The default maximum percentage (40%) of index documents which
- //can contain a term, after which the term is considered to be a stop word.
- public const float defaultMaxDocFreqPercent = 0.4f;
- private readonly Version matchVersion;
-
- /// <summary>
- /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
- /// indexed fields from terms with a document frequency percentage greater than
- /// <seealso cref="#defaultMaxDocFreqPercent"/>
- /// </summary>
- /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
- /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
- /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
- /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader) throws java.io.IOException
- public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader) : this(matchVersion, @delegate, indexReader, defaultMaxDocFreqPercent)
- {
- }
-
- /// <summary>
- /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
- /// indexed fields from terms with a document frequency greater than the given
- /// maxDocFreq
- /// </summary>
- /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
- /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
- /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
- /// <param name="maxDocFreq"> Document frequency terms should be above in order to be stopwords </param>
- /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader, int maxDocFreq) throws java.io.IOException
- public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, int maxDocFreq) : this(matchVersion, @delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxDocFreq)
- {
- }
-
- /// <summary>
- /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
- /// indexed fields from terms with a document frequency percentage greater than
- /// the given maxPercentDocs
- /// </summary>
- /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
- /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
- /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
- /// <param name="maxPercentDocs"> The maximum percentage (between 0.0 and 1.0) of index documents which
- /// contain a term, after which the word is considered to be a stop word </param>
- /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader, float maxPercentDocs) throws java.io.IOException
- public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, float maxPercentDocs) : this(matchVersion, @delegate, indexReader, MultiFields.getIndexedFields(indexReader), maxPercentDocs)
- {
- }
-
- /// <summary>
- /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
- /// given selection of fields from terms with a document frequency percentage
- /// greater than the given maxPercentDocs
- /// </summary>
- /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
- /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
- /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
- /// <param name="fields"> Selection of fields to calculate stopwords for </param>
- /// <param name="maxPercentDocs"> The maximum percentage (between 0.0 and 1.0) of index documents which
- /// contain a term, after which the word is considered to be a stop word </param>
- /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader, Collection<String> fields, float maxPercentDocs) throws java.io.IOException
- public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection<string> fields, float maxPercentDocs) : this(matchVersion, @delegate, indexReader, fields, (int)(indexReader.numDocs() * maxPercentDocs))
- {
- }
-
- /// <summary>
- /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
- /// given selection of fields from terms with a document frequency greater than
- /// the given maxDocFreq
- /// </summary>
- /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
- /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
- /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
- /// <param name="fields"> Selection of fields to calculate stopwords for </param>
- /// <param name="maxDocFreq"> Document frequency terms should be above in order to be stopwords </param>
- /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public QueryAutoStopWordAnalyzer(org.apache.lucene.util.Version matchVersion, org.apache.lucene.analysis.Analyzer delegate, org.apache.lucene.index.IndexReader indexReader, Collection<String> fields, int maxDocFreq) throws java.io.IOException
- public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection<string> fields, int maxDocFreq) : base(@delegate.ReuseStrategy)
- {
- this.matchVersion = matchVersion;
- this.@delegate = @delegate;
-
- foreach (string field in fields)
- {
- HashSet<string> stopWords = new HashSet<string>();
- Terms terms = MultiFields.getTerms(indexReader, field);
- CharsRef spare = new CharsRef();
- if (terms != null)
- {
- TermsEnum te = terms.iterator(null);
- BytesRef text;
- while ((text = te.next()) != null)
- {
- if (te.docFreq() > maxDocFreq)
- {
- UnicodeUtil.UTF8toUTF16(text, spare);
- stopWords.Add(spare.ToString());
- }
- }
- }
- stopWordsPerField[field] = stopWords;
- }
- }
-
- protected internal override Analyzer getWrappedAnalyzer(string fieldName)
- {
- return @delegate;
- }
-
- protected internal override TokenStreamComponents wrapComponents(string fieldName, TokenStreamComponents components)
- {
- HashSet<string> stopWords = stopWordsPerField[fieldName];
- if (stopWords == null)
- {
- return components;
- }
- StopFilter stopFilter = new StopFilter(matchVersion, components.TokenStream, new CharArraySet(matchVersion, stopWords, false));
- return new TokenStreamComponents(components.Tokenizer, stopFilter);
- }
-
- /// <summary>
- /// Provides information on which stop words have been identified for a field
- /// </summary>
- /// <param name="fieldName"> The field for which stop words identified in "addStopWords"
- /// method calls will be returned </param>
- /// <returns> the stop words identified for a field </returns>
- public string[] getStopWords(string fieldName)
- {
- HashSet<string> stopWords = stopWordsPerField[fieldName];
- return stopWords != null ? stopWords.toArray(new string[stopWords.Count]) : new string[0];
- }
-
- /// <summary>
- /// Provides information on which stop words have been identified for all fields
- /// </summary>
- /// <returns> the stop words (as terms) </returns>
- public Term[] StopWords
- {
- get
- {
- IList<Term> allStopWords = new List<Term>();
- foreach (string fieldName in stopWordsPerField.Keys)
- {
- HashSet<string> stopWords = stopWordsPerField[fieldName];
- foreach (string text in stopWords)
- {
- allStopWords.Add(new Term(fieldName, text));
- }
- }
- return allStopWords.ToArray();
- }
- }
-
- }
-
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// An <seealso cref="Analyzer"/> used primarily at query time to wrap another analyzer and provide a layer of protection
+ /// which prevents very common words from being passed into queries.
+ /// <para>
+ /// For very large indexes the cost
+ /// of reading TermDocs for a very common word can be high. This analyzer was created after experience with
+ /// a 38 million doc index which had a term in around 50% of docs and was causing TermQueries for
+ /// this term to take 2 seconds.
+ /// </para>
+ /// </summary>
+ public sealed class QueryAutoStopWordAnalyzer : AnalyzerWrapper
+ {
+
+ private readonly Analyzer @delegate;
+ private readonly IDictionary<string, HashSet<string>> stopWordsPerField = new Dictionary<string, HashSet<string>>();
+ //The default maximum percentage (40%) of index documents which
+ //can contain a term, after which the term is considered to be a stop word.
+ public const float defaultMaxDocFreqPercent = 0.4f;
+ private readonly Version matchVersion;
+
+ /// <summary>
+ /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+ /// indexed fields from terms with a document frequency percentage greater than
+ /// <seealso cref="#defaultMaxDocFreqPercent"/>
+ /// </summary>
+ /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+ /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+ /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+ /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+ public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader)
+ : this(matchVersion, @delegate, indexReader, defaultMaxDocFreqPercent)
+ {
+ }
+
+ /// <summary>
+ /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+ /// indexed fields from terms with a document frequency greater than the given
+ /// maxDocFreq
+ /// </summary>
+ /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+ /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+ /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+ /// <param name="maxDocFreq"> Document frequency terms should be above in order to be stopwords </param>
+ /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+ public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, int maxDocFreq)
+ : this(matchVersion, @delegate, indexReader, MultiFields.GetIndexedFields(indexReader), maxDocFreq)
+ {
+ }
+
+ /// <summary>
+ /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for all
+ /// indexed fields from terms with a document frequency percentage greater than
+ /// the given maxPercentDocs
+ /// </summary>
+ /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+ /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+ /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+ /// <param name="maxPercentDocs"> The maximum percentage (between 0.0 and 1.0) of index documents which
+ /// contain a term, after which the word is considered to be a stop word </param>
+ /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+ public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, float maxPercentDocs)
+ : this(matchVersion, @delegate, indexReader, MultiFields.GetIndexedFields(indexReader), maxPercentDocs)
+ {
+ }
+
+ /// <summary>
+ /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
+ /// given selection of fields from terms with a document frequency percentage
+ /// greater than the given maxPercentDocs
+ /// </summary>
+ /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+ /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+ /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+ /// <param name="fields"> Selection of fields to calculate stopwords for </param>
+ /// <param name="maxPercentDocs"> The maximum percentage (between 0.0 and 1.0) of index documents which
+ /// contain a term, after which the word is considered to be a stop word </param>
+ /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+ public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection<string> fields, float maxPercentDocs)
+ : this(matchVersion, @delegate, indexReader, fields, (int)(indexReader.NumDocs * maxPercentDocs))
+ {
+ }
+
+ /// <summary>
+ /// Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
+ /// given selection of fields from terms with a document frequency greater than
+ /// the given maxDocFreq
+ /// </summary>
+ /// <param name="matchVersion"> Version to be used in <seealso cref="StopFilter"/> </param>
+ /// <param name="delegate"> Analyzer whose TokenStream will be filtered </param>
+ /// <param name="indexReader"> IndexReader to identify the stopwords from </param>
+ /// <param name="fields"> Selection of fields to calculate stopwords for </param>
+ /// <param name="maxDocFreq"> Document frequency terms should be above in order to be stopwords </param>
+ /// <exception cref="IOException"> Can be thrown while reading from the IndexReader </exception>
+ public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer @delegate, IndexReader indexReader, ICollection<string> fields, int maxDocFreq)
+ : base(@delegate.Strategy)
+ {
+ this.matchVersion = matchVersion;
+ this.@delegate = @delegate;
+
+ foreach (string field in fields)
+ {
+ var stopWords = new HashSet<string>();
+ Terms terms = MultiFields.GetTerms(indexReader, field);
+ CharsRef spare = new CharsRef();
+ if (terms != null)
+ {
+ TermsEnum te = terms.Iterator(null);
+ BytesRef text;
+ while ((text = te.Next()) != null)
+ {
+ if (te.DocFreq() > maxDocFreq)
+ {
+ UnicodeUtil.UTF8toUTF16(text, spare);
+ stopWords.Add(spare.ToString());
+ }
+ }
+ }
+ stopWordsPerField[field] = stopWords;
+ }
+ }
+
+ protected override Analyzer GetWrappedAnalyzer(string fieldName)
+ {
+ return @delegate;
+ }
+
+ protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
+ {
+ HashSet<string> stopWords = stopWordsPerField[fieldName];
+ if (stopWords == null)
+ {
+ return components;
+ }
+ StopFilter stopFilter = new StopFilter(matchVersion, components.TokenStream, new CharArraySet(matchVersion, stopWords, false));
+ return new TokenStreamComponents(components.Tokenizer, stopFilter);
+ }
+
+ /// <summary>
+ /// Provides information on which stop words have been identified for a field
+ /// </summary>
+ /// <param name="fieldName"> The field for which stop words identified in "addStopWords"
+ /// method calls will be returned </param>
+ /// <returns> the stop words identified for a field </returns>
+ public string[] GetStopWords(string fieldName)
+ {
+ HashSet<string> stopWords = stopWordsPerField[fieldName];
+ return stopWords != null ? stopWords.ToArray(new string[stopWords.Count]) : new string[0];
+ }
+
+ /// <summary>
+ /// Provides information on which stop words have been identified for all fields
+ /// </summary>
+ /// <returns> the stop words (as terms) </returns>
+ public Term[] StopWords
+ {
+ get
+ {
+ IList<Term> allStopWords = new List<Term>();
+ foreach (string fieldName in stopWordsPerField.Keys)
+ {
+ HashSet<string> stopWords = stopWordsPerField[fieldName];
+ foreach (string text in stopWords)
+ {
+ allStopWords.Add(new Term(fieldName, text));
+ }
+ }
+ return allStopWords.ToArray();
+ }
+ }
+
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
index f2387f1..dcfe368 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
@@ -1,4 +1,6 @@
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
+using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
namespace org.apache.lucene.analysis.standard
{
@@ -24,9 +26,9 @@ namespace org.apache.lucene.analysis.standard
using LowerCaseFilter = LowerCaseFilter;
using StopAnalyzer = StopAnalyzer;
using StopFilter = StopFilter;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
- using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+ using CharArraySet = CharArraySet;
+ using StopwordAnalyzerBase = StopwordAnalyzerBase;
+ using WordlistLoader = WordlistLoader;
using Version = org.apache.lucene.util.Version;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
index 73d16e3..392f656 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
@@ -1,4 +1,6 @@
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
+using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
namespace org.apache.lucene.analysis.standard
{
@@ -24,9 +26,9 @@ namespace org.apache.lucene.analysis.standard
using LowerCaseFilter = LowerCaseFilter;
using StopAnalyzer = StopAnalyzer;
using StopFilter = StopFilter;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
- using WordlistLoader = org.apache.lucene.analysis.util.WordlistLoader;
+ using CharArraySet = CharArraySet;
+ using StopwordAnalyzerBase = StopwordAnalyzerBase;
+ using WordlistLoader = WordlistLoader;
using Version = org.apache.lucene.util.Version;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
index 2067ff6..fd546ce 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
@@ -1,4 +1,6 @@
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
+using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
namespace org.apache.lucene.analysis.standard
{
@@ -23,8 +25,8 @@ namespace org.apache.lucene.analysis.standard
using LowerCaseFilter = LowerCaseFilter;
using StopAnalyzer = StopAnalyzer;
using StopFilter = StopFilter;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using StopwordAnalyzerBase = org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+ using CharArraySet = CharArraySet;
+ using StopwordAnalyzerBase = StopwordAnalyzerBase;
using Version = org.apache.lucene.util.Version;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
index c38f1dd..2be937c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
@@ -56,8 +56,8 @@ namespace Lucene.Net.Analysis.Synonym
for (IEnumerator<string> itr = args.Keys.GetEnumerator(); itr.MoveNext();)
{
string key = itr.Current;
- tokArgs[key.replaceAll("^tokenizerFactory\\.","")] = args[key];
- itr.remove();
+ tokArgs[key.ReplaceAll("^tokenizerFactory\\.","")] = args[key];
+ itr.Remove();
}
}
if (args.Count > 0)
@@ -66,19 +66,15 @@ namespace Lucene.Net.Analysis.Synonym
}
}
- public override TokenStream create(TokenStream input)
+ public override TokenStream Create(TokenStream input)
{
// if the fst is null, it means there's actually no synonyms... just return the original stream
// as there is nothing to do here.
return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(ResourceLoader loader) throws java.io.IOException
public void inform(ResourceLoader loader)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, factory);
@@ -115,7 +111,7 @@ namespace Lucene.Net.Analysis.Synonym
this.factory = factory;
}
- protected internal override Analyzer.TokenStreamComponents createComponents(string fieldName, Reader reader)
+ protected internal override Analyzer.TokenStreamComponents CreateComponents(string fieldName, Reader reader)
{
Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
TokenStream stream = outerInstance.ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
index 15abb7a..95b3c1f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilter.cs
@@ -1,7 +1,9 @@
using System;
using System.Collections.Generic;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.synonym
+namespace Lucene.Net.Analysis.Synonym
{
/*
@@ -20,15 +22,7 @@ namespace org.apache.lucene.analysis.synonym
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
- using AttributeSource = org.apache.lucene.util.AttributeSource;
-
-
- /// <summary>
+ /// <summary>
/// SynonymFilter handles multi-token synonyms with variable position increment offsets.
/// <para>
/// The matched tokens from the input stream may be optionally passed through (includeOrig=true)
@@ -56,10 +50,10 @@ namespace org.apache.lucene.analysis.synonym
this.map = map;
// just ensuring these attributes exist...
- addAttribute(typeof(CharTermAttribute));
- addAttribute(typeof(PositionIncrementAttribute));
- addAttribute(typeof(OffsetAttribute));
- addAttribute(typeof(TypeAttribute));
+ AddAttribute(typeof(CharTermAttribute));
+ AddAttribute(typeof(PositionIncrementAttribute));
+ AddAttribute(typeof(OffsetAttribute));
+ AddAttribute(typeof(TypeAttribute));
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
index 5e76e47..1f3d604 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymFilterFactory.cs
@@ -2,6 +2,7 @@
using System.Collections;
using System.Collections.Generic;
using System.Text;
+using Lucene.Net.Analysis.Synonym;
using Lucene.Net.Analysis.Util;
namespace org.apache.lucene.analysis.synonym
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
index cfc7d71..ab54cf5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SlowSynonymMap.cs
@@ -1,8 +1,10 @@
using System;
using System.Collections.Generic;
+using System.Linq;
using System.Text;
+using org.apache.lucene.analysis.util;
-namespace org.apache.lucene.analysis.synonym
+namespace Lucene.Net.Analysis.Synonym
{
/*
@@ -21,11 +23,7 @@ namespace org.apache.lucene.analysis.synonym
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using org.apache.lucene.analysis.util;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// Mapping rules for use with <seealso cref="SlowSynonymFilter"/> </summary>
/// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0
[Obsolete("(3.4) use <seealso cref="SynonymFilterFactory"/> instead. only for precise index backwards compatibility. this factory will be removed in Lucene 5.0")]
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
index b6967d8..9d924f7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/SynonymFilterFactory.cs
@@ -1,77 +1,73 @@
using System;
using System.Collections.Generic;
-using Lucene.Net.Analysis.Synonym;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.synonym;
+using org.apache.lucene.analysis.util;
-namespace org.apache.lucene.analysis.synonym
+namespace Lucene.Net.Analysis.Synonym
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="SynonymFilter"/>.
+ /// <pre class="prettyprint" >
+ /// <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
+ /// format="solr" ignoreCase="false" expand="true"
+ /// tokenizerFactory="solr.WhitespaceTokenizerFactory"
+ /// [optional tokenizer factory parameters]/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ ///
+ /// <para>
+ /// An optional param name prefix of "tokenizerFactory." may be used for any
+ /// init params that the SynonymFilterFactory needs to pass to the specified
+ /// TokenizerFactory. If the TokenizerFactory expects an init parameter with
+ /// the same name as an init param used by the SynonymFilterFactory, the prefix
+ /// is mandatory.
+ /// </para>
+ /// <para>
+ /// The optional {@code format} parameter controls how the synonyms will be parsed:
+ /// It supports the short names of {@code solr} for <seealso cref="SolrSynonymParser"/>
+ /// and {@code wordnet} for <seealso cref="WordnetSynonymParser"/>, or your own
+ /// {@code SynonymMap.Parser} class name. The default is {@code solr}.
+ /// A custom <seealso cref="SynonymMap.Parser"/> is expected to have a constructor taking:
+ /// <ul>
+ /// <li><code>boolean dedup</code> - true if duplicates should be ignored, false otherwise</li>
+ /// <li><code>boolean expand</code> - true if conflation groups should be expanded, false if they are one-directional</li>
+ /// <li><code><seealso cref="Analyzer"/> analyzer</code> - an analyzer used for each raw synonym</li>
+ /// </ul>
+ /// </para>
+ /// </summary>
+ public class SynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
+ {
+ private readonly TokenFilterFactory delegator;
-
- using Version = org.apache.lucene.util.Version;
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
-
- /// <summary>
- /// Factory for <seealso cref="SynonymFilter"/>.
- /// <pre class="prettyprint" >
- /// <fieldType name="text_synonym" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
- /// format="solr" ignoreCase="false" expand="true"
- /// tokenizerFactory="solr.WhitespaceTokenizerFactory"
- /// [optional tokenizer factory parameters]/>
- /// </analyzer>
- /// </fieldType></pre>
- ///
- /// <para>
- /// An optional param name prefix of "tokenizerFactory." may be used for any
- /// init params that the SynonymFilterFactory needs to pass to the specified
- /// TokenizerFactory. If the TokenizerFactory expects an init parameters with
- /// the same name as an init param used by the SynonymFilterFactory, the prefix
- /// is mandatory.
- /// </para>
- /// <para>
- /// The optional {@code format} parameter controls how the synonyms will be parsed:
- /// It supports the short names of {@code solr} for <seealso cref="SolrSynonymParser"/>
- /// and {@code wordnet} for and <seealso cref="WordnetSynonymParser"/>, or your own
- /// {@code SynonymMap.Parser} class name. The default is {@code solr}.
- /// A custom <seealso cref="SynonymMap.Parser"/> is expected to have a constructor taking:
- /// <ul>
- /// <li><code>boolean dedup</code> - true if duplicates should be ignored, false otherwise</li>
- /// <li><code>boolean expand</code> - true if conflation groups should be expanded, false if they are one-directional</li>
- /// <li><code><seealso cref="Analyzer"/> analyzer</code> - an analyzer used for each raw synonym</li>
- /// </ul>
- /// </para>
- /// </summary>
- public class SynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
- {
- private readonly TokenFilterFactory delegator;
-
- public SynonymFilterFactory(IDictionary<string, string> args) : base(args)
+ public SynonymFilterFactory(IDictionary<string, string> args)
+ : base(args)
{
assureMatchVersion();
- if (luceneMatchVersion.onOrAfter(Version.LUCENE_34))
+ if (luceneMatchVersion.OnOrAfter(Lucene.Net.Util.Version.LUCENE_34))
{
- delegator = new FSTSynonymFilterFactory(new Dictionary<>(OriginalArgs));
+ delegator = new FSTSynonymFilterFactory(new Dictionary<string, string>(OriginalArgs));
}
else
{
@@ -85,31 +81,28 @@ namespace org.apache.lucene.analysis.synonym
}
}
- public override TokenStream create(TokenStream input)
- {
- return delegator.create(input);
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
- public virtual void inform(ResourceLoader loader)
- {
- ((ResourceLoaderAware) delegator).inform(loader);
- }
+ public override TokenStream Create(TokenStream input)
+ {
+ return delegator.Create(input);
+ }
- /// <summary>
- /// Access to the delegator TokenFilterFactory for test verification
- /// </summary>
- /// @deprecated Method exists only for testing 4x, will be removed in 5.0
- /// @lucene.internal
- [Obsolete("Method exists only for testing 4x, will be removed in 5.0")]
- internal virtual TokenFilterFactory Delegator
- {
- get
- {
- return delegator;
- }
- }
- }
+ public virtual void Inform(ResourceLoader loader)
+ {
+ ((ResourceLoaderAware)delegator).Inform(loader);
+ }
+ /// <summary>
+ /// Access to the delegator TokenFilterFactory for test verification
+ /// </summary>
+ /// @deprecated Method exists only for testing 4x, will be removed in 5.0
+ /// @lucene.internal
+ [Obsolete("Method exists only for testing 4x, will be removed in 5.0")]
+ internal virtual TokenFilterFactory Delegator
+ {
+ get
+ {
+ return delegator;
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
index 351446f..f0e9bda 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
@@ -2,9 +2,8 @@
using System.Collections;
using System.Collections.Generic;
using System.Threading;
-using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
/*
@@ -23,11 +22,7 @@ namespace org.apache.lucene.analysis.util
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using SPIClassIterator = org.apache.lucene.util.SPIClassIterator;
-
- /// <summary>
+ /// <summary>
/// Helper class for loading named SPIs from classpath (e.g. Tokenizers, TokenStreams).
/// @lucene.internal
/// </summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
index 1086572..e608b1f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -3,6 +3,8 @@ using System.Diagnostics;
using System.Collections;
using System.Collections.Generic;
using System.Text;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
namespace org.apache.lucene.analysis.util
{
@@ -25,9 +27,6 @@ namespace org.apache.lucene.analysis.util
*/
- using Version = org.apache.lucene.util.Version;
-
-
/// <summary>
/// A simple class that stores key Strings as char[]'s in a
/// hash table. Note that this is not a general purpose
@@ -51,7 +50,7 @@ namespace org.apache.lucene.analysis.util
/// 3.1 pass a <seealso cref="Version"/> &lt; 3.1 to the constructors.
/// </para>
/// </summary>
- public class CharArrayMap<V> : AbstractMap<object, V>
+ public class CharArrayMap<V> : IDictionary<object, V>
{
// private only because missing generics
//JAVA TO C# CONVERTER TODO TASK: Java wildcard generics are not converted to .NET:
@@ -123,78 +122,72 @@ namespace org.apache.lucene.analysis.util
/// <summary>
/// Clears all entries in this map. This method is supported for reusing, but not <seealso cref="Map#remove"/>. </summary>
- public override void clear()
+ public override void Clear()
{
count = 0;
- Arrays.fill(keys, null);
- Arrays.fill(values, null);
+ Arrays.Fill(keys, null);
+ Arrays.Fill(values, null);
}
/// <summary>
/// true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
/// are in the <seealso cref="#keySet()"/>
/// </summary>
- public virtual bool containsKey(char[] text, int off, int len)
+ public virtual bool ContainsKey(char[] text, int off, int len)
{
return keys[getSlot(text, off, len)] != null;
}
/// <summary>
/// true if the <code>CharSequence</code> is in the <seealso cref="#keySet()"/> </summary>
- public virtual bool containsKey(CharSequence cs)
+ public virtual bool ContainsKey(string cs)
{
return keys[getSlot(cs)] != null;
}
- public override bool containsKey(object o)
+ public override bool ContainsKey(object o)
{
if (o is char[])
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] text = (char[])o;
char[] text = (char[])o;
- return containsKey(text, 0, text.Length);
+ return ContainsKey(text, 0, text.Length);
}
- return containsKey(o.ToString());
+ return ContainsKey(o.ToString());
}
/// <summary>
/// returns the value of the mapping of <code>len</code> chars of <code>text</code>
/// starting at <code>off</code>
/// </summary>
- public virtual V get(char[] text, int off, int len)
+ public virtual V Get(char[] text, int off, int len)
{
return values[getSlot(text, off, len)];
}
/// <summary>
/// returns the value of the mapping of the chars inside this {@code CharSequence} </summary>
- public virtual V get(CharSequence cs)
+ public virtual V Get(string cs)
{
return values[getSlot(cs)];
}
- public override V get(object o)
+ public V Get(object o)
{
- if (o is char[])
+ var text = o as char[];
+ if (text != null)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] text = (char[])o;
- char[] text = (char[])o;
- return get(text, 0, text.Length);
+ return Get(text, 0, text.Length);
}
- return get(o.ToString());
+ return Get(o.ToString());
}
- private int getSlot(char[] text, int off, int len)
+ private int GetSlot(char[] text, int off, int len)
{
int code = getHashCode(text, off, len);
int pos = code & (keys.Length - 1);
char[] text2 = keys[pos];
if (text2 != null && !Equals(text, off, len, text2))
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int inc = ((code>>8)+code)|1;
int inc = ((code >> 8) + code) | 1;
do
{
@@ -208,15 +201,13 @@ namespace org.apache.lucene.analysis.util
/// <summary>
/// Returns the hash-table slot for the given String; the key stored at that slot is null if the String is not in the set </summary>
- private int getSlot(CharSequence text)
+ private int GetSlot(string text)
{
int code = getHashCode(text);
int pos = code & (keys.Length - 1);
char[] text2 = keys[pos];
if (text2 != null && !Equals(text, text2))
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int inc = ((code>>8)+code)|1;
int inc = ((code >> 8) + code) | 1;
do
{
@@ -265,8 +256,6 @@ namespace org.apache.lucene.analysis.util
int slot = getSlot(text, 0, text.Length);
if (keys[slot] != null)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final V oldValue = values[slot];
V oldValue = values[slot];
values[slot] = value;
return oldValue;
@@ -277,15 +266,13 @@ namespace org.apache.lucene.analysis.util
if (count + (count >> 2) > keys.Length)
{
- rehash();
+ Rehash();
}
return null;
}
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @SuppressWarnings("unchecked") private void rehash()
- private void rehash()
+ private void Rehash()
{
Debug.Assert(keys.Length == values.Length);
//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index d9253d7..370c56a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -1,7 +1,9 @@
using System.Collections.Generic;
using System.Text;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
/*
@@ -20,12 +22,7 @@ namespace org.apache.lucene.analysis.util
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using Version = org.apache.lucene.util.Version;
-
-
- /// <summary>
+ /// <summary>
/// A simple class that stores Strings as char[]'s in a
/// hash table. Note that this is not a general purpose
/// class. For example, it cannot remove items from the
@@ -51,12 +48,12 @@ namespace org.apache.lucene.analysis.util
/// does not behave like it should in all cases. The generic type is
/// {@code Set<Object>}, because you can add any object to it,
/// that has a string representation. The add methods will use
- /// <seealso cref="Object#toString"/> and store the result using a {@code char[]}
+ /// <seealso cref="object#toString"/> and store the result using a {@code char[]}
/// buffer. The {@code contains()} methods behave the same way.
/// The <seealso cref="#iterator()"/> returns an {@code Iterator<char[]>}.
/// </para>
/// </summary>
- public class CharArraySet : AbstractSet<object>
+ public class CharArraySet : ISet<object>
{
public static readonly CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.emptyMap<object>());
private static readonly object PLACEHOLDER = new object();
@@ -91,13 +88,11 @@ namespace org.apache.lucene.analysis.util
/// otherwise <code>true</code>. </param>
public CharArraySet<T1>(Version matchVersion, ICollection<T1> c, bool ignoreCase) : this(matchVersion, c.Count, ignoreCase)
{
- addAll(c);
+ AddAll(c);
}
/// <summary>
/// Create set from the specified map (internal only), used also by <seealso cref="CharArrayMap#keySet()"/> </summary>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: CharArraySet(final CharArrayMap<Object> map)
internal CharArraySet(CharArrayMap<object> map)
{
this.map = map;
@@ -105,7 +100,7 @@ namespace org.apache.lucene.analysis.util
/// <summary>
/// Clears all entries in this set. This method is supported for reusing, but not <seealso cref="Set#remove"/>. </summary>
- public override void clear()
+ public void Clear()
{
map.clear();
}
@@ -114,38 +109,31 @@ namespace org.apache.lucene.analysis.util
/// true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
/// are in the set
/// </summary>
- public virtual bool contains(char[] text, int off, int len)
+ public virtual bool Contains(char[] text, int off, int len)
{
return map.containsKey(text, off, len);
}
/// <summary>
/// true if the <code>CharSequence</code> is in the set </summary>
- public virtual bool contains(CharSequence cs)
+ public virtual bool Contains(string cs)
{
return map.containsKey(cs);
}
- public override bool contains(object o)
+ public bool Contains(object o)
{
return map.containsKey(o);
}
- public override bool add(object o)
+ public bool Add(object o)
{
return map.put(o, PLACEHOLDER) == null;
}
/// <summary>
- /// Add this CharSequence into the set </summary>
- public virtual bool add(CharSequence text)
- {
- return map.put(text, PLACEHOLDER) == null;
- }
-
- /// <summary>
/// Add this String into the set </summary>
- public virtual bool add(string text)
+ public virtual bool Add(string text)
{
return map.put(text, PLACEHOLDER) == null;
}
@@ -155,12 +143,12 @@ namespace org.apache.lucene.analysis.util
/// If ignoreCase is true for this Set, the text array will be directly modified.
/// The user should never modify this text array after calling this method.
/// </summary>
- public virtual bool add(char[] text)
+ public virtual bool Add(char[] text)
{
return map.put(text, PLACEHOLDER) == null;
}
- public override int size()
+ public override int Size()
{
return map.size();
}
@@ -211,9 +199,7 @@ namespace org.apache.lucene.analysis.util
/// <returns> a copy of the given set as a <seealso cref="CharArraySet"/>. If the given set
/// is a <seealso cref="CharArraySet"/> the ignoreCase property as well as the
/// matchVersion of the given set will be preserved. </returns>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: public static CharArraySet copy(final org.apache.lucene.util.Version matchVersion, final java.util.Set<?> set)
- public static CharArraySet copy<T1>(Version matchVersion, HashSet<T1> set)
+ public static CharArraySet Copy<T1>(Version matchVersion, HashSet<T1> set)
{
if (set == EMPTY_SET)
{
@@ -221,8 +207,6 @@ namespace org.apache.lucene.analysis.util
}
if (set is CharArraySet)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final CharArraySet source = (CharArraySet) set;
CharArraySet source = (CharArraySet) set;
return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map));
}
@@ -232,8 +216,6 @@ namespace org.apache.lucene.analysis.util
/// <summary>
/// Returns an <seealso cref="Iterator"/> for {@code char[]} instances in this set.
/// </summary>
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @Override @SuppressWarnings("unchecked") public java.util.Iterator<Object> iterator()
public override IEnumerator<object> iterator()
{
// use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
@@ -242,9 +224,7 @@ namespace org.apache.lucene.analysis.util
public override string ToString()
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final StringBuilder sb = new StringBuilder("[");
- StringBuilder sb = new StringBuilder("[");
+ var sb = new StringBuilder("[");
foreach (object item in this)
{
if (sb.Length > 1)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
index e876a6f..0b88d7b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
@@ -1,6 +1,8 @@
using System.Diagnostics;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
/*
@@ -19,11 +21,7 @@ namespace org.apache.lucene.analysis.util
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// <seealso cref="CharacterUtils"/> provides a unified interface to Character-related
/// operations to implement backwards compatible character operations based on a
/// <seealso cref="Version"/> instance.
@@ -43,11 +41,9 @@ namespace org.apache.lucene.analysis.util
/// a version instance </param>
/// <returns> a <seealso cref="CharacterUtils"/> implementation according to the given
/// <seealso cref="Version"/> instance. </returns>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: public static CharacterUtils getInstance(final org.apache.lucene.util.Version matchVersion)
public static CharacterUtils getInstance(Version matchVersion)
{
- return matchVersion.onOrAfter(Version.LUCENE_31) ? JAVA_5 : JAVA_4;
+ return matchVersion.OnOrAfter(Version.LUCENE_31) ? JAVA_5 : JAVA_4;
}
/// <summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
index 8b7c93b..8ead1fb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/ClasspathResourceLoader.cs
@@ -1,7 +1,8 @@
using System;
+using System.IO;
using System.Threading;
-namespace org.apache.lucene.analysis.util
+namespace Lucene.Net.Analysis.Util
{
/*
@@ -62,13 +63,9 @@ namespace org.apache.lucene.analysis.util
this.loader = loader;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public java.io.InputStream openResource(String resource) throws java.io.IOException
- public InputStream openResource(string resource)
+ public Stream openResource(string resource)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final java.io.InputStream stream = (clazz != null) ? clazz.getResourceAsStream(resource) : loader.getResourceAsStream(resource);
- InputStream stream = (clazz != null) ? clazz.getResourceAsStream(resource) : loader.getResourceAsStream(resource);
+ Stream stream = (clazz != null) ? clazz.getResourceAsStream(resource) : loader.getResourceAsStream(resource);
if (stream == null)
{
throw new IOException("Resource not found: " + resource);
[3/4] lucenenet git commit: More Analysis porting fixes
Posted by sy...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
index 87574cc..f170ebf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilter.cs
@@ -1,6 +1,8 @@
using System.Diagnostics;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -19,13 +21,7 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
- using ArrayUtil = org.apache.lucene.util.ArrayUtil;
- using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
-
- /// <summary>
+ /// <summary>
/// This class converts alphabetic, numeric, and symbolic Unicode characters
/// which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
/// block) into their ASCII equivalents, if one exists.
@@ -94,38 +90,30 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
+ public override bool IncrementToken()
{
if (state != null)
{
Debug.Assert(preserveOriginal, "state should only be captured if preserveOriginal is true");
- restoreState(state);
+ RestoreState(state);
posIncAttr.PositionIncrement = 0;
state = null;
return true;
}
- if (input.incrementToken())
+ if (input.IncrementToken())
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
- char[] buffer = termAtt.buffer();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int length = termAtt.length();
- int length = termAtt.length();
+ char[] buffer = termAtt.Buffer();
+ int length = termAtt.Length;
// If no characters actually require rewriting then we
// just return token as-is:
for (int i = 0 ; i < length ; ++i)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char c = buffer[i];
char c = buffer[i];
if (c >= '\u0080')
{
- foldToASCII(buffer, length);
- termAtt.copyBuffer(output, 0, outputPos);
+ FoldToASCII(buffer, length);
+ termAtt.CopyBuffer(output, 0, outputPos);
break;
}
}
@@ -137,11 +125,9 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
+ public override void Reset()
{
- base.reset();
+ base.Reset();
state = null;
}
@@ -150,22 +136,20 @@ namespace org.apache.lucene.analysis.miscellaneous
/// accents are removed from accented characters. </summary>
/// <param name="input"> The string to fold </param>
/// <param name="length"> The number of characters in the input string </param>
- public void foldToASCII(char[] input, int length)
+ public void FoldToASCII(char[] input, int length)
{
if (preserveOriginal)
{
- state = captureState();
+ state = CaptureState();
}
// Worst-case length required:
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int maxSizeNeeded = 4 * length;
int maxSizeNeeded = 4 * length;
if (output.Length < maxSizeNeeded)
{
- output = new char[ArrayUtil.oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
+ output = new char[ArrayUtil.Oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
}
- outputPos = foldToASCII(input, 0, output, 0, length);
+ outputPos = FoldToASCII(input, 0, output, 0, length);
}
/// <summary>
@@ -178,15 +162,11 @@ namespace org.apache.lucene.analysis.miscellaneous
/// <param name="length"> The number of characters to fold </param>
/// <returns> length of output
/// @lucene.internal </returns>
- public static int foldToASCII(char[] input, int inputPos, char[] output, int outputPos, int length)
+ public static int FoldToASCII(char[] input, int inputPos, char[] output, int outputPos, int length)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int end = inputPos + length;
int end = inputPos + length;
for (int pos = inputPos; pos < end ; ++pos)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char c = input[pos];
char c = input[pos];
// Quick test: if it's not in range then just keep current character
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
index 17d787a..d431ee7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ASCIIFoldingFilterFactory.cs
@@ -1,8 +1,7 @@
using System.Collections.Generic;
using Lucene.Net.Analysis.Util;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -21,12 +20,7 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using AbstractAnalysisFactory = AbstractAnalysisFactory;
- using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
- using TokenFilterFactory = TokenFilterFactory;
-
- /// <summary>
+ /// <summary>
/// Factory for <seealso cref="ASCIIFoldingFilter"/>.
/// <pre class="prettyprint">
/// <fieldType name="text_ascii" class="solr.TextField" positionIncrementGap="100">
@@ -51,7 +45,7 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
- public override ASCIIFoldingFilter create(TokenStream input)
+ public override TokenStream Create(TokenStream input)
{
return new ASCIIFoldingFilter(input, preserveOriginal);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
index 94c8d4b..9264435 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
@@ -1,7 +1,9 @@
using System;
using System.Collections.Generic;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -20,12 +22,7 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
-
- /// <summary>
+ /// <summary>
/// A filter to apply normal capitalization rules to Tokens. It will make the first letter
/// capital and the rest lower case.
/// <p/>
@@ -82,17 +79,15 @@ namespace org.apache.lucene.analysis.miscellaneous
this.maxTokenLength = maxTokenLength;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
+ public override bool IncrementToken()
{
- if (!input.incrementToken())
+ if (!input.IncrementToken())
{
return false;
}
- char[] termBuffer = termAtt.buffer();
- int termBufferLength = termAtt.length();
+ char[] termBuffer = termAtt.Buffer();
+ int termBufferLength = termAtt.Length;
char[] backup = null;
if (maxWordCount < DEFAULT_MAX_WORD_COUNT)
@@ -115,7 +110,7 @@ namespace org.apache.lucene.analysis.miscellaneous
int len = i - lastWordStart;
if (len > 0)
{
- processWord(termBuffer, lastWordStart, len, wordCount++);
+ ProcessWord(termBuffer, lastWordStart, len, wordCount++);
lastWordStart = i + 1;
i++;
}
@@ -125,19 +120,19 @@ namespace org.apache.lucene.analysis.miscellaneous
// process the last word
if (lastWordStart < termBufferLength)
{
- processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
+ ProcessWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
}
if (wordCount > maxWordCount)
{
- termAtt.copyBuffer(backup, 0, termBufferLength);
+ termAtt.CopyBuffer(backup, 0, termBufferLength);
}
}
return true;
}
- private void processWord(char[] buffer, int offset, int length, int wordCount)
+ private void ProcessWord(char[] buffer, int offset, int length, int wordCount)
{
if (length < 1)
{
@@ -154,7 +149,7 @@ namespace org.apache.lucene.analysis.miscellaneous
return;
}
- if (keep != null && keep.contains(buffer, offset, length))
+ if (keep != null && keep.Contains(buffer, offset, length))
{
if (wordCount == 0 && forceFirstLetter)
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
index bd4f335..fccc4db 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilterFactory.cs
@@ -1,117 +1,114 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using org.apache.lucene.analysis.miscellaneous;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="CapitalizationFilter"/>.
+ /// <p/>
+ /// The factory takes parameters:<br/>
+ /// "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
+ /// "keep" - a keep word list. Each word that should be kept, separated by whitespace.<br/>
+ /// "keepIgnoreCase" - true or false. If true, the keep list will be considered case-insensitive.<br/>
+ /// "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
+ /// "okPrefix" - do not change word capitalization if a word begins with something in this list.
+ /// for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
+ /// "Mckinley"<br/>
+ /// "minWordLength" - how long the word needs to be to get capitalization applied. If the
+ /// minWordLength is 3, "and" > "And" but "or" stays "or"<br/>
+ /// "maxWordCount" - if the token contains more than maxWordCount words, the capitalization is
+ /// assumed to be correct.<br/>
+ ///
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
+ /// keep="java solr lucene" keepIgnoreCase="false"
+ /// okPrefix="McK McD McA"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ ///
+ /// @since solr 1.3
+ /// </summary>
+ public class CapitalizationFilterFactory : TokenFilterFactory
+ {
+ public const string KEEP = "keep";
+ public const string KEEP_IGNORE_CASE = "keepIgnoreCase";
+ public const string OK_PREFIX = "okPrefix";
+ public const string MIN_WORD_LENGTH = "minWordLength";
+ public const string MAX_WORD_COUNT = "maxWordCount";
+ public const string MAX_TOKEN_LENGTH = "maxTokenLength";
+ public const string ONLY_FIRST_WORD = "onlyFirstWord";
+ public const string FORCE_FIRST_LETTER = "forceFirstLetter";
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using TokenFilterFactory = TokenFilterFactory;
+ internal CharArraySet keep;
+ internal ICollection<char[]> okPrefix = Collections.EmptyList<char[]>(); // for Example: McK
- /// <summary>
- /// Factory for <seealso cref="CapitalizationFilter"/>.
- /// <p/>
- /// The factory takes parameters:<br/>
- /// "onlyFirstWord" - should each word be capitalized or all of the words?<br/>
- /// "keep" - a keep word list. Each word that should be kept separated by whitespace.<br/>
- /// "keepIgnoreCase - true or false. If true, the keep list will be considered case-insensitive.<br/>
- /// "forceFirstLetter" - Force the first letter to be capitalized even if it is in the keep list<br/>
- /// "okPrefix" - do not change word capitalization if a word begins with something in this list.
- /// for example if "McK" is on the okPrefix list, the word "McKinley" should not be changed to
- /// "Mckinley"<br/>
- /// "minWordLength" - how long the word needs to be to get capitalization applied. If the
- /// minWordLength is 3, "and" > "And" but "or" stays "or"<br/>
- /// "maxWordCount" - if the token contains more then maxWordCount words, the capitalization is
- /// assumed to be correct.<br/>
- ///
- /// <pre class="prettyprint">
- /// <fieldType name="text_cptlztn" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.CapitalizationFilterFactory" onlyFirstWord="true"
- /// keep="java solr lucene" keepIgnoreCase="false"
- /// okPrefix="McK McD McA"/>
- /// </analyzer>
- /// </fieldType></pre>
- ///
- /// @since solr 1.3
- /// </summary>
- public class CapitalizationFilterFactory : TokenFilterFactory
- {
- public const string KEEP = "keep";
- public const string KEEP_IGNORE_CASE = "keepIgnoreCase";
- public const string OK_PREFIX = "okPrefix";
- public const string MIN_WORD_LENGTH = "minWordLength";
- public const string MAX_WORD_COUNT = "maxWordCount";
- public const string MAX_TOKEN_LENGTH = "maxTokenLength";
- public const string ONLY_FIRST_WORD = "onlyFirstWord";
- public const string FORCE_FIRST_LETTER = "forceFirstLetter";
+ internal readonly int minWordLength; // don't modify capitalization for words shorter then this
+ internal readonly int maxWordCount;
+ internal readonly int maxTokenLength;
+ internal readonly bool onlyFirstWord;
+ internal readonly bool forceFirstLetter; // make sure the first letter is capital even if it is in the keep list
- internal CharArraySet keep;
+ /// <summary>
+ /// Creates a new CapitalizationFilterFactory </summary>
+ public CapitalizationFilterFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ assureMatchVersion();
+ bool ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
+ HashSet<string> k = getSet(args, KEEP);
+ if (k != null)
+ {
+ keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
+ keep.AddAll(k);
+ }
- internal ICollection<char[]> okPrefix = Collections.emptyList(); // for Example: McK
+ k = getSet(args, OK_PREFIX);
+ if (k != null)
+ {
+ okPrefix = new List<char[]>();
+ foreach (string item in k)
+ {
+ okPrefix.Add(item.ToCharArray());
+ }
+ }
- internal readonly int minWordLength; // don't modify capitalization for words shorter then this
- internal readonly int maxWordCount;
- internal readonly int maxTokenLength;
- internal readonly bool onlyFirstWord;
- internal readonly bool forceFirstLetter; // make sure the first letter is capital even if it is in the keep list
-
- /// <summary>
- /// Creates a new CapitalizationFilterFactory </summary>
- public CapitalizationFilterFactory(IDictionary<string, string> args) : base(args)
- {
- assureMatchVersion();
- bool ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
- HashSet<string> k = getSet(args, KEEP);
- if (k != null)
- {
- keep = new CharArraySet(luceneMatchVersion, 10, ignoreCase);
- keep.addAll(k);
- }
-
- k = getSet(args, OK_PREFIX);
- if (k != null)
- {
- okPrefix = new List<>();
- foreach (string item in k)
- {
- okPrefix.Add(item.ToCharArray());
- }
- }
-
- minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
- maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
- maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
- onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
- forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override CapitalizationFilter create(TokenStream input)
- {
- return new CapitalizationFilter(input, onlyFirstWord, keep, forceFirstLetter, okPrefix, minWordLength, maxWordCount, maxTokenLength);
- }
- }
+ minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
+ maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
+ maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
+ onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
+ forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+ public override TokenStream Create(TokenStream input)
+ {
+ return new CapitalizationFilter(input, onlyFirstWord, keep, forceFirstLetter, okPrefix, minWordLength, maxWordCount, maxTokenLength);
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
index f110d37..f9f9a53 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -1,5 +1,6 @@
using System;
-using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -18,50 +19,45 @@ using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
* limitations under the License.
*/
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
-
- using FilteringTokenFilter = FilteringTokenFilter;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
- /// A TokenFilter that only keeps tokens with text contained in the
- /// required words. This filter behaves like the inverse of StopFilter.
- ///
- /// @since solr 1.3
- /// </summary>
- public sealed class KeepWordFilter : FilteringTokenFilter
- {
- private readonly CharArraySet words;
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
-
- /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4.
- [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
- public KeepWordFilter(Version version, bool enablePositionIncrements, TokenStream @in, CharArraySet words) : base(version, enablePositionIncrements, @in)
- {
- this.words = words;
- }
-
- /// <summary>
- /// Create a new <seealso cref="KeepWordFilter"/>.
- /// <para><b>NOTE</b>: The words set passed to this constructor will be directly
- /// used by this filter and should not be modified.
- /// </para>
- /// </summary>
- /// <param name="version"> the Lucene match version </param>
- /// <param name="in"> the <seealso cref="TokenStream"/> to consume </param>
- /// <param name="words"> the words to keep </param>
- public KeepWordFilter(Version version, TokenStream @in, CharArraySet words) : base(version, @in)
- {
- this.words = words;
- }
-
- public override bool accept()
- {
- return words.contains(termAtt.buffer(), 0, termAtt.length());
- }
- }
-
+ /// <summary>
+ /// A TokenFilter that only keeps tokens with text contained in the
+ /// required words. This filter behaves like the inverse of StopFilter.
+ ///
+ /// @since solr 1.3
+ /// </summary>
+ public sealed class KeepWordFilter : FilteringTokenFilter
+ {
+ private readonly CharArraySet words;
+ private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+
+ /// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4.
+ [Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
+ public KeepWordFilter(Version version, bool enablePositionIncrements, TokenStream @in, CharArraySet words)
+ : base(version, enablePositionIncrements, @in)
+ {
+ this.words = words;
+ }
+
+ /// <summary>
+ /// Create a new <seealso cref="KeepWordFilter"/>.
+ /// <para><b>NOTE</b>: The words set passed to this constructor will be directly
+ /// used by this filter and should not be modified.
+ /// </para>
+ /// </summary>
+ /// <param name="version"> the Lucene match version </param>
+ /// <param name="in"> the <seealso cref="TokenStream"/> to consume </param>
+ /// <param name="words"> the words to keep </param>
+ public KeepWordFilter(Version version, TokenStream @in, CharArraySet words)
+ : base(version, @in)
+ {
+ this.words = words;
+ }
+
+ public override bool Accept()
+ {
+ return words.Contains(termAtt.Buffer(), 0, termAtt.Length);
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
index 8aa687f..266f4b9 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilterFactory.cs
@@ -1,113 +1,103 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.miscellaneous;
+using org.apache.lucene.analysis.util;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="KeepWordFilter"/>.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ /// </summary>
+ public class KeepWordFilterFactory : TokenFilterFactory, ResourceLoaderAware
+ {
+ private readonly bool ignoreCase;
+ private readonly bool enablePositionIncrements;
+ private readonly string wordFiles;
+ private CharArraySet words;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
+ /// <summary>
+ /// Creates a new KeepWordFilterFactory </summary>
+ public KeepWordFilterFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ assureMatchVersion();
+ wordFiles = get(args, "words");
+ ignoreCase = getBoolean(args, "ignoreCase", false);
+ enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+ public virtual void Inform(ResourceLoader loader)
+ {
+ if (wordFiles != null)
+ {
+ words = GetWordSet(loader, wordFiles, ignoreCase);
+ }
+ }
- /// <summary>
- /// Factory for <seealso cref="KeepWordFilter"/>.
- /// <pre class="prettyprint">
- /// <fieldType name="text_keepword" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.KeepWordFilterFactory" words="keepwords.txt" ignoreCase="false"/>
- /// </analyzer>
- /// </fieldType></pre>
- /// </summary>
- public class KeepWordFilterFactory : TokenFilterFactory, ResourceLoaderAware
- {
- private readonly bool ignoreCase;
- private readonly bool enablePositionIncrements;
- private readonly string wordFiles;
- private CharArraySet words;
+ public virtual bool EnablePositionIncrements
+ {
+ get
+ {
+ return enablePositionIncrements;
+ }
+ }
- /// <summary>
- /// Creates a new KeepWordFilterFactory </summary>
- public KeepWordFilterFactory(IDictionary<string, string> args) : base(args)
- {
- assureMatchVersion();
- wordFiles = get(args, "words");
- ignoreCase = getBoolean(args, "ignoreCase", false);
- enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
+ public virtual bool IgnoreCase
+ {
+ get
+ {
+ return ignoreCase;
+ }
+ }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
- public virtual void inform(ResourceLoader loader)
- {
- if (wordFiles != null)
- {
- words = getWordSet(loader, wordFiles, ignoreCase);
- }
- }
-
- public virtual bool EnablePositionIncrements
- {
- get
- {
- return enablePositionIncrements;
- }
- }
-
- public virtual bool IgnoreCase
- {
- get
- {
- return ignoreCase;
- }
- }
-
- public virtual CharArraySet Words
- {
- get
- {
- return words;
- }
- }
-
- public override TokenStream create(TokenStream input)
- {
- // if the set is null, it means it was empty
- if (words == null)
- {
- return input;
- }
- else
- {
-//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
-//ORIGINAL LINE: @SuppressWarnings("deprecation") final org.apache.lucene.analysis.TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
- TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
- return filter;
- }
- }
- }
+ public virtual CharArraySet Words
+ {
+ get
+ {
+ return words;
+ }
+ }
+ public override TokenStream Create(TokenStream input)
+ {
+ // if the set is null, it means it was empty
+ if (words == null)
+ {
+ return input;
+ }
+ else
+ {
+ TokenStream filter = new KeepWordFilter(luceneMatchVersion, enablePositionIncrements, input, words);
+ return filter;
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
index 14eeafa..c374fae 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilterFactory.cs
@@ -1,99 +1,91 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.miscellaneous;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="KeywordMarkerFilter"/>.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" pattern="^.+er$" ignoreCase="false"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ /// </summary>
+ public class KeywordMarkerFilterFactory : TokenFilterFactory, ResourceLoaderAware
+ {
+ public const string PROTECTED_TOKENS = "protected";
+ public const string PATTERN = "pattern";
+ private readonly string wordFiles;
+ private readonly string stringPattern;
+ private readonly bool ignoreCase;
+ private Pattern pattern;
+ private CharArraySet protectedWords;
+ /// <summary>
+ /// Creates a new KeywordMarkerFilterFactory.
+ /// </summary>
+ /// <param name="args">
+ /// Factory arguments. The <c>protected</c>, <c>pattern</c> and <c>ignoreCase</c>
+ /// entries are read here. </param>
+ /// <exception cref="System.ArgumentException">
+ /// If <paramref name="args"/> is not empty once the known parameters have been read. </exception>
+ public KeywordMarkerFilterFactory(IDictionary<string, string> args)
+     : base(args)
+ {
+     wordFiles = get(args, PROTECTED_TOKENS);
+     stringPattern = get(args, PATTERN);
+     ignoreCase = getBoolean(args, "ignoreCase", false);
+     if (args.Count > 0)
+     {
+         // Concatenating the dictionary itself would only print its type name
+         // (unlike Java's Map.toString()), so list the offending keys explicitly.
+         throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+     }
+ }
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
+ /// <summary>
+ /// Builds the protected word set and the keyword pattern from the values
+ /// captured by the constructor; each one is skipped when it was not configured.
+ /// </summary>
+ /// <param name="loader"> resource loader handed to <c>GetWordSet</c> </param>
+ public virtual void Inform(ResourceLoader loader)
+ {
+     if (wordFiles != null)
+     {
+         protectedWords = GetWordSet(loader, wordFiles, ignoreCase);
+     }
+
+     if (stringPattern == null)
+     {
+         return;
+     }
+
+     // NOTE(review): Pattern.compile and its flags are the Java regex API carried over
+     // by the port - confirm a Pattern type is in scope, or move to
+     // System.Text.RegularExpressions.Regex together with the field and its consumers.
+     if (ignoreCase)
+     {
+         pattern = Pattern.compile(stringPattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
+     }
+     else
+     {
+         pattern = Pattern.compile(stringPattern);
+     }
+ }
- /// <summary>
- /// Factory for <seealso cref="KeywordMarkerFilter"/>.
- /// <pre class="prettyprint">
- /// <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.KeywordMarkerFilterFactory" protected="protectedkeyword.txt" pattern="^.+er$" ignoreCase="false"/>
- /// </analyzer>
- /// </fieldType></pre>
- /// </summary>
- public class KeywordMarkerFilterFactory : TokenFilterFactory, ResourceLoaderAware
- {
- public const string PROTECTED_TOKENS = "protected";
- public const string PATTERN = "pattern";
- private readonly string wordFiles;
- private readonly string stringPattern;
- private readonly bool ignoreCase;
- private Pattern pattern;
- private CharArraySet protectedWords;
-
- /// <summary>
- /// Creates a new KeywordMarkerFilterFactory </summary>
- public KeywordMarkerFilterFactory(IDictionary<string, string> args) : base(args)
- {
- wordFiles = get(args, PROTECTED_TOKENS);
- stringPattern = get(args, PATTERN);
- ignoreCase = getBoolean(args, "ignoreCase", false);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
- public virtual void inform(ResourceLoader loader)
- {
- if (wordFiles != null)
- {
- protectedWords = getWordSet(loader, wordFiles, ignoreCase);
- }
- if (stringPattern != null)
- {
- pattern = ignoreCase ? Pattern.compile(stringPattern, Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE) : Pattern.compile(stringPattern);
- }
- }
-
- public virtual bool IgnoreCase
- {
- get
- {
- return ignoreCase;
- }
- }
-
- public override TokenStream create(TokenStream input)
- {
- if (pattern != null)
- {
- input = new PatternKeywordMarkerFilter(input, pattern);
- }
- if (protectedWords != null)
- {
- input = new SetKeywordMarkerFilter(input, protectedWords);
- }
- return input;
- }
- }
+ /// <summary>
+ /// Gets the <c>ignoreCase</c> setting read from the factory arguments.
+ /// </summary>
+ public virtual bool IgnoreCase
+ {
+     get { return ignoreCase; }
+ }
+ /// <summary>
+ /// Wraps <paramref name="input"/> in a <see cref="PatternKeywordMarkerFilter"/> when a
+ /// pattern is set and then in a <see cref="SetKeywordMarkerFilter"/> when a protected
+ /// word set is set. With neither, the stream is returned unchanged.
+ /// </summary>
+ /// <param name="input"> the stream to decorate </param>
+ /// <returns> the (possibly wrapped) stream </returns>
+ public override TokenStream Create(TokenStream input)
+ {
+     TokenStream stream = input;
+
+     // pattern marker first, so the set marker (if any) ends up outermost
+     if (pattern != null)
+     {
+         stream = new PatternKeywordMarkerFilter(stream, pattern);
+     }
+
+     if (protectedWords != null)
+     {
+         stream = new SetKeywordMarkerFilter(stream, protectedWords);
+     }
+
+     return stream;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
index b6f7b86..1e97350 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordRepeatFilterFactory.cs
@@ -1,52 +1,51 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.miscellaneous;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="KeywordRepeatFilter"/>.
+ ///
+ /// Since <seealso cref="KeywordRepeatFilter"/> emits two tokens for every input token, any tokens that aren't transformed
+ /// later in the analysis chain will be in the document twice. Therefore, consider adding
+ /// <seealso cref="RemoveDuplicatesTokenFilterFactory"/> later in the analysis chain.
+ /// </summary>
+ public sealed class KeywordRepeatFilterFactory : TokenFilterFactory
+ {
- using TokenFilterFactory = TokenFilterFactory;
+ /// <summary>
+ /// Creates a new KeywordRepeatFilterFactory.
+ /// </summary>
+ /// <param name="args"> factory arguments; none are read by this constructor itself </param>
+ /// <exception cref="System.ArgumentException">
+ /// If <paramref name="args"/> is not empty after the base constructor has run. </exception>
+ public KeywordRepeatFilterFactory(IDictionary<string, string> args)
+     : base(args)
+ {
+     if (args.Count > 0)
+     {
+         // Concatenating the dictionary itself would only print its type name
+         // (unlike Java's Map.toString()), so list the offending keys explicitly.
+         throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+     }
+ }
- /// <summary>
- /// Factory for <seealso cref="KeywordRepeatFilter"/>.
- ///
- /// Since <seealso cref="KeywordRepeatFilter"/> emits two tokens for every input token, and any tokens that aren't transformed
- /// later in the analysis chain will be in the document twice. Therefore, consider adding
- /// <seealso cref="RemoveDuplicatesTokenFilterFactory"/> later in the analysis chain.
- /// </summary>
- public sealed class KeywordRepeatFilterFactory : TokenFilterFactory
- {
-
- /// <summary>
- /// Creates a new KeywordRepeatFilterFactory </summary>
- public KeywordRepeatFilterFactory(IDictionary<string, string> args) : base(args)
- {
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override TokenStream create(TokenStream input)
- {
- return new KeywordRepeatFilter(input);
- }
- }
+ /// <summary>
+ /// Wraps <paramref name="input"/> in a new <see cref="KeywordRepeatFilter"/>.
+ /// </summary>
+ /// <param name="input"> the stream to decorate </param>
+ /// <returns> the wrapping filter </returns>
+ public override TokenStream Create(TokenStream input)
+ {
+     TokenStream repeated = new KeywordRepeatFilter(input);
+     return repeated;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
index fa5d5da..da3bda4 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -17,6 +17,7 @@ using System.Text;
* limitations under the License.
*/
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
namespace org.apache.lucene.analysis.miscellaneous
{
@@ -27,7 +28,7 @@ namespace org.apache.lucene.analysis.miscellaneous
using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+ using CharArraySet = CharArraySet;
using ArrayUtil = org.apache.lucene.util.ArrayUtil;
using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index 77cbe8e..d074038 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -1,5 +1,6 @@
using System;
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
namespace org.apache.lucene.analysis.miscellaneous
{
@@ -26,7 +27,7 @@ namespace org.apache.lucene.analysis.miscellaneous
using StopFilter = StopFilter;
using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+ using CharArraySet = CharArraySet;
using Version = org.apache.lucene.util.Version;
/// <summary>
@@ -109,7 +110,7 @@ namespace org.apache.lucene.analysis.miscellaneous
/// given stop set (after previously having applied toLowerCase()
/// if applicable). For example, created via
/// <seealso cref="StopFilter#makeStopSet(Version, String[])"/> and/or
- /// <seealso cref="org.apache.lucene.analysis.util.WordlistLoader"/>as in
+ /// <seealso cref="WordlistLoader"/> as in
/// <code>WordlistLoader.getWordSet(new File("samples/fulltext/stopwords.txt"))</code>
/// or <a href="http://www.unine.ch/info/clef/">other stop words
/// lists </a>. </param>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
index 9c2586f..b4a5c90 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilter.cs
@@ -1,6 +1,4 @@
-using System;
-
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,83 +15,72 @@
* limitations under the License.
*/
-namespace org.apache.lucene.analysis.miscellaneous
-{
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using Version = org.apache.lucene.util.Version;
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using Version = Lucene.Net.Util.Version;
- /// <summary>
- /// A TokenFilter which filters out Tokens at the same position and Term text as the previous token in the stream.
- /// </summary>
- public sealed class RemoveDuplicatesTokenFilter : TokenFilter
- {
-
- private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
- private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /// <summary>
+ /// A TokenFilter which filters out Tokens at the same position and Term text as the previous token in the stream.
+ /// </summary>
+ public sealed class RemoveDuplicatesTokenFilter : TokenFilter
+ {
- // use a fixed version, as we don't care about case sensitivity.
- private readonly CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false);
+ private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
+ private readonly PositionIncrementAttribute posIncAttribute = addAttribute(typeof(PositionIncrementAttribute));
- /// <summary>
- /// Creates a new RemoveDuplicatesTokenFilter
- /// </summary>
- /// <param name="in"> TokenStream that will be filtered </param>
- public RemoveDuplicatesTokenFilter(TokenStream @in) : base(@in)
- {
- }
+ // use a fixed version, as we don't care about case sensitivity.
+ private readonly CharArraySet previous = new CharArraySet(Version.LUCENE_31, 8, false);
- /// <summary>
- /// {@inheritDoc}
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- while (input.incrementToken())
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char term[] = termAttribute.buffer();
- char[] term = termAttribute.buffer();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int length = termAttribute.length();
- int length = termAttribute.length();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int posIncrement = posIncAttribute.getPositionIncrement();
- int posIncrement = posIncAttribute.PositionIncrement;
+ /// <summary>
+ /// Creates a new RemoveDuplicatesTokenFilter
+ /// </summary>
+ /// <param name="in"> TokenStream that will be filtered </param>
+ public RemoveDuplicatesTokenFilter(TokenStream @in)
+ : base(@in)
+ {
+ }
- if (posIncrement > 0)
- {
- previous.clear();
- }
+ /// <inheritdoc/>
+ public override bool IncrementToken()
+ {
+ while (input.IncrementToken())
+ {
+ char[] term = termAttribute.Buffer();
+ int length = termAttribute.Length;
+ int posIncrement = posIncAttribute.PositionIncrement;
- bool duplicate = (posIncrement == 0 && previous.contains(term, 0, length));
+ if (posIncrement > 0)
+ {
+ previous.Clear();
+ }
- // clone the term, and add to the set of seen terms.
- char[] saved = new char[length];
- Array.Copy(term, 0, saved, 0, length);
- previous.add(saved);
+ bool duplicate = (posIncrement == 0 && previous.Contains(term, 0, length));
- if (!duplicate)
- {
- return true;
- }
- }
- return false;
- }
+ // clone the term, and add to the set of seen terms.
+ char[] saved = new char[length];
+ Array.Copy(term, 0, saved, 0, length);
+ previous.Add(saved);
- /// <summary>
- /// {@inheritDoc}
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
- {
- base.reset();
- previous.clear();
- }
- }
+ if (!duplicate)
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+ /// <inheritdoc/>
+ /// <remarks>
+ /// Calls the base implementation and then empties the set of previously
+ /// seen terms.
+ /// </remarks>
+ public override void Reset()
+ {
+ base.Reset();
+ previous.Clear();
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
index bae261e..f7d5c03 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/RemoveDuplicatesTokenFilterFactory.cs
@@ -1,7 +1,7 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -20,10 +20,7 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using TokenFilterFactory = TokenFilterFactory;
-
- /// <summary>
+ /// <summary>
/// Factory for <seealso cref="RemoveDuplicatesTokenFilter"/>.
/// <pre class="prettyprint">
/// <fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100">
@@ -46,7 +43,7 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
- public override RemoveDuplicatesTokenFilter create(TokenStream input)
+ public override RemoveDuplicatesTokenFilter Create(TokenStream input)
{
return new RemoveDuplicatesTokenFilter(input);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
index 06ecebc..ffc4ba7 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianFoldingFilter.cs
@@ -1,4 +1,6 @@
-namespace org.apache.lucene.analysis.miscellaneous
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.miscellaneous
{
/*
@@ -19,7 +21,7 @@
*/
using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+ using StemmerUtil = StemmerUtil;
/// <summary>
/// This filter folds Scandinavian characters åÅäæÄÆ->a and öÖøØ->o.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
index 3113949..b6bd9de 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/ScandinavianNormalizationFilter.cs
@@ -1,4 +1,6 @@
-namespace org.apache.lucene.analysis.miscellaneous
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.miscellaneous
{
/*
@@ -19,7 +21,7 @@
*/
using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using StemmerUtil = org.apache.lucene.analysis.util.StemmerUtil;
+ using StemmerUtil = StemmerUtil;
/// <summary>
/// This filter normalize use of the interchangeable Scandinavian characters æÆäÄöÖøØ
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
index b732319..c21607b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/SetKeywordMarkerFilter.cs
@@ -1,59 +1,56 @@
-namespace org.apache.lucene.analysis.miscellaneous
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
-
- /// <summary>
- /// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
- /// contained in the provided set is marked as a keyword by setting
- /// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
- /// </summary>
- public sealed class SetKeywordMarkerFilter : KeywordMarkerFilter
- {
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
- private readonly CharArraySet keywordSet;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.miscellaneous;
- /// <summary>
- /// Create a new KeywordSetMarkerFilter, that marks the current token as a
- /// keyword if the tokens term buffer is contained in the given set via the
- /// <seealso cref="KeywordAttribute"/>.
- /// </summary>
- /// <param name="in">
- /// TokenStream to filter </param>
- /// <param name="keywordSet">
- /// the keywords set to lookup the current termbuffer </param>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: public SetKeywordMarkerFilter(final org.apache.lucene.analysis.TokenStream in, final org.apache.lucene.analysis.util.CharArraySet keywordSet)
- public SetKeywordMarkerFilter(TokenStream @in, CharArraySet keywordSet) : base(@in)
- {
- this.keywordSet = keywordSet;
- }
-
- protected internal override bool Keyword
- {
- get
- {
- return keywordSet.contains(termAtt.buffer(), 0, termAtt.length());
- }
- }
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>. Each token
+ /// contained in the provided set is marked as a keyword by setting
+ /// <seealso cref="KeywordAttribute#setKeyword(boolean)"/> to <code>true</code>.
+ /// </summary>
+ public sealed class SetKeywordMarkerFilter : KeywordMarkerFilter
+ {
+ private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+ private readonly CharArraySet keywordSet;
- }
+ /// <summary>
+ /// Creates a new SetKeywordMarkerFilter that marks the current token as a
+ /// keyword, via the <seealso cref="KeywordAttribute"/>, if the token's term
+ /// buffer is contained in the given set.
+ /// </summary>
+ /// <param name="in">
+ /// TokenStream to filter </param>
+ /// <param name="keywordSet">
+ /// the keyword set in which the current term buffer is looked up </param>
+ public SetKeywordMarkerFilter(TokenStream @in, CharArraySet keywordSet)
+ : base(@in)
+ {
+ this.keywordSet = keywordSet;
+ }
+ /// <summary>
+ /// Gets whether the current term, i.e. the first <c>termAtt.Length</c> characters
+ /// of the term buffer, is contained in the keyword set.
+ /// </summary>
+ protected internal override bool Keyword
+ {
+     get
+     {
+         char[] termBuffer = termAtt.Buffer();
+         int termLength = termAtt.Length;
+         return keywordSet.Contains(termBuffer, 0, termLength);
+     }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
index f755c1f..578bf7b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/StemmerOverrideFilterFactory.cs
@@ -1,97 +1,89 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.miscellaneous;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="StemmerOverrideFilter"/>.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ /// </summary>
+ public class StemmerOverrideFilterFactory : TokenFilterFactory, ResourceLoaderAware
+ {
+ private StemmerOverrideFilter.StemmerOverrideMap dictionary;
+ private readonly string dictionaryFiles;
+ private readonly bool ignoreCase;
+ /// <summary>
+ /// Creates a new StemmerOverrideFilterFactory.
+ /// </summary>
+ /// <param name="args">
+ ///          factory arguments; "dictionary" and "ignoreCase" (default <c>false</c>)
+ ///          are read here. NOTE(review): get/getBoolean presumably remove the
+ ///          entries they read, so anything left over is unknown - confirm against
+ ///          TokenFilterFactory. </param>
+ /// <exception cref="System.ArgumentException"> if <paramref name="args"/> still
+ ///          contains entries after the known ones were read </exception>
+ public StemmerOverrideFilterFactory(IDictionary<string, string> args)
+     : base(args)
+ {
+     dictionaryFiles = get(args, "dictionary");
+     ignoreCase = getBoolean(args, "ignoreCase", false);
+     if (args.Count > 0)
+     {
+         // Unlike java.util.Map, a .NET dictionary's ToString() yields only its type
+         // name, so list the offending keys explicitly to keep the message useful.
+         throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+     }
+ }
- using StemmerOverrideMap = org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
+ /// <summary>
+ /// Builds the stemmer override dictionary from the configured dictionary files.
+ /// Does nothing when no "dictionary" argument was given or when it yields no
+ /// file names; in both cases the dictionary field is left untouched.
+ /// </summary>
+ /// <param name="loader">
+ ///          resource loader passed to getLines for every dictionary file </param>
+ public virtual void Inform(ResourceLoader loader)
+ {
+     if (dictionaryFiles == null)
+     {
+         return;
+     }
+     assureMatchVersion();
+     IList<string> files = splitFileNames(dictionaryFiles);
+     if (files.Count == 0)
+     {
+         return;
+     }
+     StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
+     foreach (string file in files)
+     {
+         foreach (string line in getLines(loader, file.Trim()))
+         {
+             // Each line is "input<TAB>output". Split on the first tab only, using the
+             // char[] overload: .NET Framework has no Split(string, int) counterpart
+             // to Java's String.split(regex, limit).
+             string[] mapping = line.Split(new[] { '\t' }, 2);
+             builder.add(mapping[0], mapping[1]);
+         }
+     }
+     dictionary = builder.build();
+ }
- /// <summary>
- /// Factory for <seealso cref="StemmerOverrideFilter"/>.
- /// <pre class="prettyprint">
- /// <fieldType name="text_dicstem" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.StemmerOverrideFilterFactory" dictionary="dictionary.txt" ignoreCase="false"/>
- /// </analyzer>
- /// </fieldType></pre>
- /// </summary>
- public class StemmerOverrideFilterFactory : TokenFilterFactory, ResourceLoaderAware
- {
- private StemmerOverrideMap dictionary;
- private readonly string dictionaryFiles;
- private readonly bool ignoreCase;
-
- /// <summary>
- /// Creates a new StemmerOverrideFilterFactory </summary>
- public StemmerOverrideFilterFactory(IDictionary<string, string> args) : base(args)
- {
- dictionaryFiles = get(args, "dictionary");
- ignoreCase = getBoolean(args, "ignoreCase", false);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
- public virtual void inform(ResourceLoader loader)
- {
- if (dictionaryFiles != null)
- {
- assureMatchVersion();
- IList<string> files = splitFileNames(dictionaryFiles);
- if (files.Count > 0)
- {
- StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
- foreach (string file in files)
- {
- IList<string> list = getLines(loader, file.Trim());
- foreach (string line in list)
- {
- string[] mapping = line.Split("\t", 2);
- builder.add(mapping[0], mapping[1]);
- }
- }
- dictionary = builder.build();
- }
- }
- }
-
- public virtual bool IgnoreCase
- {
- get
- {
- return ignoreCase;
- }
- }
-
- public override TokenStream create(TokenStream input)
- {
- return dictionary == null ? input : new StemmerOverrideFilter(input, dictionary);
- }
- }
+ /// <summary>
+ /// Gets the value of the "ignoreCase" argument this factory was created with.
+ /// </summary>
+ public virtual bool IgnoreCase
+ {
+     get { return ignoreCase; }
+ }
+ /// <summary>
+ /// Wraps <paramref name="input"/> in a <seealso cref="StemmerOverrideFilter"/>
+ /// when a dictionary has been built; otherwise returns the input unchanged.
+ /// </summary>
+ public override TokenStream Create(TokenStream input)
+ {
+     if (dictionary == null)
+     {
+         return input;
+     }
+     return new StemmerOverrideFilter(input, dictionary);
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
index 16575e6..2b1811c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
@@ -17,6 +17,7 @@ using System.Text;
* limitations under the License.
*/
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Util;
namespace org.apache.lucene.analysis.miscellaneous
{
@@ -27,7 +28,7 @@ namespace org.apache.lucene.analysis.miscellaneous
using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
+ using CharArraySet = CharArraySet;
using ArrayUtil = org.apache.lucene.util.ArrayUtil;
using AttributeSource = org.apache.lucene.util.AttributeSource;
using InPlaceMergeSorter = org.apache.lucene.util.InPlaceMergeSorter;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
index 747ed48..d13a3c0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilterFactory.cs
@@ -1,8 +1,12 @@
using System;
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.miscellaneous;
+using Version = Lucene.Net.Util.Version;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -21,15 +25,6 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using CharArraySet = org.apache.lucene.analysis.util.CharArraySet;
- using ResourceLoader = org.apache.lucene.analysis.util.ResourceLoader;
- using ResourceLoaderAware = org.apache.lucene.analysis.util.ResourceLoaderAware;
- using TokenFilterFactory = TokenFilterFactory;
- using Version = org.apache.lucene.util.Version;
-
-
- using org.apache.lucene.analysis.miscellaneous;
//JAVA TO C# CONVERTER TODO TASK: This Java 'import static' statement cannot be converted to C#:
// import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
@@ -109,13 +104,11 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void inform(org.apache.lucene.analysis.util.ResourceLoader loader) throws java.io.IOException
- public virtual void inform(ResourceLoader loader)
+ public virtual void Inform(ResourceLoader loader)
{
if (wordFiles != null)
{
- protectedWords = getWordSet(loader, wordFiles, false);
+ protectedWords = GetWordSet(loader, wordFiles, false);
}
if (types != null)
{
@@ -130,9 +123,9 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
- public override TokenFilter create(TokenStream input)
+ public override TokenFilter Create(TokenStream input)
{
- if (luceneMatchVersion.onOrAfter(Version.LUCENE_48))
+ if (luceneMatchVersion.OnOrAfter(Version.LUCENE_48))
{
return new WordDelimiterFilter(luceneMatchVersion, input, typeTable == null ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE : typeTable, flags, protectedWords);
}
@@ -170,19 +163,19 @@ namespace org.apache.lucene.analysis.miscellaneous
}
// ensure the table is always at least as big as DEFAULT_WORD_DELIM_TABLE for performance
- sbyte[] types = new sbyte[Math.Max(typeMap.lastKey() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
+ sbyte[] types = new sbyte[Math.Max(typeMap.LastKey() + 1, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE.Length)];
for (int i = 0; i < types.Length; i++)
{
types[i] = WordDelimiterIterator.getType(i);
}
- foreach (KeyValuePair<char?, sbyte?> mapping in typeMap.entrySet())
+ foreach (KeyValuePair<char?, sbyte?> mapping in typeMap.EntrySet())
{
types[mapping.Key] = mapping.Value;
}
return types;
}
- private sbyte? parseType(string s)
+ private sbyte? ParseType(string s)
{
if (s.Equals("LOWER"))
{
@@ -266,5 +259,4 @@ namespace org.apache.lucene.analysis.miscellaneous
return new string(@out, 0, writePos);
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/cd3ee136/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
index 5b3d94b..64b9ab2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Ngram/EdgeNGramFilterFactory.cs
@@ -1,61 +1,58 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.ngram
+namespace Lucene.Net.Analysis.Ngram
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Creates new instances of <seealso cref="EdgeNGramTokenFilter"/>.
+ /// <pre class="prettyprint">
+ /// <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer>
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="1"/>
+ /// </analyzer>
+ /// </fieldType></pre>
+ /// </summary>
+ public class EdgeNGramFilterFactory : TokenFilterFactory
+ {
+ private readonly int maxGramSize;
+ private readonly int minGramSize;
+ private readonly string side;
- using TokenFilterFactory = TokenFilterFactory;
-
- /// <summary>
- /// Creates new instances of <seealso cref="EdgeNGramTokenFilter"/>.
- /// <pre class="prettyprint">
- /// <fieldType name="text_edgngrm" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer>
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="1"/>
- /// </analyzer>
- /// </fieldType></pre>
- /// </summary>
- public class EdgeNGramFilterFactory : TokenFilterFactory
- {
- private readonly int maxGramSize;
- private readonly int minGramSize;
- private readonly string side;
-
- /// <summary>
- /// Creates a new EdgeNGramFilterFactory </summary>
- public EdgeNGramFilterFactory(IDictionary<string, string> args) : base(args)
- {
- minGramSize = getInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
- maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
- side = get(args, "side", EdgeNGramTokenFilter.Side.FRONT.Label);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override EdgeNGramTokenFilter create(TokenStream input)
- {
- return new EdgeNGramTokenFilter(luceneMatchVersion, input, side, minGramSize, maxGramSize);
- }
- }
+ /// <summary>
+ /// Creates a new EdgeNGramFilterFactory.
+ /// </summary>
+ /// <param name="args">
+ ///          factory arguments; "minGramSize", "maxGramSize" and "side" are read
+ ///          here, each falling back to the corresponding EdgeNGramTokenFilter
+ ///          default. NOTE(review): getInt/get presumably remove the entries they
+ ///          read, so anything left over is unknown - confirm against
+ ///          TokenFilterFactory. </param>
+ /// <exception cref="System.ArgumentException"> if <paramref name="args"/> still
+ ///          contains entries after the known ones were read </exception>
+ public EdgeNGramFilterFactory(IDictionary<string, string> args)
+     : base(args)
+ {
+     minGramSize = getInt(args, "minGramSize", EdgeNGramTokenFilter.DEFAULT_MIN_GRAM_SIZE);
+     maxGramSize = getInt(args, "maxGramSize", EdgeNGramTokenFilter.DEFAULT_MAX_GRAM_SIZE);
+     side = get(args, "side", EdgeNGramTokenFilter.Side.FRONT.Label);
+     if (args.Count > 0)
+     {
+         // Unlike java.util.Map, a .NET dictionary's ToString() yields only its type
+         // name, so list the offending keys explicitly to keep the message useful.
+         throw new System.ArgumentException("Unknown parameters: " + string.Join(", ", args.Keys));
+     }
+ }
+ /// <summary>
+ /// Wraps <paramref name="input"/> in an <seealso cref="EdgeNGramTokenFilter"/>
+ /// configured with this factory's match version, side and gram sizes.
+ /// </summary>
+ public override TokenStream Create(TokenStream input)
+ {
+     TokenStream edgeNGrams = new EdgeNGramTokenFilter(luceneMatchVersion, input, side, minGramSize, maxGramSize);
+     return edgeNGrams;
+ }
+ }
}
\ No newline at end of file