Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/01/25 14:46:35 UTC
[1/2] lucenenet git commit: More porting work
Repository: lucenenet
Updated Branches:
refs/heads/master 1b806ebf5 -> 56bfeaab2
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
index f6857d9..3aedabe 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TeeSinkTokenFilter.cs
@@ -1,300 +1,277 @@
-using System.Collections.Generic;
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.sinks
+namespace Lucene.Net.Analysis.Sinks
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// This TokenFilter provides the ability to set aside attribute states
+ /// that have already been analyzed. This is useful in situations where multiple fields share
+ /// many common analysis steps and then go their separate ways.
+ /// <p/>
+ /// It is also useful for doing things like entity extraction or proper noun analysis as
+ /// part of the analysis workflow and saving off those tokens for use in another field.
+ ///
+ /// <pre class="prettyprint">
+ /// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader1));
+ /// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
+ /// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
+ ///
+ /// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader2));
+ /// source2.addSinkTokenStream(sink1);
+ /// source2.addSinkTokenStream(sink2);
+ ///
+ /// TokenStream final1 = new LowerCaseFilter(version, source1);
+ /// TokenStream final2 = source2;
+ /// TokenStream final3 = new EntityDetect(sink1);
+ /// TokenStream final4 = new URLDetect(sink2);
+ ///
+ /// d.add(new TextField("f1", final1, Field.Store.NO));
+ /// d.add(new TextField("f2", final2, Field.Store.NO));
+ /// d.add(new TextField("f3", final3, Field.Store.NO));
+ /// d.add(new TextField("f4", final4, Field.Store.NO));
+ /// </pre>
+ /// In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
+ /// <code>reader1</code> and <code>reader2</code> after the whitespace tokenizer,
+ /// and we can further wrap any of these in extra analysis; more "sources" can be inserted if desired.
+ /// It is important that tees are consumed before sinks (in the above example, the tee field names must
+ /// sort before the sink field names). If you are not sure which stream is consumed first, you can simply
+ /// add another sink and then pass all tokens to the sinks at once using <seealso cref="ConsumeAllTokens()"/>.
+ /// This TokenFilter is exhausted after that. To do so, change
+ /// the example above to:
+ /// <pre class="prettyprint">
+ /// ...
+ /// TokenStream final1 = new LowerCaseFilter(version, source1.newSinkTokenStream());
+ /// TokenStream final2 = source2.newSinkTokenStream();
+ /// sink1.consumeAllTokens();
+ /// sink2.consumeAllTokens();
+ /// ...
+ /// </pre>
+ /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
+ /// <para>Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
+ /// </para>
+ /// </summary>
+ public sealed class TeeSinkTokenFilter : TokenFilter
+ {
+ private readonly ICollection<WeakReference<SinkTokenStream>> sinks = new LinkedList<WeakReference<SinkTokenStream>>();
+ /// <summary>
+ /// Instantiates a new TeeSinkTokenFilter.
+ /// </summary>
+ public TeeSinkTokenFilter(TokenStream input)
+ : base(input)
+ {
+ }
- using AttributeImpl = org.apache.lucene.util.AttributeImpl;
- using AttributeSource = org.apache.lucene.util.AttributeSource;
+ /// <summary>
+ /// Returns a new <seealso cref="SinkTokenStream"/> that receives all tokens consumed by this stream.
+ /// </summary>
+ public SinkTokenStream NewSinkTokenStream()
+ {
+ return NewSinkTokenStream(ACCEPT_ALL_FILTER);
+ }
- /// <summary>
- /// This TokenFilter provides the ability to set aside attribute states
- /// that have already been analyzed. This is useful in situations where multiple fields share
- /// many common analysis steps and then go their separate ways.
- /// <p/>
- /// It is also useful for doing things like entity extraction or proper noun analysis as
- /// part of the analysis workflow and saving off those tokens for use in another field.
- ///
- /// <pre class="prettyprint">
- /// TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader1));
- /// TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
- /// TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
- ///
- /// TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader2));
- /// source2.addSinkTokenStream(sink1);
- /// source2.addSinkTokenStream(sink2);
- ///
- /// TokenStream final1 = new LowerCaseFilter(version, source1);
- /// TokenStream final2 = source2;
- /// TokenStream final3 = new EntityDetect(sink1);
- /// TokenStream final4 = new URLDetect(sink2);
- ///
- /// d.add(new TextField("f1", final1, Field.Store.NO));
- /// d.add(new TextField("f2", final2, Field.Store.NO));
- /// d.add(new TextField("f3", final3, Field.Store.NO));
- /// d.add(new TextField("f4", final4, Field.Store.NO));
- /// </pre>
- /// In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
- /// <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
- /// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
- /// It is important, that tees are consumed before sinks (in the above example, the field names must be
- /// less the sink's field names). If you are not sure, which stream is consumed first, you can simply
- /// add another sink and then pass all tokens to the sinks at once using <seealso cref="#consumeAllTokens"/>.
- /// This TokenFilter is exhausted after this. In the above example, change
- /// the example above to:
- /// <pre class="prettyprint">
- /// ...
- /// TokenStream final1 = new LowerCaseFilter(version, source1.newSinkTokenStream());
- /// TokenStream final2 = source2.newSinkTokenStream();
- /// sink1.consumeAllTokens();
- /// sink2.consumeAllTokens();
- /// ...
- /// </pre>
- /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
- /// <para>Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
- /// </para>
- /// </summary>
- public sealed class TeeSinkTokenFilter : TokenFilter
- {
- private readonly IList<WeakReference<SinkTokenStream>> sinks = new LinkedList<WeakReference<SinkTokenStream>>();
+ /// <summary>
+ /// Returns a new <seealso cref="SinkTokenStream"/> that receives all tokens consumed by this stream
+ /// that pass the supplied filter. </summary>
+ /// <seealso cref="SinkFilter"/>
+ public SinkTokenStream NewSinkTokenStream(SinkFilter filter)
+ {
+ var sink = new SinkTokenStream(CloneAttributes(), filter);
+ this.sinks.Add(new WeakReference<SinkTokenStream>(sink));
+ return sink;
+ }
- /// <summary>
- /// Instantiates a new TeeSinkTokenFilter.
- /// </summary>
- public TeeSinkTokenFilter(TokenStream input) : base(input)
- {
- }
+ /// <summary>
+ /// Adds a <seealso cref="SinkTokenStream"/> created by another <code>TeeSinkTokenFilter</code>
+ /// to this one. The supplied stream will also receive all consumed tokens.
+ /// This method can be used to pass tokens from two different tees to one sink.
+ /// </summary>
+ public void AddSinkTokenStream(SinkTokenStream sink)
+ {
+ // check that sink has correct factory
+ if (!attributeFactory.Equals(sink.attributeFactory))
+ {
+ throw new System.ArgumentException("The supplied sink is not compatible to this tee");
+ }
+ // add any missing attribute impls to the existing sink
+ for (var it = CloneAttributes().AttributeImplsIterator; it.MoveNext(); )
+ {
+ sink.AddAttributeImpl(it.Current);
+ }
+ this.sinks.Add(new WeakReference<SinkTokenStream>(sink));
+ }
- /// <summary>
- /// Returns a new <seealso cref="SinkTokenStream"/> that receives all tokens consumed by this stream.
- /// </summary>
- public SinkTokenStream newSinkTokenStream()
- {
- return newSinkTokenStream(ACCEPT_ALL_FILTER);
- }
+ /// <summary>
+ /// <code>TeeSinkTokenFilter</code> passes all tokens to the added sinks
+ /// as it is itself consumed. To be sure that all tokens from the input
+ /// stream are passed to the sinks, you can call this method.
+ /// This instance is exhausted afterwards, but all sinks are immediately available.
+ /// </summary>
+ public void ConsumeAllTokens()
+ {
+ while (IncrementToken())
+ {
+ }
+ }
- /// <summary>
- /// Returns a new <seealso cref="SinkTokenStream"/> that receives all tokens consumed by this stream
- /// that pass the supplied filter. </summary>
- /// <seealso cref= SinkFilter </seealso>
- public SinkTokenStream newSinkTokenStream(SinkFilter filter)
- {
- SinkTokenStream sink = new SinkTokenStream(this.cloneAttributes(), filter);
- this.sinks.Add(new WeakReference<>(sink));
- return sink;
- }
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ // capture state lazily - maybe no SinkFilter accepts this state
+ AttributeSource.State state = null;
+ foreach (WeakReference<SinkTokenStream> @ref in sinks)
+ {
+ SinkTokenStream sink;
+ if (@ref.TryGetTarget(out sink))
+ {
+ if (sink.Accept(this))
+ {
+ if (state == null)
+ {
+ state = CaptureState();
+ }
+ sink.AddState(state);
+ }
+ }
+ }
+ return true;
+ }
- /// <summary>
- /// Adds a <seealso cref="SinkTokenStream"/> created by another <code>TeeSinkTokenFilter</code>
- /// to this one. The supplied stream will also receive all consumed tokens.
- /// This method can be used to pass tokens from two different tees to one sink.
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: public void addSinkTokenStream(final SinkTokenStream sink)
- public void addSinkTokenStream(SinkTokenStream sink)
- {
- // check that sink has correct factory
- if (!this.AttributeFactory.Equals(sink.AttributeFactory))
- {
- throw new System.ArgumentException("The supplied sink is not compatible to this tee");
- }
- // add eventually missing attribute impls to the existing sink
- for (IEnumerator<AttributeImpl> it = this.cloneAttributes().AttributeImplsIterator; it.MoveNext();)
- {
- sink.addAttributeImpl(it.Current);
- }
- this.sinks.Add(new WeakReference<>(sink));
- }
+ return false;
+ }
- /// <summary>
- /// <code>TeeSinkTokenFilter</code> passes all tokens to the added sinks
- /// when itself is consumed. To be sure, that all tokens from the input
- /// stream are passed to the sinks, you can call this methods.
- /// This instance is exhausted after this, but all sinks are instant available.
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void consumeAllTokens() throws java.io.IOException
- public void consumeAllTokens()
- {
- while (incrementToken())
- {
- }
- }
+ public override void End()
+ {
+ base.End();
+ AttributeSource.State finalState = CaptureState();
+ foreach (WeakReference<SinkTokenStream> @ref in sinks)
+ {
+ SinkTokenStream sink;
+ if (@ref.TryGetTarget(out sink))
+ {
+ sink.FinalState = finalState;
+ }
+ }
+ }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- if (input.incrementToken())
- {
- // capture state lazily - maybe no SinkFilter accepts this state
- AttributeSource.State state = null;
- foreach (WeakReference<SinkTokenStream> @ref in sinks)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final SinkTokenStream sink = ref.get();
- SinkTokenStream sink = @ref.get();
- if (sink != null)
- {
- if (sink.accept(this))
- {
- if (state == null)
- {
- state = this.captureState();
- }
- sink.addState(state);
- }
- }
- }
- return true;
- }
+ /// <summary>
+ /// A filter that decides which <seealso cref="AttributeSource"/> states to store in the sink.
+ /// </summary>
+ public abstract class SinkFilter
+ {
+ /// <summary>
+ /// Returns true iff the current state of the passed-in <seealso cref="AttributeSource"/> shall be stored
+ /// in the sink.
+ /// </summary>
+ public abstract bool Accept(AttributeSource source);
- return false;
- }
+ /// <summary>
+ /// Called by <seealso cref="SinkTokenStream.Reset()"/>. This method does nothing by default
+ /// and can optionally be overridden.
+ /// </summary>
+ public virtual void Reset()
+ {
+ // nothing to do; can be overridden
+ }
+ }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
- public override void end()
- {
- base.end();
- AttributeSource.State finalState = captureState();
- foreach (WeakReference<SinkTokenStream> @ref in sinks)
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final SinkTokenStream sink = ref.get();
- SinkTokenStream sink = @ref.get();
- if (sink != null)
- {
- sink.FinalState = finalState;
- }
- }
- }
+ /// <summary>
+ /// TokenStream output from a tee with optional filtering.
+ /// </summary>
+ public sealed class SinkTokenStream : TokenStream
+ {
+ internal readonly ICollection<AttributeSource.State> cachedStates = new LinkedList<AttributeSource.State>();
+ internal AttributeSource.State finalState;
+ internal IEnumerator<AttributeSource.State> it = null;
+ internal SinkFilter filter;
- /// <summary>
- /// A filter that decides which <seealso cref="AttributeSource"/> states to store in the sink.
- /// </summary>
- public abstract class SinkFilter
- {
- /// <summary>
- /// Returns true, iff the current state of the passed-in <seealso cref="AttributeSource"/> shall be stored
- /// in the sink.
- /// </summary>
- public abstract bool accept(AttributeSource source);
+ internal SinkTokenStream(AttributeSource source, SinkFilter filter)
+ : base(source)
+ {
+ this.filter = filter;
+ }
- /// <summary>
- /// Called by <seealso cref="SinkTokenStream#reset()"/>. This method does nothing by default
- /// and can optionally be overridden.
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public void reset() throws java.io.IOException
- public virtual void reset()
- {
- // nothing to do; can be overridden
- }
- }
+ internal bool Accept(AttributeSource source)
+ {
+ return filter.Accept(source);
+ }
- /// <summary>
- /// TokenStream output from a tee with optional filtering.
- /// </summary>
- public sealed class SinkTokenStream : TokenStream
- {
- internal readonly IList<AttributeSource.State> cachedStates = new LinkedList<AttributeSource.State>();
- internal AttributeSource.State finalState;
- internal IEnumerator<AttributeSource.State> it = null;
- internal SinkFilter filter;
+ internal void AddState(AttributeSource.State state)
+ {
+ if (it != null)
+ {
+ throw new System.InvalidOperationException("The tee must be consumed before sinks are consumed.");
+ }
+ cachedStates.Add(state);
+ }
- internal SinkTokenStream(AttributeSource source, SinkFilter filter) : base(source)
- {
- this.filter = filter;
- }
+ internal AttributeSource.State FinalState
+ {
+ set
+ {
+ this.finalState = value;
+ }
+ }
- internal bool accept(AttributeSource source)
- {
- return filter.accept(source);
- }
+ public override bool IncrementToken()
+ {
+ // lazy init the iterator
+ if (it == null)
+ {
+ it = cachedStates.GetEnumerator();
+ }
- internal void addState(AttributeSource.State state)
- {
- if (it != null)
- {
- throw new System.InvalidOperationException("The tee must be consumed before sinks are consumed.");
- }
- cachedStates.Add(state);
- }
+ if (!it.MoveNext())
+ return false;
- internal AttributeSource.State FinalState
- {
- set
- {
- this.finalState = value;
- }
- }
+ var state = it.Current;
+ RestoreState(state);
+ return true;
+ }
- public override bool incrementToken()
- {
- // lazy init the iterator
- if (it == null)
- {
- it = cachedStates.GetEnumerator();
- }
+ public override void End()
+ {
+ if (finalState != null)
+ {
+ RestoreState(finalState);
+ }
+ }
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
- if (!it.hasNext())
- {
- return false;
- }
+ public override void Reset()
+ {
+ it = cachedStates.GetEnumerator();
+ }
+ }
-//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
- AttributeSource.State state = it.next();
- restoreState(state);
- return true;
- }
+ private static readonly SinkFilter ACCEPT_ALL_FILTER = new SinkFilterAnonymousInnerClassHelper();
- public override void end()
- {
- if (finalState != null)
- {
- restoreState(finalState);
- }
- }
+ private class SinkFilterAnonymousInnerClassHelper : SinkFilter
+ {
+ public override bool Accept(AttributeSource source)
+ {
+ return true;
+ }
+ }
- public override void reset()
- {
- it = cachedStates.GetEnumerator();
- }
- }
-
- private static readonly SinkFilter ACCEPT_ALL_FILTER = new SinkFilterAnonymousInnerClassHelper();
-
- private class SinkFilterAnonymousInnerClassHelper : SinkFilter
- {
- public SinkFilterAnonymousInnerClassHelper()
- {
- }
-
- public override bool accept(AttributeSource source)
- {
- return true;
- }
- }
-
- }
+ }
}
\ No newline at end of file
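For readers following the port, a minimal sketch of the renamed C# surface (NewSinkTokenStream, AddSinkTokenStream and ConsumeAllTokens, per this commit; 'version' and 'reader' are the same placeholders used in the doc example above):

    // One tee feeding two sinks, then drained so the sinks are ready first.
    var source = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader));
    TeeSinkTokenFilter.SinkTokenStream sink1 = source.NewSinkTokenStream();
    TeeSinkTokenFilter.SinkTokenStream sink2 = source.NewSinkTokenStream();
    source.ConsumeAllTokens(); // exhausts the tee; both sinks now replay its tokens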
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
index 568fea6..a5404c5 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenRangeSinkFilter.cs
@@ -1,73 +1,70 @@
-namespace org.apache.lucene.analysis.sinks
-{
-
- /// <summary>
- /// Licensed to the Apache Software Foundation (ASF) under one or more
- /// contributor license agreements. See the NOTICE file distributed with
- /// this work for additional information regarding copyright ownership.
- /// The ASF licenses this file to You under the Apache License, Version 2.0
- /// (the "License"); you may not use this file except in compliance with
- /// the License. You may obtain a copy of the License at
- ///
- /// http://www.apache.org/licenses/LICENSE-2.0
- ///
- /// Unless required by applicable law or agreed to in writing, software
- /// distributed under the License is distributed on an "AS IS" BASIS,
- /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- /// See the License for the specific language governing permissions and
- /// limitations under the License.
- /// </summary>
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
- using AttributeSource = org.apache.lucene.util.AttributeSource;
+using Lucene.Net.Util;
- /// <summary>
- /// Counts the tokens as they go by and saves to the internal list those between the range of lower and upper, exclusive of upper
- ///
- ///
- /// </summary>
- public class TokenRangeSinkFilter : TeeSinkTokenFilter.SinkFilter
- {
- private int lower;
- private int upper;
- private int count;
+namespace Lucene.Net.Analysis.Sinks
+{
- public TokenRangeSinkFilter(int lower, int upper)
- {
- if (lower < 1)
- {
- throw new System.ArgumentException("lower must be greater than zero");
- }
- if (lower > upper)
- {
- throw new System.ArgumentException("lower must not be greater than upper");
- }
- this.lower = lower;
- this.upper = upper;
- }
+ /// <summary>
+ /// Counts the tokens as they go by and saves to the internal list those
+ /// whose count falls between lower (inclusive) and upper (exclusive).
+ /// </summary>
+ public class TokenRangeSinkFilter : TeeSinkTokenFilter.SinkFilter
+ {
+ private readonly int lower;
+ private readonly int upper;
+ private int count;
+ public TokenRangeSinkFilter(int lower, int upper)
+ {
+ if (lower < 1)
+ {
+ throw new System.ArgumentException("lower must be greater than zero");
+ }
+ if (lower > upper)
+ {
+ throw new System.ArgumentException("lower must not be greater than upper");
+ }
+ this.lower = lower;
+ this.upper = upper;
+ }
- public override bool accept(AttributeSource source)
- {
- try
- {
- if (count >= lower && count < upper)
- {
- return true;
- }
- return false;
- }
- finally
- {
- count++;
- }
- }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
- {
- count = 0;
- }
- }
+ public override bool Accept(AttributeSource source)
+ {
+ try
+ {
+ if (count >= lower && count < upper)
+ {
+ return true;
+ }
+ return false;
+ }
+ finally
+ {
+ count++;
+ }
+ }
+ public override void Reset()
+ {
+ count = 0;
+ }
+ }
}
\ No newline at end of file
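A quick sketch of how this range filter plugs into a tee (the lower-inclusive, upper-exclusive acceptance follows directly from the Accept body above; the tokenizer is assumed from the surrounding port):

    // Divert only the tokens whose running count c satisfies lower <= c < upper.
    var tee = new TeeSinkTokenFilter(new WhitespaceTokenizer(version, reader));
    var windowSink = tee.NewSinkTokenStream(new TokenRangeSinkFilter(1, 3));
    tee.ConsumeAllTokens(); // windowSink now holds only that window of states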
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
index f844a1c..d46493b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/TokenTypeSinkFilter.cs
@@ -1,50 +1,47 @@
-namespace org.apache.lucene.analysis.sinks
-{
-
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
- using AttributeSource = org.apache.lucene.util.AttributeSource;
-
- /// <summary>
- /// Adds a token to the sink if it has a specific type.
- /// </summary>
- public class TokenTypeSinkFilter : TeeSinkTokenFilter.SinkFilter
- {
- private string typeToMatch;
- private TypeAttribute typeAtt;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
- public TokenTypeSinkFilter(string typeToMatch)
- {
- this.typeToMatch = typeToMatch;
- }
-
- public override bool accept(AttributeSource source)
- {
- if (typeAtt == null)
- {
- typeAtt = source.addAttribute(typeof(TypeAttribute));
- }
-
- //check to see if this is a Category
- return (typeToMatch.Equals(typeAtt.type()));
- }
-
- }
+namespace Lucene.Net.Analysis.Sinks
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Adds a token to the sink if it has a specific type.
+ /// </summary>
+ public class TokenTypeSinkFilter : TeeSinkTokenFilter.SinkFilter
+ {
+ private readonly string typeToMatch;
+ private ITypeAttribute typeAtt;
+
+ public TokenTypeSinkFilter(string typeToMatch)
+ {
+ this.typeToMatch = typeToMatch;
+ }
+
+ public override bool Accept(AttributeSource source)
+ {
+ if (typeAtt == null)
+ {
+ typeAtt = source.AddAttribute<ITypeAttribute>();
+ }
+
+            // check whether the token's type matches typeToMatch
+ return (typeToMatch.Equals(typeAtt.Type));
+ }
+ }
}
\ No newline at end of file
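And the type filter by analogy (a sketch; "<NUM>" is the type StandardTokenizer assigns to number tokens):

    // Divert only tokens carrying a specific type attribute.
    var tee = new TeeSinkTokenFilter(new StandardTokenizer(version, reader));
    var numberSink = tee.NewSinkTokenStream(new TokenTypeSinkFilter("<NUM>"));
    tee.ConsumeAllTokens(); // numberSink replays only the <NUM>-typed tokens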
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
index 5db482e..4d30289 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
@@ -1,8 +1,9 @@
-using System;
-/* The following code was generated by JFlex 1.5.1 */
-using Lucene.Net.Analysis.Standard;
+/* The following code was generated by JFlex 1.5.1 */
+using System;
+using System.IO;
+using org.apache.lucene.analysis.standard;
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
{
/*
@@ -28,10 +29,7 @@ namespace org.apache.lucene.analysis.standard
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!
*/
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
- /// <summary>
+ /// <summary>
/// This class implements the classic lucene StandardTokenizer up until 3.0
/// </summary>
@@ -205,7 +203,7 @@ namespace org.apache.lucene.analysis.standard
/// <summary>
/// the input device </summary>
- private Reader zzReader;
+ private TextReader zzReader;
/// <summary>
/// the current state of the DFA </summary>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
index 80ea22b..5458b33 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
@@ -1,4 +1,5 @@
-using Lucene.Net.Analysis.Core;
+using System.IO;
+using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
using org.apache.lucene.analysis.standard;
@@ -84,7 +85,7 @@ namespace Lucene.Net.Analysis.Standard
/// <param name="matchVersion"> Lucene version to match See {@link
/// <a href="#version">above</a>} </param>
/// <param name="stopwords"> Reader to read stop words from </param>
- public StandardAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+ public StandardAnalyzer(Version matchVersion, TextReader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
{
}
@@ -109,7 +110,7 @@ namespace Lucene.Net.Analysis.Standard
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
- StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
+ var src = new StandardTokenizer(matchVersion, reader);
src.MaxTokenLength = maxTokenLength;
TokenStream tok = new StandardFilter(matchVersion, src);
tok = new LowerCaseFilter(matchVersion, tok);
@@ -121,8 +122,8 @@ namespace Lucene.Net.Analysis.Standard
{
private readonly StandardAnalyzer outerInstance;
- private Reader reader;
- private StandardTokenizer src;
+ private TextReader reader;
+ private readonly StandardTokenizer src;
public TokenStreamComponentsAnonymousInnerClassHelper(StandardAnalyzer outerInstance, StandardTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
{
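The Reader -> TextReader change in this file means stop words can now come from any System.IO.TextReader; a minimal sketch (the Version value is illustrative):

    using (var stopwords = new StringReader("the\nand\nof"))
    {
        var analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT, stopwords);
    }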
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
index 2be937c..f28fd11 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Synonym/FSTSynonymFilterFactory.cs
@@ -1,182 +1,177 @@
using System;
using System.Collections;
using System.Collections.Generic;
+using System.IO;
+using System.Text.RegularExpressions;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Util;
-using org.apache.lucene.analysis.core;
+using Lucene.Net.Util;
using org.apache.lucene.analysis.synonym;
-using org.apache.lucene.analysis.util;
+using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Analysis.Synonym
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- /// @deprecated (3.4) use <seealso cref="SynonymFilterFactory"/> instead. this is only a backwards compatibility
- /// mechanism that will be removed in Lucene 5.0
- // NOTE: rename this to "SynonymFilterFactory" and nuke that delegator in Lucene 5.0!
- [Obsolete("(3.4) use <seealso cref="SynonymFilterFactory"/> instead. this is only a backwards compatibility")]
- internal sealed class FSTSynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
- {
- private readonly bool ignoreCase;
- private readonly string tokenizerFactory;
- private readonly string synonyms;
- private readonly string format;
- private readonly bool expand;
- private readonly IDictionary<string, string> tokArgs = new Dictionary<string, string>();
-
- private SynonymMap map;
-
- public FSTSynonymFilterFactory(IDictionary<string, string> args) : base(args)
- {
- ignoreCase = getBoolean(args, "ignoreCase", false);
- synonyms = require(args, "synonyms");
- format = get(args, "format");
- expand = getBoolean(args, "expand", true);
-
- tokenizerFactory = get(args, "tokenizerFactory");
- if (tokenizerFactory != null)
- {
- assureMatchVersion();
- tokArgs["luceneMatchVersion"] = LuceneMatchVersion.ToString();
- for (IEnumerator<string> itr = args.Keys.GetEnumerator(); itr.MoveNext();)
- {
- string key = itr.Current;
- tokArgs[key.ReplaceAll("^tokenizerFactory\\.","")] = args[key];
- itr.Remove();
- }
- }
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override TokenStream Create(TokenStream input)
- {
- // if the fst is null, it means there's actually no synonyms... just return the original stream
- // as there is nothing to do here.
- return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
- }
-
- public void inform(ResourceLoader loader)
- {
- TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
-
- Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, factory);
-
- try
- {
- string formatClass = format;
- if (format == null || format.Equals("solr"))
- {
- formatClass = typeof(SolrSynonymParser).Name;
- }
- else if (format.Equals("wordnet"))
- {
- formatClass = typeof(WordnetSynonymParser).Name;
- }
- // TODO: expose dedup as a parameter?
- map = loadSynonyms(loader, formatClass, true, analyzer);
- }
- catch (ParseException e)
- {
- throw new IOException("Error parsing synonyms file:", e);
- }
- }
-
- private class AnalyzerAnonymousInnerClassHelper : Analyzer
- {
- private readonly FSTSynonymFilterFactory outerInstance;
-
- private TokenizerFactory factory;
-
- public AnalyzerAnonymousInnerClassHelper(FSTSynonymFilterFactory outerInstance, TokenizerFactory factory)
- {
- this.outerInstance = outerInstance;
- this.factory = factory;
- }
-
- protected internal override Analyzer.TokenStreamComponents CreateComponents(string fieldName, Reader reader)
- {
- Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
- TokenStream stream = outerInstance.ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
- return new Analyzer.TokenStreamComponents(tokenizer, stream);
- }
- }
-
- /// <summary>
- /// Load synonyms with the given <seealso cref="SynonymMap.Parser"/> class.
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private org.apache.lucene.analysis.synonym.SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, org.apache.lucene.analysis.Analyzer analyzer) throws java.io.IOException, java.text.ParseException
- private SynonymMap loadSynonyms(ResourceLoader loader, string cname, bool dedup, Analyzer analyzer)
- {
- CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
-
- SynonymMap.Parser parser;
- Type clazz = loader.findClass(cname, typeof(SynonymMap.Parser));
- try
- {
- parser = clazz.getConstructor(typeof(bool), typeof(bool), typeof(Analyzer)).newInstance(dedup, expand, analyzer);
- }
- catch (Exception e)
- {
- throw new Exception(e);
- }
-
- File synonymFile = new File(synonyms);
- if (synonymFile.exists())
- {
- decoder.reset();
- parser.parse(new InputStreamReader(loader.openResource(synonyms), decoder));
- }
- else
- {
- IList<string> files = splitFileNames(synonyms);
- foreach (string file in files)
- {
- decoder.reset();
- parser.parse(new InputStreamReader(loader.openResource(file), decoder));
- }
- }
- return parser.build();
- }
-
- // (there are no tests for this functionality)
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws java.io.IOException
- private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, string cname)
- {
- Type clazz = loader.findClass(cname, typeof(TokenizerFactory));
- try
- {
- TokenizerFactory tokFactory = clazz.getConstructor(typeof(IDictionary)).newInstance(tokArgs);
- if (tokFactory is ResourceLoaderAware)
- {
- ((ResourceLoaderAware) tokFactory).inform(loader);
- }
- return tokFactory;
- }
- catch (Exception e)
- {
- throw new Exception(e);
- }
- }
- }
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ internal sealed class FSTSynonymFilterFactory : TokenFilterFactory, ResourceLoaderAware
+ {
+ private readonly bool ignoreCase;
+ private readonly string tokenizerFactory;
+ private readonly string synonyms;
+ private readonly string format;
+ private readonly bool expand;
+ private readonly IDictionary<string, string> tokArgs = new Dictionary<string, string>();
+
+ private SynonymMap map;
+
+ [Obsolete(@"(3.4) use <seealso cref=""SynonymFilterFactory"" instead. this is only a backwards compatibility")]
+ public FSTSynonymFilterFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ ignoreCase = getBoolean(args, "ignoreCase", false);
+ synonyms = require(args, "synonyms");
+ format = get(args, "format");
+ expand = getBoolean(args, "expand", true);
+
+ tokenizerFactory = get(args, "tokenizerFactory");
+ if (tokenizerFactory != null)
+ {
+ assureMatchVersion();
+ tokArgs["luceneMatchVersion"] = LuceneMatchVersion.ToString();
+                // snapshot the keys so args can be mutated while copying
+                foreach (var key in new List<string>(args.Keys))
+                {
+                    tokArgs[Regex.Replace(key, "^tokenizerFactory\\.", string.Empty)] = args[key];
+                    args.Remove(key);
+                }
+ }
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+
+ public override TokenStream Create(TokenStream input)
+ {
+            // if the fst is null, it means there are actually no synonyms... just return the original stream
+ // as there is nothing to do here.
+ return map.fst == null ? input : new SynonymFilter(input, map, ignoreCase);
+ }
+
+ public void Inform(ResourceLoader loader)
+ {
+ TokenizerFactory factory = tokenizerFactory == null ? null : LoadTokenizerFactory(loader, tokenizerFactory);
+
+ Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, factory);
+
+ try
+ {
+ string formatClass = format;
+ if (format == null || format.Equals("solr"))
+ {
+ formatClass = typeof(SolrSynonymParser).Name;
+ }
+ else if (format.Equals("wordnet"))
+ {
+ formatClass = typeof(WordnetSynonymParser).Name;
+ }
+ // TODO: expose dedup as a parameter?
+ map = LoadSynonyms(loader, formatClass, true, analyzer);
+ }
+ catch (ParseException e)
+ {
+ throw new IOException("Error parsing synonyms file:", e);
+ }
+ }
+
+ private class AnalyzerAnonymousInnerClassHelper : Analyzer
+ {
+ private readonly FSTSynonymFilterFactory outerInstance;
+
+ private readonly TokenizerFactory factory;
+
+ public AnalyzerAnonymousInnerClassHelper(FSTSynonymFilterFactory outerInstance, TokenizerFactory factory)
+ {
+ this.outerInstance = outerInstance;
+ this.factory = factory;
+ }
+
+ public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.Create(reader);
+ TokenStream stream = outerInstance.ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
+ return new Analyzer.TokenStreamComponents(tokenizer, stream);
+ }
+ }
+
+ /// <summary>
+ /// Load synonyms with the given <seealso cref="SynonymMap.Parser"/> class.
+ /// </summary>
+ private SynonymMap LoadSynonyms(ResourceLoader loader, string cname, bool dedup, Analyzer analyzer)
+ {
+ CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
+
+ SynonymMap.Parser parser;
+ Type clazz = loader.findClass(cname, typeof(SynonymMap.Parser));
+ try
+ {
+ parser = clazz.getConstructor(typeof(bool), typeof(bool), typeof(Analyzer)).newInstance(dedup, expand, analyzer);
+ }
+ catch (Exception e)
+ {
+ throw new Exception(e);
+ }
+
+ if (File.Exists(synonyms))
+ {
+ decoder.Reset();
+ parser.parse(new InputStreamReader(loader.openResource(synonyms), decoder));
+ }
+ else
+ {
+ IList<string> files = splitFileNames(synonyms);
+ foreach (string file in files)
+ {
+ decoder.reset();
+ parser.parse(new InputStreamReader(loader.openResource(file), decoder));
+ }
+ }
+ return parser.build();
+ }
+
+ // (there are no tests for this functionality)
+ private TokenizerFactory LoadTokenizerFactory(ResourceLoader loader, string cname)
+ {
+ Type clazz = loader.findClass(cname, typeof(TokenizerFactory));
+ try
+ {
+ TokenizerFactory tokFactory = clazz.getConstructor(typeof(IDictionary)).newInstance(tokArgs);
+ if (tokFactory is ResourceLoaderAware)
+ {
+ ((ResourceLoaderAware)tokFactory).inform(loader);
+ }
+ return tokFactory;
+ }
+ catch (Exception e)
+ {
+ throw new Exception(e);
+ }
+ }
+ }
}
\ No newline at end of file
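The tokenizerFactory.* prefix convention handled in the constructor above can be seen in isolation (a sketch using just the Regex call from this commit):

    // Keys such as "tokenizerFactory.maxTokenLength" are forwarded to the
    // wrapped tokenizer factory with the prefix stripped:
    string stripped = Regex.Replace("tokenizerFactory.maxTokenLength",
                                    "^tokenizerFactory\\.", string.Empty);
    // stripped == "maxTokenLength"; unprefixed keys pass through unchanged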
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
index 8cf5e28..af4c555 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -44,19 +44,17 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// the luceneVersion arg </summary>
- protected internal readonly Lucene.Net.Util.Version luceneMatchVersion;
- /// <summary>
- /// whether the luceneMatchVersion arg is explicitly specified in the serialized schema </summary>
- private bool isExplicitLuceneMatchVersion = false;
+ protected internal readonly Lucene.Net.Util.Version? luceneMatchVersion;
- /// <summary>
+ /// <summary>
/// Initialize this factory via a set of key-value pairs.
/// </summary>
protected internal AbstractAnalysisFactory(IDictionary<string, string> args)
{
- originalArgs = Collections.UnmodifiableMap(new Dictionary<>(args));
+ ExplicitLuceneMatchVersion = false;
+ originalArgs = Collections.UnmodifiableMap(args);
string version = get(args, LUCENE_MATCH_VERSION_PARAM);
- luceneMatchVersion = version == null ? null : Version.ParseLeniently(version);
+ luceneMatchVersion = version == null ? null : Lucene.Net.Util.Version.ParseLeniently(version);
args.Remove(CLASS_NAME); // consume the class arg
}
@@ -304,8 +302,6 @@ namespace Lucene.Net.Analysis.Util
/// <summary>
/// Returns the resource's lines (with content treated as UTF-8)
/// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: protected final java.util.List<String> getLines(ResourceLoader loader, String resource) throws java.io.IOException
protected internal IList<string> getLines(ResourceLoader loader, string resource)
{
return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
@@ -315,8 +311,6 @@ namespace Lucene.Net.Analysis.Util
/// same as <seealso cref="#getWordSet(ResourceLoader, String, boolean)"/>,
/// except the input is in snowball format.
/// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: protected final CharArraySet getSnowballWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws java.io.IOException
protected internal CharArraySet getSnowballWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
{
assureMatchVersion();
@@ -389,18 +383,6 @@ namespace Lucene.Net.Analysis.Util
}
}
- public virtual bool ExplicitLuceneMatchVersion
- {
- get
- {
- return isExplicitLuceneMatchVersion;
- }
- set
- {
- this.isExplicitLuceneMatchVersion = value;
- }
- }
-
+ public virtual bool ExplicitLuceneMatchVersion { get; set; }
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
index fba8b3a..4e76504 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -433,16 +433,19 @@ namespace org.apache.lucene.analysis.util
throw new System.NotSupportedException();
}
- public override int size()
- {
- return count;
- }
-
- public override string ToString()
- {
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final StringBuilder sb = new StringBuilder("{");
- StringBuilder sb = new StringBuilder("{");
+        public override int Size
+        {
+            get
+            {
+                return count;
+            }
+        }
+
+ public override string ToString()
+ {
+ var sb = new StringBuilder("{");
foreach (KeyValuePair<object, V> entry in entrySet())
{
if (sb.Length > 1)
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index 6b8a9db..e6d7cac 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -148,12 +148,17 @@ namespace Lucene.Net.Analysis.Util
return map.put(text, PLACEHOLDER) == null;
}
- public override int Size()
- {
- return map.size();
- }
-
- /// <summary>
+        public override int Size
+        {
+            get
+            {
+                return map.size();
+            }
+        }
+
+ /// <summary>
/// Returns an unmodifiable <seealso cref="CharArraySet"/>. This allows to provide
/// unmodifiable views of internal sets for "read-only" use.
/// </summary>
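Call sites change from the Java-style size() method to a C# property in both CharArrayMap and CharArraySet; the new shape is simply:

    int n = stopSet.Size; // was: stopSet.size()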
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Core/Support/Compatibility/Collections.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/Compatibility/Collections.cs b/src/Lucene.Net.Core/Support/Compatibility/Collections.cs
index 9f29bbe..279c20c 100644
--- a/src/Lucene.Net.Core/Support/Compatibility/Collections.cs
+++ b/src/Lucene.Net.Core/Support/Compatibility/Collections.cs
@@ -30,5 +30,12 @@ namespace Lucene.Net
{
return ImmutableHashSet.Create<T>(items.ToArray());
}
+
+ public static IDictionary<T, TS> UnmodifiableMap<T, TS>(IDictionary<T, TS> d)
+ {
+ var builder = ImmutableDictionary.CreateBuilder<T, TS>();
+ builder.AddRange(d);
+ return builder.ToImmutable();
+ }
}
}
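A usage note on the new helper (a sketch; ImmutableDictionary lives in System.Collections.Immutable): the builder snapshots the input, so later changes to the source dictionary are not reflected, and writes through the returned IDictionary throw.

    var args = new Dictionary<string, string> { { "synonyms", "syn.txt" } };
    IDictionary<string, string> frozen = Collections.UnmodifiableMap(args);
    args["format"] = "solr";  // not visible through 'frozen'
    // frozen.Add("x", "y");  // would throw NotSupportedException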
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Core/Util/Version.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/Version.cs b/src/Lucene.Net.Core/Util/Version.cs
index fd15caa..59abd4f 100644
--- a/src/Lucene.Net.Core/Util/Version.cs
+++ b/src/Lucene.Net.Core/Util/Version.cs
@@ -26,7 +26,7 @@ namespace Lucene.Net.Util
/// <p><b>WARNING</b>: When changing the version parameter
/// that you supply to components in Lucene, do not simply
/// change the version at search-time, but instead also adjust
- /// your indexing code to match, and re-index.
+ /// your indexing code to match, and re-index.</p>
/// </summary>
public enum Version
{
[2/2] lucenenet git commit: More porting work
Posted by sy...@apache.org.
More porting work
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/56bfeaab
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/56bfeaab
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/56bfeaab
Branch: refs/heads/master
Commit: 56bfeaab22154916e96433eb91572f26d04d1ef2
Parents: 1b806eb
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Sun Jan 25 15:46:16 2015 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Sun Jan 25 15:46:16 2015 +0200
----------------------------------------------------------------------
.../CodepointCountFilterFactory.cs | 13 +-
.../Analysis/Miscellaneous/EmptyTokenStream.cs | 6 +-
.../Miscellaneous/HyphenatedWordsFilter.cs | 311 ++++++-----
.../HyphenatedWordsFilterFactory.cs | 11 +-
.../Analysis/Miscellaneous/KeepWordFilter.cs | 5 +-
.../Miscellaneous/KeywordMarkerFilter.cs | 114 ++--
.../Analysis/Miscellaneous/LengthFilter.cs | 20 +-
.../Miscellaneous/LengthFilterFactory.cs | 1 +
.../Analysis/Miscellaneous/PatternAnalyzer.cs | 13 +-
.../Miscellaneous/PatternKeywordMarkerFilter.cs | 4 +-
.../Analysis/Path/PathHierarchyTokenizer.cs | 476 ++++++++---------
.../Path/PathHierarchyTokenizerFactory.cs | 191 ++++---
.../Path/ReversePathHierarchyTokenizer.cs | 421 +++++++--------
.../Analysis/Position/PositionFilterFactory.cs | 7 +-
.../Analysis/Query/QueryAutoStopWordAnalyzer.cs | 10 +-
.../Analysis/Sinks/DateRecognizerSinkFilter.cs | 17 +-
.../Analysis/Sinks/TeeSinkTokenFilter.cs | 521 +++++++++----------
.../Analysis/Sinks/TokenRangeSinkFilter.cs | 127 +++--
.../Analysis/Sinks/TokenTypeSinkFilter.cs | 91 ++--
.../Analysis/Standard/ClassicTokenizerImpl.cs | 16 +-
.../Analysis/Standard/StandardAnalyzer.cs | 11 +-
.../Analysis/Synonym/FSTSynonymFilterFactory.cs | 335 ++++++------
.../Analysis/Util/AbstractAnalysisFactory.cs | 30 +-
.../Analysis/Util/CharArrayMap.cs | 23 +-
.../Analysis/Util/CharArraySet.cs | 17 +-
.../Support/Compatibility/Collections.cs | 7 +
src/Lucene.Net.Core/Util/Version.cs | 2 +-
27 files changed, 1365 insertions(+), 1435 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
index e85fd1e..bb37bd1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CodepointCountFilterFactory.cs
@@ -1,7 +1,8 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
+using org.apache.lucene.analysis.miscellaneous;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -20,10 +21,7 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using TokenFilterFactory = TokenFilterFactory;
-
- /// <summary>
+ /// <summary>
/// Factory for <seealso cref="CodepointCountFilter"/>.
/// <pre class="prettyprint">
/// <fieldType name="text_lngth" class="solr.TextField" positionIncrementGap="100">
@@ -52,10 +50,9 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
- public override CodepointCountFilter create(TokenStream input)
+ public override CodepointCountFilter Create(TokenStream input)
{
return new CodepointCountFilter(luceneMatchVersion, input, min, max);
}
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
index 38af481..ef84806 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/EmptyTokenStream.cs
@@ -1,4 +1,4 @@
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -24,11 +24,9 @@
public sealed class EmptyTokenStream : TokenStream
{
- public override bool incrementToken()
+ public override bool IncrementToken()
{
return false;
}
-
}
-
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
index 96a2dfa..022ee31 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilter.cs
@@ -1,164 +1,159 @@
using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- using org.apache.lucene.analysis;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
- /// <summary>
- /// When the plain text is extracted from documents, we will often have many words hyphenated and broken into
- /// two lines. This is often the case with documents where narrow text columns are used, such as newsletters.
- /// In order to increase search efficiency, this filter puts hyphenated words broken into two lines back together.
- /// This filter should be used on indexing time only.
- /// Example field definition in schema.xml:
- /// <pre class="prettyprint">
- /// <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
- /// <analyzer type="index">
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- /// <filter class="solr.StopFilterFactory" ignoreCase="true"/>
- /// <filter class="solr.HyphenatedWordsFilterFactory"/>
- /// <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
- /// <filter class="solr.LowerCaseFilterFactory"/>
- /// <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
- /// </analyzer>
- /// <analyzer type="query">
- /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- /// <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- /// <filter class="solr.StopFilterFactory" ignoreCase="true"/>
- /// <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
- /// <filter class="solr.LowerCaseFilterFactory"/>
- /// <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
- /// </analyzer>
- /// </fieldtype>
- /// </pre>
- ///
- /// </summary>
- public sealed class HyphenatedWordsFilter : TokenFilter
- {
-
- private readonly CharTermAttribute termAttribute = addAttribute(typeof(CharTermAttribute));
- private readonly OffsetAttribute offsetAttribute = addAttribute(typeof(OffsetAttribute));
-
- private readonly StringBuilder hyphenated = new StringBuilder();
- private State savedState;
- private bool exhausted = false;
- private int lastEndOffset = 0;
-
- /// <summary>
- /// Creates a new HyphenatedWordsFilter
- /// </summary>
- /// <param name="in"> TokenStream that will be filtered </param>
- public HyphenatedWordsFilter(TokenStream @in) : base(@in)
- {
- }
-
- /// <summary>
- /// {@inheritDoc}
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- while (!exhausted && input.incrementToken())
- {
- char[] term = termAttribute.buffer();
- int termLength = termAttribute.length();
- lastEndOffset = offsetAttribute.endOffset();
-
- if (termLength > 0 && term[termLength - 1] == '-')
- {
- // a hyphenated word
- // capture the state of the first token only
- if (savedState == null)
- {
- savedState = captureState();
- }
- hyphenated.Append(term, 0, termLength - 1);
- }
- else if (savedState == null)
- {
- // not part of a hyphenated word.
- return true;
- }
- else
- {
- // the final portion of a hyphenated word
- hyphenated.Append(term, 0, termLength);
- unhyphenate();
- return true;
- }
- }
-
- exhausted = true;
-
- if (savedState != null)
- {
- // the final term ends with a hyphen
- // add back the hyphen, for backwards compatibility.
- hyphenated.Append('-');
- unhyphenate();
- return true;
- }
-
- return false;
- }
-
- /// <summary>
- /// {@inheritDoc}
- /// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
- {
- base.reset();
- hyphenated.Length = 0;
- savedState = null;
- exhausted = false;
- lastEndOffset = 0;
- }
-
- // ================================================= Helper Methods ================================================
-
- /// <summary>
- /// Writes the joined unhyphenated term
- /// </summary>
- private void unhyphenate()
- {
- restoreState(savedState);
- savedState = null;
-
- char[] term = termAttribute.buffer();
- int length = hyphenated.Length;
- if (length > termAttribute.length())
- {
- term = termAttribute.resizeBuffer(length);
- }
-
- hyphenated.getChars(0, length, term, 0);
- termAttribute.Length = length;
- offsetAttribute.setOffset(offsetAttribute.startOffset(), lastEndOffset);
- hyphenated.Length = 0;
- }
- }
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// When the plain text is extracted from documents, we will often have many words hyphenated and broken into
+ /// two lines. This is often the case with documents where narrow text columns are used, such as newsletters.
+ /// In order to increase search efficiency, this filter puts hyphenated words broken into two lines back together.
+ /// This filter should be used on indexing time only.
+ /// Example field definition in schema.xml:
+ /// <pre class="prettyprint">
+ /// <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
+ /// <analyzer type="index">
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ /// <filter class="solr.StopFilterFactory" ignoreCase="true"/>
+ /// <filter class="solr.HyphenatedWordsFilterFactory"/>
+ /// <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+ /// <filter class="solr.LowerCaseFilterFactory"/>
+ /// <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ /// </analyzer>
+ /// <analyzer type="query">
+ /// <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ /// <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ /// <filter class="solr.StopFilterFactory" ignoreCase="true"/>
+ /// <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+ /// <filter class="solr.LowerCaseFilterFactory"/>
+ /// <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ /// </analyzer>
+ /// </fieldtype>
+ /// </pre>
+ ///
+ /// </summary>
+ public sealed class HyphenatedWordsFilter : TokenFilter
+ {
+
+ private readonly ICharTermAttribute termAttribute;
+ private readonly IOffsetAttribute offsetAttribute;
+
+ private readonly StringBuilder hyphenated = new StringBuilder();
+ private State savedState;
+ private bool exhausted = false;
+ private int lastEndOffset = 0;
+
+ /// <summary>
+ /// Creates a new HyphenatedWordsFilter
+ /// </summary>
+ /// <param name="in"> TokenStream that will be filtered </param>
+ public HyphenatedWordsFilter(TokenStream @in)
+ : base(@in)
+ {
+ termAttribute = AddAttribute<ICharTermAttribute>();
+ offsetAttribute = AddAttribute<IOffsetAttribute>();
+ }
+
+ /// <summary>
+ /// {@inheritDoc}
+ /// </summary>
+ public override bool IncrementToken()
+ {
+ while (!exhausted && input.IncrementToken())
+ {
+ char[] term = termAttribute.Buffer();
+ int termLength = termAttribute.Length;
+ lastEndOffset = offsetAttribute.EndOffset();
+
+ if (termLength > 0 && term[termLength - 1] == '-')
+ {
+ // a hyphenated word
+ // capture the state of the first token only
+ if (savedState == null)
+ {
+ savedState = CaptureState();
+ }
+ hyphenated.Append(term, 0, termLength - 1);
+ }
+ else if (savedState == null)
+ {
+ // not part of a hyphenated word.
+ return true;
+ }
+ else
+ {
+ // the final portion of a hyphenated word
+ hyphenated.Append(term, 0, termLength);
+ Unhyphenate();
+ return true;
+ }
+ }
+
+ exhausted = true;
+
+ if (savedState != null)
+ {
+ // the final term ends with a hyphen
+ // add back the hyphen, for backwards compatibility.
+ hyphenated.Append('-');
+ Unhyphenate();
+ return true;
+ }
+
+ return false;
+ }
+
+ /// <summary>
+ /// {@inheritDoc}
+ /// </summary>
+ public override void Reset()
+ {
+ base.Reset();
+ hyphenated.Length = 0;
+ savedState = null;
+ exhausted = false;
+ lastEndOffset = 0;
+ }
+
+ // ================================================= Helper Methods ================================================
+
+ /// <summary>
+ /// Writes the joined unhyphenated term
+ /// </summary>
+ private void Unhyphenate()
+ {
+ RestoreState(savedState);
+ savedState = null;
+
+ char[] term = termAttribute.Buffer();
+ int length = hyphenated.Length;
+ if (length > termAttribute.Length)
+ {
+ term = termAttribute.ResizeBuffer(length);
+ }
+
+ hyphenated.GetChars(0, length, term, 0);
+ termAttribute.Length = length;
+ offsetAttribute.SetOffset(offsetAttribute.StartOffset(), lastEndOffset);
+ hyphenated.Length = 0;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
index 946cd57..b274564 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/HyphenatedWordsFilterFactory.cs
@@ -1,7 +1,7 @@
using System.Collections.Generic;
-using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
+using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -20,10 +20,7 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using TokenFilterFactory = TokenFilterFactory;
-
- /// <summary>
+ /// <summary>
/// Factory for <seealso cref="HyphenatedWordsFilter"/>.
/// <pre class="prettyprint">
/// <fieldType name="text_hyphn" class="solr.TextField" positionIncrementGap="100">
@@ -46,7 +43,7 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
- public override HyphenatedWordsFilter create(TokenStream input)
+ public override TokenStream Create(TokenStream input)
{
return new HyphenatedWordsFilter(input);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
index f9f9a53..b699de1 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeepWordFilter.cs
@@ -30,7 +30,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
public sealed class KeepWordFilter : FilteringTokenFilter
{
private readonly CharArraySet words;
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+ private readonly ICharTermAttribute termAtt;
/// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4.
[Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
@@ -38,6 +38,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
: base(version, enablePositionIncrements, @in)
{
this.words = words;
+ termAtt = AddAttribute<ICharTermAttribute>();
}
/// <summary>
@@ -55,7 +56,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
this.words = words;
}
- public override bool Accept()
+ protected internal override bool Accept()
{
return words.Contains(termAtt.Buffer(), 0, termAtt.Length);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
index 8918274..6403e57 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/KeywordMarkerFilter.cs
@@ -1,61 +1,59 @@
-namespace org.apache.lucene.analysis.miscellaneous
-{
-
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- using KeywordAttribute = org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-
- /// <summary>
- /// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>.
- /// </summary>
- /// <seealso cref= KeywordAttribute </seealso>
- public abstract class KeywordMarkerFilter : TokenFilter
- {
-
- private readonly KeywordAttribute keywordAttr = addAttribute(typeof(KeywordAttribute));
+using Lucene.Net.Analysis.Tokenattributes;
- /// <summary>
- /// Creates a new <seealso cref="KeywordMarkerFilter"/> </summary>
- /// <param name="in"> the input stream </param>
- protected internal KeywordMarkerFilter(TokenStream @in) : base(@in)
- {
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- if (input.incrementToken())
- {
- if (Keyword)
- {
- keywordAttr.Keyword = true;
- }
- return true;
- }
- else
- {
- return false;
- }
- }
-
- protected internal abstract bool Keyword {get;}
-
- }
+namespace Lucene.Net.Analysis.Miscellaneous
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Marks terms as keywords via the <seealso cref="KeywordAttribute"/>.
+ /// </summary>
+ /// <seealso cref= KeywordAttribute </seealso>
+ public abstract class KeywordMarkerFilter : TokenFilter
+ {
+
+ private readonly IKeywordAttribute keywordAttr;
+
+ /// <summary>
+ /// Creates a new <seealso cref="KeywordMarkerFilter"/> </summary>
+ /// <param name="in"> the input stream </param>
+ protected internal KeywordMarkerFilter(TokenStream @in)
+ : base(@in)
+ {
+ keywordAttr = AddAttribute<IKeywordAttribute>();
+ }
+
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ if (Keyword)
+ {
+ keywordAttr.Keyword = true;
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ protected internal abstract bool Keyword { get; }
+
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
index 802ff26..e0ba510 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilter.cs
@@ -1,7 +1,8 @@
using System;
-using FilteringTokenFilter = Lucene.Net.Analysis.Util.FilteringTokenFilter;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -20,12 +21,7 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using FilteringTokenFilter = FilteringTokenFilter;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// Removes words that are too long or too short from the stream.
/// <para>
/// Note: Length is calculated as the number of UTF-16 code units.
@@ -37,7 +33,7 @@ namespace org.apache.lucene.analysis.miscellaneous
private readonly int min;
private readonly int max;
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+ private readonly ICharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
/// @deprecated enablePositionIncrements=false is not supported anymore as of Lucene 4.4.
[Obsolete("enablePositionIncrements=false is not supported anymore as of Lucene 4.4.")]
@@ -77,11 +73,9 @@ namespace org.apache.lucene.analysis.miscellaneous
this.max = max;
}
- public override bool accept()
+ public override bool Accept()
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int len = termAtt.length();
- int len = termAtt.length();
+ int len = termAtt.Length;
return (len >= min && len <= max);
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
index 6f0e4a3..afdc961 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/LengthFilterFactory.cs
@@ -1,4 +1,5 @@
using System.Collections.Generic;
+using Lucene.Net.Analysis.Miscellaneous;
using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
namespace org.apache.lucene.analysis.miscellaneous
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index d074038..4fe2822 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -2,7 +2,7 @@
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Util;
-namespace org.apache.lucene.analysis.miscellaneous
+namespace Lucene.Net.Analysis.Miscellaneous
{
/*
@@ -21,16 +21,7 @@ namespace org.apache.lucene.analysis.miscellaneous
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using StopAnalyzer = StopAnalyzer;
- using StopFilter = StopFilter;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using CharArraySet = CharArraySet;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
+ /// <summary>
/// Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
/// <seealso cref="java.io.Reader"/>, that can flexibly separate text into terms via a regular expression <seealso cref="Pattern"/>
/// (with behaviour identical to <seealso cref="String#split(String)"/>),
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
index 4402d5a..3886da0 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternKeywordMarkerFilter.cs
@@ -1,4 +1,6 @@
-namespace org.apache.lucene.analysis.miscellaneous
+using Lucene.Net.Analysis.Miscellaneous;
+
+namespace org.apache.lucene.analysis.miscellaneous
{
/*
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
index b826cd6..69cc6c2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizer.cs
@@ -1,242 +1,242 @@
using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
+using Reader = System.IO.TextReader;
-namespace org.apache.lucene.analysis.path
+namespace Lucene.Net.Analysis.Path
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
- /// <summary>
- /// Tokenizer for path-like hierarchies.
- /// <para>
- /// Take something like:
- ///
- /// <pre>
- /// /something/something/else
- /// </pre>
- ///
- /// and make:
- ///
- /// <pre>
- /// /something
- /// /something/something
- /// /something/something/else
- /// </pre>
- /// </para>
- /// </summary>
- public class PathHierarchyTokenizer : Tokenizer
- {
-
- public PathHierarchyTokenizer(Reader input) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
- {
- }
-
- public PathHierarchyTokenizer(Reader input, int skip) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip)
- {
- }
-
- public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP)
- {
- }
-
- public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP)
- {
- }
-
- public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
- {
- }
-
- public PathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
- {
- }
-
- public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip)
- {
- }
-
- public PathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input)
- {
- if (bufferSize < 0)
- {
- throw new System.ArgumentException("bufferSize cannot be negative");
- }
- if (skip < 0)
- {
- throw new System.ArgumentException("skip cannot be negative");
- }
- termAtt.resizeBuffer(bufferSize);
-
- this.delimiter = delimiter;
- this.replacement = replacement;
- this.skip = skip;
- resultToken = new StringBuilder(bufferSize);
- }
-
- private const int DEFAULT_BUFFER_SIZE = 1024;
- public const char DEFAULT_DELIMITER = '/';
- public const int DEFAULT_SKIP = 0;
-
- private readonly char delimiter;
- private readonly char replacement;
- private readonly int skip;
-
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
- private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
- private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute));
- private int startPosition = 0;
- private int skipped = 0;
- private bool endDelimiter = false;
- private StringBuilder resultToken;
-
- private int charsRead = 0;
-
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- clearAttributes();
- termAtt.append(resultToken);
- if (resultToken.Length == 0)
- {
- posAtt.PositionIncrement = 1;
- }
- else
- {
- posAtt.PositionIncrement = 0;
- }
- int length = 0;
- bool added = false;
- if (endDelimiter)
- {
- termAtt.append(replacement);
- length++;
- endDelimiter = false;
- added = true;
- }
-
- while (true)
- {
- int c = input.read();
- if (c >= 0)
- {
- charsRead++;
- }
- else
- {
- if (skipped > skip)
- {
- length += resultToken.Length;
- termAtt.Length = length;
- offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length));
- if (added)
- {
- resultToken.Length = 0;
- resultToken.Append(termAtt.buffer(), 0, length);
- }
- return added;
- }
- else
- {
- return false;
- }
- }
- if (!added)
- {
- added = true;
- skipped++;
- if (skipped > skip)
- {
- termAtt.append(c == delimiter ? replacement : (char)c);
- length++;
- }
- else
- {
- startPosition++;
- }
- }
- else
- {
- if (c == delimiter)
- {
- if (skipped > skip)
- {
- endDelimiter = true;
- break;
- }
- skipped++;
- if (skipped > skip)
- {
- termAtt.append(replacement);
- length++;
- }
- else
- {
- startPosition++;
- }
- }
- else
- {
- if (skipped > skip)
- {
- termAtt.append((char)c);
- length++;
- }
- else
- {
- startPosition++;
- }
- }
- }
- }
- length += resultToken.Length;
- termAtt.Length = length;
- offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length));
- resultToken.Length = 0;
- resultToken.Append(termAtt.buffer(), 0, length);
- return true;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
- public override void end()
- {
- base.end();
- // set final offset
- int finalOffset = correctOffset(charsRead);
- offsetAtt.setOffset(finalOffset, finalOffset);
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
- {
- base.reset();
- resultToken.Length = 0;
- charsRead = 0;
- endDelimiter = false;
- skipped = 0;
- startPosition = 0;
- }
- }
-
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Tokenizer for path-like hierarchies.
+ /// <para>
+ /// Take something like:
+ ///
+ /// <pre>
+ /// /something/something/else
+ /// </pre>
+ ///
+ /// and make:
+ ///
+ /// <pre>
+ /// /something
+ /// /something/something
+ /// /something/something/else
+ /// </pre>
+ /// </para>
+ /// </summary>
+ public class PathHierarchyTokenizer : Tokenizer
+ {
+
+ public PathHierarchyTokenizer(Reader input)
+ : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
+ {
+ }
+
+ public PathHierarchyTokenizer(Reader input, int skip)
+ : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip)
+ {
+ }
+
+ public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter)
+ : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP)
+ {
+ }
+
+ public PathHierarchyTokenizer(Reader input, char delimiter, char replacement)
+ : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP)
+ {
+ }
+
+ public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip)
+ : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+ {
+ }
+
+ public PathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip)
+ : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+ {
+ }
+
+ public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip)
+ : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip)
+ {
+ }
+
+ public PathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip)
+ : base(factory, input)
+ {
+ if (bufferSize < 0)
+ {
+ throw new System.ArgumentException("bufferSize cannot be negative");
+ }
+ if (skip < 0)
+ {
+ throw new System.ArgumentException("skip cannot be negative");
+ }
+
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ posAtt = AddAttribute<IPositionIncrementAttribute>();
+ termAtt = AddAttribute<ICharTermAttribute>();
+ termAtt.ResizeBuffer(bufferSize);
+
+ this.delimiter = delimiter;
+ this.replacement = replacement;
+ this.skip = skip;
+ resultToken = new StringBuilder(bufferSize);
+ }
+
+ private const int DEFAULT_BUFFER_SIZE = 1024;
+ public const char DEFAULT_DELIMITER = '/';
+ public const int DEFAULT_SKIP = 0;
+
+ private readonly char delimiter;
+ private readonly char replacement;
+ private readonly int skip;
+
+ private readonly ICharTermAttribute termAtt;
+ private readonly IOffsetAttribute offsetAtt;
+ private readonly IPositionIncrementAttribute posAtt;
+ private int startPosition = 0;
+ private int skipped = 0;
+ private bool endDelimiter = false;
+ private readonly StringBuilder resultToken;
+
+ private int charsRead = 0;
+
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ termAtt.Append(resultToken);
+ if (resultToken.Length == 0)
+ {
+ posAtt.PositionIncrement = 1;
+ }
+ else
+ {
+ posAtt.PositionIncrement = 0;
+ }
+ int length = 0;
+ bool added = false;
+ if (endDelimiter)
+ {
+ termAtt.Append(replacement);
+ length++;
+ endDelimiter = false;
+ added = true;
+ }
+
+ while (true)
+ {
+ int c = input.Read();
+ if (c >= 0)
+ {
+ charsRead++;
+ }
+ else
+ {
+ if (skipped > skip)
+ {
+ length += resultToken.Length;
+ termAtt.Length = length;
+ offsetAtt.SetOffset(CorrectOffset(startPosition), CorrectOffset(startPosition + length));
+ if (added)
+ {
+ resultToken.Length = 0;
+ resultToken.Append(termAtt.Buffer(), 0, length);
+ }
+ return added;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ if (!added)
+ {
+ added = true;
+ skipped++;
+ if (skipped > skip)
+ {
+ termAtt.Append(c == delimiter ? replacement : (char)c);
+ length++;
+ }
+ else
+ {
+ startPosition++;
+ }
+ }
+ else
+ {
+ if (c == delimiter)
+ {
+ if (skipped > skip)
+ {
+ endDelimiter = true;
+ break;
+ }
+ skipped++;
+ if (skipped > skip)
+ {
+ termAtt.Append(replacement);
+ length++;
+ }
+ else
+ {
+ startPosition++;
+ }
+ }
+ else
+ {
+ if (skipped > skip)
+ {
+ termAtt.Append((char)c);
+ length++;
+ }
+ else
+ {
+ startPosition++;
+ }
+ }
+ }
+ }
+ length += resultToken.Length;
+ termAtt.Length = length;
+ offsetAtt.SetOffset(CorrectOffset(startPosition), CorrectOffset(startPosition + length));
+ resultToken.Length = 0;
+ resultToken.Append(termAtt.Buffer(), 0, length);
+ return true;
+ }
+
+ public override void End()
+ {
+ base.End();
+ // set final offset
+ int finalOffset = CorrectOffset(charsRead);
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset()
+ {
+ base.Reset();
+ resultToken.Length = 0;
+ charsRead = 0;
+ endDelimiter = false;
+ skipped = 0;
+ startPosition = 0;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
index 7dd1e62..f43772c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/PathHierarchyTokenizerFactory.cs
@@ -1,105 +1,100 @@
using System.Collections.Generic;
-using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.path
+namespace Lucene.Net.Analysis.Path
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- using TokenizerFactory = TokenizerFactory;
- using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
-
- /// <summary>
- /// Factory for <seealso cref="PathHierarchyTokenizer"/>.
- /// <para>
- /// This factory is typically configured for use only in the <code>index</code>
- /// Analyzer (or only in the <code>query</code> Analyzer, but never both).
- /// </para>
- /// <para>
- /// For example, in the configuration below a query for
- /// <code>Books/NonFic</code> will match documents indexed with values like
- /// <code>Books/NonFic</code>, <code>Books/NonFic/Law</code>,
- /// <code>Books/NonFic/Science/Physics</code>, etc. But it will not match
- /// documents indexed with values like <code>Books</code>, or
- /// <code>Books/Fic</code>...
- /// </para>
- ///
- /// <pre class="prettyprint">
- /// <fieldType name="descendent_path" class="solr.TextField">
- /// <analyzer type="index">
- /// <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
- /// </analyzer>
- /// <analyzer type="query">
- /// <tokenizer class="solr.KeywordTokenizerFactory" />
- /// </analyzer>
- /// </fieldType>
- /// </pre>
- /// <para>
- /// In this example however we see the oposite configuration, so that a query
- /// for <code>Books/NonFic/Science/Physics</code> would match documents
- /// containing <code>Books/NonFic</code>, <code>Books/NonFic/Science</code>,
- /// or <code>Books/NonFic/Science/Physics</code>, but not
- /// <code>Books/NonFic/Science/Physics/Theory</code> or
- /// <code>Books/NonFic/Law</code>.
- /// </para>
- /// <pre class="prettyprint">
- /// <fieldType name="descendent_path" class="solr.TextField">
- /// <analyzer type="index">
- /// <tokenizer class="solr.KeywordTokenizerFactory" />
- /// </analyzer>
- /// <analyzer type="query">
- /// <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
- /// </analyzer>
- /// </fieldType>
- /// </pre>
- /// </summary>
- public class PathHierarchyTokenizerFactory : TokenizerFactory
- {
- private readonly char delimiter;
- private readonly char replacement;
- private readonly bool reverse;
- private readonly int skip;
-
- /// <summary>
- /// Creates a new PathHierarchyTokenizerFactory </summary>
- public PathHierarchyTokenizerFactory(IDictionary<string, string> args) : base(args)
- {
- delimiter = getChar(args, "delimiter", PathHierarchyTokenizer.DEFAULT_DELIMITER);
- replacement = getChar(args, "replace", delimiter);
- reverse = getBoolean(args, "reverse", false);
- skip = getInt(args, "skip", PathHierarchyTokenizer.DEFAULT_SKIP);
- if (args.Count > 0)
- {
- throw new System.ArgumentException("Unknown parameters: " + args);
- }
- }
-
- public override Tokenizer create(AttributeFactory factory, Reader input)
- {
- if (reverse)
- {
- return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
- }
- return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
- }
- }
-
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Factory for <seealso cref="PathHierarchyTokenizer"/>.
+ /// <para>
+ /// This factory is typically configured for use only in the <code>index</code>
+ /// Analyzer (or only in the <code>query</code> Analyzer, but never both).
+ /// </para>
+ /// <para>
+ /// For example, in the configuration below a query for
+ /// <code>Books/NonFic</code> will match documents indexed with values like
+ /// <code>Books/NonFic</code>, <code>Books/NonFic/Law</code>,
+ /// <code>Books/NonFic/Science/Physics</code>, etc. But it will not match
+ /// documents indexed with values like <code>Books</code>, or
+ /// <code>Books/Fic</code>...
+ /// </para>
+ ///
+ /// <pre class="prettyprint">
+ /// <fieldType name="descendent_path" class="solr.TextField">
+ /// <analyzer type="index">
+ /// <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+ /// </analyzer>
+ /// <analyzer type="query">
+ /// <tokenizer class="solr.KeywordTokenizerFactory" />
+ /// </analyzer>
+ /// </fieldType>
+ /// </pre>
+ /// <para>
+ /// In this example however we see the oposite configuration, so that a query
+ /// for <code>Books/NonFic/Science/Physics</code> would match documents
+ /// containing <code>Books/NonFic</code>, <code>Books/NonFic/Science</code>,
+ /// or <code>Books/NonFic/Science/Physics</code>, but not
+ /// <code>Books/NonFic/Science/Physics/Theory</code> or
+ /// <code>Books/NonFic/Law</code>.
+ /// </para>
+ /// <pre class="prettyprint">
+ /// <fieldType name="descendent_path" class="solr.TextField">
+ /// <analyzer type="index">
+ /// <tokenizer class="solr.KeywordTokenizerFactory" />
+ /// </analyzer>
+ /// <analyzer type="query">
+ /// <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
+ /// </analyzer>
+ /// </fieldType>
+ /// </pre>
+ /// </summary>
+ public class PathHierarchyTokenizerFactory : TokenizerFactory
+ {
+ private readonly char delimiter;
+ private readonly char replacement;
+ private readonly bool reverse;
+ private readonly int skip;
+ /// <summary>
+ /// Creates a new PathHierarchyTokenizerFactory </summary>
+ public PathHierarchyTokenizerFactory(IDictionary<string, string> args)
+ : base(args)
+ {
+ delimiter = getChar(args, "delimiter", PathHierarchyTokenizer.DEFAULT_DELIMITER);
+ replacement = getChar(args, "replace", delimiter);
+ reverse = getBoolean(args, "reverse", false);
+ skip = getInt(args, "skip", PathHierarchyTokenizer.DEFAULT_SKIP);
+ if (args.Count > 0)
+ {
+ throw new System.ArgumentException("Unknown parameters: " + args);
+ }
+ }
+ public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
+ {
+ if (reverse)
+ {
+ return new ReversePathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
+ }
+ return new PathHierarchyTokenizer(factory, input, delimiter, replacement, skip);
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
index 00b5880..47a5d0f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Path/ReversePathHierarchyTokenizer.cs
@@ -1,214 +1,217 @@
using System.Collections.Generic;
using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
+using Reader = System.IO.TextReader;
-namespace org.apache.lucene.analysis.path
+namespace Lucene.Net.Analysis.Path
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
- /// <summary>
- /// Tokenizer for domain-like hierarchies.
- /// <para>
- /// Take something like:
- ///
- /// <pre>
- /// www.site.co.uk
- /// </pre>
- ///
- /// and make:
- ///
- /// <pre>
- /// www.site.co.uk
- /// site.co.uk
- /// co.uk
- /// uk
- /// </pre>
- ///
- /// </para>
- /// </summary>
- public class ReversePathHierarchyTokenizer : Tokenizer
- {
-
- public ReversePathHierarchyTokenizer(Reader input) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
- {
- }
-
- public ReversePathHierarchyTokenizer(Reader input, int skip) : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip)
- {
- }
-
- public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP)
- {
- }
-
- public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP)
- {
- }
-
- public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) : this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP)
- {
- }
-
- public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip)
- {
- }
-
- public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
- {
- }
-
- public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip) : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
- {
- }
-
- public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip)
- {
- }
- public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip) : base(factory, input)
- {
- if (bufferSize < 0)
- {
- throw new System.ArgumentException("bufferSize cannot be negative");
- }
- if (skip < 0)
- {
- throw new System.ArgumentException("skip cannot be negative");
- }
- termAtt.resizeBuffer(bufferSize);
- this.delimiter = delimiter;
- this.replacement = replacement;
- this.skip = skip;
- resultToken = new StringBuilder(bufferSize);
- resultTokenBuffer = new char[bufferSize];
- delimiterPositions = new List<>(bufferSize / 10);
- }
-
- private const int DEFAULT_BUFFER_SIZE = 1024;
- public const char DEFAULT_DELIMITER = '/';
- public const int DEFAULT_SKIP = 0;
-
- private readonly char delimiter;
- private readonly char replacement;
- private readonly int skip;
-
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
- private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
- private readonly PositionIncrementAttribute posAtt = addAttribute(typeof(PositionIncrementAttribute));
-
- private int endPosition = 0;
- private int finalOffset = 0;
- private int skipped = 0;
- private StringBuilder resultToken;
-
- private IList<int?> delimiterPositions;
- private int delimitersCount = -1;
- private char[] resultTokenBuffer;
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- clearAttributes();
- if (delimitersCount == -1)
- {
- int length = 0;
- delimiterPositions.Add(0);
- while (true)
- {
- int c = input.read();
- if (c < 0)
- {
- break;
- }
- length++;
- if (c == delimiter)
- {
- delimiterPositions.Add(length);
- resultToken.Append(replacement);
- }
- else
- {
- resultToken.Append((char)c);
- }
- }
- delimitersCount = delimiterPositions.Count;
- if (delimiterPositions[delimitersCount - 1] < length)
- {
- delimiterPositions.Add(length);
- delimitersCount++;
- }
- if (resultTokenBuffer.Length < resultToken.Length)
- {
- resultTokenBuffer = new char[resultToken.Length];
- }
- resultToken.getChars(0, resultToken.Length, resultTokenBuffer, 0);
- resultToken.Length = 0;
- int idx = delimitersCount - 1 - skip;
- if (idx >= 0)
- {
- // otherwise its ok, because we will skip and return false
- endPosition = delimiterPositions[idx];
- }
- finalOffset = correctOffset(length);
- posAtt.PositionIncrement = 1;
- }
- else
- {
- posAtt.PositionIncrement = 0;
- }
-
- while (skipped < delimitersCount - skip - 1)
- {
- int start = delimiterPositions[skipped];
- termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
- offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
- skipped++;
- return true;
- }
-
- return false;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
- public override void end()
- {
- base.end();
- // set final offset
- offsetAtt.setOffset(finalOffset, finalOffset);
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
- {
- base.reset();
- resultToken.Length = 0;
- finalOffset = 0;
- endPosition = 0;
- skipped = 0;
- delimitersCount = -1;
- delimiterPositions.Clear();
- }
- }
-
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Tokenizer for domain-like hierarchies.
+ /// <para>
+ /// Take something like:
+ ///
+ /// <pre>
+ /// www.site.co.uk
+ /// </pre>
+ ///
+ /// and make:
+ ///
+ /// <pre>
+ /// www.site.co.uk
+ /// site.co.uk
+ /// co.uk
+ /// uk
+ /// </pre>
+ ///
+ /// </para>
+ /// </summary>
+ public class ReversePathHierarchyTokenizer : Tokenizer
+ {
+
+ public ReversePathHierarchyTokenizer(Reader input)
+ : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP)
+ {
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, int skip)
+ : this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip)
+ {
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter)
+ : this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP)
+ {
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement)
+ : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP)
+ {
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement)
+ : this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP)
+ {
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip)
+ : this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip)
+ {
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip)
+ : this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+ {
+ }
+
+ public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, char delimiter, char replacement, int skip)
+ : this(factory, input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip)
+ {
+ }
+
+ public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip)
+ : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, bufferSize, delimiter, replacement, skip)
+ {
+ termAtt = AddAttribute<ICharTermAttribute>();
+ offsetAtt = AddAttribute<IOffsetAttribute>();
+ posAtt = AddAttribute<IPositionIncrementAttribute>();
+ }
+
+ public ReversePathHierarchyTokenizer(AttributeFactory factory, Reader input, int bufferSize, char delimiter, char replacement, int skip)
+ : base(factory, input)
+ {
+ if (bufferSize < 0)
+ {
+ throw new System.ArgumentException("bufferSize cannot be negative");
+ }
+ if (skip < 0)
+ {
+ throw new System.ArgumentException("skip cannot be negative");
+ }
+ termAtt.ResizeBuffer(bufferSize);
+ this.delimiter = delimiter;
+ this.replacement = replacement;
+ this.skip = skip;
+ resultToken = new StringBuilder(bufferSize);
+ resultTokenBuffer = new char[bufferSize];
+ delimiterPositions = new List<int?>(bufferSize / 10);
+ }
+
+ private const int DEFAULT_BUFFER_SIZE = 1024;
+ public const char DEFAULT_DELIMITER = '/';
+ public const int DEFAULT_SKIP = 0;
+
+ private readonly char delimiter;
+ private readonly char replacement;
+ private readonly int skip;
+
+ private readonly ICharTermAttribute termAtt;
+ private readonly IOffsetAttribute offsetAtt;
+ private readonly IPositionIncrementAttribute posAtt;
+
+ private int endPosition = 0;
+ private int finalOffset = 0;
+ private int skipped = 0;
+ private readonly StringBuilder resultToken;
+
+ private readonly IList<int?> delimiterPositions;
+ private int delimitersCount = -1;
+ private char[] resultTokenBuffer;
+
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ if (delimitersCount == -1)
+ {
+ int length = 0;
+ delimiterPositions.Add(0);
+ while (true)
+ {
+ int c = input.Read();
+ if (c < 0)
+ {
+ break;
+ }
+ length++;
+ if (c == delimiter)
+ {
+ delimiterPositions.Add(length);
+ resultToken.Append(replacement);
+ }
+ else
+ {
+ resultToken.Append((char)c);
+ }
+ }
+ delimitersCount = delimiterPositions.Count;
+ if (delimiterPositions[delimitersCount - 1] < length)
+ {
+ delimiterPositions.Add(length);
+ delimitersCount++;
+ }
+ if (resultTokenBuffer.Length < resultToken.Length)
+ {
+ resultTokenBuffer = new char[resultToken.Length];
+ }
+ resultToken.GetChars(0, resultToken.Length, resultTokenBuffer, 0);
+ resultToken.Length = 0;
+ int idx = delimitersCount - 1 - skip;
+ if (idx >= 0)
+ {
+ // otherwise its ok, because we will skip and return false
+ endPosition = delimiterPositions[idx];
+ }
+ finalOffset = CorrectOffset(length);
+ posAtt.PositionIncrement = 1;
+ }
+ else
+ {
+ posAtt.PositionIncrement = 0;
+ }
+
+ while (skipped < delimitersCount - skip - 1)
+ {
+ var start = delimiterPositions[skipped] ?? 0;
+ termAtt.CopyBuffer(resultTokenBuffer, start, endPosition - start);
+ offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(endPosition));
+ skipped++;
+ return true;
+ }
+
+ return false;
+ }
+
+ public override void End()
+ {
+ base.End();
+ // set final offset
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ public override void Reset()
+ {
+ base.Reset();
+ resultToken.Length = 0;
+ finalOffset = 0;
+ endPosition = 0;
+ skipped = 0;
+ delimitersCount = -1;
+ delimiterPositions.Clear();
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
index cc65164..476c7fe 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilterFactory.cs
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
namespace Lucene.Net.Analysis.Position
{
@@ -33,9 +34,7 @@ namespace Lucene.Net.Analysis.Position
/// </analyzer>
/// </fieldType></pre>
/// </summary>
- /// <seealso cref= org.apache.lucene.analysis.position.PositionFilter
- /// @since solr 1.4 </seealso>
- /// @deprecated (4.4)
+ /// <seealso cref="PositionFilter"/>
[Obsolete("(4.4)")]
public class PositionFilterFactory : TokenFilterFactory
{
@@ -51,7 +50,7 @@ namespace Lucene.Net.Analysis.Position
{
throw new System.ArgumentException("Unknown parameters: " + args);
}
- if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_44))
+ if (luceneMatchVersion != null && luceneMatchVersion.OnOrAfter(Lucene.Net.Util.Version.LUCENE_44))
{
throw new System.ArgumentException("PositionFilter is deprecated as of Lucene 4.4. You should either fix your code to not use it or use Lucene 4.3 version compatibility");
}
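A hedged sketch of what the version gate above means in practice (hypothetical configuration values; the "luceneMatchVersion" and "positionIncrement" keys follow the usual analysis-factory args convention):

    using System.Collections.Generic;
    using Lucene.Net.Analysis.Position;

    // Constructing the deprecated factory directly; with LUCENE_44 or later as
    // the match version the constructor throws ArgumentException, per the check above.
    var args = new Dictionary<string, string>
    {
        { "luceneMatchVersion", "LUCENE_43" }, // pre-4.4 compatibility keeps the factory usable
        { "positionIncrement", "1" }
    };
    var factory = new PositionFilterFactory(args);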
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
index be73228..548b7f6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Query/QueryAutoStopWordAnalyzer.cs
@@ -152,12 +152,12 @@ namespace Lucene.Net.Analysis.Query
protected override TokenStreamComponents WrapComponents(string fieldName, TokenStreamComponents components)
{
- HashSet<string> stopWords = stopWordsPerField[fieldName];
+ HashSet<string> stopWords; // Java's map.get returns null for absent keys; the .NET indexer would throw
+ stopWordsPerField.TryGetValue(fieldName, out stopWords);
if (stopWords == null)
{
return components;
}
- StopFilter stopFilter = new StopFilter(matchVersion, components.TokenStream, new CharArraySet(matchVersion, stopWords, false));
+ var stopFilter = new StopFilter(matchVersion, components.TokenStream, new CharArraySet(matchVersion, stopWords, false));
return new TokenStreamComponents(components.Tokenizer, stopFilter);
}
@@ -168,9 +168,9 @@ namespace Lucene.Net.Analysis.Query
/// method calls will be returned </param>
/// <returns> the stop words identified for a field </returns>
public string[] GetStopWords(string fieldName)
- {
- HashSet<string> stopWords = stopWordsPerField[fieldName];
- return stopWords != null ? stopWords.ToArray(new string[stopWords.Count]) : new string[0];
+ {
+ HashSet<string> stopWords; // null-safe lookup, mirroring Java's map.get semantics
+ stopWordsPerField.TryGetValue(fieldName, out stopWords);
+ return stopWords != null ? stopWords.ToArray() : new string[0];
}
/// <summary>
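One porting note on the two lookups above: Java's Map.get returns null for an absent key, while indexing a .NET Dictionary throws KeyNotFoundException, so TryGetValue is the safer shape. A small standalone illustration (not part of this commit):

    using System;
    using System.Collections.Generic;

    var perField = new Dictionary<string, HashSet<string>>();
    HashSet<string> words;
    perField.TryGetValue("body", out words);   // no exception; words simply stays null
    Console.WriteLine(words == null ? "no stop words for field" : string.Join(",", words));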
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/56bfeaab/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
index a04fd51..dc080a6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Sinks/DateRecognizerSinkFilter.cs
@@ -1,6 +1,8 @@
using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.sinks
+namespace Lucene.Net.Analysis.Sinks
{
/*
@@ -19,12 +21,7 @@ namespace org.apache.lucene.analysis.sinks
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using AttributeSource = org.apache.lucene.util.AttributeSource;
-
- /// <summary>
+ /// <summary>
/// Attempts to parse the <seealso cref="CharTermAttribute#buffer()"/> as a Date using a <seealso cref="java.text.DateFormat"/>.
/// If the value is a Date, it will add it to the sink.
/// <p/>
@@ -52,15 +49,15 @@ namespace org.apache.lucene.analysis.sinks
this.dateFormat = dateFormat;
}
- public override bool accept(AttributeSource source)
+ public override bool Accept(AttributeSource source)
{
if (termAtt == null)
{
- termAtt = source.addAttribute(typeof(CharTermAttribute));
+ termAtt = source.AddAttribute<ICharTermAttribute>();
}
try
{
- DateTime date = dateFormat.parse(termAtt.ToString()); //We don't care about the date, just that we can parse it as a date
+ DateTime date = dateFormat.Parse(termAtt.ToString()); // We don't care about the date, just that we can parse it as a date
if (date != null)
{
return true;