Posted to commits@lucenenet.apache.org by cc...@apache.org on 2013/04/03 19:39:59 UTC
[16/51] [partial] Mass convert mixed tabs to spaces
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/DocInverterPerField.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/DocInverterPerField.cs b/src/core/Index/DocInverterPerField.cs
index 8cd7c0a..0cdd9b6 100644
--- a/src/core/Index/DocInverterPerField.cs
+++ b/src/core/Index/DocInverterPerField.cs
@@ -22,214 +22,214 @@ using TokenStream = Lucene.Net.Analysis.TokenStream;
namespace Lucene.Net.Index
{
-
- /// <summary> Holds state for inverting all occurrences of a single
- /// field in the document. This class doesn't do anything
- /// itself; instead, it forwards the tokens produced by
- /// analysis to its own consumer
- /// (InvertedDocConsumerPerField). It also interacts with an
- /// endConsumer (InvertedDocEndConsumerPerField).
- /// </summary>
-
- sealed class DocInverterPerField:DocFieldConsumerPerField
- {
-
- private DocInverterPerThread perThread;
- private FieldInfo fieldInfo;
- internal InvertedDocConsumerPerField consumer;
- internal InvertedDocEndConsumerPerField endConsumer;
- internal DocumentsWriter.DocState docState;
- internal FieldInvertState fieldState;
-
- public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
- {
- this.perThread = perThread;
- this.fieldInfo = fieldInfo;
- docState = perThread.docState;
- fieldState = perThread.fieldState;
- this.consumer = perThread.consumer.AddField(this, fieldInfo);
- this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo);
- }
-
- public override void Abort()
- {
- consumer.Abort();
- endConsumer.Abort();
- }
-
- public override void ProcessFields(IFieldable[] fields, int count)
- {
-
- fieldState.Reset(docState.doc.Boost);
-
- int maxFieldLength = docState.maxFieldLength;
-
- bool doInvert = consumer.Start(fields, count);
-
- for (int i = 0; i < count; i++)
- {
-
- IFieldable field = fields[i];
-
- // TODO FI: this should be "genericized" to querying
- // consumer if it wants to see this particular field
- // tokenized.
- if (field.IsIndexed && doInvert)
- {
-
- bool anyToken;
-
- if (fieldState.length > 0)
- fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
-
- if (!field.IsTokenized)
- {
- // un-tokenized field
- System.String stringValue = field.StringValue;
- int valueLength = stringValue.Length;
- perThread.singleToken.Reinit(stringValue, 0, valueLength);
- fieldState.attributeSource = perThread.singleToken;
- consumer.Start(field);
-
- bool success = false;
- try
- {
- consumer.Add();
- success = true;
- }
- finally
- {
- if (!success)
- docState.docWriter.SetAborting();
- }
- fieldState.offset += valueLength;
- fieldState.length++;
- fieldState.position++;
- anyToken = valueLength > 0;
- }
- else
- {
- // tokenized field
- TokenStream stream;
- TokenStream streamValue = field.TokenStreamValue;
-
- if (streamValue != null)
- stream = streamValue;
- else
- {
- // the field does not have a TokenStream,
- // so we have to obtain one from the analyzer
- System.IO.TextReader reader; // find or make Reader
- System.IO.TextReader readerValue = field.ReaderValue;
-
- if (readerValue != null)
- reader = readerValue;
- else
- {
- System.String stringValue = field.StringValue;
- if (stringValue == null)
- throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
- perThread.stringReader.Init(stringValue);
- reader = perThread.stringReader;
- }
-
- // Tokenize field and add to postingTable
- stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
- }
-
- // reset the TokenStream to the first token
- stream.Reset();
-
- int startLength = fieldState.length;
-
- try
- {
- int offsetEnd = fieldState.offset - 1;
-
- bool hasMoreTokens = stream.IncrementToken();
-
- fieldState.attributeSource = stream;
+
+ /// <summary> Holds state for inverting all occurrences of a single
+ /// field in the document. This class doesn't do anything
+ /// itself; instead, it forwards the tokens produced by
+ /// analysis to its own consumer
+ /// (InvertedDocConsumerPerField). It also interacts with an
+ /// endConsumer (InvertedDocEndConsumerPerField).
+ /// </summary>
+
+ sealed class DocInverterPerField:DocFieldConsumerPerField
+ {
+
+ private DocInverterPerThread perThread;
+ private FieldInfo fieldInfo;
+ internal InvertedDocConsumerPerField consumer;
+ internal InvertedDocEndConsumerPerField endConsumer;
+ internal DocumentsWriter.DocState docState;
+ internal FieldInvertState fieldState;
+
+ public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
+ {
+ this.perThread = perThread;
+ this.fieldInfo = fieldInfo;
+ docState = perThread.docState;
+ fieldState = perThread.fieldState;
+ this.consumer = perThread.consumer.AddField(this, fieldInfo);
+ this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo);
+ }
+
+ public override void Abort()
+ {
+ consumer.Abort();
+ endConsumer.Abort();
+ }
+
+ public override void ProcessFields(IFieldable[] fields, int count)
+ {
+
+ fieldState.Reset(docState.doc.Boost);
+
+ int maxFieldLength = docState.maxFieldLength;
+
+ bool doInvert = consumer.Start(fields, count);
+
+ for (int i = 0; i < count; i++)
+ {
+
+ IFieldable field = fields[i];
+
+ // TODO FI: this should be "genericized" to querying
+ // consumer if it wants to see this particular field
+ // tokenized.
+ if (field.IsIndexed && doInvert)
+ {
+
+ bool anyToken;
+
+ if (fieldState.length > 0)
+ fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
+
+ if (!field.IsTokenized)
+ {
+ // un-tokenized field
+ System.String stringValue = field.StringValue;
+ int valueLength = stringValue.Length;
+ perThread.singleToken.Reinit(stringValue, 0, valueLength);
+ fieldState.attributeSource = perThread.singleToken;
+ consumer.Start(field);
+
+ bool success = false;
+ try
+ {
+ consumer.Add();
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ docState.docWriter.SetAborting();
+ }
+ fieldState.offset += valueLength;
+ fieldState.length++;
+ fieldState.position++;
+ anyToken = valueLength > 0;
+ }
+ else
+ {
+ // tokenized field
+ TokenStream stream;
+ TokenStream streamValue = field.TokenStreamValue;
+
+ if (streamValue != null)
+ stream = streamValue;
+ else
+ {
+ // the field does not have a TokenStream,
+ // so we have to obtain one from the analyzer
+ System.IO.TextReader reader; // find or make Reader
+ System.IO.TextReader readerValue = field.ReaderValue;
+
+ if (readerValue != null)
+ reader = readerValue;
+ else
+ {
+ System.String stringValue = field.StringValue;
+ if (stringValue == null)
+ throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
+ perThread.stringReader.Init(stringValue);
+ reader = perThread.stringReader;
+ }
+
+ // Tokenize field and add to postingTable
+ stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
+ }
+
+ // reset the TokenStream to the first token
+ stream.Reset();
+
+ int startLength = fieldState.length;
+
+ try
+ {
+ int offsetEnd = fieldState.offset - 1;
+
+ bool hasMoreTokens = stream.IncrementToken();
+
+ fieldState.attributeSource = stream;
IOffsetAttribute offsetAttribute = fieldState.attributeSource.AddAttribute<IOffsetAttribute>();
- IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();
-
- consumer.Start(field);
-
- for (; ; )
- {
-
- // If we hit an exception in stream.next below
- // (which is fairly common, eg if analyzer
- // chokes on a given document), then it's
- // non-aborting and (above) this one document
- // will be marked as deleted, but still
- // consume a docID
-
- if (!hasMoreTokens)
- break;
-
- int posIncr = posIncrAttribute.PositionIncrement;
- fieldState.position += posIncr;
- if (fieldState.position > 0)
- {
- fieldState.position--;
- }
-
- if (posIncr == 0)
- fieldState.numOverlap++;
-
- bool success = false;
- try
- {
- // If we hit an exception in here, we abort
- // all buffered documents since the last
- // flush, on the likelihood that the
- // internal state of the consumer is now
- // corrupt and should not be flushed to a
- // new segment:
- consumer.Add();
- success = true;
- }
- finally
- {
- if (!success)
- docState.docWriter.SetAborting();
- }
- fieldState.position++;
- offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
- if (++fieldState.length >= maxFieldLength)
- {
- if (docState.infoStream != null)
- docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
- break;
- }
-
- hasMoreTokens = stream.IncrementToken();
- }
- // trigger streams to perform end-of-stream operations
- stream.End();
-
- fieldState.offset += offsetAttribute.EndOffset;
- anyToken = fieldState.length > startLength;
- }
- finally
- {
- stream.Close();
- }
- }
-
- if (anyToken)
- fieldState.offset += docState.analyzer.GetOffsetGap(field);
- fieldState.boost *= field.Boost;
- }
+ IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();
+
+ consumer.Start(field);
+
+ for (; ; )
+ {
+
+ // If we hit an exception in stream.next below
+ // (which is fairly common, eg if analyzer
+ // chokes on a given document), then it's
+ // non-aborting and (above) this one document
+ // will be marked as deleted, but still
+ // consume a docID
+
+ if (!hasMoreTokens)
+ break;
+
+ int posIncr = posIncrAttribute.PositionIncrement;
+ fieldState.position += posIncr;
+ if (fieldState.position > 0)
+ {
+ fieldState.position--;
+ }
+
+ if (posIncr == 0)
+ fieldState.numOverlap++;
+
+ bool success = false;
+ try
+ {
+ // If we hit an exception in here, we abort
+ // all buffered documents since the last
+ // flush, on the likelihood that the
+ // internal state of the consumer is now
+ // corrupt and should not be flushed to a
+ // new segment:
+ consumer.Add();
+ success = true;
+ }
+ finally
+ {
+ if (!success)
+ docState.docWriter.SetAborting();
+ }
+ fieldState.position++;
+ offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
+ if (++fieldState.length >= maxFieldLength)
+ {
+ if (docState.infoStream != null)
+ docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
+ break;
+ }
+
+ hasMoreTokens = stream.IncrementToken();
+ }
+ // trigger streams to perform end-of-stream operations
+ stream.End();
+
+ fieldState.offset += offsetAttribute.EndOffset;
+ anyToken = fieldState.length > startLength;
+ }
+ finally
+ {
+ stream.Close();
+ }
+ }
+
+ if (anyToken)
+ fieldState.offset += docState.analyzer.GetOffsetGap(field);
+ fieldState.boost *= field.Boost;
+ }
// LUCENE-2387: don't hang onto the field, so GC can
// reclaim
fields[i] = null;
- }
-
- consumer.Finish();
- endConsumer.Finish();
- }
- }
+ }
+
+ consumer.Finish();
+ endConsumer.Finish();
+ }
+ }
}
\ No newline at end of file
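
For context, the ProcessFields method in the diff above walks each field of a document, obtains a token stream (or reinitializes a single-token attribute source for un-tokenized fields), and feeds every token to the downstream consumer while tracking position, offset, and length in FieldInvertState, stopping at maxFieldLength. The following is a minimal, self-contained C# sketch of that control flow; it is not part of this whitespace-only commit, and all names in it (FieldSketch, InversionSketch, Invert) are illustrative stand-ins rather than Lucene.NET APIs.

    // Minimal sketch, not from the commit: approximates the tokenized/un-tokenized
    // branches of ProcessFields with simplified stand-in types.
    using System;
    using System.Collections.Generic;

    sealed class FieldSketch
    {
        public string Name;
        public string Value;
        public bool Tokenized;
    }

    static class InversionSketch
    {
        // Returns (term, position) pairs, stopping once maxFieldLength tokens have been
        // consumed, just as ProcessFields stops and logs "maxFieldLength ... reached".
        public static List<Tuple<string, int>> Invert(FieldSketch field, int maxFieldLength)
        {
            // An un-tokenized field contributes exactly one token
            // (cf. perThread.singleToken.Reinit above).
            string[] tokens = field.Tokenized
                ? field.Value.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
                : new[] { field.Value };

            var postings = new List<Tuple<string, int>>();
            int position = -1;
            foreach (string token in tokens)
            {
                position++;                            // default position increment of 1
                postings.Add(Tuple.Create(token, position));
                if (postings.Count >= maxFieldLength)
                    break;                             // ignore following tokens
            }
            return postings;
        }
    }

Each call to Invert corresponds roughly to one iteration of the loop over fields[i]; the real class additionally applies position-increment and offset gaps between multiple values of the same field and marks the document as aborting if the consumer throws, which the sketch omits.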
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/DocInverterPerThread.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/DocInverterPerThread.cs b/src/core/Index/DocInverterPerThread.cs
index c38ed35..afa6d14 100644
--- a/src/core/Index/DocInverterPerThread.cs
+++ b/src/core/Index/DocInverterPerThread.cs
@@ -22,86 +22,86 @@ using TokenStream = Lucene.Net.Analysis.TokenStream;
namespace Lucene.Net.Index
{
-
- /// <summary>This is a DocFieldConsumer that inverts each field,
- /// separately, from a Document, and accepts a
- /// InvertedTermsConsumer to process those terms.
- /// </summary>
-
- sealed class DocInverterPerThread : DocFieldConsumerPerThread
- {
- private void InitBlock()
- {
- singleToken = new SingleTokenAttributeSource();
- }
- internal DocInverter docInverter;
- internal InvertedDocConsumerPerThread consumer;
- internal InvertedDocEndConsumerPerThread endConsumer;
- internal SingleTokenAttributeSource singleToken;
-
- internal class SingleTokenAttributeSource : AttributeSource
- {
- internal ITermAttribute termAttribute;
- internal IOffsetAttribute offsetAttribute;
+
+ /// <summary>This is a DocFieldConsumer that inverts each field,
+ /// separately, from a Document, and accepts a
+ /// InvertedTermsConsumer to process those terms.
+ /// </summary>
+
+ sealed class DocInverterPerThread : DocFieldConsumerPerThread
+ {
+ private void InitBlock()
+ {
+ singleToken = new SingleTokenAttributeSource();
+ }
+ internal DocInverter docInverter;
+ internal InvertedDocConsumerPerThread consumer;
+ internal InvertedDocEndConsumerPerThread endConsumer;
+ internal SingleTokenAttributeSource singleToken;
+
+ internal class SingleTokenAttributeSource : AttributeSource
+ {
+ internal ITermAttribute termAttribute;
+ internal IOffsetAttribute offsetAttribute;
internal SingleTokenAttributeSource()
- {
+ {
termAttribute = AddAttribute<ITermAttribute>();
- offsetAttribute = AddAttribute<IOffsetAttribute>();
- }
-
- public void Reinit(System.String stringValue, int startOffset, int endOffset)
- {
- termAttribute.SetTermBuffer(stringValue);
- offsetAttribute.SetOffset(startOffset, endOffset);
- }
- }
-
- internal DocumentsWriter.DocState docState;
-
- internal FieldInvertState fieldState = new FieldInvertState();
-
- // Used to read a string value for a field
- internal ReusableStringReader stringReader = new ReusableStringReader();
-
- public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter)
- {
- InitBlock();
- this.docInverter = docInverter;
- docState = docFieldProcessorPerThread.docState;
- consumer = docInverter.consumer.AddThread(this);
- endConsumer = docInverter.endConsumer.AddThread(this);
- }
-
- public override void StartDocument()
- {
- consumer.StartDocument();
- endConsumer.StartDocument();
- }
-
- public override DocumentsWriter.DocWriter FinishDocument()
- {
- // TODO: allow endConsumer.finishDocument to also return
- // a DocWriter
- endConsumer.FinishDocument();
- return consumer.FinishDocument();
- }
-
- public override void Abort()
- {
- try
- {
- consumer.Abort();
- }
- finally
- {
- endConsumer.Abort();
- }
- }
-
- public override DocFieldConsumerPerField AddField(FieldInfo fi)
- {
- return new DocInverterPerField(this, fi);
- }
- }
+ offsetAttribute = AddAttribute<IOffsetAttribute>();
+ }
+
+ public void Reinit(System.String stringValue, int startOffset, int endOffset)
+ {
+ termAttribute.SetTermBuffer(stringValue);
+ offsetAttribute.SetOffset(startOffset, endOffset);
+ }
+ }
+
+ internal DocumentsWriter.DocState docState;
+
+ internal FieldInvertState fieldState = new FieldInvertState();
+
+ // Used to read a string value for a field
+ internal ReusableStringReader stringReader = new ReusableStringReader();
+
+ public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter)
+ {
+ InitBlock();
+ this.docInverter = docInverter;
+ docState = docFieldProcessorPerThread.docState;
+ consumer = docInverter.consumer.AddThread(this);
+ endConsumer = docInverter.endConsumer.AddThread(this);
+ }
+
+ public override void StartDocument()
+ {
+ consumer.StartDocument();
+ endConsumer.StartDocument();
+ }
+
+ public override DocumentsWriter.DocWriter FinishDocument()
+ {
+ // TODO: allow endConsumer.finishDocument to also return
+ // a DocWriter
+ endConsumer.FinishDocument();
+ return consumer.FinishDocument();
+ }
+
+ public override void Abort()
+ {
+ try
+ {
+ consumer.Abort();
+ }
+ finally
+ {
+ endConsumer.Abort();
+ }
+ }
+
+ public override DocFieldConsumerPerField AddField(FieldInfo fi)
+ {
+ return new DocInverterPerField(this, fi);
+ }
+ }
}
\ No newline at end of file
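
DocInverterPerThread, shown in the second diff, owns the per-document lifecycle: StartDocument fans out to both consumers, AddField hands each field to a DocInverterPerField, and FinishDocument/Abort close out or discard the document. Below is a rough C# sketch of that ordering; IConsumerSketch and PerThreadSketch are invented names for illustration only and do not appear in the commit.

    // Minimal sketch, not from the commit: the start/process/finish/abort ordering
    // that DocInverterPerThread coordinates, with invented stand-in types.
    using System;

    interface IConsumerSketch
    {
        void StartDocument();
        void ProcessField(string name, string value);
        void FinishDocument();
        void Abort();
    }

    sealed class PerThreadSketch
    {
        private readonly IConsumerSketch consumer;
        private readonly IConsumerSketch endConsumer;

        public PerThreadSketch(IConsumerSketch consumer, IConsumerSketch endConsumer)
        {
            this.consumer = consumer;
            this.endConsumer = endConsumer;
        }

        public void ProcessDocument(Tuple<string, string>[] fields)
        {
            consumer.StartDocument();
            endConsumer.StartDocument();
            try
            {
                foreach (var field in fields)
                    consumer.ProcessField(field.Item1, field.Item2);  // per-field inversion
            }
            catch
            {
                // Mirrors the try/finally in Abort() above: the end consumer is
                // aborted even if aborting the main consumer throws.
                try { consumer.Abort(); } finally { endConsumer.Abort(); }
                throw;
            }
            // As in FinishDocument() above: the end consumer finishes first, then the
            // main consumer (whose result is the DocWriter in the real class).
            endConsumer.FinishDocument();
            consumer.FinishDocument();
        }
    }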