Posted to commits@lucenenet.apache.org by cc...@apache.org on 2013/04/03 19:39:59 UTC

[16/51] [partial] Mass convert mixed tabs to spaces
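
A conversion of this kind can be scripted in a few lines of C#. The sketch below is illustrative only (it is not the tool actually used for this commit) and assumes each leading tab maps to four spaces, which matches the indentation in the converted files; it rewrites only the leading whitespace run, leaving tabs elsewhere in a line alone.

    using System;
    using System.IO;
    using System.Text;

    // Hypothetical helper, not the script behind this commit: expand
    // leading tabs to four spaces in every .cs file under a directory.
    static class TabConverter
    {
        static void Main(string[] args)
        {
            string root = args.Length > 0 ? args[0] : ".";
            foreach (string path in Directory.GetFiles(root, "*.cs", SearchOption.AllDirectories))
            {
                string[] lines = File.ReadAllLines(path);
                for (int i = 0; i < lines.Length; i++)
                    lines[i] = ExpandLeadingTabs(lines[i]);
                File.WriteAllLines(path, lines);
            }
        }

        // Rewrite only the leading run of tabs/spaces; one tab -> four spaces.
        static string ExpandLeadingTabs(string line)
        {
            var sb = new StringBuilder();
            int i = 0;
            while (i < line.Length && (line[i] == '\t' || line[i] == ' '))
            {
                sb.Append(line[i] == '\t' ? "    " : " ");
                i++;
            }
            return sb.Append(line.Substring(i)).ToString();
        }
    }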

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/DocInverterPerField.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/DocInverterPerField.cs b/src/core/Index/DocInverterPerField.cs
index 8cd7c0a..0cdd9b6 100644
--- a/src/core/Index/DocInverterPerField.cs
+++ b/src/core/Index/DocInverterPerField.cs
@@ -22,214 +22,214 @@ using TokenStream = Lucene.Net.Analysis.TokenStream;
 
 namespace Lucene.Net.Index
 {
-	
-	/// <summary> Holds state for inverting all occurrences of a single
-	/// field in the document.  This class doesn't do anything
-	/// itself; instead, it forwards the tokens produced by
-	/// analysis to its own consumer
-	/// (InvertedDocConsumerPerField).  It also interacts with an
-	/// endConsumer (InvertedDocEndConsumerPerField).
-	/// </summary>
-	
-	sealed class DocInverterPerField:DocFieldConsumerPerField
-	{
-		
-		private DocInverterPerThread perThread;
-		private FieldInfo fieldInfo;
-		internal InvertedDocConsumerPerField consumer;
-		internal InvertedDocEndConsumerPerField endConsumer;
-		internal DocumentsWriter.DocState docState;
-		internal FieldInvertState fieldState;
-		
-		public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
-		{
-			this.perThread = perThread;
-			this.fieldInfo = fieldInfo;
-			docState = perThread.docState;
-			fieldState = perThread.fieldState;
-			this.consumer = perThread.consumer.AddField(this, fieldInfo);
-			this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo);
-		}
-		
-		public override void  Abort()
-		{
-			consumer.Abort();
-			endConsumer.Abort();
-		}
-		
-		public override void  ProcessFields(IFieldable[] fields, int count)
-		{
-			
-			fieldState.Reset(docState.doc.Boost);
-			
-			int maxFieldLength = docState.maxFieldLength;
-			
-			bool doInvert = consumer.Start(fields, count);
-			
-			for (int i = 0; i < count; i++)
-			{
-				
-				IFieldable field = fields[i];
-				
-				// TODO FI: this should be "genericized" to querying
-				// consumer if it wants to see this particular field
-				// tokenized.
-				if (field.IsIndexed && doInvert)
-				{
-					
-					bool anyToken;
-					
-					if (fieldState.length > 0)
-						fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
-					
-					if (!field.IsTokenized)
-					{
-						// un-tokenized field
-						System.String stringValue = field.StringValue;
-						int valueLength = stringValue.Length;
-						perThread.singleToken.Reinit(stringValue, 0, valueLength);
-						fieldState.attributeSource = perThread.singleToken;
-					    consumer.Start(field);
-						
-						bool success = false;
-						try
-						{
-							consumer.Add();
-							success = true;
-						}
-						finally
-						{
-							if (!success)
-								docState.docWriter.SetAborting();
-						}
-						fieldState.offset += valueLength;
-						fieldState.length++;
-						fieldState.position++;
-						anyToken = valueLength > 0;
-					}
-					else
-					{
-						// tokenized field
-						TokenStream stream;
-						TokenStream streamValue = field.TokenStreamValue;
-						
-						if (streamValue != null)
-							stream = streamValue;
-						else
-						{
-							// the field does not have a TokenStream,
-							// so we have to obtain one from the analyzer
-							System.IO.TextReader reader; // find or make Reader
-							System.IO.TextReader readerValue = field.ReaderValue;
-							
-							if (readerValue != null)
-								reader = readerValue;
-							else
-							{
-								System.String stringValue = field.StringValue;
-								if (stringValue == null)
-									throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
-								perThread.stringReader.Init(stringValue);
-								reader = perThread.stringReader;
-							}
-							
-							// Tokenize field and add to postingTable
-							stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
-						}
-						
-						// reset the TokenStream to the first token
-						stream.Reset();
-						
-						int startLength = fieldState.length;
-						
-						try
-						{
-							int offsetEnd = fieldState.offset - 1;
-							
-							bool hasMoreTokens = stream.IncrementToken();
-							
-							fieldState.attributeSource = stream;
+    
+    /// <summary> Holds state for inverting all occurrences of a single
+    /// field in the document.  This class doesn't do anything
+    /// itself; instead, it forwards the tokens produced by
+    /// analysis to its own consumer
+    /// (InvertedDocConsumerPerField).  It also interacts with an
+    /// endConsumer (InvertedDocEndConsumerPerField).
+    /// </summary>
+    
+    sealed class DocInverterPerField:DocFieldConsumerPerField
+    {
+        
+        private DocInverterPerThread perThread;
+        private FieldInfo fieldInfo;
+        internal InvertedDocConsumerPerField consumer;
+        internal InvertedDocEndConsumerPerField endConsumer;
+        internal DocumentsWriter.DocState docState;
+        internal FieldInvertState fieldState;
+        
+        public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo)
+        {
+            this.perThread = perThread;
+            this.fieldInfo = fieldInfo;
+            docState = perThread.docState;
+            fieldState = perThread.fieldState;
+            this.consumer = perThread.consumer.AddField(this, fieldInfo);
+            this.endConsumer = perThread.endConsumer.AddField(this, fieldInfo);
+        }
+        
+        public override void  Abort()
+        {
+            consumer.Abort();
+            endConsumer.Abort();
+        }
+        
+        public override void  ProcessFields(IFieldable[] fields, int count)
+        {
+            
+            fieldState.Reset(docState.doc.Boost);
+            
+            int maxFieldLength = docState.maxFieldLength;
+            
+            bool doInvert = consumer.Start(fields, count);
+            
+            for (int i = 0; i < count; i++)
+            {
+                
+                IFieldable field = fields[i];
+                
+                // TODO FI: this should be "genericized" to querying
+                // consumer if it wants to see this particular field
+                // tokenized.
+                if (field.IsIndexed && doInvert)
+                {
+                    
+                    bool anyToken;
+                    
+                    if (fieldState.length > 0)
+                        fieldState.position += docState.analyzer.GetPositionIncrementGap(fieldInfo.name);
+                    
+                    if (!field.IsTokenized)
+                    {
+                        // un-tokenized field
+                        System.String stringValue = field.StringValue;
+                        int valueLength = stringValue.Length;
+                        perThread.singleToken.Reinit(stringValue, 0, valueLength);
+                        fieldState.attributeSource = perThread.singleToken;
+                        consumer.Start(field);
+                        
+                        bool success = false;
+                        try
+                        {
+                            consumer.Add();
+                            success = true;
+                        }
+                        finally
+                        {
+                            if (!success)
+                                docState.docWriter.SetAborting();
+                        }
+                        fieldState.offset += valueLength;
+                        fieldState.length++;
+                        fieldState.position++;
+                        anyToken = valueLength > 0;
+                    }
+                    else
+                    {
+                        // tokenized field
+                        TokenStream stream;
+                        TokenStream streamValue = field.TokenStreamValue;
+                        
+                        if (streamValue != null)
+                            stream = streamValue;
+                        else
+                        {
+                            // the field does not have a TokenStream,
+                            // so we have to obtain one from the analyzer
+                            System.IO.TextReader reader; // find or make Reader
+                            System.IO.TextReader readerValue = field.ReaderValue;
+                            
+                            if (readerValue != null)
+                                reader = readerValue;
+                            else
+                            {
+                                System.String stringValue = field.StringValue;
+                                if (stringValue == null)
+                                    throw new System.ArgumentException("field must have either TokenStream, String or Reader value");
+                                perThread.stringReader.Init(stringValue);
+                                reader = perThread.stringReader;
+                            }
+                            
+                            // Tokenize field and add to postingTable
+                            stream = docState.analyzer.ReusableTokenStream(fieldInfo.name, reader);
+                        }
+                        
+                        // reset the TokenStream to the first token
+                        stream.Reset();
+                        
+                        int startLength = fieldState.length;
+                        
+                        try
+                        {
+                            int offsetEnd = fieldState.offset - 1;
+                            
+                            bool hasMoreTokens = stream.IncrementToken();
+                            
+                            fieldState.attributeSource = stream;
 
                             IOffsetAttribute offsetAttribute = fieldState.attributeSource.AddAttribute<IOffsetAttribute>();
-							IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();
-							
-							consumer.Start(field);
-							
-							for (; ; )
-							{
-								
-								// If we hit an exception in stream.next below
-								// (which is fairly common, eg if analyzer
-								// chokes on a given document), then it's
-								// non-aborting and (above) this one document
-								// will be marked as deleted, but still
-								// consume a docID
-								
-								if (!hasMoreTokens)
-									break;
-								
-								int posIncr = posIncrAttribute.PositionIncrement;
-								fieldState.position += posIncr;
-								if (fieldState.position > 0)
-								{
-									fieldState.position--;
-								}
-								
-								if (posIncr == 0)
-									fieldState.numOverlap++;
-								
-								bool success = false;
-								try
-								{
-									// If we hit an exception in here, we abort
-									// all buffered documents since the last
-									// flush, on the likelihood that the
-									// internal state of the consumer is now
-									// corrupt and should not be flushed to a
-									// new segment:
-									consumer.Add();
-									success = true;
-								}
-								finally
-								{
-									if (!success)
-										docState.docWriter.SetAborting();
-								}
-								fieldState.position++;
-								offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
-								if (++fieldState.length >= maxFieldLength)
-								{
-									if (docState.infoStream != null)
-										docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
-									break;
-								}
-								
-								hasMoreTokens = stream.IncrementToken();
-							}
-							// trigger streams to perform end-of-stream operations
-							stream.End();
-							
-							fieldState.offset += offsetAttribute.EndOffset;
-							anyToken = fieldState.length > startLength;
-						}
-						finally
-						{
-							stream.Close();
-						}
-					}
-					
-					if (anyToken)
-						fieldState.offset += docState.analyzer.GetOffsetGap(field);
-					fieldState.boost *= field.Boost;
-				}
+                            IPositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.AddAttribute<IPositionIncrementAttribute>();
+                            
+                            consumer.Start(field);
+                            
+                            for (; ; )
+                            {
+                                
+                                // If we hit an exception in stream.next below
+                                // (which is fairly common, eg if analyzer
+                                // chokes on a given document), then it's
+                                // non-aborting and (above) this one document
+                                // will be marked as deleted, but still
+                                // consume a docID
+                                
+                                if (!hasMoreTokens)
+                                    break;
+                                
+                                int posIncr = posIncrAttribute.PositionIncrement;
+                                fieldState.position += posIncr;
+                                if (fieldState.position > 0)
+                                {
+                                    fieldState.position--;
+                                }
+                                
+                                if (posIncr == 0)
+                                    fieldState.numOverlap++;
+                                
+                                bool success = false;
+                                try
+                                {
+                                    // If we hit an exception in here, we abort
+                                    // all buffered documents since the last
+                                    // flush, on the likelihood that the
+                                    // internal state of the consumer is now
+                                    // corrupt and should not be flushed to a
+                                    // new segment:
+                                    consumer.Add();
+                                    success = true;
+                                }
+                                finally
+                                {
+                                    if (!success)
+                                        docState.docWriter.SetAborting();
+                                }
+                                fieldState.position++;
+                                offsetEnd = fieldState.offset + offsetAttribute.EndOffset;
+                                if (++fieldState.length >= maxFieldLength)
+                                {
+                                    if (docState.infoStream != null)
+                                        docState.infoStream.WriteLine("maxFieldLength " + maxFieldLength + " reached for field " + fieldInfo.name + ", ignoring following tokens");
+                                    break;
+                                }
+                                
+                                hasMoreTokens = stream.IncrementToken();
+                            }
+                            // trigger streams to perform end-of-stream operations
+                            stream.End();
+                            
+                            fieldState.offset += offsetAttribute.EndOffset;
+                            anyToken = fieldState.length > startLength;
+                        }
+                        finally
+                        {
+                            stream.Close();
+                        }
+                    }
+                    
+                    if (anyToken)
+                        fieldState.offset += docState.analyzer.GetOffsetGap(field);
+                    fieldState.boost *= field.Boost;
+                }
                 
                 // LUCENE-2387: don't hang onto the field, so GC can
                 // reclaim
                 fields[i] = null;
-			}
-			
-			consumer.Finish();
-			endConsumer.Finish();
-		}
-	}
+            }
+            
+            consumer.Finish();
+            endConsumer.Finish();
+        }
+    }
 }
\ No newline at end of file
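
The ProcessFields code above follows Lucene.NET's standard token-consumption protocol: Reset the stream, pull tokens with IncrementToken while reading them through attributes, then End and Close. A minimal standalone sketch of that protocol follows; StandardAnalyzer, the field name "body", and the sample text are assumptions for the sketch, not taken from the commit, and any Analyzer would follow the same sequence.

    using System;
    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Analysis.Tokenattributes;

    class TokenStreamDemo
    {
        static void Main()
        {
            // Illustrative only; mirrors the Reset/IncrementToken/End/Close
            // sequence in ProcessFields above.
            Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            TokenStream stream = analyzer.ReusableTokenStream(
                "body", new System.IO.StringReader("hello token stream"));

            ITermAttribute term = stream.AddAttribute<ITermAttribute>();
            IOffsetAttribute offset = stream.AddAttribute<IOffsetAttribute>();

            stream.Reset();                    // position before the first token
            while (stream.IncrementToken())    // attributes now hold the current token
                Console.WriteLine("{0} [{1},{2}]", term.Term, offset.StartOffset, offset.EndOffset);
            stream.End();                      // end-of-stream bookkeeping (final offsets)
            stream.Close();                    // releases the underlying reader
        }
    }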

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/core/Index/DocInverterPerThread.cs
----------------------------------------------------------------------
diff --git a/src/core/Index/DocInverterPerThread.cs b/src/core/Index/DocInverterPerThread.cs
index c38ed35..afa6d14 100644
--- a/src/core/Index/DocInverterPerThread.cs
+++ b/src/core/Index/DocInverterPerThread.cs
@@ -22,86 +22,86 @@ using TokenStream = Lucene.Net.Analysis.TokenStream;
 
 namespace Lucene.Net.Index
 {
-	
-	/// <summary>This is a DocFieldConsumer that inverts each field,
-	/// separately, from a Document, and accepts a
-	/// InvertedTermsConsumer to process those terms. 
-	/// </summary>
-	
-	sealed class DocInverterPerThread : DocFieldConsumerPerThread
-	{
-		private void  InitBlock()
-		{
-			singleToken = new SingleTokenAttributeSource();
-		}
-		internal DocInverter docInverter;
-		internal InvertedDocConsumerPerThread consumer;
-		internal InvertedDocEndConsumerPerThread endConsumer;
-		internal SingleTokenAttributeSource singleToken;
-		
-		internal class SingleTokenAttributeSource : AttributeSource
-		{
-			internal ITermAttribute termAttribute;
-			internal IOffsetAttribute offsetAttribute;
+    
+    /// <summary>This is a DocFieldConsumer that inverts each field,
+    /// separately, from a Document, and accepts a
+    /// InvertedTermsConsumer to process those terms. 
+    /// </summary>
+    
+    sealed class DocInverterPerThread : DocFieldConsumerPerThread
+    {
+        private void  InitBlock()
+        {
+            singleToken = new SingleTokenAttributeSource();
+        }
+        internal DocInverter docInverter;
+        internal InvertedDocConsumerPerThread consumer;
+        internal InvertedDocEndConsumerPerThread endConsumer;
+        internal SingleTokenAttributeSource singleToken;
+        
+        internal class SingleTokenAttributeSource : AttributeSource
+        {
+            internal ITermAttribute termAttribute;
+            internal IOffsetAttribute offsetAttribute;
 
             internal SingleTokenAttributeSource()
-			{
+            {
                 termAttribute = AddAttribute<ITermAttribute>();
-				offsetAttribute = AddAttribute<IOffsetAttribute>();
-			}
-			
-			public void  Reinit(System.String stringValue, int startOffset, int endOffset)
-			{
-				termAttribute.SetTermBuffer(stringValue);
-				offsetAttribute.SetOffset(startOffset, endOffset);
-			}
-		}
-		
-		internal DocumentsWriter.DocState docState;
-		
-		internal FieldInvertState fieldState = new FieldInvertState();
-		
-		// Used to read a string value for a field
-		internal ReusableStringReader stringReader = new ReusableStringReader();
-		
-		public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter)
-		{
-			InitBlock();
-			this.docInverter = docInverter;
-			docState = docFieldProcessorPerThread.docState;
-			consumer = docInverter.consumer.AddThread(this);
-			endConsumer = docInverter.endConsumer.AddThread(this);
-		}
-		
-		public override void  StartDocument()
-		{
-			consumer.StartDocument();
-			endConsumer.StartDocument();
-		}
-		
-		public override DocumentsWriter.DocWriter FinishDocument()
-		{
-			// TODO: allow endConsumer.finishDocument to also return
-			// a DocWriter
-			endConsumer.FinishDocument();
-			return consumer.FinishDocument();
-		}
-		
-		public override void  Abort()
-		{
-			try
-			{
-				consumer.Abort();
-			}
-			finally
-			{
-				endConsumer.Abort();
-			}
-		}
-		
-		public override DocFieldConsumerPerField AddField(FieldInfo fi)
-		{
-			return new DocInverterPerField(this, fi);
-		}
-	}
+                offsetAttribute = AddAttribute<IOffsetAttribute>();
+            }
+            
+            public void  Reinit(System.String stringValue, int startOffset, int endOffset)
+            {
+                termAttribute.SetTermBuffer(stringValue);
+                offsetAttribute.SetOffset(startOffset, endOffset);
+            }
+        }
+        
+        internal DocumentsWriter.DocState docState;
+        
+        internal FieldInvertState fieldState = new FieldInvertState();
+        
+        // Used to read a string value for a field
+        internal ReusableStringReader stringReader = new ReusableStringReader();
+        
+        public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter)
+        {
+            InitBlock();
+            this.docInverter = docInverter;
+            docState = docFieldProcessorPerThread.docState;
+            consumer = docInverter.consumer.AddThread(this);
+            endConsumer = docInverter.endConsumer.AddThread(this);
+        }
+        
+        public override void  StartDocument()
+        {
+            consumer.StartDocument();
+            endConsumer.StartDocument();
+        }
+        
+        public override DocumentsWriter.DocWriter FinishDocument()
+        {
+            // TODO: allow endConsumer.finishDocument to also return
+            // a DocWriter
+            endConsumer.FinishDocument();
+            return consumer.FinishDocument();
+        }
+        
+        public override void  Abort()
+        {
+            try
+            {
+                consumer.Abort();
+            }
+            finally
+            {
+                endConsumer.Abort();
+            }
+        }
+        
+        public override DocFieldConsumerPerField AddField(FieldInfo fi)
+        {
+            return new DocInverterPerField(this, fi);
+        }
+    }
 }
\ No newline at end of file