You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by si...@apache.org on 2012/08/27 14:04:23 UTC

svn commit: r1377648 - in /incubator/lucene.net/trunk/src/demo: DeleteFiles/DeleteFiles.cs Demo.Common/FileDocument.cs Demo.Common/HTMLDocument.cs IndexFiles/IndexFiles.cs IndexHtml/IndexHtml.cs SearchFiles/SearchFiles.cs

Author: sisve
Date: Mon Aug 27 12:04:23 2012
New Revision: 1377648

URL: http://svn.apache.org/viewvc?rev=1377648&view=rev
Log:
Changes to demo-files (IDisposable, documentation, etc.)

Modified:
    incubator/lucene.net/trunk/src/demo/DeleteFiles/DeleteFiles.cs
    incubator/lucene.net/trunk/src/demo/Demo.Common/FileDocument.cs
    incubator/lucene.net/trunk/src/demo/Demo.Common/HTMLDocument.cs
    incubator/lucene.net/trunk/src/demo/IndexFiles/IndexFiles.cs
    incubator/lucene.net/trunk/src/demo/IndexHtml/IndexHtml.cs
    incubator/lucene.net/trunk/src/demo/SearchFiles/SearchFiles.cs

Modified: incubator/lucene.net/trunk/src/demo/DeleteFiles/DeleteFiles.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/demo/DeleteFiles/DeleteFiles.cs?rev=1377648&r1=1377647&r2=1377648&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/demo/DeleteFiles/DeleteFiles.cs (original)
+++ incubator/lucene.net/trunk/src/demo/DeleteFiles/DeleteFiles.cs Mon Aug 27 12:04:23 2012
@@ -16,57 +16,51 @@
  */
 
 using System;
-
-using IndexReader = Lucene.Net.Index.IndexReader;
-using Term = Lucene.Net.Index.Term;
-using Directory = Lucene.Net.Store.Directory;
+using Lucene.Net.Index;
 using FSDirectory = Lucene.Net.Store.FSDirectory;
 
 namespace Lucene.Net.Demo
 {
-	
-	
 	/// <summary>Deletes documents from an index that do not contain a term. </summary>
-	public class DeleteFiles
+	public static class DeleteFiles
 	{
 		
-		private DeleteFiles()
-		{
-		} // singleton
-		
 		/// <summary>Deletes documents from an index that do not contain a term. </summary>
 		[STAThread]
-		public static void  Main(System.String[] args)
+		public static void Main(System.String[] args)
 		{
-			System.String usage = typeof(DeleteFiles) + " <unique_term>";
+			var usage = typeof(DeleteFiles) + " <unique_term>";
 			if (args.Length == 0)
 			{
-				System.Console.Error.WriteLine("Usage: " + usage);
-				System.Environment.Exit(1);
+				Console.Error.WriteLine("Usage: " + usage);
+				Environment.Exit(1);
 			}
+
 			try
 			{
-				Directory directory = FSDirectory.Open("index");
-				IndexReader reader = IndexReader.Open(directory, false); // we don't want read-only because we are about to delete
-				
-				Term term = new Term("path", args[0]);
-				int deleted = reader.DeleteDocuments(term);
-				
-				System.Console.Out.WriteLine("deleted " + deleted + " documents containing " + term);
-				
-				// one can also delete documents by their internal id:
-				/*
-				for (int i = 0; i < reader.maxDoc(); i++) {
-				System.out.println("Deleting document with id " + i);
-				reader.delete(i);
-				}*/
-				
-				reader.Close();
-				directory.Close();
+                // We don't want a read-only reader because we are about to delete.
+				using (var directory = FSDirectory.Open("index"))
+                using (var reader = IndexReader.Open(directory, false))
+                {
+                    var term = new Term("path", args[0]);
+                    var deleted = reader.DeleteDocuments(term);
+
+                    Console.Out.WriteLine("deleted " + deleted + " documents containing " + term);
+
+                    // one can also delete documents by their internal id:
+                    /*
+                    for (int i = 0; i < reader.MaxDoc; i++) {
+                        Console.Out.WriteLine("Deleting document with id " + i);
+                        reader.DeleteDocument(i);
+                    }
+                    */
+
+                    reader.Commit();
+                }
 			}
-			catch (System.Exception e)
+			catch (Exception e)
 			{
-				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
+				Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
 			}
 		}
 	}

Modified: incubator/lucene.net/trunk/src/demo/Demo.Common/FileDocument.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/demo/Demo.Common/FileDocument.cs?rev=1377648&r1=1377647&r2=1377648&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/demo/Demo.Common/FileDocument.cs (original)
+++ incubator/lucene.net/trunk/src/demo/Demo.Common/FileDocument.cs Mon Aug 27 12:04:23 2012
@@ -16,17 +16,15 @@
  */
 
 using System;
-
-using DateTools = Lucene.Net.Documents.DateTools;
-using Document = Lucene.Net.Documents.Document;
-using Field = Lucene.Net.Documents.Field;
+using System.IO;
+using Lucene.Net.Documents;
 
 namespace Lucene.Net.Demo
 {
 	
 	/// <summary>A utility for making Lucene Documents from a File. </summary>
 	
-	public class FileDocument
+	public static class FileDocument
 	{
 		/// <summary>Makes a document for a File.
 		/// <p>
@@ -40,7 +38,7 @@ namespace Lucene.Net.Demo
 		/// <li><code>contents</code>--containing the full contents of the file, as a
 		/// Reader field;
 		/// </summary>
-		public static Document Document(System.IO.DirectoryInfo f)
+		public static Document Document(FileInfo f)
 		{
 			
 			// make a new, empty document
@@ -59,14 +57,10 @@ namespace Lucene.Net.Demo
 			// so that the text of the file is tokenized and indexed, but not stored.
 			// Note that FileReader expects the file to be in the system's default encoding.
 			// If that's not the case searching for special characters will fail.
-			doc.Add(new Field("contents", new System.IO.StreamReader(f.FullName, System.Text.Encoding.Default)));
+			doc.Add(new Field("contents", new StreamReader(f.FullName, System.Text.Encoding.Default)));
 			
 			// return the document
 			return doc;
 		}
-		
-		private FileDocument()
-		{
-		}
 	}
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/src/demo/Demo.Common/HTMLDocument.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/demo/Demo.Common/HTMLDocument.cs?rev=1377648&r1=1377647&r2=1377648&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/demo/Demo.Common/HTMLDocument.cs (original)
+++ incubator/lucene.net/trunk/src/demo/Demo.Common/HTMLDocument.cs Mon Aug 27 12:04:23 2012
@@ -16,20 +16,21 @@
  */
 
 using System;
+using System.IO;
+using Lucene.Net.Documents;
 
 using HTMLParser = Lucene.Net.Demo.Html.HTMLParser;
-using Lucene.Net.Documents;
 
 namespace Lucene.Net.Demo
 {
 	
 	/// <summary>A utility for making Lucene Documents for HTML documents. </summary>
 	
-	public class HTMLDocument
+	public static class HTMLDocument
 	{
-		internal static char dirSep = System.IO.Path.DirectorySeparatorChar.ToString()[0];
+		internal static char dirSep = Path.DirectorySeparatorChar;
 
-        public static System.String Uid(System.IO.DirectoryInfo f)
+        public static String Uid(FileInfo f)
 		{
 			// Append path and date into a string in such a way that lexicographic
 			// sorting gives the same results as a walk of the file hierarchy.  Thus
@@ -38,13 +39,13 @@ namespace Lucene.Net.Demo
 			return f.FullName.Replace(dirSep, '\u0000') + "\u0000" + DateTools.TimeToString(f.LastWriteTime.Millisecond, DateTools.Resolution.SECOND);
 		}
 		
-		public static System.String Uid2url(System.String uid)
+		public static String Uid2url(String uid)
 		{
-			System.String url = uid.Replace('\u0000', '/'); // replace nulls with slashes
+			var url = uid.Replace('\u0000', '/'); // replace nulls with slashes
 			return url.Substring(0, (url.LastIndexOf('/')) - (0)); // remove date from end
 		}
 
-        public static Document Document(System.IO.DirectoryInfo f)
+        public static Document Document(FileInfo f)
 		{
 			// make a new, empty document
 			Document doc = new Document();
@@ -62,27 +63,25 @@ namespace Lucene.Net.Demo
 			// This field is not stored with document, it is indexed, but it is not
 			// tokenized prior to indexing.
 			doc.Add(new Field("uid", Uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));
-			
-			System.IO.FileStream fis = new System.IO.FileStream(f.FullName, System.IO.FileMode.Open, System.IO.FileAccess.Read);
-			HTMLParser parser = new HTMLParser(fis);
-			
-			// Add the tag-stripped contents as a Reader-valued Text field so it will
-			// get tokenized and indexed.
-			doc.Add(new Field("contents", parser.GetReader()));
-			
-			// Add the summary as a field that is stored and returned with
-			// hit documents for display.
-			doc.Add(new Field("summary", parser.GetSummary(), Field.Store.YES, Field.Index.NO));
-			
-			// Add the title as a field that it can be searched and that is stored.
-			doc.Add(new Field("title", parser.GetTitle(), Field.Store.YES, Field.Index.ANALYZED));
-			
-			// return the document
-			return doc;
-		}
-		
-		private HTMLDocument()
-		{
-		}
+
+            using (var fileStream = f.OpenRead())
+            {
+                var parser = new HTMLParser(fileStream);
+
+                // Add the tag-stripped contents as a Reader-valued Text field so it will
+                // get tokenized and indexed.
+                doc.Add(new Field("contents", parser.GetReader()));
+
+                // Add the summary as a field that is stored and returned with
+                // hit documents for display.
+                doc.Add(new Field("summary", parser.GetSummary(), Field.Store.YES, Field.Index.NO));
+
+                // Add the title as a field that it can be searched and that is stored.
+                doc.Add(new Field("title", parser.GetTitle(), Field.Store.YES, Field.Index.ANALYZED));
+
+                // return the document
+                return doc;
+            }
+        }
 	}
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/src/demo/IndexFiles/IndexFiles.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/demo/IndexFiles/IndexFiles.cs?rev=1377648&r1=1377647&r2=1377648&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/demo/IndexFiles/IndexFiles.cs (original)
+++ incubator/lucene.net/trunk/src/demo/IndexFiles/IndexFiles.cs Mon Aug 27 12:04:23 2012
@@ -16,9 +16,10 @@
  */
 
 using System;
+using System.IO;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Index;
 
-using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
-using IndexWriter = Lucene.Net.Index.IndexWriter;
 using FSDirectory = Lucene.Net.Store.FSDirectory;
 using Version = Lucene.Net.Util.Version;
 
@@ -26,100 +27,85 @@ namespace Lucene.Net.Demo
 {
 	
 	/// <summary>Index all text files under a directory. </summary>
-	public class IndexFiles
+	public static class IndexFiles
 	{
-		
-		private IndexFiles()
-		{
-		}
-
-        internal static readonly System.IO.DirectoryInfo INDEX_DIR = new System.IO.DirectoryInfo("index");
+        internal static readonly DirectoryInfo INDEX_DIR = new DirectoryInfo("index");
 		
 		/// <summary>Index all text files under a directory. </summary>
 		[STAThread]
-		public static void  Main(System.String[] args)
+		public static void Main(String[] args)
 		{
-			System.String usage = typeof(IndexFiles) + " <root_directory>";
+			var usage = typeof(IndexFiles) + " <root_directory>";
 			if (args.Length == 0)
 			{
-				System.Console.Error.WriteLine("Usage: " + usage);
-				System.Environment.Exit(1);
+				Console.Error.WriteLine("Usage: " + usage);
+				Environment.Exit(1);
 			}
-			
-			bool tmpBool;
-			if (System.IO.File.Exists(INDEX_DIR.FullName))
-				tmpBool = true;
-			else
-				tmpBool = System.IO.Directory.Exists(INDEX_DIR.FullName);
-			if (tmpBool)
-			{
-				System.Console.Out.WriteLine("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
-				System.Environment.Exit(1);
-			}
-
-            var docDir = new System.IO.DirectoryInfo(args[0]);
-			bool tmpBool2;
-			if (System.IO.File.Exists(docDir.FullName))
-				tmpBool2 = true;
-			else
-				tmpBool2 = System.IO.Directory.Exists(docDir.FullName);
-			if (!tmpBool2) // || !docDir.canRead()) // {{Aroush}} what is canRead() in C#?
+
+			if (File.Exists(INDEX_DIR.FullName) || Directory.Exists(INDEX_DIR.FullName))
 			{
-				System.Console.Out.WriteLine("Document directory '" + docDir.FullName + "' does not exist or is not readable, please check the path");
-				System.Environment.Exit(1);
+				Console.Out.WriteLine("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
+				Environment.Exit(1);
+			}
+
+            var docDir = new DirectoryInfo(args[0]);
+		    var docDirExists = File.Exists(docDir.FullName) || Directory.Exists(docDir.FullName);
+			if (!docDirExists) // || !docDir.canRead()) // {{Aroush}} what is canRead() in C#?
+			{
+				Console.Out.WriteLine("Document directory '" + docDir.FullName + "' does not exist or is not readable, please check the path");
+				Environment.Exit(1);
 			}
 			
-			System.DateTime start = System.DateTime.Now;
+			var start = DateTime.Now;
 			try
 			{
-				IndexWriter writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
-				System.Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
-				IndexDocs(writer, docDir);
-				System.Console.Out.WriteLine("Optimizing...");
-				writer.Optimize();
-				writer.Close();
-				
-				System.DateTime end = System.DateTime.Now;
-				System.Console.Out.WriteLine(end.Millisecond - start.Millisecond + " total milliseconds");
+                using (var writer = new IndexWriter(FSDirectory.Open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.LIMITED))
+                {
+                    Console.Out.WriteLine("Indexing to directory '" + INDEX_DIR + "'...");
+                    IndexDirectory(writer, docDir);
+                    Console.Out.WriteLine("Optimizing...");
+                    writer.Optimize();
+                    writer.Commit();
+                }
+			    var end = DateTime.Now;
+				Console.Out.WriteLine(end.Millisecond - start.Millisecond + " total milliseconds");
 			}
-			catch (System.IO.IOException e)
+			catch (IOException e)
 			{
-				System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
+				Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
 			}
 		}
 		
-		internal static void  IndexDocs(IndexWriter writer, System.IO.DirectoryInfo file)
+        internal static void IndexDirectory(IndexWriter writer, DirectoryInfo directory)
+        {
+            foreach(var subDirectory in directory.GetDirectories())
+                IndexDirectory(writer, subDirectory);
+
+            foreach (var file in directory.GetFiles())
+                IndexDocs(writer, file);
+        }
+
+		internal static void IndexDocs(IndexWriter writer, FileInfo file)
 		{
-			// do not try to index files that cannot be read
-			// if (file.canRead())  // {{Aroush}} what is canRead() in C#?
+			Console.Out.WriteLine("adding " + file);
+
+			try
+			{
+				writer.AddDocument(FileDocument.Document(file));
+			}
+			catch (FileNotFoundException)
 			{
-				if (System.IO.Directory.Exists(file.FullName))
-				{
-					System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName);
-					// an IO error could occur
-					if (files != null)
-					{
-						for (int i = 0; i < files.Length; i++)
-						{
-                            IndexDocs(writer, new System.IO.DirectoryInfo(files[i]));
-						}
-					}
-				}
-				else
-				{
-					System.Console.Out.WriteLine("adding " + file);
-					try
-					{
-						writer.AddDocument(FileDocument.Document(file));
-					}
-					// at least on windows, some temporary files raise this exception with an "access denied" message
-					// checking if the file can be read doesn't help
-					catch (System.IO.FileNotFoundException fnfe)
-					{
-						;
-					}
-				}
+                // At least on Windows, some temporary files raise this exception with an
+                // "access denied" message; checking if the file can be read doesn't help.
 			}
+            catch (UnauthorizedAccessException)
+            {
+                // Handle any access-denied errors that occur while reading the file.    
+            }
+            catch (IOException)
+            {
+                // Generic handler for any io-related exceptions that occur.
+            }
 		}
 	}
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/src/demo/IndexHtml/IndexHtml.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/demo/IndexHtml/IndexHtml.cs?rev=1377648&r1=1377647&r2=1377648&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/demo/IndexHtml/IndexHtml.cs (original)
+++ incubator/lucene.net/trunk/src/demo/IndexHtml/IndexHtml.cs Mon Aug 27 12:04:23 2012
@@ -16,13 +16,11 @@
  */
 
 using System;
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Index;
 
-using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
-using Document = Lucene.Net.Documents.Document;
-using IndexReader = Lucene.Net.Index.IndexReader;
-using IndexWriter = Lucene.Net.Index.IndexWriter;
-using Term = Lucene.Net.Index.Term;
-using TermEnum = Lucene.Net.Index.TermEnum;
 using FSDirectory = Lucene.Net.Store.FSDirectory;
 using Version = Lucene.Net.Util.Version;
 
@@ -30,32 +28,24 @@ namespace Lucene.Net.Demo
 {
 	
 	/// <summary>Indexer for HTML files. </summary>
-	public class IndexHTML
+	public static class IndexHTML
 	{
-		private IndexHTML()
-		{
-		}
-		
-		private static bool deleting = false; // true during deletion pass
-		private static IndexReader reader; // existing index
-		private static IndexWriter writer; // new index being built
-		private static TermEnum uidIter; // document id iterator
-		
+	    
 		/// <summary>Indexer for HTML files.</summary>
 		[STAThread]
-		public static void  Main(System.String[] argv)
+		public static void Main(System.String[] argv)
 		{
 			try
 			{
-                var index = new System.IO.DirectoryInfo("index");
+                var index = new DirectoryInfo("index");
 				bool create = false;
-                System.IO.DirectoryInfo root = null;
+                DirectoryInfo root = null;
 				
-				System.String usage = "IndexHTML [-create] [-index <index>] <root_directory>";
+				var usage = "IndexHTML [-create] [-index <index>] <root_directory>";
 				
 				if (argv.Length == 0)
 				{
-					System.Console.Error.WriteLine("Usage: " + usage);
+					Console.Error.WriteLine("Usage: " + usage);
 					return ;
 				}
 				
@@ -64,7 +54,7 @@ namespace Lucene.Net.Demo
 					if (argv[i].Equals("-index"))
 					{
 						// parse -index option
-                        index = new System.IO.DirectoryInfo(argv[++i]);
+                        index = new DirectoryInfo(argv[++i]);
 					}
 					else if (argv[i].Equals("-create"))
 					{
@@ -73,43 +63,49 @@ namespace Lucene.Net.Demo
 					}
 					else if (i != argv.Length - 1)
 					{
-						System.Console.Error.WriteLine("Usage: " + usage);
+						Console.Error.WriteLine("Usage: " + usage);
 						return ;
 					}
 					else
-                        root = new System.IO.DirectoryInfo(argv[i]);
+                        root = new DirectoryInfo(argv[i]);
 				}
 				
 				if (root == null)
 				{
-					System.Console.Error.WriteLine("Specify directory to index");
-					System.Console.Error.WriteLine("Usage: " + usage);
+					Console.Error.WriteLine("Specify directory to index");
+					Console.Error.WriteLine("Usage: " + usage);
 					return ;
 				}
 				
-				System.DateTime start = System.DateTime.Now;
-				
-				if (!create)
-				{
-					// delete stale docs
-					deleting = true;
-					IndexDocs(root, index, create);
-				}
-				writer = new IndexWriter(FSDirectory.Open(index), new StandardAnalyzer(Version.LUCENE_CURRENT), create, new IndexWriter.MaxFieldLength(1000000));
-				IndexDocs(root, index, create); // add new docs
-				
-				System.Console.Out.WriteLine("Optimizing index...");
-				writer.Optimize();
-				writer.Close();
-				
-				System.DateTime end = System.DateTime.Now;
+				var start = DateTime.Now;
+
+                using (var writer = new IndexWriter(FSDirectory.Open(index), new StandardAnalyzer(Version.LUCENE_30), create, new IndexWriter.MaxFieldLength(1000000)))
+                {
+				    if (!create)
+				    {
+					    // We're not creating a new index, iterate our index and remove
+                        // any stale documents.
+					    IndexDocs(writer, root, index, Operation.RemoveStale);
+				    }
+
+                    var operation = create 
+                        ? Operation.CompleteReindex 
+                        : Operation.IncrementalReindex;
+                    IndexDocs(writer, root, index, operation); // add new docs
+
+                    Console.Out.WriteLine("Optimizing index...");
+                    writer.Optimize();
+                    writer.Commit();
+                }
+
+			    var end = DateTime.Now;
 				
-				System.Console.Out.Write(end.Millisecond - start.Millisecond);
-				System.Console.Out.WriteLine(" total milliseconds");
+				Console.Out.Write(end.Millisecond - start.Millisecond);
+				Console.Out.WriteLine(" total milliseconds");
 			}
-			catch (System.Exception e)
+			catch (Exception e)
 			{
-				System.Console.Error.WriteLine(e.StackTrace);
+				Console.Error.WriteLine(e.StackTrace);
 			}
 		}
 		
@@ -119,86 +115,127 @@ namespace Lucene.Net.Demo
 		/* documents, to be indexed.
 		*/
 
-        private static void IndexDocs(System.IO.DirectoryInfo file, System.IO.DirectoryInfo index, bool create)
+        private static void IndexDocs(IndexWriter writer, DirectoryInfo file, DirectoryInfo index, Operation operation)
 		{
-			if (!create)
-			{
-				// incrementally update
-				
-				reader = IndexReader.Open(FSDirectory.Open(index), false); // open existing index
-				uidIter = reader.Terms(new Term("uid", "")); // init uid iterator
-				
-				IndexDocs(file);
-				
-				if (deleting)
-				{
-					// delete rest of stale docs
-					while (uidIter.Term != null && (System.Object) uidIter.Term.Field == (System.Object) "uid")
-					{
-						System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term.Text));
-						reader.DeleteDocuments(uidIter.Term);
-						uidIter.Next();
-					}
-					deleting = false;
-				}
-				
-				uidIter.Close(); // close uid iterator
-				reader.Close(); // close existing index
-			}
-			// don't have exisiting
-			else
-				IndexDocs(file);
+            if (operation == Operation.CompleteReindex) 
+            {
+                // Perform a full reindexing.
+                IndexDirectory(writer, null, file, operation);
+            }
+            else
+            {
+                // Perform an incremental reindexing.
+
+                using (var reader = IndexReader.Open(FSDirectory.Open(index), true)) // open existing index
+                using (var uidIter = reader.Terms(new Term("uid", ""))) // init uid iterator
+                {
+                    IndexDirectory(writer, uidIter, file, operation);
+
+                    if (operation == Operation.RemoveStale) {
+                        // Delete remaining, presumed stale, documents. This works since
+                        // the above call to IndexDirectory should have positioned the uidIter
+                        // after any uids matching existing documents. Any remaining uid
+                        // is left over from a document that has been deleted since it was
+                        // indexed.
+                        while (uidIter.Term != null && uidIter.Term.Field == "uid") {
+                            Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term.Text));
+                            writer.DeleteDocuments(uidIter.Term);
+                            uidIter.Next();
+                        }
+                    }
+                }
+            }
 		}
 
-        private static void IndexDocs(System.IO.DirectoryInfo file)
+        private static void IndexDirectory(IndexWriter writer, TermEnum uidIter, DirectoryInfo dir, Operation operation) {
+            var entries = Directory.GetFileSystemEntries(dir.FullName);
+
+            // Sort the entries. This is important, the uidIter TermEnum is
+            // iterated in a forward-only fashion, requiring all files to be
+            // passed in ascending order.
+            Array.Sort(entries);
+
+            foreach (var entry in entries) {
+                var path = Path.Combine(dir.FullName, entry);
+                if (Directory.Exists(path)) {
+                    IndexDirectory(writer, uidIter, new DirectoryInfo(path), operation);
+                } else if (File.Exists(path)) {
+                    IndexFile(writer, uidIter, new FileInfo(path), operation);
+                }
+            }
+        }
+
+        private static void IndexFile(IndexWriter writer, TermEnum uidIter, FileInfo file, Operation operation)
 		{
-			if (System.IO.Directory.Exists(file.FullName))
+			if (file.FullName.EndsWith(".html") || file.FullName.EndsWith(".htm") || file.FullName.EndsWith(".txt"))
 			{
-				// if a directory
-				System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName); // list its files
-				System.Array.Sort(files); // sort the files
-				for (int i = 0; i < files.Length; i++)
-				// recursively index them
-                    IndexDocs(new System.IO.DirectoryInfo(System.IO.Path.Combine(file.FullName, files[i])));
-			}
-			else if (file.FullName.EndsWith(".html") || file.FullName.EndsWith(".htm") || file.FullName.EndsWith(".txt"))
-			{
-				// index .txt files
+				// We've found a file we should index.
 				
-				if (uidIter != null)
+				if (operation == Operation.IncrementalReindex ||
+                    operation == Operation.RemoveStale)
 				{
-					System.String uid = HTMLDocument.Uid(file); // construct uid for doc
+                    // We should only get here with an open uidIter.
+                    Debug.Assert(uidIter != null, "Expected uidIter != null for operation " + operation);
+
+					var uid = HTMLDocument.Uid(file); // construct uid for doc
 					
-					while (uidIter.Term != null && (System.Object) uidIter.Term.Field == (System.Object) "uid" && String.CompareOrdinal(uidIter.Term.Text, uid) < 0)
+					while (uidIter.Term != null && uidIter.Term.Field == "uid" && String.CompareOrdinal(uidIter.Term.Text, uid) < 0)
 					{
-						if (deleting)
+						if (operation == Operation.RemoveStale)
 						{
-							// delete stale docs
-							System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term.Text));
-							reader.DeleteDocuments(uidIter.Term);
+							Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term.Text));
+							writer.DeleteDocuments(uidIter.Term);
 						}
 						uidIter.Next();
 					}
-					if (uidIter.Term != null && (System.Object) uidIter.Term.Field == (System.Object) "uid" && String.CompareOrdinal(uidIter.Term.Text, uid) == 0)
+
+                    // The uidIter TermEnum should now be pointing at either
+                    //  1) a null term, meaning there are no more uids to check.
+                    //  2) a term matching the current file.
+                    //  3) a term not matching us.
+                    if (uidIter.Term != null && uidIter.Term.Field == "uid" && String.CompareOrdinal(uidIter.Term.Text, uid) == 0)
 					{
-						uidIter.Next(); // keep matching docs
+                        // uidIter points to the current document, we should move one
+                        // step ahead to keep state consistent, and carry on.
+						uidIter.Next();
 					}
-					else if (!deleting)
+					else if (operation == Operation.IncrementalReindex)
 					{
-						// add new docs
-						Document doc = HTMLDocument.Document(file);
-						System.Console.Out.WriteLine("adding " + doc.Get("path"));
+                        // uidIter does not point to the current document, and we're
+                        // currently indexing documents.
+						var doc = HTMLDocument.Document(file);
+						Console.Out.WriteLine("adding " + doc.Get("path"));
 						writer.AddDocument(doc);
 					}
 				}
 				else
 				{
-					// creating a new index
-					Document doc = HTMLDocument.Document(file);
-					System.Console.Out.WriteLine("adding " + doc.Get("path"));
-					writer.AddDocument(doc); // add docs unconditionally
+                    // We're doing a complete reindexing. We aren't using uidIter,
+                    // but for completeness we assert that it's null (as expected).
+                    Debug.Assert(uidIter == null, "Expected uidIter == null for operation == " + operation);
+
+					var doc = HTMLDocument.Document(file);
+					Console.Out.WriteLine("adding " + doc.Get("path"));
+					writer.AddDocument(doc);
 				}
 			}
 		}
+
+        private enum Operation {
+            /// <summary>
+            ///   Indicates an incremental indexing.
+            /// </summary>
+            IncrementalReindex,
+
+            /// <summary>
+            ///   Indicates that stale entries in the index should be removed.
+            /// </summary>
+            RemoveStale,
+
+            /// <summary>
+            ///   Indicates a complete reindexing.
+            /// </summary>
+            CompleteReindex
+        }
 	}
 }
\ No newline at end of file

Modified: incubator/lucene.net/trunk/src/demo/SearchFiles/SearchFiles.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/trunk/src/demo/SearchFiles/SearchFiles.cs?rev=1377648&r1=1377647&r2=1377648&view=diff
==============================================================================
--- incubator/lucene.net/trunk/src/demo/SearchFiles/SearchFiles.cs (original)
+++ incubator/lucene.net/trunk/src/demo/SearchFiles/SearchFiles.cs Mon Aug 27 12:04:23 2012
@@ -16,38 +16,32 @@
  */
 
 using System;
-
-using Analyzer = Lucene.Net.Analysis.Analyzer;
-using StandardAnalyzer = Lucene.Net.Analysis.Standard.StandardAnalyzer;
-using Document = Lucene.Net.Documents.Document;
-using FilterIndexReader = Lucene.Net.Index.FilterIndexReader;
-using IndexReader = Lucene.Net.Index.IndexReader;
-using QueryParser = Lucene.Net.QueryParsers.QueryParser;
+using System.IO;
+using System.Text;
+using Lucene.Net.Analysis;
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Documents;
+using Lucene.Net.QueryParsers;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
 using FSDirectory = Lucene.Net.Store.FSDirectory;
 using Version = Lucene.Net.Util.Version;
-using Collector = Lucene.Net.Search.Collector;
-using IndexSearcher = Lucene.Net.Search.IndexSearcher;
-using Query = Lucene.Net.Search.Query;
-using ScoreDoc = Lucene.Net.Search.ScoreDoc;
-using Scorer = Lucene.Net.Search.Scorer;
-using Searcher = Lucene.Net.Search.Searcher;
-using TopScoreDocCollector = Lucene.Net.Search.TopScoreDocCollector;
 
 namespace Lucene.Net.Demo
 {
 	
 	/// <summary>Simple command-line based search demo. </summary>
-	public class SearchFiles
+	public static class SearchFiles
 	{
-		private class AnonymousClassCollector:Collector
+		private class AnonymousClassCollector : Collector
 		{
 			private Scorer scorer;
 			private int docBase;
 			
 			// simply print docId and score of every matching document
-			public override void  Collect(int doc)
+			public override void Collect(int doc)
 			{
-				System.Console.Out.WriteLine("doc=" + doc + docBase + " score=" + scorer.Score());
+				Console.Out.WriteLine("doc=" + doc + docBase + " score=" + scorer.Score());
 			}
 			
 			public override bool AcceptsDocsOutOfOrder
@@ -55,7 +49,7 @@ namespace Lucene.Net.Demo
                 get { return true; }
 			}
 			
-			public override void  SetNextReader(IndexReader reader, int docBase)
+			public override void SetNextReader(IndexReader reader, int docBase)
 			{
 				this.docBase = docBase;
 			}
@@ -66,49 +60,46 @@ namespace Lucene.Net.Demo
 			}
 		}
 		
-		/// <summary>Use the norms from one field for all fields.  Norms are read into memory,
+		/// <summary>
+		/// Use the norms from one field for all fields.  Norms are read into memory,
 		/// using a byte of memory per document per searched field.  This can cause
 		/// search of large collections with a large number of fields to run out of
 		/// memory.  If all of the fields contain only a single token, then the norms
 		/// are all identical, then single norm vector may be shared. 
 		/// </summary>
-		private class OneNormsReader:FilterIndexReader
+		private class OneNormsReader : FilterIndexReader
 		{
-			private System.String field;
+			private readonly String field;
 			
-			public OneNormsReader(IndexReader in_Renamed, System.String field):base(in_Renamed)
+			public OneNormsReader(IndexReader in_Renamed, String field):base(in_Renamed)
 			{
 				this.field = field;
 			}
 			
-			public override byte[] Norms(System.String field)
+			public override byte[] Norms(String field)
 			{
 				return in_Renamed.Norms(this.field);
 			}
 		}
-		
-		private SearchFiles()
-		{
-		}
-		
+				
 		/// <summary>Simple command-line based search demo. </summary>
 		[STAThread]
-		public static void  Main(System.String[] args)
+		public static void Main(String[] args)
 		{
-			System.String usage = "Usage:\t" + typeof(SearchFiles) + "[-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
+			String usage = "Usage:\t" + typeof(SearchFiles) + "[-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
 			usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
 			if (args.Length > 0 && ("-h".Equals(args[0]) || "-help".Equals(args[0])))
 			{
-				System.Console.Out.WriteLine(usage);
-				System.Environment.Exit(0);
+				Console.Out.WriteLine(usage);
+				Environment.Exit(0);
 			}
 			
-			System.String index = "index";
-			System.String field = "contents";
-			System.String queries = null;
+			String index = "index";
+			String field = "contents";
+			String queries = null;
 			int repeat = 0;
 			bool raw = false;
-			System.String normsField = null;
+			String normsField = null;
 			bool paging = true;
 			int hitsPerPage = 10;
 			
@@ -131,7 +122,7 @@ namespace Lucene.Net.Demo
 				}
 				else if ("-repeat".Equals(args[i]))
 				{
-					repeat = System.Int32.Parse(args[i + 1]);
+					repeat = Int32.Parse(args[i + 1]);
 					i++;
 				}
 				else if ("-raw".Equals(args[i]))
@@ -151,7 +142,7 @@ namespace Lucene.Net.Demo
 					}
 					else
 					{
-						hitsPerPage = System.Int32.Parse(args[i + 1]);
+						hitsPerPage = Int32.Parse(args[i + 1]);
 						if (hitsPerPage == 0)
 						{
 							paging = false;
@@ -160,69 +151,82 @@ namespace Lucene.Net.Demo
 					i++;
 				}
 			}
+
+		    IndexReader indexReader = null;
+            try
+            {
+                // only searching, so read-only=true
+                indexReader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(index)), true);
+
+			    if (normsField != null)
+				    indexReader = new OneNormsReader(indexReader, normsField);
 			
-			IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.DirectoryInfo(index)), true); // only searching, so read-only=true
-			
-			if (normsField != null)
-				reader = new OneNormsReader(reader, normsField);
-			
-			Searcher searcher = new IndexSearcher(reader);
-			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
+			    Searcher searcher = new IndexSearcher(indexReader);
+			    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
 			
-			System.IO.StreamReader in_Renamed = null;
-			if (queries != null)
-			{
-				in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(queries, System.Text.Encoding.Default).BaseStream, new System.IO.StreamReader(queries, System.Text.Encoding.Default).CurrentEncoding);
-			}
-			else
-			{
-				in_Renamed = new System.IO.StreamReader(new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8")).BaseStream, new System.IO.StreamReader(System.Console.OpenStandardInput(), System.Text.Encoding.GetEncoding("UTF-8")).CurrentEncoding);
-			}
-			QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
-			while (true)
-			{
-				if (queries == null)
-				// prompt the user
-					System.Console.Out.WriteLine("Enter query: ");
-				
-				System.String line = in_Renamed.ReadLine();
-				
-				if (line == null || line.Length == - 1)
-					break;
-				
-				line = line.Trim();
-				if (line.Length == 0)
-					break;
-				
-				Query query = parser.Parse(line);
-				System.Console.Out.WriteLine("Searching for: " + query.ToString(field));
-				
-				
-				if (repeat > 0)
-				{
-					// repeat & time as benchmark
-					System.DateTime start = System.DateTime.Now;
-					for (int i = 0; i < repeat; i++)
-					{
-						searcher.Search(query, null, 100);
-					}
-					System.DateTime end = System.DateTime.Now;
-					System.Console.Out.WriteLine("Time: " + (end.Millisecond - start.Millisecond) + "ms");
-				}
-				
-				if (paging)
-				{
-					DoPagingSearch(in_Renamed, searcher, query, hitsPerPage, raw, queries == null);
-				}
-				else
-				{
-					DoStreamingSearch(searcher, query);
-				}
-			}
-			reader.Close();
+			    StreamReader queryReader;
+			    if (queries != null)
+			    {
+				    queryReader = new StreamReader(new StreamReader(queries, Encoding.Default).BaseStream, new StreamReader(queries, Encoding.Default).CurrentEncoding);
+			    }
+			    else
+			    {
+				    queryReader = new StreamReader(new StreamReader(Console.OpenStandardInput(), Encoding.UTF8).BaseStream, new StreamReader(Console.OpenStandardInput(), Encoding.UTF8).CurrentEncoding);
+			    }
+
+                var parser = new QueryParser(Version.LUCENE_30, field, analyzer);
+			    while (true)
+			    {
+				    if (queries == null)
+				    // prompt the user
+					    Console.Out.WriteLine("Enter query: ");
+				
+				    String line = queryReader.ReadLine();
+				
+				    if (line == null || line.Length == - 1)
+					    break;
+				
+				    line = line.Trim();
+				    if (line.Length == 0)
+					    break;
+				
+				    Query query = parser.Parse(line);
+				    Console.Out.WriteLine("Searching for: " + query.ToString(field));
+				
+				    if (repeat > 0)
+				    {
+					    // repeat & time as benchmark
+					    DateTime start = DateTime.Now;
+					    for (int i = 0; i < repeat; i++)
+					    {
+						    searcher.Search(query, null, 100);
+					    }
+					    DateTime end = DateTime.Now;
+					    Console.Out.WriteLine("Time: " + (end.Millisecond - start.Millisecond) + "ms");
+				    }
+				
+				    if (paging)
+				    {
+					    DoPagingSearch(queryReader, searcher, query, hitsPerPage, raw, queries == null);
+				    }
+				    else
+				    {
+					    DoStreamingSearch(searcher, query);
+				    }
+			    }
+			    queryReader.Close();
+            } 
+            finally 
+            {
+                if (indexReader != null)
+                {
+                    indexReader.Dispose();
+                }
+            }
 		}
 		
-		/// <summary> This method uses a custom HitCollector implementation which simply prints out
+		/// <summary>
+		/// This method uses a custom Collector implementation which simply prints out
 		/// the docId and score of every matching document. 
 		/// 
 		/// This simulates the streaming search use case, where all hits are supposed to
@@ -231,7 +235,6 @@ namespace Lucene.Net.Demo
 		public static void  DoStreamingSearch(Searcher searcher, Query query)
 		{
 			Collector streamingHitCollector = new AnonymousClassCollector();
-			
 			searcher.Search(query, streamingHitCollector);
 		}
 		
@@ -244,28 +247,28 @@ namespace Lucene.Net.Demo
 		/// is executed another time and all hits are collected.
 		/// 
 		/// </summary>
-		public static void  DoPagingSearch(System.IO.StreamReader in_Renamed, Searcher searcher, Query query, int hitsPerPage, bool raw, bool interactive)
+		public static void  DoPagingSearch(StreamReader input, Searcher searcher, Query query, int hitsPerPage, bool raw, bool interactive)
 		{
 			
 			// Collect enough docs to show 5 pages
-			TopScoreDocCollector collector = TopScoreDocCollector.Create(5 * hitsPerPage, false);
+			var collector = TopScoreDocCollector.Create(5 * hitsPerPage, false);
 			searcher.Search(query, collector);
-			ScoreDoc[] hits = collector.TopDocs().ScoreDocs;
+			var hits = collector.TopDocs().ScoreDocs;
 			
 			int numTotalHits = collector.TotalHits;
-			System.Console.Out.WriteLine(numTotalHits + " total matching documents");
+			Console.Out.WriteLine(numTotalHits + " total matching documents");
 			
 			int start = 0;
-			int end = System.Math.Min(numTotalHits, hitsPerPage);
+			int end = Math.Min(numTotalHits, hitsPerPage);
 			
 			while (true)
 			{
 				if (end > hits.Length)
 				{
-					System.Console.Out.WriteLine("Only results 1 - " + hits.Length + " of " + numTotalHits + " total matching documents collected.");
-					System.Console.Out.WriteLine("Collect more (y/n) ?");
-					System.String line = in_Renamed.ReadLine();
-					if (line.Length == 0 || line[0] == 'n')
+					Console.Out.WriteLine("Only results 1 - " + hits.Length + " of " + numTotalHits + " total matching documents collected.");
+					Console.Out.WriteLine("Collect more (y/n) ?");
+					String line = input.ReadLine();
+					if (String.IsNullOrEmpty(line) || line[0] == 'n')
 					{
 						break;
 					}
@@ -275,31 +278,31 @@ namespace Lucene.Net.Demo
 					hits = collector.TopDocs().ScoreDocs;
 				}
 				
-				end = System.Math.Min(hits.Length, start + hitsPerPage);
+				end = Math.Min(hits.Length, start + hitsPerPage);
 				
 				for (int i = start; i < end; i++)
 				{
 					if (raw)
 					{
 						// output raw format
-						System.Console.Out.WriteLine("doc=" + hits[i].Doc + " score=" + hits[i].Score);
+						Console.Out.WriteLine("doc=" + hits[i].Doc + " score=" + hits[i].Score);
 						continue;
 					}
 					
 					Document doc = searcher.Doc(hits[i].Doc);
-					System.String path = doc.Get("path");
+					String path = doc.Get("path");
 					if (path != null)
 					{
-						System.Console.Out.WriteLine((i + 1) + ". " + path);
-						System.String title = doc.Get("title");
+						Console.Out.WriteLine((i + 1) + ". " + path);
+						String title = doc.Get("title");
 						if (title != null)
 						{
-							System.Console.Out.WriteLine("   Title: " + doc.Get("title"));
+							Console.Out.WriteLine("   Title: " + doc.Get("title"));
 						}
 					}
 					else
 					{
-						System.Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
+						Console.Out.WriteLine((i + 1) + ". " + "No path for this document");
 					}
 				}
 				
@@ -313,26 +316,26 @@ namespace Lucene.Net.Demo
 					bool quit = false;
 					while (true)
 					{
-						System.Console.Out.Write("Press ");
+						Console.Out.Write("Press ");
 						if (start - hitsPerPage >= 0)
 						{
-							System.Console.Out.Write("(p)revious page, ");
+							Console.Out.Write("(p)revious page, ");
 						}
 						if (start + hitsPerPage < numTotalHits)
 						{
-							System.Console.Out.Write("(n)ext page, ");
+							Console.Out.Write("(n)ext page, ");
 						}
-						System.Console.Out.WriteLine("(q)uit or enter number to jump to a page.");
+						Console.Out.WriteLine("(q)uit or enter number to jump to a page.");
 						
-						System.String line = in_Renamed.ReadLine();
-						if (line.Length == 0 || line[0] == 'q')
+						String line = input.ReadLine();
+						if (String.IsNullOrEmpty(line) || line[0] == 'q')
 						{
 							quit = true;
 							break;
 						}
 						if (line[0] == 'p')
 						{
-							start = System.Math.Max(0, start - hitsPerPage);
+							start = Math.Max(0, start - hitsPerPage);
 							break;
 						}
 						else if (line[0] == 'n')
@@ -345,7 +348,7 @@ namespace Lucene.Net.Demo
 						}
 						else
 						{
-							int page = System.Int32.Parse(line);
+							int page = Int32.Parse(line);
 							if ((page - 1) * hitsPerPage < numTotalHits)
 							{
 								start = (page - 1) * hitsPerPage;
@@ -353,13 +356,13 @@ namespace Lucene.Net.Demo
 							}
 							else
 							{
-								System.Console.Out.WriteLine("No such page");
+								Console.Out.WriteLine("No such page");
 							}
 						}
 					}
 					if (quit)
 						break;
-					end = System.Math.Min(numTotalHits, start + hitsPerPage);
+					end = Math.Min(numTotalHits, start + hitsPerPage);
 				}
 			}
 		}