You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by bu...@apache.org on 2010/07/22 21:34:52 UTC
svn commit: r966819 [2/20] - in /lucene/dev/branches/realtime_search: ./ lucene/ lucene/backwards/ lucene/contrib/ lucene/contrib/benchmark/conf/ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ lucene/contrib/benchmark/src/j...

Propchange: lucene/dev/branches/realtime_search/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1 +1,2 @@
-/lucene/dev/branches/branch_3x:949730
+/lucene/dev/branches/branch_3x:949730,957490,961612
+/lucene/dev/trunk:953476-966816

Modified: lucene/dev/branches/realtime_search/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/build.xml?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/build.xml (original)
+++ lucene/dev/branches/realtime_search/build.xml Thu Jul 22 19:34:35 2010
@@ -44,6 +44,18 @@
     </sequential>
   </target>
 
+  <target name="generate-maven-artifacts" description="Generate Maven Artifacts for Lucene and Solr">
+    <sequential>
+      <subant target="generate-maven-artifacts" inheritall="false" failonerror="true">
+        <fileset dir="lucene" includes="build.xml" />
+      </subant>
+
+      <subant target="generate-maven-artifacts" inheritall="false" failonerror="true">
+        <fileset dir="solr" includes="build.xml" />
+      </subant>
+    </sequential>
+  </target>
+
   <target name="clean" description="Clean Lucene and Solr">
     <sequential>
       <subant target="clean" inheritall="false" failonerror="true">

Propchange: lucene/dev/branches/realtime_search/lucene/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,4 +1,5 @@
-/lucene/dev/branches/branch_3x/lucene:943137,949730
+/lucene/dev/branches/branch_3x/lucene:943137,949730,957490,960490,961612
+/lucene/dev/trunk/lucene:953476-966816
 /lucene/java/branches/flex_1458:824912-931101
 /lucene/java/branches/lucene_2_4:748824
 /lucene/java/branches/lucene_2_9:817269-818600,829134,829881,831036,896850,909334,948516

Modified: lucene/dev/branches/realtime_search/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/CHANGES.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/realtime_search/lucene/CHANGES.txt Thu Jul 22 19:34:35 2010
@@ -19,21 +19,31 @@ Changes in backwards compatibility polic
     3.1 you can start indexing new documents into an existing index.
     But for best performance you should fully reindex.
 
+  - The postings APIs (TermEnum, TermDocsEnum, TermPositionsEnum)
+    have been removed in favor of the new flexible
+    indexing (flex) APIs (Fields, FieldsEnum, Terms, TermsEnum,
+    DocsEnum, DocsAndPositionsEnum). One big difference is that field
+    and terms are now enumerated separately: a TermsEnum provides a
+    BytesRef (wraps a byte[]) per term within a single field, not a
+    Term.  Another is that when asking for a Docs/AndPositionsEnum, you
+    now specify the skipDocs explicitly (typically this will be the
+    deleted docs, but in general you can provide any Bits).
+
   - MultiReader ctor now throws IOException
 
   - Directory.copy/Directory.copyTo now copies all files (not just
     index files), since what is and isn't an index file is now
-    dependent on the codecs used. (Mike McCandless)
+    dependent on the codecs used.
 
   - UnicodeUtil now uses BytesRef for UTF-8 output, and some method
     signatures have changed to CharSequence.  These are internal APIs
-    and subject to change suddenly.  (Robert Muir, Mike McCandless)
+    and subject to change suddenly.
 
   - Positional queries (PhraseQuery, *SpanQuery) will now throw an
     exception if you use them on a field that omits positions during
     indexing (previously they silently returned no results).
 
-  - FieldCache.(Byte,Short,Int,Long,Float,Double}Parser's API has
+  - FieldCache.{Byte,Short,Int,Long,Float,Double}Parser's API has
     changed -- each parse method now takes a BytesRef instead of a
     String.  If you have an existing Parser, a simple way to fix it is
     invoke BytesRef.utf8ToString, and pass that String to your
@@ -57,6 +67,8 @@ Changes in backwards compatibility polic
     an IllegalArgumentException, because the NTS does not support
     TermAttribute/CharTermAttribute. If you want to further filter
     or attach Payloads to NTS, use the new NumericTermAttribute.
+    
+  (Mike McCandless, Robert Muir, Uwe Schindler, Mark Miller, Michael Busch)
 
 * LUCENE-2386: IndexWriter no longer performs an empty commit upon new index
   creation. Previously, if you passed an empty Directory and set OpenMode to
@@ -79,7 +91,7 @@ Changes in backwards compatibility polic
   (getTerms, getTermsIndex).  Also, the sort values (returned in
   FieldDoc.fields) when sorting by SortField.STRING or
   SortField.STRING_VAL are now BytesRef instances.  See MIGRATE.txt
-  for more details. (Mike McCandless)
+  for more details. (yonik, Mike McCandless)
  
 * LUCENE-2480: Though not a change in backwards compatibility policy, pre-3.0 
   indexes are no longer supported. You should upgrade to 3.x first, then run
@@ -88,23 +100,16 @@ Changes in backwards compatibility polic
 * LUCENE-2484: Removed deprecated TermAttribute. Use CharTermAttribute
   and TermToBytesRefAttribute instead.  (Uwe Schindler)
 
-Changes in runtime behavior
-
-* LUCENE-2421: NativeFSLockFactory does not throw LockReleaseFailedException if 
-  it cannot delete the lock file, since obtaining the lock does not fail if the 
-  file is there. (Shai Erera)
-
 API Changes
 
-* LUCENE-1458, LUCENE-2111: The postings APIs (TermEnum, TermDocsEnum,
-  TermPositionsEnum) have been deprecated in favor of the new flexible
-  indexing (flex) APIs (Fields, FieldsEnum, Terms, TermsEnum,
-  DocsEnum, DocsAndPositionsEnum). One big difference is that field
-  and terms are now enumerated separately: a TermsEnum provides a
-  BytesRef (wraps a byte[]) per term within a single field, not a
-  Term.  Another is that when asking for a Docs/AndPositionsEnum, you
-  now specify the skipDocs explicitly (typically this will be the
-  deleted docs, but in general you can provide any Bits).
+* LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
+  required to be character based. Lucene views a term as an arbitrary byte[]:
+  during analysis, character-based terms are converted to UTF8 byte[],
+  but analyzers are free to directly create terms as byte[]
+  (NumericField does this, for example).  The term data is buffered as
+  byte[] during indexing, written as byte[] into the terms dictionary,
+  and iterated as byte[] (wrapped in a BytesRef) by IndexReader for
+  searching.
 
 * LUCENE-1458, LUCENE-2111: IndexReader now directly exposes its
   deleted docs (getDeletedDocs), providing a new Bits interface to
@@ -116,11 +121,6 @@ API Changes
   commit points when they are not needed anymore (instead of waiting for the 
   next commit). (Shai Erera)
 
-* LUCENE-2356: Add IndexWriterConfig.set/getReaderTermsIndexDivisor,
-  to set what IndexWriter passes for termsIndexDivisor to the readers
-  it opens internally when applying deletions or creating a
-  near-real-time reader.  (Earwin Burrfoot via Mike McCandless)
-
 New features
 
 * LUCENE-1606, LUCENE-2089: Adds AutomatonQuery, a MultiTermQuery that 
@@ -130,7 +130,7 @@ New features
 
 * LUCENE-1990: Adds internal packed ints implementation, to be used
   for more efficient storage of int arrays when the values are
-  bounded, for example for storing the terms dict index Toke Toke
+  bounded, for example for storing the terms dict index (Toke
   Eskildsen via Mike McCandless)
 
 * LUCENE-2321: Cutover to a more RAM efficient packed-ints based
@@ -156,15 +156,6 @@ New features
   standard codec), and int block (really a "base" for using
   block-based compressors like PForDelta for storing postings data).
 
-* LUCENE-2302, LUCENE-1458, LUCENE-2111: Terms are no longer required
-  to be character based.  Lucene views a term as an arbitrary byte[]:
-  during analysis, character-based terms are converted to UTF8 byte[],
-  but analyzers are free to directly create terms as byte[]
-  (NumericField does this, for example).  The term data is buffered as
-  byte[] during indexing, written as byte[] into the terms dictionary,
-  and iterated as byte[] (wrapped in a BytesRef) by IndexReader for
-  searching.
-
 * LUCENE-2385: Moved NoDeletionPolicy from benchmark to core. NoDeletionPolicy
   can be used to prevent commits from ever getting deleted from the index.
   (Shai Erera)
@@ -194,6 +185,22 @@ New features
 
 * LUCENE-2489: Added PerFieldCodecWrapper (in oal.index.codecs) which
   lets you set the Codec per field (Mike McCandless)
+
+* LUCENE-1810: Added FieldSelectorResult.LATENT to not cache lazy loaded fields
+  (Tim Smith, Grant Ingersoll)
+
+* LUCENE-2373: Extend CodecProvider to use SegmentInfosWriter and
+  SegmentInfosReader to allow customization of SegmentInfos data.
+  (Andrzej Bialecki)
+
+Optimizations
+
+* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
+  (Mike McCandless)
+
+* LUCENE-2531: Fix issue when sorting by a String field that was
+  causing too many fallbacks to compare-by-value (instead of by-ord).
+  (Mike McCandless)
   
 ======================= Lucene 3.x (not yet released) =======================
 
@@ -203,10 +210,6 @@ Changes in backwards compatibility polic
   class is no longer used by Lucene.  (Gunnar Wagenknecht via Mike
   McCandless)
 
-* LUCENE-2135: Added FieldCache.purge(IndexReader) method to the
-  interface.  Anyone implementing FieldCache externally will need to
-  fix their code to implement this, on upgrading.  (Mike McCandless)
-
 * LUCENE-1923: Renamed SegmentInfo & SegmentInfos segString method to
   toString.  These are advanced APIs and subject to change suddenly.
   (Tim Smith via Mike McCandless)
@@ -222,7 +225,8 @@ Changes in backwards compatibility polic
   the IndexWriter for a MergePolicy exactly once. You can change references to
   'writer' from <code>writer.doXYZ()</code> to <code>writer.get().doXYZ()</code>
   (it is also advisable to add an <code>assert writer != null;</code> before you
-  access the wrapped IndexWriter.
+  access the wrapped IndexWriter.)
+
   In addition, MergePolicy only exposes a default constructor, and the one that
   took IndexWriter as argument has been removed from all MergePolicy extensions.
   (Shai Erera via Mike McCandless)
@@ -265,10 +269,6 @@ Changes in runtime behavior
 * LUCENE-2179: CharArraySet.clear() is now functional.
   (Robert Muir, Uwe Schindler)
 
-* LUCENE-2421: NativeFSLockFactory does not throw LockReleaseFailedException if 
-  it cannot delete the lock file, since obtaining the lock does not fail if the 
-  file is there. (Shai Erera)
-
 * LUCENE-2455: IndexWriter.addIndexes no longer optimizes the target index 
   before it adds the new ones. Also, the existing segments are not merged and so
   the index will not end up with a single segment (unless it was empty before).
@@ -276,6 +276,7 @@ Changes in runtime behavior
   invokes a merge on the incoming and target segments, but instead copies the
   segments to the target index. You can call maybeMerge or optimize after this
   method completes, if you need to.
+  
   In addition, Directory.copyTo* were removed in favor of copy which takes the
   target Directory, source and target files as arguments, and copies the source
   file to the target Directory under the target file name. (Shai Erera)
@@ -313,10 +314,6 @@ API Changes
   files are no longer open by IndexReaders. (luocanrao via Mike
   McCandless)
 
-* LUCENE-2281: added doBeforeFlush to IndexWriter to allow extensions to perform
-  operations before flush starts. Also exposed doAfterFlush as protected instead
-  of package-private. (Shai Erera via Mike McCandless)
-
 * LUCENE-2282: IndexFileNames is exposed as a public class allowing for easier 
   use by external code. In addition it offers a matchExtension method which 
   callers can use to query whether a certain file matches a certain extension.
@@ -368,9 +365,9 @@ API Changes
   next commit). (Shai Erera)
 
 * LUCENE-2455: IndexWriter.addIndexesNoOptimize was renamed to addIndexes.
-  IndexFileNames.segmentFileName now takes another parameter to accomodate
+  IndexFileNames.segmentFileName now takes another parameter to accommodate
   custom file names. You should use this method to name all your files.
-  (Shai Erera) 
+  (Shai Erera)
   
 * LUCENE-2481: SnapshotDeletionPolicy.snapshot() and release() were replaced
   with equivalent ones that take a String (id) as argument. You can pass
@@ -379,16 +376,6 @@ API Changes
   
 Bug fixes
 
-* LUCENE-2119: Don't throw NegativeArraySizeException if you pass
-  Integer.MAX_VALUE as nDocs to IndexSearcher search methods.  (Paul
-  Taylor via Mike McCandless)
-
-* LUCENE-2142: FieldCacheImpl.getStringIndex no longer throws an
-  exception when term count exceeds doc count.  (Mike McCandless)
-
-* LUCENE-2104: NativeFSLock.release() would silently fail if the lock is held by 
-  another thread/process.  (Shai Erera via Uwe Schindler)
-
 * LUCENE-2216: OpenBitSet.hashCode returned different hash codes for
   sets that only differed by trailing zeros. (Dawid Weiss, yonik)
 
@@ -402,17 +389,6 @@ Bug fixes
   incorrectly and lead to ConcurrentModificationException.
   (Uwe Schindler, Robert Muir)
 
-* LUCENE-2283: Use shared memory pool for term vector and stored
-  fields buffers. This memory will be reclaimed if needed according to
-  the configured RAM Buffer Size for the IndexWriter.  This also fixes
-  potentially excessive memory usage when many threads are indexing a
-  mix of small and large documents.  (Tim Smith via Mike McCandless)
-
-* LUCENE-2300: If IndexWriter is pooling reader (because NRT reader
-  has been obtained), and addIndexes* is run, do not pool the
-  readers from the external directory.  This is harmless (NRT reader is
-  correct), but a waste of resources.  (Mike McCandless)
-
 * LUCENE-2328: Index files fsync tracking moved from
   IndexWriter/IndexReader to Directory, and it no longer leaks memory.
   (Earwin Burrfoot via Mike McCandless)
@@ -424,63 +400,53 @@ Bug fixes
 * LUCENE-2074: Reduce buffer size of lexer back to default on reset.
   (Ruben Laguna, Shai Erera via Uwe Schindler)
   
-* LUCENE-2422: Don't reuse byte[] in IndexInput/Output -- it gains
-  little performance, and ties up possibly large amounts of memory for
-  apps that index large docs.  (Ross Woolf via Mike McCandless)
-
-* LUCENE-2387: Don't hang onto Fieldables from the last doc indexed,
-  in IndexWriter, nor the Reader in Tokenizer after close is
-  called.  (Ruben Laguna, Uwe Schindler, Mike McCandless)
-  
-* LUCENE-2417: IndexCommit did not implement hashCode() and equals() 
-  consistently. Now they both take Directory and version into consideration. In
-  addition, all of IndexComnmit methods which threw 
-  UnsupportedOperationException are now abstract. (Shai Erera)
-
-* LUCENE-2467: Fixed memory leaks in IndexWriter when large documents
-  are indexed.  (Mike McCandless)
-
-* LUCENE-2473: Clicking on the "More Results" link in the luceneweb.war
-  demo resulted in ArrayIndexOutOfBoundsException.  
-  (Sami Siren via Robert Muir)
-
-* LUCENE-2476: If any exception is hit init'ing IW, release the write
-  lock (previously we only released on IOException).  (Tamas Cservenak
-  via Mike McCandless)
-
-* LUCENE-2478: Fix CachingWrapperFilter to not throw NPE when
-  Filter.getDocIdSet() returns null.  (Uwe Schindler, Daniel Noll)
-
-* LUCENE-2468: Allow specifying how new deletions should be handled in
-  CachingWrapperFilter and CachingSpanFilter.  By default, new
-  deletions are ignored in CachingWrapperFilter, since typically this
-  filter is AND'd with a query that correctly takes new deletions into
-  account.  This should be a performance gain (higher cache hit rate)
-  in apps that reopen readers, or use near-real-time reader
-  (IndexWriter.getReader()), but may introduce invalid search results
-  (allowing deleted docs to be returned) for certain cases, so a new
-  expert ctor was added to CachingWrapperFilter to enforce deletions
-  at a performance cost.  CachingSpanFilter by default recaches if
-  there are new deletions (Shay Banon via Mike McCandless)
-
-* LUCENE-2299: If you open an NRT reader while addIndexes* is running,
-  it may miss some segments (Earwin Burrfoot via Mike McCandless)
+* LUCENE-2496: Don't throw NPE if IndexWriter is opened with CREATE on
+  a prior (corrupt) index missing its segments_N file.  (Mike
+  McCandless)
 
-* LUCENE-2397: Don't throw NPE from SnapshotDeletionPolicy.snapshot if
-  there are no commits yet (Shai Erera)
+* LUCENE-2142 (correct fix): FieldCacheImpl.getStringIndex no longer
+  throws an exception when term count exceeds doc count.
+  (Mike McCandless, Uwe Schindler)
+
+* LUCENE-2513: when opening writable IndexReader on a not-current
+  commit, do not overwrite "future" commits.  (Mike McCandless)
+
+* LUCENE-2533: fix FileSwitchDirectory.listAll to not return dups when
+  primary & secondary dirs share the same underlying directory.
+  (Michael McCandless)
 
-* LUCENE-2424: Fix FieldDoc.toString to actually return its fields
-  (Stephen Green via Mike McCandless)
+* LUCENE-2534: fix over-sharing bug in
+  MultiTermsEnum.docs/AndPositionsEnum.  (Robert Muir, Mike
+  McCandless)
 
-* LUCENE-2311: Always pass a "fully loaded" (terms index & doc stores)
-  SegmentsReader to IndexWriter's mergedSegmentWarmer (if set), so
-  that warming is free to do whatever it needs to.  (Earwin Burrfoot
+* LUCENE-2536: IndexWriter.rollback was failing to properly rollback
+  buffered deletions against segments that were flushed (Mark Harwood
   via Mike McCandless)
 
-* LUCENE-2486: Fixed intermittent FileNotFoundException on doc store
-  files when a mergedSegmentWarmer is set on IndexWriter.  (Mike
-  McCandless)
-
+* LUCENE-2541: Fixed NumericRangeQuery that returned incorrect results
+  with endpoints near Long.MIN_VALUE and Long.MAX_VALUE:
+  NumericUtils.splitRange() overflowed, if
+  - the range contained a LOWER bound
+    that was greater than (Long.MAX_VALUE - (1L << precisionStep))
+  - the range contained an UPPER bound
+    that was less than (Long.MIN_VALUE + (1L << precisionStep))
+  With standard precision steps around 4, this had no effect on
+  most queries, only those that met the above conditions.
+  Queries with large precision steps failed more easily. Queries with
+  precision step >=64 were not affected. Also 32 bit data types int
+  and float were not affected.
+  (Yonik Seeley, Uwe Schindler)
+
+* LUCENE-2549: Fix TimeLimitingCollector#TimeExceededException to record
+  the absolute docid.  (Uwe Schindler)
+
+* LUCENE-2458: QueryParser no longer automatically forms phrase queries,
+  assuming whitespace tokenization. Previously all CJK queries, for example,
+  would be turned into phrase queries. The old behavior is preserved with
+  the matchVersion parameter for previous versions. Additionally, you can
+  explicitly enable the old behavior with setAutoGeneratePhraseQueries(true) 
+  (Robert Muir)
+  
 New features
 
 * LUCENE-2128: Parallelized fetching document frequencies during weight
@@ -556,6 +522,9 @@ New features
   to wrap any other Analyzer and provide the same functionality as
   MaxFieldLength provided on IndexWriter.  This patch also fixes a bug
   in the offset calculation in CharTokenizer. (Uwe Schindler, Shai Erera)
+
+* LUCENE-2526: Don't throw NPE from MultiPhraseQuery.toString when
+  it's empty.  (Ross Woolf via Mike McCandless)
   
 Optimizations
 
@@ -580,13 +549,6 @@ Optimizations
   BooleanQuery.maxClauseCount() as before. 
   (Uwe Schindler, Robert Muir, Mike McCandless)
 
-* LUCENE-2135: On IndexReader.close, forcefully evict any entries from
-  the FieldCache rather than waiting for the WeakHashMap to release
-  the reference (Mike McCandless)
-
-* LUCENE-2161: Improve concurrency of IndexReader, especially in the
-  context of near real-time readers.  (Mike McCandless)
-
 * LUCENE-2164: ConcurrentMergeScheduler has more control over merge
   threads.  First, it gives smaller merges higher thread priority than
   larger ones.  Second, a new set/getMaxMergeCount setting will pause
@@ -624,9 +586,6 @@ Optimizations
   because then it will make sense to make the RAM buffers as large as 
   possible. (Mike McCandless, Michael Busch)
 
-* LUCENE-2360: Small speedup to recycling of reused per-doc RAM in
-  IndexWriter (Robert Muir, Mike McCandless)
-
 * LUCENE-2380: The terms field cache methods (getTerms,
   getTermsIndex), which replace the older String equivalents
   (getStrings, getStringIndex), consume quite a bit less RAM in most
@@ -649,7 +608,11 @@ Build
   can force them to run sequentially by passing -Drunsequential=1 on the command
   line. The number of threads that are spawned per CPU defaults to '1'. If you 
   wish to change that, you can run the tests with -DthreadsPerProcessor=[num].
-  (Robert Muir, Shai Erera, Peter Kofler) 
+  (Robert Muir, Shai Erera, Peter Kofler)
+
+* LUCENE-2516: Backwards tests are now compiled against released lucene-core.jar
+  from tarball of previous version. Backwards tests are now packaged together
+  with src distribution.  (Uwe Schindler)
 
 Test Cases
 
@@ -688,6 +651,147 @@ Test Cases
 * LUCENE-2398: Improve tests to work better from IDEs such as Eclipse.
   (Paolo Castagna via Robert Muir)
 
+================== Release 2.9.3 / 3.0.2 2010-06-18 ====================
+
+Changes in backwards compatibility policy
+
+* LUCENE-2135: Added FieldCache.purge(IndexReader) method to the
+  interface.  Anyone implementing FieldCache externally will need to
+  fix their code to implement this, on upgrading.  (Mike McCandless)
+
+Changes in runtime behavior
+
+* LUCENE-2421: NativeFSLockFactory does not throw LockReleaseFailedException if 
+  it cannot delete the lock file, since obtaining the lock does not fail if the 
+  file is there. (Shai Erera)
+
+* LUCENE-2060 (2.9.3 only): Changed ConcurrentMergeScheduler's default for
+  maxNumThreads from 3 to 1, because in practice we get the most gains
+  from running a single merge in the background.  More than one
+  concurrent merge causes a lot of thrashing (though it's possible on
+  SSD storage that there would be net gains).  (Jason Rutherglen, Mike
+  McCandless)
+
+Bug fixes
+
+* LUCENE-2046 (2.9.3 only): IndexReader should not see the index as changed, after
+  IndexWriter.prepareCommit has been called but before
+  IndexWriter.commit is called. (Peter Keegan via Mike McCandless)
+
+* LUCENE-2119: Don't throw NegativeArraySizeException if you pass
+  Integer.MAX_VALUE as nDocs to IndexSearcher search methods.  (Paul
+  Taylor via Mike McCandless)
+
+* LUCENE-2142: FieldCacheImpl.getStringIndex no longer throws an
+  exception when term count exceeds doc count.  (Mike McCandless)
+
+* LUCENE-2104: NativeFSLock.release() would silently fail if the lock is held by 
+  another thread/process.  (Shai Erera via Uwe Schindler)
+  
+* LUCENE-2283: Use shared memory pool for term vector and stored
+  fields buffers. This memory will be reclaimed if needed according to
+  the configured RAM Buffer Size for the IndexWriter.  This also fixes
+  potentially excessive memory usage when many threads are indexing a
+  mix of small and large documents.  (Tim Smith via Mike McCandless)
+
+* LUCENE-2300: If IndexWriter is pooling reader (because NRT reader
+  has been obtained), and addIndexes* is run, do not pool the
+  readers from the external directory.  This is harmless (NRT reader is
+  correct), but a waste of resources.  (Mike McCandless)
+
+* LUCENE-2422: Don't reuse byte[] in IndexInput/Output -- it gains
+  little performance, and ties up possibly large amounts of memory
+  for apps that index large docs.  (Ross Woolf via Mike McCandless)
+
+* LUCENE-2387: Don't hang onto Fieldables from the last doc indexed,
+  in IndexWriter, nor the Reader in Tokenizer after close is
+  called.  (Ruben Laguna, Uwe Schindler, Mike McCandless)
+
+* LUCENE-2417: IndexCommit did not implement hashCode() and equals() 
+  consistently. Now they both take Directory and version into consideration. In
+  addition, all of IndexCommit methods which threw 
+  UnsupportedOperationException are now abstract. (Shai Erera)
+
+* LUCENE-2467: Fixed memory leaks in IndexWriter when large documents
+  are indexed.  (Mike McCandless)
+
+* LUCENE-2473: Clicking on the "More Results" link in the luceneweb.war
+  demo resulted in ArrayIndexOutOfBoundsException.
+  (Sami Siren via Robert Muir)
+
+* LUCENE-2476: If any exception is hit init'ing IW, release the write
+  lock (previously we only released on IOException).  (Tamas Cservenak
+  via Mike McCandless)
+
+* LUCENE-2478: Fix CachingWrapperFilter to not throw NPE when
+  Filter.getDocIdSet() returns null.  (Uwe Schindler, Daniel Noll)
+
+* LUCENE-2468: Allow specifying how new deletions should be handled in
+  CachingWrapperFilter and CachingSpanFilter.  By default, new
+  deletions are ignored in CachingWrapperFilter, since typically this
+  filter is AND'd with a query that correctly takes new deletions into
+  account.  This should be a performance gain (higher cache hit rate)
+  in apps that reopen readers, or use near-real-time reader
+  (IndexWriter.getReader()), but may introduce invalid search results
+  (allowing deleted docs to be returned) for certain cases, so a new
+  expert ctor was added to CachingWrapperFilter to enforce deletions
+  at a performance cost.  CachingSpanFilter by default recaches if
+  there are new deletions (Shay Banon via Mike McCandless)
+
+* LUCENE-2299: If you open an NRT reader while addIndexes* is running,
+  it may miss some segments (Earwin Burrfoot via Mike McCandless)
+
+* LUCENE-2397: Don't throw NPE from SnapshotDeletionPolicy.snapshot if
+  there are no commits yet (Shai Erera)
+
+* LUCENE-2424: Fix FieldDoc.toString to actually return its fields
+  (Stephen Green via Mike McCandless)
+
+* LUCENE-2311: Always pass a "fully loaded" (terms index & doc stores)
+  SegmentsReader to IndexWriter's mergedSegmentWarmer (if set), so
+  that warming is free to do whatever it needs to.  (Earwin Burrfoot
+  via Mike McCandless)
+
+* LUCENE-2486: Fixed intermittent FileNotFoundException on doc store
+  files when a mergedSegmentWarmer is set on IndexWriter.  (Mike
+  McCandless)
+
+* LUCENE-2130: Fix performance issue when FuzzyQuery runs on a
+  multi-segment index (Michael McCandless)
+
+API Changes
+
+* LUCENE-2281: added doBeforeFlush to IndexWriter to allow extensions to perform
+  operations before flush starts. Also exposed doAfterFlush as protected instead
+  of package-private. (Shai Erera via Mike McCandless)
+
+* LUCENE-2356: Add IndexWriter.set/getReaderTermsIndexDivisor, to set
+  what IndexWriter passes for termsIndexDivisor to the readers it
+  opens internally when applying deletions or creating a
+  near-real-time reader.  (Earwin Burrfoot via Mike McCandless)
+
+Optimizations
+
+* LUCENE-2494 (3.0.2 only): Use CompletionService in ParallelMultiSearcher
+  instead of simple polling for results. (Edward Drapkin, Simon Willnauer) 
+
+* LUCENE-2135: On IndexReader.close, forcefully evict any entries from
+  the FieldCache rather than waiting for the WeakHashMap to release
+  the reference (Mike McCandless)
+
+* LUCENE-2161: Improve concurrency of IndexReader, especially in the
+  context of near real-time readers.  (Mike McCandless)
+
+* LUCENE-2360: Small speedup to recycling of reused per-doc RAM in
+  IndexWriter (Robert Muir, Mike McCandless)
+
+Build
+
+* LUCENE-2488 (2.9.3 only): Support build with JDK 1.4 and exclude Java 1.5
+  contrib modules on request (pass '-Dforce.jdk14.build=true') when
+  compiling/testing/packaging. This marks the benchmark contrib also
+  as Java 1.5, as it depends on fast-vector-highlighter. (Uwe Schindler)
+
 ================== Release 2.9.2 / 3.0.1 2010-02-26 ====================
 
 Changes in backwards compatibility policy
@@ -763,8 +867,6 @@ API Changes
    (Robert Muir)
 
 Optimizations
- * LUCENE-2494: Use CompletionService in ParallelMultiSearcher instead of
-   simple polling for resutls. (Edward Drapkin, Simon Willnauer) 
 
  * LUCENE-2086: When resolving deleted terms, do so in term sort order
    for better performance (Bogdan Ghidireac via Mike McCandless)

Modified: lucene/dev/branches/realtime_search/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/MIGRATE.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/MIGRATE.txt (original)
+++ lucene/dev/branches/realtime_search/lucene/MIGRATE.txt Thu Jul 22 19:34:35 2010
@@ -1,5 +1,5 @@
 
-LUCENE-2380
+LUCENE-2380: FieldCache.getStrings/Index --> FieldCache.getTerms/Index
 
   * The field values returned when sorting by SortField.STRING are now
     BytesRef.  You can call value.utf8ToString() to convert back to
@@ -22,11 +22,42 @@ LUCENE-2380
       DocTerms values = FieldCache.DEFAULT.getTerms(reader, field);
       ...
       BytesRef term = new BytesRef();
-      String aValue = values.get(docID, term).utf8ToString();
+      String aValue = values.getTerm(docID, term).utf8ToString();
 
     Note however that it can be costly to convert to String, so it's
     better to work directly with the BytesRef.
 
+  * Similarly, in FieldCache, getStringIndex (returning a StringIndex
+    instance, with direct arrays int[] order and String[] lookup) has
+    been replaced with getTermsIndex (returning a
+    FieldCache.DocTermsIndex instance).  DocTermsIndex provides the
+    getOrd(int docID) method to lookup the int order for a document,
+    lookup(int ord, BytesRef reuse) to lookup the term from a given
+    order, and the sugar method getTerm(int docID, BytesRef reuse)
+    which internally calls getOrd and then lookup.
+
+    If you had code like this before:
+
+      StringIndex idx = FieldCache.DEFAULT.getStringIndex(reader, field);
+      ...
+      int ord = idx.order[docID];
+      String aValue = idx.lookup[ord];
+
+    you can do this instead:
+
+      DocTermsIndex idx = FieldCache.DEFAULT.getTermsIndex(reader, field);
+      ...
+      int ord = idx.getOrd(docID);
+      BytesRef term = new BytesRef();
+      String aValue = idx.lookup(ord, term).utf8ToString();
+
+    Note however that it can be costly to convert to String, so it's
+    better to work directly with the BytesRef.
+
+    DocTermsIndex also has a getTermsEnum() method, which returns an
+    iterator (TermsEnum) over the term values in the index (ie,
+    iterates ord = 0..numOrd()-1).
+
   * StringComparatorLocale is now more CPU costly than it was before
     (it was already very CPU costly since it does not compare using
     indexed collation keys; use CollationKeyFilter for better
@@ -43,3 +74,195 @@ LUCENE-2380
     String.  You can call the .utf8ToString() method on the BytesRef
     instances, if necessary.
 
+
+
+LUCENE-1458, LUCENE-2111: Flexible Indexing
+
+  Flexible indexing changed the low level fields/terms/docs/positions
+  enumeration APIs.  Here are the major changes:
+
+    * Terms are now binary in nature (arbitrary byte[]), represented
+      by the BytesRef class (which provides an offset + length "slice"
+      into an existing byte[]).
+
+    * Fields are separately enumerated (FieldsEnum) from the terms
+      within each field (TermsEnum).  So instead of this:
+
+        TermEnum termsEnum = ...;
+	while(termsEnum.next()) {
+	  Term t = termsEnum.term();
+	  System.out.println("field=" + t.field() + "; text=" + t.text());
+        }
+
+      Do this:
+ 
+        FieldsEnum fieldsEnum = ...;
+	String field;
+	while((field = fieldsEnum.next()) != null) {
+	  TermsEnum termsEnum = fieldsEnum.terms();
+	  BytesRef text;
+	  while((text = termsEnum.next()) != null) {
+	    System.out.println("field=" + field + "; text=" + text.utf8ToString());
+	  }
+
+    * TermDocs is renamed to DocsEnum.  Instead of this:
+
+        while(td.next()) {
+	  int doc = td.doc();
+	  ...
+	}
+
+      do this:
+
+        int doc;
+	while((doc = td.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+	  ...
+ 	}
+
+      Instead of this:
+      
+        if (td.skipTo(target)) {
+	  int doc = td.doc();
+	  ...
+	}
+
+      do this:
+      
+        if ((doc=td.advance(target)) != DocsEnum.NO_MORE_DOCS) {
+	  ...
+	}
+
+      The bulk read API has also changed.  Instead of this:
+
+        int[] docs = new int[256];
+        int[] freqs = new int[256];
+
+        while(true) {
+          int count = td.read(docs, freqs);
+          if (count == 0) {
+            break;
+          }
+          // use docs[i], freqs[i]
+        }
+
+      do this:
+
+        DocsEnum.BulkReadResult bulk = td.getBulkResult();
+        while(true) {
+          int count = td.read();
+          if (count == 0) {
+            break;
+          }
+          // use bulk.docs.ints[i] and bulk.freqs.ints[i]
+        }
+
+    * TermPositions is renamed to DocsAndPositionsEnum, and no longer
+      extends the docs only enumerator (DocsEnum).
+
+    * Deleted docs are no longer implicitly filtered from
+      docs/positions enums.  Instead, you pass a Bits
+      skipDocs (set bits are skipped) when obtaining the enums.  Also,
+      you can now ask a reader for its deleted docs.
+
+    * The docs/positions enums cannot seek to a term.  Instead,
+      TermsEnum is able to seek, and then you request the
+      docs/positions enum from that TermsEnum.
+
+    * TermsEnum's seek method returns more information.  So instead of
+      this:
+
+        Term t;
+        TermEnum termEnum = reader.terms(t);
+	if (t.equals(termEnum.term())) {
+	  ...
+        }
+
+      do this:
+
+        TermsEnum termsEnum = ...;
+	BytesRef text;
+	if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) {
+	  ...
+	}
+
+      SeekStatus also contains END (enumerator is done) and NOT_FOUND
+      (term was not found but enumerator is now positioned to the next
+      term).
+
+    * TermsEnum has an ord() method, returning the long numeric
+      ordinal (ie, first term is 0, next is 1, and so on) for the term
+      it's now positioned to.  There is also a corresponding seek(long
+      ord) method.  Note that these methods are optional; in
+      particular the MultiFields TermsEnum does not implement them.
+
+
+  How you obtain the enums has changed.  The primary entry point is
+  the Fields class.  If you know your reader is a single segment
+  reader, do this:
+
+    Fields fields = reader.fields();
+    if (fields != null) {
+      ...
+    }
+
+  If the reader might be multi-segment, you must do this:
+    
+    Fields fields = MultiFields.getFields(reader);
+    if (fields != null) {
+      ...
+    }
+  
+  The fields may be null (eg if the reader has no fields).
+
+  Note that the MultiFields approach entails a performance hit on
+  MultiReaders, as it must merge terms/docs/positions on the fly. It's
+  generally better to instead get the sequential readers (use
+  oal.util.ReaderUtil) and then step through those readers yourself,
+  if you can (this is how Lucene drives searches).
+
+  If you pass a SegmentReader to MultiFields.getFields it will simply
+  return reader.fields(), so there is no performance hit in that
+  case.
+
+  Once you have a non-null Fields you can do this:
+
+    Terms terms = fields.terms("field");
+    if (terms != null) {
+      ...
+    }
+
+  The terms may be null (eg if the field does not exist).
+
+  Once you have a non-null terms you can get an enum like this:
+
+    TermsEnum termsEnum = terms.iterator();
+
+  The returned TermsEnum will not be null.
+
+  You can then .next() through the TermsEnum, or seek.  If you want a
+  DocsEnum, do this:
+
+    Bits skipDocs = MultiFields.getDeletedDocs(reader);
+    DocsEnum docsEnum = null;
+
+    docsEnum = termsEnum.docs(skipDocs, docsEnum);
+
+  You can pass in a prior DocsEnum and it will be reused if possible.
+
+  Likewise for DocsAndPositionsEnum.
+
+  IndexReader has several sugar methods (which just go through the
+  above steps, under the hood).  Instead of:
+
+    Term t;
+    TermDocs termDocs = reader.termDocs();
+    termDocs.seek(t);
+
+  do this:
+
+    String field;
+    BytesRef text;
+    DocsEnum docsEnum = reader.termDocsEnum(reader.getDeletedDocs(), field, text);
+
+  Likewise for DocsAndPositionsEnum.
+    

Modified: lucene/dev/branches/realtime_search/lucene/backwards/backwards-readme.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/backwards/backwards-readme.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/backwards/backwards-readme.txt (original)
+++ lucene/dev/branches/realtime_search/lucene/backwards/backwards-readme.txt Thu Jul 22 19:34:35 2010
@@ -4,14 +4,15 @@
 
 This folder contains the src/ folder of the previous Lucene major version.
 
-The test-backwards ANT task compiles the core classes of the previous version and its tests
-against these class files. After that the compiled test classes are run against the new
-lucene-core.jar file.
+The test-backwards ANT task compiles the previous version's tests (bundled) against the
+previous released lucene-core.jar file (bundled). After that the compiled test classes
+are run against the new lucene-core.jar file, created by ANT before.
 
 After branching a new Lucene major version (branch name "lucene_X_Y") do the following:
 
-* svn rm backwards/src/
-* svn cp https://svn.apache.org/repos/asf/lucene/dev/branches/lucene_X_Y/lucene/src/ backwards/src/
+* svn rm backwards/src/test
+* svn cp https://svn.apache.org/repos/asf/lucene/dev/branches/lucene_X_Y/lucene/src/test backwards/src/test
+* Copy the lucene-core.jar from the last release tarball to backwards/lib and delete the old one.
 * Check that everything is correct: The backwards folder should contain a src/ folder
-  that now contains java, test, demo,.... The files should be the ones from the branch.
+  that now contains "test". The files should be the ones from the branch.
 * Run "ant test-backwards"

Modified: lucene/dev/branches/realtime_search/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/build.xml?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/build.xml (original)
+++ lucene/dev/branches/realtime_search/lucene/build.xml Thu Jul 22 19:34:35 2010
@@ -43,7 +43,7 @@
   </path>
 
   <patternset id="src.dist.patterns"
-              includes="src/,build.xml,*build*.xml,docs/,*.txt,contrib/,*pom.xml*,lib/"
+              includes="src/,build.xml,*build*.xml,docs/,*.txt,contrib/,*pom.xml*,lib/,backwards/"
               excludes="contrib/db/*/lib/,contrib/*/ext-libs/,src/site/build/,contrib/benchmark/temp/,contrib/benchmark/work/"
   />
   <patternset id="binary.build.dist.patterns"
@@ -65,13 +65,11 @@
           description="Runs all unit tests (core, contrib and back-compat)"
   />
 
-  <path id="backwards.compile.classpath">
-    <pathelement location="${build.dir.backwards}/classes/java"/>
-  </path>
-	
   <path id="backwards.test.compile.classpath">
     <path refid="junit-path"/>
-    <pathelement location="${build.dir.backwards}/classes/java"/>
+    <fileset dir="${backwards.dir}/lib">
+      <include name="lucene-core*.jar"/>
+    </fileset>
   </path>
 	
   <path id="backwards.junit.classpath">
@@ -81,31 +79,15 @@
     <pathelement path="${java.class.path}"/>
   </path>
 
-  <available property="backwards.available" file="${backwards.dir}/src"/>
-  
   <!-- remove this -->
   <target name="test-tag" depends="test-backwards" description="deprecated"/>
   
-  <target name="test-backwards-message" unless="backwards.available">
-<echo level="warning">WARNING: Backwards compatibility tests can only be run from SVN checkout.
-The source distribution does not contain sources of the previous Lucene Java version.</echo>
-  </target>
-
-  <target name="compile-backwards" depends="compile-core, jar-core, test-backwards-message"
-  	description="Runs tests of a previous Lucene version." if="backwards.available">
+  <target name="compile-backwards" depends="compile-core, jar-core"
+  	description="Runs tests of a previous Lucene version.">
 	<sequential>
       <mkdir dir="${build.dir.backwards}"/>	  
-
-	  <!-- first compile branch classes -->
-	  <compile
-        srcdir="${backwards.dir}/src/java"
-        destdir="${build.dir.backwards}/classes/java"
-        javac.source="${javac.source.backwards}" javac.target="${javac.target.backwards}"
-      >
-        <classpath refid="backwards.compile.classpath"/>
-	  </compile>
-		  	
-      <!-- compile branch tests against branch classpath -->	
+          
+      <!-- compile branch tests against previous version JAR file -->	
       <compile-test-macro srcdir="${backwards.dir}/src/test" destdir="${build.dir.backwards}/classes/test"
                   test.classpath="backwards.test.compile.classpath" javac.source="${javac.source.backwards}" javac.target="${javac.target.backwards}"/>
       
@@ -113,10 +95,10 @@ The source distribution does not contain
   	</sequential>
   </target>	
 
-  <target name="test-backwards" depends="compile-backwards, junit-backwards-mkdir, junit-backwards-sequential, junit-backwards-parallel" if="backwards.available"/>
+  <target name="test-backwards" /><!--add here after 4.0: depends="compile-backwards, junit-backwards-mkdir, junit-backwards-sequential, junit-backwards-parallel"-->
 
   <target name="junit-backwards-mkdir">
-	<mkdir dir="${build.dir.backwards}/test"/>
+    <mkdir dir="${build.dir.backwards}/test"/>
   </target>
 
   <macrodef name="backwards-test-macro">
@@ -134,20 +116,11 @@ The source distribution does not contain
   	</sequential>
   </macrodef>
 
-  <target name="check-backwards-cond">
-    <condition property="run-backwards-sequential">
-      <and>
-        <isset property="backwards.available"/>
-        <isset property="runsequential"/>
-      </and>
-    </condition>
-  </target>
-
-  <target name="junit-backwards-sequential" depends="check-backwards-cond" if="run-backwards-sequential">
+  <target name="junit-backwards-sequential" if="runsequential">
     <backwards-test-macro/>
   </target>
 
-  <target name="junit-backwards-parallel" unless="runsequential" if="backwards.available">
+  <target name="junit-backwards-parallel" unless="runsequential">
     <parallel threadsPerProcessor="${threadsPerProcessor}">
      <backwards-test-macro threadNum="1" threadTotal="8"/>
      <backwards-test-macro threadNum="2" threadTotal="8"/>

Propchange: lucene/dev/branches/realtime_search/lucene/build.xml
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,4 +1,5 @@
-/lucene/dev/branches/branch_3x/lucene/build.xml:943137,949730
+/lucene/dev/branches/branch_3x/lucene/build.xml:943137,949730,957490,960490,961612
+/lucene/dev/trunk/lucene/build.xml:953476-966816
 /lucene/java/branches/flex_1458/build.xml:824912-931101
 /lucene/java/branches/lucene_2_9/build.xml:909334,948516
 /lucene/java/trunk/build.xml:924483-924731,924781,925176-925462

Propchange: lucene/dev/branches/realtime_search/lucene/contrib/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,4 +1,5 @@
-/lucene/dev/branches/branch_3x/lucene/contrib:943137,949730
+/lucene/dev/branches/branch_3x/lucene/contrib:943137,949730,957490,960490,961612
+/lucene/dev/trunk/lucene/contrib:953476-966816
 /lucene/java/branches/flex_1458/contrib:824912-931101
 /lucene/java/branches/lucene_2_4/contrib:748824
 /lucene/java/branches/lucene_2_9/contrib:817269-818600,825998,829134,829816,829881,831036,896850,909334,948516

Modified: lucene/dev/branches/realtime_search/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/CHANGES.txt?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/CHANGES.txt Thu Jul 22 19:34:35 2010
@@ -7,6 +7,19 @@ Build
  * LUCENE-2413: Moved the demo out of lucene core and into contrib/demo.
    (Robert Muir)
 
+New Features
+
+  * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific
+    Directory impl that uses the O_DIRECT flag to bypass the buffer
+    cache.  This is useful to prevent segment merging from evicting
+    pages from the buffer cache, since fadvise/madvise do not seem to help.
+    (Michael McCandless)
+
+  * LUCENE-2373: Added a Codec implementation that works with append-only
+    filesystems (such as Hadoop DFS). SegmentInfos writing/reading
+    code is refactored to support append-only FS, and to allow for future
+    customization of per-segment information. (Andrzej Bialecki)
+
 ======================= Lucene 3.x (not yet released) =======================
 
 Changes in backwards compatibility policy
@@ -78,6 +91,9 @@ Bug fixes
  * LUCENE-2404: Fix bugs with position increment and empty tokens in ThaiWordFilter.
    For matchVersion >= 3.1 the filter also no longer lowercases. ThaiAnalyzer
    will use a separate LowerCaseFilter instead. (Uwe Schindler, Robert Muir)
+
+ * LUCENE-2524: FastVectorHighlighter: use mod for getting colored tag.
+   (Koji Sekiguchi)
    
 API Changes
 
@@ -168,6 +184,12 @@ New features
  * LUCENE-1287: Allow usage of HyphenationCompoundWordTokenFilter without dictionary.
    (Thomas Peuss via Robert Muir)
 
+ * LUCENE-2464: FastVectorHighlighter: add SingleFragListBuilder to return
+   entire field contents. (Koji Sekiguchi)
+
+ * LUCENE-2503: Added lighter stemming alternatives for European languages. 
+   (Robert Muir)
+
 Build
 
  * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 
@@ -218,6 +240,10 @@ Other
  * LUCENE-2415: Use reflection instead of a shim class to access Jakarta
    Regex prefix.  (Uwe Schindler)
 
+================== Release 2.9.3 / 3.0.2 2010-06-18 ====================
+
+No changes.
+
 ================== Release 2.9.2 / 3.0.1 2010-02-26 ====================
 
 New features

Propchange: lucene/dev/branches/realtime_search/lucene/contrib/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,4 +1,5 @@
-/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt:943137,949730
+/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt:943137,949730,957490,960490,961612
+/lucene/dev/trunk/lucene/contrib/CHANGES.txt:953476-966816
 /lucene/java/branches/flex_1458/contrib/CHANGES.txt:824912-931101
 /lucene/java/branches/lucene_2_4/contrib/CHANGES.txt:748824
 /lucene/java/branches/lucene_2_9/contrib/CHANGES.txt:817269-818600,825998,826775,829134,829816,829881,831036,896850,948516

Modified: lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector-small.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector-small.alg?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector-small.alg (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector-small.alg Thu Jul 22 19:34:35 2010
@@ -23,7 +23,7 @@
 #    topScoreDocUnordered - Like above, but allows out of order
 collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
 
-analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer
+analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
 directory=FSDirectory
 #directory=RamDirectory
 

Propchange: lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector-small.alg
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector.alg?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector.alg (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector.alg Thu Jul 22 19:34:35 2010
@@ -23,7 +23,7 @@
 #    topScoreDocUnordered - Like above, but allows out of order
 collector.class=coll:topScoreDocOrdered:topScoreDocUnordered:topScoreDocOrdered:topScoreDocUnordered
 
-analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer
+analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer
 directory=FSDirectory
 #directory=RamDirectory
 

Propchange: lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/collector.alg
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/indexLineFile.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/indexLineFile.alg?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/indexLineFile.alg (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/benchmark/conf/indexLineFile.alg Thu Jul 22 19:34:35 2010
@@ -29,7 +29,7 @@
 #   ant run-task -Dtask.alg=conf/indexLineFile.alg
 #
 
-analyzer=org.apache.lucene.analysis.SimpleAnalyzer
+analyzer=org.apache.lucene.analysis.core.SimpleAnalyzer
 
 # Feed that knows how to process the line file format:
 content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource

Modified: lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteByPercentTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteByPercentTask.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteByPercentTask.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteByPercentTask.java Thu Jul 22 19:34:35 2010
@@ -21,7 +21,8 @@ import java.util.Random;
 
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.Bits;
 
 /**
  * Deletes a percentage of documents from an index randomly
@@ -50,11 +51,6 @@ public class DeleteByPercentTask extends
   }
   
   @Override
-  public void setup() throws Exception {
-    super.setup();
-  }
-
-  @Override
   public void setParams(String params) {
     super.setParams(params);
     percent = Double.parseDouble(params)/100;
@@ -78,14 +74,19 @@ public class DeleteByPercentTask extends
     }
     while (numDeleted < numToDelete) {
       double delRate = ((double) (numToDelete-numDeleted))/r.numDocs();
-      TermDocs termDocs = r.termDocs(null);
-      while (termDocs.next() && numDeleted < numToDelete) {
-        if (random.nextDouble() <= delRate) {
-          r.deleteDocument(termDocs.doc());
+      Bits delDocs = MultiFields.getDeletedDocs(r);
+      int doc = 0;
+      while (doc < maxDoc && numDeleted < numToDelete) {
+        if ((delDocs == null || !delDocs.get(doc)) && random.nextDouble() <= delRate) {
+          r.deleteDocument(doc);
           numDeleted++;
+          if (delDocs == null) {
+            delDocs = MultiFields.getDeletedDocs(r);
+            assert delDocs != null;
+          }
         }
+        doc++;
       }
-      termDocs.close();
     }
     System.out.println("--> processed (delete) " + numDeleted + " docs");
     r.decRef();

Modified: lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java Thu Jul 22 19:34:35 2010
@@ -20,8 +20,9 @@ import java.io.File;
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.PriorityQueue;
@@ -88,15 +89,15 @@ public class QualityQueriesFinder {
     IndexReader ir = IndexReader.open(dir, true);
     try {
       int threshold = ir.maxDoc() / 10; // ignore words too common.
-      TermEnum terms = ir.terms(new Term(field,""));
-      while (terms.next()) {
-        if (!field.equals(terms.term().field())) {
-          break;
-        }
-        int df = terms.docFreq();
-        if (df<threshold) {
-          String ttxt = terms.term().text();
-          pq.insertWithOverflow(new TermDf(ttxt,df));
+      Terms terms = MultiFields.getTerms(ir, field);
+      if (terms != null) {
+        TermsEnum termsEnum = terms.iterator();
+        while (termsEnum.next() != null) {
+          int df = termsEnum.docFreq();
+          if (df<threshold) {
+            String ttxt = termsEnum.term().utf8ToString();
+            pq.insertWithOverflow(new TermDf(ttxt,df));
+          }
         }
       }
     } finally {

Modified: lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Thu Jul 22 19:34:35 2010
@@ -606,6 +606,40 @@ public class TestPerfTasksLogic extends 
     }
   }
 
+  public void testDeleteByPercent() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "# ----- properties ",
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
+        "ram.flush.mb=-1",
+        "max.buffered=2",
+        "content.source.log.step=3",
+        "doc.term.vector=false",
+        "content.source.forever=false",
+        "directory=RAMDirectory",
+        "doc.stored=false",
+        "doc.tokenized=false",
+        "debug.level=1",
+        "# ----- alg ",
+        "CreateIndex",
+        "{ \"AddDocs\"  AddDoc > : * ",
+        "CloseIndex()",
+        "OpenReader(false)",
+        "DeleteByPercent(20)",
+        "CloseReader"
+    };
+    
+    // 2. execute the algorithm  (required in every "logic" test)
+    Benchmark benchmark = execBenchmark(algLines);
+
+    // 3. test number of docs in the index
+    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+    int ndocsExpected = 16; // first 20 reuters docs, minus 20%
+    assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+    ir.close();
+  }
+
   /**
    * Test that we can set merge scheduler".
    */

Modified: lucene/dev/branches/realtime_search/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java Thu Jul 22 19:34:35 2010
@@ -23,10 +23,13 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
 
 import java.io.File;
 import java.util.Date;
@@ -39,7 +42,7 @@ public class IndexHTML {
   private static boolean deleting = false;	  // true during deletion pass
   private static IndexReader reader;		  // existing index
   private static IndexWriter writer;		  // new index being built
-  private static TermEnum uidIter;		  // document id iterator
+  private static TermsEnum uidIter;		  // document id iterator
 
   /** Indexer for HTML files.*/
   public static void main(String[] argv) {
@@ -110,21 +113,24 @@ public class IndexHTML {
     if (!create) {				  // incrementally update
 
       reader = IndexReader.open(FSDirectory.open(index), false);		  // open existing index
-      uidIter = reader.terms(new Term("uid", "")); // init uid iterator
-
-      indexDocs(file);
-
-      if (deleting) {				  // delete rest of stale docs
-        while (uidIter.term() != null && uidIter.term().field() == "uid") {
-          System.out.println("deleting " +
-              HTMLDocument.uid2url(uidIter.term().text()));
-          reader.deleteDocuments(uidIter.term());
-          uidIter.next();
+      Terms terms = MultiFields.getTerms(reader, "uid");
+      if (terms != null) {
+        uidIter = terms.iterator();
+
+        indexDocs(file);
+
+        if (deleting) {				  // delete rest of stale docs
+          BytesRef text;
+          while ((text=uidIter.next()) != null) {
+            String termText = text.utf8ToString();
+            System.out.println("deleting " +
+                               HTMLDocument.uid2url(termText));
+            reader.deleteDocuments(new Term("uid", termText));
+          }
+          deleting = false;
         }
-        deleting = false;
       }
 
-      uidIter.close();				  // close uid iterator
       reader.close();				  // close existing index
 
     } else					  // don't have exisiting
@@ -145,17 +151,21 @@ public class IndexHTML {
       if (uidIter != null) {
         String uid = HTMLDocument.uid(file);	  // construct uid for doc
 
-        while (uidIter.term() != null && uidIter.term().field() == "uid" &&
-            uidIter.term().text().compareTo(uid) < 0) {
-          if (deleting) {			  // delete stale docs
-            System.out.println("deleting " +
-                HTMLDocument.uid2url(uidIter.term().text()));
-            reader.deleteDocuments(uidIter.term());
+        BytesRef text;
+        while((text = uidIter.next()) != null) {
+          String termText = text.utf8ToString();
+          if (termText.compareTo(uid) < 0) {
+            if (deleting) {			  // delete stale docs
+              System.out.println("deleting " +
+                                 HTMLDocument.uid2url(termText));
+              reader.deleteDocuments(new Term("uid", termText));
+            }
+          } else {
+            break;
           }
-          uidIter.next();
         }
-        if (uidIter.term() != null && uidIter.term().field() == "uid" &&
-            uidIter.term().text().compareTo(uid) == 0) {
+        if (text != null &&
+            text.utf8ToString().compareTo(uid) == 0) {
           uidIter.next();			  // keep matching docs
         } else if (!deleting) {			  // add new docs
           Document doc = HTMLDocument.Document(file);

Modified: lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Thu Jul 22 19:34:35 2010
@@ -36,6 +36,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * Hides implementation issues associated with obtaining a TokenStream for use
@@ -176,7 +177,7 @@ public class TokenSources {
       }
     }
     // code to reconstruct the original sequence of Tokens
-    String[] terms = tpv.getTerms();
+    BytesRef[] terms = tpv.getTerms();
     int[] freq = tpv.getTermFrequencies();
     int totalTokens = 0;
 
@@ -204,7 +205,7 @@ public class TokenSources {
           unsortedTokens = new ArrayList<Token>();
         }
         for (int tp = 0; tp < offsets.length; tp++) {
-          Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp]
+          Token token = new Token(terms[t].utf8ToString(), offsets[tp].getStartOffset(), offsets[tp]
               .getEndOffset());
           unsortedTokens.add(token);
         }
@@ -220,7 +221,7 @@ public class TokenSources {
         // tokens stored with positions - can use this to index straight into
         // sorted array
         for (int tp = 0; tp < pos.length; tp++) {
-          Token token = new Token(terms[t], offsets[tp].getStartOffset(),
+          Token token = new Token(terms[t].utf8ToString(), offsets[tp].getStartOffset(),
               offsets[tp].getEndOffset());
           tokensInOriginalOrder[pos[tp]] = token;
         }

Modified: lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java Thu Jul 22 19:34:35 2010
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.util.BytesRef;
 
 public final class TokenStreamFromTermPositionVector extends TokenStream {
 
@@ -54,18 +55,18 @@ public final class TokenStreamFromTermPo
     termAttribute = addAttribute(CharTermAttribute.class);
     positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
     offsetAttribute = addAttribute(OffsetAttribute.class);
-    final String[] terms = termPositionVector.getTerms();
+    final BytesRef[] terms = termPositionVector.getTerms();
     for (int i = 0; i < terms.length; i++) {
       final TermVectorOffsetInfo[] offsets = termPositionVector.getOffsets(i);
       final int[] termPositions = termPositionVector.getTermPositions(i);
       for (int j = 0; j < termPositions.length; j++) {
         Token token;
         if (offsets != null) {
-          token = new Token(terms[i].toCharArray(), 0, terms[i].length(),
+          token = new Token(terms[i].utf8ToString(),
               offsets[j].getStartOffset(), offsets[j].getEndOffset());
         } else {
           token = new Token();
-          token.setEmpty().append(terms[i]);
+          token.setEmpty().append(terms[i].utf8ToString());
         }
         // Yes - this is the position, not the increment! This is for
         // sorting. This value

Modified: lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java Thu Jul 22 19:34:35 2010
@@ -36,7 +36,10 @@ public abstract class BaseFragmentsBuild
     "<b style=\"background:yellow\">", "<b style=\"background:lawngreen\">", "<b style=\"background:aquamarine\">",
     "<b style=\"background:magenta\">", "<b style=\"background:palegreen\">", "<b style=\"background:coral\">",
     "<b style=\"background:wheat\">", "<b style=\"background:khaki\">", "<b style=\"background:lime\">",
-    "<b style=\"background:deepskyblue\">"
+    "<b style=\"background:deepskyblue\">", "<b style=\"background:deeppink\">", "<b style=\"background:salmon\">",
+    "<b style=\"background:peachpuff\">", "<b style=\"background:violet\">", "<b style=\"background:mediumpurple\">",
+    "<b style=\"background:palegoldenrod\">", "<b style=\"background:darkkhaki\">", "<b style=\"background:springgreen\">",
+    "<b style=\"background:turquoise\">", "<b style=\"background:powderblue\">"
   };
   public static final String[] COLORED_POST_TAGS = { "</b>" };
   
@@ -145,10 +148,12 @@ public abstract class BaseFragmentsBuild
   }
   
   protected String getPreTag( int num ){
-    return preTags.length > num ? preTags[num] : preTags[0];
+    int n = num % preTags.length;
+    return preTags[n];
   }
   
   protected String getPostTag( int num ){
-    return postTags.length > num ? postTags[num] : postTags[0];
+    int n = num % postTags.length;
+    return postTags[n];
   }
 }

Modified: lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java Thu Jul 22 19:34:35 2010
@@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@@ -80,15 +81,15 @@ public class FieldTermStack {
     // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
     if( termSet == null ) return;
     
-    for( String term : tpv.getTerms() ){
-      if( !termSet.contains( term ) ) continue;
+    for( BytesRef term : tpv.getTerms() ){
+      if( !termSet.contains( term.utf8ToString() ) ) continue;
       int index = tpv.indexOf( term );
       TermVectorOffsetInfo[] tvois = tpv.getOffsets( index );
       if( tvois == null ) return; // just return to make null snippets
       int[] poss = tpv.getTermPositions( index );
       if( poss == null ) return; // just return to make null snippets
       for( int i = 0; i < tvois.length; i++ )
-        termList.add( new TermInfo( term, tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
+        termList.add( new TermInfo( term.utf8ToString(), tvois[i].getStartOffset(), tvois[i].getEndOffset(), poss[i] ) );
     }
     
     // sort by position

Modified: lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilder.java Thu Jul 22 19:34:35 2010
@@ -24,7 +24,7 @@ import java.util.List;
 import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo;
 
 /**
- * A simple implementation of FragListBuilder.
+ * A simple implementation of {@link FragListBuilder}.
  */
 public class SimpleFragListBuilder implements FragListBuilder {
   

Propchange: lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/test/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jul 22 19:34:35 2010
@@ -1,4 +1,5 @@
-/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test:943137,949730
+/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test:943137,949730,957490,960490,961612
+/lucene/dev/trunk/lucene/contrib/highlighter/src/test:953476-966816
 /lucene/java/branches/flex_1458/contrib/highlighter/src/test:824912-931101
 /lucene/java/branches/lucene_2_4/contrib/highlighter/src/test:748824
 /lucene/java/branches/lucene_2_9/contrib/highlighter/src/test:817269-818600,825998,826775,829134,829816,829881,831036,896850,909334,948516

Modified: lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Thu Jul 22 19:34:35 2010
@@ -55,7 +55,7 @@ public class FieldQueryTest extends Abst
   }
 
   public void testFlattenTermAndPhrase2gram() throws Exception {
-    Query query = paB.parse( "AA AND BCD OR EFGH" );
+    Query query = paB.parse( "AA AND \"BCD\" OR \"EFGH\"" );
     FieldQuery fq = new FieldQuery( query, true, true );
     Set<Query> flatQueries = new HashSet<Query>();
     fq.flatten( query, flatQueries );
@@ -679,7 +679,7 @@ public class FieldQueryTest extends Abst
   }
   
   public void testQueryPhraseMapOverlap2gram() throws Exception {
-    Query query = paB.parse( "abc AND bcd" );
+    Query query = paB.parse( "\"abc\" AND \"bcd\"" );
     
     // phraseHighlight = true, fieldMatch = true
     FieldQuery fq = new FieldQuery( query, true, true );

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocsEnum.java Thu Jul 22 19:34:35 2010
@@ -54,7 +54,7 @@ public class InstantiatedDocsEnum extend
 
   @Override
   public int advance(int target) {
-    if (currentDoc.getDocument().getDocumentNumber() >= target) {
+    if (currentDoc != null && currentDoc.getDocument().getDocumentNumber() >= target) {
       return nextDoc();
     }
 

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java Thu Jul 22 19:34:35 2010
@@ -31,10 +31,15 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.TermPositionVector;
-import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * Represented as a coupled graph of class instances, this
@@ -220,34 +225,46 @@ public class InstantiatedIndex
       }
     }
     List<InstantiatedTerm> terms = new ArrayList<InstantiatedTerm>(5000 * getTermsByFieldAndText().size());
-    TermEnum termEnum = sourceIndexReader.terms();
-    while (termEnum.next()) {
-      if (fields == null || fields.contains(termEnum.term().field())) { // todo skipto if not using field
-        InstantiatedTerm instantiatedTerm = new InstantiatedTerm(termEnum.term().field(), termEnum.term().text());
-        getTermsByFieldAndText().get(termEnum.term().field()).put(termEnum.term().text(), instantiatedTerm);
-        instantiatedTerm.setTermIndex(terms.size());
-        terms.add(instantiatedTerm);
-        instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termEnum.docFreq()]);
+    Fields fieldsC = MultiFields.getFields(sourceIndexReader);
+    if (fieldsC != null) {
+      FieldsEnum fieldsEnum = fieldsC.iterator();
+      String field;
+      while((field = fieldsEnum.next()) != null) {
+        if (fields == null || fields.contains(field)) {
+          TermsEnum termsEnum = fieldsEnum.terms();
+          BytesRef text;
+          while((text = termsEnum.next()) != null) {
+            String termText = text.utf8ToString();
+            InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
+            getTermsByFieldAndText().get(field).put(termText, instantiatedTerm);
+            instantiatedTerm.setTermIndex(terms.size());
+            terms.add(instantiatedTerm);
+            instantiatedTerm.setAssociatedDocuments(new InstantiatedTermDocumentInformation[termsEnum.docFreq()]);
+          }
+        }
       }
     }
-    termEnum.close();
     orderedTerms = terms.toArray(new InstantiatedTerm[terms.size()]);
 
     // create term-document informations
     for (InstantiatedTerm term : orderedTerms) {
-      TermPositions termPositions = sourceIndexReader.termPositions(term.getTerm());
+      DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(sourceIndexReader,
+                                                                            MultiFields.getDeletedDocs(sourceIndexReader),
+                                                                            term.getTerm().field(),
+                                                                            new BytesRef(term.getTerm().text()));
       int position = 0;
-      while (termPositions.next()) {
-        InstantiatedDocument document = documentsByNumber[termPositions.doc()];
+      while (termPositions.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+        InstantiatedDocument document = documentsByNumber[termPositions.docID()];
 
         byte[][] payloads = new byte[termPositions.freq()][];
         int[] positions = new int[termPositions.freq()];
         for (int i = 0; i < termPositions.freq(); i++) {
           positions[i] = termPositions.nextPosition();
 
-          if (termPositions.isPayloadAvailable()) {
-            payloads[i] = new byte[termPositions.getPayloadLength()];
-            termPositions.getPayload(payloads[i], 0);
+          if (termPositions.hasPayload()) {
+            BytesRef br = termPositions.getPayload();
+            payloads[i] = new byte[br.length];
+            System.arraycopy(br.bytes, br.offset, payloads[i], 0, br.length);
           }
         }
 
@@ -273,7 +290,7 @@ public class InstantiatedIndex
           TermPositionVector termPositionVector = (TermPositionVector) sourceIndexReader.getTermFreqVector(document.getDocumentNumber(), field.name());
           if (termPositionVector != null) {
             for (int i = 0; i < termPositionVector.getTerms().length; i++) {
-              String token = termPositionVector.getTerms()[i];
+              String token = termPositionVector.getTerms()[i].utf8ToString();
               InstantiatedTerm term = findTerm(field.name(), token);
               InstantiatedTermDocumentInformation termDocumentInformation = term.getAssociatedDocument(document.getDocumentNumber());
               termDocumentInformation.setTermOffsets(termPositionVector.getOffsets(i));

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Thu Jul 22 19:34:35 2010
@@ -371,48 +371,10 @@ public class InstantiatedIndexReader ext
   }
 
   @Override
-  public TermEnum terms() throws IOException {
-    return new InstantiatedTermEnum(this);
-  }
-
-  @Override
-  public TermEnum terms(Term t) throws IOException {
-    InstantiatedTerm it = getIndex().findTerm(t);
-    if (it != null) {
-      return new InstantiatedTermEnum(this, it.getTermIndex());
-    } else {
-      int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
-      if (startPos < 0) {
-        startPos = -1 - startPos;
-      }
-      return new InstantiatedTermEnum(this, startPos);
-    }
-  }
-
-  @Override
-  public TermDocs termDocs() throws IOException {
-    return new InstantiatedTermDocs(this);
-  }
-
-
-  @Override
-  public TermDocs termDocs(Term term) throws IOException {
-    if (term == null) {
-      return new InstantiatedAllTermDocs(this);
-    } else {
-      InstantiatedTermDocs termDocs = new InstantiatedTermDocs(this);
-      termDocs.seek(term);
-      return termDocs;
-    }
-  }
-
-  @Override
-  public TermPositions termPositions() throws IOException {
-    return new InstantiatedTermPositions(this);
-  }
-
-  @Override
   public Fields fields() {
+    if (getIndex().getOrderedTerms().length == 0) {
+      return null;
+    }
 
     return new Fields() {
       @Override
@@ -464,7 +426,7 @@ public class InstantiatedIndexReader ext
 
           @Override
           public Comparator<BytesRef> getComparator() {
-            return BytesRef.getUTF8SortedAsUTF16Comparator();
+            return BytesRef.getUTF8SortedAsUnicodeComparator();
           }
         };
       }
@@ -502,7 +464,7 @@ public class InstantiatedIndexReader ext
       List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
       mapper.setExpectations(field, tv.size(), true, true);
       for (InstantiatedTermDocumentInformation tdi : tv) {
-        mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
+        mapper.map(tdi.getTerm().getTerm().bytes(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
       }
     }
   }
@@ -513,7 +475,7 @@ public class InstantiatedIndexReader ext
     for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
       mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
       for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
-        mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
+        mapper.map(tdi.getTerm().getTerm().bytes(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
       }
     }
   }

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermFreqVector.java Thu Jul 22 19:34:35 2010
@@ -1,6 +1,7 @@
 package org.apache.lucene.store.instantiated;
 
 import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.util.BytesRef;
 
 import java.io.Serializable;
 import java.util.Arrays;
@@ -34,18 +35,18 @@ public class InstantiatedTermFreqVector
 
   private final List<InstantiatedTermDocumentInformation> termDocumentInformations;
   private final String field;
-  private final String terms[];
+  private final BytesRef terms[];
   private final int termFrequencies[];
 
   public InstantiatedTermFreqVector(InstantiatedDocument document, String field) {
     this.field = field;
     termDocumentInformations = document.getVectorSpace().get(field);
-    terms = new String[termDocumentInformations.size()];
+    terms = new BytesRef[termDocumentInformations.size()];
     termFrequencies = new int[termDocumentInformations.size()];
 
     for (int i = 0; i < termDocumentInformations.size(); i++) {
       InstantiatedTermDocumentInformation termDocumentInformation = termDocumentInformations.get(i);
-      terms[i] = termDocumentInformation.getTerm().text();
+      terms[i] = termDocumentInformation.getTerm().getTerm().bytes();
       termFrequencies[i] = termDocumentInformation.getTermPositions().length;
     }
   }
@@ -77,7 +78,7 @@ public class InstantiatedTermFreqVector
     return terms == null ? 0 : terms.length;
   }
 
-  public String[] getTerms() {
+  public BytesRef[] getTerms() {
     return terms;
   }
 
@@ -85,14 +86,14 @@ public class InstantiatedTermFreqVector
     return termFrequencies;
   }
 
-  public int indexOf(String termText) {
+  public int indexOf(BytesRef termText) {
     if (terms == null)
       return -1;
     int res = Arrays.binarySearch(terms, termText);
     return res >= 0 ? res : -1;
   }
 
-  public int[] indexesOf(String[] termNumbers, int start, int len) {
+  public int[] indexesOf(BytesRef[] termNumbers, int start, int len) {
     // TODO: there must be a more efficient way of doing this.
     //       At least, we could advance the lower bound of the terms array
     //       as we find valid indices. Also, it might be possible to leverage

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Thu Jul 22 19:34:35 2010
@@ -41,14 +41,14 @@ public class InstantiatedTermsEnum exten
 
   @Override
   public SeekStatus seek(BytesRef text, boolean useCache) {
-    final Term t = new Term(field, text.utf8ToString());
+    final Term t = new Term(field, text);
     int loc = Arrays.binarySearch(terms, t, InstantiatedTerm.termComparator);
     if (loc < 0) {
       upto = -loc - 1;
       if (upto >= terms.length) {
         return SeekStatus.END;
       } else {
-        br.copy(terms[upto].getTerm().text());
+        br.copy(terms[upto].getTerm().bytes());
         return SeekStatus.NOT_FOUND;
       }
     } else {
@@ -123,7 +123,7 @@ public class InstantiatedTermsEnum exten
 
   @Override
   public Comparator<BytesRef> getComparator() {
-    return BytesRef.getUTF8SortedAsUTF16Comparator();
+    return BytesRef.getUTF8SortedAsUnicodeComparator();
   }
 }