You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/11 16:37:30 UTC

svn commit: r1069842 [1/4] - in /lucene/dev/branches/bulkpostings: ./ lucene/ lucene/src/java/org/apache/lucene/analysis/ lucene/src/java/org/apache/lucene/document/ lucene/src/java/org/apache/lucene/index/ lucene/src/java/org/apache/lucene/index/codec...

Author: rmuir
Date: Fri Feb 11 15:37:28 2011
New Revision: 1069842

URL: http://svn.apache.org/viewvc?rev=1069842&view=rev
Log:
merge trunk (1068956:1069829)

Added:
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java
      - copied, changed from r1069829, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java
    lucene/dev/branches/bulkpostings/modules/analysis/icu/src/data/uax29/Default.rbbi
      - copied unchanged from r1069829, lucene/dev/trunk/modules/analysis/icu/src/data/uax29/Default.rbbi
    lucene/dev/branches/bulkpostings/modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk
      - copied unchanged from r1069829, lucene/dev/trunk/modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk
Modified:
    lucene/dev/branches/bulkpostings/   (props changed)
    lucene/dev/branches/bulkpostings/lucene/   (props changed)
    lucene/dev/branches/bulkpostings/lucene/CHANGES.txt
    lucene/dev/branches/bulkpostings/lucene/build.xml
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/branches/bulkpostings/modules/   (props changed)
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
    lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java
    lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
    lucene/dev/branches/bulkpostings/solr/   (props changed)
    lucene/dev/branches/bulkpostings/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/bulkpostings/solr/KEYS   (props changed)
    lucene/dev/branches/bulkpostings/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/bulkpostings/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/bulkpostings/solr/README.txt   (props changed)
    lucene/dev/branches/bulkpostings/solr/build.xml   (props changed)
    lucene/dev/branches/bulkpostings/solr/client/   (props changed)
    lucene/dev/branches/bulkpostings/solr/common-build.xml   (props changed)
    lucene/dev/branches/bulkpostings/solr/contrib/   (props changed)
    lucene/dev/branches/bulkpostings/solr/example/   (props changed)
    lucene/dev/branches/bulkpostings/solr/example/solr/conf/velocity/tabs.vm
    lucene/dev/branches/bulkpostings/solr/lib/   (props changed)
    lucene/dev/branches/bulkpostings/solr/site/   (props changed)
    lucene/dev/branches/bulkpostings/solr/src/   (props changed)
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/core/SolrResourceLoader.java
    lucene/dev/branches/bulkpostings/solr/testlogging.properties   (props changed)

Modified: lucene/dev/branches/bulkpostings/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/CHANGES.txt?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/lucene/CHANGES.txt Fri Feb 11 15:37:28 2011
@@ -849,7 +849,16 @@ New features
   IndexReader, to allow apps that maintain external per-segment caches
   to evict entries when a segment is finished.  (Shay Banon, Yonik
   Seeley, Mike McCandless)
-  
+
+* LUCENE-2911: The new StandardTokenizer, UAX29URLEmailTokenizer, and
+  the ICUTokenizer in contrib now all tag types with a consistent set
+  of token types (defined in StandardTokenizer). Tokens in the major
+  CJK types are explicitly marked to allow for custom downstream handling: 
+  <IDEOGRAPHIC>, <HANGUL>, <KATAKANA>, and <HIRAGANA>.  
+  (Robert Muir, Steven Rowe)
+
+* LUCENE-2913: Add missing getters to Numeric* classes. (Uwe Schindler)
+
 Optimizations
 
 * LUCENE-2494: Use CompletionService in ParallelMultiSearcher instead of

Modified: lucene/dev/branches/bulkpostings/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/build.xml?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/build.xml (original)
+++ lucene/dev/branches/bulkpostings/lucene/build.xml Fri Feb 11 15:37:28 2011
@@ -391,7 +391,8 @@
 
   <target name="dist-all" depends="dist, dist-src"/>
 
-  <target name="generate-maven-artifacts" depends="maven.ant.tasks-check, package, jar-src, jar-test-framework-src, javadocs">
+  <target name="generate-maven-artifacts"
+          depends="maven.ant.tasks-check, package, jar-src, jar-test-framework-src, javadocs">
     <sequential>
 	  <ant target="get-maven-poms" dir=".."/>
  
@@ -403,22 +404,35 @@
                   classifier="sources"/>
           <attach file="${build.dir}/${final.name}-javadoc.jar"
                   classifier="javadoc"/>
-          <attach file="${build.dir}/${final.name}-tests.jar"
-                  classifier="tests"/>
         </artifact-attachments>
       </m2-deploy>
 
-      <artifact:install-provider artifactId="wagon-ssh" version="1.0-beta-7"/>
-      <artifact:pom id="test-framework-pom" file="src/test-framework/pom.xml"/>
-      <artifact:deploy>
-        <attach file="${build.dir}/${final.name}-tests.jar" 
-                classifier="tests" />
+      <!--
+        For the purposes of the generated artifacts, change the <packaging>
+        in the test-framework POM from "jar" to "test-jar" - this allows
+        artifact:deploy to properly name the artifact.  The Maven build doesn't
+        have a lifecycle mapping for the "test-jar" packaging, though, so the
+        POM in src/test-framework/ is left with the "jar" packaging.
+      -->
+      <property name="test-jar-packaging-test-framework-pom"
+                location="${build.dir}/test-jar-packaging-test-framework,pom"/>
+      <copy file="src/test-framework/pom.xml"
+            tofile="${test-jar-packaging-test-framework-pom}">
+        <filterset begintoken="&lt;packaging&gt;" endtoken="&lt;/packaging&gt;">
+          <filter token="jar"
+                  value="&lt;packaging&gt;test-jar&lt;/packaging&gt;"/>
+        </filterset>
+      </copy>
+      <artifact:pom id="test-framework-pom"
+                    file="${test-jar-packaging-test-framework-pom}"/>
+      <artifact:deploy file="${build.dir}/${final.name}-tests.jar">
         <attach file="${build.dir}/${final.name}-tests-src.jar"
                 classifier="test-sources"/>
         <attach file="${build.dir}/${final.name}-tests-javadoc.jar"
                 classifier="test-javadoc"/>
         <remoteRepository url="${m2.repository.url}">
-          <authentication username="${m2.repository.username}" privateKey="${m2.repository.private.key}"/>
+          <authentication username="${m2.repository.username}"
+                          privateKey="${m2.repository.private.key}"/>
         </remoteRepository>
         <pom refid="test-framework-pom"/>
       </artifact:deploy>

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java Fri Feb 11 15:37:28 2011
@@ -293,6 +293,11 @@ public final class NumericTokenStream ex
     return (shift < valSize);
   }
 
+  /** Returns the precision step. */
+  public int getPrecisionStep() {
+    return precisionStep;
+  }
+  
   // members
   private final NumericTermAttribute numericAtt = addAttribute(NumericTermAttribute.class);
   private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java Fri Feb 11 15:37:28 2011
@@ -222,6 +222,11 @@ public final class NumericField extends 
     return (Number) fieldsData;
   }
   
+  /** Returns the precision step. */
+  public int getPrecisionStep() {
+    return numericTS.getPrecisionStep();
+  }
+  
   /**
    * Initializes the field with the supplied <code>long</code> value.
    * @param value the numeric value

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Fri Feb 11 15:37:28 2011
@@ -53,20 +53,6 @@ public class SegmentWriteState {
    * tweaking this is rarely useful.*/
   public int termIndexInterval;                   // TODO: this should be private to the codec, not settable here or in IWC
 
-  /** Expert: The fraction of TermDocs entries stored in skip tables,
-   * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
-   * smaller indexes, greater acceleration, but fewer accelerable cases, while
-   * smaller values result in bigger indexes, less acceleration and more
-   * accelerable cases. More detailed experiments would be useful here. */
-  public final int skipInterval = 16;
-  
-  /** Expert: The maximum number of skip levels. Smaller values result in 
-   * slightly smaller indexes, but slower skipping in big posting lists.
-   */
-  public final int maxSkipLevels = 10;
-  
-
-
   public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
                            int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
     this.infoStream = infoStream;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Fri Feb 11 15:37:28 2011
@@ -23,17 +23,6 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.index.codecs.bulkvint.BulkVIntCodec;
-import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
-import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
-import org.apache.lucene.index.codecs.simple64.Simple64Codec;
-import org.apache.lucene.index.codecs.simple64.Simple64Codec;
-import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
-import org.apache.lucene.index.codecs.standard.StandardCodec;
-import org.apache.lucene.index.codecs.pfordelta.PatchedFrameOfRefCodec;
-import org.apache.lucene.index.codecs.pfordelta.FrameOfRefCodec;
-import org.apache.lucene.index.codecs.pfordelta2.PForDeltaFixedIntBlockCodec;
-
 /** Holds a set of codecs, keyed by name.  You subclass
  *  this, instantiate it, and register your codecs, then
  *  pass this instance to IndexReader/IndexWriter (via
@@ -101,7 +90,7 @@ public class CodecProvider {
     return infosReader;
   }
 
-  static private CodecProvider defaultCodecs = new DefaultCodecProvider();
+  static private CodecProvider defaultCodecs = new CoreCodecProvider();
 
   public static CodecProvider getDefault() {
     return defaultCodecs;
@@ -169,17 +158,3 @@ public class CodecProvider {
     defaultFieldCodec = codec;
   }
 }
-
-class DefaultCodecProvider extends CodecProvider {
-  DefaultCodecProvider() {
-    register(new StandardCodec());
-    register(new PreFlexCodec());
-    register(new PulsingCodec(1));
-    register(new SimpleTextCodec());
-    register(new PatchedFrameOfRefCodec());
-    register(new FrameOfRefCodec());
-    register(new PForDeltaFixedIntBlockCodec(128));
-    register(new BulkVIntCodec(128));
-    register(new Simple64Codec(4));
-  }
-}

Copied: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java (from r1069829, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java?p2=lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java&r1=1069829&r2=1069842&rev=1069842&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java Fri Feb 11 15:37:28 2011
@@ -17,8 +17,13 @@ package org.apache.lucene.index.codecs;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.codecs.bulkvint.BulkVIntCodec;
+import org.apache.lucene.index.codecs.pfordelta.FrameOfRefCodec;
+import org.apache.lucene.index.codecs.pfordelta.PatchedFrameOfRefCodec;
+import org.apache.lucene.index.codecs.pfordelta2.PForDeltaFixedIntBlockCodec;
 import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
 import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
+import org.apache.lucene.index.codecs.simple64.Simple64Codec;
 import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 
@@ -45,5 +50,10 @@ class CoreCodecProvider extends CodecPro
     register(new PreFlexCodec());
     register(new PulsingCodec(1));
     register(new SimpleTextCodec());
+    register(new PatchedFrameOfRefCodec());
+    register(new FrameOfRefCodec());
+    register(new PForDeltaFixedIntBlockCodec(128));
+    register(new BulkVIntCodec(128));
+    register(new Simple64Codec(4));
   }
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java Fri Feb 11 15:37:28 2011
@@ -162,7 +162,8 @@ public class SegmentTermDocs {
 
   /** Optimized implementation. */
   public boolean skipTo(int target) throws IOException {
-    if (df >= skipInterval) {                      // optimized case
+    // don't skip if the target is close (within skipInterval docs away)
+    if ((target - skipInterval) >= doc && df >= skipInterval) {                      // optimized case
       if (skipListReader == null)
         skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Fri Feb 11 15:37:28 2011
@@ -57,6 +57,7 @@ public class SepPostingsReaderImpl exten
 
   int skipInterval;
   int maxSkipLevels;
+  int skipMinimum;
 
   public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
 
@@ -103,6 +104,7 @@ public class SepPostingsReaderImpl exten
       SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START);
     skipInterval = termsIn.readInt();
     maxSkipLevels = termsIn.readInt();
+    skipMinimum = termsIn.readInt();
   }
 
   @Override
@@ -232,7 +234,7 @@ public class SepPostingsReaderImpl exten
         //System.out.println("  payloadFP=" + termState.payloadFP);
       }
     }
-    if (termState.docFreq >= skipInterval) {
+    if (termState.docFreq >= skipMinimum) {
       //System.out.println("   readSkip @ " + termState.bytesReader.pos);
       if (isFirstTerm) {
         termState.skipFP = termState.bytesReader.readVLong();
@@ -372,7 +374,7 @@ public class SepPostingsReaderImpl exten
 
       docFreq = termState.docFreq;
       assert docFreq > 0;
-      // NOTE: unused if docFreq < skipInterval:
+      // NOTE: unused if docFreq < skipMinimum:
       skipFP = termState.skipFP;
       count = 0;
       doc = 0;
@@ -442,14 +444,11 @@ public class SepPostingsReaderImpl exten
     public int advance(int target) throws IOException {
       //System.out.println("SepDocsEnum.advance target=" + target);
 
-      // TODO: jump right to next() if target is < X away
-      // from where we are now?
+      if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
       //System.out.println("SepDocsEnum.advance target=" + target);
 
-      if (docFreq >= skipInterval) {
-
         // There are enough docs in the posting to have
-        // skip data
+        // skip data, and it's not too close
 
         if (skipper == null) {
           // This DocsEnum has never done any skipping
@@ -679,13 +678,10 @@ public class SepPostingsReaderImpl exten
     public int advance(int target) throws IOException {
       //System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
 
-      // TODO: jump right to next() if target is < X away
-      // from where we are now?
-
-      if (docFreq >= skipInterval) {
+      if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
 
         // There are enough docs in the posting to have
-        // skip data
+        // skip data, and it's not too close
 
         if (skipper == null) {
           //System.out.println("  create skipper");
@@ -1065,7 +1061,8 @@ public class SepPostingsReaderImpl exten
     @Override
     public JumpResult jump(int target, int curCount) throws IOException {
 
-      if (docFreq >= skipInterval) {
+      // TODO: require jump to take current docid and prevent skipping for close jumps?
+      if (docFreq >= skipMinimum) {
 
         // There are enough docs in the posting to have
         // skip data

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Fri Feb 11 15:37:28 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.Set;
 
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
@@ -63,8 +64,23 @@ public final class SepPostingsWriterImpl
   IndexOutput termsOut;
 
   final SepSkipListWriter skipListWriter;
-  final int skipInterval;
-  final int maxSkipLevels;
+  /** Expert: The fraction of TermDocs entries stored in skip tables,
+   * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
+   * smaller indexes, greater acceleration, but fewer accelerable cases, while
+   * smaller values result in bigger indexes, less acceleration and more
+   * accelerable cases. More detailed experiments would be useful here. */
+  final int skipInterval = 16;
+  
+  /**
+   * Expert: minimum docFreq to write any skip data at all
+   */
+  final int skipMinimum = skipInterval;
+
+  /** Expert: The maximum number of skip levels. Smaller values result in 
+   * slightly smaller indexes, but slower skipping in big posting lists.
+   */
+  final int maxSkipLevels = 10;
+
   final int totalNumDocs;
 
   boolean storePayloads;
@@ -118,15 +134,11 @@ public final class SepPostingsWriterImpl
 
     totalNumDocs = state.numDocs;
 
-    // TODO: -- abstraction violation
-    skipListWriter = new SepSkipListWriter(state.skipInterval,
-                                           state.maxSkipLevels,
+    skipListWriter = new SepSkipListWriter(skipInterval,
+                                           maxSkipLevels,
                                            state.numDocs,
                                            freqOut, docOut,
                                            posOut, payloadOut);
-
-    skipInterval = state.skipInterval;
-    maxSkipLevels = state.maxSkipLevels;
   }
 
   @Override
@@ -136,6 +148,7 @@ public final class SepPostingsWriterImpl
     // TODO: -- just ask skipper to "start" here
     termsOut.writeInt(skipInterval);                // write skipInterval
     termsOut.writeInt(maxSkipLevels);               // write maxSkipLevels
+    termsOut.writeInt(skipMinimum);                 // write skipMinimum
   }
 
   @Override
@@ -265,7 +278,7 @@ public final class SepPostingsWriterImpl
       }
     }
 
-    if (df >= skipInterval) {
+    if (df >= skipMinimum) {
       //System.out.println("  skipFP=" + skipStart);
       final long skipFP = skipOut.getFilePointer();
       skipListWriter.writeSkip(skipOut);

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Fri Feb 11 15:37:28 2011
@@ -48,6 +48,7 @@ public class StandardPostingsReader exte
 
   int skipInterval;
   int maxSkipLevels;
+  int skipMinimum;
 
   //private String segment;
 
@@ -87,6 +88,7 @@ public class StandardPostingsReader exte
 
     skipInterval = termsIn.readInt();
     maxSkipLevels = termsIn.readInt();
+    skipMinimum = termsIn.readInt();
   }
 
   // Must keep final because we do non-standard clone
@@ -180,7 +182,7 @@ public class StandardPostingsReader exte
     //System.out.println("  freqFP=" + termState.freqOffset);
     assert termState.freqOffset < freqIn.length();
 
-    if (termState.docFreq >= skipInterval) {
+    if (termState.docFreq >= skipMinimum) {
       termState.skipOffset = termState.bytesReader.readVInt();
       //System.out.println("  skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
       assert termState.freqOffset + termState.skipOffset < freqIn.length();
@@ -368,7 +370,7 @@ public class StandardPostingsReader exte
     @Override
     public int advance(int target) throws IOException {
 
-      if ((target - skipInterval) >= doc && limit >= skipInterval) {
+      if ((target - skipInterval) >= doc && limit >= skipMinimum) {
 
         // There are enough docs in the posting to have
         // skip data, and it isn't too close.
@@ -518,7 +520,7 @@ public class StandardPostingsReader exte
 
       //System.out.println("StandardR.D&PE advance target=" + target);
 
-      if ((target - skipInterval) >= doc && limit >= skipInterval) {
+      if ((target - skipInterval) >= doc && limit >= skipMinimum) {
 
         // There are enough docs in the posting to have
         // skip data, and it isn't too close
@@ -715,7 +717,7 @@ public class StandardPostingsReader exte
 
       //System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
 
-      if ((target - skipInterval) >= doc && limit >= skipInterval) {
+      if ((target - skipInterval) >= doc && limit >= skipMinimum) {
 
         // There are enough docs in the posting to have
         // skip data, and it isn't too close
@@ -1124,10 +1126,8 @@ public class StandardPostingsReader exte
     @Override
     public JumpResult jump(int target, int curCount) throws IOException {
   
-      // TODO: jump right to next() if target is < X away
-      // from where we are now?
-  
-      if (skipOffset > 0) {
+      // TODO: require jump to take current docid and prevent skipping for close jumps?
+      if (docFreq >= skipMinimum) {
   
         // There are enough docs in the posting to have
         // skip data

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Fri Feb 11 15:37:28 2011
@@ -23,6 +23,7 @@ package org.apache.lucene.index.codecs.s
 import java.io.IOException;
 
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
@@ -44,8 +45,22 @@ public final class StandardPostingsWrite
   final IndexOutput freqOut;
   final IndexOutput proxOut;
   final DefaultSkipListWriter skipListWriter;
-  final int skipInterval;
-  final int maxSkipLevels;
+  /** Expert: The fraction of TermDocs entries stored in skip tables,
+   * used to accelerate {@link DocsEnum#advance(int)}.  Larger values result in
+   * smaller indexes, greater acceleration, but fewer accelerable cases, while
+   * smaller values result in bigger indexes, less acceleration and more
+   * accelerable cases. More detailed experiments would be useful here. */
+  final int skipInterval = 16;
+  
+  /**
+   * Expert: minimum docFreq to write any skip data at all
+   */
+  final int skipMinimum = skipInterval;
+
+  /** Expert: The maximum number of skip levels. Smaller values result in 
+   * slightly smaller indexes, but slower skipping in big posting lists.
+   */
+  final int maxSkipLevels = 10;
   final int totalNumDocs;
   IndexOutput termsOut;
 
@@ -84,14 +99,11 @@ public final class StandardPostingsWrite
 
     totalNumDocs = state.numDocs;
 
-    skipListWriter = new DefaultSkipListWriter(state.skipInterval,
-                                               state.maxSkipLevels,
+    skipListWriter = new DefaultSkipListWriter(skipInterval,
+                                               maxSkipLevels,
                                                state.numDocs,
                                                freqOut,
                                                proxOut);
-     
-    skipInterval = state.skipInterval;
-    maxSkipLevels = state.maxSkipLevels;
   }
 
   @Override
@@ -100,6 +112,7 @@ public final class StandardPostingsWrite
     CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
     termsOut.writeInt(skipInterval);                // write skipInterval
     termsOut.writeInt(maxSkipLevels);               // write maxSkipLevels
+    termsOut.writeInt(skipMinimum);                 // write skipMinimum
   }
 
   @Override
@@ -218,7 +231,7 @@ public final class StandardPostingsWrite
     }
     lastFreqStart = freqStart;
 
-    if (df >= skipInterval) {
+    if (df >= skipMinimum) {
       bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
     }
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Fri Feb 11 15:37:28 2011
@@ -325,22 +325,26 @@ public final class FuzzyTermsEnum extend
     
     /** finds the smallest Lev(n) DFA that accepts the term. */
     @Override
-    protected AcceptStatus accept(BytesRef term) {
-      if (term.equals(termRef)) { // ed = 0
-        boostAtt.setBoost(1.0F);
-        return AcceptStatus.YES_AND_SEEK;
-      }
-      
-      int codePointCount = -1;
+    protected AcceptStatus accept(BytesRef term) {    
+      int ed = matchers.length - 1;
       
-      // TODO: benchmark doing this backwards
-      for (int i = 1; i < matchers.length; i++)
-        if (matchers[i].run(term.bytes, term.offset, term.length)) {
-          // this sucks, we convert just to score based on length.
-          if (codePointCount == -1) {
-            codePointCount = UnicodeUtil.codePointCount(term);
+      if (matches(term, ed)) { // we match the outer dfa
+        // now compute exact edit distance
+        while (ed > 0) {
+          if (matches(term, ed - 1)) {
+            ed--;
+          } else {
+            break;
           }
-          final float similarity = 1.0f - ((float) i / (float) 
+        }
+        
+        // scale to a boost and return (if similarity > minSimilarity)
+        if (ed == 0) { // exact match
+          boostAtt.setBoost(1.0F);
+          return AcceptStatus.YES_AND_SEEK;
+        } else {
+          final int codePointCount = UnicodeUtil.codePointCount(term);
+          final float similarity = 1.0f - ((float) ed / (float) 
               (Math.min(codePointCount, termLength)));
           if (similarity > minSimilarity) {
             boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
@@ -349,8 +353,14 @@ public final class FuzzyTermsEnum extend
             return AcceptStatus.NO_AND_SEEK;
           }
         }
-      
-      return AcceptStatus.NO_AND_SEEK;
+      } else {
+        return AcceptStatus.NO_AND_SEEK;
+      }
+    }
+    
+    /** Returns true if term is within k edits of the query term: k == 0 compares term to termRef for equality, any other k runs matchers[k] over the term's bytes. */
+    final boolean matches(BytesRef term, int k) {
+      return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
     }
     
     /** defers to superclass, except can start at an arbitrary location */

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java Fri Feb 11 15:37:28 2011
@@ -179,4 +179,7 @@ public final class NumericRangeFilter<T 
   /** Returns the upper value of this range filter */
   public T getMax() { return query.getMax(); }
   
+  /** Returns the precision step, as reported by {@code query}. */
+  public int getPrecisionStep() { return query.getPrecisionStep(); }
+  
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java Fri Feb 11 15:37:28 2011
@@ -319,6 +319,9 @@ public final class NumericRangeQuery<T e
   /** Returns the upper value of this range query */
   public T getMax() { return max; }
   
+  /** Returns the precision step of this range query */
+  public int getPrecisionStep() { return precisionStep; }
+  
   @Override
   public String toString(final String field) {
     final StringBuilder sb = new StringBuilder();

Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java Fri Feb 11 15:37:28 2011
@@ -151,7 +151,7 @@ public class MockRandomCodec extends Cod
 
               @Override
               public boolean isIndexTerm(BytesRef term, TermStats stats) {
-                return random.nextInt(gap) == 17;
+                return rand.nextInt(gap) == 17;
               }
 
               @Override

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Feb 11 15:37:28 2011
@@ -287,7 +287,7 @@ public class TestIndexWriter extends Luc
       // Import to use same term index interval else a
       // smaller one here could increase the disk usage and
       // cause a false failure:
-      writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setTermIndexInterval(termIndexInterval));
+      writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setTermIndexInterval(termIndexInterval).setMergePolicy(newLogMergePolicy()));
       writer.setInfoStream(VERBOSE ? System.out : null);
       writer.optimize();
       writer.close();

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Fri Feb 11 15:37:28 2011
@@ -15,8 +15,8 @@
  */
 
 // Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Wednesday, January 5, 2011 12:34:09 PM UTC
-// generated on Thursday, January 6, 2011 5:09:41 AM UTC
+// file version from Wednesday, February 9, 2011 12:34:10 PM UTC
+// generated on Wednesday, February 9, 2011 4:45:18 PM UTC
 // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
 
 ASCIITLD = "." (
@@ -285,13 +285,19 @@ ASCIITLD = "." (
 	| [wW][sS]
 	| [xX][nN]--0[zZ][wW][mM]56[dD]
 	| [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
+	| [xX][nN]--3[eE]0[bB]707[eE]
+	| [xX][nN]--45[bB][rR][jJ]9[cC]
 	| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
 	| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
+	| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
 	| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
 	| [xX][nN]--[fF][iI][qQ][sS]8[sS]
 	| [xX][nN]--[fF][iI][qQ][zZ]9[sS]
+	| [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
 	| [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
 	| [xX][nN]--[gG]6[wW]251[dD]
+	| [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
+	| [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
 	| [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
 	| [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
 	| [xX][nN]--[jJ]6[wW]193[gG]
@@ -301,13 +307,18 @@ ASCIITLD = "." (
 	| [xX][nN]--[kK][pP][rR][yY]57[dD]
 	| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
 	| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
+	| [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
 	| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
 	| [xX][nN]--[oO]3[cC][wW]4[hH]
+	| [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
 	| [xX][nN]--[pP]1[aA][iI]
 	| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
+	| [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
 	| [xX][nN]--[wW][gG][bB][hH]1[cC]
 	| [xX][nN]--[wW][gG][bB][lL]6[aA]
 	| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
+	| [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
+	| [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
 	| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
 	| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
 	| [yY][eE]

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java Fri Feb 11 15:37:28 2011
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 12/4/10 7:24 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -26,14 +26,15 @@ WARNING: if you change ClassicTokenizerI
 
 */
 
+import java.io.Reader;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 
 /**
  * This class is a scanner generated by 
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 12/4/10 7:24 PM from the specification file
- * <tt>C:/cygwin/home/us/svn/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
+ * on 2/9/11 11:45 AM from the specification file
+ * <tt>C:/Users/rmuir/workspace/lucene-2911/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
  */
 class ClassicTokenizerImpl implements StandardTokenizerInterface {
 
@@ -681,44 +682,44 @@ public final void getText(CharTermAttrib
       zzMarkedPos = zzMarkedPosL;
 
       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 5: 
-          { return NUM;
+        case 10: 
+          { return EMAIL;
           }
         case 11: break;
-        case 9: 
-          { return ACRONYM;
+        case 2: 
+          { return ALPHANUM;
           }
         case 12: break;
-        case 7: 
-          { return COMPANY;
+        case 4: 
+          { return HOST;
           }
         case 13: break;
-        case 10: 
-          { return EMAIL;
-          }
-        case 14: break;
         case 1: 
           { /* ignore */
           }
+        case 14: break;
+        case 8: 
+          { return ACRONYM_DEP;
+          }
         case 15: break;
-        case 6: 
-          { return APOSTROPHE;
+        case 5: 
+          { return NUM;
           }
         case 16: break;
-        case 3: 
-          { return CJ;
+        case 9: 
+          { return ACRONYM;
           }
         case 17: break;
-        case 8: 
-          { return ACRONYM_DEP;
+        case 7: 
+          { return COMPANY;
           }
         case 18: break;
-        case 2: 
-          { return ALPHANUM;
+        case 6: 
+          { return APOSTROPHE;
           }
         case 19: break;
-        case 4: 
-          { return HOST;
+        case 3: 
+          { return CJ;
           }
         case 20: break;
         default: 

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro Fri Feb 11 15:37:28 2011
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Generated using ICU4J 4.6.0.0 on Thursday, January 6, 2011 7:02:52 PM UTC
+// Generated using ICU4J 4.6.0.0 on Wednesday, February 9, 2011 4:45:11 PM UTC
 // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
 
 

Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Fri Feb 11 15:37:28 2011
@@ -78,6 +78,8 @@ public final class StandardTokenizer ext
   public static final int SOUTHEAST_ASIAN = 9;
   public static final int IDEOGRAPHIC = 10;
   public static final int HIRAGANA = 11;
+  public static final int KATAKANA = 12;
+  public static final int HANGUL = 13;
   
   /** String token types that correspond to token type int constants */
   public static final String [] TOKEN_TYPES = new String [] {
@@ -92,7 +94,9 @@ public final class StandardTokenizer ext
     "<ACRONYM_DEP>",
     "<SOUTHEAST_ASIAN>",
     "<IDEOGRAPHIC>",
-    "<HIRAGANA>"
+    "<HIRAGANA>",
+    "<KATAKANA>",
+    "<HANGUL>"
   };
 
   private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;