You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/11 16:37:30 UTC
svn commit: r1069842 [1/4] - in /lucene/dev/branches/bulkpostings: ./
lucene/ lucene/src/java/org/apache/lucene/analysis/
lucene/src/java/org/apache/lucene/document/
lucene/src/java/org/apache/lucene/index/
lucene/src/java/org/apache/lucene/index/codec...
Author: rmuir
Date: Fri Feb 11 15:37:28 2011
New Revision: 1069842
URL: http://svn.apache.org/viewvc?rev=1069842&view=rev
Log:
merge trunk (1068956:1069829)
Added:
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java
- copied, changed from r1069829, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java
lucene/dev/branches/bulkpostings/modules/analysis/icu/src/data/uax29/Default.rbbi
- copied unchanged from r1069829, lucene/dev/trunk/modules/analysis/icu/src/data/uax29/Default.rbbi
lucene/dev/branches/bulkpostings/modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk
- copied unchanged from r1069829, lucene/dev/trunk/modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk
Modified:
lucene/dev/branches/bulkpostings/ (props changed)
lucene/dev/branches/bulkpostings/lucene/ (props changed)
lucene/dev/branches/bulkpostings/lucene/CHANGES.txt
lucene/dev/branches/bulkpostings/lucene/build.xml
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java
lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/branches/bulkpostings/modules/ (props changed)
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex
lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/DefaultICUTokenizerConfig.java
lucene/dev/branches/bulkpostings/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java
lucene/dev/branches/bulkpostings/solr/ (props changed)
lucene/dev/branches/bulkpostings/solr/CHANGES.txt (props changed)
lucene/dev/branches/bulkpostings/solr/KEYS (props changed)
lucene/dev/branches/bulkpostings/solr/LICENSE.txt (props changed)
lucene/dev/branches/bulkpostings/solr/NOTICE.txt (props changed)
lucene/dev/branches/bulkpostings/solr/README.txt (props changed)
lucene/dev/branches/bulkpostings/solr/build.xml (props changed)
lucene/dev/branches/bulkpostings/solr/client/ (props changed)
lucene/dev/branches/bulkpostings/solr/common-build.xml (props changed)
lucene/dev/branches/bulkpostings/solr/contrib/ (props changed)
lucene/dev/branches/bulkpostings/solr/example/ (props changed)
lucene/dev/branches/bulkpostings/solr/example/solr/conf/velocity/tabs.vm
lucene/dev/branches/bulkpostings/solr/lib/ (props changed)
lucene/dev/branches/bulkpostings/solr/site/ (props changed)
lucene/dev/branches/bulkpostings/solr/src/ (props changed)
lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/core/SolrResourceLoader.java
lucene/dev/branches/bulkpostings/solr/testlogging.properties (props changed)
Modified: lucene/dev/branches/bulkpostings/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/CHANGES.txt?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/lucene/CHANGES.txt Fri Feb 11 15:37:28 2011
@@ -849,7 +849,16 @@ New features
IndexReader, to allow apps that maintain external per-segment caches
to evict entries when a segment is finished. (Shay Banon, Yonik
Seeley, Mike McCandless)
-
+
+* LUCENE-2911: The new StandardTokenizer, UAX29URLEmailTokenizer, and
+ the ICUTokenizer in contrib now all tag types with a consistent set
+ of token types (defined in StandardTokenizer). Tokens in the major
+ CJK types are explicitly marked to allow for custom downstream handling:
+ <IDEOGRAPHIC>, <HANGUL>, <KATAKANA>, and <HIRAGANA>.
+ (Robert Muir, Steven Rowe)
+
+* LUCENE-2913: Add missing getters to Numeric* classes. (Uwe Schindler)
+
Optimizations
* LUCENE-2494: Use CompletionService in ParallelMultiSearcher instead of
Modified: lucene/dev/branches/bulkpostings/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/build.xml?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/build.xml (original)
+++ lucene/dev/branches/bulkpostings/lucene/build.xml Fri Feb 11 15:37:28 2011
@@ -391,7 +391,8 @@
<target name="dist-all" depends="dist, dist-src"/>
- <target name="generate-maven-artifacts" depends="maven.ant.tasks-check, package, jar-src, jar-test-framework-src, javadocs">
+ <target name="generate-maven-artifacts"
+ depends="maven.ant.tasks-check, package, jar-src, jar-test-framework-src, javadocs">
<sequential>
<ant target="get-maven-poms" dir=".."/>
@@ -403,22 +404,35 @@
classifier="sources"/>
<attach file="${build.dir}/${final.name}-javadoc.jar"
classifier="javadoc"/>
- <attach file="${build.dir}/${final.name}-tests.jar"
- classifier="tests"/>
</artifact-attachments>
</m2-deploy>
- <artifact:install-provider artifactId="wagon-ssh" version="1.0-beta-7"/>
- <artifact:pom id="test-framework-pom" file="src/test-framework/pom.xml"/>
- <artifact:deploy>
- <attach file="${build.dir}/${final.name}-tests.jar"
- classifier="tests" />
+ <!--
+ For the purposes of the generated artifacts, change the <packaging>
+ in the test-framework POM from "jar" to "test-jar" - this allows
+ artifact:deploy to properly name the artifact. The Maven build doesn't
+ have a lifecycle mapping for the "test-jar" packaging, though, so the
+ POM in src/test-framework/ is left with the "jar" packaging.
+ -->
+ <property name="test-jar-packaging-test-framework-pom"
+ location="${build.dir}/test-jar-packaging-test-framework,pom"/>
+ <copy file="src/test-framework/pom.xml"
+ tofile="${test-jar-packaging-test-framework-pom}">
+ <filterset begintoken="&lt;packaging&gt;" endtoken="&lt;/packaging&gt;">
+ <filter token="jar"
+ value="&lt;packaging&gt;test-jar&lt;/packaging&gt;"/>
+ </filterset>
+ </copy>
+ <artifact:pom id="test-framework-pom"
+ file="${test-jar-packaging-test-framework-pom}"/>
+ <artifact:deploy file="${build.dir}/${final.name}-tests.jar">
<attach file="${build.dir}/${final.name}-tests-src.jar"
classifier="test-sources"/>
<attach file="${build.dir}/${final.name}-tests-javadoc.jar"
classifier="test-javadoc"/>
<remoteRepository url="${m2.repository.url}">
- <authentication username="${m2.repository.username}" privateKey="${m2.repository.private.key}"/>
+ <authentication username="${m2.repository.username}"
+ privateKey="${m2.repository.private.key}"/>
</remoteRepository>
<pom refid="test-framework-pom"/>
</artifact:deploy>
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java Fri Feb 11 15:37:28 2011
@@ -293,6 +293,11 @@ public final class NumericTokenStream ex
return (shift < valSize);
}
+ /** Returns the precision step. */
+ public int getPrecisionStep() {
+ return precisionStep;
+ }
+
// members
private final NumericTermAttribute numericAtt = addAttribute(NumericTermAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java Fri Feb 11 15:37:28 2011
@@ -222,6 +222,11 @@ public final class NumericField extends
return (Number) fieldsData;
}
+ /** Returns the precision step. */
+ public int getPrecisionStep() {
+ return numericTS.getPrecisionStep();
+ }
+
/**
* Initializes the field with the supplied <code>long</code> value.
* @param value the numeric value
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Fri Feb 11 15:37:28 2011
@@ -53,20 +53,6 @@ public class SegmentWriteState {
* tweaking this is rarely useful.*/
public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC
- /** Expert: The fraction of TermDocs entries stored in skip tables,
- * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
- * smaller indexes, greater acceleration, but fewer accelerable cases, while
- * smaller values result in bigger indexes, less acceleration and more
- * accelerable cases. More detailed experiments would be useful here. */
- public final int skipInterval = 16;
-
- /** Expert: The maximum number of skip levels. Smaller values result in
- * slightly smaller indexes, but slower skipping in big posting lists.
- */
- public final int maxSkipLevels = 10;
-
-
-
public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
this.infoStream = infoStream;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Fri Feb 11 15:37:28 2011
@@ -23,17 +23,6 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.index.codecs.bulkvint.BulkVIntCodec;
-import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
-import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
-import org.apache.lucene.index.codecs.simple64.Simple64Codec;
-import org.apache.lucene.index.codecs.simple64.Simple64Codec;
-import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
-import org.apache.lucene.index.codecs.standard.StandardCodec;
-import org.apache.lucene.index.codecs.pfordelta.PatchedFrameOfRefCodec;
-import org.apache.lucene.index.codecs.pfordelta.FrameOfRefCodec;
-import org.apache.lucene.index.codecs.pfordelta2.PForDeltaFixedIntBlockCodec;
-
/** Holds a set of codecs, keyed by name. You subclass
* this, instantiate it, and register your codecs, then
* pass this instance to IndexReader/IndexWriter (via
@@ -101,7 +90,7 @@ public class CodecProvider {
return infosReader;
}
- static private CodecProvider defaultCodecs = new DefaultCodecProvider();
+ static private CodecProvider defaultCodecs = new CoreCodecProvider();
public static CodecProvider getDefault() {
return defaultCodecs;
@@ -169,17 +158,3 @@ public class CodecProvider {
defaultFieldCodec = codec;
}
}
-
-class DefaultCodecProvider extends CodecProvider {
- DefaultCodecProvider() {
- register(new StandardCodec());
- register(new PreFlexCodec());
- register(new PulsingCodec(1));
- register(new SimpleTextCodec());
- register(new PatchedFrameOfRefCodec());
- register(new FrameOfRefCodec());
- register(new PForDeltaFixedIntBlockCodec(128));
- register(new BulkVIntCodec(128));
- register(new Simple64Codec(4));
- }
-}
Copied: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java (from r1069829, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java?p2=lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java&r1=1069829&r2=1069842&rev=1069842&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/CoreCodecProvider.java Fri Feb 11 15:37:28 2011
@@ -17,8 +17,13 @@ package org.apache.lucene.index.codecs;
* limitations under the License.
*/
+import org.apache.lucene.index.codecs.bulkvint.BulkVIntCodec;
+import org.apache.lucene.index.codecs.pfordelta.FrameOfRefCodec;
+import org.apache.lucene.index.codecs.pfordelta.PatchedFrameOfRefCodec;
+import org.apache.lucene.index.codecs.pfordelta2.PForDeltaFixedIntBlockCodec;
import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
+import org.apache.lucene.index.codecs.simple64.Simple64Codec;
import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -45,5 +50,10 @@ class CoreCodecProvider extends CodecPro
register(new PreFlexCodec());
register(new PulsingCodec(1));
register(new SimpleTextCodec());
+ register(new PatchedFrameOfRefCodec());
+ register(new FrameOfRefCodec());
+ register(new PForDeltaFixedIntBlockCodec(128));
+ register(new BulkVIntCodec(128));
+ register(new Simple64Codec(4));
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java Fri Feb 11 15:37:28 2011
@@ -162,7 +162,8 @@ public class SegmentTermDocs {
/** Optimized implementation. */
public boolean skipTo(int target) throws IOException {
- if (df >= skipInterval) { // optimized case
+ // don't skip if the target is close (within skipInterval docs away)
+ if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case
if (skipListReader == null)
skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Fri Feb 11 15:37:28 2011
@@ -57,6 +57,7 @@ public class SepPostingsReaderImpl exten
int skipInterval;
int maxSkipLevels;
+ int skipMinimum;
public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
@@ -103,6 +104,7 @@ public class SepPostingsReaderImpl exten
SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
+ skipMinimum = termsIn.readInt();
}
@Override
@@ -232,7 +234,7 @@ public class SepPostingsReaderImpl exten
//System.out.println(" payloadFP=" + termState.payloadFP);
}
}
- if (termState.docFreq >= skipInterval) {
+ if (termState.docFreq >= skipMinimum) {
//System.out.println(" readSkip @ " + termState.bytesReader.pos);
if (isFirstTerm) {
termState.skipFP = termState.bytesReader.readVLong();
@@ -372,7 +374,7 @@ public class SepPostingsReaderImpl exten
docFreq = termState.docFreq;
assert docFreq > 0;
- // NOTE: unused if docFreq < skipInterval:
+ // NOTE: unused if docFreq < skipMinimum:
skipFP = termState.skipFP;
count = 0;
doc = 0;
@@ -442,14 +444,11 @@ public class SepPostingsReaderImpl exten
public int advance(int target) throws IOException {
//System.out.println("SepDocsEnum.advance target=" + target);
- // TODO: jump right to next() if target is < X away
- // from where we are now?
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
//System.out.println("SepDocsEnum.advance target=" + target);
- if (docFreq >= skipInterval) {
-
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close
if (skipper == null) {
// This DocsEnum has never done any skipping
@@ -679,13 +678,10 @@ public class SepPostingsReaderImpl exten
public int advance(int target) throws IOException {
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (docFreq >= skipInterval) {
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and it isn't too close
if (skipper == null) {
//System.out.println(" create skipper");
@@ -1065,7 +1061,8 @@ public class SepPostingsReaderImpl exten
@Override
public JumpResult jump(int target, int curCount) throws IOException {
- if (docFreq >= skipInterval) {
+ // TODO: require jump to take current docid and prevent skipping for close jumps?
+ if (docFreq >= skipMinimum) {
// There are enough docs in the posting to have
// skip data
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Fri Feb 11 15:37:28 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -63,8 +64,23 @@ public final class SepPostingsWriterImpl
IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
- final int skipInterval;
- final int maxSkipLevels;
+ /** Expert: The fraction of TermDocs entries stored in skip tables,
+ * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
+ * smaller indexes, greater acceleration, but fewer accelerable cases, while
+ * smaller values result in bigger indexes, less acceleration and more
+ * accelerable cases. More detailed experiments would be useful here. */
+ final int skipInterval = 16;
+
+ /**
+ * Expert: minimum docFreq to write any skip data at all
+ */
+ final int skipMinimum = skipInterval;
+
+ /** Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
+ final int maxSkipLevels = 10;
+
final int totalNumDocs;
boolean storePayloads;
@@ -118,15 +134,11 @@ public final class SepPostingsWriterImpl
totalNumDocs = state.numDocs;
- // TODO: -- abstraction violation
- skipListWriter = new SepSkipListWriter(state.skipInterval,
- state.maxSkipLevels,
+ skipListWriter = new SepSkipListWriter(skipInterval,
+ maxSkipLevels,
state.numDocs,
freqOut, docOut,
posOut, payloadOut);
-
- skipInterval = state.skipInterval;
- maxSkipLevels = state.maxSkipLevels;
}
@Override
@@ -136,6 +148,7 @@ public final class SepPostingsWriterImpl
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
+ termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@@ -265,7 +278,7 @@ public final class SepPostingsWriterImpl
}
}
- if (df >= skipInterval) {
+ if (df >= skipMinimum) {
//System.out.println(" skipFP=" + skipStart);
final long skipFP = skipOut.getFilePointer();
skipListWriter.writeSkip(skipOut);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java Fri Feb 11 15:37:28 2011
@@ -48,6 +48,7 @@ public class StandardPostingsReader exte
int skipInterval;
int maxSkipLevels;
+ int skipMinimum;
//private String segment;
@@ -87,6 +88,7 @@ public class StandardPostingsReader exte
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
+ skipMinimum = termsIn.readInt();
}
// Must keep final because we do non-standard clone
@@ -180,7 +182,7 @@ public class StandardPostingsReader exte
//System.out.println(" freqFP=" + termState.freqOffset);
assert termState.freqOffset < freqIn.length();
- if (termState.docFreq >= skipInterval) {
+ if (termState.docFreq >= skipMinimum) {
termState.skipOffset = termState.bytesReader.readVInt();
//System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
assert termState.freqOffset + termState.skipOffset < freqIn.length();
@@ -368,7 +370,7 @@ public class StandardPostingsReader exte
@Override
public int advance(int target) throws IOException {
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close.
@@ -518,7 +520,7 @@ public class StandardPostingsReader exte
//System.out.println("StandardR.D&PE advance target=" + target);
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
@@ -715,7 +717,7 @@ public class StandardPostingsReader exte
//System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
- if ((target - skipInterval) >= doc && limit >= skipInterval) {
+ if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
@@ -1124,10 +1126,8 @@ public class StandardPostingsReader exte
@Override
public JumpResult jump(int target, int curCount) throws IOException {
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (skipOffset > 0) {
+ // TODO: require jump to take current docid and prevent skipping for close jumps?
+ if (docFreq >= skipMinimum) {
// There are enough docs in the posting to have
// skip data
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Fri Feb 11 15:37:28 2011
@@ -23,6 +23,7 @@ package org.apache.lucene.index.codecs.s
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -44,8 +45,22 @@ public final class StandardPostingsWrite
final IndexOutput freqOut;
final IndexOutput proxOut;
final DefaultSkipListWriter skipListWriter;
- final int skipInterval;
- final int maxSkipLevels;
+ /** Expert: The fraction of TermDocs entries stored in skip tables,
+ * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
+ * smaller indexes, greater acceleration, but fewer accelerable cases, while
+ * smaller values result in bigger indexes, less acceleration and more
+ * accelerable cases. More detailed experiments would be useful here. */
+ final int skipInterval = 16;
+
+ /**
+ * Expert: minimum docFreq to write any skip data at all
+ */
+ final int skipMinimum = skipInterval;
+
+ /** Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
+ final int maxSkipLevels = 10;
final int totalNumDocs;
IndexOutput termsOut;
@@ -84,14 +99,11 @@ public final class StandardPostingsWrite
totalNumDocs = state.numDocs;
- skipListWriter = new DefaultSkipListWriter(state.skipInterval,
- state.maxSkipLevels,
+ skipListWriter = new DefaultSkipListWriter(skipInterval,
+ maxSkipLevels,
state.numDocs,
freqOut,
proxOut);
-
- skipInterval = state.skipInterval;
- maxSkipLevels = state.maxSkipLevels;
}
@Override
@@ -100,6 +112,7 @@ public final class StandardPostingsWrite
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
+ termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@@ -218,7 +231,7 @@ public final class StandardPostingsWrite
}
lastFreqStart = freqStart;
- if (df >= skipInterval) {
+ if (df >= skipMinimum) {
bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart));
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Fri Feb 11 15:37:28 2011
@@ -325,22 +325,26 @@ public final class FuzzyTermsEnum extend
/** finds the smallest Lev(n) DFA that accepts the term. */
@Override
- protected AcceptStatus accept(BytesRef term) {
- if (term.equals(termRef)) { // ed = 0
- boostAtt.setBoost(1.0F);
- return AcceptStatus.YES_AND_SEEK;
- }
-
- int codePointCount = -1;
+ protected AcceptStatus accept(BytesRef term) {
+ int ed = matchers.length - 1;
- // TODO: benchmark doing this backwards
- for (int i = 1; i < matchers.length; i++)
- if (matchers[i].run(term.bytes, term.offset, term.length)) {
- // this sucks, we convert just to score based on length.
- if (codePointCount == -1) {
- codePointCount = UnicodeUtil.codePointCount(term);
+ if (matches(term, ed)) { // we match the outer dfa
+ // now compute exact edit distance
+ while (ed > 0) {
+ if (matches(term, ed - 1)) {
+ ed--;
+ } else {
+ break;
}
- final float similarity = 1.0f - ((float) i / (float)
+ }
+
+ // scale to a boost and return (if similarity > minSimilarity)
+ if (ed == 0) { // exact match
+ boostAtt.setBoost(1.0F);
+ return AcceptStatus.YES_AND_SEEK;
+ } else {
+ final int codePointCount = UnicodeUtil.codePointCount(term);
+ final float similarity = 1.0f - ((float) ed / (float)
(Math.min(codePointCount, termLength)));
if (similarity > minSimilarity) {
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
@@ -349,8 +353,14 @@ public final class FuzzyTermsEnum extend
return AcceptStatus.NO_AND_SEEK;
}
}
-
- return AcceptStatus.NO_AND_SEEK;
+ } else {
+ return AcceptStatus.NO_AND_SEEK;
+ }
+ }
+
+ /** returns true if term is within k edits of the query term */
+ final boolean matches(BytesRef term, int k) {
+ return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
}
/** defers to superclass, except can start at an arbitrary location */
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeFilter.java Fri Feb 11 15:37:28 2011
@@ -179,4 +179,7 @@ public final class NumericRangeFilter<T
/** Returns the upper value of this range filter */
public T getMax() { return query.getMax(); }
+ /** Returns the precision step. */
+ public int getPrecisionStep() { return query.getPrecisionStep(); }
+
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java Fri Feb 11 15:37:28 2011
@@ -319,6 +319,9 @@ public final class NumericRangeQuery<T e
/** Returns the upper value of this range query */
public T getMax() { return max; }
+ /** Returns the precision step. */
+ public int getPrecisionStep() { return precisionStep; }
+
@Override
public String toString(final String field) {
final StringBuilder sb = new StringBuilder();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java Fri Feb 11 15:37:28 2011
@@ -151,7 +151,7 @@ public class MockRandomCodec extends Cod
@Override
public boolean isIndexTerm(BytesRef term, TermStats stats) {
- return random.nextInt(gap) == 17;
+ return rand.nextInt(gap) == 17;
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Feb 11 15:37:28 2011
@@ -287,7 +287,7 @@ public class TestIndexWriter extends Luc
// Import to use same term index interval else a
// smaller one here could increase the disk usage and
// cause a false failure:
- writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setTermIndexInterval(termIndexInterval));
+ writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setTermIndexInterval(termIndexInterval).setMergePolicy(newLogMergePolicy()));
writer.setInfoStream(VERBOSE ? System.out : null);
writer.optimize();
writer.close();
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Fri Feb 11 15:37:28 2011
@@ -15,8 +15,8 @@
*/
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Wednesday, January 5, 2011 12:34:09 PM UTC
-// generated on Thursday, January 6, 2011 5:09:41 AM UTC
+// file version from Wednesday, February 9, 2011 12:34:10 PM UTC
+// generated on Wednesday, February 9, 2011 4:45:18 PM UTC
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
ASCIITLD = "." (
@@ -285,13 +285,19 @@ ASCIITLD = "." (
| [wW][sS]
| [xX][nN]--0[zZ][wW][mM]56[dD]
| [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
+ | [xX][nN]--3[eE]0[bB]707[eE]
+ | [xX][nN]--45[bB][rR][jJ]9[cC]
| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
+ | [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
| [xX][nN]--[fF][iI][qQ][sS]8[sS]
| [xX][nN]--[fF][iI][qQ][zZ]9[sS]
+ | [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
| [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
| [xX][nN]--[gG]6[wW]251[dD]
+ | [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
+ | [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
| [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
| [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
| [xX][nN]--[jJ]6[wW]193[gG]
@@ -301,13 +307,18 @@ ASCIITLD = "." (
| [xX][nN]--[kK][pP][rR][yY]57[dD]
| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
+ | [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
| [xX][nN]--[oO]3[cC][wW]4[hH]
+ | [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
| [xX][nN]--[pP]1[aA][iI]
| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
+ | [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
| [xX][nN]--[wW][gG][bB][hH]1[cC]
| [xX][nN]--[wW][gG][bB][lL]6[aA]
| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
+ | [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
+ | [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
| [yY][eE]
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java Fri Feb 11 15:37:28 2011
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 12/4/10 7:24 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */
package org.apache.lucene.analysis.standard;
@@ -26,14 +26,15 @@ WARNING: if you change ClassicTokenizerI
*/
+import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 12/4/10 7:24 PM from the specification file
- * <tt>C:/cygwin/home/us/svn/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
+ * on 2/9/11 11:45 AM from the specification file
+ * <tt>C:/Users/rmuir/workspace/lucene-2911/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
*/
class ClassicTokenizerImpl implements StandardTokenizerInterface {
@@ -681,44 +682,44 @@ public final void getText(CharTermAttrib
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
- case 5:
- { return NUM;
+ case 10:
+ { return EMAIL;
}
case 11: break;
- case 9:
- { return ACRONYM;
+ case 2:
+ { return ALPHANUM;
}
case 12: break;
- case 7:
- { return COMPANY;
+ case 4:
+ { return HOST;
}
case 13: break;
- case 10:
- { return EMAIL;
- }
- case 14: break;
case 1:
{ /* ignore */
}
+ case 14: break;
+ case 8:
+ { return ACRONYM_DEP;
+ }
case 15: break;
- case 6:
- { return APOSTROPHE;
+ case 5:
+ { return NUM;
}
case 16: break;
- case 3:
- { return CJ;
+ case 9:
+ { return ACRONYM;
}
case 17: break;
- case 8:
- { return ACRONYM_DEP;
+ case 7:
+ { return COMPANY;
}
case 18: break;
- case 2:
- { return ALPHANUM;
+ case 6:
+ { return APOSTROPHE;
}
case 19: break;
- case 4:
- { return HOST;
+ case 3:
+ { return CJ;
}
case 20: break;
default:
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro Fri Feb 11 15:37:28 2011
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-// Generated using ICU4J 4.6.0.0 on Thursday, January 6, 2011 7:02:52 PM UTC
+// Generated using ICU4J 4.6.0.0 on Wednesday, February 9, 2011 4:45:11 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1069842&r1=1069841&r2=1069842&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Fri Feb 11 15:37:28 2011
@@ -78,6 +78,8 @@ public final class StandardTokenizer ext
public static final int SOUTHEAST_ASIAN = 9;
public static final int IDEOGRAPHIC = 10;
public static final int HIRAGANA = 11;
+ public static final int KATAKANA = 12;
+ public static final int HANGUL = 13;
/** String token types that correspond to token type int constants */
public static final String [] TOKEN_TYPES = new String [] {
@@ -92,7 +94,9 @@ public final class StandardTokenizer ext
"<ACRONYM_DEP>",
"<SOUTHEAST_ASIAN>",
"<IDEOGRAPHIC>",
- "<HIRAGANA>"
+ "<HIRAGANA>",
+ "<KATAKANA>",
+ "<HANGUL>"
};
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;