You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/03 02:29:48 UTC
svn commit: r1098871 - in /lucene/dev/trunk: lucene/
lucene/src/java/org/apache/lucene/analysis/
lucene/src/java/org/apache/lucene/util/
lucene/src/test-framework/org/apache/lucene/analysis/
lucene/src/test/org/apache/lucene/analysis/ lucene/src/test/org/apache/lucene/index/ ...
Author: rmuir
Date: Tue May 3 00:29:47 2011
New Revision: 1098871
URL: http://svn.apache.org/viewvc?rev=1098871&view=rev
Log:
LUCENE-3063: factor CharTokenizer/CharacterUtils into analyzers module
Added:
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
- copied, changed from r1098593, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
- copied, changed from r1098593, lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
- copied, changed from r1098593, lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
- copied, changed from r1098593, lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java
Removed:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java
Modified:
lucene/dev/trunk/lucene/MIGRATE.txt
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
lucene/dev/trunk/modules/analysis/CHANGES.txt
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
Modified: lucene/dev/trunk/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/MIGRATE.txt?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/MIGRATE.txt (original)
+++ lucene/dev/trunk/lucene/MIGRATE.txt Tue May 3 00:29:47 2011
@@ -312,6 +312,8 @@ LUCENE-1458, LUCENE-2111: Flexible Index
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+ - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
+ - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
* LUCENE-2514: The option to use a Collator's order (instead of binary order) for
sorting and range queries has been moved to contrib/queries.
Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Tue May 3 00:29:47 2011
@@ -20,14 +20,15 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
/**
* Automaton-based tokenizer for testing. Optionally lowercases.
*/
-public class MockTokenizer extends CharTokenizer {
+public class MockTokenizer extends Tokenizer {
/** Acts Similar to WhitespaceTokenizer */
public static final CharacterRunAutomaton WHITESPACE =
new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
@@ -45,21 +46,67 @@ public class MockTokenizer extends CharT
private final boolean lowerCase;
private int state;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ int off = 0;
+
public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(LuceneTestCase.TEST_VERSION_CURRENT, factory, input);
+ super(factory, input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
}
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- super(LuceneTestCase.TEST_VERSION_CURRENT, input);
+ super(input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
}
@Override
+ public final boolean incrementToken() throws IOException {
+ clearAttributes();
+ for (;;) {
+ int startOffset = off;
+ int cp = readCodePoint();
+ if (cp < 0) {
+ break;
+ } else if (isTokenChar(cp)) {
+ int endOffset;
+ do {
+ char chars[] = Character.toChars(normalize(cp));
+ for (int i = 0; i < chars.length; i++)
+ termAtt.append(chars[i]);
+ endOffset = off;
+ cp = readCodePoint();
+ } while (cp >= 0 && isTokenChar(cp));
+ offsetAtt.setOffset(startOffset, endOffset);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ protected int readCodePoint() throws IOException {
+ int ch = input.read();
+ if (ch < 0) {
+ return ch;
+ } else {
+ assert !Character.isLowSurrogate((char) ch);
+ off++;
+ if (Character.isHighSurrogate((char) ch)) {
+ int ch2 = input.read();
+ if (ch2 >= 0) {
+ off++;
+ assert Character.isLowSurrogate((char) ch2);
+ return Character.toCodePoint((char) ch, (char) ch2);
+ }
+ }
+ return ch;
+ }
+ }
+
protected boolean isTokenChar(int c) {
state = runAutomaton.step(state, c);
if (state < 0) {
@@ -70,7 +117,6 @@ public class MockTokenizer extends CharT
}
}
- @Override
protected int normalize(int c) {
return lowerCase ? Character.toLowerCase(c) : c;
}
@@ -79,5 +125,12 @@ public class MockTokenizer extends CharT
public void reset() throws IOException {
super.reset();
state = runAutomaton.getInitialState();
+ off = 0;
+ }
+
+ @Override
+ public void end() throws IOException {
+ int finalOffset = correctOffset(off);
+ offsetAtt.setOffset(finalOffset, finalOffset);
}
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue May 3 00:29:47 2011
@@ -1603,7 +1603,7 @@ public class TestIndexWriter extends Luc
// LUCENE-510
public void testInvalidUTF16() throws Throwable {
Directory dir = newDirectory();
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new StringSplitAnalyzer()));
Document doc = new Document();
final int count = utf8Data.length/2;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Tue May 3 00:29:47 2011
@@ -616,7 +616,7 @@ public class TestStressIndexing2 extends
}
for(int i=start;i<end;i++) {
- int t = nextInt(6);
+ int t = nextInt(5);
if (0 == t && i < end-1) {
// Make a surrogate pair
// High surrogate
@@ -631,13 +631,6 @@ public class TestStressIndexing2 extends
buffer[i] = (char) nextInt(0x800, 0xd800);
else if (4 == t)
buffer[i] = (char) nextInt(0xe000, 0xffff);
- else if (5 == t) {
- // Illegal unpaired surrogate
- if (r.nextBoolean())
- buffer[i] = (char) nextInt(0xd800, 0xdc00);
- else
- buffer[i] = (char) nextInt(0xdc00, 0xe000);
- }
}
buffer[end] = ' ';
return 1+end;
Modified: lucene/dev/trunk/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/CHANGES.txt?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/analysis/CHANGES.txt Tue May 3 00:29:47 2011
@@ -83,6 +83,8 @@ New Features
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+ - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
+ - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
* SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of
/something, /something/something, /something/something/else. (Ryan McKinley, Koji Sekiguchi)
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java Tue May 3 00:29:47 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ar;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java Tue May 3 00:29:47 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java Tue May 3 00:29:47 2011
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java Tue May 3 00:29:47 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java Tue May 3 00:29:47 2011
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java Tue May 3 00:29:47 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java Tue May 3 00:29:47 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java Tue May 3 00:29:47 2011
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
/**
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java Tue May 3 00:29:47 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.in;
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java Tue May 3 00:29:47 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ru;
*/
import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer; // for javadocs
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.util.AttributeSource;
Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java Tue May 3 00:29:47 2011
@@ -24,7 +24,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
Copied: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (from r1098593, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?p2=lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java&r1=1098593&r2=1098871&rev=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Tue May 3 00:29:47 2011
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,12 +20,13 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.Version;
-import org.apache.lucene.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
/**
* An abstract base class for simple, character-oriented tokenizers.
Copied: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java (from r1098593, lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java?p2=lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java&r1=1098593&r2=1098871&rev=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java Tue May 3 00:29:47 2011
@@ -1,8 +1,10 @@
-package org.apache.lucene.util;
+package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.util.Version;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
Copied: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java (from r1098593, lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java?p2=lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java&p1=lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java&r1=1098593&r2=1098871&rev=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java Tue May 3 00:29:47 2011
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,6 +20,10 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.StringReader;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseTokenizer;
+
/**
* Testcase for {@link CharTokenizer} subclasses
@@ -42,7 +46,7 @@ public class TestCharTokenizers extends
}
// internal buffer size is 1024 make sure we have a surrogate pair right at the border
builder.insert(1023, "\ud801\udc1c");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
+ Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
}
@@ -59,7 +63,7 @@ public class TestCharTokenizers extends
builder.append("a");
}
builder.append("\ud801\udc1cabc");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
+ Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
}
}
@@ -73,7 +77,7 @@ public class TestCharTokenizers extends
for (int i = 0; i < 255; i++) {
builder.append("A");
}
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
+ Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
@@ -87,7 +91,7 @@ public class TestCharTokenizers extends
builder.append("A");
}
builder.append("\ud801\udc1c");
- MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
+ Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
}
Copied: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java (from r1098593, lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java?p2=lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java&p1=lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java&r1=1098593&r2=1098871&rev=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java Tue May 3 00:29:47 2011
@@ -1,4 +1,4 @@
-package org.apache.lucene.util;
+package org.apache.lucene.analysis.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -21,7 +21,9 @@ import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
import org.junit.Test;
/**