You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/03 02:29:48 UTC

svn commit: r1098871 - in /lucene/dev/trunk: lucene/ lucene/src/java/org/apache/lucene/analysis/ lucene/src/java/org/apache/lucene/util/ lucene/src/test-framework/org/apache/lucene/analysis/ lucene/src/test/org/apache/lucene/analysis/ lucene/src/test/o...

Author: rmuir
Date: Tue May  3 00:29:47 2011
New Revision: 1098871

URL: http://svn.apache.org/viewvc?rev=1098871&view=rev
Log:
LUCENE-3063: factor CharTokenizer/CharacterUtils into analyzers module

Added:
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
      - copied, changed from r1098593, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
      - copied, changed from r1098593, lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
      - copied, changed from r1098593, lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
      - copied, changed from r1098593, lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java
Removed:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java
Modified:
    lucene/dev/trunk/lucene/MIGRATE.txt
    lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
    lucene/dev/trunk/modules/analysis/CHANGES.txt
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java

Modified: lucene/dev/trunk/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/MIGRATE.txt?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/MIGRATE.txt (original)
+++ lucene/dev/trunk/lucene/MIGRATE.txt Tue May  3 00:29:47 2011
@@ -312,6 +312,8 @@ LUCENE-1458, LUCENE-2111: Flexible Index
     - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
     - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
     - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+    - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
+    - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
 
 * LUCENE-2514: The option to use a Collator's order (instead of binary order) for
   sorting and range queries has been moved to contrib/queries.

Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/analysis/MockTokenizer.java Tue May  3 00:29:47 2011
@@ -20,14 +20,15 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * Automaton-based tokenizer for testing. Optionally lowercases.
  */
-public class MockTokenizer extends CharTokenizer {
+public class MockTokenizer extends Tokenizer {
   /** Acts Similar to WhitespaceTokenizer */
   public static final CharacterRunAutomaton WHITESPACE = 
     new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
@@ -45,21 +46,67 @@ public class MockTokenizer extends CharT
   private final boolean lowerCase;
   private int state;
 
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  int off = 0;
+
   public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    super(LuceneTestCase.TEST_VERSION_CURRENT, factory, input);
+    super(factory, input);
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
     this.state = runAutomaton.getInitialState();
   }
 
   public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
-    super(LuceneTestCase.TEST_VERSION_CURRENT, input);
+    super(input);
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
     this.state = runAutomaton.getInitialState();
   }
   
   @Override
+  public final boolean incrementToken() throws IOException {
+    clearAttributes();
+    for (;;) {
+      int startOffset = off;
+      int cp = readCodePoint();
+      if (cp < 0) {
+        break;
+      } else if (isTokenChar(cp)) {
+        int endOffset;
+        do {
+          char chars[] = Character.toChars(normalize(cp));
+          for (int i = 0; i < chars.length; i++)
+            termAtt.append(chars[i]);
+          endOffset = off;
+          cp = readCodePoint();
+        } while (cp >= 0 && isTokenChar(cp));
+        offsetAtt.setOffset(startOffset, endOffset);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  protected int readCodePoint() throws IOException {
+    int ch = input.read();
+    if (ch < 0) {
+      return ch;
+    } else {
+      assert !Character.isLowSurrogate((char) ch);
+      off++;
+      if (Character.isHighSurrogate((char) ch)) {
+        int ch2 = input.read();
+        if (ch2 >= 0) {
+          off++;
+          assert Character.isLowSurrogate((char) ch2);
+          return Character.toCodePoint((char) ch, (char) ch2);
+        }
+      }
+      return ch;
+    }
+  }
+
   protected boolean isTokenChar(int c) {
     state = runAutomaton.step(state, c);
     if (state < 0) {
@@ -70,7 +117,6 @@ public class MockTokenizer extends CharT
     }
   }
   
-  @Override
   protected int normalize(int c) {
     return lowerCase ? Character.toLowerCase(c) : c;
   }
@@ -79,5 +125,12 @@ public class MockTokenizer extends CharT
   public void reset() throws IOException {
     super.reset();
     state = runAutomaton.getInitialState();
+    off = 0;
+  }
+
+  @Override
+  public void end() throws IOException {
+    int finalOffset = correctOffset(off);
+    offsetAtt.setOffset(finalOffset, finalOffset);
   }
 }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Tue May  3 00:29:47 2011
@@ -1603,7 +1603,7 @@ public class TestIndexWriter extends Luc
   // LUCENE-510
   public void testInvalidUTF16() throws Throwable {
     Directory dir = newDirectory();
-    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new StringSplitAnalyzer()));
     Document doc = new Document();
 
     final int count = utf8Data.length/2;

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java Tue May  3 00:29:47 2011
@@ -616,7 +616,7 @@ public class TestStressIndexing2 extends
       }
 
       for(int i=start;i<end;i++) {
-        int t = nextInt(6);
+        int t = nextInt(5);
         if (0 == t && i < end-1) {
           // Make a surrogate pair
           // High surrogate
@@ -631,13 +631,6 @@ public class TestStressIndexing2 extends
           buffer[i] = (char) nextInt(0x800, 0xd800);
         else if (4 == t)
           buffer[i] = (char) nextInt(0xe000, 0xffff);
-        else if (5 == t) {
-          // Illegal unpaired surrogate
-          if (r.nextBoolean())
-            buffer[i] = (char) nextInt(0xd800, 0xdc00);
-          else
-            buffer[i] = (char) nextInt(0xdc00, 0xe000);
-        }
       }
       buffer[end] = ' ';
       return 1+end;

Modified: lucene/dev/trunk/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/CHANGES.txt?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/analysis/CHANGES.txt Tue May  3 00:29:47 2011
@@ -83,6 +83,8 @@ New Features
     - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
     - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
     - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+    - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
+    - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils
 
  * SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of
    /something, /something/something, /something/something/else. (Ryan McKinley, Koji Sekiguchi)

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicLetterTokenizer.java Tue May  3 00:29:47 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ar;
 
 import java.io.Reader;
 
-import org.apache.lucene.analysis.CharTokenizer;
 import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer; // javadoc @link
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java Tue May  3 00:29:47 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
 
 import java.io.Reader;
 
-import org.apache.lucene.analysis.CharTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java Tue May  3 00:29:47 2011
@@ -22,7 +22,7 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.Version;
 
 /**

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java Tue May  3 00:29:47 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
 
 import java.io.Reader;
 
-import org.apache.lucene.analysis.CharTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java Tue May  3 00:29:47 2011
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceAnalyzer.java Tue May  3 00:29:47 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.core;
 
 import java.io.Reader;
 
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.util.Version;
 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java Tue May  3 00:29:47 2011
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
 
 import java.io.Reader;
 
-import org.apache.lucene.analysis.CharTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java Tue May  3 00:29:47 2011
@@ -21,7 +21,7 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.Version;
 
 /**

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicTokenizer.java Tue May  3 00:29:47 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.in;
 
 import java.io.Reader;
 
-import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java Tue May  3 00:29:47 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ru;
  */
 
 import java.io.Reader;
-import org.apache.lucene.analysis.CharTokenizer;
 import org.apache.lucene.analysis.Tokenizer; // for javadocs
+import org.apache.lucene.analysis.util.CharTokenizer;
 import org.apache.lucene.analysis.core.LetterTokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
 import org.apache.lucene.util.AttributeSource;

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java?rev=1098871&r1=1098870&r2=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java Tue May  3 00:29:47 2011
@@ -24,7 +24,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.Version;
 
 

Copied: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (from r1098593, lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?p2=lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java&r1=1098593&r2=1098871&rev=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Tue May  3 00:29:47 2011
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,12 +20,13 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.io.Reader;
 
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.CharacterUtils;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.Version;
-import org.apache.lucene.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
 
 /**
  * An abstract base class for simple, character-oriented tokenizers. 

Copied: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java (from r1098593, lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java?p2=lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java&r1=1098593&r2=1098871&rev=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/CharacterUtils.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java Tue May  3 00:29:47 2011
@@ -1,8 +1,10 @@
-package org.apache.lucene.util;
+package org.apache.lucene.analysis.util;
 
 import java.io.IOException;
 import java.io.Reader;
 
+import org.apache.lucene.util.Version;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with

Copied: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java (from r1098593, lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java?p2=lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java&p1=lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java&r1=1098593&r2=1098871&rev=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestCharTokenizers.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java Tue May  3 00:29:47 2011
@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.util;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,6 +20,10 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.io.StringReader;
 
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseTokenizer;
+
 
 /**
  * Testcase for {@link CharTokenizer} subclasses
@@ -42,7 +46,7 @@ public class TestCharTokenizers extends 
     }
     // internal buffer size is 1024 make sure we have a surrogate pair right at the border
     builder.insert(1023, "\ud801\udc1c");
-    MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
+    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
     assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
   }
   
@@ -59,7 +63,7 @@ public class TestCharTokenizers extends 
         builder.append("a");
       }
       builder.append("\ud801\udc1cabc");
-      MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
+      Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
       assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
     }
   }
@@ -73,7 +77,7 @@ public class TestCharTokenizers extends 
     for (int i = 0; i < 255; i++) {
       builder.append("A");
     }
-    MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
+    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
     assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
   }
   
@@ -87,7 +91,7 @@ public class TestCharTokenizers extends 
       builder.append("A");
     }
     builder.append("\ud801\udc1c");
-    MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
+    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
     assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
   }
 }

Copied: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java (from r1098593, lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java?p2=lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java&p1=lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java&r1=1098593&r2=1098871&rev=1098871&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/TestCharacterUtils.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java Tue May  3 00:29:47 2011
@@ -1,4 +1,4 @@
-package org.apache.lucene.util;
+package org.apache.lucene.analysis.util;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -21,7 +21,9 @@ import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 import org.junit.Test;
 
 /**