You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2008/05/14 07:37:45 UTC

svn commit: r656111 - in /lucene/java/trunk: CHANGES.txt contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java

Author: otis
Date: Tue May 13 22:37:45 2008
New Revision: 656111

URL: http://svn.apache.org/viewvc?rev=656111&view=rev
Log:
LUCENE-1003: Don't let RussianAnalyzer drop numbers.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=656111&r1=656110&r2=656111&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue May 13 22:37:45 2008
@@ -103,7 +103,10 @@
     This is needed when you want to update an index as part of a
     transaction involving external resources (eg a database).  Also
     deprecated abort(), renaming it to rollback().  (Mike McCandless)
-	
+
+10. LUCENE-1003: Stop RussianAnalyzer from removing numbers.
+    (TUSUR OpenTeam, Dmitry Lihachev via Otis Gospodnetic)
+
 New features
 
  1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java?rev=656111&r1=656110&r2=656111&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java Tue May 13 22:37:45 2008
@@ -94,7 +94,18 @@
         '\u042C',
         '\u042D',
         '\u042E',
-        '\u042F'
+        '\u042F',
+        // numbers
+        '0',
+        '1',
+        '2',
+        '3',
+        '4',
+        '5',
+        '6',
+        '7',
+        '8',
+        '9'
     };
 
     // KOI8 charset
@@ -163,7 +174,18 @@
         0xf8,
         0xfc,
         0xe0,
-        0xf1
+        0xf1,
+        // numbers
+        '0',
+        '1',
+        '2',
+        '3',
+        '4',
+        '5',
+        '6',
+        '7',
+        '8',
+        '9'
     };
 
     // CP1251 eharset
@@ -232,7 +254,18 @@
         0xDC,
         0xDD,
         0xDE,
-        0xDF
+        0xDF,
+        // numbers
+        '0',
+        '1',
+        '2',
+        '3',
+        '4',
+        '5',
+        '6',
+        '7',
+        '8',
+        '9'
     };
 
     public static char toLowerCase(char letter, char[] charset)

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=656111&r1=656110&r2=656111&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Tue May 13 22:37:45 2008
@@ -168,4 +168,21 @@
         inWords1251.close();
         sample1251.close();
     }
+    
+    public void testDigitsInRussianCharset() 
+    {
+        Reader reader = new StringReader("text 1000");
+        RussianAnalyzer ra = new RussianAnalyzer();
+        TokenStream stream = ra.tokenStream("", reader);
+
+        try {
+            assertEquals("text", stream.next().termText());
+            assertNotNull("RussianAnalyzer's tokenizer skips numbers from input text", stream.next());
+        }
+        catch (IOException e)
+        {
+            fail("unexpected IOException");
+        }
+    }
+
 }