You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by ot...@apache.org on 2008/05/14 07:37:45 UTC
svn commit: r656111 - in /lucene/java/trunk: CHANGES.txt
contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java
contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
Author: otis
Date: Tue May 13 22:37:45 2008
New Revision: 656111
URL: http://svn.apache.org/viewvc?rev=656111&view=rev
Log:
LUCENE-1003: Don't let RussianAnalyzer drop numbers.
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=656111&r1=656110&r2=656111&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Tue May 13 22:37:45 2008
@@ -103,7 +103,10 @@
This is needed when you want to update an index as part of a
transaction involving external resources (eg a database). Also
deprecated abort(), renaming it to rollback(). (Mike McCandless)
-
+
+10. LUCENE-1003: Stop RussianAnalyzer from removing numbers.
+ (TUSUR OpenTeam, Dmitry Lihachev via Otis Gospodnetic)
+
New features
1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis
Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java?rev=656111&r1=656110&r2=656111&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/ru/RussianCharsets.java Tue May 13 22:37:45 2008
@@ -94,7 +94,18 @@
'\u042C',
'\u042D',
'\u042E',
- '\u042F'
+ '\u042F',
+ // numbers
+ '0',
+ '1',
+ '2',
+ '3',
+ '4',
+ '5',
+ '6',
+ '7',
+ '8',
+ '9'
};
// KOI8 charset
@@ -163,7 +174,18 @@
0xf8,
0xfc,
0xe0,
- 0xf1
+ 0xf1,
+ // numbers
+ '0',
+ '1',
+ '2',
+ '3',
+ '4',
+ '5',
+ '6',
+ '7',
+ '8',
+ '9'
};
// CP1251 charset
@@ -232,7 +254,18 @@
0xDC,
0xDD,
0xDE,
- 0xDF
+ 0xDF,
+ // numbers
+ '0',
+ '1',
+ '2',
+ '3',
+ '4',
+ '5',
+ '6',
+ '7',
+ '8',
+ '9'
};
public static char toLowerCase(char letter, char[] charset)
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=656111&r1=656110&r2=656111&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Tue May 13 22:37:45 2008
@@ -168,4 +168,21 @@
inWords1251.close();
sample1251.close();
}
+
+ public void testDigitsInRussianCharset()
+ {
+ Reader reader = new StringReader("text 1000");
+ RussianAnalyzer ra = new RussianAnalyzer();
+ TokenStream stream = ra.tokenStream("", reader);
+
+ try {
+ assertEquals("text", stream.next().termText());
+ assertNotNull("RussianAnalyzer's tokenizer skips numbers from input text", stream.next());
+ }
+ catch (IOException e)
+ {
+ fail("unexpected IOException");
+ }
+ }
+
}