You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2009/07/06 15:50:18 UTC
svn commit: r791483 [1/5] -
/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/
Author: uschindler
Date: Mon Jul 6 13:50:17 2009
New Revision: 791483
URL: http://svn.apache.org/viewvc?rev=791483&view=rev
Log:
Convert and clean up the test files to UTF-8. What is still broken is the incorrect usage of KOI8 and CP1251 encodings. Added svn:eol-style=native to all files again.
Added:
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm (with props)
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/stemsUTF8.txt (with props)
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/testUTF8.txt (with props)
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/wordsUTF8.txt (with props)
Removed:
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUnicode.htm
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/stemsUnicode.txt
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/testUnicode.txt
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/wordsUnicode.txt
Modified:
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=791483&r1=791482&r2=791483&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Mon Jul 6 13:50:17 2009
@@ -62,13 +62,13 @@
RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.UnicodeRussian);
inWords =
new InputStreamReader(
- new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUnicode.txt")),
- "Unicode");
+ new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),
+ "UTF-8");
sampleUnicode =
new InputStreamReader(
- new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resUnicode.htm")),
- "Unicode");
+ new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resUTF8.htm")),
+ "UTF-8");
TokenStream in = ra.tokenStream("all", inWords);
Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java?rev=791483&r1=791482&r2=791483&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java Mon Jul 6 13:50:17 2009
@@ -50,8 +50,8 @@
BufferedReader inWords =
new BufferedReader(
new InputStreamReader(
- new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/wordsUnicode.txt")),
- "Unicode"));
+ new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/wordsUTF8.txt")),
+ "UTF-8"));
while ((str = inWords.readLine()) != null)
{
words.add(str);
@@ -62,8 +62,8 @@
BufferedReader inStems =
new BufferedReader(
new InputStreamReader(
- new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/stemsUnicode.txt")),
- "Unicode"));
+ new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/stemsUTF8.txt")),
+ "UTF-8"));
while ((str = inStems.readLine()) != null)
{
stems.add(str);
Added: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm?rev=791483&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm (added)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm Mon Jul 6 13:50:17 2009
@@ -0,0 +1 @@
+[вмест][сил][электромагнитн][энерг][имел][представлен][скаж][жрец][древн][египт][знан][хран][тайн][узк][круг][посвящен][всяк][времен][виток][прин][соб][нов][технолог][сам][дел][раскрыва][потаен][знан][прежн][век][говор][нов][информац][станов][доступн][широк][круг][пользовател][тех][случа][сознан][обществ][готов][восприня][воспользова]
\ No newline at end of file
Propchange: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm
------------------------------------------------------------------------------
svn:eol-style = native