You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2009/07/06 15:50:18 UTC

svn commit: r791483 [1/5] - /lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/

Author: uschindler
Date: Mon Jul  6 13:50:17 2009
New Revision: 791483

URL: http://svn.apache.org/viewvc?rev=791483&view=rev
Log:
Convert and cleanup the test files to UTF-8. What is still broken is the incorrect usage of KOI8 and CP1251 encodings. Added svn:eol-style=native to all files again.

Added:
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm   (with props)
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/stemsUTF8.txt   (with props)
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/testUTF8.txt   (with props)
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/wordsUTF8.txt   (with props)
Removed:
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUnicode.htm
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/stemsUnicode.txt
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/testUnicode.txt
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/wordsUnicode.txt
Modified:
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java?rev=791483&r1=791482&r2=791483&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java Mon Jul  6 13:50:17 2009
@@ -62,13 +62,13 @@
         RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.UnicodeRussian);
         inWords =
             new InputStreamReader(
-                new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUnicode.txt")),
-                "Unicode");
+                new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),
+                "UTF-8");
 
         sampleUnicode =
             new InputStreamReader(
-                new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resUnicode.htm")),
-                "Unicode");
+                new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resUTF8.htm")),
+                "UTF-8");
 
         TokenStream in = ra.tokenStream("all", inWords);
 

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java?rev=791483&r1=791482&r2=791483&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java (original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java Mon Jul  6 13:50:17 2009
@@ -50,8 +50,8 @@
         BufferedReader inWords =
             new BufferedReader(
                 new InputStreamReader(
-                    new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/wordsUnicode.txt")),
-                    "Unicode"));
+                    new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/wordsUTF8.txt")),
+                    "UTF-8"));
         while ((str = inWords.readLine()) != null)
         {
             words.add(str);
@@ -62,8 +62,8 @@
         BufferedReader inStems =
             new BufferedReader(
                 new InputStreamReader(
-                    new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/stemsUnicode.txt")),
-                    "Unicode"));
+                    new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/stemsUTF8.txt")),
+                    "UTF-8"));
         while ((str = inStems.readLine()) != null)
         {
             stems.add(str);

Added: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm?rev=791483&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm (added)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm Mon Jul  6 13:50:17 2009
@@ -0,0 +1 @@
+[вмест][сил][электромагнитн][энерг][имел][представлен][скаж][жрец][древн][египт][знан][хран][тайн][узк][круг][посвящен][всяк][времен][виток][прин][соб][нов][технолог][сам][дел][раскрыва][потаен][знан][прежн][век][говор][нов][информац][станов][доступн][широк][круг][пользовател][тех][случа][сознан][обществ][готов][восприня][воспользова]
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/ru/resUTF8.htm
------------------------------------------------------------------------------
    svn:eol-style = native