You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/08/31 03:33:03 UTC

svn commit: r991053 - in /lucene/dev/trunk: lucene/contrib/ modules/analysis/icu/ modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/ modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/

Author: rmuir
Date: Tue Aug 31 01:33:02 2010
New Revision: 991053

URL: http://svn.apache.org/viewvc?rev=991053&view=rev
Log:
LUCENE-2629: fix analysis/icu's gennorm2 task

Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/modules/analysis/icu/build.xml
    lucene/dev/trunk/modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm
    lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=991053&r1=991052&r2=991053&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Tue Aug 31 01:33:02 2010
@@ -117,6 +117,11 @@ Bug fixes
 * LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus
   permissions to newly created files, and to not silently hardwire
   buffer size to 1 MB.  (Mark Miller, Robert Muir, Mike McCandless)
+
+* LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows
+  you to customize its normalization/folding, by editing the source data files in src/data
+  and regenerating a new .nrm with 'ant gennorm2'.  (David Bowen via Robert Muir)
+
    
 API Changes
 

Modified: lucene/dev/trunk/modules/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/build.xml?rev=991053&r1=991052&r2=991053&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/icu/build.xml (original)
+++ lucene/dev/trunk/modules/analysis/icu/build.xml Tue Aug 31 01:33:02 2010
@@ -65,17 +65,27 @@
   <property name="gennorm2.src.dir" value="src/data/utr30"/>
   <property name="gennorm2.src.files" 
   	value="nfkc.txt nfkc_cf.txt BasicFoldings.txt DiacriticFolding.txt DingbatFolding.txt HanRadicalFolding.txt NativeDigitFolding.txt"/>
+  <property name="gennorm2.tmp" value="${build.dir}/gennorm2/utr30.tmp"/>
   <property name="gennorm2.dst" value="src/resources/org/apache/lucene/analysis/icu/utr30.nrm"/>
   <target name="gennorm2">
-  	<echo>Warning: only works on a big-endian platform!</echo>
+    <echo>Note that the gennorm2 and icupkg tools must be on your PATH. These tools
+are part of the ICU4C package. See http://site.icu-project.org/ </echo>
+    <mkdir dir="${build.dir}/gennorm2"/>
     <exec executable="gennorm2" failonerror="true">
       <arg value="-v"/>
       <arg value="-s"/>
       <arg value="${gennorm2.src.dir}"/>
-      <arg value="${gennorm2.src.files}"/>
+      <arg line="${gennorm2.src.files}"/>
       <arg value="-o"/>
+      <arg value="${gennorm2.tmp}"/>
+    </exec>
+    <!-- now convert binary file to big-endian -->
+    <exec executable="icupkg" failonerror="true">
+      <arg value="-tb"/>
+      <arg value="${gennorm2.tmp}"/>
       <arg value="${gennorm2.dst}"/>
     </exec>
+    <delete file="${gennorm2.tmp}"/>
   </target>
   
   <property name="rbbi.src.dir" location="src/data/uax29"/>

Modified: lucene/dev/trunk/modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm?rev=991053&r1=991052&r2=991053&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java?rev=991053&r1=991052&r2=991053&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java Tue Aug 31 01:33:02 2010
@@ -68,5 +68,12 @@ public class TestICUFoldingFilter extend
     
     // ascii-folding-filter type stuff
     assertAnalyzesTo(a, "đis is cræzy", new String[] { "dis", "is", "craezy" });
+
+    // proper downcasing of Turkish dotted-capital I
+    // (according to default case folding rules)
+    assertAnalyzesTo(a, "ELİF", new String[] { "elif" });
+    
+    // handling of decomposed combining-dot-above
+    assertAnalyzesTo(a, "eli\u0307f", new String[] { "elif" });
   }
 }