You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/19 07:59:43 UTC

svn commit: r1533695 - in /lucene/dev/branches/lucene4956/lucene/analysis/arirang: ./ src/data/ src/java/org/apache/lucene/analysis/ko/dic/ src/java/org/apache/lucene/analysis/ko/morph/ src/resources/org/apache/lucene/analysis/ko/dic/ src/tools/java/org/apache/lucene/analysis/ko/

Author: rmuir
Date: Sat Oct 19 05:59:42 2013
New Revision: 1533695

URL: http://svn.apache.org/r1533695
Log:
LUCENE-4956: move data to src/data and setup regeneration (for now simple copy)

Added:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/eomi.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/eomi.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/josa.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/josa.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/mapHanja.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/mapHanja.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/prefix.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/prefix.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/suffix.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/suffix.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/syllable.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/syllable.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/uncompounds.dic
      - copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/uncompounds.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java   (with props)
Removed:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/abbreviation.dic
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/cj.dic
Modified:
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
    lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml?rev=1533695&r1=1533694&r2=1533695&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml Sat Oct 19 05:59:42 2013
@@ -31,4 +31,43 @@
   </path>
   
   <target name="compile-core" depends="jar-analyzers-common, common.compile-core"/>
+
+  <!-- for rebuilding dictionary -->
+  <path id="tools.classpath">
+    <path refid="classpath"/>
+    <pathelement location="${build.dir}/classes/tools"/>
+  </path>
+
+  <target name="compile-tools" depends="common.compile-tools">
+    <compile
+      srcdir="src/tools/java"
+      destdir="${build.dir}/classes/tools">
+      <classpath>
+        <path refid="tools.classpath"/>
+      </classpath>
+    </compile>
+  </target>
+
+  <property name="dict.src.dir" location="src/data"/>
+  <property name="dict.target.dir" location="${resources.dir}/org/apache/lucene/analysis/ko/dic"/>
+
+  <target name="build-dict" depends="compile-tools">
+    <sequential>
+      <delete verbose="true">
+        <fileset dir="${dict.target.dir}" includes="**/*"/>
+      </delete>
+      <java fork="true" failonerror="true" maxmemory="256M" classname="org.apache.lucene.analysis.ko.DictionaryBuilder">
+        <classpath>
+          <path refid="tools.classpath"/>
+        </classpath>
+        <assertions>
+          <enable package="org.apache.lucene"/>
+        </assertions>
+        <arg value="${dict.src.dir}"/>
+        <arg value="${dict.target.dir}"/>
+      </java>
+    </sequential>
+  </target>
+  <target name="regenerate" depends="build-dict"/>
+
 </project>

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1533695&r1=1533694&r2=1533695&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sat Oct 19 05:59:42 2013
@@ -45,10 +45,6 @@ public class DictionaryUtil {
   
   private static final Map<String,WordEntry> uncompounds = new HashMap<String,WordEntry>();
   
-  private static final Map<String, String> cjwords = new HashMap<String,String>();
-  
-  private static final Map<String, String> abbreviations = new HashMap<String,String>();
-  
   static {  
     try {
       final LineProcessor proc = new LineProcessor() {
@@ -87,17 +83,6 @@ public class DictionaryUtil {
         }       
       }); 
       
-      DictionaryResources.readLines(DictionaryResources.FILE_ABBREV, new LineProcessor() {
-        @Override
-        public void processLine(String abbrev) throws IOException {
-          String[] infos = abbrev.split("[:]+");
-          if(infos.length!=2) {
-            throw new IOException("Invalid file format: "+abbrev);
-          }
-          abbreviations.put(infos[0].trim(), infos[1].trim());          
-        }
-      });
-      
       DictionaryResources.readLines(DictionaryResources.FILE_UNCOMPOUNDS, new LineProcessor() {
         @Override
         public void processLine(String compound) throws IOException {
@@ -109,17 +94,6 @@ public class DictionaryUtil {
           uncompounds.put(entry.getWord(), entry);
         }
       });
-  
-      DictionaryResources.readLines(DictionaryResources.FILE_CJ, new LineProcessor() {
-        @Override
-        public void processLine(String cj) throws IOException {
-          String[] infos = cj.split("[:]+");
-          if(infos.length!=2) {
-            throw new IOException("Invalid file format: "+cj);
-          }
-          cjwords.put(infos[0], infos[1]);
-        }
-      });
 
       readFileToSet(josas,DictionaryResources.FILE_JOSA);
       
@@ -234,18 +208,10 @@ public class DictionaryUtil {
     return null;
   }
   
-  public static String getAbbrevMorph(String key) {
-    return abbreviations.get(key);
-  }
-  
   public static WordEntry getUncompound(String key) {
     return uncompounds.get(key);
   }
   
-  public static String getCJWord(String key) {
-    return cjwords.get(key);
-  }
-  
   public static boolean existJosa(String str) {
     return josas.contains(str);
   }

Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java?rev=1533695&r1=1533694&r2=1533695&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java Sat Oct 19 05:59:42 2013
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.ko.mo
  * limitations under the License.
  */
 
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 

Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java?rev=1533695&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java Sat Oct 19 05:59:42 2013
@@ -0,0 +1,51 @@
+package org.apache.lucene.analysis.ko;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/** Dictionary regeneration tool; for now it simply copies a fixed list of dictionary files unchanged. */
+public class DictionaryBuilder {
+  /** Copies every listed file from the directory named by args[0] into the directory named by args[1]. */
+  public static void main(String args[]) throws Exception {
+    String FILES_AS_IS[] = { "compounds.dic", "dictionary.dic", "eomi.dic", "extension.dic", "josa.dic", "mapHanja.dic",
+                       "prefix.dic", "suffix.dic", "syllable.dic", "uncompounds.dic" };
+    for (String file : FILES_AS_IS) {
+      copyAsIs(new File(args[0], file), new File(args[1], file));
+    }
+  }
+
+  /** Copies in to out byte for byte; both streams are closed even when opening, reading or writing fails. */
+  static void copyAsIs(File in, File out) throws Exception {
+    InputStream r = new BufferedInputStream(new FileInputStream(in));
+    try {
+      OutputStream w = new BufferedOutputStream(new FileOutputStream(out));
+      try {
+        for (int c = r.read(); c != -1; c = r.read()) {
+          w.write(c);
+        }
+      } finally { w.close(); } // close() flushes the buffer, so a failed flush still surfaces to the caller
+    } finally { r.close(); }   // runs even if the output file could not be opened
+  }
+}