You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/19 07:59:43 UTC
svn commit: r1533695 - in
/lucene/dev/branches/lucene4956/lucene/analysis/arirang: ./ src/data/
src/java/org/apache/lucene/analysis/ko/dic/
src/java/org/apache/lucene/analysis/ko/morph/
src/resources/org/apache/lucene/analysis/ko/dic/ src/tools/java/org/apache/lucene/analysis/ko/
Author: rmuir
Date: Sat Oct 19 05:59:42 2013
New Revision: 1533695
URL: http://svn.apache.org/r1533695
Log:
LUCENE-4956: move data to src/data and setup regeneration (for now simple copy)
Added:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/compounds.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/compounds.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/dictionary.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/dictionary.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/eomi.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/eomi.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/extension.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/extension.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/josa.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/josa.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/mapHanja.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/mapHanja.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/prefix.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/prefix.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/suffix.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/suffix.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/syllable.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/syllable.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/data/uncompounds.dic
- copied unchanged from r1533555, lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/uncompounds.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java (with props)
Removed:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/abbreviation.dic
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/resources/org/apache/lucene/analysis/ko/dic/cj.dic
Modified:
lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml?rev=1533695&r1=1533694&r2=1533695&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/build.xml Sat Oct 19 05:59:42 2013
@@ -31,4 +31,43 @@
</path>
<target name="compile-core" depends="jar-analyzers-common, common.compile-core"/>
+
+ <!-- for rebuilding dictionary -->
+ <path id="tools.classpath">
+ <path refid="classpath"/>
+ <pathelement location="${build.dir}/classes/tools"/>
+ </path>
+
+ <target name="compile-tools" depends="common.compile-tools">
+ <compile
+ srcdir="src/tools/java"
+ destdir="${build.dir}/classes/tools">
+ <classpath>
+ <path refid="tools.classpath"/>
+ </classpath>
+ </compile>
+ </target>
+
+ <property name="dict.src.dir" location="src/data"/>
+ <property name="dict.target.dir" location="${resources.dir}/org/apache/lucene/analysis/ko/dic"/>
+
+ <target name="build-dict" depends="compile-tools">
+ <sequential>
+ <delete verbose="true">
+ <fileset dir="${dict.target.dir}" includes="**/*"/>
+ </delete>
+ <java fork="true" failonerror="true" maxmemory="256M" classname="org.apache.lucene.analysis.ko.DictionaryBuilder">
+ <classpath>
+ <path refid="tools.classpath"/>
+ </classpath>
+ <assertions>
+ <enable package="org.apache.lucene"/>
+ </assertions>
+ <arg value="${dict.src.dir}"/>
+ <arg value="${dict.target.dir}"/>
+ </java>
+ </sequential>
+ </target>
+ <target name="regenerate" depends="build-dict"/>
+
</project>
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java?rev=1533695&r1=1533694&r2=1533695&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/dic/DictionaryUtil.java Sat Oct 19 05:59:42 2013
@@ -45,10 +45,6 @@ public class DictionaryUtil {
private static final Map<String,WordEntry> uncompounds = new HashMap<String,WordEntry>();
- private static final Map<String, String> cjwords = new HashMap<String,String>();
-
- private static final Map<String, String> abbreviations = new HashMap<String,String>();
-
static {
try {
final LineProcessor proc = new LineProcessor() {
@@ -87,17 +83,6 @@ public class DictionaryUtil {
}
});
- DictionaryResources.readLines(DictionaryResources.FILE_ABBREV, new LineProcessor() {
- @Override
- public void processLine(String abbrev) throws IOException {
- String[] infos = abbrev.split("[:]+");
- if(infos.length!=2) {
- throw new IOException("Invalid file format: "+abbrev);
- }
- abbreviations.put(infos[0].trim(), infos[1].trim());
- }
- });
-
DictionaryResources.readLines(DictionaryResources.FILE_UNCOMPOUNDS, new LineProcessor() {
@Override
public void processLine(String compound) throws IOException {
@@ -109,17 +94,6 @@ public class DictionaryUtil {
uncompounds.put(entry.getWord(), entry);
}
});
-
- DictionaryResources.readLines(DictionaryResources.FILE_CJ, new LineProcessor() {
- @Override
- public void processLine(String cj) throws IOException {
- String[] infos = cj.split("[:]+");
- if(infos.length!=2) {
- throw new IOException("Invalid file format: "+cj);
- }
- cjwords.put(infos[0], infos[1]);
- }
- });
readFileToSet(josas,DictionaryResources.FILE_JOSA);
@@ -234,18 +208,10 @@ public class DictionaryUtil {
return null;
}
- public static String getAbbrevMorph(String key) {
- return abbreviations.get(key);
- }
-
public static WordEntry getUncompound(String key) {
return uncompounds.get(key);
}
- public static String getCJWord(String key) {
- return cjwords.get(key);
- }
-
public static boolean existJosa(String str) {
return josas.contains(str);
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java?rev=1533695&r1=1533694&r2=1533695&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/morph/WordEntry.java Sat Oct 19 05:59:42 2013
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.ko.mo
* limitations under the License.
*/
-import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
Added: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java?rev=1533695&view=auto
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java (added)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/tools/java/org/apache/lucene/analysis/ko/DictionaryBuilder.java Sat Oct 19 05:59:42 2013
@@ -0,0 +1,80 @@
+package org.apache.lucene.analysis.ko;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * Build-time tool that regenerates the dictionary resources of the {@code ko}
+ * analysis module from a source data directory. For now this is a plain copy.
+ * <p>
+ * Usage: {@code DictionaryBuilder <inputDir> <outputDir>}
+ */
+public class DictionaryBuilder {
+  /** Dictionary files that are copied to the output directory without any processing. */
+  private static final String FILES_AS_IS[] = { "compounds.dic", "dictionary.dic", "eomi.dic", "extension.dic", "josa.dic", "mapHanja.dic",
+      "prefix.dic", "suffix.dic", "syllable.dic", "uncompounds.dic" };
+
+  /**
+   * Copies every file listed in {@link #FILES_AS_IS} from the input directory to the output directory.
+   *
+   * @param args {@code args[0]} is the directory to read from, {@code args[1]} the directory to write to
+   * @throws IllegalArgumentException if exactly two arguments are not supplied
+   * @throws Exception if any file cannot be read or written
+   */
+  public static void main(String args[]) throws Exception {
+    if (args.length != 2) {
+      throw new IllegalArgumentException("usage: DictionaryBuilder <inputDir> <outputDir>");
+    }
+    File inputDir = new File(args[0]);
+    File outputDir = new File(args[1]);
+    for (String file : FILES_AS_IS) {
+      copyAsIs(new File(inputDir, file), new File(outputDir, file));
+    }
+  }
+
+  /**
+   * Copies {@code in} to {@code out} byte for byte, overwriting {@code out} if it already exists.
+   * Both streams are closed even when the copy fails part-way.
+   *
+   * @throws Exception if opening, reading, writing or closing either file fails
+   */
+  static void copyAsIs(File in, File out) throws Exception {
+    InputStream r = new BufferedInputStream(new FileInputStream(in));
+    try {
+      OutputStream w = new BufferedOutputStream(new FileOutputStream(out));
+      try {
+        int c;
+        while ((c = r.read()) != -1) {
+          w.write(c);
+        }
+      } finally {
+        // close() also flushes whatever is still sitting in the buffer
+        w.close();
+      }
+    } finally {
+      r.close();
+    }
+  }
+}