You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by Apache Wiki <wi...@apache.org> on 2007/09/13 11:31:59 UTC
[Lucene-hadoop Wiki] Trivial Update of "Hbase/HbaseShell/Ideas" by udanax
Dear Wiki user,
You have subscribed to a wiki page or wiki category on "Lucene-hadoop Wiki" for change notification.
The following page has been changed by udanax:
http://wiki.apache.org/lucene-hadoop/Hbase/HbaseShell/Ideas
------------------------------------------------------------------------------
== Hbase JDBC driver ==
I've started to think about a Java Database Connectivity (JDBC) driver for use with Hbase.
- [[BR]]I am thinking of integrating Zeroboard with this work in progress for a test case.
+ [[BR]]I am thinking of integrating Zeroboard with this work in progress for a test case. -- [:udanax:Edward yoon]
~-''Zeroboard is one of the most popular PHP web boards in Asia.''-~
@@ -16, +16 @@
== Korean Morphological Analyzer for lucene ==
+ * Work in progress with the NLP lab.
+
+ {{{
+ package org.apache.lucene;
+
+ import java.io.IOException;
+ import java.io.StringReader;
+ import java.util.ArrayList;
+ import java.util.List;
+
+ import org.apache.lucene.analysis.Analyzer;
+ import org.apache.lucene.analysis.Token;
+ import org.apache.lucene.analysis.TokenStream;
+ import org.apache.lucene.analysis.kr.KoreanAnalyzer;
+ import org.apache.lucene.analysis.kr.KoreanMorphemeAnalyzer;
+
+ /**
+  * Small demo that runs one sample Korean sentence through two analyzers
+  * (KoreanMorphemeAnalyzer, then KoreanAnalyzer) and prints every token
+  * each of them produces, one per line.
+  */
+ public class Test {
+   /** Sample sentence fed to both analyzers. */
+   private static final String SAMPLE_TEXT =
+       "조선 제4대 임금 세종이 훈민정음이라는 이름으로 창제하였다";
+
+   /**
+    * Prints the tokens produced for the sample sentence, first by
+    * KoreanMorphemeAnalyzer and then by KoreanAnalyzer.
+    *
+    * @param arg command-line arguments (unused)
+    * @throws IOException if reading from or closing a token stream fails
+    */
+   public static void main(String[] arg) throws IOException {
+     printMorphemes(new KoreanMorphemeAnalyzer());
+     printMorphemes(new KoreanAnalyzer());
+   }
+
+   /** Prints each token the given analyzer yields for the sample sentence on its own line. */
+   private static void printMorphemes(Analyzer a) throws IOException {
+     for (String morpheme : getMorphemeArray(a, SAMPLE_TEXT)) {
+       System.out.println(morpheme);
+     }
+   }
+
+   /**
+    * Collects the {@code toString()} form of every token the analyzer emits
+    * for the given text, in stream order.
+    *
+    * @param a analyzer used to build the token stream
+    * @param string text to analyze
+    * @return the token strings; an empty array when the stream yields no tokens
+    * @throws IOException if reading from or closing the token stream fails
+    */
+   private static String[] getMorphemeArray(Analyzer a, String string)
+       throws IOException {
+     List<String> tmp = new ArrayList<String>();
+     // The field name is a placeholder; this demo only has a single text to analyze.
+     TokenStream ts = a.tokenStream("dummy", new StringReader(string));
+
+     try {
+       // Stop at the first null token, which marks the end of the stream.
+       for (Token t = ts.next(); t != null; t = ts.next()) {
+         tmp.add(t.toString());
+       }
+     } finally {
+       // Release the stream even when next() throws.
+       ts.close();
+     }
+
+     return tmp.toArray(new String[tmp.size()]);
+   }
+ }
+ }}}
+