You are viewing a plain text version of this content. The canonical link for it is here.

Posted to common-commits@hadoop.apache.org by Apache Wiki <wi...@apache.org> on 2007/09/13 11:31:59 UTC

[Lucene-hadoop Wiki] Trivial Update of "Hbase/HbaseShell/Ideas" by udanax

Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Lucene-hadoop Wiki" for change notification.

The following page has been changed by udanax:
http://wiki.apache.org/lucene-hadoop/Hbase/HbaseShell/Ideas

------------------------------------------------------------------------------
  
  == Hbase JDBC driver ==
  I've started to think about a Java Database Connectivity (JDBC) driver for use with Hbase. 
- [[BR]]I am thinking of integrating Zeroboard with this work in progress for a test case. 
+ [[BR]]I am thinking of integrating Zeroboard with this work in progress for a test case. -- [:udanax:Edward yoon]
  
  ~-''Zeroboard is one of the most popular PHP web boards in Asia.''-~
  
@@ -16, +16 @@

  
  == Korean Morphological Analyzer for lucene ==
  
+  * work in progress with NLP lab.
+ 
+ {{{
+ package org.apache.lucene;
+ 
+ import java.io.IOException;
+ import java.io.StringReader;
+ import java.util.ArrayList;
+ import java.util.List;
+ 
+ import org.apache.lucene.analysis.Analyzer;
+ import org.apache.lucene.analysis.Token;
+ import org.apache.lucene.analysis.TokenStream;
+ import org.apache.lucene.analysis.kr.KoreanAnalyzer;
+ import org.apache.lucene.analysis.kr.KoreanMorphemeAnalyzer;
+ 
+ /**
+  * Small demo that runs one sample Korean sentence through
+  * KoreanMorphemeAnalyzer and KoreanAnalyzer and prints the resulting tokens.
+  */
+ public class Test {
+   /**
+    * Tokenizes the sample sentence first with KoreanMorphemeAnalyzer and then
+    * with KoreanAnalyzer, printing one token per line to standard output.
+    *
+    * @param arg command-line arguments (unused)
+    * @throws IOException if reading from a token stream fails
+    */
+   public static void main(String[] arg) throws IOException {
+     String sentence = "조선 제4대 임금 세종이 훈민정음이라는 이름으로 창제하였다";
+ 
+     // Each analyzer is created only after the previous one's output has been
+     // printed, so the order of construction and output is unchanged.
+     printAll(getMorphemeArray(new KoreanMorphemeAnalyzer(), sentence));
+     printAll(getMorphemeArray(new KoreanAnalyzer(), sentence));
+   }
+ 
+   /** Prints every element of {@code lines} on its own line of standard output. */
+   private static void printAll(String[] lines) {
+     for (String line : lines) {
+       System.out.println(line);
+     }
+   }
+ 
+   /**
+    * Runs {@code string} through the given analyzer and collects the string
+    * form ({@code toString()}) of every token it emits, in stream order.
+    *
+    * @param a the analyzer used to tokenize the text
+    * @param string the text to analyze
+    * @return the tokens' string forms; empty if the analyzer emits no tokens
+    * @throws IOException if the token stream fails while reading or closing
+    */
+   private static String[] getMorphemeArray(Analyzer a, String string)
+       throws IOException {
+     List<String> tokens = new ArrayList<String>();
+     // "dummy" is the field name handed to the analyzer; no index is involved.
+     TokenStream ts = a.tokenStream("dummy", new StringReader(string));
+ 
+     try {
+       // next() returns null once the stream is exhausted.
+       Token t = ts.next();
+       while (t != null) {
+         tokens.add(t.toString());
+         t = ts.next();
+       }
+     } finally {
+       // Release the stream even if tokenizing fails part-way through.
+       ts.close();
+     }
+ 
+     return tokens.toArray(new String[tokens.size()]);
+   }
+ }
+ }}}
+