Posted to commits@lucene.apache.org by cm...@apache.org on 2012/03/10 15:54:49 UTC
svn commit: r1299213 [2/2] - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/contrib/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/synonym/
lucene/contrib/analyzers/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/
luce...
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestKuromojiTokenizer.java Sat Mar 10 14:54:47 2012
@@ -17,7 +17,13 @@ package org.apache.lucene.analysis.kurom
* limitations under the License.
*/
+import java.io.BufferedReader;
+import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.LineNumberReader;
+import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
@@ -26,21 +32,76 @@ import org.apache.lucene.analysis.BaseTo
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer.Mode;
+import org.apache.lucene.analysis.kuromoji.dict.ConnectionCosts;
+import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
+import org.apache.lucene.analysis.kuromoji.tokenattributes.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
public class TestKuromojiTokenizer extends BaseTokenStreamTestCase {
+
+ public static UserDictionary readDict() {
+ InputStream is = TestKuromojiTokenizer.class.getResourceAsStream("userdict.txt");
+ if (is == null) {
+ throw new RuntimeException("Cannot find userdict.txt in test classpath!");
+ }
+ try {
+ try {
+ Reader reader = new InputStreamReader(is, IOUtils.CHARSET_UTF_8);
+ return new UserDictionary(reader);
+ } finally {
+ is.close();
+ }
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
+ }
+
private Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KuromojiTokenizer(reader);
+ Tokenizer tokenizer = new KuromojiTokenizer(reader, readDict(), false, Mode.SEARCH);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
-
+
+ private Analyzer analyzerNormal = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KuromojiTokenizer(reader, readDict(), false, Mode.NORMAL);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+
+ private Analyzer analyzerNoPunct = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KuromojiTokenizer(reader, readDict(), true, Mode.SEARCH);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+
+ private Analyzer extendedModeAnalyzerNoPunct = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new KuromojiTokenizer(reader, readDict(), true, Mode.EXTENDED);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+
+ public void testNormalMode() throws Exception {
+ assertAnalyzesTo(analyzerNormal,
+ "ã·ãã¢ã½ããã¦ã§ã¢ã¨ã³ã¸ãã¢",
+ new String[] {"ã·ãã¢ã½ããã¦ã§ã¢ã¨ã³ã¸ãã¢"});
+ }
+
public void testDecomposition1() throws Exception {
- assertAnalyzesTo(analyzer, "本来は、貧困層の女性や子供に医療保護を提供するために創設された制度である、" +
+ assertAnalyzesTo(analyzerNoPunct, "本来は、貧困層の女性や子供に医療保護を提供するために創設された制度である、" +
"アメリカ低所得者医療援助制度が、今日では、その予算の約３分の１を老人に費やしている。",
new String[] { "本来", "は", "貧困", "層", "の", "女性", "や", "子供", "に", "医療", "保護", "を",
"提供", "する", "ため", "に", "創設", "さ", "れ", "た", "制度", "で", "ある", "アメリカ",
@@ -56,7 +117,7 @@ public class TestKuromojiTokenizer exten
}
public void testDecomposition2() throws Exception {
- assertAnalyzesTo(analyzer, "麻薬の密売は根こそぎ絶やさなければならない",
+ assertAnalyzesTo(analyzerNoPunct, "麻薬の密売は根こそぎ絶やさなければならない",
new String[] { "麻薬", "の", "密売", "は", "根こそぎ", "絶やさ", "なけれ", "ば", "なら", "ない" },
new int[] { 0, 2, 3, 5, 6, 10, 13, 16, 17, 19 },
new int[] { 2, 3, 5, 6, 10, 13, 16, 17, 19, 21 }
@@ -64,7 +125,7 @@ public class TestKuromojiTokenizer exten
}
public void testDecomposition3() throws Exception {
- assertAnalyzesTo(analyzer, "魔女狩大将マシュー・ホプキンス。",
+ assertAnalyzesTo(analyzerNoPunct, "魔女狩大将マシュー・ホプキンス。",
new String[] { "魔女", "狩", "大将", "マシュー", "ホプキンス" },
new int[] { 0, 2, 3, 5, 10 },
new int[] { 2, 3, 5, 9, 15 }
@@ -92,9 +153,32 @@ public class TestKuromojiTokenizer exten
ts.close();
}
+ /*
+ // NOTE: intentionally fails! Just trying to debug this
+ // one input...
+ public void testDecomposition6() throws Exception {
+ assertAnalyzesTo(analyzer, "奈良先端科学技術大学院大学",
+ new String[] { "これ", "は", "本", "で", "は", "ない" },
+ new int[] { 0, 2, 3, 4, 5, 6 },
+ new int[] { 2, 3, 4, 5, 6, 8 }
+ );
+ }
+ */
+
/** Tests that sentence offset is incorporated into the resulting offsets */
public void testTwoSentences() throws Exception {
- assertAnalyzesTo(analyzer, "魔女狩大将マシュー・ホプキンス。 魔女狩大将マシュー・ホプキンス。",
+ /*
+ //TokenStream ts = a.tokenStream("foo", new StringReader("妹の咲子です。俺と年子で、今受験生です。"));
+ TokenStream ts = analyzer.tokenStream("foo", new StringReader("�<!--\"<!--#<!--;?><!--#<!--#><!---->?>-->;"));
+ ts.reset();
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+ while(ts.incrementToken()) {
+ System.out.println(" " + termAtt.toString());
+ }
+ System.out.println("DONE PARSE\n\n");
+ */
+
+ assertAnalyzesTo(analyzerNoPunct, "魔女狩大将マシュー・ホプキンス。 魔女狩大将マシュー・ホプキンス。",
new String[] { "魔女", "狩", "大将", "マシュー", "ホプキンス", "魔女", "狩", "大将", "マシュー", "ホプキンス" },
new int[] { 0, 2, 3, 5, 10, 17, 19, 20, 22, 27 },
new int[] { 2, 3, 5, 9, 15, 19, 20, 22, 26, 32 }
@@ -104,6 +188,7 @@ public class TestKuromojiTokenizer exten
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER);
+ checkRandomData(random, analyzerNoPunct, 10000*RANDOM_MULTIPLIER);
}
public void testLargeDocReliability() throws Exception {
@@ -126,6 +211,9 @@ public class TestKuromojiTokenizer exten
public void testSurrogates2() throws IOException {
int numIterations = atLeast(10000);
for (int i = 0; i < numIterations; i++) {
+ if (VERBOSE) {
+ System.out.println("\nTEST: iter=" + i);
+ }
String s = _TestUtil.randomUnicodeString(random, 100);
TokenStream ts = analyzer.tokenStream("foo", new StringReader(s));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
@@ -135,22 +223,410 @@ public class TestKuromojiTokenizer exten
}
}
}
+
+ public void testOnlyPunctuation() throws IOException {
+ TokenStream ts = analyzerNoPunct.tokenStream("foo", new StringReader("。、。。"));
+ ts.reset();
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
+
+ public void testOnlyPunctuationExtended() throws IOException {
+ TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", new StringReader("......"));
+ ts.reset();
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
// note: test is kinda silly since kuromoji emits punctuation tokens.
// but, when/if we filter these out it will be useful.
public void testEnd() throws Exception {
- assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("これは本ではない")),
+ assertTokenStreamContents(analyzerNoPunct.tokenStream("foo", new StringReader("これは本ではない")),
new String[] { "これ", "は", "本", "で", "は", "ない" },
new int[] { 0, 2, 3, 4, 5, 6 },
new int[] { 2, 3, 4, 5, 6, 8 },
new Integer(8)
);
-
- assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("これは本ではない    ")),
+
+ assertTokenStreamContents(analyzerNoPunct.tokenStream("foo", new StringReader("これは本ではない    ")),
new String[] { "これ", "は", "本", "で", "は", "ない" },
new int[] { 0, 2, 3, 4, 5, 6, 8 },
new int[] { 2, 3, 4, 5, 6, 8, 9 },
new Integer(12)
);
}
+
+ public void testUserDict() throws Exception {
+ // Not a great test because w/o userdict.txt the
+ // segmentation is the same:
+ assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("関西国際空港に行った")),
+ new String[] { "関西", "国際", "空港", "に", "行っ", "た" },
+ new int[] { 0, 2, 4, 6, 7, 9 },
+ new int[] { 2, 4, 6, 7, 9, 10 },
+ new Integer(10)
+ );
+ }
+
+ public void testUserDict2() throws Exception {
+ // Better test: w/o userdict the segmentation is different:
+ assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("朝青龍")),
+ new String[] { "朝青龍" },
+ new int[] { 0 },
+ new int[] { 3 },
+ new Integer(3)
+ );
+ }
+
+ public void testUserDict3() throws Exception {
+ // Test entry that breaks into multiple tokens:
+ assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("abcd")),
+ new String[] { "a", "b", "cd" },
+ new int[] { 0, 1, 2 },
+ new int[] { 1, 2, 4 },
+ new Integer(4)
+ );
+ }
+
+ // HMM: fails (segments as a/b/cd/efghij)... because the
+ // two paths have exactly equal paths (1 KNOWN + 1
+ // UNKNOWN) and we don't seem to favor longer KNOWN /
+ // shorter UNKNOWN matches:
+
+ /*
+ public void testUserDict4() throws Exception {
+ // Test entry that has another entry as prefix
+ assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("abcdefghij")),
+ new String[] { "ab", "cd", "efg", "hij" },
+ new int[] { 0, 2, 4, 7 },
+ new int[] { 2, 4, 7, 10 },
+ new Integer(10)
+ );
+ }
+ */
+
+ public void testSegmentation() throws Exception {
+ // Skip tests for Michelle Kwan -- UniDic segments Kwan as ク ワン
+ // String input = "ミシェル・クワンが優勝しました。スペースステーションに行きます。うたがわしい。";
+ // String[] surfaceForms = {
+ // "ミシェル", "・", "クワン", "が", "優勝", "し", "まし", "た", "。",
+ // "スペース", "ステーション", "に", "行き", "ます", "。",
+ // "うたがわしい", "。"
+ // };
+ String input = "スペースステーションに行きます。うたがわしい。";
+ String[] surfaceForms = {
+ "スペース", "ステーション", "に", "行き", "ます", "。",
+ "うたがわしい", "。"
+ };
+ assertAnalyzesTo(analyzer,
+ input,
+ surfaceForms);
+ }
+
+ public void testLatticeToDot() throws Exception {
+ final GraphvizFormatter gv2 = new GraphvizFormatter(ConnectionCosts.getInstance());
+ final Analyzer analyzer = new ReusableAnalyzerBase() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ KuromojiTokenizer tokenizer = new KuromojiTokenizer(reader, readDict(), false, Mode.SEARCH);
+ tokenizer.setGraphvizFormatter(gv2);
+ return new TokenStreamComponents(tokenizer, tokenizer);
+ }
+ };
+
+ String input = "ã¹ãã¼ã¹ã¹ãã¼ã·ã§ã³ã«è¡ãã¾ããããããããã";
+ String[] surfaceForms = {
+ "ã¹ãã¼ã¹", "ã¹ãã¼ã·ã§ã³", "ã«", "è¡ã", "ã¾ã", "ã",
+ "ãããããã", "ã"
+ };
+ assertAnalyzesTo(analyzer,
+ input,
+ surfaceForms);
+
+ assertTrue(gv2.finish().indexOf("22.0") != -1);
+ }
+
+ private void assertReadings(String input, String... readings) throws IOException {
+ TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input));
+ ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
+ ts.reset();
+ for(String reading : readings) {
+ assertTrue(ts.incrementToken());
+ assertEquals(reading, readingAtt.getReading());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
+
+ private void assertPronunciations(String input, String... pronunciations) throws IOException {
+ TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input));
+ ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
+ ts.reset();
+ for(String pronunciation : pronunciations) {
+ assertTrue(ts.incrementToken());
+ assertEquals(pronunciation, readingAtt.getPronunciation());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
+
+ private void assertBaseForms(String input, String... baseForms) throws IOException {
+ TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input));
+ BaseFormAttribute baseFormAtt = ts.addAttribute(BaseFormAttribute.class);
+ ts.reset();
+ for(String baseForm : baseForms) {
+ assertTrue(ts.incrementToken());
+ assertEquals(baseForm, baseFormAtt.getBaseForm());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
+
+ private void assertInflectionTypes(String input, String... inflectionTypes) throws IOException {
+ TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input));
+ InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
+ ts.reset();
+ for(String inflectionType : inflectionTypes) {
+ assertTrue(ts.incrementToken());
+ assertEquals(inflectionType, inflectionAtt.getInflectionType());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
+
+ private void assertInflectionForms(String input, String... inflectionForms) throws IOException {
+ TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input));
+ InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
+ ts.reset();
+ for(String inflectionForm : inflectionForms) {
+ assertTrue(ts.incrementToken());
+ assertEquals(inflectionForm, inflectionAtt.getInflectionForm());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
+
+ private void assertPartsOfSpeech(String input, String... partsOfSpeech) throws IOException {
+ TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input));
+ PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class);
+ ts.reset();
+ for(String partOfSpeech : partsOfSpeech) {
+ assertTrue(ts.incrementToken());
+ assertEquals(partOfSpeech, partOfSpeechAtt.getPartOfSpeech());
+ }
+ assertFalse(ts.incrementToken());
+ ts.end();
+ }
+
+ public void testReadings() throws Exception {
+ assertReadings("寿å¸ãé£ã¹ããã§ãã",
+ "ã¹ã·",
+ "ã¬",
+ "ã¿ã",
+ "ã¿ã¤",
+ "ãã¹",
+ "ã");
+ }
+
+ public void testReadings2() throws Exception {
+ assertReadings("å¤ãã®å¦çã試é¨ã«è½ã¡ãã",
+ "ãªãªã¯",
+ "ã",
+ "ã¬ã¯ã»ã¤",
+ "ã¬",
+ "ã·ã±ã³",
+ "ã",
+ "ãªã",
+ "ã¿",
+ "ã");
+ }
+
+ public void testPronunciations() throws Exception {
+ assertPronunciations("寿å¸ãé£ã¹ããã§ãã",
+ "ã¹ã·",
+ "ã¬",
+ "ã¿ã",
+ "ã¿ã¤",
+ "ãã¹",
+ "ã");
+ }
+
+ public void testPronunciations2() throws Exception {
+ // pronunciation differs from reading here
+ assertPronunciations("å¤ãã®å¦çã試é¨ã«è½ã¡ãã",
+ "ãªã¼ã¯",
+ "ã",
+ "ã¬ã¯ã»ã¤",
+ "ã¬",
+ "ã·ã±ã³",
+ "ã",
+ "ãªã",
+ "ã¿",
+ "ã");
+ }
+
+ public void testBasicForms() throws Exception {
+ assertBaseForms("ããã¯ã¾ã å®é¨æ®µéã«ããã¾ãã",
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ "ãã",
+ null,
+ null);
+ }
+
+ public void testInflectionTypes() throws Exception {
+ assertInflectionTypes("ããã¯ã¾ã å®é¨æ®µéã«ããã¾ãã",
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ "äºæ®µã»ã©è¡",
+ "ç¹æ®ã»ãã¹",
+ null);
+ }
+
+ public void testInflectionForms() throws Exception {
+ assertInflectionForms("ããã¯ã¾ã å®é¨æ®µéã«ããã¾ãã",
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ "é£ç¨å½¢",
+ "åºæ¬å½¢",
+ null);
+ }
+
+ public void testPartOfSpeech() throws Exception {
+ assertPartsOfSpeech("ããã¯ã¾ã å®é¨æ®µéã«ããã¾ãã",
+ "åè©-代åè©-ä¸è¬",
+ "å©è©-ä¿å©è©",
+ "å¯è©-å©è©é¡æ¥ç¶",
+ "åè©-ãµå¤æ¥ç¶",
+ "åè©-ä¸è¬",
+ "å©è©-æ ¼å©è©-ä¸è¬",
+ "åè©-èªç«",
+ "å©åè©",
+ "è¨å·-å¥ç¹");
+ }
+
+ // TODO: the next 2 tests are no longer using the first/last word ids, maybe lookup the words and fix?
+ // do we have a possibility to actually lookup the first and last word from dictionary?
+ public void testYabottai() throws Exception {
+ assertAnalyzesTo(analyzer, "やぼったい",
+ new String[] {"やぼったい"});
+ }
+
+ public void testTsukitosha() throws Exception {
+ assertAnalyzesTo(analyzer, "突き通しゃ",
+ new String[] {"突き通しゃ"});
+ }
+
+ public void testBocchan() throws Exception {
+ doTestBocchan(1);
+ }
+
+ @Nightly
+ public void testBocchanBig() throws Exception {
+ doTestBocchan(100);
+ }
+
+ /*
+ public void testWikipedia() throws Exception {
+ final FileInputStream fis = new FileInputStream("/q/lucene/jawiki-20120220-pages-articles.xml");
+ final Reader r = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
+
+ final long startTimeNS = System.nanoTime();
+ boolean done = false;
+ long compoundCount = 0;
+ long nonCompoundCount = 0;
+ long netOffset = 0;
+ while (!done) {
+ final TokenStream ts = analyzer.tokenStream("ignored", r);
+ ts.reset();
+ final PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
+ final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
+ int count = 0;
+ while (true) {
+ if (!ts.incrementToken()) {
+ done = true;
+ break;
+ }
+ count++;
+ if (posIncAtt.getPositionIncrement() == 0) {
+ compoundCount++;
+ } else {
+ nonCompoundCount++;
+ if (nonCompoundCount % 1000000 == 0) {
+ System.out.println(String.format("%.2f msec [pos=%d, %d, %d]",
+ (System.nanoTime()-startTimeNS)/1000000.0,
+ netOffset + offsetAtt.startOffset(),
+ nonCompoundCount,
+ compoundCount));
+ }
+ }
+ if (count == 100000000) {
+ System.out.println(" again...");
+ break;
+ }
+ }
+ ts.end();
+ netOffset += offsetAtt.endOffset();
+ }
+ System.out.println("compoundCount=" + compoundCount + " nonCompoundCount=" + nonCompoundCount);
+ r.close();
+ }
+ */
+
+
+ private void doTestBocchan(int numIterations) throws Exception {
+ LineNumberReader reader = new LineNumberReader(new InputStreamReader(
+ this.getClass().getResourceAsStream("bocchan.utf-8")));
+ String line = reader.readLine();
+ reader.close();
+
+ if (VERBOSE) {
+ System.out.println("Test for Bocchan without pre-splitting sentences");
+ }
+
+ /*
+ if (numIterations > 1) {
+ // warmup
+ for (int i = 0; i < numIterations; i++) {
+ final TokenStream ts = analyzer.tokenStream("ignored", new StringReader(line));
+ ts.reset();
+ while(ts.incrementToken());
+ }
+ }
+ */
+
+ long totalStart = System.currentTimeMillis();
+ for (int i = 0; i < numIterations; i++) {
+ final TokenStream ts = analyzer.tokenStream("ignored", new StringReader(line));
+ ts.reset();
+ while(ts.incrementToken());
+ }
+ String[] sentences = line.split("、|。");
+ if (VERBOSE) {
+ System.out.println("Total time : " + (System.currentTimeMillis() - totalStart));
+ System.out.println("Test for Bocchan with pre-splitting sentences (" + sentences.length + " sentences)");
+ }
+ totalStart = System.currentTimeMillis();
+ for (int i = 0; i < numIterations; i++) {
+ for (String sentence: sentences) {
+ final TokenStream ts = analyzer.tokenStream("ignored", new StringReader(sentence));
+ ts.reset();
+ while(ts.incrementToken());
+ }
+ }
+ if (VERBOSE) {
+ System.out.println("Total time : " + (System.currentTimeMillis() - totalStart));
+ }
+ }
}
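For readers following the change: every analyzer in this test now goes through the tokenizer's new 4-argument constructor. A minimal sketch of that setup, collapsing the four anonymous analyzers above into one hypothetical helper class (the class itself is not part of this commit):

import java.io.Reader;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer;
import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer.Mode;
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;

// One analyzer per (user dictionary, discardPunctuation, mode) combination.
class KuromojiModeAnalyzer extends ReusableAnalyzerBase {
  private final UserDictionary userDict;     // may be null
  private final boolean discardPunctuation;
  private final Mode mode;                   // NORMAL, SEARCH or EXTENDED

  KuromojiModeAnalyzer(UserDictionary userDict, boolean discardPunctuation, Mode mode) {
    this.userDict = userDict;
    this.discardPunctuation = discardPunctuation;
    this.mode = mode;
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // new constructor: (reader, user dictionary or null, discardPunctuation, mode)
    Tokenizer tokenizer = new KuromojiTokenizer(reader, userDict, discardPunctuation, mode);
    return new TokenStreamComponents(tokenizer, tokenizer);
  }
}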
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestSearchMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestSearchMode.java?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestSearchMode.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/TestSearchMode.java Sat Mar 10 14:54:47 2012
@@ -28,20 +28,19 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.kuromoji.Segmenter.Mode;
+import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer.Mode;
import org.apache.lucene.util.IOUtils;
public class TestSearchMode extends BaseTokenStreamTestCase {
private final static String SEGMENTATION_FILENAME = "search-segmentation-tests.txt";
- private final Segmenter segmenter = new Segmenter(Mode.SEARCH);
private final Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new KuromojiTokenizer(segmenter, reader);
+ Tokenizer tokenizer = new KuromojiTokenizer(reader, null, true, Mode.SEARCH);
return new TokenStreamComponents(tokenizer, tokenizer);
}
};
-
+
/** Test search mode segmentation */
public void testSearchSegmentation() throws IOException {
InputStream is = TestSearchMode.class.getResourceAsStream(SEGMENTATION_FILENAME);
@@ -64,7 +63,18 @@ public class TestSearchMode extends Base
String[] fields = line.split("\t", 2);
String sourceText = fields[0];
String[] expectedTokens = fields[1].split("\\s+");
- assertAnalyzesTo(analyzer, sourceText, expectedTokens);
+ int[] expectedPosIncrs = new int[expectedTokens.length];
+ int[] expectedPosLengths = new int[expectedTokens.length];
+ for(int tokIDX=0;tokIDX<expectedTokens.length;tokIDX++) {
+ if (expectedTokens[tokIDX].endsWith("/0")) {
+ expectedTokens[tokIDX] = expectedTokens[tokIDX].replace("/0", "");
+ expectedPosLengths[tokIDX] = expectedTokens.length-1;
+ } else {
+ expectedPosIncrs[tokIDX] = 1;
+ expectedPosLengths[tokIDX] = 1;
+ }
+ }
+ assertAnalyzesTo(analyzer, sourceText, expectedTokens, expectedPosIncrs);
}
} finally {
is.close();
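The loop above decodes a new convention in the fixture file: a token written as "<tok>/0" is the original compound, kept in the stream with position increment 0 so that it stacks on the first of its decompounded parts. A standalone sketch of that parsing, assuming the same tab-then-spaces line format (the method name is illustrative):

// Parse one fixture line: "<source>\t<tok> <tok>/0 <tok> ..."
static void parseFixtureLine(String line) {
  String[] fields = line.split("\t", 2);
  String sourceText = fields[0];
  String[] tokens = fields[1].split("\\s+");
  int[] posIncrs = new int[tokens.length];
  for (int i = 0; i < tokens.length; i++) {
    if (tokens[i].endsWith("/0")) {
      // compound token: strip the marker, stack on the previous position
      tokens[i] = tokens[i].substring(0, tokens[i].length() - 2);
      posIncrs[i] = 0;
    } else {
      posIncrs[i] = 1;
    }
  }
  // hand (sourceText, tokens, posIncrs) to assertAnalyzesTo, as above
}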
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/dict/UserDictionaryTest.java Sat Mar 10 14:54:47 2012
@@ -23,29 +23,17 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.io.IOException;
-import org.apache.lucene.analysis.kuromoji.SegmenterTest;
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
+import org.apache.lucene.analysis.kuromoji.TestKuromojiTokenizer;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
public class UserDictionaryTest extends LuceneTestCase {
- private UserDictionary readDict() throws IOException {
- InputStream is = SegmenterTest.class.getResourceAsStream("userdict.txt");
- if (is == null)
- throw new FileNotFoundException("Cannot find userdict.txt in test classpath!");
- try {
- Reader reader = new InputStreamReader(is, IOUtils.CHARSET_UTF_8);
- return new UserDictionary(reader);
- } finally {
- is.close();
- }
- }
-
@Test
public void testLookup() throws IOException {
- UserDictionary dictionary = readDict();
+ UserDictionary dictionary = TestKuromojiTokenizer.readDict();
String s = "é¢è¥¿å½é空港ã«è¡ã£ã";
int[][] dictionaryEntryResult = dictionary.lookup(s.toCharArray(), 0, s.length());
// Length should be three 関西, 国際, 空港
@@ -69,7 +57,7 @@ public class UserDictionaryTest extends
@Test
public void testReadings() throws IOException {
- UserDictionary dictionary = readDict();
+ UserDictionary dictionary = TestKuromojiTokenizer.readDict();
int[][] result = dictionary.lookup("日本経済新聞".toCharArray(), 0, 6);
assertEquals(3, result.length);
int wordIdNihon = result[0][0]; // wordId of 日本 in 日本経済新聞
@@ -83,7 +71,7 @@ public class UserDictionaryTest extends
@Test
public void testPartOfSpeech() throws IOException {
- UserDictionary dictionary = readDict();
+ UserDictionary dictionary = TestKuromojiTokenizer.readDict();
int[][] result = dictionary.lookup("日本経済新聞".toCharArray(), 0, 6);
assertEquals(3, result.length);
int wordIdKeizai = result[1][0]; // wordId of 経済 in 日本経済新聞
@@ -92,7 +80,7 @@ public class UserDictionaryTest extends
@Test
public void testRead() throws IOException {
- UserDictionary dictionary = readDict();
+ UserDictionary dictionary = TestKuromojiTokenizer.readDict();
assertNotNull(dictionary);
}
}
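These tests rely only on the number of matches and on the word id at index 0 of each int[] returned by lookup(). A hedged sketch of iterating matches under that assumption (the meaning of the remaining array elements is not shown in this patch):

import org.apache.lucene.analysis.kuromoji.TestKuromojiTokenizer;
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;

public class UserDictLookupDemo {
  public static void main(String[] args) throws Exception {
    UserDictionary dict = TestKuromojiTokenizer.readDict();
    String s = "関西国際空港に行った";
    int[][] matches = dict.lookup(s.toCharArray(), 0, s.length());
    for (int[] match : matches) {
      // index 0 is the word id; the tests use it to look up
      // readings and part-of-speech for the matched entry
      System.out.println("wordId=" + match[0]);
    }
  }
}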
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/search-segmentation-tests.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/search-segmentation-tests.txt?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/search-segmentation-tests.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/search-segmentation-tests.txt Sat Mar 10 14:54:47 2012
@@ -25,43 +25,45 @@
##
# Kansai Internationl Airport
-関西国際空港 関西 国際 空港
+関西国際空港 関西 関西国際空港/0 国際 空港
# Narita Airport
-成田空港 成田 空港
+成田空港 成田 成田空港/0 空港
# Haneda Airport
-羽田空港 羽田 空港
+羽田空港 羽田 羽田空港/0 空港
# Nara Institute of Science and Technology
-奈良先端科学技術大学院大学 奈良 先端 科学 技術 大学院 大学
+奈良先端科学技術大学院大学 奈良 奈良先端科学技術大学院大学/0 先端 科学 技術 大学院 大学
# Tokyo University
-東京大学 東京 大学
+東京大学 東京 東京大学/0 大学
# Kyoto University
-京都大学 京都 大学
+京都大学 京都 京都大学/0 大学
+
+# NOTE: differs from non-compound mode:
# Kyoto University Baseball Club
-京都大学硬式野球部 京都 大学 硬式 野球 部
+京都大学硬式野球部 京都大 学 硬式 野球 部
##
## Katakana titles
##
# Senior Software Engineer
-シニアソフトウェアエンジニア シニア ソフトウェア エンジニア
+シニアソフトウェアエンジニア シニア シニアソフトウェアエンジニア/0 ソフトウェア エンジニア
# Software Engineer
ソフトウェアエンジニア ソフトウェア エンジニア
# Senior Project Manager
-シニアプロジェクトマネジャー シニア プロジェクト マネジャー
+シニアプロジェクトマネジャー シニア シニアプロジェクトマネジャー/0 プロジェクト マネジャー
# Project Manager
プロジェクトマネジャー プロジェクト マネジャー
# Senior Sales Engineer
-シニアセールスエンジニア シニア セールス エンジニア
+シニアセールスエンジニア シニア シニアセールスエンジニア/0 セールス エンジニア
# System Architect
-システムアーキテクト システム アーキテクト
+システムアーキテクト システム システムアーキテクト/0 アーキテクト
# Senior System Architect
-シニアシステムアーキテクト シニア システム アーキテクト
+シニアシステムアーキテクト シニア シニアシステムアーキテクト/0 システム アーキテクト
# System Administrator
システムアドミニストレータ システム アドミニストレータ
-システムアドミニストレーター システム アドミニストレーター
+システムアドミニストレーター システム システムアドミニストレーター/0 アドミニストレーター
# Senior System Administrator
-シニアシステムアドミニストレーター シニア システム アドミニストレーター
+シニアシステムアドミニストレーター シニア シニアシステムアドミニストレーター/0 システム アドミニストレーター
##
## Company names (several are fictitious)
@@ -70,25 +72,25 @@
# SoftBank Mobile
ソフトバンクモバイル ソフトバンク モバイル
# Alpine Materials
-アルパインマテリアルズ アルパイン マテリアルズ
+アルパインマテリアルズ アルパイン アルパインマテリアルズ/0 マテリアルズ
# Sapporo Holdings
サッポロホールディングス サッポロ ホールディングス
# Yamada Corporation
-ヤマダコーポレーション ヤマダ コーポレーション
+ヤマダコーポレーション ヤマダ ヤマダコーポレーション/0 コーポレーション
# Canon Semiconductor equipement NOTE: Semiconductor becomes semi + conductor
-キヤノンセミコンダクターエクィップメント キヤノン セミ コンダクター エクィップメント
+キヤノンセミコンダクターエクィップメント キヤノン キヤノンセミコンダクターエクィップメント/0 セミ コンダクター エクィップメント
# Orental Chain
-オリエンタルチエン オリエンタル チエン
+オリエンタルチエン オリエンタル オリエンタルチエン/0 チエン
# Ally Projects Japan NOTE: Becomes one token as プロジェクト is not in IPADIC
アーリープロジェクトジャパン アーリープロジェクトジャパン
# Peter Pan Corporation
-ピーターパンコーポレーション ピーター パン コーポレーション
+ピーターパンコーポレーション ピーター ピーターパンコーポレーション/0 パン コーポレーション
# AIM Create
エイムクリエイツ エイム クリエイツ
# Mars Engineering
-マースエンジニアリング マース エンジニアリング
+マースエンジニアリング マース マースエンジニアリング/0 エンジニアリング
# Fuji Protein Technology
-フジプロテインテクノロジー フジ プロテイン テクノロジー
+フジプロテインテクノロジー フジ フジプロテインテクノロジー/0 プロテイン テクノロジー
##
## Person names
@@ -100,7 +102,7 @@
スティーブジョブズ スティーブ ジョブズ
# Harry Potter NOTE: Becomes one token (short word)
ハリーポッター ハリーポッター
-# Bill Gates NOTE: Becomes one token (short work)
+# Bill Gates NOTE: Becomes one token (short word)
ビルゲイツ ビルゲイツ
# Sean Connery NOTE: Becomes one token (okay)
ショーンコネリー ショーンコネリー
@@ -133,8 +135,8 @@
##
# JT Engineering NOTE: Becomes J Tien ginia ring (substrings are in IPADIC)
-ジェイティエンジニアリング ジェイ ティエン ジニア リング
+ジェイティエンジニアリング ジェイ ジェイティエンジニアリング/0 ティエン ジニア リング
# Anchovy pasta NOTE: Become Anch yvipasta
-アンチョビパスタ アンチ ョビパスタ
+アンチョビパスタ アンチ アンチョビパスタ/0 ョビパスタ
# Surprise gift NOTE: Becomes one token (surprise not in IPADIC)
サプライズギフト サプライズギフト
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/userdict.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/userdict.txt?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/userdict.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/src/test/org/apache/lucene/analysis/kuromoji/userdict.txt Sat Mar 10 14:54:47 2012
@@ -4,3 +4,7 @@
# Custom reading for sumo wrestler
朝青龍,朝青龍,アサショウリュウ,カスタム人名
+
+# Silly entry:
+abcd,a b cd,foo1 foo2 foo3,bar
+abcdefg,ab cd efg,foo1 foo2 foo4,bar
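Each row is surface,segmentation,readings,part-of-speech, with the segmentation and readings columns space-separated; the new "silly" entries deliberately map one surface form onto several tokens. A sketch of building a UserDictionary from equivalent inline rows instead of the classpath resource (mirrors readDict() in TestKuromojiTokenizer; rows copied from this fixture):

import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;

public class InlineUserDict {
  public static void main(String[] args) throws Exception {
    // row format: surface,segmentation,readings,part-of-speech
    Reader r = new StringReader(
        "関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞\n" +
        "abcd,a b cd,foo1 foo2 foo3,bar\n");
    UserDictionary userDict = new UserDictionary(r);
    System.out.println("loaded: " + userDict);
  }
}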
Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttributeImpl.java Sat Mar 10 14:54:47 2012
@@ -54,9 +54,10 @@ public class PositionIncrementAttributeI
* @param positionIncrement the distance from the prior term
*/
public void setPositionIncrement(int positionIncrement) {
- if (positionIncrement < 0)
+ if (positionIncrement < 0) {
throw new IllegalArgumentException
- ("Increment must be zero or greater: " + positionIncrement);
+ ("Increment must be zero or greater: got " + positionIncrement);
+ }
this.positionIncrement = positionIncrement;
}
@@ -79,7 +80,8 @@ public class PositionIncrementAttributeI
}
if (other instanceof PositionIncrementAttributeImpl) {
- return positionIncrement == ((PositionIncrementAttributeImpl) other).positionIncrement;
+ PositionIncrementAttributeImpl _other = (PositionIncrementAttributeImpl) other;
+ return positionIncrement == _other.positionIncrement;
}
return false;
@@ -95,5 +97,4 @@ public class PositionIncrementAttributeI
PositionIncrementAttribute t = (PositionIncrementAttribute) target;
t.setPositionIncrement(positionIncrement);
}
-
}
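No behavior changes here: an increment of zero stays legal (it is exactly what the new /0 compound tokens rely on) and negative values still throw, now reporting the offending value. A quick sketch:

import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttributeImpl;

public class PosIncDemo {
  public static void main(String[] args) {
    PositionIncrementAttributeImpl att = new PositionIncrementAttributeImpl();
    att.setPositionIncrement(0);   // legal: token stacked on the previous position
    try {
      att.setPositionIncrement(-1);
    } catch (IllegalArgumentException e) {
      System.out.println(e.getMessage()); // "Increment must be zero or greater: got -1"
    }
  }
}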
Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Sat Mar 10 14:54:47 2012
@@ -840,6 +840,7 @@ public final class FST<T> {
}
public Arc<T> readFirstRealTargetArc(int node, Arc<T> arc, final BytesReader in) throws IOException {
+ assert in.bytes == bytes;
final int address = getNodeAddress(node);
in.pos = address;
//System.out.println(" readFirstRealTargtArc address="
@@ -936,6 +937,7 @@ public final class FST<T> {
/** Never returns null, but you should never call this if
* arc.isLast() is true. */
public Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
+ assert in.bytes == bytes;
// TODO: can't assert this because we call from readFirstArc
// assert !flag(arc.flags, BIT_LAST_ARC);
@@ -1019,6 +1021,7 @@ public final class FST<T> {
* This returns null if the arc was not found, else the incoming arc. */
public Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
assert cachedRootArcs != null;
+ assert in.bytes == bytes;
if (labelToMatch == END_LABEL) {
if (follow.isFinal()) {
@@ -1225,17 +1228,20 @@ public final class FST<T> {
/** Expert */
public static abstract class BytesReader extends DataInput {
- int pos;
+ protected int pos;
+ protected final byte[] bytes;
+ protected BytesReader(byte[] bytes, int pos) {
+ this.bytes = bytes;
+ this.pos = pos;
+ }
abstract void skip(int byteCount);
abstract void skip(int base, int byteCount);
}
final static class ReverseBytesReader extends BytesReader {
- final byte[] bytes;
public ReverseBytesReader(byte[] bytes, int pos) {
- this.bytes = bytes;
- this.pos = pos;
+ super(bytes, pos);
}
@Override
@@ -1262,11 +1268,9 @@ public final class FST<T> {
// TODO: can we use just ByteArrayDataInput...? need to
// add a .skipBytes to DataInput.. hmm and .setPosition
final static class ForwardBytesReader extends BytesReader {
- final byte[] bytes;
public ForwardBytesReader(byte[] bytes, int pos) {
- this.bytes = bytes;
- this.pos = pos;
+ super(bytes, pos);
}
@Override
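The point of hoisting pos and bytes into the abstract BytesReader is that FST can now assert that a caller-supplied reader was created over this FST's own byte array, catching readers accidentally shared across FSTs. A simplified sketch of the pattern (not the real FST API):

// An owner that only accepts readers created over its own byte[].
class ByteStore {
  private final byte[] bytes = new byte[128];

  abstract static class BytesReader {
    protected int pos;
    protected final byte[] bytes;
    protected BytesReader(byte[] bytes, int pos) {
      this.bytes = bytes;
      this.pos = pos;
    }
  }

  static final class ForwardReader extends BytesReader {
    ForwardReader(byte[] bytes, int pos) {
      super(bytes, pos);
    }
  }

  BytesReader getReader() {
    return new ForwardReader(bytes, 0);   // tied to this store's bytes
  }

  void read(BytesReader in) {
    // same idea as the new "assert in.bytes == bytes" above
    assert in.bytes == bytes : "reader belongs to a different store";
    // ... decode starting at in.pos ...
  }
}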
Modified: lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Sat Mar 10 14:54:47 2012
@@ -17,13 +17,18 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
-import java.io.IOException;
+import java.io.StringWriter;
+import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
-
+
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
@@ -83,7 +88,7 @@ public abstract class BaseTokenStreamTes
}
}
- public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], Integer finalOffset) throws IOException {
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset) throws IOException {
assertNotNull(output);
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
@@ -107,6 +112,12 @@ public abstract class BaseTokenStreamTes
assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
}
+
+ PositionLengthAttribute posLengthAtt = null;
+ if (posLengths != null) {
+ assertTrue("has no PositionLengthAttribute", ts.hasAttribute(PositionLengthAttribute.class));
+ posLengthAtt = ts.getAttribute(PositionLengthAttribute.class);
+ }
ts.reset();
for (int i = 0; i < output.length; i++) {
@@ -116,6 +127,7 @@ public abstract class BaseTokenStreamTes
if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
if (typeAtt != null) typeAtt.setType("bogusType");
if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
+ if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
assertTrue("token "+i+" does not exist", ts.incrementToken());
@@ -130,6 +142,8 @@ public abstract class BaseTokenStreamTes
assertEquals("type "+i, types[i], typeAtt.type());
if (posIncrements != null)
assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement());
+ if (posLengths != null)
+ assertEquals("posLength "+i, posLengths[i], posLengthAtt.getPositionLength());
// we can enforce some basic things about a few attributes even if the caller doesn't check:
if (offsetAtt != null) {
@@ -138,14 +152,18 @@ public abstract class BaseTokenStreamTes
assertTrue("endOffset must be >= startOffset", offsetAtt.endOffset() >= offsetAtt.startOffset());
if (finalOffset != null) {
assertTrue("startOffset must be <= finalOffset", offsetAtt.startOffset() <= finalOffset.intValue());
- assertTrue("endOffset must be <= finalOffset", offsetAtt.endOffset() <= finalOffset.intValue());
+ assertTrue("endOffset must be <= finalOffset: got endOffset=" + offsetAtt.endOffset() + " vs finalOffset=" + finalOffset.intValue(),
+ offsetAtt.endOffset() <= finalOffset.intValue());
}
}
if (posIncrAtt != null) {
assertTrue("posIncrement must be >= 0", posIncrAtt.getPositionIncrement() >= 0);
}
+ if (posLengthAtt != null) {
+ assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
+ }
}
- assertFalse("end of stream", ts.incrementToken());
+ assertFalse("TokenStream has more tokens than expected", ts.incrementToken());
ts.end();
if (finalOffset != null)
assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
@@ -155,65 +173,81 @@ public abstract class BaseTokenStreamTes
ts.close();
}
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], Integer finalOffset) throws IOException {
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, finalOffset);
+ }
+
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output) throws IOException {
- assertTokenStreamContents(ts, output, null, null, null, null, null);
+ assertTokenStreamContents(ts, output, null, null, null, null, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, String[] types) throws IOException {
- assertTokenStreamContents(ts, output, null, null, types, null, null);
+ assertTokenStreamContents(ts, output, null, null, types, null, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] posIncrements) throws IOException {
- assertTokenStreamContents(ts, output, null, null, null, posIncrements, null);
+ assertTokenStreamContents(ts, output, null, null, null, posIncrements, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], Integer finalOffset) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, finalOffset);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, finalOffset);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, null);
}
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements, Integer finalOffset) throws IOException {
- assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, finalOffset);
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, finalOffset);
+ }
+
+ public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements, int[] posLengths, Integer finalOffset) throws IOException {
+ assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, posLengths, finalOffset);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
- assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
+ }
+
+ public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException {
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length());
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {
- assertAnalyzesTo(a, input, output, null, null, null, null);
+ assertAnalyzesTo(a, input, output, null, null, null, null, null);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws IOException {
- assertAnalyzesTo(a, input, output, null, null, types, null);
+ assertAnalyzesTo(a, input, output, null, null, types, null, null);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
- assertAnalyzesTo(a, input, output, null, null, null, posIncrements);
+ assertAnalyzesTo(a, input, output, null, null, null, posIncrements, null);
+ }
+
+ public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, int[] posIncrements, int[] posLengths) throws IOException {
+ assertAnalyzesTo(a, input, output, null, null, null, posIncrements, posLengths);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
- assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null);
+ assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null, null);
}
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
- assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements);
+ assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null);
}
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
- assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
+ assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
}
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {
@@ -326,7 +360,7 @@ public abstract class BaseTokenStreamTes
}
if (VERBOSE) {
- System.out.println("NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
+ System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
}
int remainder = random.nextInt(10);
@@ -336,10 +370,12 @@ public abstract class BaseTokenStreamTes
CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class) ? ts.getAttribute(OffsetAttribute.class) : null;
PositionIncrementAttribute posIncAtt = ts.hasAttribute(PositionIncrementAttribute.class) ? ts.getAttribute(PositionIncrementAttribute.class) : null;
+ PositionLengthAttribute posLengthAtt = ts.hasAttribute(PositionLengthAttribute.class) ? ts.getAttribute(PositionLengthAttribute.class) : null;
TypeAttribute typeAtt = ts.hasAttribute(TypeAttribute.class) ? ts.getAttribute(TypeAttribute.class) : null;
List<String> tokens = new ArrayList<String>();
List<String> types = new ArrayList<String>();
List<Integer> positions = new ArrayList<Integer>();
+ List<Integer> positionLengths = new ArrayList<Integer>();
List<Integer> startOffsets = new ArrayList<Integer>();
List<Integer> endOffsets = new ArrayList<Integer>();
ts.reset();
@@ -347,6 +383,7 @@ public abstract class BaseTokenStreamTes
tokens.add(termAtt.toString());
if (typeAtt != null) types.add(typeAtt.type());
if (posIncAtt != null) positions.add(posIncAtt.getPositionIncrement());
+ if (posLengthAtt != null) positionLengths.add(posLengthAtt.getPositionLength());
if (offsetAtt != null) {
startOffsets.add(offsetAtt.startOffset());
endOffsets.add(offsetAtt.endOffset());
@@ -357,11 +394,21 @@ public abstract class BaseTokenStreamTes
// verify reusing is "reproducable" and also get the normal tokenstream sanity checks
if (!tokens.isEmpty()) {
if (VERBOSE) {
- System.out.println("NOTE: BaseTokenStreamTestCase: re-run analysis");
+ System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: re-run analysis; " + tokens.size() + " tokens");
}
reader = new StringReader(text);
ts = a.reusableTokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
- if (typeAtt != null && posIncAtt != null && offsetAtt != null) {
+ if (typeAtt != null && posIncAtt != null && posLengthAtt != null && offsetAtt != null) {
+ // offset + pos + posLength + type
+ assertTokenStreamContents(ts,
+ tokens.toArray(new String[tokens.size()]),
+ toIntArray(startOffsets),
+ toIntArray(endOffsets),
+ types.toArray(new String[types.size()]),
+ toIntArray(positions),
+ toIntArray(positionLengths),
+ text.length());
+ } else if (typeAtt != null && posIncAtt != null && offsetAtt != null) {
// offset + pos + type
assertTokenStreamContents(ts,
tokens.toArray(new String[tokens.size()]),
@@ -369,7 +416,18 @@ public abstract class BaseTokenStreamTes
toIntArray(endOffsets),
types.toArray(new String[types.size()]),
toIntArray(positions),
+ null,
text.length());
+ } else if (posIncAtt != null && posLengthAtt != null && offsetAtt != null) {
+ // offset + pos + posLength
+ assertTokenStreamContents(ts,
+ tokens.toArray(new String[tokens.size()]),
+ toIntArray(startOffsets),
+ toIntArray(endOffsets),
+ null,
+ toIntArray(positions),
+ toIntArray(positionLengths),
+ text.length());
} else if (posIncAtt != null && offsetAtt != null) {
// offset + pos
assertTokenStreamContents(ts,
@@ -378,6 +436,7 @@ public abstract class BaseTokenStreamTes
toIntArray(endOffsets),
null,
toIntArray(positions),
+ null,
text.length());
} else if (offsetAtt != null) {
// offset
@@ -387,6 +446,7 @@ public abstract class BaseTokenStreamTes
toIntArray(endOffsets),
null,
null,
+ null,
text.length());
} else {
// terms only
@@ -396,6 +456,22 @@ public abstract class BaseTokenStreamTes
}
}
}
+
+ protected String toDot(Analyzer a, String inputText) throws IOException {
+ final StringWriter sw = new StringWriter();
+ final TokenStream ts = a.tokenStream("field", new StringReader(inputText));
+ ts.reset();
+ new TokenStreamToDot(inputText, ts, new PrintWriter(sw)).toDot();
+ return sw.toString();
+ }
+
+ protected void toDotFile(Analyzer a, String inputText, String localFileName) throws IOException {
+ Writer w = new OutputStreamWriter(new FileOutputStream(localFileName), "UTF-8");
+ final TokenStream ts = a.tokenStream("field", new StringReader(inputText));
+ ts.reset();
+ new TokenStreamToDot(inputText, ts, new PrintWriter(w)).toDot();
+ w.close();
+ }
static int[] toIntArray(List<Integer> list) {
int ret[] = new int[list.size()];
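The new toDot/toDotFile helpers render a token stream as Graphviz input, which is handy for eyeballing the token graphs that Kuromoji's search mode now produces. Usage from a subclass might look like this (output path illustrative; assumes an analyzer field as in the Kuromoji tests):

// inside a BaseTokenStreamTestCase subclass:
public void testDumpTokenGraph() throws Exception {
  String dot = toDot(analyzer, "シニアソフトウェアエンジニア");
  System.out.println(dot);   // render with e.g.: dot -Tpng kuromoji.dot
  toDotFile(analyzer, "シニアソフトウェアエンジニア", "/tmp/kuromoji.dot");
}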
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/KuromojiTokenizerFactory.java Sat Mar 10 14:54:47 2012
@@ -28,8 +28,7 @@ import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer;
-import org.apache.lucene.analysis.kuromoji.Segmenter;
-import org.apache.lucene.analysis.kuromoji.Segmenter.Mode;
+import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer.Mode;
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.analysis.BaseTokenizerFactory;
@@ -88,7 +87,7 @@ public class KuromojiTokenizerFactory ex
//@Override
public Tokenizer create(Reader input) {
- return new KuromojiTokenizer(new Segmenter(userDictionary, mode), input);
+ return new KuromojiTokenizer(input, userDictionary, true, mode);
}
private Mode getMode(Map<String, String> args) {
@@ -96,7 +95,7 @@ public class KuromojiTokenizerFactory ex
if (mode != null) {
return Mode.valueOf(mode.toUpperCase(Locale.ENGLISH));
} else {
- return Segmenter.DEFAULT_MODE;
+ return KuromojiTokenizer.DEFAULT_MODE;
}
}
}
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestKuromojiTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestKuromojiTokenizerFactory.java?rev=1299213&r1=1299212&r2=1299213&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestKuromojiTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestKuromojiTokenizerFactory.java Sat Mar 10 14:54:47 2012
@@ -50,7 +50,7 @@ public class TestKuromojiTokenizerFactor
factory.inform(new SolrResourceLoader(null, null));
TokenStream ts = factory.create(new StringReader("シニアソフトウェアエンジニア"));
assertTokenStreamContents(ts,
- new String[] { "ã·ãã¢", "ã½ããã¦ã§ã¢", "ã¨ã³ã¸ãã¢" }
+ new String[] { "ã·ãã¢", "ã·ãã¢ã½ããã¦ã§ã¢ã¨ã³ã¸ãã¢", "ã½ããã¦ã§ã¢", "ã¨ã³ã¸ãã¢" }
);
}
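Outside the test, wiring the factory up follows the same steps shown above: init with args, inform with a resource loader, then create. A hedged sketch; the "mode" arg name follows getMode(args) in the factory, and the rest is standard Solr factory plumbing:

import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.solr.analysis.KuromojiTokenizerFactory;
import org.apache.solr.core.SolrResourceLoader;

public class KuromojiFactoryDemo {
  public static void main(String[] args) throws Exception {
    Map<String, String> params = new HashMap<String, String>();
    params.put("mode", "search");   // parsed via Mode.valueOf(mode.toUpperCase(...))
    KuromojiTokenizerFactory factory = new KuromojiTokenizerFactory();
    factory.init(params);
    factory.inform(new SolrResourceLoader(null, null));
    Tokenizer tok = factory.create(new StringReader("シニアソフトウェアエンジニア"));
    System.out.println(tok);
  }
}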