You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/04/11 13:12:38 UTC

svn commit: r1586614 - in /lucene/dev/trunk/lucene/analysis/common/src: java/org/apache/lucene/analysis/th/ThaiTokenizer.java test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java

Author: rmuir
Date: Fri Apr 11 11:12:38 2014
New Revision: 1586614

URL: http://svn.apache.org/r1586614
Log:
LUCENE-5601: ThaiTokenizer ignores sentenceStart

Modified:
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java?rev=1586614&r1=1586613&r2=1586614&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java Fri Apr 11 11:12:38 2014
@@ -99,7 +99,7 @@ public class ThaiTokenizer extends Segme
     }
 
     clearAttributes();
-    termAtt.copyBuffer(buffer, start, end - start);
+    termAtt.copyBuffer(buffer, sentenceStart + start, end - start);
     offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end));
     return true;
   }

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1586614&r1=1586613&r2=1586614&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Fri Apr 11 11:12:38 2014
@@ -117,4 +117,11 @@ public class TestThaiAnalyzer extends Ba
     ts.addAttribute(FlagsAttribute.class);
     assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
   }
+  
+  public void testTwoSentences() throws Exception {
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "This is a test. การที่ได้ต้องแสดงว่างานดี", 
+          new String[] { "this", "is", "a", "test", "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
+          new int[] { 0, 5, 8, 10, 16, 19, 22, 25, 29, 33, 36, 39 },
+          new int[] { 4, 7, 9, 14, 19, 22, 25, 29, 33, 36, 39, 41 });
+  }
 }