You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/04/11 13:20:48 UTC

svn commit: r1586616 - in /lucene/dev/branches/lucene_solr_4_8: ./ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ lucene/analysis/common/src/test/org/apache/lucene/analysis/th/

Author: rmuir
Date: Fri Apr 11 11:20:47 2014
New Revision: 1586616

URL: http://svn.apache.org/r1586616
Log:
LUCENE-5601: ThaiTokenizer ignores sentenceStart

Modified:
    lucene/dev/branches/lucene_solr_4_8/   (props changed)
    lucene/dev/branches/lucene_solr_4_8/lucene/   (props changed)
    lucene/dev/branches/lucene_solr_4_8/lucene/analysis/   (props changed)
    lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java
    lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java

Modified: lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java?rev=1586616&r1=1586615&r2=1586616&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java (original)
+++ lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java Fri Apr 11 11:20:47 2014
@@ -100,7 +100,7 @@ public class ThaiTokenizer extends Segme
     }
 
     clearAttributes();
-    termAtt.copyBuffer(buffer, start, end - start);
+    termAtt.copyBuffer(buffer, sentenceStart + start, end - start);
     offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end));
     return true;
   }

Modified: lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1586616&r1=1586615&r2=1586616&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Fri Apr 11 11:20:47 2014
@@ -179,4 +179,11 @@ public class TestThaiAnalyzer extends Ba
     ts.addAttribute(FlagsAttribute.class);
     assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
   }
+  
+  public void testTwoSentences() throws Exception {
+    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "This is a test. การที่ได้ต้องแสดงว่างานดี", 
+          new String[] { "this", "is", "a", "test", "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
+          new int[] { 0, 5, 8, 10, 16, 19, 22, 25, 29, 33, 36, 39 },
+          new int[] { 4, 7, 9, 14, 19, 22, 25, 29, 33, 36, 39, 41 });
+  }
 }