You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/04/11 13:20:48 UTC
svn commit: r1586616 - in /lucene/dev/branches/lucene_solr_4_8: ./ lucene/
lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/th/
lucene/analysis/common/src/test/org/apache/lucene/analysis/th/
Author: rmuir
Date: Fri Apr 11 11:20:47 2014
New Revision: 1586616
URL: http://svn.apache.org/r1586616
Log:
LUCENE-5601: ThaiTokenizer ignores sentenceStart
Modified:
lucene/dev/branches/lucene_solr_4_8/ (props changed)
lucene/dev/branches/lucene_solr_4_8/lucene/ (props changed)
lucene/dev/branches/lucene_solr_4_8/lucene/analysis/ (props changed)
lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java
lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
Modified: lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java?rev=1586616&r1=1586615&r2=1586616&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java (original)
+++ lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiTokenizer.java Fri Apr 11 11:20:47 2014
@@ -100,7 +100,7 @@ public class ThaiTokenizer extends Segme
}
clearAttributes();
- termAtt.copyBuffer(buffer, start, end - start);
+ termAtt.copyBuffer(buffer, sentenceStart + start, end - start);
offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end));
return true;
}
Modified: lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java?rev=1586616&r1=1586615&r2=1586616&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (original)
+++ lucene/dev/branches/lucene_solr_4_8/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java Fri Apr 11 11:20:47 2014
@@ -179,4 +179,11 @@ public class TestThaiAnalyzer extends Ba
ts.addAttribute(FlagsAttribute.class);
assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
}
+
+ public void testTwoSentences() throws Exception {
+ assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET), "This is a test. การที่ได้ต้องแสดงว่างานดี",
+ new String[] { "this", "is", "a", "test", "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
+ new int[] { 0, 5, 8, 10, 16, 19, 22, 25, 29, 33, 36, 39 },
+ new int[] { 4, 7, 9, 14, 19, 22, 25, 29, 33, 36, 39, 41 });
+ }
}