Posted to commits@lucene.apache.org by rm...@apache.org on 2011/04/14 17:25:48 UTC

svn commit: r1092338 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/src/test-framework/ lucene/backwards/src/test/ lucene/contrib/ lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/ lucene/contrib/anal...

Author: rmuir
Date: Thu Apr 14 15:25:48 2011
New Revision: 1092338

URL: http://svn.apache.org/viewvc?rev=1092338&view=rev
Log:
LUCENE-3026: SmartChineseAnalyzer's WordTokenFilter threw NullPointerException on sentences longer than 32,767 characters

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test-framework/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/benchmark/   (props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1092338&r1=1092337&r2=1092338&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Thu Apr 14 15:25:48 2011
@@ -2,7 +2,10 @@ Lucene contrib change Log
 
 ======================= Lucene 3.x (not yet released) =======================
 
-(No changes)
+Bug fixes
+
+ * LUCENE-3026: SmartChineseAnalyzer's WordTokenFilter threw NullPointerException
+   on sentences longer than 32,767 characters.  (wangzhenghang via Robert Muir)
 
 ======================= Lucene 3.1.0 =======================
 

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java?rev=1092338&r1=1092337&r2=1092338&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/SegGraph.java Thu Apr 14 15:25:48 2011
@@ -75,7 +75,7 @@ class SegGraph {
     List<SegToken> result = new ArrayList<SegToken>();
     int s = -1, count = 0, size = tokenListTable.size();
     List<SegToken> tokenList;
-    short index = 0;
+    int index = 0;
     while (count < size) {
       if (isStartExist(s)) {
         tokenList = tokenListTable.get(s);

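A note on the one-line change above: the index counter in SegGraph was declared as a 16-bit short, which wraps to a negative value once a single sentence produces more than Short.MAX_VALUE (32,767) entries; the negative index can then surface downstream as the NullPointerException reported in LUCENE-3026. Widening the counter to int removes the wrap-around. The following is an illustrative standalone sketch of that overflow, not part of the commit:

    // Illustrative sketch only (not part of the commit): how a 16-bit counter wraps.
    public class ShortIndexOverflow {
      public static void main(String[] args) {
        short shortIndex = Short.MAX_VALUE;   // 32767, the largest value a short can hold
        shortIndex++;                         // wraps around to -32768
        System.out.println(shortIndex);       // prints -32768

        int intIndex = Short.MAX_VALUE;       // the committed fix widens the counter to int
        intIndex++;                           // 32768, no wrap-around
        System.out.println(intIndex);         // prints 32768
      }
    }
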
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java?rev=1092338&r1=1092337&r2=1092338&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java Thu Apr 14 15:25:48 2011
@@ -17,8 +17,11 @@
 
 package org.apache.lucene.analysis.cn.smart;
 
+import java.io.StringReader;
+
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.Version;
 
 public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
@@ -166,4 +169,30 @@ public class TestSmartChineseAnalyzer ex
         new int[] { 0, 1, 3, 4, 6, 7 },
         new int[] { 1, 3, 4, 6, 7, 9 });
   }
+  
+  // LUCENE-3026
+  public void testLargeDocument() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < 5000; i++) {
+      sb.append("我购买了道具和服装。");
+    }
+    Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
+    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+    stream.reset();
+    while (stream.incrementToken()) {
+    }
+  }
+  
+  // LUCENE-3026
+  public void testLargeSentence() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < 5000; i++) {
+      sb.append("我购买了道具和服装");
+    }
+    Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
+    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+    stream.reset();
+    while (stream.incrementToken()) {
+    }
+  }
 }