You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/03/24 15:54:05 UTC
svn commit: r1304816 - in
/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene:
analysis/BaseTokenStreamTestCase.java util/_TestUtil.java
Author: rmuir
Date: Sat Mar 24 14:54:04 2012
New Revision: 1304816
URL: http://svn.apache.org/viewvc?rev=1304816&view=rev
Log:
LUCENE-3911: improve BaseTokenStreamTestCase random string generation, fix off-by-ones in TestUtil string gen methods
Modified:
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1304816&r1=1304815&r2=1304816&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Sat Mar 24 14:54:04 2012
@@ -365,8 +365,9 @@ public abstract class BaseTokenStreamTes
for (int i = 0; i < iterations; i++) {
String text;
-
+
if (random.nextInt(10) == 7) {
+ // real data from linedocs
text = docs.nextDoc().get("body");
if (text.length() > maxWordLength) {
// Take care not to split up a surrogate pair:
@@ -377,25 +378,11 @@ public abstract class BaseTokenStreamTes
}
}
} else {
- if (simple) {
- text = random.nextBoolean() ? _TestUtil.randomSimpleString(random, maxWordLength) : _TestUtil.randomHtmlishString(random, maxWordLength);
- } else {
- switch(_TestUtil.nextInt(random, 0, 4)) {
- case 0:
- text = _TestUtil.randomSimpleString(random, maxWordLength);
- break;
- case 1:
- text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength);
- break;
- case 2:
- text = _TestUtil.randomHtmlishString(random, maxWordLength);
- break;
- default:
- text = _TestUtil.randomUnicodeString(random, maxWordLength);
- }
- }
+ // synthetic
+ text = randomAnalysisString(random, maxWordLength, simple);
}
+
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
}
@@ -581,6 +568,65 @@ public abstract class BaseTokenStreamTes
}
}
}
+
+ private static String randomAnalysisString(Random random, int maxLength, boolean simple) {
+ assert maxLength >= 0;
+
+ // sometimes just a purely random string
+ if (random.nextInt(31) == 0) {
+ return randomSubString(random, random.nextInt(maxLength), simple);
+ }
+
+ // otherwise, try to make it more realistic with 'words' since most tests use MockTokenizer
+ // first decide how big the string will really be: 0..n
+ maxLength = random.nextInt(maxLength);
+ int avgWordLength = _TestUtil.nextInt(random, 3, 8);
+ StringBuilder sb = new StringBuilder();
+ while (sb.length() < maxLength) {
+ if (sb.length() > 0) {
+ sb.append(' ');
+ }
+ int wordLength = -1;
+ while (wordLength < 0) {
+ wordLength = (int) (random.nextGaussian() * 3 + avgWordLength);
+ }
+ wordLength = Math.min(wordLength, maxLength - sb.length());
+ sb.append(randomSubString(random, wordLength, simple));
+ }
+ return sb.toString();
+ }
+
+ private static String randomSubString(Random random, int wordLength, boolean simple) {
+ if (wordLength == 0) {
+ return "";
+ }
+
+ int evilness = _TestUtil.nextInt(random, 0, 20);
+
+ StringBuilder sb = new StringBuilder();
+ while (sb.length() < wordLength) {;
+ if (simple) {
+ sb.append(random.nextBoolean() ? _TestUtil.randomSimpleString(random, wordLength) : _TestUtil.randomHtmlishString(random, wordLength));
+ } else {
+ if (evilness < 10) {
+ sb.append(_TestUtil.randomSimpleString(random, wordLength));
+ } else if (evilness < 15) {
+ sb.append(_TestUtil.randomRealisticUnicodeString(random, wordLength));
+ } else if (evilness == 16) {
+ sb.append(_TestUtil.randomHtmlishString(random, wordLength));
+ } else {
+ sb.append(_TestUtil.randomUnicodeString(random, wordLength));
+ }
+ }
+ }
+ if (sb.length() > wordLength) {
+ sb.setLength(wordLength);
+ if (Character.isHighSurrogate(sb.charAt(wordLength-1))) {
+ sb.setLength(wordLength-1);
+ }
+ }
+ return sb.toString();
+ }
protected String toDot(Analyzer a, String inputText) throws IOException {
final StringWriter sw = new StringWriter();
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1304816&r1=1304815&r2=1304816&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Sat Mar 24 14:54:04 2012
@@ -197,7 +197,7 @@ public class _TestUtil {
}
public static String randomSimpleString(Random r, int maxLength) {
- final int end = r.nextInt(maxLength);
+ final int end = nextInt(r, 0, maxLength);
if (end == 0) {
// allow 0 length
return "";
@@ -222,7 +222,7 @@ public class _TestUtil {
* Returns a random string up to a certain length.
*/
public static String randomUnicodeString(Random r, int maxLength) {
- final int end = r.nextInt(maxLength);
+ final int end = nextInt(r, 0, maxLength);
if (end == 0) {
// allow 0 length
return "";
@@ -326,7 +326,7 @@ public class _TestUtil {
};
public static String randomHtmlishString(Random random, int numElements) {
- final int end = random.nextInt(numElements);
+ final int end = nextInt(random, 0, numElements);
if (end == 0) {
// allow 0 length
return "";
@@ -471,12 +471,12 @@ public class _TestUtil {
/** Returns random string of length up to maxLength codepoints , all codepoints within the same unicode block. */
public static String randomRealisticUnicodeString(Random r, int maxLength) {
- return randomRealisticUnicodeString(r, 0, 20);
+ return randomRealisticUnicodeString(r, 0, maxLength);
}
/** Returns random string of length between min and max codepoints, all codepoints within the same unicode block. */
public static String randomRealisticUnicodeString(Random r, int minLength, int maxLength) {
- final int end = minLength + r.nextInt(maxLength);
+ final int end = nextInt(r, minLength, maxLength);
final int block = r.nextInt(blockStarts.length);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < end; i++)