You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2018/11/27 11:00:45 UTC
[1/3] lucene-solr:master: update comment after limiting number of
debug tokens
Repository: lucene-solr
Updated Branches:
refs/heads/master 72ca4488d -> 6728f0c4f
update comment after limiting number of debug tokens
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6728f0c4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6728f0c4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6728f0c4
Branch: refs/heads/master
Commit: 6728f0c4f4612d32c1eea35395669c0520021acb
Parents: 34ed015
Author: Michael Sokolov <so...@amazon.com>
Authored: Sat Nov 24 10:48:26 2018 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Nov 27 06:00:29 2018 -0500
----------------------------------------------------------------------
.../src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6728f0c4/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
index 9ab9489..5d5859e 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@@ -56,7 +56,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
private final OffsetAttribute offsetAtt = getAttribute(OffsetAttribute.class);
private final CharTermAttribute termAtt = getAttribute(CharTermAttribute.class);
- // record all the Tokens seen so they can be dumped on failure
+ // record the last MAX_DEBUG_TOKENS tokens seen so they can be dumped on failure
private final List<Token> tokens = new LinkedList<>();
private final String name;
[2/3] lucene-solr:master: fixing javadoc;
added docs for parameters of new method
Posted by mi...@apache.org.
fixing javadoc; added docs for parameters of new method
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/34ed0154
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/34ed0154
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/34ed0154
Branch: refs/heads/master
Commit: 34ed01543a27791b6cabce696f0360fd7ff836e1
Parents: 5490790
Author: Michael Sokolov <so...@amazon.com>
Authored: Mon Nov 19 15:46:29 2018 +0000
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Nov 27 06:00:29 2018 -0500
----------------------------------------------------------------------
.../java/org/apache/lucene/analysis/ValidatingTokenFilter.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/34ed0154/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
index 54b8fd3..9ab9489 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@@ -181,8 +181,8 @@ public final class ValidatingTokenFilter extends TokenFilter {
/**
* Prints details about consumed tokens stored in any ValidatingTokenFilters in the input chain
- * @param in
- * @param out
+ * @param in the input token stream
+ * @param out the output print stream
*/
public static void dumpValidatingTokenFilters(TokenStream in, PrintStream out) {
if (in instanceof TokenFilter) {
[3/3] lucene-solr:master: LUCENE-8517: do not wrap FixedShingleFilter
with conditional in TestRandomChains
Posted by mi...@apache.org.
LUCENE-8517: do not wrap FixedShingleFilter with conditional in TestRandomChains
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/54907903
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/54907903
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/54907903
Branch: refs/heads/master
Commit: 54907903e8d1a5da0c65328f24a1018c5e393afc
Parents: 72ca448
Author: Michael Sokolov <so...@amazon.com>
Authored: Sat Nov 17 08:41:25 2018 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Nov 27 06:00:29 2018 -0500
----------------------------------------------------------------------
.../lucene/analysis/core/TestRandomChains.java | 4 +-
.../lucene/analysis/ValidatingTokenFilter.java | 69 ++++++++++++++++++--
2 files changed, 65 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/54907903/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
index 557a69e..beb9bb2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@@ -88,6 +88,7 @@ import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
import org.apache.lucene.analysis.payloads.IdentityEncoder;
import org.apache.lucene.analysis.payloads.PayloadEncoder;
+import org.apache.lucene.analysis.shingle.FixedShingleFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.TestSnowball;
import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -129,6 +130,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
// expose inconsistent offsets
// https://issues.apache.org/jira/browse/LUCENE-4170
avoidConditionals.add(ShingleFilter.class);
+ avoidConditionals.add(FixedShingleFilter.class);
// FlattenGraphFilter changes the output graph entirely, so wrapping it in a condition
// can break position lengths
avoidConditionals.add(FlattenGraphFilter.class);
@@ -590,7 +592,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
static class MockRandomAnalyzer extends Analyzer {
final long seed;
-
+
MockRandomAnalyzer(long seed) {
this.seed = seed;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/54907903/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
index b29da70..54b8fd3 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@@ -17,7 +17,11 @@
package org.apache.lucene.analysis;
import java.io.IOException;
+import java.io.PrintStream;
import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -38,6 +42,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
* offsets are consistent with one another). */
public final class ValidatingTokenFilter extends TokenFilter {
+ private static final int MAX_DEBUG_TOKENS = 20;
+
private int pos;
private int lastStartOffset;
@@ -50,6 +56,9 @@ public final class ValidatingTokenFilter extends TokenFilter {
private final OffsetAttribute offsetAtt = getAttribute(OffsetAttribute.class);
private final CharTermAttribute termAtt = getAttribute(CharTermAttribute.class);
+ // record all the Tokens seen so they can be dumped on failure
+ private final List<Token> tokens = new LinkedList<>();
+
private final String name;
/** The name arg is used to identify this stage when
@@ -72,28 +81,38 @@ public final class ValidatingTokenFilter extends TokenFilter {
int startOffset = 0;
int endOffset = 0;
int posLen = 0;
+ int posInc = 0;
+
+ if (posIncAtt != null) {
+ posInc = posIncAtt.getPositionIncrement();
+ }
+ if (offsetAtt != null) {
+ startOffset = offsetAtt.startOffset();
+ endOffset = offsetAtt.endOffset();
+ }
+
+ posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
+
+ addToken(startOffset, endOffset, posInc);
// System.out.println(name + ": " + this);
if (posIncAtt != null) {
- pos += posIncAtt.getPositionIncrement();
+ pos += posInc;
if (pos == -1) {
+ dumpValidatingTokenFilters(this, System.err);
throw new IllegalStateException(name + ": first posInc must be > 0");
}
}
if (offsetAtt != null) {
- startOffset = offsetAtt.startOffset();
- endOffset = offsetAtt.endOffset();
-
- if (offsetAtt.startOffset() < lastStartOffset) {
+ if (startOffset < lastStartOffset) {
+ dumpValidatingTokenFilters(this, System.err);
throw new IllegalStateException(name + ": offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
}
lastStartOffset = offsetAtt.startOffset();
}
- posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
-
if (offsetAtt != null && posIncAtt != null) {
if (!posToStartOffset.containsKey(pos)) {
@@ -106,6 +125,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
// System.out.println(name + " + vs " + pos + " -> " + startOffset);
final int oldStartOffset = posToStartOffset.get(pos);
if (oldStartOffset != startOffset) {
+ dumpValidatingTokenFilters(this, System.err);
throw new IllegalStateException(name + ": inconsistent startOffset at pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
}
}
@@ -122,6 +142,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
//System.out.println(name + " + ve " + endPos + " -> " + endOffset);
final int oldEndOffset = posToEndOffset.get(endPos);
if (oldEndOffset != endOffset) {
+ dumpValidatingTokenFilters(this, System.err);
throw new IllegalStateException(name + ": inconsistent endOffset at pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
}
}
@@ -147,5 +168,39 @@ public final class ValidatingTokenFilter extends TokenFilter {
posToStartOffset.clear();
posToEndOffset.clear();
lastStartOffset = 0;
+ tokens.clear();
+ }
+
+
+ private void addToken(int startOffset, int endOffset, int posInc) {
+ if (tokens.size() == MAX_DEBUG_TOKENS) {
+ tokens.remove(0);
+ }
+ tokens.add(new Token(termAtt.toString(), posInc, startOffset, endOffset));
+ }
+
+ /**
+ * Prints details about consumed tokens stored in any ValidatingTokenFilters in the input chain
+ * @param in
+ * @param out
+ */
+ public static void dumpValidatingTokenFilters(TokenStream in, PrintStream out) {
+ if (in instanceof TokenFilter) {
+ dumpValidatingTokenFilters(((TokenFilter) in).input, out);
+ if (in instanceof ValidatingTokenFilter) {
+ out.println(((ValidatingTokenFilter) in).dump());
+ }
+ }
}
+
+ public String dump() {
+ StringBuilder buf = new StringBuilder();
+ buf.append(name).append(": ");
+ for (Token token : tokens) {
+ buf.append(String.format(Locale.ROOT, "%s<[%d-%d] +%d> ",
+ token, token.startOffset(), token.endOffset(), token.getPositionIncrement()));
+ }
+ return buf.toString();
+ }
+
}