Posted to commits@lucene.apache.org by mi...@apache.org on 2018/11/27 11:00:45 UTC

[1/3] lucene-solr:master: update comment after limiting number of debug tokens

Repository: lucene-solr
Updated Branches:
  refs/heads/master 72ca4488d -> 6728f0c4f


update comment after limiting number of debug tokens


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6728f0c4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6728f0c4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6728f0c4

Branch: refs/heads/master
Commit: 6728f0c4f4612d32c1eea35395669c0520021acb
Parents: 34ed015
Author: Michael Sokolov <so...@amazon.com>
Authored: Sat Nov 24 10:48:26 2018 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Nov 27 06:00:29 2018 -0500

----------------------------------------------------------------------
 .../src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6728f0c4/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
index 9ab9489..5d5859e 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@@ -56,7 +56,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
   private final OffsetAttribute offsetAtt = getAttribute(OffsetAttribute.class);
   private final CharTermAttribute termAtt = getAttribute(CharTermAttribute.class);
 
-  // record all the Tokens seen so they can be dumped on failure
+  // record the last MAX_DEBUG_TOKENS tokens seen so they can be dumped on failure
   private final List<Token> tokens = new LinkedList<>();
 
   private final String name;
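
[Editor's note] The comment change above reflects the bounded history introduced in patch 3/3 of this series: the filter keeps only the most recent MAX_DEBUG_TOKENS tokens rather than every token seen. A minimal standalone sketch of that eviction pattern follows; the class and method names are illustrative only and are not part of the commit.

    import java.util.ArrayList;
    import java.util.LinkedList;
    import java.util.List;

    class BoundedTokenHistory {
      private static final int MAX_DEBUG_TOKENS = 20;
      private final List<String> tokens = new LinkedList<>();

      void add(String token) {
        if (tokens.size() == MAX_DEBUG_TOKENS) {
          tokens.remove(0);               // evict the oldest token once the cap is reached
        }
        tokens.add(token);                // always record the newest token
      }

      List<String> snapshot() {
        return new ArrayList<>(tokens);   // the tokens that would be dumped on a failure
      }
    }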


[2/3] lucene-solr:master: fixing javadoc; added docs for parameters of new method

Posted by mi...@apache.org.
fixing javadoc; added docs for parameters of new method


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/34ed0154
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/34ed0154
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/34ed0154

Branch: refs/heads/master
Commit: 34ed01543a27791b6cabce696f0360fd7ff836e1
Parents: 5490790
Author: Michael Sokolov <so...@amazon.com>
Authored: Mon Nov 19 15:46:29 2018 +0000
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Nov 27 06:00:29 2018 -0500

----------------------------------------------------------------------
 .../java/org/apache/lucene/analysis/ValidatingTokenFilter.java   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/34ed0154/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
index 54b8fd3..9ab9489 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@@ -181,8 +181,8 @@ public final class ValidatingTokenFilter extends TokenFilter {
 
   /**
    * Prints details about consumed tokens stored in any ValidatingTokenFilters in the input chain
-   * @param in
-   * @param out
+   * @param in the input token stream
+   * @param out the output print stream
    */
   public static void dumpValidatingTokenFilters(TokenStream in, PrintStream out) {
     if (in instanceof TokenFilter) {
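
[Editor's note] The two parameters documented above appear in a typical call site like the hedged sketch below. The analyzer, field name, and surrounding harness are assumptions, not taken from the commit; only ValidatingTokenFilter.dumpValidatingTokenFilters(TokenStream, PrintStream) comes from the patch itself.

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.ValidatingTokenFilter;

    class DumpOnFailure {
      // Consume a token stream; if a ValidatingTokenFilter in the chain throws,
      // print the recent tokens recorded by every validating stage before rethrowing.
      static void consume(Analyzer analyzer, String text) throws IOException {
        try (TokenStream in = analyzer.tokenStream("body", text)) {
          in.reset();
          try {
            while (in.incrementToken()) {
              // normal consumption; attributes could be inspected here
            }
            in.end();
          } catch (IllegalStateException e) {
            ValidatingTokenFilter.dumpValidatingTokenFilters(in, System.err);
            throw e;
          }
        }
      }
    }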


[3/3] lucene-solr:master: LUCENE-8517: do not wrap FixedShingleFilter with conditional in TestRandomChains

Posted by mi...@apache.org.
LUCENE-8517: do not wrap FixedShingleFilter with conditional in TestRandomChains


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/54907903
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/54907903
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/54907903

Branch: refs/heads/master
Commit: 54907903e8d1a5da0c65328f24a1018c5e393afc
Parents: 72ca448
Author: Michael Sokolov <so...@amazon.com>
Authored: Sat Nov 17 08:41:25 2018 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Nov 27 06:00:29 2018 -0500

----------------------------------------------------------------------
 .../lucene/analysis/core/TestRandomChains.java  |  4 +-
 .../lucene/analysis/ValidatingTokenFilter.java  | 69 ++++++++++++++++++--
 2 files changed, 65 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/54907903/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
index 557a69e..beb9bb2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@@ -88,6 +88,7 @@ import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
 import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
 import org.apache.lucene.analysis.payloads.IdentityEncoder;
 import org.apache.lucene.analysis.payloads.PayloadEncoder;
+import org.apache.lucene.analysis.shingle.FixedShingleFilter;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.snowball.TestSnowball;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -129,6 +130,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     // expose inconsistent offsets
     // https://issues.apache.org/jira/browse/LUCENE-4170
     avoidConditionals.add(ShingleFilter.class);
+    avoidConditionals.add(FixedShingleFilter.class);
     // FlattenGraphFilter changes the output graph entirely, so wrapping it in a condition
     // can break position lengths
     avoidConditionals.add(FlattenGraphFilter.class);
@@ -590,7 +592,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
 
   static class MockRandomAnalyzer extends Analyzer {
     final long seed;
-    
+
     MockRandomAnalyzer(long seed) {
       this.seed = seed;
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/54907903/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
index b29da70..54b8fd3 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@@ -17,7 +17,11 @@
 package org.apache.lucene.analysis;
 
 import java.io.IOException;
+import java.io.PrintStream;
 import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -38,6 +42,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
  *  offsets are consistent with one another). */
 public final class ValidatingTokenFilter extends TokenFilter {
 
+  private static final int MAX_DEBUG_TOKENS = 20;
+
   private int pos;
   private int lastStartOffset;
 
@@ -50,6 +56,9 @@ public final class ValidatingTokenFilter extends TokenFilter {
   private final OffsetAttribute offsetAtt = getAttribute(OffsetAttribute.class);
   private final CharTermAttribute termAtt = getAttribute(CharTermAttribute.class);
 
+  // record all the Tokens seen so they can be dumped on failure
+  private final List<Token> tokens = new LinkedList<>();
+
   private final String name;
 
   /** The name arg is used to identify this stage when
@@ -72,28 +81,38 @@ public final class ValidatingTokenFilter extends TokenFilter {
     int startOffset = 0;
     int endOffset = 0;
     int posLen = 0;
+    int posInc = 0;
+
+    if (posIncAtt != null) {
+      posInc = posIncAtt.getPositionIncrement();
+    }
+    if (offsetAtt != null) {
+      startOffset = offsetAtt.startOffset();
+      endOffset = offsetAtt.endOffset();
+    }
+
+    posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
+
+    addToken(startOffset, endOffset, posInc);
 
     // System.out.println(name + ": " + this);
     
     if (posIncAtt != null) {
-      pos += posIncAtt.getPositionIncrement();
+      pos += posInc;
       if (pos == -1) {
+        dumpValidatingTokenFilters(this, System.err);
         throw new IllegalStateException(name + ": first posInc must be > 0");
       }
     }
     
     if (offsetAtt != null) {
-      startOffset = offsetAtt.startOffset();
-      endOffset = offsetAtt.endOffset();
-
-      if (offsetAtt.startOffset() < lastStartOffset) {
+      if (startOffset < lastStartOffset) {
+        dumpValidatingTokenFilters(this, System.err);
         throw new IllegalStateException(name + ": offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
       }
       lastStartOffset = offsetAtt.startOffset();
     }
     
-    posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
-    
     if (offsetAtt != null && posIncAtt != null) {
 
       if (!posToStartOffset.containsKey(pos)) {
@@ -106,6 +125,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
         // System.out.println(name + "  + vs " + pos + " -> " + startOffset);
         final int oldStartOffset = posToStartOffset.get(pos);
         if (oldStartOffset != startOffset) {
+          dumpValidatingTokenFilters(this, System.err);
           throw new IllegalStateException(name + ": inconsistent startOffset at pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
         }
       }
@@ -122,6 +142,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
         //System.out.println(name + "  + ve " + endPos + " -> " + endOffset);
         final int oldEndOffset = posToEndOffset.get(endPos);
         if (oldEndOffset != endOffset) {
+          dumpValidatingTokenFilters(this, System.err);
           throw new IllegalStateException(name + ": inconsistent endOffset at pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
         }
       }
@@ -147,5 +168,39 @@ public final class ValidatingTokenFilter extends TokenFilter {
     posToStartOffset.clear();
     posToEndOffset.clear();
     lastStartOffset = 0;
+    tokens.clear();
+  }
+
+
+  private void addToken(int startOffset, int endOffset, int posInc) {
+    if (tokens.size() == MAX_DEBUG_TOKENS) {
+      tokens.remove(0);
+    }
+    tokens.add(new Token(termAtt.toString(), posInc, startOffset, endOffset));
+  }
+
+  /**
+   * Prints details about consumed tokens stored in any ValidatingTokenFilters in the input chain
+   * @param in
+   * @param out
+   */
+  public static void dumpValidatingTokenFilters(TokenStream in, PrintStream out) {
+    if (in instanceof TokenFilter) {
+      dumpValidatingTokenFilters(((TokenFilter) in).input, out);
+      if (in instanceof ValidatingTokenFilter) {
+        out.println(((ValidatingTokenFilter) in).dump());
+      }
+    }
   }
+
+  public String dump() {
+    StringBuilder buf = new StringBuilder();
+    buf.append(name).append(": ");
+    for (Token token : tokens) {
+      buf.append(String.format(Locale.ROOT, "%s<[%d-%d] +%d> ",
+          token, token.startOffset(), token.endOffset(), token.getPositionIncrement()));
+    }
+    return buf.toString();
+  }
+
 }
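
[Editor's note] A hedged sketch of how this change can be exercised: the test framework wraps each stage of a chain in a ValidatingTokenFilter (its constructor takes the wrapped stream and a stage name, per the javadoc above), so that dumpValidatingTokenFilters prints the last MAX_DEBUG_TOKENS tokens seen at every stage when a check fails. The concrete chain, stage names, and shingle size below are illustrative and are not taken from TestRandomChains; ValidatingTokenFilter lives in the lucene-test-framework module.

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.ValidatingTokenFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.shingle.FixedShingleFilter;

    class ValidatedShingleAnalyzer extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new WhitespaceTokenizer();
        // Wrap each stage so a failure reports per-stage token history.
        TokenStream stage = new ValidatingTokenFilter(source, "whitespace");
        stage = new FixedShingleFilter(stage, 2);                 // fixed 2-word shingles
        stage = new ValidatingTokenFilter(stage, "fixedShingle");
        return new TokenStreamComponents(source, stage);
      }
    }

A validation failure anywhere in such a chain now calls dumpValidatingTokenFilters(this, System.err) before throwing, printing each stage's name followed by its retained tokens in the "%s<[start-end] +posInc>" format produced by dump().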