You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mh...@apache.org on 2020/05/19 17:40:06 UTC
[lucene-solr] branch branch_8x updated: Lucene-9371: Allow external
access to RegExp's parsed structure (#1521) (#1529)
This is an automated email from the ASF dual-hosted git repository.
mharwood pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 9d6c738 Lucene-9371: Allow external access to RegExp's parsed structure (#1521) (#1529)
9d6c738 is described below
commit 9d6c738ffce0c3164691f161ba8b92a615b1e062
Author: markharwood <ma...@gmail.com>
AuthorDate: Tue May 19 18:39:55 2020 +0100
Lucene-9371: Allow external access to RegExp's parsed structure (#1521) (#1529)
Made RegExp internal fields public final to allow external classes to render, e.g., English explanations of pattern logic. Backport of 44fc5b.
---
.../org/apache/lucene/util/automaton/RegExp.java | 207 ++++++++++++---------
1 file changed, 119 insertions(+), 88 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
index 31cca6d..3956486 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
@@ -315,8 +315,43 @@ import java.util.Set;
*/
public class RegExp {
- enum Kind {
- REGEXP_UNION, REGEXP_CONCATENATION, REGEXP_INTERSECTION, REGEXP_OPTIONAL, REGEXP_REPEAT, REGEXP_REPEAT_MIN, REGEXP_REPEAT_MINMAX, REGEXP_COMPLEMENT, REGEXP_CHAR, REGEXP_CHAR_RANGE, REGEXP_ANYCHAR, REGEXP_EMPTY, REGEXP_STRING, REGEXP_ANYSTRING, REGEXP_AUTOMATON, REGEXP_INTERVAL
+ /**
+ * The type of expression represented by a RegExp node.
+ */
+ public enum Kind {
+ /** The union of two expressions */
+ REGEXP_UNION,
+ /** A sequence of two expressions */
+ REGEXP_CONCATENATION,
+ /** The intersection of two expressions */
+ REGEXP_INTERSECTION,
+ /** An optional expression */
+ REGEXP_OPTIONAL,
+ /** An expression that repeats */
+ REGEXP_REPEAT,
+ /** An expression that repeats a minimum number of times*/
+ REGEXP_REPEAT_MIN,
+ /** An expression that repeats a minimum and maximum number of times*/
+ REGEXP_REPEAT_MINMAX,
+ /** The complement of an expression */
+ REGEXP_COMPLEMENT,
+ /** A Character */
+ REGEXP_CHAR,
+ /** A Character range*/
+ REGEXP_CHAR_RANGE,
+ /** Any Character allowed*/
+ REGEXP_ANYCHAR,
+ /** An empty expression*/
+ REGEXP_EMPTY,
+ /** A string expression*/
+ REGEXP_STRING,
+ /** Any string allowed */
+ REGEXP_ANYSTRING,
+ /** An Automaton expression*/
+ REGEXP_AUTOMATON,
+ /** An Interval expression */
+ REGEXP_INTERVAL,
+ /** An expression for a pre-defined class e.g. \w */
}
/**
@@ -360,21 +395,37 @@ public class RegExp {
*/
public static final int NONE = 0x0000;
+ //Immutable parsed state
+ /**
+ * The type of expression
+ */
+ public final Kind kind;
+ /**
+ * Child expressions held by a container type expression
+ */
+ public final RegExp exp1, exp2;
+ /**
+ * String expression
+ */
+ public final String s;
+ /**
+ * Character expression
+ */
+ public final int c;
+ /**
+ * Limits for repeatable type expressions
+ */
+ public final int min, max, digits;
+ /**
+ * Extents for range type expressions
+ */
+ public final int from, to;
+
+ // Parser variables
private final String originalString;
- Kind kind;
- RegExp exp1, exp2;
- String s;
- int c;
- int min, max, digits;
- int from, to;
-
int flags;
int pos;
-
- RegExp() {
- this.originalString = null;
- }
-
+
/**
* Constructs new <code>RegExp</code> from a string. Same as
* <code>RegExp(s, ALL)</code>.
@@ -417,6 +468,37 @@ public class RegExp {
from = e.from;
to = e.to;
}
+
+ RegExp(Kind kind, RegExp exp1, RegExp exp2, String s, int c, int min, int max, int digits, int from, int to){
+ this.originalString = null;
+ this.kind = kind;
+ this.flags = 0;
+ this.exp1 = exp1;
+ this.exp2 = exp2;
+ this.s = s;
+ this.c = c;
+ this.min = min;
+ this.max = max;
+ this.digits = digits;
+ this.from = from;
+ this.to = to;
+ }
+
+ // Simplified construction of container nodes
+ static RegExp newContainerNode(Kind kind, RegExp exp1, RegExp exp2) {
+ return new RegExp(kind, exp1, exp2, null, 0, 0, 0, 0, 0, 0);
+ }
+
+ // Simplified construction of repeating nodes
+ static RegExp newRepeatingNode(Kind kind, RegExp exp, int min, int max) {
+ return new RegExp(kind, exp, null, null, 0, min, max, 0, 0, 0);
+ }
+
+
+ // Simplified construction of leaf nodes
+ static RegExp newLeafNode(Kind kind, String s, int c, int min, int max, int digits, int from, int to) {
+ return new RegExp(kind, null, null, s, c, min, max, digits, from, to);
+ }
/**
* Constructs new <code>Automaton</code> from this <code>RegExp</code>. Same
@@ -854,34 +936,29 @@ public class RegExp {
}
static RegExp makeUnion(RegExp exp1, RegExp exp2) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_UNION;
- r.exp1 = exp1;
- r.exp2 = exp2;
- return r;
+ return newContainerNode(Kind.REGEXP_UNION, exp1, exp2);
}
static RegExp makeConcatenation(RegExp exp1, RegExp exp2) {
if ((exp1.kind == Kind.REGEXP_CHAR || exp1.kind == Kind.REGEXP_STRING)
&& (exp2.kind == Kind.REGEXP_CHAR || exp2.kind == Kind.REGEXP_STRING)) return makeString(
exp1, exp2);
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_CONCATENATION;
+ RegExp rexp1, rexp2;
if (exp1.kind == Kind.REGEXP_CONCATENATION
&& (exp1.exp2.kind == Kind.REGEXP_CHAR || exp1.exp2.kind == Kind.REGEXP_STRING)
&& (exp2.kind == Kind.REGEXP_CHAR || exp2.kind == Kind.REGEXP_STRING)) {
- r.exp1 = exp1.exp1;
- r.exp2 = makeString(exp1.exp2, exp2);
+ rexp1 = exp1.exp1;
+ rexp2 = makeString(exp1.exp2, exp2);
} else if ((exp1.kind == Kind.REGEXP_CHAR || exp1.kind == Kind.REGEXP_STRING)
&& exp2.kind == Kind.REGEXP_CONCATENATION
&& (exp2.exp1.kind == Kind.REGEXP_CHAR || exp2.exp1.kind == Kind.REGEXP_STRING)) {
- r.exp1 = makeString(exp1, exp2.exp1);
- r.exp2 = exp2.exp2;
+ rexp1 = makeString(exp1, exp2.exp1);
+ rexp2 = exp2.exp2;
} else {
- r.exp1 = exp1;
- r.exp2 = exp2;
+ rexp1 = exp1;
+ rexp2 = exp2;
}
- return r;
+ return newContainerNode(Kind.REGEXP_CONCATENATION, rexp1, rexp2);
}
static private RegExp makeString(RegExp exp1, RegExp exp2) {
@@ -894,107 +971,61 @@ public class RegExp {
}
static RegExp makeIntersection(RegExp exp1, RegExp exp2) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_INTERSECTION;
- r.exp1 = exp1;
- r.exp2 = exp2;
- return r;
+ return newContainerNode(Kind.REGEXP_INTERSECTION, exp1, exp2);
}
static RegExp makeOptional(RegExp exp) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_OPTIONAL;
- r.exp1 = exp;
- return r;
+ return newContainerNode(Kind.REGEXP_OPTIONAL, exp, null);
}
static RegExp makeRepeat(RegExp exp) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_REPEAT;
- r.exp1 = exp;
- return r;
+ return newContainerNode(Kind.REGEXP_REPEAT, exp, null);
}
static RegExp makeRepeat(RegExp exp, int min) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_REPEAT_MIN;
- r.exp1 = exp;
- r.min = min;
- return r;
+ return newRepeatingNode(Kind.REGEXP_REPEAT_MIN, exp, min, 0);
}
static RegExp makeRepeat(RegExp exp, int min, int max) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_REPEAT_MINMAX;
- r.exp1 = exp;
- r.min = min;
- r.max = max;
- return r;
+ return newRepeatingNode(Kind.REGEXP_REPEAT_MINMAX, exp, min, max);
}
static RegExp makeComplement(RegExp exp) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_COMPLEMENT;
- r.exp1 = exp;
- return r;
+ return newContainerNode(Kind.REGEXP_COMPLEMENT, exp, null);
}
static RegExp makeChar(int c) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_CHAR;
- r.c = c;
- return r;
+ return newLeafNode(Kind.REGEXP_CHAR, null, c, 0, 0, 0, 0, 0);
}
static RegExp makeCharRange(int from, int to) {
if (from > to)
throw new IllegalArgumentException("invalid range: from (" + from + ") cannot be > to (" + to + ")");
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_CHAR_RANGE;
- r.from = from;
- r.to = to;
- return r;
+ return newLeafNode(Kind.REGEXP_CHAR_RANGE, null, 0, 0, 0, 0, from, to);
}
static RegExp makeAnyChar() {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_ANYCHAR;
- return r;
+ return newContainerNode(Kind.REGEXP_ANYCHAR, null, null);
}
static RegExp makeEmpty() {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_EMPTY;
- return r;
+ return newContainerNode(Kind.REGEXP_EMPTY, null, null);
}
static RegExp makeString(String s) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_STRING;
- r.s = s;
- return r;
+ return newLeafNode(Kind.REGEXP_STRING, s, 0, 0, 0, 0, 0, 0);
}
static RegExp makeAnyString() {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_ANYSTRING;
- return r;
+ return newContainerNode(Kind.REGEXP_ANYSTRING, null, null);
}
static RegExp makeAutomaton(String s) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_AUTOMATON;
- r.s = s;
- return r;
+ return newLeafNode(Kind.REGEXP_AUTOMATON, s, 0, 0, 0, 0, 0, 0);
}
static RegExp makeInterval(int min, int max, int digits) {
- RegExp r = new RegExp();
- r.kind = Kind.REGEXP_INTERVAL;
- r.min = min;
- r.max = max;
- r.digits = digits;
- return r;
+ return newLeafNode(Kind.REGEXP_INTERVAL, null, 0, min, max, digits, 0, 0);
}
private boolean peek(String s) {