You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by ma...@apache.org on 2017/09/14 20:22:27 UTC
[2/2] atlas git commit: ATLAS-2091: basic search update to avoid index query for attribute values containing Tokenizer characters
ATLAS-2091: basic search update to avoid index query for attribute values containing Tokenizer characters
Signed-off-by: Madhan Neethiraj <ma...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/atlas/commit/d1c585a2
Tree: http://git-wip-us.apache.org/repos/asf/atlas/tree/d1c585a2
Diff: http://git-wip-us.apache.org/repos/asf/atlas/diff/d1c585a2
Branch: refs/heads/master
Commit: d1c585a22c4c1abd866e553480efbd21f9cffcd0
Parents: a785e93
Author: apoorvnaik <ap...@apache.org>
Authored: Thu Sep 14 11:37:06 2017 -0700
Committer: Madhan Neethiraj <ma...@apache.org>
Committed: Thu Sep 14 12:51:55 2017 -0700
----------------------------------------------------------------------
.../apache/atlas/discovery/SearchProcessor.java | 129 ++++++++++++++-----
1 file changed, 96 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/atlas/blob/d1c585a2/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
index f6ff8d4..64a86b9 100644
--- a/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
+++ b/repository/src/main/java/org/apache/atlas/discovery/SearchProcessor.java
@@ -168,7 +168,7 @@ public abstract class SearchProcessor {
if (isIndexSearchable(filterCriteria, structType)) {
indexFiltered.add(attributeName);
} else {
- LOG.warn("not using index-search for attribute '{}' - its either non-indexed or a string attribute used with NEQ operator; might cause poor performance", structType.getQualifiedAttributeName(attributeName));
+ LOG.warn("not using index-search for attribute '{}'; might cause poor performance", structType.getQualifiedAttributeName(attributeName));
graphFiltered.add(attributeName);
}
@@ -330,16 +330,34 @@ public abstract class SearchProcessor {
boolean ret = indexedKeys != null && indexedKeys.contains(qualifiedName);
if (ret) { // index exists
- // Don't use index query for NEQ on string type attributes - as it might return fewer entries due to tokenization of vertex property value by indexer
- if (filterCriteria.getOperator() == SearchParameters.Operator.NEQ) {
- AtlasType attributeType = structType.getAttributeType(filterCriteria.getAttributeName());
+ // for string type attributes, don't use index query in the following cases:
+ // - operation is NEQ, as it might return fewer entries due to tokenization of vertex property value
+ // - value-to-compare has special characters
+ AtlasType attributeType = structType.getAttributeType(filterCriteria.getAttributeName());
+
+ if (AtlasBaseTypeDef.ATLAS_TYPE_STRING.equals(attributeType.getTypeName())) {
+ if (filterCriteria.getOperator() == SearchParameters.Operator.NEQ) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("NEQ operator found for string attribute {}, deferring to in-memory or graph query (might cause poor performance)", qualifiedName);
+ }
+
+ ret = false;
+ } else if (hasIndexQuerySpecialChar(filterCriteria.getAttributeValue())) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("special characters found in filter value {}, deferring to in-memory or graph query (might cause poor performance)", filterCriteria.getAttributeValue());
+ }
- if (AtlasBaseTypeDef.ATLAS_TYPE_STRING.equals(attributeType.getTypeName())) {
ret = false;
}
}
}
+ if (LOG.isDebugEnabled()) {
+ if (!ret) {
+ LOG.debug("Not using index query for: attribute='{}', operator='{}', value='{}'", qualifiedName, filterCriteria.getOperator(), filterCriteria.getAttributeValue());
+ }
+ }
+
return ret;
}
@@ -358,7 +376,6 @@ public abstract class SearchProcessor {
if (nestedExpression.length() > 0) {
nestedExpression.append(SPACE_STRING).append(criteria.getCondition()).append(SPACE_STRING);
}
- // todo: when a neq operation is nested and occurs in the beginning of the query, index query has issues
nestedExpression.append(nestedQuery);
}
}
@@ -539,8 +556,7 @@ public abstract class SearchProcessor {
query.has(qualifiedName, AtlasGraphQuery.ComparisionOperator.NOT_EQUAL, attrValue);
break;
case LIKE:
- // TODO: Maybe we need to validate pattern
- query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX, getLikeRegex(attrValue));
+ query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX, attrValue);
break;
case CONTAINS:
query.has(qualifiedName, AtlasGraphQuery.MatchingOperator.REGEX, getContainsRegex(attrValue));
@@ -616,41 +632,88 @@ public abstract class SearchProcessor {
}
}
- // ATLAS-2118: Reserved regex characters in attribute value can cause the graph query to fail when parsing the contains regex
- private String getContainsRegex(String attributeValue) {
- StringBuilder escapedAttrVal = new StringBuilder(".*");
+ private static String getContainsRegex(String attributeValue) {
+ return ".*" + escapeRegExChars(attributeValue) + ".*";
+ }
- for (int i = 0; i < attributeValue.length(); i++) {
- final char c = attributeValue.charAt(i);
-
- switch (c) {
- case '+':
- case '|':
- case '(':
- case '{':
- case '[':
- case '*':
- case '?':
- case '$':
- case '/':
- case '^':
- escapedAttrVal.append('\\');
- break;
+ private static String getSuffixRegex(String attributeValue) {
+ return ".*" + escapeRegExChars(attributeValue);
+ }
+
+ private static String escapeRegExChars(String val) {
+ StringBuilder escapedVal = new StringBuilder();
+
+ for (int i = 0; i < val.length(); i++) {
+ final char c = val.charAt(i);
+
+ if (isRegExSpecialChar(c)) {
+ escapedVal.append('\\');
}
- escapedAttrVal.append(c);
+ escapedVal.append(c);
}
- escapedAttrVal.append(".*");
+ return escapedVal.toString();
+ }
+
+ private static boolean isRegExSpecialChar(char c) {
+ switch (c) {
+ case '+':
+ case '|':
+ case '(':
+ case '{':
+ case '[':
+ case '*':
+ case '?':
+ case '$':
+ case '/':
+ case '^':
+ return true;
+ }
- return escapedAttrVal.toString();
+ return false;
}
- private String getSuffixRegex(String attributeValue) {
- return ".*" + attributeValue;
+ private static boolean hasIndexQuerySpecialChar(String attributeValue) {
+ for (int i = 0; i < attributeValue.length(); i++) {
+ if (isIndexQuerySpecialChar(attributeValue.charAt(i))) {
+ return true;
+ }
+ }
+
+ return false;
}
- private String getLikeRegex(String attributeValue) { return ".*" + attributeValue + ".*"; }
+ private static boolean isIndexQuerySpecialChar(char c) {
+ switch (c) {
+ case '+':
+ case '-':
+ case '&':
+ case '|':
+ case '!':
+ case '(':
+ case ')':
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ case '^':
+ case '"':
+ case '~':
+ case '*':
+ case '?':
+ case ':':
+ case '/':
+ case '#':
+ case '$':
+ case '%':
+ case '@':
+ case '=':
+ return true;
+ }
+
+ return false;
+ }
protected List<AtlasVertex> getVerticesFromIndexQueryResult(Iterator<AtlasIndexQuery.Result> idxQueryResult, List<AtlasVertex> vertices) {
if (idxQueryResult != null) {