You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sa...@apache.org on 2019/09/21 04:53:21 UTC

[atlas] 03/06: Use fulltext indices for dsl search

This is an automated email from the ASF dual-hosted git repository.

sarath pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git

commit 4a4d0e486fd96ca81bdeec2d6be5be3975f16aac
Author: Bolke de Bruin <bo...@xs4all.nl>
AuthorDate: Thu Aug 22 11:58:24 2019 +0200

    Use fulltext indices for dsl search
    
    Per the JanusGraph documentation (https://docs.janusgraph.org/latest/index-parameters.html), strings
    are indexed as text by default. Atlas uses string search, which is suboptimal and leads to
    a significant performance loss.
    
    This switches to using fulltext predicates when available, which give a significant speedup.
    
    Signed-off-by: Ashutosh Mestry <am...@hortonworks.com>
    (cherry picked from commit 8792f162dfea3d471c7a0f5672984462ee434fba)
---
 .../java/org/apache/atlas/query/GremlinClause.java |  7 ++++---
 .../apache/atlas/query/GremlinQueryComposer.java   | 10 +++++++++-
 .../atlas/query/GremlinQueryComposerTest.java      | 23 ++++++++++++++++++++++
 3 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/repository/src/main/java/org/apache/atlas/query/GremlinClause.java b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java
index ca8419a..55ccabd 100644
--- a/repository/src/main/java/org/apache/atlas/query/GremlinClause.java
+++ b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java
@@ -43,9 +43,10 @@ enum GremlinClause {
     RANGE("range(%s, %s + %s)"),
     SELECT("select('%s')"),
     TO_LIST("toList()"),
-    TEXT_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textRegex(%s))"),
-    TEXT_PREFIX("has('%s', org.janusgraph.core.attribute.Text.textPrefix(%s))"),
-    TEXT_SUFFIX("has('%s', org.janusgraph.core.attribute.Text.textRegex(\".*\" + %s))"),
+    STRING_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textRegex(%s))"),
+    TEXT_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textContainsRegex(%s))"),
+    TEXT_PREFIX("has('%s', org.janusgraph.core.attribute.Text.textContainsPrefix(%s))"),
+    TEXT_SUFFIX("has('%s', org.janusgraph.core.attribute.Text.textContainsRegex(\".*\" + %s))"),
     TRAIT("outE('classifiedAs').has('__name', within('%s')).outV()"),
     ANY_TRAIT("or(has('__traitNames'), has('__propagatedTraitNames'))"),
     NO_TRAIT("and(hasNot('__traitNames'), hasNot('__propagatedTraitNames'))"),
diff --git a/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java
index e64a894..294dc00 100644
--- a/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java
+++ b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java
@@ -171,6 +171,7 @@ public class GremlinQueryComposer {
         }
 
         String                currentType = context.getActiveTypeName();
+
         IdentifierHelper.Info org         = null;
         IdentifierHelper.Info lhsI        = createInfo(lhs);
         if (!lhsI.isPrimitive()) {
@@ -193,7 +194,14 @@ public class GremlinQueryComposer {
         rhs = addQuotesIfNecessary(lhsI, rhs);
         SearchParameters.Operator op = SearchParameters.Operator.fromString(operator);
         if (op == SearchParameters.Operator.LIKE) {
-            add(GremlinClause.TEXT_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs));
+            final AtlasStructType.AtlasAttribute attribute = context.getActiveEntityType().getAttribute(lhsI.getAttributeName());
+            final AtlasStructDef.AtlasAttributeDef.IndexType indexType = attribute.getAttributeDef().getIndexType();
+
+            if (indexType == AtlasStructDef.AtlasAttributeDef.IndexType.STRING) {
+                add(GremlinClause.STRING_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs));
+            } else {
+                add(GremlinClause.TEXT_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs));
+            }
         } else if (op == SearchParameters.Operator.IN) {
             add(GremlinClause.HAS_OPERATOR, getPropertyForClause(lhsI), "within", rhs);
         } else {
diff --git a/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java
index b73d427..ca32ffc 100644
--- a/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java
+++ b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java
@@ -17,17 +17,23 @@
  */
 package org.apache.atlas.query;
 
+import afu.org.checkerframework.checker.igj.qual.I;
+import jnr.ffi.annotations.In;
 import org.apache.atlas.AtlasErrorCode;
 import org.apache.atlas.exception.AtlasBaseException;
 import org.apache.atlas.model.TypeCategory;
+import org.apache.atlas.model.typedef.AtlasStructDef;
 import org.apache.atlas.query.antlr4.AtlasDSLParser;
 import org.apache.atlas.type.AtlasEntityType;
+import org.apache.atlas.type.AtlasStructType;
 import org.apache.atlas.type.AtlasType;
 import org.apache.atlas.type.AtlasTypeRegistry;
 import org.apache.commons.lang.StringUtils;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.eq;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 import static org.testng.Assert.assertEquals;
@@ -163,6 +169,8 @@ public class GremlinQueryComposerTest {
         verify("from DB where (name = \"Reporting\") select name, owner", getExpected(exSel, exMain));
         verify("Table where Asset.name like \"Tab*\"",
                 "g.V().has('__typeName', 'Table').has('Asset.__s_name', org.janusgraph.core.attribute.Text.textRegex(\"Tab.*\")).dedup().limit(25).toList()");
+        verify("Table where owner like \"Tab*\"",
+          "g.V().has('__typeName', 'Table').has('Table.owner', org.janusgraph.core.attribute.Text.textContainsRegex(\"Tab.*\")).dedup().limit(25).toList()");
         verify("from Table where (db.name = \"Reporting\")",
                 "g.V().has('__typeName', 'Table').out('__Table.db').has('DB.name', eq(\"Reporting\")).dedup().in('__Table.db').dedup().limit(25).toList()");
     }
@@ -409,6 +417,21 @@ public class GremlinQueryComposerTest {
             } else {
                 type = mock(AtlasEntityType.class);
                 when(type.getTypeCategory()).thenReturn(TypeCategory.ENTITY);
+
+                AtlasStructType.AtlasAttribute attr = mock(AtlasStructType.AtlasAttribute.class);
+                AtlasStructDef.AtlasAttributeDef def = mock(AtlasStructDef.AtlasAttributeDef.class);
+                when(def.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.DEFAULT);
+                when(attr.getAttributeDef()).thenReturn(def);
+
+                AtlasStructType.AtlasAttribute attr_s = mock(AtlasStructType.AtlasAttribute.class);
+                AtlasStructDef.AtlasAttributeDef def_s = mock(AtlasStructDef.AtlasAttributeDef.class);
+                when(def_s.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.STRING);
+
+                when(attr_s.getAttributeDef()).thenReturn(def_s);
+
+                when(((AtlasEntityType) type).getAttribute(anyString())).thenReturn(attr);
+                when(((AtlasEntityType) type).getAttribute(eq("name"))).thenReturn(attr_s);
+
             }
 
             if(typeName.equals("PIII")) {