You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by am...@apache.org on 2019/09/12 20:58:49 UTC
[atlas] branch master updated (8bad6b0 -> 8792f16)
This is an automated email from the ASF dual-hosted git repository.
amestry pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/atlas.git.
from 8bad6b0 ATLAS-3396: ZipSourceWithBackingDirectory: Implementation. Port to master.
new e6b9e9a ATLAS-3396: ZipSourceWithBackingStore implementation. Empty ZIP unit test fix.
new 8792f16 Use fulltext indices for dsl search
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../java/org/apache/atlas/query/GremlinClause.java | 7 ++++---
.../apache/atlas/query/GremlinQueryComposer.java | 10 +++++++++-
.../atlas/query/GremlinQueryComposerTest.java | 23 ++++++++++++++++++++++
.../atlas/repository/impexp/ImportServiceTest.java | 3 ++-
4 files changed, 38 insertions(+), 5 deletions(-)
[atlas] 02/02: Use fulltext indices for dsl search
Posted by am...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
amestry pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/atlas.git
commit 8792f162dfea3d471c7a0f5672984462ee434fba
Author: Bolke de Bruin <bo...@xs4all.nl>
AuthorDate: Thu Aug 22 11:58:24 2019 +0200
Use fulltext indices for dsl search
Per the JanusGraph documentation (https://docs.janusgraph.org/latest/index-parameters.html), strings
are indexed as text by default. Atlas uses string search, which is suboptimal and leads to
significant performance loss.
This switches to fulltext predicates when available, which gives a significant speedup.
Signed-off-by: Ashutosh Mestry <am...@hortonworks.com>
---
.../java/org/apache/atlas/query/GremlinClause.java | 7 ++++---
.../apache/atlas/query/GremlinQueryComposer.java | 10 +++++++++-
.../atlas/query/GremlinQueryComposerTest.java | 23 ++++++++++++++++++++++
3 files changed, 36 insertions(+), 4 deletions(-)
diff --git a/repository/src/main/java/org/apache/atlas/query/GremlinClause.java b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java
index ca8419a..55ccabd 100644
--- a/repository/src/main/java/org/apache/atlas/query/GremlinClause.java
+++ b/repository/src/main/java/org/apache/atlas/query/GremlinClause.java
@@ -43,9 +43,10 @@ enum GremlinClause {
RANGE("range(%s, %s + %s)"),
SELECT("select('%s')"),
TO_LIST("toList()"),
- TEXT_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textRegex(%s))"),
- TEXT_PREFIX("has('%s', org.janusgraph.core.attribute.Text.textPrefix(%s))"),
- TEXT_SUFFIX("has('%s', org.janusgraph.core.attribute.Text.textRegex(\".*\" + %s))"),
+ STRING_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textRegex(%s))"),
+ TEXT_CONTAINS("has('%s', org.janusgraph.core.attribute.Text.textContainsRegex(%s))"),
+ TEXT_PREFIX("has('%s', org.janusgraph.core.attribute.Text.textContainsPrefix(%s))"),
+ TEXT_SUFFIX("has('%s', org.janusgraph.core.attribute.Text.textContainsRegex(\".*\" + %s))"),
TRAIT("outE('classifiedAs').has('__name', within('%s')).outV()"),
ANY_TRAIT("or(has('__traitNames'), has('__propagatedTraitNames'))"),
NO_TRAIT("and(hasNot('__traitNames'), hasNot('__propagatedTraitNames'))"),
diff --git a/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java
index e64a894..294dc00 100644
--- a/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java
+++ b/repository/src/main/java/org/apache/atlas/query/GremlinQueryComposer.java
@@ -171,6 +171,7 @@ public class GremlinQueryComposer {
}
String currentType = context.getActiveTypeName();
+
IdentifierHelper.Info org = null;
IdentifierHelper.Info lhsI = createInfo(lhs);
if (!lhsI.isPrimitive()) {
@@ -193,7 +194,14 @@ public class GremlinQueryComposer {
rhs = addQuotesIfNecessary(lhsI, rhs);
SearchParameters.Operator op = SearchParameters.Operator.fromString(operator);
if (op == SearchParameters.Operator.LIKE) {
- add(GremlinClause.TEXT_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs));
+ final AtlasStructType.AtlasAttribute attribute = context.getActiveEntityType().getAttribute(lhsI.getAttributeName());
+ final AtlasStructDef.AtlasAttributeDef.IndexType indexType = attribute.getAttributeDef().getIndexType();
+
+ if (indexType == AtlasStructDef.AtlasAttributeDef.IndexType.STRING) {
+ add(GremlinClause.STRING_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs));
+ } else {
+ add(GremlinClause.TEXT_CONTAINS, getPropertyForClause(lhsI), IdentifierHelper.getFixedRegEx(rhs));
+ }
} else if (op == SearchParameters.Operator.IN) {
add(GremlinClause.HAS_OPERATOR, getPropertyForClause(lhsI), "within", rhs);
} else {
diff --git a/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java
index b73d427..ca32ffc 100644
--- a/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java
+++ b/repository/src/test/java/org/apache/atlas/query/GremlinQueryComposerTest.java
@@ -17,17 +17,23 @@
*/
package org.apache.atlas.query;
+import afu.org.checkerframework.checker.igj.qual.I;
+import jnr.ffi.annotations.In;
import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.TypeCategory;
+import org.apache.atlas.model.typedef.AtlasStructDef;
import org.apache.atlas.query.antlr4.AtlasDSLParser;
import org.apache.atlas.type.AtlasEntityType;
+import org.apache.atlas.type.AtlasStructType;
import org.apache.atlas.type.AtlasType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.commons.lang.StringUtils;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
@@ -163,6 +169,8 @@ public class GremlinQueryComposerTest {
verify("from DB where (name = \"Reporting\") select name, owner", getExpected(exSel, exMain));
verify("Table where Asset.name like \"Tab*\"",
"g.V().has('__typeName', 'Table').has('Asset.__s_name', org.janusgraph.core.attribute.Text.textRegex(\"Tab.*\")).dedup().limit(25).toList()");
+ verify("Table where owner like \"Tab*\"",
+ "g.V().has('__typeName', 'Table').has('Table.owner', org.janusgraph.core.attribute.Text.textContainsRegex(\"Tab.*\")).dedup().limit(25).toList()");
verify("from Table where (db.name = \"Reporting\")",
"g.V().has('__typeName', 'Table').out('__Table.db').has('DB.name', eq(\"Reporting\")).dedup().in('__Table.db').dedup().limit(25).toList()");
}
@@ -409,6 +417,21 @@ public class GremlinQueryComposerTest {
} else {
type = mock(AtlasEntityType.class);
when(type.getTypeCategory()).thenReturn(TypeCategory.ENTITY);
+
+ AtlasStructType.AtlasAttribute attr = mock(AtlasStructType.AtlasAttribute.class);
+ AtlasStructDef.AtlasAttributeDef def = mock(AtlasStructDef.AtlasAttributeDef.class);
+ when(def.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.DEFAULT);
+ when(attr.getAttributeDef()).thenReturn(def);
+
+ AtlasStructType.AtlasAttribute attr_s = mock(AtlasStructType.AtlasAttribute.class);
+ AtlasStructDef.AtlasAttributeDef def_s = mock(AtlasStructDef.AtlasAttributeDef.class);
+ when(def_s.getIndexType()).thenReturn(AtlasStructDef.AtlasAttributeDef.IndexType.STRING);
+
+ when(attr_s.getAttributeDef()).thenReturn(def_s);
+
+ when(((AtlasEntityType) type).getAttribute(anyString())).thenReturn(attr);
+ when(((AtlasEntityType) type).getAttribute(eq("name"))).thenReturn(attr_s);
+
}
if(typeName.equals("PIII")) {
[atlas] 01/02: ATLAS-3396: ZipSourceWithBackingStore implementation. Empty ZIP unit test fix.
Posted by am...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
amestry pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/atlas.git
commit e6b9e9ad25b3b0538c0f864ab534cd16e9dd91b9
Author: Ashutosh Mestry <am...@hortonworks.com>
AuthorDate: Thu Sep 12 13:46:12 2019 -0700
ATLAS-3396: ZipSourceWithBackingStore implementation. Empty ZIP unit test fix.
---
.../java/org/apache/atlas/repository/impexp/ImportServiceTest.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/repository/src/test/java/org/apache/atlas/repository/impexp/ImportServiceTest.java b/repository/src/test/java/org/apache/atlas/repository/impexp/ImportServiceTest.java
index 33fe0ad..1bfe62b 100644
--- a/repository/src/test/java/org/apache/atlas/repository/impexp/ImportServiceTest.java
+++ b/repository/src/test/java/org/apache/atlas/repository/impexp/ImportServiceTest.java
@@ -40,6 +40,7 @@ import org.apache.atlas.store.AtlasTypeDefStore;
import org.apache.atlas.type.AtlasClassificationType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.commons.lang.StringUtils;
+import org.apache.tinkerpop.shaded.kryo.io.Input;
import org.mockito.stubbing.Answer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -463,6 +464,6 @@ public class ImportServiceTest extends ExportImportTestBase {
@Test(expectedExceptions = AtlasBaseException.class)
public void importEmptyZip() throws IOException, AtlasBaseException {
- getZipSource("empty.zip");
+ new ZipSource((InputStream) getZipSource("empty.zip")[0][0]);
}
}