You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by jn...@apache.org on 2017/02/01 21:05:35 UTC

[4/5] incubator-atlas git commit: ATLAS-1369 - Optimize gremlin queries generated by DSL translator

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/aa74c73d/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java b/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java
index e862769..add7e07 100644
--- a/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java
+++ b/repository/src/main/java/org/apache/atlas/gremlin/Gremlin3ExpressionFactory.java
@@ -19,21 +19,25 @@
 package org.apache.atlas.gremlin;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.atlas.AtlasException;
+import org.apache.atlas.groovy.AbstractFunctionExpression;
 import org.apache.atlas.groovy.CastExpression;
 import org.apache.atlas.groovy.ClosureExpression;
 import org.apache.atlas.groovy.ComparisonExpression;
+import org.apache.atlas.groovy.ComparisonExpression.ComparisonOperator;
 import org.apache.atlas.groovy.ComparisonOperatorExpression;
 import org.apache.atlas.groovy.FunctionCallExpression;
 import org.apache.atlas.groovy.GroovyExpression;
 import org.apache.atlas.groovy.IdentifierExpression;
 import org.apache.atlas.groovy.LiteralExpression;
 import org.apache.atlas.groovy.LogicalExpression;
-import org.apache.atlas.groovy.TypeCoersionExpression;
-import org.apache.atlas.groovy.ComparisonExpression.ComparisonOperator;
 import org.apache.atlas.groovy.LogicalExpression.LogicalOperator;
+import org.apache.atlas.groovy.TernaryOperatorExpression;
+import org.apache.atlas.groovy.TraversalStepType;
+import org.apache.atlas.groovy.TypeCoersionExpression;
 import org.apache.atlas.query.GraphPersistenceStrategies;
 import org.apache.atlas.query.TypeUtils.FieldInfo;
 import org.apache.atlas.repository.graph.AtlasGraphProvider;
@@ -69,27 +73,14 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
     private static final String REPEAT_METHOD = "repeat";
     private static final String RANGE_METHOD = "range";
     private static final String LAST_METHOD = "last";
-    private static final String WHERE_METHOD = "where";
     private static final String TO_STRING_METHOD = "toString";
 
+    private static final GroovyExpression EMPTY_STRING_EXPRESSION = new LiteralExpression("");
+
     @Override
     public GroovyExpression generateLogicalExpression(GroovyExpression parent, String operator,
-            List<GroovyExpression> operands) {
-        if (operands.size() == 1) {
-            // gremlin 3 treats one element expressions as 'false'. Avoid
-            // creating a boolean expression in this case. Inline the expression
-            // note: we can't simply omit it, since it will cause us to traverse
-            // the edge!
-            // use 'where' instead
-            GroovyExpression expr = operands.get(0);
-            // if child is a back expression, that expression becomes an
-            // argument to where
-            return new FunctionCallExpression(parent, WHERE_METHOD, expr);
-        } else {
-            // Gremlin 3 does not support _() syntax
-            //
-            return new FunctionCallExpression(parent, operator, operands);
-        }
+                                                      List<GroovyExpression> operands) {
+        return new FunctionCallExpression(TraversalStepType.FILTER, parent, operator, operands);
     }
 
     @Override
@@ -97,20 +88,20 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
         if (inSelect) {
             return getFieldInSelect();
         } else {
-            return new FunctionCallExpression(parent, SELECT_METHOD, new LiteralExpression(alias));
+            return new FunctionCallExpression(TraversalStepType.MAP_TO_ELEMENT, parent, SELECT_METHOD, new LiteralExpression(alias));
         }
     }
 
     @Override
     public GroovyExpression typeTestExpression(GraphPersistenceStrategies s, String typeName, GroovyExpression itRef) {
-        LiteralExpression typeAttrExpr = new LiteralExpression(s.typeAttributeName());
         LiteralExpression superTypeAttrExpr = new LiteralExpression(s.superTypeAttributeName());
         LiteralExpression typeNameExpr = new LiteralExpression(typeName);
-
-        FunctionCallExpression result = new FunctionCallExpression(HAS_METHOD, typeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr));
-        result = new FunctionCallExpression(result, "or");
-        result = new FunctionCallExpression(HAS_METHOD, superTypeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr));
+        LiteralExpression typeAttrExpr = new LiteralExpression(s.typeAttributeName());
+        FunctionCallExpression result = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, typeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr));
+        result = new FunctionCallExpression(TraversalStepType.FILTER, result, "or");
+        result = new FunctionCallExpression(TraversalStepType.FILTER, result, HAS_METHOD, superTypeAttrExpr, new FunctionCallExpression(EQ_METHOD, typeNameExpr));
         return result;
+
     }
 
     @Override
@@ -118,41 +109,48 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
 
         GroovyExpression emitExpr = generateLoopEmitExpression(s, dataType);
 
-        GroovyExpression result = new FunctionCallExpression(parent, REPEAT_METHOD, loopExpr);
+        GroovyExpression result = new FunctionCallExpression(TraversalStepType.BRANCH, parent, REPEAT_METHOD, loopExpr);
         if (times != null) {
             GroovyExpression timesExpr = new LiteralExpression(times);
-            result = new FunctionCallExpression(result, TIMES_METHOD, timesExpr);
+            result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, TIMES_METHOD, timesExpr);
         }
-        result = new FunctionCallExpression(result, EMIT_METHOD, emitExpr);
+        result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, EMIT_METHOD, emitExpr);
         return result;
 
     }
 
     @Override
     public GroovyExpression getLoopExpressionParent(GroovyExpression inputQry) {
-        return new IdentifierExpression("__");
+        GroovyExpression curTraversal = getAnonymousTraversalStartExpression();
+        return curTraversal;
+    }
+
+    private IdentifierExpression getAnonymousTraversalStartExpression() {
+        return new IdentifierExpression(TraversalStepType.START, "__");
     }
 
     @Override
     public GroovyExpression generateSelectExpression(GroovyExpression parent, List<LiteralExpression> sourceNames,
             List<GroovyExpression> srcExprs) {
-        FunctionCallExpression result = new FunctionCallExpression(parent, SELECT_METHOD, sourceNames);
+        FunctionCallExpression result = new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, parent, SELECT_METHOD, sourceNames);
 
         for (GroovyExpression expr : srcExprs) {
             GroovyExpression closure = new ClosureExpression(expr);
             GroovyExpression castClosure = new TypeCoersionExpression(closure, FUNCTION_CLASS);
-            result = new FunctionCallExpression(result, BY_METHOD, castClosure);
+            result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, BY_METHOD, castClosure);
         }
         return result;
     }
 
     @Override
     public GroovyExpression generateFieldExpression(GroovyExpression parent, FieldInfo fInfo,
-            String propertyName, boolean inSelect) {
+            String propertyName, boolean inSelect)  {
 
         AttributeInfo attrInfo = fInfo.attrInfo();
         IDataType attrType = attrInfo.dataType();
         GroovyExpression propertyNameExpr = new LiteralExpression(propertyName);
+        //Whether it is the user or shared graph does not matter here, since we're
+        //just getting the conversion expression.  Ideally that would be moved someplace else.
         AtlasGraph graph = AtlasGraphProvider.getGraphInstance();
         if (inSelect) {
 
@@ -161,13 +159,13 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
             return graph.generatePersisentToLogicalConversionExpression(expr, attrType);
         } else {
 
-            GroovyExpression unmapped = new FunctionCallExpression(parent, VALUES_METHOD, propertyNameExpr);
+            GroovyExpression unmapped = new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_VALUES, parent, VALUES_METHOD, propertyNameExpr);
             if (graph.isPropertyValueConversionNeeded(attrType)) {
                 GroovyExpression toConvert = new FunctionCallExpression(getItVariable(), GET_METHOD);
 
                 GroovyExpression conversionFunction = graph.generatePersisentToLogicalConversionExpression(toConvert,
                         attrType);
-                return new FunctionCallExpression(unmapped, MAP_METHOD, new ClosureExpression(conversionFunction));
+                return new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, unmapped, MAP_METHOD, new ClosureExpression(conversionFunction));
             } else {
                 return unmapped;
             }
@@ -238,15 +236,15 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
                     valueMatchesExpr);
 
             GroovyExpression filterFunction = new ClosureExpression(filterCondition);
-            return new FunctionCallExpression(parent, FILTER_METHOD, filterFunction);
+            return new FunctionCallExpression(TraversalStepType.FILTER, parent, FILTER_METHOD, filterFunction);
         } else {
             GroovyExpression valueMatches = new FunctionCallExpression(getComparisonFunction(symbol), requiredValue);
-            return new FunctionCallExpression(parent, HAS_METHOD, propertNameExpr, valueMatches);
+            return new FunctionCallExpression(TraversalStepType.FILTER, parent, HAS_METHOD, propertNameExpr, valueMatches);
         }
     }
 
     @Override
-    protected GroovyExpression initialExpression(GraphPersistenceStrategies s, GroovyExpression varExpr) {
+    protected GroovyExpression initialExpression(GroovyExpression varExpr, GraphPersistenceStrategies s) {
 
         // this bit of groovy magic converts the set of vertices in varName into
         // a String containing the ids of all the vertices. This becomes the
@@ -255,15 +253,61 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
         // _()
         // s"g.V(${varName}.collect{it.id()} as String[])"
 
-        GroovyExpression gExpr = getGraph();
+        GroovyExpression gExpr = getGraphExpression();
         GroovyExpression varRefExpr = new TypeCoersionExpression(varExpr, OBJECT_ARRAY_CLASS);
-        FunctionCallExpression expr = new FunctionCallExpression(gExpr, V_METHOD, varRefExpr);
+        GroovyExpression matchingVerticesExpr = new FunctionCallExpression(TraversalStepType.START, gExpr, V_METHOD, varRefExpr);
+        GroovyExpression isEmpty  = new FunctionCallExpression(varExpr, "isEmpty");
+        GroovyExpression emptyGraph = getEmptyTraversalExpression();
+
+        GroovyExpression expr = new TernaryOperatorExpression(isEmpty, emptyGraph, matchingVerticesExpr);
+
         return s.addInitialQueryCondition(expr);
     }
 
+    private GroovyExpression getEmptyTraversalExpression() {
+        GroovyExpression emptyGraph = new FunctionCallExpression(TraversalStepType.START, getGraphExpression(), V_METHOD, EMPTY_STRING_EXPRESSION);
+        return emptyGraph;
+    }
+
+    @Override
+    public GroovyExpression generateRangeExpression(GroovyExpression parent, int startIndex, int endIndex) {
+        //treat as barrier step, since limits need to be applied globally (even though it
+        //is technically a filter step)
+        return new FunctionCallExpression(TraversalStepType.BARRIER, parent, RANGE_METHOD, new LiteralExpression(startIndex), new LiteralExpression(endIndex));
+    }
+
+    @Override
+    public boolean isRangeExpression(GroovyExpression expr) {
+
+        return (expr instanceof FunctionCallExpression && ((FunctionCallExpression)expr).getFunctionName().equals(RANGE_METHOD));
+    }
+
+    @Override
+    public int[] getRangeParameters(AbstractFunctionExpression expr) {
+
+        if (isRangeExpression(expr)) {
+            FunctionCallExpression rangeExpression = (FunctionCallExpression) expr;
+            List<GroovyExpression> arguments = rangeExpression.getArguments();
+            int startIndex = (int)((LiteralExpression)arguments.get(0)).getValue();
+            int endIndex = (int)((LiteralExpression)arguments.get(1)).getValue();
+            return new int[]{startIndex, endIndex};
+        }
+        else {
+            return null;
+        }
+    }
+
     @Override
-    public GroovyExpression generateLimitExpression(GroovyExpression parent, int offset, int totalRows) {
-        return new FunctionCallExpression(parent, RANGE_METHOD, new LiteralExpression(offset), new LiteralExpression(totalRows));
+    public void setRangeParameters(GroovyExpression expr, int startIndex, int endIndex) {
+
+        if (isRangeExpression(expr)) {
+            FunctionCallExpression rangeExpression = (FunctionCallExpression) expr;
+            rangeExpression.setArgument(0, new LiteralExpression(Integer.valueOf(startIndex)));
+            rangeExpression.setArgument(1, new LiteralExpression(Integer.valueOf(endIndex)));
+        }
+        else {
+            throw new IllegalArgumentException(expr + " is not a valid range expression");
+        }
     }
 
     @Override
@@ -295,7 +339,8 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
             comparisonExpr = new ComparisonOperatorExpression(bCompExpr, aCompExpr);
         }
         ClosureExpression comparisonFunction = new ClosureExpression(comparisonExpr, "a", "b");
-        return new FunctionCallExpression(new FunctionCallExpression(parent, ORDER_METHOD), BY_METHOD, orderByClause, comparisonFunction);
+        FunctionCallExpression orderCall = new FunctionCallExpression(TraversalStepType.BARRIER, parent, ORDER_METHOD);
+        return new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, orderCall, BY_METHOD, orderByClause, comparisonFunction);
     }
 
     @Override
@@ -327,10 +372,10 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
     public GroovyExpression generateGroupByExpression(GroovyExpression parent, GroovyExpression groupByExpression,
             GroovyExpression aggregationFunction) {
 
-        GroovyExpression result = new FunctionCallExpression(parent, "group");
+        GroovyExpression result = new FunctionCallExpression(TraversalStepType.BARRIER, parent, "group");
         GroovyExpression groupByClosureExpr = new TypeCoersionExpression(new ClosureExpression(groupByExpression), "Function");
-        result = new FunctionCallExpression(result, "by", groupByClosureExpr);
-        result = new FunctionCallExpression(result, "toList");
+        result = new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, result, "by", groupByClosureExpr);
+        result = new FunctionCallExpression(TraversalStepType.END, result, "toList");
 
         GroovyExpression mapValuesClosure = new ClosureExpression(new FunctionCallExpression(new CastExpression(getItVariable(), "Map"), "values"));
 
@@ -347,8 +392,55 @@ public class Gremlin3ExpressionFactory extends GremlinExpressionFactory {
         result = new FunctionCallExpression(result, "collect", aggregrationFunctionClosure);
         return result;
     }
+
+    @Override
+    public GroovyExpression generateSeededTraversalExpresssion(boolean isMap, GroovyExpression valueCollection) {
+        GroovyExpression coersedExpression = new TypeCoersionExpression(valueCollection, isMap ? "Map[]" : "Vertex[]");
+        if(isMap) {
+
+            return new FunctionCallExpression(TraversalStepType.START, "__", coersedExpression);
+        }
+        else {
+            //We cannot always use an anonymous traversal because that breaks repeat steps
+            return new FunctionCallExpression(TraversalStepType.START, getEmptyTraversalExpression(), "inject", coersedExpression);
+        }
+    }
+
+    @Override
     public GroovyExpression getGroupBySelectFieldParent() {
         return null;
     }
-}
 
+    @Override
+    public String getTraversalExpressionClass() {
+        return "GraphTraversal";
+    }
+
+    @Override
+    public boolean isSelectGeneratesMap(int aliasCount) {
+        //in Gremlin 3, you only get a map if there is more than 1 alias.
+        return aliasCount > 1;
+    }
+
+    @Override
+    public GroovyExpression generateMapExpression(GroovyExpression parent, ClosureExpression closureExpression) {
+        return new FunctionCallExpression(TraversalStepType.MAP_TO_ELEMENT, parent, "map", closureExpression);
+    }
+
+    @Override
+    public GroovyExpression generateGetSelectedValueExpression(LiteralExpression key,
+            GroovyExpression rowMapExpr) {
+        rowMapExpr = new CastExpression(rowMapExpr, "Map");
+        GroovyExpression getExpr = new FunctionCallExpression(rowMapExpr, "get", key);
+        return getExpr;
+    }
+
+    @Override
+    public GroovyExpression getCurrentTraverserObject(GroovyExpression traverser) {
+        return new FunctionCallExpression(traverser, "get");
+    }
+
+    public List<String> getAliasesRequiredByExpression(GroovyExpression expr) {
+        return Collections.emptyList();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/aa74c73d/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java b/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java
index 6c326b2..c2fdf09 100644
--- a/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java
+++ b/repository/src/main/java/org/apache/atlas/gremlin/GremlinExpressionFactory.java
@@ -17,8 +17,14 @@
  */
 package org.apache.atlas.gremlin;
 
-import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.atlas.AtlasException;
+import org.apache.atlas.groovy.AbstractFunctionExpression;
 import org.apache.atlas.groovy.ArithmeticExpression;
 import org.apache.atlas.groovy.ArithmeticExpression.ArithmeticOperator;
 import org.apache.atlas.groovy.CastExpression;
@@ -29,6 +35,7 @@ import org.apache.atlas.groovy.GroovyExpression;
 import org.apache.atlas.groovy.IdentifierExpression;
 import org.apache.atlas.groovy.ListExpression;
 import org.apache.atlas.groovy.LiteralExpression;
+import org.apache.atlas.groovy.TraversalStepType;
 import org.apache.atlas.groovy.TypeCoersionExpression;
 import org.apache.atlas.groovy.VariableAssignmentExpression;
 import org.apache.atlas.query.GraphPersistenceStrategies;
@@ -40,13 +47,9 @@ import org.apache.atlas.repository.graphdb.GremlinVersion;
 import org.apache.atlas.typesystem.types.IDataType;
 import org.apache.atlas.typesystem.types.TypeSystem;
 import org.apache.atlas.typesystem.types.cache.TypeCache.TYPE_FILTER;
+import org.apache.atlas.util.AtlasRepositoryConfiguration;
 
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import com.google.common.collect.ImmutableList;
 
 /**
  * Factory to generate Groovy expressions representing Gremlin syntax that that
@@ -58,7 +61,8 @@ public abstract class GremlinExpressionFactory {
     private static final String G_VARIABLE = "g";
     private static final String IT_VARIABLE = "it";
 
-    private static final String SET_CLASS = "Set";
+    protected static final String SET_CLASS = "Set";
+
 
     private static final String OBJECT_FIELD = "object";
 
@@ -66,18 +70,25 @@ public abstract class GremlinExpressionFactory {
     protected static final String FILTER_METHOD = "filter";
     private static final String PATH_METHOD = "path";
     private static final String AS_METHOD = "as";
-    private static final String FILL_METHOD = "fill";
     private static final String IN_OPERATOR = "in";
     protected static final String HAS_METHOD = "has";
     protected static final String TO_LOWER_CASE_METHOD = "toLowerCase";
     protected static final String SELECT_METHOD = "select";
     protected static final String ORDER_METHOD = "order";
+    protected static final String FILL_METHOD = "fill";
 
     public static final GremlinExpressionFactory INSTANCE = AtlasGraphProvider.getGraphInstance()
             .getSupportedGremlinVersion() == GremlinVersion.THREE ? new Gremlin3ExpressionFactory()
                     : new Gremlin2ExpressionFactory();
 
     /**
+     * Returns the unqualified name of the class used in this version of gremlin to
+     * represent Gremlin queries as they are being generated.
+     * @return
+     */
+    public abstract String getTraversalExpressionClass();
+
+    /**
      * Gets the expression to use as the parent when translating the loop
      * expression in a loop
      *
@@ -172,14 +183,40 @@ public abstract class GremlinExpressionFactory {
             String propertyName, String symbol, GroovyExpression requiredValue, FieldInfo fInfo) throws AtlasException;
 
     /**
-     * Generates a limit expression
+     * Generates a range expression
      *
      * @param parent
-     * @param offset
-     * @param totalRows
+     * @param startIndex
+     * @param endIndex
      * @return
      */
-    public abstract GroovyExpression generateLimitExpression(GroovyExpression parent, int offset, int totalRows);
+    public abstract GroovyExpression generateRangeExpression(GroovyExpression parent, int startIndex, int endIndex);
+
+    /**
+     * Determines if the specified expression is a range method call.
+     *
+     * @param expr
+     * @return
+     */
+    public abstract boolean isRangeExpression(GroovyExpression expr);
+
+    /**
+     * Set the start index and end index of a range expression
+     *
+     * @param expr
+     * @param startIndex
+     * @param endIndex
+     */
+    public abstract void setRangeParameters(GroovyExpression expr, int startIndex, int endIndex);
+
+    /**
+     * If the specified function expression is a range expression, returns the start and end index parameters
+     * otherwise returns null.
+     *
+     * @param expr
+     * @return int array with two elements - element 0 is start index, element 1 is end index
+     */
+    public abstract int[] getRangeParameters(AbstractFunctionExpression expr);
 
     /**
      * Generates an order by expression
@@ -193,6 +230,22 @@ public abstract class GremlinExpressionFactory {
             List<GroovyExpression> translatedOrderBy, boolean isAscending);
 
     /**
+     * Determines if specified expression is an order method call
+     *
+     * @param expr
+     * @return
+     */
+    public boolean isOrderExpression(GroovyExpression expr) {
+        if (expr instanceof FunctionCallExpression) {
+            FunctionCallExpression functionCallExpression = (FunctionCallExpression) expr;
+            if (functionCallExpression.getFunctionName().equals(ORDER_METHOD)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
      * Returns the Groovy expressions that should be used as the parents when
      * translating an order by expression. This is needed because Gremlin 2 and
      * 3 handle order by expressions very differently.
@@ -207,6 +260,17 @@ public abstract class GremlinExpressionFactory {
      */
     public abstract GroovyExpression getAnonymousTraversalExpression();
 
+    public boolean isLeafAnonymousTraversalExpression(GroovyExpression expr) {
+        if(!(expr instanceof FunctionCallExpression)) {
+            return false;
+        }
+        FunctionCallExpression functionCallExpr = (FunctionCallExpression)expr;
+        if(functionCallExpr.getCaller() != null) {
+            return false;
+        }
+        return functionCallExpr.getFunctionName().equals("_") & functionCallExpr.getArguments().size() == 0;
+    }
+
     /**
      * Returns an expression representing
      *
@@ -216,11 +280,11 @@ public abstract class GremlinExpressionFactory {
 
     /**
      * Generates the expression the serves as the root of the Gremlin query.
-     * @param s
      * @param varExpr variable containing the vertices to traverse
      * @return
      */
-    protected abstract GroovyExpression initialExpression(GraphPersistenceStrategies s, GroovyExpression varExpr);
+    protected abstract GroovyExpression initialExpression(GroovyExpression varExpr, GraphPersistenceStrategies s);
+
 
     /**
      * Generates an expression that tests whether the vertex represented by the 'toTest'
@@ -236,19 +300,37 @@ public abstract class GremlinExpressionFactory {
             GroovyExpression vertexExpr);
 
     /**
+    /**
      * Generates a sequence of groovy expressions that filter the vertices to only
      * those that match the specified type.  If GraphPersistenceStrategies.collectTypeInstancesIntoVar()
-     * is set, the vertices are put into a variable whose name is geneated from the specified IntSequence.
-     * The last item in the result will be a graph traversal restricted to only the matching vertices.
+     * is set and the gremlin optimizer is disabled, the vertices are put into a variable whose name is generated
+     * from the specified IntSequence.  The last item in the result will be a graph traversal restricted to only
+     * the matching vertices.
      */
     public List<GroovyExpression> generateTypeTestExpression(GraphPersistenceStrategies s, GroovyExpression parent,
                                                              String typeName, IntSequence intSeq) throws AtlasException {
-        if (s.filterBySubTypes()) {
-            return typeTestExpressionUsingInFilter(s, parent, typeName);
-        } else if (s.collectTypeInstancesIntoVar()) {
-            return typeTestExpressionMultiStep(s, typeName, intSeq);
-        } else {
-            return typeTestExpressionUsingFilter(s, parent, typeName);
+
+        if(AtlasRepositoryConfiguration.isGremlinOptimizerEnabled()) {
+            GroovyExpression superTypeAttributeNameExpr = new LiteralExpression(s.superTypeAttributeName());
+            GroovyExpression typeNameExpr = new LiteralExpression(typeName);
+            GroovyExpression superTypeMatchesExpr = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, superTypeAttributeNameExpr,
+                    typeNameExpr);
+
+            GroovyExpression typeAttributeNameExpr = new LiteralExpression(s.typeAttributeName());
+
+            GroovyExpression typeMatchesExpr = new FunctionCallExpression(TraversalStepType.FILTER, HAS_METHOD, typeAttributeNameExpr,
+                    typeNameExpr);
+            GroovyExpression result = new FunctionCallExpression(TraversalStepType.FILTER, parent, "or", typeMatchesExpr, superTypeMatchesExpr);
+            return Collections.singletonList(result);
+        }
+        else {
+            if (s.filterBySubTypes()) {
+                return typeTestExpressionUsingInFilter(s, parent, typeName);
+            } else if (s.collectTypeInstancesIntoVar()) {
+                return typeTestExpressionMultiStep(s, typeName, intSeq);
+            } else {
+                return typeTestExpressionUsingFilter(s, parent, typeName);
+            }
         }
     }
 
@@ -285,7 +367,7 @@ public abstract class GremlinExpressionFactory {
         result.add(newSetVar(varName));
         result.add(fillVarWithTypeInstances(s, typeName, varName));
         result.add(fillVarWithSubTypeInstances(s, typeName, varName));
-        result.add(initialExpression(s, varExpr));
+        result.add(initialExpression(varExpr, s));
 
         return result;
     }
@@ -324,7 +406,6 @@ public abstract class GremlinExpressionFactory {
         return Collections.singletonList(filterExpr);
     }
 
-
     /**
      * Generates an expression which checks whether the vertices in the query have
      * a field with the given name.
@@ -334,7 +415,7 @@ public abstract class GremlinExpressionFactory {
      * @return
      */
     public GroovyExpression generateUnaryHasExpression(GroovyExpression parent, String fieldName) {
-        return new FunctionCallExpression(parent, HAS_METHOD, new LiteralExpression(fieldName));
+        return new FunctionCallExpression(TraversalStepType.FILTER, parent, HAS_METHOD, new LiteralExpression(fieldName));
     }
 
     /**
@@ -344,7 +425,7 @@ public abstract class GremlinExpressionFactory {
      * @return
      */
     public GroovyExpression generatePathExpression(GroovyExpression parent) {
-        return new FunctionCallExpression(parent, PATH_METHOD);
+        return new FunctionCallExpression(TraversalStepType.MAP_TO_VALUE, parent, PATH_METHOD);
     }
 
     /**
@@ -365,7 +446,7 @@ public abstract class GremlinExpressionFactory {
      * @return
      */
     public GroovyExpression generateAliasExpression(GroovyExpression parent, String alias) {
-        return new FunctionCallExpression(parent, AS_METHOD, new LiteralExpression(alias));
+        return new FunctionCallExpression(TraversalStepType.SIDE_EFFECT, parent, AS_METHOD, new LiteralExpression(alias));
     }
 
     /**
@@ -377,19 +458,19 @@ public abstract class GremlinExpressionFactory {
      * @return
      */
     public GroovyExpression generateAdjacentVerticesExpression(GroovyExpression parent, AtlasEdgeDirection dir) {
-        return new FunctionCallExpression(parent, getGremlinFunctionName(dir));
+        return new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_ELEMENTS, parent, getGremlinFunctionName(dir));
     }
 
     private String getGremlinFunctionName(AtlasEdgeDirection dir) {
         switch(dir) {
-            case IN:
-              return "in";
-            case OUT:
-                return "out";
-            case BOTH:
-                return "both";
-            default:
-                throw new RuntimeException("Unknown Atlas Edge Direction: " + dir);
+        case IN:
+            return "in";
+        case OUT:
+            return "out";
+        case BOTH:
+            return "both";
+        default:
+            throw new RuntimeException("Unknown Atlas Edge Direction: " + dir);
         }
     }
 
@@ -403,7 +484,7 @@ public abstract class GremlinExpressionFactory {
      */
     public GroovyExpression generateAdjacentVerticesExpression(GroovyExpression parent, AtlasEdgeDirection dir,
             String label) {
-        return new FunctionCallExpression(parent, getGremlinFunctionName(dir), new LiteralExpression(label));
+        return new FunctionCallExpression(TraversalStepType.FLAT_MAP_TO_ELEMENTS, parent, getGremlinFunctionName(dir), new LiteralExpression(label));
     }
 
     /**
@@ -423,17 +504,19 @@ public abstract class GremlinExpressionFactory {
     }
 
     protected GroovyExpression getAllVerticesExpr() {
-        GroovyExpression gExpr = getGraph();
-        return new FunctionCallExpression(gExpr, V_METHOD);
+        GroovyExpression gExpr = getGraphExpression();
+        return new FunctionCallExpression(TraversalStepType.START, gExpr, V_METHOD);
     }
 
-    protected IdentifierExpression getGraph() {
-        return new IdentifierExpression(G_VARIABLE);
+    protected IdentifierExpression getGraphExpression() {
+        return new IdentifierExpression(TraversalStepType.SOURCE, G_VARIABLE);
     }
 
+
     protected GroovyExpression getCurrentObjectExpression() {
         return new FieldExpression(getItVariable(), OBJECT_FIELD);
     }
+
     //assumes cast already performed
     public GroovyExpression generateCountExpression(GroovyExpression itExpr) {
         GroovyExpression collectionExpr = new CastExpression(itExpr,"Collection");
@@ -454,11 +537,9 @@ public abstract class GremlinExpressionFactory {
 
     private GroovyExpression getAggregrationExpression(GroovyExpression itExpr,
             GroovyExpression mapFunction, String functionName) {
-        GroovyExpression collectionExpr = new CastExpression(itExpr,
-                "Collection");
+        GroovyExpression collectionExpr = new CastExpression(itExpr,"Collection");
         ClosureExpression collectFunction = new ClosureExpression(mapFunction);
-        GroovyExpression transformedList = new FunctionCallExpression(
-                collectionExpr, "collect", collectFunction);
+        GroovyExpression transformedList = new FunctionCallExpression(collectionExpr, "collect", collectFunction);
         return new FunctionCallExpression(transformedList, functionName);
     }
 
@@ -466,5 +547,106 @@ public abstract class GremlinExpressionFactory {
         return getItVariable();
     }
 
+    /**
+     * Specifies the parent to use when translating the select list in
+     * a group by statement.
+     *
+     * @return
+     */
     public abstract GroovyExpression getGroupBySelectFieldParent();
+
+    public GroovyExpression generateFillExpression(GroovyExpression parent, GroovyExpression variable) {
+        return new FunctionCallExpression(TraversalStepType.END,parent , "fill", variable);
+    }
+
+    /**
+     * Generates an anonymous graph traversal initialized with the specified value.  In Gremlin 3, we need
+     * to use  a different syntax for this when the object is a map, so that information needs to be provided
+     * to this method so that the correct syntax is used.
+     *
+     * @param isMap true if the value contains Map instances, false if it contains Vertex instances
+     * @param valueCollection the source objects to start the traversal from.
+     */
+    public abstract GroovyExpression generateSeededTraversalExpresssion(boolean isMap, GroovyExpression valueCollection);
+
+    /**
+     * Returns the current value of the traverser.  This is used when generating closure expressions that
+     * need to operate on the current value in the graph graversal.
+     *
+     * @param traverser
+     * @return
+     */
+    public abstract GroovyExpression getCurrentTraverserObject(GroovyExpression traverser);
+
+    /**
+     * Generates an expression that transforms the current value of the traverser by
+     * applying the function specified
+     *
+     * @param parent
+     * @param closureExpression
+     * @return
+     */
+    public abstract GroovyExpression generateMapExpression(GroovyExpression parent, ClosureExpression closureExpression);
+
+    /**
+     * Returns whether a select statement generates a map (or Gremlin 2 "Row") when it contains the specified
+     * number of aliases.
+     *
+     */
+    public abstract boolean isSelectGeneratesMap(int aliasCount);
+
+    /**
+     * Generates an expression to get the value of the value from the row map
+     * generated by select() with the specified key.
+     *
+     */
+    public abstract GroovyExpression generateGetSelectedValueExpression(LiteralExpression key,
+            GroovyExpression rowMapExpr);
+
+    public GroovyExpression removeExtraMapFromPathInResult(GroovyExpression parent) {
+        GroovyExpression listItem = getItVariable();
+        GroovyExpression tailExpr = new FunctionCallExpression(listItem, "tail");
+        return new FunctionCallExpression(parent, "collect", new ClosureExpression(tailExpr));
+
+    }
+
+    /**
+     * Generates a toList expression to execute the gremlin query and
+     * store the result in a new list.
+     *
+     * @param expr
+     * @return
+     */
+    public GroovyExpression generateToListExpression(GroovyExpression expr) {
+        return new FunctionCallExpression(TraversalStepType.END, expr, "toList");
+    }
+
+    /**
+     * Finds aliases that absolutely must be brought along with this expression into
+     * the output expression and cannot just be recreated there.  For example, in the
+     * Gremlin 2 loop expression, the loop semantics break of the alias is simply recreated
+     * in the output expression.
+     * @param expr
+     * @return
+     */
+    public abstract List<String> getAliasesRequiredByExpression(GroovyExpression expr);
+
+
+    /**
+     * Checks if the given expression is an alias expression, and if so
+     * returns the alias from the expression.  Otherwise, null is
+     * returned.
+     */
+    public String getAliasNameIfRelevant(GroovyExpression expr) {
+        if(!(expr instanceof FunctionCallExpression)) {
+            return null;
+        }
+        FunctionCallExpression fc = (FunctionCallExpression)expr;
+        if(! fc.getFunctionName().equals(AS_METHOD)) {
+            return null;
+        }
+       LiteralExpression aliasName =  (LiteralExpression)fc.getArguments().get(0);
+       return aliasName.getValue().toString();
+
+    }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/aa74c73d/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java
new file mode 100644
index 0000000..3e6c39a
--- /dev/null
+++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/AliasFinder.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.gremlin.optimizer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.atlas.groovy.AbstractFunctionExpression;
+import org.apache.atlas.groovy.FunctionCallExpression;
+import org.apache.atlas.groovy.GroovyExpression;
+import org.apache.atlas.groovy.LiteralExpression;
+import org.apache.atlas.groovy.TraversalStepType;
+
+/**
+ * Finds all aliases in the expression.
+ */
+public class AliasFinder implements CallHierarchyVisitor {
+
+    private List<LiteralExpression> foundAliases = new ArrayList<>();
+
+    //Whether a final alias is needed.  A final alias is needed
+    //if there are transformation steps after the last alias in
+    //the expression.  We initialize this to false since a final
+    //alias is not needed if there are no aliases.
+    private boolean finalAliasNeeded = false;
+
+    @Override
+    public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) {
+        return true;
+    }
+
+    @Override
+    public void visitNonFunctionCaller(GroovyExpression expr) {
+
+    }
+
+    @Override
+    public void visitNullCaller() {
+
+    }
+
+    private static final Set<TraversalStepType> TRANSFORMATION_STEP_TYPES = new HashSet<>(Arrays.asList(
+            TraversalStepType.MAP_TO_ELEMENT,
+            TraversalStepType.MAP_TO_VALUE,
+            TraversalStepType.FLAT_MAP_TO_ELEMENTS,
+            TraversalStepType.FLAT_MAP_TO_VALUES,
+            TraversalStepType.BARRIER,
+            TraversalStepType.NONE));
+
+
+    @Override
+    public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) {
+
+        if (functionCall instanceof FunctionCallExpression) {
+            FunctionCallExpression expr = (FunctionCallExpression)functionCall;
+            if (expr.getType() == TraversalStepType.SIDE_EFFECT && expr.getFunctionName().equals("as")) {
+                //We found an alias.  This is currently the last expression we've seen
+                //in our traversal back up the expression tree, so at this point a final
+                //alias is not needed.
+                LiteralExpression aliasNameExpr = (LiteralExpression)expr.getArguments().get(0);
+                foundAliases.add(aliasNameExpr);
+                finalAliasNeeded=false;
+            }
+        }
+
+        if(TRANSFORMATION_STEP_TYPES.contains(functionCall.getType())) {
+            //This step changes the value of the traverser.  Now, a final alias
+            //needs to be added.
+            if(!foundAliases.isEmpty()) {
+                finalAliasNeeded = true;
+            }
+        }
+
+        return true;
+    }
+
+    public List<LiteralExpression> getAliases() {
+        return foundAliases;
+    }
+
+    public boolean isFinalAliasNeeded() {
+
+        return finalAliasNeeded;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/aa74c73d/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java
new file mode 100644
index 0000000..6089353
--- /dev/null
+++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/CallHierarchyVisitor.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.gremlin.optimizer;
+
+import org.apache.atlas.groovy.AbstractFunctionExpression;
+import org.apache.atlas.groovy.GroovyExpression;
+
+/**
+ * Call back interface for visiting the call hierarchy of a function call.
+ */
+public interface CallHierarchyVisitor {
+
+    /**
+     * Visits a function expression before the visit to its caller.
+     *
+     * @param expr
+     *
+     * @return false to terminate the recursion
+     */
+    boolean preVisitFunctionCaller(AbstractFunctionExpression expr);
+
+    /**
+     * Called when a caller that is not an instance of
+     * AbstractFunctionExpression is found. This indicates that the deepest
+     * point in the call hierarchy has been reached.
+     *
+     *
+     */
+    void visitNonFunctionCaller(GroovyExpression expr);
+
+    /**
+     * Called when a null caller is found (this happens for static/user-defined
+     * functions). This indicates that the deepest point in the call hierarchy
+     * has been reached.
+     *
+     */
+    void visitNullCaller();
+
+    /**
+     * Visits a function expression after the visit to its caller.
+     *
+     * @param expr
+     *
+     * @return false to terminate the recursion
+     */
+    boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall);
+}

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/aa74c73d/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java
new file mode 100644
index 0000000..7cf9711
--- /dev/null
+++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandAndsOptimization.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.gremlin.optimizer;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.atlas.gremlin.GremlinExpressionFactory;
+import org.apache.atlas.groovy.AbstractFunctionExpression;
+import org.apache.atlas.groovy.FunctionCallExpression;
+import org.apache.atlas.groovy.GroovyExpression;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Optimizer that pulls has expressions out of an 'and' expression.
+ *
+ * For example:
+ *
+ * g.V().and(has('x'),has('y')
+ *
+ * is optimized to:
+ *
+ * g.V().has('x').has('y')
+ *
+ * There are certain cases where it is not safe to move an expression out
+ * of the 'and'.  For example, in the expression
+ *
+ * g.V().and(has('x').out('y'),has('z'))
+ *
+ * has('x').out('y') cannot be moved out of the 'and', since it changes the value of the traverser.
+ *
+ * At this time, the ExpandAndsOptimizer is not able to handle this scenario, so we don't extract
+ * that expression.  In this case, the result is:
+ *
+ * g.V().has('z').and(has('x').out('y'))
+ *
+ * The optimizer will call ExpandAndsOptimization recursively on the children, so
+ * there is no need to recursively update the children here.
+ *
+ * @param expr
+ * @param context
+ * @return the expressions that should be unioned together to get the query result
+ */
+public class ExpandAndsOptimization implements GremlinOptimization {
+
+    private static final Logger logger_ = LoggerFactory.getLogger(ExpandAndsOptimization.class);
+
+
+    private final GremlinExpressionFactory factory;
+
+    public ExpandAndsOptimization(GremlinExpressionFactory factory) {
+        this.factory = factory;
+    }
+
+    @Override
+    public boolean appliesTo(GroovyExpression expr, OptimizationContext contxt) {
+        return expr instanceof FunctionCallExpression && ((FunctionCallExpression)expr).getFunctionName().equals("and");
+    }
+
+    /**
+     * Expands the given and expression.  There is no need to recursively
+     * expand the children here.  This method is called recursively by
+     * GremlinQueryOptimier on the children.
+     *
+     */
+    @Override
+    public GroovyExpression apply(GroovyExpression expr, OptimizationContext context) {
+
+        FunctionCallExpression exprAsFunction = (FunctionCallExpression)expr;
+        GroovyExpression result = exprAsFunction.getCaller();
+
+        List<GroovyExpression> nonExtractableArguments = new ArrayList<>();
+        for(GroovyExpression argument : exprAsFunction.getArguments()) {
+
+            if (GremlinQueryOptimizer.isExtractable(argument)) {
+                //Set the caller of the deepest expression in the call hierarchy
+                //of the argument to point to the current result.
+                //For example, if result is "g.V()" and the updatedArgument is "has('x').has('y')",
+                //updatedArgument would be a tree like this:
+                //
+                //                  has('y')
+                //                 /
+                //                / caller
+                //              |/_
+                //           has('x')
+                //             /
+                //            / caller
+                //          |/_
+                //        (null)
+                //
+                //We would set the caller of has('x') to be g.V(), so result would become g.V().has('x').has('y').
+                //
+                // Note: This operation is currently done by making a copy of the argument tree.  That should
+                // be changed.
+                result = GremlinQueryOptimizer.copyWithNewLeafNode(
+                        (AbstractFunctionExpression) argument, result);
+            } else {
+                logger_.warn("Found non-extractable argument '{}' in the 'and' expression '{}'",argument.toString(), expr.toString());
+                nonExtractableArguments.add(argument);
+            }
+        }
+
+        if (!nonExtractableArguments.isEmpty()) {
+            //add a final 'and' call with the arguments that could not be extracted
+            result =  factory.generateLogicalExpression(result, "and", nonExtractableArguments);
+        }
+        return result;
+    }
+
+    @Override
+    public boolean isApplyRecursively() {
+        return true;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/aa74c73d/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java
new file mode 100644
index 0000000..ba6059a
--- /dev/null
+++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpandOrsOptimization.java
@@ -0,0 +1,584 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.gremlin.optimizer;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.atlas.gremlin.GremlinExpressionFactory;
+import org.apache.atlas.groovy.AbstractFunctionExpression;
+import org.apache.atlas.groovy.ClosureExpression;
+import org.apache.atlas.groovy.FunctionCallExpression;
+import org.apache.atlas.groovy.GroovyExpression;
+import org.apache.atlas.groovy.LiteralExpression;
+import org.apache.atlas.groovy.StatementListExpression;
+import org.apache.atlas.groovy.TraversalStepType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.Lists;
+
+
+
+/**
+ * Optimization that removes 'or' expressions from a graph traversal when possible
+ * and replaces them with separate calls that are combined using a logical union operation.
+ * Unfortunately, Titan does not use indices when executing the child graph traversals associated
+ * with an 'or' call.  In order to make the index be used, we split queries with
+ * or expressions into multiple queries.  These queries are executed individually,
+ * using indices, and then the results are combined back together.  Here is a
+ * simple example to illustrate this:
+ *
+ * <h4>Original Query</h4>
+ *
+ * <pre>
+ * g.V().or(has('name','Fred'),has('age','17'))
+ * </pre>
+ *
+ *<h4>Optimized Query</h4>
+ *
+ * <pre>
+ * def r = [] as Set;
+ * g.V().has('name','Fred').fill(r);
+ * g.V().has('age','17').fill(r);
+ * r;
+ * </pre>
+ *
+ * Here, we introduce an intermediate variable "r" which is declared as a Set.  The Set is performing
+ * the union for us.  If there are vertices that happen to both have "Fred" as the name and "17" as the age,
+ * the Set will prevent the second query execution from adding a duplicate vertex to the result.  Recall that
+ * in Groovy scripts, the last expression is the one that will be returned back to the caller.  We refer to
+ * that expression is the "result expression".  For this example, the result expression is simply "r", which
+ * contains the vertices that matched the query.
+ * <p/>
+ * If the query does any kind of transformation of the vertices to produce the query result, that needs
+ * to be done in the result expression.  To understand why that is, let's take a look at another example:
+ *
+ * <h4>Original Query</h4>
+ *
+ * <pre>
+ * g.V().or(has('name','Fred'),has('age','17')).as('person').select('person').by('gender')
+ * </pre>
+ *
+ * <h4>Incorrect Optimized Query</h4>
+ *
+ * <pre>
+ * def r = [] as Set;
+ * g.V().has('name','Fred').as('person').select('person').by('gender').fill(r)
+ * g.V().has('age','17').as('person').select('person').by('gender').fill(r)
+ * r;
+ * </pre>
+ *
+ * The problem with this query is that now 'r' contains Strings (the gender of the person).  Suppose
+ * that there is one person named Fred and there are 3 people whose age is 17 (let's say Fred's age is 16).
+ * The original query would have produced 4 rows, one corresponding to each of those people.  The new
+ * query would produce at most 2 rows - one for 'male' and one for 'female'.  This is happening because
+ * we are now performing the union on the Strings, not on the vertices.  To fix this, we need to split
+ * the original query and put the end portion into the result expression:
+ *
+ * <h4>Correct Optimized Query</h4>
+ *
+ * <pre>
+ * def r = [] as Set;
+ * g.V().has('name','Fred').fill(r)
+ * g.V().has('age','17').fill(r)
+ * __.inject(r as Object[]).as('person').select('person').by('gender')
+ * </pre>
+ *
+ * The logic for doing this splitting is described in more detail in
+ * {@link #moveTransformationsToResultExpression(GroovyExpression, OptimizationContext)}.
+ * <p/>
+ * There is one more problematic case that this optimizer is able to handle.  Let's look at the following example:
+ *
+ * <h4>Original Query</h4>
+ *
+ * <pre>
+ * g.V().or(has('type','Person'),has('superType','Person')).as('x').has('qualifiedName','Fred').as('y').select('x','y').by('name').by('name')
+ * </pre>
+ *
+ * Queries of this form appear often when translating DSL queries.
+ *
+ * If we were to optimize this query using the logic described above, we would get something like this:
+ *
+ * <h4>Incorrect Optimized Query</h4>
+ *
+ * <pre>
+ * def r = [] as Set;
+ * g.V().has('type','Person').fill(r);
+ * g.V().has('superType','Person').fill(r);
+ * __.inject(r as Object[]).as('x').has('qualifiedName','Fred').as('y').select('x','y');
+ * </pre>
+ *
+ * While not strictly incorrect, this query will not perform well since the index on qualifiedName will
+ * not be used.  In order for that index to be used, the 'has' expression needs to be part of the original
+ * query.  However, if we do that alone, the query will be broken, since the select
+ * will now refer to an undefined label:
+ *
+ * <h4>Incorrect Optimized Query</h4>
+ *
+ * <pre>
+ * def r = [] as Set;
+ * g.V().has('type','Person').as('x').has('qualifiedName','Fred').fill(r);
+ * g.V().has('superType','Person').as('x').has('qualifiedName','Fred').fill(r);
+ * __.inject(r as Object[]).as('y').select('x','y')
+ * </pre>
+ *
+ * To fix this, we need to save the values of the aliased vertices in the original
+ * query, and create labels in the result expression that refer to them.  We do this
+ * as follows:
+ *
+ * <h4>Correct Optimized Query</h4>
+ *
+ * <pre>
+ * def r = [] as Set;
+ * g.V().has('type','Person').as('x').has('qualifiedName','Fred').as('y').select('x','y').fill(r);
+ * g.V().has('superType','Person').as('x').has('qualifiedName','Fred').select('x','y').fill(r);
+ * __.inject(r as Object[]).as('__tmp').map({((Map)it.get()).get('x')}).as('x').select('__tmp').map({((Map)it.get()).get('x')}).as('y').select('x','y').by('name').by('name')
+ * </pre>
+ *
+ * This is not pretty, but is the best solution we've found so far for supporting expressions that contain aliases in this optimization.
+ * What ends up happening is that r gets populated with alias->Vertex maps.  In the result expression, we make 'x' point
+ * to a step where the value in the traverser is the vertex for 'x', and we do the same thing for y.  The <code>select('_tmp')</code> step in the middle restores the value of
+ * the traverser back to the map.
+ * <p/>
+ * The one known issue with the alias rearrangement is that it breaks loop expressions.  As a result, expressions containing loops are currently excluded
+ * from this optimization.
+ *
+ * ExpandOrsOptimization expands the entire expression tree recursively, so it is not invoked
+ * recursively by GremlinQueryOptimizer.
+ *
+ */
+public class ExpandOrsOptimization implements GremlinOptimization {
+
+    private static final Logger logger_ = LoggerFactory.getLogger(ExpandOrsOptimization.class);
+
+    private final GremlinExpressionFactory factory;
+
+    public ExpandOrsOptimization(GremlinExpressionFactory factory) {
+        this.factory = factory;
+    }
+
+    @Override
+    public boolean appliesTo(GroovyExpression expr, OptimizationContext contxt) {
+
+        ExpressionFinder finder = new ExpressionFinder(IsOr.INSTANCE);
+        GremlinQueryOptimizer.visitCallHierarchy(expr, finder);
+        return finder.isExpressionFound();
+    }
+
+    @Override
+    public GroovyExpression apply(GroovyExpression expr, OptimizationContext context) {
+
+        setupRangeOptimization(expr, context);
+        GroovyExpression traveralExpression = moveTransformationsToResultExpression(expr, context);
+
+        FunctionGenerator functionGenerator = new FunctionGenerator(factory, context);
+        GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, functionGenerator);
+        traveralExpression = functionGenerator.getNewRootExpression();
+        List<GroovyExpression> bodyExpressions = expandOrs(traveralExpression, context);
+
+
+        //Adds a statement to define the result variable 'v' in the
+        //groovy script.  The variable is declared as a Set.  The type
+        //of the objects in the Set depend on the number of aliases in the Groovy
+        // expression:
+        //   - 0 or 1 alias : Vertex
+        //   - multiple aliases: Map<String,Vertex>
+        StatementListExpression result = new StatementListExpression();
+        context.prependStatement(context.getDefineResultVariableStmt());
+
+
+        for (GroovyExpression bodyExpression : bodyExpressions) {
+            result.addStatement(bodyExpression);
+        }
+        result.addStatement(context.getResultExpression());
+        return result;
+    }
+
+    private void setupRangeOptimization(GroovyExpression expr, OptimizationContext context) {
+
+        // Find any range expressions in the expression tree.
+        RangeFinder rangeFinder = new RangeFinder(factory);
+        GremlinQueryOptimizer.visitCallHierarchy(expr, rangeFinder);
+        List<AbstractFunctionExpression> rangeExpressions = rangeFinder.getRangeExpressions();
+        if (rangeExpressions.size() == 1) {
+            OrderFinder orderFinder = new OrderFinder(factory);
+            GremlinQueryOptimizer.visitCallHierarchy(expr, orderFinder);
+            if (!orderFinder.hasOrderExpression()) {
+                // If there is one range expression and no order expression in the unoptimized gremlin,
+                // save the range parameters to use for adding a range expression to
+                // each expanded "or" expression result, such that it will only contain the specified range of vertices.
+                // For now, apply this optimization only if the range start index is zero.
+                AbstractFunctionExpression rangeExpression = rangeExpressions.get(0);
+                int[] rangeParameters = factory.getRangeParameters(rangeExpression);
+                if (rangeParameters[0] == 0) {
+                    context.setRangeExpression(rangeExpression);
+                }
+            }
+        }
+    }
+
+    private GroovyExpression moveTransformationsToResultExpression(GroovyExpression expr, OptimizationContext context) {
+        GroovyExpression traveralExpression = expr;
+
+        // Determine the 'split point'.  This is the expression that will become
+        // the deepest function call in the result expression.  If a split
+        // point is found, its caller is changed.  The new caller is
+        // set to the graph traversal expression in the result expression.
+        // The original caller becomes the new traversal expression that
+        // will be carried through the rest of the 'or' expansion processing.
+        //
+        // Example: g.V().has('x').as('x').select('x')
+        // Here, select('x') is the split expression
+        // so :
+        // 1) the result expression in OptimizationContext becomes [base result expression].select('x')
+        // 2) we return g.V().has('x').as('x')
+
+        SplitPointFinder finder = new SplitPointFinder(factory);
+        GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, finder);
+        AbstractFunctionExpression splitPoint = finder.getSplitPoint();
+
+
+        List<LiteralExpression> aliases = new ArrayList<>();
+
+        //If we're not splitting the query, there is no need to save/restore
+        //the aliases.
+        if(splitPoint != null) {
+
+            traveralExpression = splitPoint.getCaller();
+
+            AliasFinder aliasFinder = new AliasFinder();
+            GremlinQueryOptimizer.visitCallHierarchy(traveralExpression, aliasFinder);
+            aliases.addAll(aliasFinder.getAliases());
+            if(aliasFinder.isFinalAliasNeeded()) {
+                //The last alias in the expression does not capture the final vertex in the traverser,
+                //so we need to create an alias to record that.
+                traveralExpression = factory.generateAliasExpression(traveralExpression, context.getFinalAliasName());
+                aliases.add(new LiteralExpression(context.getFinalAliasName()));
+            }
+
+            GroovyExpression resultExpr = getBaseResultExpression(context, aliases);
+            splitPoint.setCaller(resultExpr);
+            expr = removeMapFromPathsIfNeeded(expr, aliases);
+            context.setResultExpression(expr);
+        }
+
+        //Add expression(s) to the end of the traversal expression to add the vertices
+        //that were found into the intermediate variable ('r')
+        traveralExpression = addCallToUpdateResultVariable(traveralExpression, aliases, context);
+        return traveralExpression;
+    }
+
+    private GroovyExpression removeMapFromPathsIfNeeded(GroovyExpression expr, List<LiteralExpression> aliases) {
+        if(aliases.size() > 0 && factory.isSelectGeneratesMap(aliases.size())) {
+            PathExpressionFinder pathExprFinder = new PathExpressionFinder();
+            GremlinQueryOptimizer.visitCallHierarchy(expr, pathExprFinder);
+            boolean hasPath = pathExprFinder.isPathExpressionFound();
+            if(hasPath) {
+                //the path will now start with the map that we added.  That is an artifact
+                //of the optimization process and must be removed.
+                if(expr.getType() != TraversalStepType.END && expr.getType() != TraversalStepType.NONE) {
+                    //we're still in the pipeline, need to execute the query before we can
+                    //modify the result
+                    expr = factory.generateToListExpression(expr);
+                }
+                expr = factory.removeExtraMapFromPathInResult(expr);
+            }
+
+        }
+        return expr;
+    }
+
+    /**
+     * This method adds steps to the end of the initial traversal to add the vertices
+     * that were found into an intermediate variable (defined as a Set).  If there is one alias,
+     * this set will contain the vertices associated with that Alias.  If there are multiple
+     * aliases, the values in the set will be alias->vertex maps that have the vertex
+     * associated with the alias for each result.
+
+     * @param expr
+     * @param aliasNames
+     * @param context
+     * @return
+     */
+    private GroovyExpression addCallToUpdateResultVariable(GroovyExpression expr,List<LiteralExpression> aliasNames, OptimizationContext context) {
+
+        GroovyExpression result = expr;
+        // If there is one range expression in the unoptimized gremlin,
+        // add a range expression here so that the intermediate variable will only contain
+        // the specified range of vertices.
+        AbstractFunctionExpression rangeExpression = context.getRangeExpression();
+        if (rangeExpression != null) {
+            int[] rangeParameters = factory.getRangeParameters(rangeExpression);
+            result = factory.generateRangeExpression(result, rangeParameters[0], rangeParameters[1]);
+        }
+        if( ! aliasNames.isEmpty()) {
+            result = factory.generateSelectExpression(result, aliasNames, Collections.<GroovyExpression>emptyList());
+        }
+        return factory.generateFillExpression(result, context.getResultVariable());
+    }
+
+    /**
+     * Recursively traverses the given expression, expanding or expressions
+     * wherever they are found.
+     *
+     * @param expr
+     * @param context
+     * @return expressions that should be unioned together to get the query result
+     */
+    private List<GroovyExpression> expandOrs(GroovyExpression expr, OptimizationContext context) {
+
+        if (GremlinQueryOptimizer.isOrExpression(expr)) {
+            return expandOrFunction(expr, context);
+        }
+        return processOtherExpression(expr, context);
+    }
+
+    /**
+     * This method takes an 'or' expression and expands it into multiple expressions.
+     *
+     * For example:
+     *
+     * g.V().or(has('x'),has('y')
+     *
+     * is expanded to:
+     *
+     * g.V().has('x')
+     * g.V().has('y')
+     *
+     * There are certain cases where it is not safe to move an expression out
+     * of the 'or'.  For example, in the expression
+     *
+     * g.V().or(has('x').out('y'),has('z'))
+     *
+     * has('x').out('y') cannot be moved out of the 'or', since it changes the value of the traverser.
+     *
+     * At this time, the ExpandOrsOptimizer is not able to handle this scenario, so we don't remove
+     * that expression.  In cases like this, a final expression is created that ors together
+     * all of the expressions that could not be extracted.  In this case that would be:
+     *
+     * g.V().has('z')
+     * g.V().or(has('y').out('z'))
+     *
+     * This processing is done recursively.
+     *
+     *
+     * @param expr
+     * @param context
+     * @return the expressions that should be unioned together to get the query result
+     */
+    private List<GroovyExpression> expandOrFunction(GroovyExpression expr, OptimizationContext context) {
+        FunctionCallExpression functionCall = (FunctionCallExpression) expr;
+        GroovyExpression caller = functionCall.getCaller();
+        List<GroovyExpression> updatedCallers = null;
+        if (caller != null) {
+            updatedCallers = expandOrs(caller, context);
+        } else {
+            updatedCallers = Collections.singletonList(null);
+        }
+        UpdatedExpressions newArguments = getUpdatedChildren(functionCall.getArguments(), context);
+        List<GroovyExpression> allUpdatedArguments = new ArrayList<>();
+        for (List<GroovyExpression> exprs : newArguments.getUpdatedChildren()) {
+            allUpdatedArguments.addAll(exprs);
+        }
+        List<AbstractFunctionExpression> extractableArguments = new ArrayList<>();
+        List<GroovyExpression> nonExtractableArguments = new ArrayList<>();
+        for (GroovyExpression argument : allUpdatedArguments) {
+
+            if (GremlinQueryOptimizer.isExtractable(argument)) {
+                extractableArguments.add((AbstractFunctionExpression) argument);
+            } else {
+                logger_.warn("Found non-extractable argument '{}; in the 'or' expression '{}'",argument.toString(), expr.toString());
+                nonExtractableArguments.add(argument);
+            }
+        }
+
+        List<GroovyExpression> result = new ArrayList<>();
+        for (GroovyExpression updatedCaller : updatedCallers) {
+
+            for (AbstractFunctionExpression arg : extractableArguments) {
+                GroovyExpression updated = GremlinQueryOptimizer.copyWithNewLeafNode(arg, updatedCaller);
+                result.add(updated);
+            }
+            if (!nonExtractableArguments.isEmpty()) {
+                result.add(factory.generateLogicalExpression(updatedCaller, "or", nonExtractableArguments));
+            }
+
+        }
+        return result;
+    }
+
+    private UpdatedExpressions getUpdatedChildren(List<GroovyExpression> children, OptimizationContext context) {
+        List<List<GroovyExpression>> updatedChildren = new ArrayList<>();
+        boolean changed = false;
+        for (GroovyExpression child : children) {
+            List<GroovyExpression> childChoices = expandOrs(child, context);
+            if (childChoices.size() != 1 || childChoices.iterator().next() != child) {
+                changed = true;
+            }
+            updatedChildren.add(childChoices);
+        }
+        return new UpdatedExpressions(changed, updatedChildren);
+    }
+
+    private UpdatedExpressions getUpdatedChildren(GroovyExpression expr, OptimizationContext context) {
+        return getUpdatedChildren(expr.getChildren(), context);
+    }
+
+    /**
+     * This is called when we encounter an expression that is not an "or", for example an "and" expressio.  For these
+     * expressions, we process the children and create copies with the cartesian product of the updated
+     * arguments.
+     *
+     * Example:
+     *
+     * g.V().and(or(has('x),has('y'), or(has('a'),has('b')))
+     *
+     * Here, we have an "and" expression with two children:
+     *
+     * 1) or(has('x),has('y')
+     * 2) or(has('a'),has('b'))
+     *
+     * We first process these children.  They each yield 2 expressions:
+     *
+     * 1 -> [ has('x'), has('y') ]
+     * 2 -> [ has('a'), has('b') ]
+     *
+     * The cartesian product of these gives this:
+     *
+     *    [ has('x'), has('a') ]
+     *    [ has('x'), has('b') ]
+     *    [ has('y'), has('a') ]
+     *    [ has('y'), has('b') ]
+     *
+     * So the overall result is:
+     *
+     * g.V().and(has('x'), has('a'))
+     * g.V().and(has('x'), has('b'))
+     * g.V().and(has('y'), has('a'))
+     * g.V().and(has('y'), has('b'))
+     *
+     *
+     * @param source
+     * @param context
+     * @return expressions that should be unioned together to get the query result
+     */
+    private List<GroovyExpression> processOtherExpression(GroovyExpression source, OptimizationContext context) {
+        UpdatedExpressions updatedChildren = getUpdatedChildren(source, context);
+        if (!updatedChildren.hasChanges()) {
+            return Collections.singletonList(source);
+        }
+        List<GroovyExpression> result = new ArrayList<GroovyExpression>();
+
+        //The updated children list we get back has the possible values for each child
+        //in the expression.  We compute a cartesian product to get all possible
+        //combinations of child values.
+        List<List<GroovyExpression>> updateChildLists = Lists.cartesianProduct(updatedChildren.getUpdatedChildren());
+
+        for (List<GroovyExpression> updatedChildList : updateChildLists) {
+            result.add(source.copy(updatedChildList));
+        }
+        return result;
+    }
+
+    @Override
+    public boolean isApplyRecursively() {
+        return false;
+    }
+
+    /**
+     *
+     * This method creates a base result expression that recreates the state of the
+     * graph traverser at start of the result expression to what it would have been
+     * if we had been executing one Gremlin query (instead of many and doing a union).
+     *
+     * To do this, we start with an anonymous graph traversal that will iterate
+     * through the values in the intermediate Set that was created.  We then need
+     * to set things up so that the aliases that were in the original gremlin query
+     * refer to steps with the correct traverser value.
+     *
+     * The way we do this depends on the number of aliases.  If there are 0 or 1 alias,
+     * the intermediate variable already contains Vertices, so we just create the alias.
+     *
+     * If there are multiple aliases, the intermediate variable contains a String->Vertex
+     * map.  We first create a temporary alias that refers to that map.  For each alias,
+     * we use a MapStep to map the map to the Vertex for that alias.  We then add back
+     * the alias, making it refer to the MapStep.  Between the alias restorations, we restore the
+     * traverser object back to the map.
+     *
+     * @param context
+     * @param aliases
+     * @return
+     */
+    private GroovyExpression getBaseResultExpression(OptimizationContext context,
+                                                            List<LiteralExpression> aliases) {
+
+        //Start with an anonymous traversal that gets its objects from the intermediate result variable.
+        GroovyExpression parent = factory.generateSeededTraversalExpresssion(aliases.size() > 1, context.getResultVariable());
+
+        if(aliases.isEmpty()) {
+            return parent;
+        }
+
+        //The expression we will return.
+        GroovyExpression result = parent;
+
+        //We use a temporary alias to save/restore the original value of the traverser
+        //at the start of the query.  We do this so we can set the value of the traverser
+        //back to being the map after we retrieve each alias.  If there is only one
+        //alias, the save/restore is not needed, so there is no need to create this alias.
+        if(aliases.size() > 1) {
+
+            result = factory.generateAliasExpression(result, context.getTempAliasName());
+        }
+
+        Iterator<LiteralExpression> it = aliases.iterator();
+        while(it.hasNext()) {
+            LiteralExpression curAlias = it.next();
+            //A map is only generated by Gremlin when there is more than one alias.  When there is only one
+            //alias, the intermediate variable will directly contain the vertices.`
+            if(factory.isSelectGeneratesMap(aliases.size())) {
+                //Since there is more than one alias, the current traverser object is an alias->vertex
+                //map.  We use a MapStep to map that map to the Vertex for the current alias.  This sets
+                //the current traverser object to that Vertex.  We do this by defining the closure we
+                //pass to the MapStep call [map].get(aliasName) where [map] is the expression
+                //that refers to the map.
+
+                GroovyExpression rowMapExpr = factory.getCurrentTraverserObject(factory.getClosureArgumentValue());
+                GroovyExpression getExpr = factory.generateGetSelectedValueExpression(curAlias, rowMapExpr);
+                result = factory.generateMapExpression(result, new ClosureExpression(getExpr));
+            }
+
+            //Create alias that points to the previous step.  The traverser value at that step
+            //is the Vertex associated with this alias.
+            result = factory.generateAliasExpression(result, curAlias.getValue().toString());
+            if(it.hasNext()) {
+                //Restore the current value of the traverser back to the current alias->vertex map
+                result = factory.generateBackReferenceExpression(result, false, context.getTempAliasName());
+            }
+        }
+        return result;
+    }
+
+
+
+
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/aa74c73d/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java
----------------------------------------------------------------------
diff --git a/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java
new file mode 100644
index 0000000..8997a70
--- /dev/null
+++ b/repository/src/main/java/org/apache/atlas/gremlin/optimizer/ExpressionFinder.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.atlas.gremlin.optimizer;
+
+import java.util.function.Function;
+
+import org.apache.atlas.groovy.AbstractFunctionExpression;
+import org.apache.atlas.groovy.GroovyExpression;
+
+/**
+ * Call hierarchy visitor that checks if an expression
+ * matching the specified criteria is present
+ * in the call hierarch.
+ */
+public class ExpressionFinder implements CallHierarchyVisitor {
+
+    private final Function<GroovyExpression, Boolean> predicate;
+    private boolean expressionFound = false;
+
+    public ExpressionFinder(Function<GroovyExpression, Boolean> predicate) {
+        this.predicate = predicate;
+    }
+    @Override
+    public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) {
+        if (predicate.apply(expr)) {
+            expressionFound = true;
+            return false;
+        }
+        return true;
+    }
+
+    @Override
+    public void visitNonFunctionCaller(GroovyExpression expr) {
+        if (predicate.apply(expr)) {
+            expressionFound = true;
+        }
+    }
+
+    @Override
+    public void visitNullCaller() {
+        //nothing to do
+    }
+
+    @Override
+    public boolean postVisitFunctionCaller(AbstractFunctionExpression functionCall) {
+        //nothing to do
+        return true;
+    }
+
+    public boolean isExpressionFound() {
+        return expressionFound;
+    }
+
+}