You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by pe...@apache.org on 2016/03/16 16:12:06 UTC

falcon git commit: FALCON-1334 Improve search performance with Titan graph database indexing

Repository: falcon
Updated Branches:
  refs/heads/master aec6084e7 -> 37468da5a


FALCON-1334 Improve search performance with Titan graph database indexing

Upgraded Titan from 0.4.2 to 0.5.4. Added edge indexes on instance status and nominal time. More indexes may be added later, based on our user study of frequently used filters.

Author: yzheng-hortonworks <yz...@hortonworks.com>

Reviewers: Balu <bv...@hortonworks.com>, Peeyush Bishnoi <pe...@apache.org>

Closes #69 from yzheng-hortonworks/FALCON-1334 and squashes the following commits:

51d5530 [yzheng-hortonworks] keep property falcon.graph.serialize.path (needed in two tests)
7939b81 [yzheng-hortonworks] FALCON-1334 Improve search performance with Titan graph database indexing


Project: http://git-wip-us.apache.org/repos/asf/falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/37468da5
Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/37468da5
Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/37468da5

Branch: refs/heads/master
Commit: 37468da5af90bb9180eef1eaecada53ed1f74ebf
Parents: aec6084
Author: yzheng-hortonworks <yz...@hortonworks.com>
Authored: Wed Mar 16 20:39:14 2016 +0530
Committer: peeyush b <pb...@hortonworks.com>
Committed: Wed Mar 16 20:39:14 2016 +0530

----------------------------------------------------------------------
 common/pom.xml                                  |  4 +-
 .../falcon/metadata/MetadataMappingService.java | 46 +++++++++++++++-----
 common/src/main/resources/startup.properties    |  3 --
 pom.xml                                         |  8 ++--
 src/conf/startup.properties                     |  5 +--
 5 files changed, 41 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/falcon/blob/37468da5/common/pom.xml
----------------------------------------------------------------------
diff --git a/common/pom.xml b/common/pom.xml
index 2e01282..df28f9b 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -180,12 +180,12 @@
 
         <dependency>
             <groupId>com.thinkaurelius.titan</groupId>
-            <artifactId>titan-core-jre6</artifactId>
+            <artifactId>titan-core</artifactId>
         </dependency>
 
         <dependency>
             <groupId>com.thinkaurelius.titan</groupId>
-            <artifactId>titan-berkeleyje-jre6</artifactId>
+            <artifactId>titan-berkeleyje</artifactId>
         </dependency>
     </dependencies>
 

http://git-wip-us.apache.org/repos/asf/falcon/blob/37468da5/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java b/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java
index 7d22fd5..9f4920c 100644
--- a/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java
+++ b/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java
@@ -18,7 +18,12 @@
 
 package org.apache.falcon.metadata;
 
+import com.thinkaurelius.titan.core.EdgeLabel;
+import com.thinkaurelius.titan.core.Order;
+import com.thinkaurelius.titan.core.PropertyKey;
+import com.thinkaurelius.titan.core.schema.TitanManagement;
 import com.thinkaurelius.titan.graphdb.blueprints.TitanBlueprintsGraph;
+import com.tinkerpop.blueprints.Direction;
 import com.tinkerpop.blueprints.Edge;
 import com.tinkerpop.blueprints.Graph;
 import com.tinkerpop.blueprints.GraphFactory;
@@ -150,24 +155,41 @@ public class MetadataMappingService
         makeKeyIndex(RelationshipProperty.TYPE.getName());
         makeKeyIndex(RelationshipProperty.TIMESTAMP.getName());
         makeKeyIndex(RelationshipProperty.VERSION.getName());
+        makeInstanceIndex();
+    }
+
+    private void makeInstanceIndex() {
+        // build index for instance search
+        TitanManagement titanManagement = getTitanGraph().getManagementSystem();
+        PropertyKey statusKey = makePropertyKey(titanManagement, RelationshipProperty.STATUS.getName());
+        PropertyKey nominalTimeKey = makePropertyKey(titanManagement, RelationshipProperty.NOMINAL_TIME.getName());
+        EdgeLabel edgeLabel = titanManagement.makeEdgeLabel(RelationshipLabel.INSTANCE_ENTITY_EDGE.getName()).make();
+        titanManagement.buildEdgeIndex(edgeLabel, "indexInstanceN", Direction.OUT, Order.DESC, nominalTimeKey);
+        titanManagement.buildEdgeIndex(edgeLabel, "indexInstanceSN", Direction.OUT, Order.DESC,
+                statusKey, nominalTimeKey);
+        titanManagement.commit();
     }
 
     private void makeNameKeyIndex() {
-        getTitanGraph().makeKey(RelationshipProperty.NAME.getName())
-                .dataType(String.class)
-                .indexed(Vertex.class)
-                .indexed(Edge.class)
-                // .unique() todo this ought to be unique?
-                .make();
-        getTitanGraph().commit();
+        TitanManagement titanManagement = getTitanGraph().getManagementSystem();
+        PropertyKey nameKey = makePropertyKey(titanManagement, RelationshipProperty.NAME.getName());
+        titanManagement.buildIndex("indexByVertexName", Vertex.class).addKey(nameKey).buildCompositeIndex();
+        titanManagement.buildIndex("indexByEdgeName", Edge.class).addKey(nameKey).buildCompositeIndex();
+        titanManagement.commit();
     }
 
     private void makeKeyIndex(String key) {
-        getTitanGraph().makeKey(key)
-                .dataType(String.class)
-                .indexed(Vertex.class)
-                .make();
-        getTitanGraph().commit();
+        TitanManagement titanManagement = getTitanGraph().getManagementSystem();
+        PropertyKey propertyKey = makePropertyKey(titanManagement, key);
+        titanManagement.buildIndex("indexBy" + key, Vertex.class).addKey(propertyKey).buildCompositeIndex();
+        titanManagement.commit();
+    }
+
+    private PropertyKey makePropertyKey(TitanManagement titanManagement, String key) {
+        if (titanManagement.containsPropertyKey(key)) {
+            return titanManagement.getPropertyKey(key);
+        }
+        return titanManagement.makePropertyKey(key).dataType(String.class).make();
     }
 
     public Graph getGraph() {

http://git-wip-us.apache.org/repos/asf/falcon/blob/37468da5/common/src/main/resources/startup.properties
----------------------------------------------------------------------
diff --git a/common/src/main/resources/startup.properties b/common/src/main/resources/startup.properties
index 123d63c..2660f24 100644
--- a/common/src/main/resources/startup.properties
+++ b/common/src/main/resources/startup.properties
@@ -152,9 +152,6 @@ it.workflow.execution.listeners=org.apache.falcon.catalog.CatalogPartitionHandle
 *.falcon.graph.storage.directory=${user.dir}/target/graphdb
 *.falcon.graph.storage.backend=berkeleyje
 *.falcon.graph.serialize.path=${user.dir}/target/graphdb
-*.falcon.graph.preserve.history=false
-*.falcon.graph.transaction.retry.count=3
-*.falcon.graph.transaction.retry.delay=5
 
 # Avoid acquiring read lock when iterating over large graphs
 # See http://s3.thinkaurelius.com/docs/titan/0.5.4/bdb.html

http://git-wip-us.apache.org/repos/asf/falcon/blob/37468da5/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 271b477..689fd88 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1067,8 +1067,8 @@
 
             <dependency>
                 <groupId>com.thinkaurelius.titan</groupId>
-                <artifactId>titan-core-jre6</artifactId>
-                <version>0.4.2</version>
+                <artifactId>titan-core</artifactId>
+                <version>0.5.4</version>
                 <exclusions>
                     <!-- rexster does not work with servlet-api -->
                     <exclusion>
@@ -1097,8 +1097,8 @@
 
             <dependency>
                 <groupId>com.thinkaurelius.titan</groupId>
-                <artifactId>titan-berkeleyje-jre6</artifactId>
-                <version>0.4.2</version>
+                <artifactId>titan-berkeleyje</artifactId>
+                <version>0.5.4</version>
             </dependency>
 
             <dependency>

http://git-wip-us.apache.org/repos/asf/falcon/blob/37468da5/src/conf/startup.properties
----------------------------------------------------------------------
diff --git a/src/conf/startup.properties b/src/conf/startup.properties
index 51a791e..f489096 100644
--- a/src/conf/startup.properties
+++ b/src/conf/startup.properties
@@ -164,10 +164,7 @@ prism.configstore.listeners=org.apache.falcon.entity.v0.EntityGraph,\
 # Graph Storage
 *.falcon.graph.storage.directory=/${falcon.home}/data/graphdb
 *.falcon.graph.storage.backend=berkeleyje
-*.falcon.graph.serialize.path=/${falcon.home}/data
-*.falcon.graph.preserve.history=false
-*.falcon.graph.transaction.retry.count=3
-*.falcon.graph.transaction.retry.delay=5
+*.falcon.graph.serialize.path=${user.dir}/target/graphdb
 
 # Avoid acquiring read lock when iterating over large graphs
 # See http://s3.thinkaurelius.com/docs/titan/0.5.4/bdb.html