You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by ba...@apache.org on 2016/08/02 21:44:38 UTC

falcon git commit: FALCON-2104 Loss of data in GraphDB when upgrading Falcon 0.9 to 0.10

Repository: falcon
Updated Branches:
  refs/heads/master 117fdcfe5 -> dda423668


FALCON-2104 Loss of data in GraphDB when upgrading Falcon 0.9 to 0.10

Author: bvellanki <bv...@hortonworks.com>

Reviewers: "Venkat  Ranganathan <ve...@hortonworks.com>, Sowmya Ramesh <so...@apache.org>"

Closes #253 from bvellanki/FALCON-2104


Project: http://git-wip-us.apache.org/repos/asf/falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/dda42366
Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/dda42366
Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/dda42366

Branch: refs/heads/master
Commit: dda42366823e0fdb996ed70e34710fa8dfb4393c
Parents: 117fdcf
Author: bvellanki <bv...@hortonworks.com>
Authored: Tue Aug 2 14:44:30 2016 -0700
Committer: bvellanki <bv...@hortonworks.com>
Committed: Tue Aug 2 14:44:30 2016 -0700

----------------------------------------------------------------------
 .../falcon/metadata/GraphUpdateUtils.java       | 113 ++++++++++++++++++
 .../falcon/metadata/MetadataMappingService.java |   4 +-
 docs/src/site/twiki/InstallationSteps.twiki     |  15 ++-
 docs/src/site/twiki/MigrationInstructions.twiki |  29 ++++-
 release-docs/0.10/CHANGES.0.10.md               |   1 +
 src/bin/graphdbutil.sh                          | 118 +++++++++++++++++++
 6 files changed, 264 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/falcon/blob/dda42366/common/src/main/java/org/apache/falcon/metadata/GraphUpdateUtils.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/metadata/GraphUpdateUtils.java b/common/src/main/java/org/apache/falcon/metadata/GraphUpdateUtils.java
new file mode 100644
index 0000000..a256e46
--- /dev/null
+++ b/common/src/main/java/org/apache/falcon/metadata/GraphUpdateUtils.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.metadata;
+
+import com.tinkerpop.blueprints.Graph;
+import com.tinkerpop.blueprints.util.io.graphson.GraphSONReader;
+import com.tinkerpop.blueprints.util.io.graphson.GraphSONWriter;
+import org.apache.commons.configuration.Configuration;
+import org.apache.commons.io.FileUtils;
+import org.apache.falcon.FalconException;
+
+import java.io.File;
+
+/**
+ * Utility class for graph operations.
+ */
+public final class GraphUpdateUtils {
+
+    private static final String BANNER_MSG =
+            "Before running this utility please make sure that Falcon startup properties "
+                    + "has the right configuration settings for the graph database, "
+                    + "Falcon server is stopped and no other access to the graph database is being performed.";
+
+    private static final String IMPORT = "import";
+    private static final String EXPORT = "export";
+    private static final String INSTANCE_JSON_FILE = "instanceMetadata.json";
+
+    private GraphUpdateUtils() {
+    }
+
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            usage();
+            System.exit(1);
+        }
+        System.out.println(BANNER_MSG);
+        String operation = args[0].toLowerCase();
+        if (!(operation.equals(EXPORT) || operation.equals(IMPORT))) {
+            usage();
+            System.exit(1);
+        }
+        String utilsDir = args[1];
+        File utilsDirFile = new File(utilsDir);
+        if (!utilsDirFile.isDirectory()) {
+            System.err.println(utilsDir + " is not a valid directory");
+            System.exit(1);
+        }
+        String jsonFile = new File(utilsDirFile, INSTANCE_JSON_FILE).getAbsolutePath();
+        try {
+            Graph graph;
+            if (operation.equals(EXPORT)) {
+                graph = MetadataMappingService.initializeGraphDB();
+                GraphSONWriter.outputGraph(graph, jsonFile);
+                System.out.println("Exported instance metadata to " + jsonFile);
+            } else {
+                // Backup existing graphDB dir
+                Configuration graphConfig = MetadataMappingService.getConfiguration();
+                String graphStore = (String) graphConfig.getProperty("storage.directory");
+                File graphStoreFile = new File(graphStore);
+                File graphDirBackup = new File(graphStore + "_backup");
+                if (graphDirBackup.exists()) {
+                    FileUtils.deleteDirectory(graphDirBackup);
+                }
+                FileUtils.copyDirectory(graphStoreFile, graphDirBackup);
+
+                // delete graph dir first and then init graphDB to ensure IMPORT happens into empty DB.
+                FileUtils.deleteDirectory(graphStoreFile);
+                graph = MetadataMappingService.initializeGraphDB();
+
+                // Import, if there is an exception restore backup.
+                try {
+                    GraphSONReader.inputGraph(graph, jsonFile);
+                    System.out.println("Imported instance metadata from " + jsonFile);
+                } catch (Exception ex) {
+                    String errorMsg = ex.getMessage();
+                    if (graphStoreFile.exists()) {
+                        FileUtils.deleteDirectory(graphStoreFile);
+                    }
+                    FileUtils.copyDirectory(graphDirBackup, graphStoreFile);
+                    throw new FalconException(errorMsg);
+                }
+            }
+        } catch (Exception e) {
+            System.err.println("Error " + operation + "ing JSON data to " + jsonFile + ", " + e.getMessage());
+            e.printStackTrace(System.out);
+            System.exit(1);
+        }
+        System.exit(0);
+    }
+
+    public static void usage() {
+        StringBuilder usageMessage = new StringBuilder(1024);
+        usageMessage.append("usage: java ").append(GraphUpdateUtils.class.getName())
+                .append(" {").append(EXPORT).append('|').append(IMPORT).append("} <directory>");
+        System.err.println(usageMessage);
+    }
+}

http://git-wip-us.apache.org/repos/asf/falcon/blob/dda42366/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java b/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java
index 727be56..225e44a 100644
--- a/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java
+++ b/common/src/main/java/org/apache/falcon/metadata/MetadataMappingService.java
@@ -139,14 +139,14 @@ public class MetadataMappingService
         }
     }
 
-    protected Graph initializeGraphDB() {
+    public static Graph initializeGraphDB() {
         LOG.info("Initializing graph db");
         Configuration graphConfig = getConfiguration();
         validateConfiguration(graphConfig);
         return GraphFactory.open(graphConfig);
     }
 
-    private void validateConfiguration(Configuration graphConfig) {
+    private static void validateConfiguration(Configuration graphConfig) {
         // check if storage backend if configured
         if (!graphConfig.containsKey(PROPERTY_KEY_STORAGE_BACKEND)) {
             throw new FalconRuntimException("Titan GraphDB storage backend is not configured. "

http://git-wip-us.apache.org/repos/asf/falcon/blob/dda42366/docs/src/site/twiki/InstallationSteps.twiki
----------------------------------------------------------------------
diff --git a/docs/src/site/twiki/InstallationSteps.twiki b/docs/src/site/twiki/InstallationSteps.twiki
index 93b1eab..297d88e 100644
--- a/docs/src/site/twiki/InstallationSteps.twiki
+++ b/docs/src/site/twiki/InstallationSteps.twiki
@@ -27,16 +27,15 @@ $ mvn clean install
 </verbatim>
 It builds and installs the package into the local repository, for use as a dependency in other projects locally.
 
-[optionally -Dhadoop.version=<<hadoop.version>> can be appended to build for a specific version of Hadoop]
-
-*NOTE:* Falcon drops support for Hadoop-1 and only supports Hadoop-2 from Falcon 0.6 onwards
-[optionally -Doozie.version=<<oozie version>> can be appended to build with a specific version of Oozie. Oozie versions
->= 4 are supported]
-NOTE: Falcon builds with JDK 1.7/1.8 using -noverify option
-      To compile Falcon with Hive Replication, optionally "-P hadoop-2,hivedr" can be appended. For this Hive >= 1.2.0
-      and Oozie >= 4.2.0 should be available.
+[optionally -Dhadoop.version=<<hadoop.version>> can be appended to build for a specific version of hadoop]
 
+*Note 1:* Falcon drops support for Hadoop-1 and only supports Hadoop-2 from Falcon 0.6 onwards
+          Falcon builds with JDK 1.7 using -noverify option
 
+*Note 2:* To compile Falcon with addon extensions, append additional profiles to build command using syntax -P<<profile1,profile2>>
+          For Hive Mirroring extension, use profile "hivedr". Hive >= 1.2.0 and Oozie >= 4.2.0 is required
+          For HDFS Snapshot mirroring extension, use profile "hdfs-snapshot-mirroring". Hadoop >= 2.7.0 is required
+          For ADF integration, use profile "adf"
 
 ---+++Step 3 - Package and Deploy Falcon
 

http://git-wip-us.apache.org/repos/asf/falcon/blob/dda42366/docs/src/site/twiki/MigrationInstructions.twiki
----------------------------------------------------------------------
diff --git a/docs/src/site/twiki/MigrationInstructions.twiki b/docs/src/site/twiki/MigrationInstructions.twiki
index 7c0e027..a11dbc4 100644
--- a/docs/src/site/twiki/MigrationInstructions.twiki
+++ b/docs/src/site/twiki/MigrationInstructions.twiki
@@ -1,15 +1,32 @@
 ---+ Migration Instructions
 
----++ Migrate from 0.5-incubating to 0.6-incubating
+---++ Migrate from 0.9 to 0.10
 
-This is a placeholder wiki for migration instructions from falcon 0.5-incubating to 0.6-incubating.
+FALCON-1333 (Instance Search feature) requires Falcon to use titan-berkeleyje version 0.5.4 to support indexing.
+Up until version 0.9 - Falcon used titan-berkeleyje-jre6 version 0.4.2. GraphDB created by version 0.4.2 cannot be
+read by version 0.5.4. The solution is to migrate the GraphDB to be compatible with Falcon 0.10 release. Please make
+sure that no falcon server is running while performing the migration.
 
----+++ Update Entities
+---+++ 1. Install Falcon 0.10
+Install Falcon 0.10 by following the [[InstallationSteps][Installation Steps]]. Do not start the falcon server yet.
+The tool to migrate graphDB is packaged with 0.10 Falcon server in falcon-common-0.10.jar.
 
----+++ Change cluster dir permissions
+---+++ 2. Export GraphDB to JSON file using Falcon 0.9
+Please run the following command to generate the JSON file.
 
----+++ Enable/Disable TLS
+<verbatim>
+ $FALCON_HOME/bin/graphdbutil.sh export <<java_home>> <<hadoop_home>> <<falcon_0.9_home>> <<path_to_falcon-common-0.10.jar>> /jsonFile/dir/
+</verbatim>
 
----+++ Authorization
+This command will create /jsonFile/dir/instanceMetadata.json
 
+---+++ 3. Import GraphDB from JSON file using Falcon 0.10
+Please run the following command to import graphDB from the JSON file. The location of graphDB will be based on property
+"*.falcon.graph.storage.directory" set in startup.properties file.
+
+<verbatim>
+  $FALCON_HOME/bin/graphdbutil.sh import <<java_home>> <<hadoop_home>> <<falcon_0.10_home>> <<path_to_falcon-common-0.10.jar>> /jsonFile/dir/
+</verbatim>
+
+This command will import from /jsonFile/dir/instanceMetadata.json, now start the Falcon 0.10 server.
 

http://git-wip-us.apache.org/repos/asf/falcon/blob/dda42366/release-docs/0.10/CHANGES.0.10.md
----------------------------------------------------------------------
diff --git a/release-docs/0.10/CHANGES.0.10.md b/release-docs/0.10/CHANGES.0.10.md
index 999af01..668dc97 100644
--- a/release-docs/0.10/CHANGES.0.10.md
+++ b/release-docs/0.10/CHANGES.0.10.md
@@ -54,6 +54,7 @@
 
 | JIRA | Summary | Priority | Component | Reporter | Contributor |
 |:---- |:---- | :--- |:---- |:---- |:---- |
+| [FALCON-2104](https://issues.apache.org/jira/browse/FALCON-2104) | Loss of data in GraphDB when upgrading Falcon from 0.9 to 0.10. |  Blocker | . | Balu Vellanki | Balu Vellanki |
 | [FALCON-2100](https://issues.apache.org/jira/browse/FALCON-2100) | Remove dependency on com.vividsolutions.jts |  Major | . | Balu Vellanki | Balu Vellanki |
 | [FALCON-2090](https://issues.apache.org/jira/browse/FALCON-2090) | HDFS Snapshot failed with UnknownHostException when scheduling in HA Mode |  Critical | replication | Murali Ramasami | Balu Vellanki |
 | [FALCON-2088](https://issues.apache.org/jira/browse/FALCON-2088) | Entity submission fails with EntityNotRegisteredException in distributed mode |  Blocker | feed, prism, process | Pragya Mittal | Praveen Adlakha |

http://git-wip-us.apache.org/repos/asf/falcon/blob/dda42366/src/bin/graphdbutil.sh
----------------------------------------------------------------------
diff --git a/src/bin/graphdbutil.sh b/src/bin/graphdbutil.sh
new file mode 100644
index 0000000..151ec2f
--- /dev/null
+++ b/src/bin/graphdbutil.sh
@@ -0,0 +1,118 @@
+#!/bin/sh
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License. See accompanying LICENSE file.
+#
+
+
+usage() {
+  echo "usage: $0  operation java-home hadoop-home falcon-home falcon-common-jar input/out-dir"
+  echo "  where operation is either export OR import"
+  echo "        java-home is the java installation location"
+  echo "        hadoop-home is the hadoop installation location"
+  echo "        falcon-home is the falcon home installation location"
+  echo "        falcon-common-jar is the falcon-common-<version>.jar location with GraphUtils"
+  echo "        input/output dir is the directory for the graph data"
+  exit 1
+}
+
+if [ $# != 6 ]; then
+  usage
+fi
+
+operation=$1
+java_home=$2
+hadoop_home=$3
+falcon_home=$4
+falcon_common_jar=$5
+util_dir=$6
+
+export=0
+import=0
+keep_temp=Y
+
+case $operation in
+   import) import=1
+           ;;
+   export) export=1
+           ;;
+   *)     echo "Unknown operation $operation"
+          usage
+esac
+
+if [ -d  $java_home -a -f $java_home/bin/java -a -f $java_home/bin/jar ] ; then
+  :
+else
+  echo "Invalid java home directory $java_home"
+  usage
+fi
+
+if [ -d  $hadoop_home -a -f $hadoop_home/bin/hadoop ] ; then
+  :
+else
+  echo "Invalid hadoop home directory $hadoop_home"
+  usage
+fi
+
+if [ -d  $falcon_home -a -f $falcon_home/bin/falcon ] ; then
+  :
+else
+  echo "Invalid falcon home directory $falcon_home"
+  usage
+fi
+
+falcon_war=$falcon_home/server/webapp/falcon.war
+if [ ! -f $falcon_war ]; then
+  echo "Falcon war file $falcon_war not available"
+  usage
+fi
+
+if [ ! -f $falcon_common_jar ]; then
+  echo "Falcon commons jar file $falcon_common_jar not available"
+  usage
+fi
+
+
+util_tmpdir=/tmp/falcon-graphutil-tmp-$$
+echo "Using $util_tmpdir as temporary directory"
+trap "rm -rf $util_tmpdir" 0 2 3 15
+rm -rf $util_tmpdir
+mkdir -p $util_tmpdir
+
+if [ ! -d $util_dir ]; then
+   echo "Directory $util_dir does not exist"
+   usage
+fi
+
+if [ x$import = x1 ]; then
+   if [ ! -f $util_dir/instanceMetadata.json ]; then
+      echo "Metadata file $util_dir/instanceMetadata.json not present"
+      usage
+   fi
+fi
+
+cd $util_tmpdir
+jar -xf $falcon_war
+rm ./WEB-INF/lib/jackson*  ./WEB-INF/lib/falcon-common*.jar ./WEB-INF/lib/slf4j* ./WEB-INF/lib/activemq*
+cp $falcon_common_jar ./WEB-INF/lib/
+
+JAVA_HOME=$java_home
+export PATH=$JAVA_HOME/bin:$PATH
+export CLASSPATH="$falcon_home/conf:./WEB-INF/lib/*:`$hadoop_home/bin/hadoop classpath`"
+echo "Using classpath $CLASSPATH"
+java -Dfalcon.log.dir=/tmp/ org.apache.falcon.metadata.GraphUpdateUtils $operation $util_dir
+
+if [ x$keep_temp = xY ]; then
+  :
+else
+  rm -rf $util_tmpdir
+fi
\ No newline at end of file