You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@falcon.apache.org by ba...@apache.org on 2016/04/13 01:05:57 UTC
[3/3] falcon git commit: FALCON-1107 Move trusted extensions
processing to server side
FALCON-1107 Move trusted extensions processing to server side
Ignore any documentation issues as they will be addressed in https://issues.apache.org/jira/browse/FALCON-1106. Thanks!
Author: Sowmya Ramesh <sr...@hortonworks.com>
Reviewers: "Balu Vellanki <ba...@apache.org>, Venkat Ranganathan <ve...@hortonworks.com>"
Closes #92 from sowmyaramesh/FALCON-1107
Project: http://git-wip-us.apache.org/repos/asf/falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/95bf312f
Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/95bf312f
Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/95bf312f
Branch: refs/heads/master
Commit: 95bf312f46bc96bc247645da6500b495c21aede3
Parents: c52961c
Author: Sowmya Ramesh <sr...@hortonworks.com>
Authored: Tue Apr 12 16:05:48 2016 -0700
Committer: bvellanki <bv...@hortonworks.com>
Committed: Tue Apr 12 16:05:48 2016 -0700
----------------------------------------------------------------------
addons/extensions/hdfs-mirroring/README | 29 ++
addons/extensions/hdfs-mirroring/pom.xml | 32 ++
.../main/META/hdfs-mirroring-properties.json | 137 +++++++
.../runtime/hdfs-mirroring-template.xml | 45 +++
.../runtime/hdfs-mirroring-workflow.xml | 82 +++++
addons/extensions/hive-mirroring/README | 58 +++
addons/extensions/hive-mirroring/pom.xml | 32 ++
.../main/META/hive-mirroring-properties.json | 179 +++++++++
.../META/hive-mirroring-secure-properties.json | 191 ++++++++++
.../runtime/hive-mirroring-secure-template.xml | 45 +++
.../runtime/hive-mirroring-secure-workflow.xml | 363 +++++++++++++++++++
.../runtime/hive-mirroring-template.xml | 45 +++
.../runtime/hive-mirroring-workflow.xml | 255 +++++++++++++
.../java/org/apache/falcon/hive/HiveDRArgs.java | 9 +-
.../org/apache/falcon/hive/HiveDROptions.java | 38 +-
addons/recipes/hdfs-replication/README.txt | 29 --
addons/recipes/hdfs-replication/pom.xml | 32 --
.../resources/hdfs-replication-template.xml | 44 ---
.../resources/hdfs-replication-workflow.xml | 82 -----
.../main/resources/hdfs-replication.properties | 79 ----
.../recipes/hive-disaster-recovery/README.txt | 58 ---
addons/recipes/hive-disaster-recovery/pom.xml | 32 --
.../hive-disaster-recovery-secure-template.xml | 45 ---
.../hive-disaster-recovery-secure-workflow.xml | 363 -------------------
.../hive-disaster-recovery-secure.properties | 110 ------
.../hive-disaster-recovery-template.xml | 45 ---
.../hive-disaster-recovery-workflow.xml | 249 -------------
.../resources/hive-disaster-recovery.properties | 98 -----
.../falcon/catalog/AbstractCatalogService.java | 12 +
.../falcon/catalog/HiveCatalogService.java | 16 +
common/src/main/resources/startup.properties | 2 +
extensions/pom.xml | 112 ++++++
.../falcon/extensions/AbstractExtension.java | 58 +++
.../org/apache/falcon/extensions/Extension.java | 102 ++++++
.../falcon/extensions/ExtensionBuilder.java | 32 ++
.../falcon/extensions/ExtensionFactory.java | 48 +++
.../falcon/extensions/ExtensionProperties.java | 89 +++++
.../falcon/extensions/ExtensionService.java | 49 +++
.../mirroring/hdfs/HdfsMirroringExtension.java | 111 ++++++
.../hdfs/HdfsMirroringExtensionProperties.java | 65 ++++
.../mirroring/hive/HiveMirroringExtension.java | 231 ++++++++++++
.../hive/HiveMirroringExtensionProperties.java | 92 +++++
.../falcon/extensions/store/ExtensionStore.java | 215 +++++++++++
.../util/ExtensionProcessBuilderUtils.java | 309 ++++++++++++++++
.../falcon/extensions/ExtensionServiceTest.java | 53 +++
.../apache/falcon/extensions/ExtensionTest.java | 160 ++++++++
.../store/AbstractTestExtensionStore.java | 103 ++++++
.../extensions/store/ExtensionStoreTest.java | 65 ++++
.../src/test/resources/backup-cluster-0.1.xml | 44 +++
.../test/resources/hdfs-mirroring-template.xml | 45 +++
.../test/resources/hive-mirroring-template.xml | 45 +++
.../src/test/resources/primary-cluster-0.1.xml | 44 +++
oozie/pom.xml | 6 +
.../service/SharedLibraryHostingService.java | 91 ++++-
pom.xml | 2 +
src/main/assemblies/distributed-package.xml | 79 +++-
src/main/assemblies/standalone-package.xml | 80 +++-
57 files changed, 3851 insertions(+), 1315 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/README
----------------------------------------------------------------------
diff --git a/addons/extensions/hdfs-mirroring/README b/addons/extensions/hdfs-mirroring/README
new file mode 100644
index 0000000..78f1726
--- /dev/null
+++ b/addons/extensions/hdfs-mirroring/README
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+HDFS Directory Replication Extension
+
+Overview
+This extension implements replicating arbitrary directories on HDFS from one
+Hadoop cluster to another Hadoop cluster.
+This piggybacks on the replication solution in Falcon, which uses the DistCp tool.
+
+Use Case
+* Copy directories between HDFS clusters with out-of-date partitions
+* Archive directories from HDFS to Cloud. Ex: S3, Azure WASB
+
+Limitations
+As the data volume and number of files grow, this can get inefficient.
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/pom.xml
----------------------------------------------------------------------
diff --git a/addons/extensions/hdfs-mirroring/pom.xml b/addons/extensions/hdfs-mirroring/pom.xml
new file mode 100644
index 0000000..cb9304e
--- /dev/null
+++ b/addons/extensions/hdfs-mirroring/pom.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.apache.falcon.extensions</groupId>
+ <artifactId>falcon-hdfs-mirroring-extension</artifactId>
+ <version>0.10-SNAPSHOT</version>
+ <description>Apache Falcon sample Hdfs mirroring extension</description>
+ <name>Apache Falcon sample Hdfs mirroring extension</name>
+ <packaging>jar</packaging>
+</project>
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/src/main/META/hdfs-mirroring-properties.json
----------------------------------------------------------------------
diff --git a/addons/extensions/hdfs-mirroring/src/main/META/hdfs-mirroring-properties.json b/addons/extensions/hdfs-mirroring/src/main/META/hdfs-mirroring-properties.json
new file mode 100644
index 0000000..f1b4775
--- /dev/null
+++ b/addons/extensions/hdfs-mirroring/src/main/META/hdfs-mirroring-properties.json
@@ -0,0 +1,137 @@
+{
+ "shortDescription": "This extension implements replicating arbitrary directories on HDFS from one Hadoop cluster to another Hadoop cluster. This piggy backs on replication solution in Falcon which uses the DistCp tool.",
+ "properties":[
+ {
+ "propertyName":"jobName",
+ "required":true,
+ "description":"Unique job name",
+ "example":"hdfs-monthly-sales-dr"
+ },
+ {
+ "propertyName":"jobClusterName",
+ "required":true,
+ "description":"Cluster where job should run",
+ "example":"backupCluster"
+ },
+ {
+ "propertyName":"jobValidityStart",
+ "required":true,
+ "description":"Job validity start time",
+ "example":"2016-03-03T00:00Z"
+ },
+ {
+ "propertyName":"jobValidityEnd",
+ "required":true,
+ "description":"Job validity end time",
+ "example":"2018-03-13T00:00Z"
+ },
+ {
+ "propertyName":"jobFrequency",
+ "required":true,
+ "description":"job frequency. Valid frequency types are minutes, hours, days, months",
+ "example":"months(1)"
+ },
+ {
+ "propertyName":"jobTimezone",
+ "required":false,
+ "description":"Time zone for the job",
+ "example":"GMT"
+ },
+ {
+ "propertyName":"jobTags",
+ "required":false,
+ "description":"list of comma separated tags. Key Value Pairs, separated by comma",
+ "example":"consumer=consumer@xyz.com, owner=producer@xyz.com, _department_type=forecasting"
+ },
+ {
+ "propertyName":"jobRetryPolicy",
+ "required":false,
+ "description":"Job retry policy",
+ "example":"periodic"
+ },
+ {
+ "propertyName":"jobRetryDelay",
+ "required":false,
+ "description":"Job retry delay",
+ "example":"minutes(30)"
+ },
+ {
+ "propertyName":"jobRetryAttempts",
+ "required":false,
+ "description":"Job retry attempts",
+ "example":"3"
+ },
+ {
+ "propertyName":"jobRetryOnTimeout",
+ "required":false,
+ "description":"Job retry on timeout",
+ "example":"true"
+ },
+ {
+ "propertyName":"jobAclOwner",
+ "required":false,
+ "description":"ACL owner",
+ "example":"ambari-qa"
+ },
+ {
+ "propertyName":"jobAclGroup",
+ "required":false,
+ "description":"ACL group",
+ "example":"users"
+ },
+ {
+ "propertyName":"jobAclPermission",
+ "required":false,
+ "description":"ACL permission",
+ "example":"0x755"
+ },
+ {
+ "propertyName":"sourceDir",
+ "required":true,
+ "description":"Multiple hdfs comma separated source directories",
+ "example":"/user/ambari-qa/primaryCluster/dr/input1, /user/ambari-qa/primaryCluster/dr/input2"
+ },
+ {
+ "propertyName":"sourceCluster",
+ "required":true,
+ "description":"Source cluster for hdfs mirroring",
+ "example":"primaryCluster"
+ },
+ {
+ "propertyName":"targetDir",
+ "required":true,
+ "description":"Target hdfs directory",
+ "example":"/user/ambari-qa/backupCluster/dr"
+ },
+ {
+ "propertyName":"targetCluster",
+ "required":true,
+ "description":"Target cluster for hdfs mirroring",
+ "example":"backupCluster"
+ },
+ {
+ "propertyName":"distcpMaxMaps",
+ "required":false,
+ "description":"Maximum number of mappers for DistCP",
+ "example":"1"
+ },
+ {
+ "propertyName":"distcpMapBandwidth",
+ "required":false,
+ "description":"Bandwidth in MB for each mapper in DistCP",
+ "example":"100"
+ },
+ {
+ "propertyName":"jobNotificationType",
+ "required":false,
+ "description":"Email Notification for Falcon instance completion",
+ "example":"email"
+ },
+ {
+ "propertyName":"jobNotificationReceivers",
+ "required":false,
+ "description":"Comma separated email Id's",
+ "example":"user1@gmail.com, user2@gmail.com"
+ }
+ ]
+}
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-template.xml
----------------------------------------------------------------------
diff --git a/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-template.xml b/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-template.xml
new file mode 100644
index 0000000..d511d00
--- /dev/null
+++ b/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-template.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<process name="##jobName##" xmlns="uri:falcon:process:0.1">
+ <clusters>
+ <!-- source -->
+ <cluster name="##jobClusterName##">
+ <validity end="##jobValidityEnd##" start="##jobValidityStart##"/>
+ </cluster>
+ </clusters>
+
+ <tags/>
+
+ <parallel>1</parallel>
+ <!-- Dir replication needs to run only once to catch up -->
+ <order>LAST_ONLY</order>
+ <frequency>##jobFrequency##</frequency>
+ <timezone>##jobTimezone##</timezone>
+
+ <properties>
+ <property name="oozie.wf.subworkflow.classpath.inheritance" value="true"/>
+ </properties>
+
+ <workflow name="##jobWorkflowName##" engine="##jobWorkflowEngine##"
+ path="##jobWorkflowPath##" lib="##jobWorkflowLibPath##"/>
+ <retry policy="##jobRetryPolicy##" delay="##jobRetryDelay##" attempts="3"/>
+ <notification type="##jobNotificationType##" to="##jobNotificationReceivers##"/>
+ <ACL/>
+</process>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-workflow.xml
----------------------------------------------------------------------
diff --git a/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-workflow.xml b/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-workflow.xml
new file mode 100644
index 0000000..1e2282c
--- /dev/null
+++ b/addons/extensions/hdfs-mirroring/src/main/resources/runtime/hdfs-mirroring-workflow.xml
@@ -0,0 +1,82 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-dr-fs-workflow'>
+ <start to='dr-replication'/>
+ <!-- Replication action -->
+ <action name="dr-replication">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- hadoop 2 parameter -->
+ <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <property>
+ <name>oozie.use.system.libpath</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>distcp</value>
+ </property>
+ <property>
+ <name>oozie.launcher.oozie.libpath</name>
+ <value>${wf:conf("falcon.libpath")}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapreduce.job.hdfs-servers</name>
+ <value>${sourceClusterFS},${targetClusterFS}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.replication.FeedReplicator</main-class>
+ <arg>-Dmapred.job.queue.name=${queueName}</arg>
+ <arg>-Dmapred.job.priority=${jobPriority}</arg>
+ <arg>-maxMaps</arg>
+ <arg>${distcpMaxMaps}</arg>
+ <arg>-mapBandwidth</arg>
+ <arg>${distcpMapBandwidth}</arg>
+ <arg>-sourcePaths</arg>
+ <arg>${sourceDir}</arg>
+ <arg>-targetPath</arg>
+ <arg>${targetClusterFS}${targetDir}</arg>
+ <arg>-falconFeedStorageType</arg>
+ <arg>FILESYSTEM</arg>
+ <arg>-availabilityFlag</arg>
+ <arg>${availabilityFlag == 'NA' ? "NA" : availabilityFlag}</arg>
+ <arg>-counterLogDir</arg>
+ <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}</arg>
+ </java>
+ <ok to="end"/>
+ <error to="fail"/>
+ </action>
+ <kill name="fail">
+ <message>
+ Workflow action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+ </message>
+ </kill>
+ <end name="end"/>
+</workflow-app>
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/README
----------------------------------------------------------------------
diff --git a/addons/extensions/hive-mirroring/README b/addons/extensions/hive-mirroring/README
new file mode 100644
index 0000000..827f7e5
--- /dev/null
+++ b/addons/extensions/hive-mirroring/README
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Hive Metastore Disaster Recovery Extension
+
+Overview
+This extension implements replicating hive metadata and data from one
+Hadoop cluster to another Hadoop cluster.
+This piggy backs on replication solution in Falcon which uses the DistCp tool.
+
+Use Case
+*
+*
+
+Limitations
+*
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Hive Metastore Disaster Recovery Extension
+
+Overview
+This extension implements replicating hive metadata and data from one
+Hadoop cluster to another Hadoop cluster.
+This piggy backs on replication solution in Falcon which uses the DistCp tool.
+
+Use Case
+*
+*
+
+Limitations
+*
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/pom.xml
----------------------------------------------------------------------
diff --git a/addons/extensions/hive-mirroring/pom.xml b/addons/extensions/hive-mirroring/pom.xml
new file mode 100644
index 0000000..adfb0be
--- /dev/null
+++ b/addons/extensions/hive-mirroring/pom.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.apache.falcon.extensions</groupId>
+ <artifactId>falcon-hive-mirroring-extension</artifactId>
+ <version>0.10-SNAPSHOT</version>
+ <description>Apache Falcon sample Hive mirroring extension</description>
+ <name>Apache Falcon sample Hive mirroring extension</name>
+ <packaging>jar</packaging>
+</project>
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-properties.json
----------------------------------------------------------------------
diff --git a/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-properties.json b/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-properties.json
new file mode 100644
index 0000000..a9f3d1b
--- /dev/null
+++ b/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-properties.json
@@ -0,0 +1,179 @@
+{
+ "shortDescription":"This extension implements replicating hive metadata and data from one Hadoop cluster to another Hadoop cluster.",
+ "properties":[
+ {
+ "propertyName":"jobName",
+ "required":true,
+ "description":"Unique job name",
+ "example":"hive-monthly-sales-dr"
+ },
+ {
+ "propertyName":"jobClusterName",
+ "required":true,
+ "description":"Cluster where job should run",
+ "example":"backupCluster"
+ },
+ {
+ "propertyName":"jobValidityStart",
+ "required":true,
+ "description":"Job validity start time",
+ "example":"2016-03-03T00:00Z"
+ },
+ {
+ "propertyName":"jobValidityEnd",
+ "required":true,
+ "description":"Job validity end time",
+ "example":"2018-03-13T00:00Z"
+ },
+ {
+ "propertyName":"jobFrequency",
+ "required":true,
+ "description":"job frequency. Valid frequency types are minutes, hours, days, months",
+ "example":"months(1)"
+ },
+ {
+ "propertyName":"jobTimezone",
+ "required":false,
+ "description":"Time zone for the job",
+ "example":"GMT"
+ },
+ {
+ "propertyName":"jobTags",
+ "required":false,
+ "description":"list of comma separated tags. Key Value Pairs, separated by comma",
+ "example":"consumer=consumer@xyz.com, owner=producer@xyz.com, _department_type=forecasting"
+ },
+ {
+ "propertyName":"jobRetryPolicy",
+ "required":false,
+ "description":"Job retry policy",
+ "example":"periodic"
+ },
+ {
+ "propertyName":"jobRetryDelay",
+ "required":false,
+ "description":"Job retry delay",
+ "example":"minutes(30)"
+ },
+ {
+ "propertyName":"jobRetryAttempts",
+ "required":false,
+ "description":"Job retry attempts",
+ "example":"3"
+ },
+ {
+ "propertyName":"jobRetryOnTimeout",
+ "required":false,
+ "description":"Job retry on timeout",
+ "example":true
+ },
+ {
+ "propertyName":"jobAclOwner",
+ "required":false,
+ "description":"ACL owner",
+ "example":"ambari-qa"
+ },
+ {
+ "propertyName":"jobAclGroup",
+ "required":false,
+ "description":"ACL group",
+ "example":"users"
+ },
+ {
+ "propertyName":"jobAclPermission",
+ "required":false,
+ "description":"ACL permission",
+ "example":"0x755"
+ },
+ {
+ "propertyName":"sourceCluster",
+ "required":true,
+ "description":"Source cluster for hive mirroring",
+ "example":"primaryCluster"
+ },
+ {
+ "propertyName":"sourceHiveServer2Uri",
+ "required":true,
+ "description":"Hive2 server end point",
+ "example":"hive2://localhost:10000"
+ },
+ {
+ "propertyName":"sourceDatabases",
+ "required":true,
+ "description":"For DB level replication specify multiple comma separated databases to replicate",
+ "example":"salesDb"
+ },
+ {
+ "propertyName":"sourceTables",
+ "required":false,
+ "description":"For table level replication specify multiple comma separated tables to replicate",
+ "example":"monthly_sales1, monthly_sales2"
+ },
+ {
+ "propertyName":"sourceStagingPath",
+ "required":false,
+ "description":"Staging path on source",
+ "example":"/apps/hive/dr"
+ },
+ {
+ "propertyName":"targetCluster",
+ "required":true,
+ "description":"target cluster for hive mirroring",
+ "example":"backupCluster"
+ },
+ {
+ "propertyName":"targetHiveServer2Uri",
+ "required":true,
+ "description":"Hive2 server end point",
+ "example":"hive2://localhost:10000"
+ },
+ {
+ "propertyName":"targetStagingPath",
+ "required":false,
+ "description":"Staging path on target",
+ "example":"/apps/hive/dr"
+ },
+ {
+ "propertyName":"maxEvents",
+ "required":false,
+ "description":"To ceil the max events processed each time the job runs. Set it to max value depending on your bandwidth limit. Setting it to -1 will process all the events but can hog up the bandwidth. Use it judiciously!",
+ "example":"10000"
+ },
+ {
+ "propertyName":"replicationMaxMaps",
+ "required":false,
+ "description":"Maximum number of mappers to use for hive replication",
+ "example":"1"
+ },
+ {
+ "propertyName":"distcpMaxMaps",
+ "required":false,
+ "description":"Maximum number of mappers for DistCP",
+ "example":"1"
+ },
+ {
+ "propertyName":"distcpMapBandwidth",
+ "required":false,
+ "description":"Bandwidth in MB for each mapper in DistCP",
+ "example":"100"
+ },
+ {
+ "propertyName":"tdeEncryptionEnabled",
+ "required":false,
+ "description":"Set this flag to true if TDE encryption is enabled on source and target. Default value is false",
+ "example":"true"
+ },
+ {
+ "propertyName":"jobNotificationType",
+ "required":false,
+ "description":"Email Notification for Falcon instance completion",
+ "example":"email"
+ },
+ {
+ "propertyName":"jobNotificationReceivers",
+ "required":false,
+ "description":"Comma separated email Id's",
+ "example":"user1@gmail.com, user2@gmail.com"
+ }
+ ]
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-secure-properties.json
----------------------------------------------------------------------
diff --git a/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-secure-properties.json b/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-secure-properties.json
new file mode 100644
index 0000000..8ec03b5
--- /dev/null
+++ b/addons/extensions/hive-mirroring/src/main/META/hive-mirroring-secure-properties.json
@@ -0,0 +1,191 @@
+{
+ "shortDescription": "This extension implements replicating hive metadata and data from one Hadoop cluster to another Hadoop cluster in secure environment.",
+ "properties":[
+ {
+ "propertyName":"jobName",
+ "required":true,
+ "description":"Unique job name",
+ "example":"hive-monthly-sales-dr"
+ },
+ {
+ "propertyName":"jobClusterName",
+ "required":true,
+ "description":"Cluster where job should run",
+ "example":"backupCluster"
+ },
+ {
+ "propertyName":"jobValidityStart",
+ "required":true,
+ "description":"Job validity start time",
+ "example":"2016-03-03T00:00Z"
+ },
+ {
+ "propertyName":"jobValidityEnd",
+ "required":true,
+ "description":"Job validity end time",
+ "example":"2018-03-13T00:00Z"
+ },
+ {
+ "propertyName":"jobFrequency",
+ "required":true,
+ "description":"job frequency. Valid frequency types are minutes, hours, days, months",
+ "example":"months(1)"
+ },
+ {
+ "propertyName":"jobTimezone",
+ "required":false,
+ "description":"Time zone for the job",
+ "example":"GMT"
+ },
+ {
+ "propertyName":"jobTags",
+ "required":false,
+ "description":"list of comma separated tags. Key Value Pairs, separated by comma",
+ "example":"consumer=consumer@xyz.com, owner=producer@xyz.com, _department_type=forecasting"
+ },
+ {
+ "propertyName":"jobRetryPolicy",
+ "required":false,
+ "description":"Job retry policy",
+ "example":"periodic"
+ },
+ {
+ "propertyName":"jobRetryDelay",
+ "required":false,
+ "description":"Job retry delay",
+ "example":"minutes(30)"
+ },
+ {
+ "propertyName":"jobRetryAttempts",
+ "required":false,
+ "description":"Job retry attempts",
+ "example":"3"
+ },
+ {
+ "propertyName":"jobRetryOnTimeout",
+ "required":false,
+ "description":"Job retry on timeout",
+ "example":true
+ },
+ {
+ "propertyName":"jobAclOwner",
+ "required":false,
+ "description":"ACL owner",
+ "example":"ambari-qa"
+ },
+ {
+ "propertyName":"jobAclGroup",
+ "required":false,
+ "description":"ACL group",
+ "example":"users"
+ },
+ {
+ "propertyName":"jobAclPermission",
+ "required":false,
+ "description":"ACL permission",
+ "example":"0x755"
+ },
+ {
+ "propertyName":"sourceCluster",
+ "required":true,
+ "description":"Source cluster for hive mirroring",
+ "example":"primaryCluster"
+ },
+ {
+ "propertyName":"sourceHiveServer2Uri",
+ "required":true,
+ "description":"Hive2 server end point",
+ "example":"hive2://localhost:10000"
+ },
+ {
+ "propertyName":"sourceDatabases",
+ "required":true,
+ "description":"For DB level replication specify multiple comma separated databases to replicate",
+ "example":"salesDb"
+ },
+ {
+ "propertyName":"sourceTables",
+ "required":false,
+ "description":"For table level replication specify multiple comma separated tables to replicate",
+ "example":"monthly_sales1, monthly_sales2"
+ },
+ {
+ "propertyName":"sourceStagingPath",
+ "required":false,
+ "description":"Staging path on source",
+ "example":"/apps/hive/dr"
+ },
+ {
+ "propertyName":"sourceHive2KerberosPrincipal",
+ "required":true,
+ "description":"Required on secure clusters. Kerberos principal required to access hive servers ",
+ "example":"hive/_HOST@EXAMPLE.COM"
+ },
+ {
+ "propertyName":"targetCluster",
+ "required":true,
+ "description":"target cluster for hive mirroring",
+ "example":"backupCluster"
+ },
+ {
+ "propertyName":"targetHiveServer2Uri",
+ "required":true,
+ "description":"Hive2 server end point",
+ "example":"hive2://localhost:10000"
+ },
+ {
+ "propertyName":"targetStagingPath",
+ "required":false,
+ "description":"Staging path on target",
+ "example":"/apps/hive/dr"
+ },
+ {
+ "propertyName":"targetHive2KerberosPrincipal",
+ "required":true,
+ "description":"Required on secure clusters. Kerberos principal required to access hive servers ",
+ "example":"hive/_HOST@EXAMPLE.COM"
+ },
+ {
+ "propertyName":"maxEvents",
+ "required":false,
+ "description":"To ceil the max events processed each time the job runs. Set it to max value depending on your bandwidth limit. Setting it to -1 will process all the events but can hog up the bandwidth. Use it judiciously!",
+ "example":"10000"
+ },
+ {
+ "propertyName":"replicationMaxMaps",
+ "required":false,
+ "description":"Maximum number of mappers to use for hive replication",
+ "example":"1"
+ },
+ {
+ "propertyName":"distcpMaxMaps",
+ "required":false,
+ "description":"Maximum number of mappers for DistCP",
+ "example":"1"
+ },
+ {
+ "propertyName":"distcpMapBandwidth",
+ "required":false,
+ "description":"Bandwidth in MB for each mapper in DistCP",
+ "example":"100"
+ },
+ {
+ "propertyName":"tdeEncryptionEnabled",
+ "required":false,
+ "description":"Set this flag to true if TDE encryption is enabled on source and target. Default value is false",
+ "example":"true"
+ },
+ {
+ "propertyName":"jobNotificationType",
+ "required":false,
+ "description":"Email Notification for Falcon instance completion",
+ "example":"email"
+ },
+ {
+ "propertyName":"jobNotificationReceivers",
+ "required":false,
+ "description":"Comma separated email Id's",
+ "example":"user1@gmail.com, user2@gmail.com"
+ }
+ ]
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-template.xml
----------------------------------------------------------------------
diff --git a/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-template.xml b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-template.xml
new file mode 100644
index 0000000..4497bb4
--- /dev/null
+++ b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-template.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<process name="##jobName##" xmlns="uri:falcon:process:0.1">
+ <clusters>
+ <!-- source -->
+ <cluster name="##jobClusterName##">
+ <validity end="##jobValidityEnd##" start="##jobValidityStart##"/>
+ </cluster>
+ </clusters>
+
+ <tags/>
+
+ <parallel>1</parallel>
+ <!-- Replication needs to run only once to catch up -->
+ <order>LAST_ONLY</order>
+ <frequency>##jobFrequency##</frequency>
+ <timezone>##jobTimezone##</timezone>
+
+ <properties>
+ <property name="oozie.wf.subworkflow.classpath.inheritance" value="true"/>
+ </properties>
+
+ <workflow name="##jobWorkflowName##" engine="##jobWorkflowEngine##"
+ path="##jobWorkflowPath##" lib="##jobWorkflowLibPath##"/>
+ <retry policy="##jobRetryPolicy##" delay="##jobRetryDelay##" attempts="3"/>
+ <notification type="##jobNotificationType##" to="##jobNotificationReceivers##"/>
+ <ACL/>
+</process>
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-workflow.xml
----------------------------------------------------------------------
diff --git a/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-workflow.xml b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-workflow.xml
new file mode 100644
index 0000000..4bf048f
--- /dev/null
+++ b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-secure-workflow.xml
@@ -0,0 +1,363 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-dr-hive-workflow'>
+ <credentials>
+ <credential name='hive_src_credentials' type='hcat'>
+ <property>
+ <name>hcat.metastore.uri</name>
+ <value>${sourceMetastoreUri}</value>
+ </property>
+ <property>
+ <name>hcat.metastore.principal</name>
+ <value>${sourceHiveMetastoreKerberosPrincipal}</value>
+ </property>
+ </credential>
+ <credential name='hive_tgt_credentials' type='hcat'>
+ <property>
+ <name>hcat.metastore.uri</name>
+ <value>${targetMetastoreUri}</value>
+ </property>
+ <property>
+ <name>hcat.metastore.principal</name>
+ <value>${targetHiveMetastoreKerberosPrincipal}</value>
+ </property>
+ </credential>
+ <credential name="hive2_src_credentials" type="hive2">
+ <property>
+ <name>hive2.server.principal</name>
+ <value>${sourceHive2KerberosPrincipal}</value>
+ </property>
+ <property>
+ <name>hive2.jdbc.url</name>
+ <value>jdbc:${sourceHiveServer2Uri}/${sourceDatabase}</value>
+ </property>
+ </credential>
+ <credential name="hive2_tgt_credentials" type="hive2">
+ <property>
+ <name>hive2.server.principal</name>
+ <value>${targetHive2KerberosPrincipal}</value>
+ </property>
+ <property>
+ <name>hive2.jdbc.url</name>
+ <value>jdbc:${targetHiveServer2Uri}/${sourceDatabase}</value>
+ </property>
+ </credential>
+ </credentials>
+ <start to='last-event'/>
+ <action name="last-event" cred="hive_tgt_credentials">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- hadoop 2 parameter -->
+ <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <property>
+ <name>oozie.use.system.libpath</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>distcp,hive,hive2,hcatalog</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapreduce.job.hdfs-servers</name>
+ <value>${sourceNN},${targetNN}</value>
+ </property>
+ <property>
+ <name>mapreduce.job.hdfs-servers</name>
+ <value>${sourceNN},${targetNN}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.hive.HiveDRTool</main-class>
+ <arg>-Dmapred.job.queue.name=${queueName}</arg>
+ <arg>-Dmapred.job.priority=${jobPriority}</arg>
+ <arg>-falconLibPath</arg>
+ <arg>${wf:conf("falcon.libpath")}</arg>
+ <arg>-sourceCluster</arg>
+ <arg>${sourceCluster}</arg>
+ <arg>-sourceMetastoreUri</arg>
+ <arg>${sourceMetastoreUri}</arg>
+ <arg>-sourceHiveServer2Uri</arg>
+ <arg>${sourceHiveServer2Uri}</arg>
+ <arg>-sourceDatabase</arg>
+ <arg>${sourceDatabase}</arg>
+ <arg>-sourceTable</arg>
+ <arg>${sourceTable}</arg>
+ <arg>-sourceStagingPath</arg>
+ <arg>${sourceStagingPath}</arg>
+ <arg>-sourceNN</arg>
+ <arg>${sourceNN}</arg>
+ <arg>-sourceNNKerberosPrincipal</arg>
+ <arg>${sourceNNKerberosPrincipal}</arg>
+ <arg>-sourceHiveMetastoreKerberosPrincipal</arg>
+ <arg>${sourceHiveMetastoreKerberosPrincipal}</arg>
+ <arg>-sourceHive2KerberosPrincipal</arg>
+ <arg>${sourceHive2KerberosPrincipal}</arg>
+ <arg>-targetCluster</arg>
+ <arg>${targetCluster}</arg>
+ <arg>-targetMetastoreUri</arg>
+ <arg>${targetMetastoreUri}</arg>
+ <arg>-targetHiveServer2Uri</arg>
+ <arg>${targetHiveServer2Uri}</arg>
+ <arg>-targetStagingPath</arg>
+ <arg>${targetStagingPath}</arg>
+ <arg>-targetNN</arg>
+ <arg>${targetNN}</arg>
+ <arg>-targetNNKerberosPrincipal</arg>
+ <arg>${targetNNKerberosPrincipal}</arg>
+ <arg>-targetHiveMetastoreKerberosPrincipal</arg>
+ <arg>${targetHiveMetastoreKerberosPrincipal}</arg>
+ <arg>-targetHive2KerberosPrincipal</arg>
+ <arg>${targetHive2KerberosPrincipal}</arg>
+ <arg>-maxEvents</arg>
+ <arg>${maxEvents}</arg>
+ <arg>-clusterForJobRun</arg>
+ <arg>${clusterForJobRun}</arg>
+ <arg>-clusterForJobRunWriteEP</arg>
+ <arg>${clusterForJobRunWriteEP}</arg>
+ <arg>-clusterForJobNNKerberosPrincipal</arg>
+ <arg>${clusterForJobNNKerberosPrincipal}</arg>
+ <arg>-tdeEncryptionEnabled</arg>
+ <arg>${tdeEncryptionEnabled}</arg>
+ <arg>-jobName</arg>
+ <arg>${jobName}-${nominalTime}</arg>
+ <arg>-executionStage</arg>
+ <arg>lastevents</arg>
+ </java>
+ <ok to="export-dr-replication"/>
+ <error to="fail"/>
+ </action>
+ <!-- Export Replication action -->
+ <action name="export-dr-replication" cred="hive_src_credentials,hive2_src_credentials">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- hadoop 2 parameter -->
+ <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <property>
+ <name>oozie.use.system.libpath</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>distcp,hive,hive2,hcatalog</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapreduce.job.hdfs-servers</name>
+ <value>${sourceNN},${targetNN}</value>
+ </property>
+ <property>
+ <name>mapreduce.job.hdfs-servers</name>
+ <value>${sourceNN},${targetNN}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.hive.HiveDRTool</main-class>
+ <arg>-Dmapred.job.queue.name=${queueName}</arg>
+ <arg>-Dmapred.job.priority=${jobPriority}</arg>
+ <arg>-falconLibPath</arg>
+ <arg>${wf:conf("falcon.libpath")}</arg>
+ <arg>-replicationMaxMaps</arg>
+ <arg>${replicationMaxMaps}</arg>
+ <arg>-distcpMaxMaps</arg>
+ <arg>${distcpMaxMaps}</arg>
+ <arg>-sourceCluster</arg>
+ <arg>${sourceCluster}</arg>
+ <arg>-sourceMetastoreUri</arg>
+ <arg>${sourceMetastoreUri}</arg>
+ <arg>-sourceHiveServer2Uri</arg>
+ <arg>${sourceHiveServer2Uri}</arg>
+ <arg>-sourceDatabase</arg>
+ <arg>${sourceDatabase}</arg>
+ <arg>-sourceTable</arg>
+ <arg>${sourceTable}</arg>
+ <arg>-sourceStagingPath</arg>
+ <arg>${sourceStagingPath}</arg>
+ <arg>-sourceNN</arg>
+ <arg>${sourceNN}</arg>
+ <arg>-sourceNNKerberosPrincipal</arg>
+ <arg>${sourceNNKerberosPrincipal}</arg>
+ <arg>-sourceHiveMetastoreKerberosPrincipal</arg>
+ <arg>${sourceHiveMetastoreKerberosPrincipal}</arg>
+ <arg>-sourceHive2KerberosPrincipal</arg>
+ <arg>${sourceHive2KerberosPrincipal}</arg>
+ <arg>-targetCluster</arg>
+ <arg>${targetCluster}</arg>
+ <arg>-targetMetastoreUri</arg>
+ <arg>${targetMetastoreUri}</arg>
+ <arg>-targetHiveServer2Uri</arg>
+ <arg>${targetHiveServer2Uri}</arg>
+ <arg>-targetStagingPath</arg>
+ <arg>${targetStagingPath}</arg>
+ <arg>-targetNN</arg>
+ <arg>${targetNN}</arg>
+ <arg>-targetNNKerberosPrincipal</arg>
+ <arg>${targetNNKerberosPrincipal}</arg>
+ <arg>-targetHiveMetastoreKerberosPrincipal</arg>
+ <arg>${targetHiveMetastoreKerberosPrincipal}</arg>
+ <arg>-targetHive2KerberosPrincipal</arg>
+ <arg>${targetHive2KerberosPrincipal}</arg>
+ <arg>-maxEvents</arg>
+ <arg>${maxEvents}</arg>
+ <arg>-distcpMapBandwidth</arg>
+ <arg>${distcpMapBandwidth}</arg>
+ <arg>-clusterForJobRun</arg>
+ <arg>${clusterForJobRun}</arg>
+ <arg>-clusterForJobRunWriteEP</arg>
+ <arg>${clusterForJobRunWriteEP}</arg>
+ <arg>-clusterForJobNNKerberosPrincipal</arg>
+ <arg>${clusterForJobNNKerberosPrincipal}</arg>
+ <arg>-tdeEncryptionEnabled</arg>
+ <arg>${tdeEncryptionEnabled}</arg>
+ <arg>-jobName</arg>
+ <arg>${jobName}-${nominalTime}</arg>
+ <arg>-executionStage</arg>
+ <arg>export</arg>
+ <arg>-counterLogDir</arg>
+ <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}/</arg>
+ </java>
+ <ok to="import-dr-replication"/>
+ <error to="fail"/>
+ </action>
+ <!-- Import Replication action -->
+ <action name="import-dr-replication" cred="hive_tgt_credentials,hive2_tgt_credentials">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- hadoop 2 parameter -->
+ <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <property>
+ <name>oozie.use.system.libpath</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>distcp,hive,hive2,hcatalog</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapreduce.job.hdfs-servers</name>
+ <value>${sourceNN},${targetNN}</value>
+ </property>
+ <property>
+ <name>mapreduce.job.hdfs-servers</name>
+ <value>${sourceNN},${targetNN}</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.hive.HiveDRTool</main-class>
+ <arg>-Dmapred.job.queue.name=${queueName}</arg>
+ <arg>-Dmapred.job.priority=${jobPriority}</arg>
+ <arg>-falconLibPath</arg>
+ <arg>${wf:conf("falcon.libpath")}</arg>
+ <arg>-replicationMaxMaps</arg>
+ <arg>${replicationMaxMaps}</arg>
+ <arg>-distcpMaxMaps</arg>
+ <arg>${distcpMaxMaps}</arg>
+ <arg>-sourceCluster</arg>
+ <arg>${sourceCluster}</arg>
+ <arg>-sourceMetastoreUri</arg>
+ <arg>${sourceMetastoreUri}</arg>
+ <arg>-sourceHiveServer2Uri</arg>
+ <arg>${sourceHiveServer2Uri}</arg>
+ <arg>-sourceDatabase</arg>
+ <arg>${sourceDatabase}</arg>
+ <arg>-sourceTable</arg>
+ <arg>${sourceTable}</arg>
+ <arg>-sourceStagingPath</arg>
+ <arg>${sourceStagingPath}</arg>
+ <arg>-sourceNN</arg>
+ <arg>${sourceNN}</arg>
+ <arg>-sourceNNKerberosPrincipal</arg>
+ <arg>${sourceNNKerberosPrincipal}</arg>
+ <arg>-sourceHiveMetastoreKerberosPrincipal</arg>
+ <arg>${sourceHiveMetastoreKerberosPrincipal}</arg>
+ <arg>-sourceHive2KerberosPrincipal</arg>
+ <arg>${sourceHive2KerberosPrincipal}</arg>
+ <arg>-targetCluster</arg>
+ <arg>${targetCluster}</arg>
+ <arg>-targetMetastoreUri</arg>
+ <arg>${targetMetastoreUri}</arg>
+ <arg>-targetHiveServer2Uri</arg>
+ <arg>${targetHiveServer2Uri}</arg>
+ <arg>-targetStagingPath</arg>
+ <arg>${targetStagingPath}</arg>
+ <arg>-targetNN</arg>
+ <arg>${targetNN}</arg>
+ <arg>-targetNNKerberosPrincipal</arg>
+ <arg>${targetNNKerberosPrincipal}</arg>
+ <arg>-targetHiveMetastoreKerberosPrincipal</arg>
+ <arg>${targetHiveMetastoreKerberosPrincipal}</arg>
+ <arg>-targetHive2KerberosPrincipal</arg>
+ <arg>${targetHive2KerberosPrincipal}</arg>
+ <arg>-maxEvents</arg>
+ <arg>${maxEvents}</arg>
+ <arg>-distcpMapBandwidth</arg>
+ <arg>${distcpMapBandwidth}</arg>
+ <arg>-clusterForJobRun</arg>
+ <arg>${clusterForJobRun}</arg>
+ <arg>-clusterForJobRunWriteEP</arg>
+ <arg>${clusterForJobRunWriteEP}</arg>
+ <arg>-clusterForJobNNKerberosPrincipal</arg>
+ <arg>${clusterForJobNNKerberosPrincipal}</arg>
+ <arg>-tdeEncryptionEnabled</arg>
+ <arg>${tdeEncryptionEnabled}</arg>
+ <arg>-jobName</arg>
+ <arg>${jobName}-${nominalTime}</arg>
+ <arg>-executionStage</arg>
+ <arg>import</arg>
+ </java>
+ <ok to="end"/>
+ <error to="fail"/>
+ </action>
+ <kill name="fail">
+ <message>
+ Workflow action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+ </message>
+ </kill>
+ <end name="end"/>
+</workflow-app>
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-template.xml
----------------------------------------------------------------------
diff --git a/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-template.xml b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-template.xml
new file mode 100644
index 0000000..4497bb4
--- /dev/null
+++ b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-template.xml
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<process name="##jobName##" xmlns="uri:falcon:process:0.1">
+ <clusters>
+ <!-- source -->
+ <cluster name="##jobClusterName##">
+ <validity end="##jobValidityEnd##" start="##jobValidityStart##"/>
+ </cluster>
+ </clusters>
+
+ <tags/>
+
+ <parallel>1</parallel>
+ <!-- Replication needs to run only once to catch up -->
+ <order>LAST_ONLY</order>
+ <frequency>##jobFrequency##</frequency>
+ <timezone>##jobTimezone##</timezone>
+
+ <properties>
+ <property name="oozie.wf.subworkflow.classpath.inheritance" value="true"/>
+ </properties>
+
+ <workflow name="##jobWorkflowName##" engine="##jobWorkflowEngine##"
+ path="##jobWorkflowPath##" lib="##jobWorkflowLibPath##"/>
+ <retry policy="##jobRetryPolicy##" delay="##jobRetryDelay##" attempts="3"/>
+ <notification type="##jobNotificationType##" to="##jobNotificationReceivers##"/>
+ <ACL/>
+</process>
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-workflow.xml
----------------------------------------------------------------------
diff --git a/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-workflow.xml b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-workflow.xml
new file mode 100644
index 0000000..9f9bf92
--- /dev/null
+++ b/addons/extensions/hive-mirroring/src/main/resources/runtime/hive-mirroring-workflow.xml
@@ -0,0 +1,255 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-dr-hive-workflow'>
+ <start to='last-event'/>
+ <action name="last-event">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- hadoop 2 parameter -->
+ <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <property>
+ <name>oozie.use.system.libpath</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>distcp,hive,hive2,hcatalog</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.hive.HiveDRTool</main-class>
+ <arg>-Dmapred.job.queue.name=${queueName}</arg>
+ <arg>-Dmapred.job.priority=${jobPriority}</arg>
+ <arg>-falconLibPath</arg>
+ <arg>${wf:conf("falcon.libpath")}</arg>
+ <arg>-sourceCluster</arg>
+ <arg>${sourceCluster}</arg>
+ <arg>-sourceMetastoreUri</arg>
+ <arg>${sourceMetastoreUri}</arg>
+ <arg>-sourceHiveServer2Uri</arg>
+ <arg>${sourceHiveServer2Uri}</arg>
+ <arg>-sourceDatabase</arg>
+ <arg>${sourceDatabase}</arg>
+ <arg>-sourceTable</arg>
+ <arg>${sourceTable}</arg>
+ <arg>-sourceStagingPath</arg>
+ <arg>${sourceStagingPath}</arg>
+ <arg>-sourceNN</arg>
+ <arg>${sourceNN}</arg>
+ <arg>-targetCluster</arg>
+ <arg>${targetCluster}</arg>
+ <arg>-targetMetastoreUri</arg>
+ <arg>${targetMetastoreUri}</arg>
+ <arg>-targetHiveServer2Uri</arg>
+ <arg>${targetHiveServer2Uri}</arg>
+ <arg>-targetStagingPath</arg>
+ <arg>${targetStagingPath}</arg>
+ <arg>-targetNN</arg>
+ <arg>${targetNN}</arg>
+ <arg>-maxEvents</arg>
+ <arg>${maxEvents}</arg>
+ <arg>-clusterForJobRun</arg>
+ <arg>${clusterForJobRun}</arg>
+ <arg>-clusterForJobRunWriteEP</arg>
+ <arg>${clusterForJobRunWriteEP}</arg>
+ <arg>-tdeEncryptionEnabled</arg>
+ <arg>${tdeEncryptionEnabled}</arg>
+ <arg>-jobName</arg>
+ <arg>${jobName}-${nominalTime}</arg>
+ <arg>-executionStage</arg>
+ <arg>lastevents</arg>
+ </java>
+ <ok to="export-dr-replication"/>
+ <error to="fail"/>
+ </action>
+ <!-- Export Replication action -->
+ <action name="export-dr-replication">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- hadoop 2 parameter -->
+ <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <property>
+ <name>oozie.use.system.libpath</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>distcp,hive,hive2,hcatalog</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.hive.HiveDRTool</main-class>
+ <arg>-Dmapred.job.queue.name=${queueName}</arg>
+ <arg>-Dmapred.job.priority=${jobPriority}</arg>
+ <arg>-falconLibPath</arg>
+ <arg>${wf:conf("falcon.libpath")}</arg>
+ <arg>-replicationMaxMaps</arg>
+ <arg>${replicationMaxMaps}</arg>
+ <arg>-distcpMaxMaps</arg>
+ <arg>${distcpMaxMaps}</arg>
+ <arg>-sourceCluster</arg>
+ <arg>${sourceCluster}</arg>
+ <arg>-sourceMetastoreUri</arg>
+ <arg>${sourceMetastoreUri}</arg>
+ <arg>-sourceHiveServer2Uri</arg>
+ <arg>${sourceHiveServer2Uri}</arg>
+ <arg>-sourceDatabase</arg>
+ <arg>${sourceDatabase}</arg>
+ <arg>-sourceTable</arg>
+ <arg>${sourceTable}</arg>
+ <arg>-sourceStagingPath</arg>
+ <arg>${sourceStagingPath}</arg>
+ <arg>-sourceNN</arg>
+ <arg>${sourceNN}</arg>
+ <arg>-targetCluster</arg>
+ <arg>${targetCluster}</arg>
+ <arg>-targetMetastoreUri</arg>
+ <arg>${targetMetastoreUri}</arg>
+ <arg>-targetHiveServer2Uri</arg>
+ <arg>${targetHiveServer2Uri}</arg>
+ <arg>-targetStagingPath</arg>
+ <arg>${targetStagingPath}</arg>
+ <arg>-targetNN</arg>
+ <arg>${targetNN}</arg>
+ <arg>-maxEvents</arg>
+ <arg>${maxEvents}</arg>
+ <arg>-distcpMapBandwidth</arg>
+ <arg>${distcpMapBandwidth}</arg>
+ <arg>-clusterForJobRun</arg>
+ <arg>${clusterForJobRun}</arg>
+ <arg>-clusterForJobRunWriteEP</arg>
+ <arg>${clusterForJobRunWriteEP}</arg>
+ <arg>-tdeEncryptionEnabled</arg>
+ <arg>${tdeEncryptionEnabled}</arg>
+ <arg>-jobName</arg>
+ <arg>${jobName}-${nominalTime}</arg>
+ <arg>-executionStage</arg>
+ <arg>export</arg>
+ <arg>-counterLogDir</arg>
+ <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}/</arg>
+ </java>
+ <ok to="import-dr-replication"/>
+ <error to="fail"/>
+ </action>
+ <!-- Import Replication action -->
+ <action name="import-dr-replication">
+ <java>
+ <job-tracker>${jobTracker}</job-tracker>
+ <name-node>${nameNode}</name-node>
+ <configuration>
+ <property> <!-- hadoop 2 parameter -->
+ <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>mapred.job.queue.name</name>
+ <value>${queueName}</value>
+ </property>
+ <property>
+ <name>oozie.launcher.mapred.job.priority</name>
+ <value>${jobPriority}</value>
+ </property>
+ <property>
+ <name>oozie.use.system.libpath</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>oozie.action.sharelib.for.java</name>
+ <value>distcp,hive,hive2,hcatalog</value>
+ </property>
+ </configuration>
+ <main-class>org.apache.falcon.hive.HiveDRTool</main-class>
+ <arg>-Dmapred.job.queue.name=${queueName}</arg>
+ <arg>-Dmapred.job.priority=${jobPriority}</arg>
+ <arg>-falconLibPath</arg>
+ <arg>${wf:conf("falcon.libpath")}</arg>
+ <arg>-replicationMaxMaps</arg>
+ <arg>${replicationMaxMaps}</arg>
+ <arg>-distcpMaxMaps</arg>
+ <arg>${distcpMaxMaps}</arg>
+ <arg>-sourceCluster</arg>
+ <arg>${sourceCluster}</arg>
+ <arg>-sourceMetastoreUri</arg>
+ <arg>${sourceMetastoreUri}</arg>
+ <arg>-sourceHiveServer2Uri</arg>
+ <arg>${sourceHiveServer2Uri}</arg>
+ <arg>-sourceDatabase</arg>
+ <arg>${sourceDatabase}</arg>
+ <arg>-sourceTable</arg>
+ <arg>${sourceTable}</arg>
+ <arg>-sourceStagingPath</arg>
+ <arg>${sourceStagingPath}</arg>
+ <arg>-sourceNN</arg>
+ <arg>${sourceNN}</arg>
+ <arg>-targetCluster</arg>
+ <arg>${targetCluster}</arg>
+ <arg>-targetMetastoreUri</arg>
+ <arg>${targetMetastoreUri}</arg>
+ <arg>-targetHiveServer2Uri</arg>
+ <arg>${targetHiveServer2Uri}</arg>
+ <arg>-targetStagingPath</arg>
+ <arg>${targetStagingPath}</arg>
+ <arg>-targetNN</arg>
+ <arg>${targetNN}</arg>
+ <arg>-maxEvents</arg>
+ <arg>${maxEvents}</arg>
+ <arg>-distcpMapBandwidth</arg>
+ <arg>${distcpMapBandwidth}</arg>
+ <arg>-clusterForJobRun</arg>
+ <arg>${clusterForJobRun}</arg>
+ <arg>-clusterForJobRunWriteEP</arg>
+ <arg>${clusterForJobRunWriteEP}</arg>
+ <arg>-tdeEncryptionEnabled</arg>
+ <arg>${tdeEncryptionEnabled}</arg>
+ <arg>-jobName</arg>
+ <arg>${jobName}-${nominalTime}</arg>
+ <arg>-executionStage</arg>
+ <arg>import</arg>
+ </java>
+ <ok to="end"/>
+ <error to="fail"/>
+ </action>
+ <kill name="fail">
+ <message>
+ Workflow action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
+ </message>
+ </kill>
+ <end name="end"/>
+</workflow-app>
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java
----------------------------------------------------------------------
diff --git a/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java b/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java
index c9ad47e..71b9043 100644
--- a/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java
+++ b/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDRArgs.java
@@ -32,7 +32,7 @@ public enum HiveDRArgs {
SOURCE_HS2_URI("sourceHiveServer2Uri", "source HS2 uri"),
SOURCE_DATABASE("sourceDatabase", "comma source databases"),
SOURCE_TABLE("sourceTable", "comma source tables"),
- SOURCE_STAGING_PATH("sourceStagingPath", "source staging path for data"),
+ SOURCE_STAGING_PATH("sourceStagingPath", "source staging path for data", false),
// source hadoop endpoints
SOURCE_NN("sourceNN", "source name node"),
@@ -47,7 +47,7 @@ public enum HiveDRArgs {
TARGET_METASTORE_URI("targetMetastoreUri", "source meta store uri"),
TARGET_HS2_URI("targetHiveServer2Uri", "source meta store uri"),
- TARGET_STAGING_PATH("targetStagingPath", "source staging path for data"),
+ TARGET_STAGING_PATH("targetStagingPath", "source staging path for data", false),
// target hadoop endpoints
TARGET_NN("targetNN", "target name node"),
@@ -70,16 +70,13 @@ public enum HiveDRArgs {
// Map Bandwidth
DISTCP_MAP_BANDWIDTH("distcpMapBandwidth", "map bandwidth in mb", false),
- JOB_NAME("drJobName", "unique job name"),
+ JOB_NAME("jobName", "unique job name"),
CLUSTER_FOR_JOB_RUN("clusterForJobRun", "cluster where job runs"),
JOB_CLUSTER_NN("clusterForJobRunWriteEP", "write end point of cluster where job runs"),
JOB_CLUSTER_NN_KERBEROS_PRINCIPAL("clusterForJobNNKerberosPrincipal",
"Namenode kerberos principal of cluster on which replication job runs", false),
-
- FALCON_LIBPATH("falconLibPath", "Falcon Lib Path for Jar files", false),
-
KEEP_HISTORY("keepHistory", "Keep history of events file generated", false),
EXECUTION_STAGE("executionStage", "Flag for workflow stage execution", false),
COUNTER_LOGDIR("counterLogDir", "Log directory to store counter file", false);
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java
----------------------------------------------------------------------
diff --git a/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java b/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java
index 868ec8d..0096727 100644
--- a/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java
+++ b/addons/hivedr/src/main/java/org/apache/falcon/hive/HiveDROptions.java
@@ -24,7 +24,7 @@ import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
-import org.apache.falcon.hive.exception.HiveReplicationException;
+import org.apache.falcon.hive.util.FileUtils;
import java.io.File;
import java.util.Arrays;
@@ -70,11 +70,14 @@ public class HiveDROptions {
return Arrays.asList(context.get(HiveDRArgs.SOURCE_TABLE).trim().split(","));
}
- public String getSourceStagingPath() throws HiveReplicationException {
- if (StringUtils.isNotEmpty(context.get(HiveDRArgs.SOURCE_STAGING_PATH))) {
- return context.get(HiveDRArgs.SOURCE_STAGING_PATH) + File.separator + getJobName();
+ public String getSourceStagingPath() {
+ String stagingPath = context.get(HiveDRArgs.SOURCE_STAGING_PATH);
+ if (StringUtils.isNotBlank(stagingPath)) {
+ stagingPath = StringUtils.removeEnd(stagingPath, File.separator);
+ return stagingPath + File.separator + getJobName();
+ } else {
+ return FileUtils.DEFAULT_EVENT_STORE_PATH + getJobName();
}
- throw new HiveReplicationException("Source StagingPath cannot be empty");
}
public String getSourceWriteEP() {
@@ -100,15 +103,19 @@ public class HiveDROptions {
public String getTargetMetastoreKerberosPrincipal() {
return context.get(HiveDRArgs.TARGET_HIVE_METASTORE_KERBEROS_PRINCIPAL);
}
+
public String getTargetHive2KerberosPrincipal() {
return context.get(HiveDRArgs.TARGET_HIVE2_KERBEROS_PRINCIPAL);
}
- public String getTargetStagingPath() throws HiveReplicationException {
- if (StringUtils.isNotEmpty(context.get(HiveDRArgs.TARGET_STAGING_PATH))) {
- return context.get(HiveDRArgs.TARGET_STAGING_PATH) + File.separator + getJobName();
+ public String getTargetStagingPath() {
+ String stagingPath = context.get(HiveDRArgs.TARGET_STAGING_PATH);
+ if (StringUtils.isNotBlank(stagingPath)) {
+ stagingPath = StringUtils.removeEnd(stagingPath, File.separator);
+ return stagingPath + File.separator + getJobName();
+ } else {
+ return FileUtils.DEFAULT_EVENT_STORE_PATH + getJobName();
}
- throw new HiveReplicationException("Target StagingPath cannot be empty");
}
public String getReplicationMaxMaps() {
@@ -135,23 +142,10 @@ public class HiveDROptions {
return context.get(HiveDRArgs.JOB_CLUSTER_NN_KERBEROS_PRINCIPAL);
}
- public void setSourceStagingDir(String path) {
- context.put(HiveDRArgs.SOURCE_STAGING_PATH, path);
- }
-
- public void setTargetStagingDir(String path) {
- context.put(HiveDRArgs.TARGET_STAGING_PATH, path);
- }
-
public String getExecutionStage() {
return context.get(HiveDRArgs.EXECUTION_STAGE);
}
- public boolean isTDEEncryptionEnabled() {
- return StringUtils.isEmpty(context.get(HiveDRArgs.TDE_ENCRYPTION_ENABLED))
- ? false : Boolean.valueOf(context.get(HiveDRArgs.TDE_ENCRYPTION_ENABLED));
- }
-
public boolean shouldBlock() {
return true;
}
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/recipes/hdfs-replication/README.txt
----------------------------------------------------------------------
diff --git a/addons/recipes/hdfs-replication/README.txt b/addons/recipes/hdfs-replication/README.txt
deleted file mode 100644
index 5742d43..0000000
--- a/addons/recipes/hdfs-replication/README.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-HDFS Directory Replication Recipe
-
-Overview
-This recipe implements replicating arbitrary directories on HDFS from one
-Hadoop cluster to another Hadoop cluster.
-This piggy backs on replication solution in Falcon which uses the DistCp tool.
-
-Use Case
-* Copy directories between HDFS clusters with out dated partitions
-* Archive directories from HDFS to Cloud. Ex: S3, Azure WASB
-
-Limitations
-As the data volume and number of files grow, this can get inefficient.
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/recipes/hdfs-replication/pom.xml
----------------------------------------------------------------------
diff --git a/addons/recipes/hdfs-replication/pom.xml b/addons/recipes/hdfs-replication/pom.xml
deleted file mode 100644
index 98d9795..0000000
--- a/addons/recipes/hdfs-replication/pom.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-
- <modelVersion>4.0.0</modelVersion>
- <groupId>org.apache.falcon.recipes</groupId>
- <artifactId>falcon-hdfs-replication-recipe</artifactId>
- <version>0.10-SNAPSHOT</version>
- <description>Apache Falcon Sample Hdfs Replicaiton Recipe</description>
- <name>Apache Falcon Sample Hdfs Replication Recipe</name>
- <packaging>jar</packaging>
-</project>
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-template.xml
----------------------------------------------------------------------
diff --git a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-template.xml b/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-template.xml
deleted file mode 100644
index 441a189..0000000
--- a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-template.xml
+++ /dev/null
@@ -1,44 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<process name="##falcon.recipe.job.name##" xmlns="uri:falcon:process:0.1">
- <clusters>
- <!-- source -->
- <cluster name="##falcon.recipe.cluster.name##">
- <validity end="##falcon.recipe.cluster.validity.end##" start="##falcon.recipe.cluster.validity.start##"/>
- </cluster>
- </clusters>
-
- <tags>_falcon_mirroring_type=HDFS</tags>
-
- <parallel>1</parallel>
- <!-- Dir replication needs to run only once to catch up -->
- <order>LAST_ONLY</order>
- <frequency>##falcon.recipe.frequency##</frequency>
- <timezone>UTC</timezone>
-
- <properties>
- <property name="oozie.wf.subworkflow.classpath.inheritance" value="true"/>
- </properties>
-
- <workflow name="##falcon.recipe.workflow.name##" engine="oozie" path="/apps/data-mirroring/workflows/hdfs-replication-workflow.xml" lib="##workflow.lib.path##"/>
- <retry policy="##falcon.recipe.retry.policy##" delay="##falcon.recipe.retry.delay##" attempts="3"/>
- <notification type="##falcon.recipe.notification.type##" to="##falcon.recipe.notification.receivers##"/>
- <ACL/>
-</process>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/falcon/blob/95bf312f/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-workflow.xml
----------------------------------------------------------------------
diff --git a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-workflow.xml b/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-workflow.xml
deleted file mode 100644
index c1966be..0000000
--- a/addons/recipes/hdfs-replication/src/main/resources/hdfs-replication-workflow.xml
+++ /dev/null
@@ -1,82 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-<workflow-app xmlns='uri:oozie:workflow:0.3' name='falcon-dr-fs-workflow'>
- <start to='dr-replication'/>
- <!-- Replication action -->
- <action name="dr-replication">
- <java>
- <job-tracker>${jobTracker}</job-tracker>
- <name-node>${nameNode}</name-node>
- <configuration>
- <property> <!-- hadoop 2 parameter -->
- <name>oozie.launcher.mapreduce.job.user.classpath.first</name>
- <value>true</value>
- </property>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queueName}</value>
- </property>
- <property>
- <name>oozie.launcher.mapred.job.priority</name>
- <value>${jobPriority}</value>
- </property>
- <property>
- <name>oozie.use.system.libpath</name>
- <value>true</value>
- </property>
- <property>
- <name>oozie.action.sharelib.for.java</name>
- <value>distcp</value>
- </property>
- <property>
- <name>oozie.launcher.oozie.libpath</name>
- <value>${wf:conf("falcon.libpath")}</value>
- </property>
- <property>
- <name>oozie.launcher.mapreduce.job.hdfs-servers</name>
- <value>${drSourceClusterFS},${drTargetClusterFS}</value>
- </property>
- </configuration>
- <main-class>org.apache.falcon.replication.FeedReplicator</main-class>
- <arg>-Dmapred.job.queue.name=${queueName}</arg>
- <arg>-Dmapred.job.priority=${jobPriority}</arg>
- <arg>-maxMaps</arg>
- <arg>${distcpMaxMaps}</arg>
- <arg>-mapBandwidth</arg>
- <arg>${distcpMapBandwidth}</arg>
- <arg>-sourcePaths</arg>
- <arg>${drSourceDir}</arg>
- <arg>-targetPath</arg>
- <arg>${drTargetClusterFS}${drTargetDir}</arg>
- <arg>-falconFeedStorageType</arg>
- <arg>FILESYSTEM</arg>
- <arg>-availabilityFlag</arg>
- <arg>${availabilityFlag == 'NA' ? "NA" : availabilityFlag}</arg>
- <arg>-counterLogDir</arg>
- <arg>${logDir}/job-${nominalTime}/${srcClusterName == 'NA' ? '' : srcClusterName}</arg>
- </java>
- <ok to="end"/>
- <error to="fail"/>
- </action>
- <kill name="fail">
- <message>
- Workflow action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
- </message>
- </kill>
- <end name="end"/>
-</workflow-app>