You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by fo...@apache.org on 2018/05/08 09:51:25 UTC

incubator-airflow git commit: [AIRFLOW-2412] Fix HiveCliHook.load_file to address HIVE-10541

Repository: incubator-airflow
Updated Branches:
  refs/heads/master 088900ffb -> baf15e11a


[AIRFLOW-2412] Fix HiveCliHook.load_file to address HIVE-10541

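HiveCliHook.load_file does not actually execute the
LOAD DATA statement when it is run through the beeline
bundled with Hive versions earlier than 2.0, because of HIVE-10541.
This PR works around the problem by appending a newline
character to the end of the generated HQL.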

Closes #3327 from sekikn/AIRFLOW-2412


Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/baf15e11
Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/baf15e11
Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/baf15e11

Branch: refs/heads/master
Commit: baf15e11a51a07ad5adbc1be36a43f313f826a61
Parents: 088900f
Author: Kengo Seki <se...@apache.org>
Authored: Tue May 8 11:51:18 2018 +0200
Committer: Fokko Driesprong <fo...@godatadriven.com>
Committed: Tue May 8 11:51:18 2018 +0200

----------------------------------------------------------------------
 airflow/hooks/hive_hooks.py   |  5 +++++
 tests/hooks/test_hive_hook.py | 26 +++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/baf15e11/airflow/hooks/hive_hooks.py
----------------------------------------------------------------------
diff --git a/airflow/hooks/hive_hooks.py b/airflow/hooks/hive_hooks.py
index 65238df..0b7b056 100644
--- a/airflow/hooks/hive_hooks.py
+++ b/airflow/hooks/hive_hooks.py
@@ -420,6 +420,11 @@ class HiveCliHook(BaseHook):
             pvals = ", ".join(
                 ["{0}='{1}'".format(k, v) for k, v in partition.items()])
             hql += "PARTITION ({pvals});"
+
+        # As a workaround for HIVE-10541, add a newline character
+        # at the end of hql (AIRFLOW-2412).
+        hql += '\n'
+
         hql = hql.format(**locals())
         self.log.info(hql)
         self.run_cli(hql)

http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/baf15e11/tests/hooks/test_hive_hook.py
----------------------------------------------------------------------
diff --git a/tests/hooks/test_hive_hook.py b/tests/hooks/test_hive_hook.py
index f48bed8..c537831 100644
--- a/tests/hooks/test_hive_hook.py
+++ b/tests/hooks/test_hive_hook.py
@@ -20,12 +20,14 @@
 
 import datetime
 import random
+
+import mock
 import unittest
 
 from hmsclient import HMSClient
 
 from airflow.exceptions import AirflowException
-from airflow.hooks.hive_hooks import HiveMetastoreHook
+from airflow.hooks.hive_hooks import HiveCliHook, HiveMetastoreHook
 from airflow import DAG, configuration, operators
 from airflow.utils import timezone
 
@@ -82,6 +84,28 @@ class HiveEnvironmentTest(unittest.TestCase):
             metastore.drop_table(self.database, self.table, deleteData=True)
 
 
+class TestHiveCliHook(unittest.TestCase):
+
+    def test_run_cli(self):
+        hook = HiveCliHook()
+        hook.run_cli("SHOW DATABASES")
+
+    @mock.patch('airflow.hooks.hive_hooks.HiveCliHook.run_cli')
+    def test_load_file(self, mock_run_cli):
+        filepath = "/path/to/input/file"
+        table = "output_table"
+
+        hook = HiveCliHook()
+        hook.load_file(filepath=filepath, table=table, create=False)
+
+        query = (
+            "LOAD DATA LOCAL INPATH '{filepath}' "
+            "OVERWRITE INTO TABLE {table} \n"
+            .format(filepath=filepath, table=table)
+        )
+        mock_run_cli.assert_called_with(query)
+
+
 class TestHiveMetastoreHook(HiveEnvironmentTest):
     VALID_FILTER_MAP = {'key2': 'value2'}