You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2015/01/21 00:26:26 UTC

svn commit: r1653406 - in /hive/trunk/hcatalog/src/test/e2e/templeton: ./ deployers/ drivers/ inpdir/ tests/

Author: ekoifman
Date: Tue Jan 20 23:26:26 2015
New Revision: 1653406

URL: http://svn.apache.org/r1653406
Log:
HIVE-9272 Tests for utf-8 support (Aswathy Chellammal Sreekumar via Eugene Koifman)

Added:
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin䶴ㄩ鼾丄狜〇work.pig"
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof䶴ㄩ鼾丄狜〇war.txt"
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt
    hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt
    hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf
Modified:
    hive/trunk/hcatalog/src/test/e2e/templeton/build.xml
    hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
    hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/build.xml
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/build.xml?rev=1653406&r1=1653405&r2=1653406&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/build.xml (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/build.xml Tue Jan 20 23:26:26 2015
@@ -120,6 +120,7 @@
             <arg value="${basedir}/tests/ddl.conf"/>
             <arg value="${basedir}/tests/jobsubmission.conf"/>
             <arg value="${basedir}/tests/jobsubmission2.conf"/>
+            <arg value="${basedir}/tests/utf8.conf"/>
         </exec>
     </target>
 

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh?rev=1653406&r1=1653405&r2=1653406&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/deployers/deploy_e2e_artifacts.sh Tue Jan 20 23:26:26 2015
@@ -32,6 +32,8 @@ ${HADOOP_HOME}/bin/hdfs dfs -put ${PROJ_
 
 #For hadoop2 there are 2 separate jars
 ${HADOOP_HOME}/bin/hdfs dfs -put ${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar  webhcate2e/hexamples.jar
+#For utf8 test(for mapreduce) we need a jar with utf-8 characters in the name
+${HADOOP_HOME}/bin/hdfs dfs -put ${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar  webhcate2e/hadoop_examples_䶴ㄩ鼾丄狜〇_2_2_0.jar
 ${HADOOP_HOME}/bin/hdfs dfs -put ${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-${HADOOP_VERSION}.jar webhcate2e/hclient.jar
 ${HADOOP_HOME}/bin/hdfs dfs -put ${HADOOP_HOME}/share/hadoop/tools/lib/hadoop-streaming-${HADOOP_VERSION}.jar  /user/templeton/hadoop-streaming.jar
 

Modified: hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm?rev=1653406&r1=1653405&r2=1653406&view=diff
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm (original)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/drivers/TestDriverCurl.pm Tue Jan 20 23:26:26 2015
@@ -37,6 +37,7 @@ use English;
 use Storable qw(dclone);
 use File::Glob ':glob';
 use JSON::Path;
+use utf8;
 
 my $passedStr = 'passed';
 my $failedStr = 'failed';
@@ -922,12 +923,38 @@ sub compare
               $result = 0;
               next;
             }
-              
+            my $exp_userargsvalue;
+            my $r_userargsvalue;
+            if(ref($exp_userargs{$key}) eq "ARRAY"){
+              my @values = $exp_userargs{$key};
+              my $num_values = @values;
+
+              for(my $i=0;$i<=$num_values;$i++){
+                if (utf8::is_utf8($exp_userargs{$key}[$i])){
+                  $exp_userargs{$key}[$i] = utf8::decode($exp_userargs{$key}[$i]);
+                  $r_userargs{$key}[$i] = utf8::decode($r_userargs{$key}[$i]);
+                }
+              }
+              $exp_userargsvalue = $exp_userargs{$key};
+              $r_userargsvalue = $r_userargs{$key};
+            }
+            else {
+              if (utf8::is_utf8($exp_userargs{$key}))
+              {
+                $exp_userargsvalue = utf8::decode($exp_userargs{$key});
+                $r_userargsvalue = utf8::decode($r_userargs{$key});
+              } 
+              else 
+              {
+                $exp_userargsvalue = $exp_userargs{$key};
+                $r_userargsvalue = $r_userargs{$key};
+              }
+            }
             print $log "$0::$subName DEBUG comparing expected " 
                 . " $key ->" . dump($exp_userargs{$key})
                 . " With result $key ->" . dump($r_userargs{$key}) . "\n";
 
-            if (!Compare($exp_userargs{$key}, $r_userargs{$key})) {
+            if (!Compare($exp_userargsvalue, $r_userargsvalue)) {
               print $log "$0::$subName WARN check failed:" 
                   . " json compare failed. For field "
                   . "$key, regex <" . dump($r_userargs{$key})

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin䶴ㄩ鼾丄狜〇work.pig"
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin%E4%B6%B4%E3%84%A9%E9%BC%BE%E4%B8%84%E7%8B%9C%E3%80%87work.pig%22?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin䶴ㄩ鼾丄狜〇work.pig" (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/PigJoin䶴ㄩ鼾丄狜〇work.pig" Tue Jan 20 23:26:26 2015
@@ -0,0 +1,4 @@
+A = load '$INPDIR/table3.txt' using PigStorage('\t') AS (row:int, content:chararray);
+B = load '$INPDIR/table3ToJoin.txt' using PigStorage('\t') AS (row:int, content:chararray);
+C = JOIN A BY content, B BY content;
+store C into '$OUTDIR/PigJoin' USING PigStorage();
\ No newline at end of file

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof䶴ㄩ鼾丄狜〇war.txt"
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof%E4%B6%B4%E3%84%A9%E9%BC%BE%E4%B8%84%E7%8B%9C%E3%80%87war.txt%22?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof䶴ㄩ鼾丄狜〇war.txt" (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/artof䶴ㄩ鼾丄狜〇war.txt" Tue Jan 20 23:26:26 2015
@@ -0,0 +1,21 @@
+孫 子 曰 : 兵 者 , 國 之 大 事 , 死 生 之 地 , 存 亡 之 道 , 不 可 不 察 也 。 
+ 
+ 故 經 之 以 五 , 校 之 以 計 , 而 索 其 情 : 一 曰 道 , 二 曰 天 , 三 曰 地 , 四 曰 
+ 將 , 五 曰 法 。 道 者 , 令 民 于 上 同 意 者 也 , 可 與 之 死 , 可 與 之 生 , 民 不 
+ 詭 也 。 天 者 , 陰 陽 、 寒 暑 、 時 制 也 。 地 者 , 高 下 、 遠 近 、 險 易 、 廣 狹 
+ 、 死 生 也 。 將 者 , 智 、 信 、 仁 、 勇 、 嚴 也 。 法 者 , 曲 制 、 官 道 、 主 用 
+ 也 。 凡 此 五 者 , 將 莫 不 聞 , 知 之 者 勝 , 不 知 之 者 不 勝 。 故 校 之 以 計 , 
+ 而 索 其 情 。 曰 : 主 孰 有 道 ? 將 孰 有 能 ? 天 地 孰 得 ? 法 令 孰 行 ? 兵 眾 孰 
+ 強 ? 士 卒 孰 練 ? 賞 罰 孰 明 ? 吾 以 此 知 勝 負 矣 。 
+ 
+ 將 聽 吾 計 , 用 之 必 勝 , 留 之 ﹔ 將 不 聽 吾 計 , 用 之 必 敗 , 去 之 。 
+ 
+ 計 利 以 聽 , 乃 為 之 勢 , 以 佐 其 外 。 勢 者 , 因 利 而 制 權 也 。 
+ 
+ 兵 者 , 詭 道 也 。 故 能 而 示 之 不 能 , 用 而 示 之 不 用 , 近 而 示 之 遠 , 遠 而 
+ 示 之 近 。 利 而 誘 之 , 亂 而 取 之 , 實 而 備 之 , 強 而 避 之 , 怒 而 撓 之 , 卑 
+ 而 驕 之 , 佚 而 勞 之 , 親 而 離 之 , 攻 其 不 備 , 出 其 不 意 。 此 兵 家 之 勝 , 
+ 不 可 先 傳 也 。 
+ 
+ 夫 未 戰 而 廟 算 勝 者 , 得 算 多 也 ﹔ 未 戰 而 廟 算 不 勝 者 , 得 算 少 也 。 多 算 
+ 勝 , 少 算 不 勝 , 而 況 無 算 乎 ! 吾 以 此 觀 之 , 勝 負 見 矣 。
\ No newline at end of file

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table1.txt Tue Jan 20 23:26:26 2015
@@ -0,0 +1,27 @@
+868	狚鼾䶵鼾c鼾㐀ee䶴﨨b
+423	be狝h狛狝狝㐁﨨狛a
+440	hc鼾㐀啊狝g狜c狜䶵㐁狛㐀丄g
+701	䶴䶵ecea狝䶵e狝䶴
+550	﨨狝狝䶴狛g﨩䶵狝a䶴﨩﨩g狛a狝
+123	阿狛鼾㐀鼾﨨dg䶴㐁d
+316	狜﨨﨩a䶵d㐀g狝c狝䶵de
+807	e狛h齄狝e䶴啊h䶴㐁狚阿狜g䶴aa
+870	狝a狛g丄fe䶵﨨狜狛䶵b䶵狛a狚齄
+669	dc丄a丄狜﨨f齄﨨b齄hd鼾
+622	a狚齄㐁f阿阿狜c﨩狛b狝e
+216	fh﨨阿dcf㐀b㐀h㐀狛c﨩g狚
+700	狜﨩㐀阿﨨齄鼾狝阿b﨨﨩狚f䶴阿
+528	啊狚﨩狛鼾丄啊丄丄㐁a䶴狝d狜fd丄
+173	h狚ed㐀d狛bc阿㐀啊﨩
+3	䶴狝﨩狚㐁啊䶵齄齄﨩狜ee㐀㐁h狛﨨㐀
+567	a啊g狜丄狚c䶵h狝﨨h
+435	b齄阿阿丄g㐁cf丄阿ed
+943	㐁g䶵狛cc㐀䶵h狝﨩䶵啊
+365	狛啊d狝䶵cb﨩䶵bd﨨狝阿b
+298	䶵䶵﨩h鼾㐁a狛鼾狝狚䶵f狝
+377	bd䶵﨩齄䶵㐀h㐁㐁狛鼾狝
+951	g狚ee﨨ach狝f﨩㐀㐀䶵ba
+399	啊﨨鼾d鼾ff齄h丄﨨狚a阿ga啊齄c
+199	阿狛㐀eg齄狝狝g齄ga啊d狜㐁齄
+816	齄狛狛啊狜af狚狛㐁狜
+758	狛gc㐀狜fa﨨㐀㐁齄ab㐀h
\ No newline at end of file

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3.txt Tue Jan 20 23:26:26 2015
@@ -0,0 +1,14 @@
+1	䶵
+2	䶵
+3	狚
+4	䶵
+5	鼾
+6	䶵
+112	﨩﨨狝狝
+12	﨩﨨狝狝
+7	鼾
+8	䶵
+9	狚䶵
+10	狚䶵齄
+11	﨩齄䶵
+12	﨩﨨狝狝
\ No newline at end of file

Added: hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/inpdir/table3ToJoin.txt Tue Jan 20 23:26:26 2015
@@ -0,0 +1,5 @@
+12	﨩﨨狝狝
+2	䶵shouldnotshowup
+3	狚
+4	䶵
+11	﨩齄䶵

Added: hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf?rev=1653406&view=auto
==============================================================================
--- hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf (added)
+++ hive/trunk/hcatalog/src/test/e2e/templeton/tests/utf8.conf Tue Jan 20 23:26:26 2015
@@ -0,0 +1,134 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+###############################################################################
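+# curl command tests for templeton: UTF-8 (non-ASCII) characters in
+# file names, job arguments and data for Hive, Pig, MapReduce and
+# MapReduce streaming job submission, plus filtering jobs by name.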
+use utf8;
+
+$cfg = 
+{
+ 'driver' => 'Curl',
+
+ 'groups' => 
+ [
+##=============================================================================================================
+  {
+   'name' => 'Hive_UTF8',
+   'tests' => 
+   [
+    {
+    # Submitting Hive job with Unicode content on Unicode data
+     'num' => 1,
+     'method' => 'POST',
+     'url' => ':TEMPLETON_URL:/templeton/v1/hive',
+     'post_options' => ['user.name=:UNAME:','execute=drop table if exists utf8_table;create table utf8_table(row int, content string) ROW FORMAT DELIMITED FIELDS TERMINATED BY \'\t\';LOAD DATA INPATH \':INPDIR_HDFS:/table1.txt\' OVERWRITE INTO TABLE utf8_table;','statusdir=:OUTDIR:/status/Hive_UTF8_:TNUM:'],
+     'json_field_substr_match' => { 'id' => '\d+'},
+                                #results
+     'status_code' => 200,
+     'check_job_created' => 1,
+     'check_job_complete' => 'SUCCESS',
+     'check_job_exit_value' => 0,
+     'check_call_back' => 1,
+    },
+   ]
+  },
+##=============================================================================================================
+  {
+   'name' => 'Pig_UTF8',
+   'tests' => 
+   [
+    {
+    # Submitting Pig job with Unicode content on Unicode data
+     'num' => 1,
+     'method' => 'POST',
+     'url' => ':TEMPLETON_URL:/templeton/v1/pig',
+     'post_options' => ['user.name=:UNAME:','file=:INPDIR_HDFS:/PigJoin䶴ㄩ鼾丄狜〇work.pig','arg=-p', 'arg=INPDIR=:INPDIR_HDFS:','arg=-p','arg=OUTDIR=:OUTDIR:','statusdir=:OUTDIR:/status/Pig_UTF8_:TNUM:'],
+     'json_field_substr_match' => { 'id' => '\d+'},
+                                #results
+     'status_code' => 200,
+     'check_job_created' => 1,
+     'check_job_complete' => 'SUCCESS',
+     'check_call_back' => 1, 
+    },
+   ]
+  },
+##=============================================================================================================
+  {
+   'name' => 'MapReduce_UTF8',
+   'tests' => 
+   [
+    {
+    # Submitting MapReduce job with Unicode jar and input file names
+     'num' => 1,
+     'method' => 'POST',
+     'url' => ':TEMPLETON_URL:/templeton/v1/mapreduce/jar',
+     'post_options' => ['user.name=:UNAME:','arg=:INPDIR_HDFS:/artof䶴ㄩ鼾丄狜〇war.txt', 'arg= :OUTDIR:/utf8_wc.txt', 
+                        'jar=:INPDIR_HDFS:/hadoop_examples_䶴ㄩ鼾丄狜〇_2_2_0.jar', 'class=wordcount','statusdir=:OUTDIR:/status/MapRed_UTF8_:TNUM:'],
+     'json_field_substr_match' => { 'id' => '\d+'},
+                                #results
+     'status_code' => 200,
+     'check_job_created' => 1,
+     'check_job_complete' => 'SUCCESS',
+     'check_job_percent_complete' => 'map 100% reduce 100%',
+     'check_job_exit_value' => 0,
+     'check_call_back' => 1,
+    }, 
+   ]
+  }, 
+##=============================================================================================================
+  {
+   'name' => 'MapRedStreaming_UTF8',
+   'tests' => 
+   [
+    {
+     'num' => 1,
+     'method' => 'POST',
+     'url' => ':TEMPLETON_URL:/templeton/v1/mapreduce/streaming',
+     'post_options' => ['user.name=:UNAME:','input=:INPDIR_HDFS:/artof䶴ㄩ鼾丄狜〇war.txt', 'input=:INPDIR_HDFS:/artof䶴ㄩ鼾丄狜〇war.txt', 'output=:OUTDIR:/MapRedStreaming_UTF8_count', 'mapper=cat', 'reducer=wc','statusdir=:OUTDIR:/status/MapRedStreaming_UTF8_:TNUM:'],
+     'json_field_substr_match' => { 'id' => '\d+'},
+                                #results
+     'status_code' => 200,
+     'check_job_created' => 1,
+     'check_job_complete' => 'SUCCESS',
+     'check_job_exit_value' => 0,
+     'check_call_back' => 1,
+    },
+   ]
+  },
+##=============================================================================================================
+
+  {
+   'name' => 'JobFiltering_UTF8',
+   'tests' => 
+   [
+    {
+     'num' => 1,
+     'depends_on' => 'Pig_UTF8',
+     'method' => 'GET',
+     'url' => ':TEMPLETON_URL:/templeton/v1/jobs?user.name=:UNAME:&showall=true&fields=*',
+     'user_name' => ':UNAME:',
+     'format_header' => 'Content-Type: application/json',
+     'filter_job_names' => ['TempletonControllerJob', 'PigLatin:PigJoin䶴ㄩ鼾丄狜〇work.pig'],
+     'status_code' => 200,
+    },
+   ]
+  },
+ ]
+},
+  ;
\ No newline at end of file