You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hcatalog-commits@incubator.apache.org by ga...@apache.org on 2012/03/03 22:13:09 UTC

svn commit: r1296714 - in /incubator/hcatalog/branches/branch-0.4: ./ src/java/org/apache/hcatalog/common/ src/java/org/apache/hcatalog/mapreduce/ src/test/e2e/hcatalog/drivers/ src/test/e2e/hcatalog/tests/ src/test/org/apache/hcatalog/mapreduce/

Author: gates
Date: Sat Mar  3 22:13:09 2012
New Revision: 1296714

URL: http://svn.apache.org/viewvc?rev=1296714&view=rev
Log:
HCATALOG-264 Barrier for HCatOutputFormat for Pig/Mapreduce if the table contains features only supported in Hive

Modified:
    incubator/hcatalog/branches/branch-0.4/CHANGES.txt
    incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/common/ErrorType.java
    incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java
    incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/drivers/TestDriverPig.pm
    incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/tests/pig.conf
    incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java

Modified: incubator/hcatalog/branches/branch-0.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/CHANGES.txt?rev=1296714&r1=1296713&r2=1296714&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/CHANGES.txt (original)
+++ incubator/hcatalog/branches/branch-0.4/CHANGES.txt Sat Mar  3 22:13:09 2012
@@ -21,7 +21,9 @@ Apache HCatalog Change Log
 Release 0.4.0 - Unreleased
 
   INCOMPATIBLE CHANGES
-  HCAT-279 Remove remaining code mentioning isd/osd (diajy via gates)
+  HCAT-264 Barrier for HCatOutputFormat for Pig/Mapreduce if the table contains features only supported in Hive (daijy via gates)
+
+  HCAT-279 Remove remaining code mentioning isd/osd (daijy via gates)
 
   HCAT-261 Enable more DDL statements for HCat (daijy via gates)
 

Modified: incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/common/ErrorType.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/common/ErrorType.java?rev=1296714&r1=1296713&r2=1296714&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/common/ErrorType.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/common/ErrorType.java Sat Mar  3 22:13:09 2012
@@ -45,6 +45,7 @@ public enum ErrorType {
     ERROR_TOO_MANY_DYNAMIC_PTNS         (2013, "Attempt to create too many dynamic partitions"),
     ERROR_INIT_LOADER                   (2014,  "Error initializing Pig loader"),
     ERROR_INIT_STORER                   (2015,  "Error initializing Pig storer"),
+    ERROR_NOT_SUPPORTED                 (2016,  "Error operation not supported"),
 
     /* Authorization Errors 3000 - 3999 */
     ERROR_ACCESS_CONTROL           (3000, "Permission denied"),

Modified: incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java?rev=1296714&r1=1296713&r2=1296714&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java Sat Mar  3 22:13:09 2012
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -77,7 +78,29 @@ public class HCatOutputFormat extends HC
         hiveConf = HCatUtil.getHiveConf(null, conf);
         client = HCatUtil.createHiveClient(hiveConf);
         Table table = client.getTable(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName());
-
+        
+        List<String> indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE);
+        
+        for (String indexName : indexList) {
+            Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName);
+            if (!index.isDeferredRebuild()) {
+                throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported");
+            }
+        }
+        StorageDescriptor sd = table.getSd();
+        
+        if (sd.isCompressed()) {
+            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported");
+        }
+        
+        if (sd.getBucketCols()!=null && !sd.getBucketCols().isEmpty()) {
+            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported");
+        }
+        
+        if (sd.getSortCols()!=null && !sd.getSortCols().isEmpty()) {
+            throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported");
+        }
+        
         if (table.getPartitionKeysSize() == 0 ){
           if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())){
             // attempt made to save partition values in non-partitioned table - throw error.

Modified: incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/drivers/TestDriverPig.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/drivers/TestDriverPig.pm?rev=1296714&r1=1296713&r2=1296714&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/drivers/TestDriverPig.pm (original)
+++ incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/drivers/TestDriverPig.pm Sat Mar  3 22:13:09 2012
@@ -192,7 +192,7 @@ sub runTest
        # run a another Pig script to dump the results of the table.
        my $result;
        if (defined($testCmd->{'result_table'})) {
-           $result = $self->runPig( $testCmd, $log, 0);
+           $result = $self->runPig( $testCmd, $log, 0, 1);
            my @results = ();
            my @outputs = ();
            if (ref($testCmd->{'result_table'}) ne 'ARRAY') {
@@ -214,7 +214,7 @@ sub runTest
                    $tableName = $results[$i];
 	           $modifiedTestCmd{'num'} = $testCmd->{'num'} . "_" . $i . "_benchmark";
                    $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';";
-                   my $r = $self->runPig(\%modifiedTestCmd, $log, 1);
+                   my $r = $self->runPig(\%modifiedTestCmd, $log, 1, 1);
 	           $outputs[$i] = $r->{'output'};
                } else {
                    $localdir = $testCmd->{'localpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".out/$id";
@@ -233,7 +233,7 @@ sub runTest
            }
        }
        else {
-           $result = $self->runPig( $testCmd, $log, 1);
+           $result = $self->runPig( $testCmd, $log, 1, 1);
        }
        return $result;
     } elsif(  $testCmd->{'script'} ){
@@ -447,7 +447,7 @@ sub dumpPigTable
 
 sub runPig
 {
-    my ($self, $testCmd, $log, $copyResults) = @_;
+    my ($self, $testCmd, $log, $copyResults, $noFailOnFail) = @_;
     my $subName  = (caller(0))[3];
 
     my %result;
@@ -488,8 +488,13 @@ sub runPig
     print $log "Setting PIG_CLASSPATH to $ENV{'PIG_CLASSPATH'}\n";
     print $log "$0::$className::$subName INFO: Going to run pig command: @cmd\n";
 
-    IPC::Run::run(\@cmd, \undef, $log, $log) or
+    my $runrc = IPC::Run::run(\@cmd, \undef, $log, $log);
+
+    if (defined($noFailOnFail) && $noFailOnFail) {
+    } else {
         die "Failed running $pigfile\n";
+    }
+
     $result{'rc'} = $? >> 8;
 
 

Modified: incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/tests/pig.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/tests/pig.conf?rev=1296714&r1=1296713&r2=1296714&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/tests/pig.conf (original)
+++ incubator/hcatalog/branches/branch-0.4/src/test/e2e/hcatalog/tests/pig.conf Sat Mar  3 22:13:09 2012
@@ -280,6 +280,28 @@ store a into ':OUTPATH:';\,
                                 }
                         ],
                 }, # end g
-
+                {
+                        'name' => 'Pig_HCat_Barrier',
+                        'tests' => [
+                                {
+                                 'num' => 1
+                                ,'hcat_prep'=>q\drop table if exists pig_hcat_barrier_1;
+create table pig_hcat_barrier_1 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\
+                                ,'pig' => q\
+a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double);
+store a into 'pig_hcat_barrier_1' using org.apache.hcatalog.pig.HCatStorer('b=1'); \,
+                                ,'expected_err_regex' => 'not supported'
+                                },
+                                {
+                                 'num' => 2
+                                ,'hcat_prep'=>q\drop table if exists pig_hcat_barrier_2;
+create table pig_hcat_barrier_2 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) SORTED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\
+                                ,'pig' => q\
+a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double);
+store a into 'pig_hcat_barrier_2' using org.apache.hcatalog.pig.HCatStorer('b=1'); \,
+                                ,'expected_err_regex' => 'not supported'
+                                },
+                        ],
+                }, # end g
          ]
 }

Modified: incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java
URL: http://svn.apache.org/viewvc/incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java?rev=1296714&r1=1296713&r2=1296714&view=diff
==============================================================================
--- incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java (original)
+++ incubator/hcatalog/branches/branch-0.4/src/test/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java Sat Mar  3 22:13:09 2012
@@ -109,8 +109,8 @@ public class TestHCatOutputFormat extend
     sd.setOutputFormat(RCFileOutputFormat.class.getName());
     sd.setParameters(new HashMap<String, String>());
     sd.getParameters().put("test_param_1", "Use this for comments etc");
-    sd.setBucketCols(new ArrayList<String>(2));
-    sd.getBucketCols().add("name");
+    //sd.setBucketCols(new ArrayList<String>(2));
+    //sd.getBucketCols().add("name");
     sd.setSerdeInfo(new SerDeInfo());
     sd.getSerdeInfo().setName(tbl.getTableName());
     sd.getSerdeInfo().setParameters(new HashMap<String, String>());