You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by cw...@apache.org on 2011/09/26 21:44:01 UTC

svn commit: r1176010 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/io/ ql/src/test/org/apache/hadoop/hive/ql/exec/ ql/src/test/queries/clientpositi...

Author: cws
Date: Mon Sep 26 19:44:00 2011
New Revision: 1176010

URL: http://svn.apache.org/viewvc?rev=1176010&view=rev
Log:
HIVE-2457. Files in Avro-backed Hive tables do not have a .avro extension (Tom White via cws)

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
    hive/trunk/ql/src/test/queries/clientpositive/input44.q
    hive/trunk/ql/src/test/results/clientpositive/input44.q.out
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1176010&r1=1176009&r2=1176010&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Mon Sep 26 19:44:00 2011
@@ -140,6 +140,7 @@ public class HiveConf extends Configurat
     SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true),
     JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000),
     TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000),
+    OUTPUT_FILE_EXTENSION("hive.output.file.extension", null),
 
     // should hive determine whether to run in local mode automatically ?
     LOCALMODEAUTO("hive.exec.mode.local.auto", false),

Modified: hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml?rev=1176010&r1=1176009&r2=1176010&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml (original)
+++ hive/trunk/conf/hive-default.xml Mon Sep 26 19:44:00 2011
@@ -1199,4 +1199,10 @@
   <description>To cleanup the hive scratchdir while starting the hive server</description>
 </property>
 
+<property>
+  <name>hive.output.file.extension</name>
+  <value></value>
+  <description>String used as a file extension for output files. If not set, defaults to the codec extension for text files (e.g. ".gz"), or no extension otherwise.</description>
+</property>
+
 </configuration>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1176010&r1=1176009&r2=1176010&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Mon Sep 26 19:44:00 2011
@@ -448,14 +448,11 @@ public class FileSinkOperator extends Te
           // OutputFormat.getRecordWriter() is that
           // getRecordWriter does not give us enough control over the file name that
           // we create.
+          String extension = Utilities.getFileExtension(jc, isCompressed,
+              hiveOutputFormat);
           if (!bDynParts) {
-            fsp.finalPaths[filesIdx] = HiveFileFormatUtils.getOutputFormatFinalPath(
-                parent, taskId, jc, hiveOutputFormat, isCompressed, fsp.finalPaths[filesIdx]);
+            fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, parent, extension);
           } else {
-            String extension = null;
-            if (hiveOutputFormat instanceof HiveIgnoreKeyTextOutputFormat) {
-              extension = Utilities.getFileExtension(jc, isCompressed);
-            }
             fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, extension);
           }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1176010&r1=1176009&r2=1176010&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Mon Sep 26 19:44:00 2011
@@ -96,6 +96,7 @@ import org.apache.hadoop.hive.ql.QueryPl
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
 import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
 import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
@@ -876,16 +877,43 @@ public final class Utilities {
    * @param isCompressed
    *          Whether the output file is compressed or not
    * @return the required file extension (example: .gz)
+   * @deprecated Use {@link #getFileExtension(JobConf, boolean, HiveOutputFormat)}
    */
+  @Deprecated
   public static String getFileExtension(JobConf jc, boolean isCompressed) {
-    if (!isCompressed) {
-      return "";
-    } else {
+    return getFileExtension(jc, isCompressed, new HiveIgnoreKeyTextOutputFormat());
+  }
+
+  /**
+   * Based on compression option, output format, and configured output codec -
+   * get extension for output file. Text files require an extension, whereas
+   * others, like sequence files, do not.
+   * <p>
+   * The property <code>hive.output.file.extension</code> is used to determine
+   * the extension - if set, it will override other logic for choosing an
+   * extension.
+   *
+   * @param jc
+   *          Job Configuration
+   * @param isCompressed
+   *          Whether the output file is compressed or not
+   * @param hiveOutputFormat
+   *          The output format, used to detect if the format is text
+   * @return the required file extension (example: .gz)
+   */
+  public static String getFileExtension(JobConf jc, boolean isCompressed,
+      HiveOutputFormat<?, ?> hiveOutputFormat) {
+    String extension = HiveConf.getVar(jc, HiveConf.ConfVars.OUTPUT_FILE_EXTENSION);
+    if (!StringUtils.isEmpty(extension)) {
+      return extension;
+    }
+    if ((hiveOutputFormat instanceof HiveIgnoreKeyTextOutputFormat) && isCompressed) {
       Class<? extends CompressionCodec> codecClass = FileOutputFormat.getOutputCompressorClass(jc,
           DefaultCodec.class);
       CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, jc);
       return codec.getDefaultExtension();
     }
+    return "";
   }
 
   /**

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java?rev=1176010&r1=1176009&r2=1176010&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java Mon Sep 26 19:44:00 2011
@@ -107,7 +107,9 @@ public final class HiveFileFormatUtils {
    *          parent dir of the expected final output path
    * @param jc
    *          job configuration
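+   * @deprecated Compute the extension with {@link org.apache.hadoop.hive.ql.exec.Utilities#getFileExtension(JobConf, boolean, HiveOutputFormat)} instead.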
    */
+  @Deprecated
   public static Path getOutputFormatFinalPath(Path parent, String taskId, JobConf jc,
       HiveOutputFormat<?, ?> hiveOutputFormat, boolean isCompressed,
       Path defaultFinalPath) throws IOException {

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java?rev=1176010&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestUtilities.java Mon Sep 26 19:44:00 2011
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import static org.apache.hadoop.hive.ql.exec.Utilities.getFileExtension;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+
+public class TestUtilities extends TestCase {
+
+  public void testGetFileExtension() {
+    JobConf jc = new JobConf();
+    assertEquals("No extension for uncompressed unknown format", "",
+        getFileExtension(jc, false, null));
+    assertEquals("No extension for compressed unknown format", "",
+        getFileExtension(jc, true, null));
+    assertEquals("No extension for uncompressed text format", "",
+        getFileExtension(jc, false, new HiveIgnoreKeyTextOutputFormat()));
+    assertEquals("Deflate for compressed text format", ".deflate",
+        getFileExtension(jc, true, new HiveIgnoreKeyTextOutputFormat()));
+    assertEquals("No extension for uncompressed default format", "",
+        getFileExtension(jc, false));
+    assertEquals("Deflate for compressed default format", ".deflate",
+        getFileExtension(jc, true));
+
+    String extension = ".myext";
+    jc.set("hive.output.file.extension", extension);
+    assertEquals("Custom extension for uncompressed unknown format", extension,
+        getFileExtension(jc, false, null));
+    assertEquals("Custom extension for compressed unknown format", extension,
+        getFileExtension(jc, true, null));
+    assertEquals("Custom extension for uncompressed text format", extension,
+        getFileExtension(jc, false, new HiveIgnoreKeyTextOutputFormat()));
+    assertEquals("Custom extension for compressed text format", extension,
+        getFileExtension(jc, true, new HiveIgnoreKeyTextOutputFormat()));
+  }
+}

Added: hive/trunk/ql/src/test/queries/clientpositive/input44.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/input44.q?rev=1176010&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/input44.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/input44.q Mon Sep 26 19:44:00 2011
@@ -0,0 +1,6 @@
+CREATE TABLE dest(key INT, value STRING) STORED AS TEXTFILE;
+
+SET hive.output.file.extension=.txt;
+INSERT OVERWRITE TABLE dest SELECT src.* FROM src;
+
+dfs -cat ../build/ql/test/data/warehouse/dest/*.txt
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/input44.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input44.q.out?rev=1176010&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input44.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/input44.q.out Mon Sep 26 19:44:00 2011
@@ -0,0 +1,515 @@
+PREHOOK: query: CREATE TABLE dest(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest
+PREHOOK: query: INSERT OVERWRITE TABLE dest SELECT src.* FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest
+POSTHOOK: query: INSERT OVERWRITE TABLE dest SELECT src.* FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest
+POSTHOOK: Lineage: dest.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+238val_238
+86val_86
+311val_311
+27val_27
+165val_165
+409val_409
+255val_255
+278val_278
+98val_98
+484val_484
+265val_265
+193val_193
+401val_401
+150val_150
+273val_273
+224val_224
+369val_369
+66val_66
+128val_128
+213val_213
+146val_146
+406val_406
+429val_429
+374val_374
+152val_152
+469val_469
+145val_145
+495val_495
+37val_37
+327val_327
+281val_281
+277val_277
+209val_209
+15val_15
+82val_82
+403val_403
+166val_166
+417val_417
+430val_430
+252val_252
+292val_292
+219val_219
+287val_287
+153val_153
+193val_193
+338val_338
+446val_446
+459val_459
+394val_394
+237val_237
+482val_482
+174val_174
+413val_413
+494val_494
+207val_207
+199val_199
+466val_466
+208val_208
+174val_174
+399val_399
+396val_396
+247val_247
+417val_417
+489val_489
+162val_162
+377val_377
+397val_397
+309val_309
+365val_365
+266val_266
+439val_439
+342val_342
+367val_367
+325val_325
+167val_167
+195val_195
+475val_475
+17val_17
+113val_113
+155val_155
+203val_203
+339val_339
+0val_0
+455val_455
+128val_128
+311val_311
+316val_316
+57val_57
+302val_302
+205val_205
+149val_149
+438val_438
+345val_345
+129val_129
+170val_170
+20val_20
+489val_489
+157val_157
+378val_378
+221val_221
+92val_92
+111val_111
+47val_47
+72val_72
+4val_4
+280val_280
+35val_35
+427val_427
+277val_277
+208val_208
+356val_356
+399val_399
+169val_169
+382val_382
+498val_498
+125val_125
+386val_386
+437val_437
+469val_469
+192val_192
+286val_286
+187val_187
+176val_176
+54val_54
+459val_459
+51val_51
+138val_138
+103val_103
+239val_239
+213val_213
+216val_216
+430val_430
+278val_278
+176val_176
+289val_289
+221val_221
+65val_65
+318val_318
+332val_332
+311val_311
+275val_275
+137val_137
+241val_241
+83val_83
+333val_333
+180val_180
+284val_284
+12val_12
+230val_230
+181val_181
+67val_67
+260val_260
+404val_404
+384val_384
+489val_489
+353val_353
+373val_373
+272val_272
+138val_138
+217val_217
+84val_84
+348val_348
+466val_466
+58val_58
+8val_8
+411val_411
+230val_230
+208val_208
+348val_348
+24val_24
+463val_463
+431val_431
+179val_179
+172val_172
+42val_42
+129val_129
+158val_158
+119val_119
+496val_496
+0val_0
+322val_322
+197val_197
+468val_468
+393val_393
+454val_454
+100val_100
+298val_298
+199val_199
+191val_191
+418val_418
+96val_96
+26val_26
+165val_165
+327val_327
+230val_230
+205val_205
+120val_120
+131val_131
+51val_51
+404val_404
+43val_43
+436val_436
+156val_156
+469val_469
+468val_468
+308val_308
+95val_95
+196val_196
+288val_288
+481val_481
+457val_457
+98val_98
+282val_282
+197val_197
+187val_187
+318val_318
+318val_318
+409val_409
+470val_470
+137val_137
+369val_369
+316val_316
+169val_169
+413val_413
+85val_85
+77val_77
+0val_0
+490val_490
+87val_87
+364val_364
+179val_179
+118val_118
+134val_134
+395val_395
+282val_282
+138val_138
+238val_238
+419val_419
+15val_15
+118val_118
+72val_72
+90val_90
+307val_307
+19val_19
+435val_435
+10val_10
+277val_277
+273val_273
+306val_306
+224val_224
+309val_309
+389val_389
+327val_327
+242val_242
+369val_369
+392val_392
+272val_272
+331val_331
+401val_401
+242val_242
+452val_452
+177val_177
+226val_226
+5val_5
+497val_497
+402val_402
+396val_396
+317val_317
+395val_395
+58val_58
+35val_35
+336val_336
+95val_95
+11val_11
+168val_168
+34val_34
+229val_229
+233val_233
+143val_143
+472val_472
+322val_322
+498val_498
+160val_160
+195val_195
+42val_42
+321val_321
+430val_430
+119val_119
+489val_489
+458val_458
+78val_78
+76val_76
+41val_41
+223val_223
+492val_492
+149val_149
+449val_449
+218val_218
+228val_228
+138val_138
+453val_453
+30val_30
+209val_209
+64val_64
+468val_468
+76val_76
+74val_74
+342val_342
+69val_69
+230val_230
+33val_33
+368val_368
+103val_103
+296val_296
+113val_113
+216val_216
+367val_367
+344val_344
+167val_167
+274val_274
+219val_219
+239val_239
+485val_485
+116val_116
+223val_223
+256val_256
+263val_263
+70val_70
+487val_487
+480val_480
+401val_401
+288val_288
+191val_191
+5val_5
+244val_244
+438val_438
+128val_128
+467val_467
+432val_432
+202val_202
+316val_316
+229val_229
+469val_469
+463val_463
+280val_280
+2val_2
+35val_35
+283val_283
+331val_331
+235val_235
+80val_80
+44val_44
+193val_193
+321val_321
+335val_335
+104val_104
+466val_466
+366val_366
+175val_175
+403val_403
+483val_483
+53val_53
+105val_105
+257val_257
+406val_406
+409val_409
+190val_190
+406val_406
+401val_401
+114val_114
+258val_258
+90val_90
+203val_203
+262val_262
+348val_348
+424val_424
+12val_12
+396val_396
+201val_201
+217val_217
+164val_164
+431val_431
+454val_454
+478val_478
+298val_298
+125val_125
+431val_431
+164val_164
+424val_424
+187val_187
+382val_382
+5val_5
+70val_70
+397val_397
+480val_480
+291val_291
+24val_24
+351val_351
+255val_255
+104val_104
+70val_70
+163val_163
+438val_438
+119val_119
+414val_414
+200val_200
+491val_491
+237val_237
+439val_439
+360val_360
+248val_248
+479val_479
+305val_305
+417val_417
+199val_199
+444val_444
+120val_120
+429val_429
+169val_169
+443val_443
+323val_323
+325val_325
+277val_277
+230val_230
+478val_478
+178val_178
+468val_468
+310val_310
+317val_317
+333val_333
+493val_493
+460val_460
+207val_207
+249val_249
+265val_265
+480val_480
+83val_83
+136val_136
+353val_353
+172val_172
+214val_214
+462val_462
+233val_233
+406val_406
+133val_133
+175val_175
+189val_189
+454val_454
+375val_375
+401val_401
+421val_421
+407val_407
+384val_384
+256val_256
+26val_26
+134val_134
+67val_67
+384val_384
+379val_379
+18val_18
+462val_462
+492val_492
+100val_100
+298val_298
+9val_9
+341val_341
+498val_498
+146val_146
+458val_458
+362val_362
+186val_186
+285val_285
+348val_348
+167val_167
+18val_18
+273val_273
+183val_183
+281val_281
+344val_344
+97val_97
+469val_469
+315val_315
+84val_84
+28val_28
+37val_37
+448val_448
+152val_152
+348val_348
+307val_307
+194val_194
+414val_414
+477val_477
+222val_222
+126val_126
+90val_90
+169val_169
+403val_403
+400val_400
+200val_200
+97val_97