Posted to commits@hive.apache.org by nz...@apache.org on 2010/07/20 03:24:29 UTC
svn commit: r965708 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/test/org/apache/hadoop/hive/ql/exec/ ql/src/test/results/clientpositive/
Author: nzhang
Date: Tue Jul 20 01:24:29 2010
New Revision: 965708
URL: http://svn.apache.org/viewvc?rev=965708&view=rev
Log:
HIVE-1463. Hive output file names are unnecessarily large
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/build-common.xml
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Jul 20 01:24:29 2010
@@ -35,6 +35,9 @@ Trunk - Unreleased
Operator.forward()
(Yongqiang He via Ning Zhang)
+ HIVE-1463. Hive output file names are unnecessarily large
+ (Joydeep Sen Sarma via Ning Zhang)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/hive/trunk/build-common.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build-common.xml?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/build-common.xml (original)
+++ hadoop/hive/trunk/build-common.xml Tue Jul 20 01:24:29 2010
@@ -407,6 +407,7 @@
<jvmarg value="-Xdebug"/>
<jvmarg value="-Xrunjdwp:transport=dt_socket,address=8000,server=y,suspend=y"/> -->
<env key="HADOOP_HOME" value="${hadoop.root}"/>
+ <env key="HADOOP_CLASSPATH" value="${test.data.dir}/conf"/>
<env key="TZ" value="US/Pacific"/>
<sysproperty key="test.output.overwrite" value="${overwrite}"/>
<sysproperty key="test.service.standalone.server" value="${standalone}"/>
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Tue Jul 20 01:24:29 2010
@@ -278,8 +278,8 @@ public final class Utilities {
if(!HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJT).equals("local")) {
// use the default file system of the job
- FileSystem fs = FileSystem.get(job);
Path planPath = new Path(hiveScratchDir, "plan." + randGen.nextInt());
+ FileSystem fs = planPath.getFileSystem(job);
FSDataOutputStream out = fs.create(planPath);
serializeMapRedWork(w, out);
HiveConf.setVar(job, HiveConf.ConfVars.PLAN, planPath.toString());
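The reordering above matters because the Hive scratch directory need not live on the job's default filesystem. A minimal standalone sketch (hypothetical path, not part of the commit) of the difference:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class PlanFsSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The scratch dir can point at a filesystem other than the default,
        // e.g. file:// while fs.default.name is an hdfs:// URI.
        Path planPath = new Path("file:///tmp/hive-scratch/plan.12345");
        // Resolve the filesystem that actually owns the plan path ...
        FileSystem fs = planPath.getFileSystem(conf);
        // ... whereas FileSystem.get(conf) returns the default filesystem,
        // so fs.create(planPath) could target the wrong one.
        System.out.println(fs.getUri());
      }
    }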
@@ -467,9 +467,16 @@ public final class Utilities {
public static String getTaskId(Configuration hconf) {
String taskid = (hconf == null) ? null : hconf.get("mapred.task.id");
if ((taskid == null) || taskid.equals("")) {
- return ("" + randGen.nextInt());
+ return ("" + Math.abs(randGen.nextInt()));
} else {
- return taskid.replaceAll("task_[0-9]+_", "");
+ /* Extract the task and attempt id from the Hadoop taskid.
+ In version 17 the leading component was 'task_'; thereafter
+ the leading component is 'attempt_'. Hadoop 17 also appears
+ to have used _map_ and _reduce_ to denote map/reduce
+ task types.
+ */
+ String ret = taskid.replaceAll(".*_[mr]_", "").replaceAll(".*_(map|reduce)_", "");
+ return (ret);
}
}
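To see what the two chained replaceAll calls leave behind, here is a standalone sketch (inputs borrowed from the new tests further down; not part of the commit):

    public class TaskIdRegexSketch {
      public static void main(String[] args) {
        String[] ids = {
          "attempt_200707121733_0003_m_000005_0", // post-17 'attempt_' format
          "task_200709221812_0001_m_000005_0",    // hadoop 17 'task_' format
          "job_local_0001_reduce_000005"          // 17 local-mode format
        };
        for (String taskid : ids) {
          // the first pattern strips through _m_/_r_, the second through
          // _map_/_reduce_; exactly one of them fires per format
          String ret = taskid.replaceAll(".*_[mr]_", "")
                             .replaceAll(".*_(map|reduce)_", "");
          System.out.println(taskid + " -> " + ret);
        }
      }
    }

This prints "000005_0" for the first two ids and "000005" for the local-mode form, matching the expectations in the updated TestOperators below.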
@@ -958,31 +965,28 @@ public final class Utilities {
/**
* The first group will contain the task id. The second group is the optional
- * extension. The file name looks like: "24931_r_000000_0" or
- * "24931_r_000000_0.gz"
+ * extension. The file name looks like "0_0" or "0_0.gz". There may be a leading
+ * prefix (tmp_). Since getTaskId() can return a plain integer, this should also
+ * match a pure integer.
*/
- private static Pattern fileNameTaskIdRegex = Pattern.compile("^.*_([0-9]*)_[0-9](\\..*)?$");
+ private static Pattern fileNameTaskIdRegex = Pattern.compile("^.*?([0-9]+)(_[0-9])?(\\..*)?$");
/**
- * Local job name looks like "job_local_1_map_0000", where 1 is job ID and 0000 is task ID.
- */
- private static Pattern fileNameLocalTaskIdRegex = Pattern.compile(".*local.*_([0-9]*)$");
-
- /**
- * Get the task id from the filename. E.g., get "000000" out of
- * "24931_r_000000_0" or "24931_r_000000_0.gz"
+ * Get the task id from the filename.
+ * It is assumed that the filename is derived from the output of getTaskId.
+ *
+ * @param filename filename to extract taskid from
*/
public static String getTaskIdFromFilename(String filename) {
String taskId = filename;
- Matcher m = fileNameTaskIdRegex.matcher(filename);
+ int dirEnd = filename.lastIndexOf(Path.SEPARATOR);
+ if (dirEnd != -1)
+ taskId = filename.substring(dirEnd + 1);
+
+ Matcher m = fileNameTaskIdRegex.matcher(taskId);
if (!m.matches()) {
- Matcher m2 = fileNameLocalTaskIdRegex.matcher(filename);
- if (!m2.matches()) {
- LOG.warn("Unable to get task id from file name: " + filename
- + ". Using full filename as task id.");
- } else {
- taskId = m2.group(1);
- }
+ LOG.warn("Unable to get task id from file name: " + filename
+ + ". Using last component" + taskId + " as task id.");
} else {
taskId = m.group(1);
}
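A standalone sketch (not part of the commit) of how the relaxed pattern pulls the task id out of the new short file names, including the pure-integer case produced by getTaskId():

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class FileNameTaskIdSketch {
      private static final Pattern P =
          Pattern.compile("^.*?([0-9]+)(_[0-9])?(\\..*)?$");

      public static void main(String[] args) {
        for (String name : new String[] {"000005_0", "000005_0.gz", "1234567"}) {
          Matcher m = P.matcher(name);
          // group(1) is the task id; the attempt suffix and the extension
          // are both optional
          System.out.println(name + " -> " + (m.matches() ? m.group(1) : name));
        }
      }
    }

The first two names yield "000005" and the bare integer is returned whole, so a single pattern now covers what previously needed a separate local-mode regex.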
@@ -991,17 +995,21 @@ public final class Utilities {
}
/**
- * Replace the task id from the filename. E.g., replace "000000" out of
- * "24931_r_000000_0" or "24931_r_000000_0.gz" by 33 to
- * "24931_r_000033_0" or "24931_r_000033_0.gz"
+ * Replace the task id in the filename.
+ * It is assumed that the filename is derived from the output of getTaskId.
+ *
+ * @param filename filename in which to replace the task id, e.g.,
+ * "0_0" or "0_0.gz" with bucket 33 becomes
+ * "33_0" or "33_0.gz"
*/
public static String replaceTaskIdFromFilename(String filename, int bucketNum) {
String taskId = getTaskIdFromFilename(filename);
String newTaskId = replaceTaskId(taskId, bucketNum);
- return replaceTaskIdFromFilename(filename, taskId, newTaskId);
+ String ret = replaceTaskIdFromFilename(filename, taskId, newTaskId);
+ return (ret);
}
- public static String replaceTaskId(String taskId, int bucketNum) {
+ private static String replaceTaskId(String taskId, int bucketNum) {
String strBucketNum = String.valueOf(bucketNum);
int bucketNumLen = strBucketNum.length();
int taskIdLen = taskId.length();
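The remainder of replaceTaskId (context not shown in this hunk) left-pads the bucket number with zeros up to the width of the original task id. A standalone sketch of that behavior, inferred from the examples in the javadoc above:

    public class ReplaceTaskIdSketch {
      // Inferred behavior: "000000" with bucket 33 -> "000033", while a short
      // id like "0" with bucket 33 -> "33" (nothing left to pad).
      static String replaceTaskId(String taskId, int bucketNum) {
        String strBucketNum = String.valueOf(bucketNum);
        StringBuilder padded = new StringBuilder();
        for (int i = strBucketNum.length(); i < taskId.length(); i++) {
          padded.append('0');
        }
        return padded.append(strBucketNum).toString();
      }

      public static void main(String[] args) {
        System.out.println(replaceTaskId("000000", 33)); // 000033
        System.out.println(replaceTaskId("0", 33));      // 33
      }
    }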
@@ -1020,7 +1028,7 @@ public final class Utilities {
* @param newTaskId
* @return
*/
- public static String replaceTaskIdFromFilename(String filename,
+ private static String replaceTaskIdFromFilename(String filename,
String oldTaskId, String newTaskId) {
String[] spl = filename.split(oldTaskId);
Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java Tue Jul 20 01:24:29 2010
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.exec;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.LinkedList;
@@ -50,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.Pl
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ScriptDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.TextInputFormat;
@@ -433,33 +433,13 @@ public class TestExecDriver extends Test
mr.setReducer(op5);
}
- private File generatePlanFile() throws Exception {
- File scratchDir = new File((new HiveConf(TestExecDriver.class))
- .getVar(ConfVars.SCRATCHDIR));
- File planFile = File.createTempFile("plan", ".xml", scratchDir);
- System.out.println("Generating plan file " + planFile.toString());
- FileOutputStream out = new FileOutputStream(planFile);
- Utilities.serializeMapRedWork(mr, out);
- return planFile;
- }
-
- private void executePlan(File planFile) throws Exception {
+ private void executePlan() throws Exception {
String testName = new Exception().getStackTrace()[1].getMethodName();
- String cmdLine = conf.getVar(HiveConf.ConfVars.HADOOPBIN) + " jar "
- + conf.getJar() + " org.apache.hadoop.hive.ql.exec.ExecDriver -plan "
- + planFile.toString() + " " + ExecDriver.generateCmdLine(conf);
- System.out.println("Executing: " + cmdLine);
- Process executor = Runtime.getRuntime().exec(cmdLine);
-
- StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(),
- null, System.out);
- StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(),
- null, System.err);
-
- outPrinter.start();
- errPrinter.start();
-
- int exitVal = executor.waitFor();
+ MapRedTask mrtask = new MapRedTask();
+ DriverContext dctx = new DriverContext ();
+ mrtask.setWork(mr);
+ mrtask.initialize(conf, null, dctx);
+ int exitVal = mrtask.execute(dctx);
if (exitVal != 0) {
System.out.println(testName + " execution failed with exit status: "
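The net effect of this hunk: instead of forking a separate JVM via "hadoop jar ... ExecDriver -plan <file>", the test now drives the same work in-process. A condensed sketch of the new path (calls exactly as they appear in the diff; mr and conf come from the test fixture):

    MapRedTask mrtask = new MapRedTask();
    DriverContext dctx = new DriverContext();
    mrtask.setWork(mr);                  // attach the MapredWork built by populate*()
    mrtask.initialize(conf, null, dctx); // no QueryPlan is needed for these tests
    int exitVal = mrtask.execute(dctx);  // 0 on success, checked below

generatePlanFile() goes away because the execution path serializes the plan itself; the Utilities.java hunk above touches exactly that code.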
@@ -475,8 +455,7 @@ public class TestExecDriver extends Test
try {
populateMapPlan1(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
- File planFile = generatePlanFile();
- executePlan(planFile);
+ executePlan();
fileDiff("lt100.txt.deflate", "mapplan1.out");
} catch (Throwable e) {
e.printStackTrace();
@@ -490,8 +469,7 @@ public class TestExecDriver extends Test
try {
populateMapPlan2(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src"));
- File planFile = generatePlanFile();
- executePlan(planFile);
+ executePlan();
fileDiff("lt100.txt", "mapplan2.out");
} catch (Throwable e) {
e.printStackTrace();
@@ -506,8 +484,7 @@ public class TestExecDriver extends Test
try {
populateMapRedPlan1(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
"src"));
- File planFile = generatePlanFile();
- executePlan(planFile);
+ executePlan();
fileDiff("kv1.val.sorted.txt", "mapredplan1.out");
} catch (Throwable e) {
e.printStackTrace();
@@ -522,8 +499,7 @@ public class TestExecDriver extends Test
try {
populateMapRedPlan2(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
"src"));
- File planFile = generatePlanFile();
- executePlan(planFile);
+ executePlan();
fileDiff("lt100.sorted.txt", "mapredplan2.out");
} catch (Throwable e) {
e.printStackTrace();
@@ -538,8 +514,7 @@ public class TestExecDriver extends Test
try {
populateMapRedPlan3(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
"src"), db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "src2"));
- File planFile = generatePlanFile();
- executePlan(planFile);
+ executePlan();
fileDiff("kv1kv2.cogroup.txt", "mapredplan3.out");
} catch (Throwable e) {
e.printStackTrace();
@@ -554,8 +529,7 @@ public class TestExecDriver extends Test
try {
populateMapRedPlan4(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
"src"));
- File planFile = generatePlanFile();
- executePlan(planFile);
+ executePlan();
fileDiff("kv1.string-sorted.txt", "mapredplan4.out");
} catch (Throwable e) {
e.printStackTrace();
@@ -570,8 +544,7 @@ public class TestExecDriver extends Test
try {
populateMapRedPlan5(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
"src"));
- File planFile = generatePlanFile();
- executePlan(planFile);
+ executePlan();
fileDiff("kv1.string-sorted.txt", "mapredplan5.out");
} catch (Throwable e) {
e.printStackTrace();
@@ -586,8 +559,7 @@ public class TestExecDriver extends Test
try {
populateMapRedPlan6(db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
"src"));
- File planFile = generatePlanFile();
- executePlan(planFile);
+ executePlan();
fileDiff("lt100.sorted.txt", "mapredplan6.out");
} catch (Throwable e) {
e.printStackTrace();
Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java Tue Jul 20 01:24:29 2010
@@ -26,6 +26,7 @@ import java.util.Map;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.plan.CollectDesc;
@@ -137,48 +138,47 @@ public class TestOperators extends TestC
}
}
- public void testFileSinkOperator() throws Throwable {
- try {
- System.out.println("Testing FileSink Operator");
- // col1
- ExprNodeDesc exprDesc1 = TestExecDriver.getStringColumn("col1");
-
- // col2
- ExprNodeDesc expr1 = TestExecDriver.getStringColumn("col0");
- ExprNodeDesc expr2 = new ExprNodeConstantDesc("1");
- ExprNodeDesc exprDesc2 = TypeCheckProcFactory.DefaultExprProcessor
- .getFuncExprNodeDesc("concat", expr1, expr2);
-
- // select operator to project these two columns
- ArrayList<ExprNodeDesc> earr = new ArrayList<ExprNodeDesc>();
- earr.add(exprDesc1);
- earr.add(exprDesc2);
- ArrayList<String> outputCols = new ArrayList<String>();
- for (int i = 0; i < earr.size(); i++) {
- outputCols.add("_col" + i);
- }
- SelectDesc selectCtx = new SelectDesc(earr, outputCols);
- Operator<SelectDesc> op = OperatorFactory.get(SelectDesc.class);
- op.setConf(selectCtx);
-
- // fileSinkOperator to dump the output of the select
- // fileSinkDesc fsd = new fileSinkDesc ("file:///tmp" + File.separator +
- // System.getProperty("user.name") + File.separator +
- // "TestFileSinkOperator",
- // Utilities.defaultTd, false);
- // Operator<fileSinkDesc> flop = OperatorFactory.getAndMakeChild(fsd, op);
+ private void testTaskIds(String [] taskIds, String expectedAttemptId, String expectedTaskId) {
+ Configuration conf = new JobConf(TestOperators.class);
+ for (String one: taskIds) {
+ conf.set("mapred.task.id", one);
+ String attemptId = Utilities.getTaskId(conf);
+ assertEquals(expectedAttemptId, attemptId);
+ assertEquals(Utilities.getTaskIdFromFilename(attemptId), expectedTaskId);
+ assertEquals(Utilities.getTaskIdFromFilename(attemptId + ".gz"), expectedTaskId);
+ assertEquals(Utilities.getTaskIdFromFilename
+ (Utilities.toTempPath(new Path(attemptId + ".gz")).toString()), expectedTaskId);
+ }
+ }
- op.initialize(new JobConf(TestOperators.class),
- new ObjectInspector[] {r[0].oi});
+ /**
+ * More tests need to be added here. Currently this only checks some basic
+ * file-naming utilities.
+ * The old test was deactivated as part of HIVE-405.
+ */
+ public void testFileSinkOperator() throws Throwable {
- // evaluate on row
- for (int i = 0; i < 5; i++) {
- op.process(r[i].o, 0);
- }
- op.close(false);
+ try {
+ testTaskIds (new String [] {
+ "attempt_200707121733_0003_m_000005_0",
+ "attempt_local_0001_m_000005_0",
+ "task_200709221812_0001_m_000005_0",
+ "task_local_0001_m_000005_0"
+ }, "000005_0", "000005");
+
+ testTaskIds (new String [] {
+ "job_local_0001_map_000005",
+ "job_local_0001_reduce_000005",
+ }, "000005", "000005");
+
+ testTaskIds (new String [] {"1234567"},
+ "1234567", "1234567");
+
+ assertEquals(Utilities.getTaskIdFromFilename
+ ("/mnt/dev005/task_local_0001_m_000005_0"),
+ "000005");
System.out.println("FileSink Operator ok");
-
} catch (Throwable e) {
e.printStackTrace();
throw e;
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out?rev=965708&r1=965707&r2=965708&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/sample10.q.out Tue Jul 20 01:24:29 2010
@@ -102,14 +102,14 @@ STAGE PLANS:
type: bigint
Needs Tagging: false
Path -> Alias:
- file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/attempt_local_0001_r_000000_0 [srcpartbucket]
- file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/attempt_local_0001_r_000000_0 [srcpartbucket]
- file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/attempt_local_0001_r_000000_0 [srcpartbucket]
- file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/attempt_local_0001_r_000000_0 [srcpartbucket]
+ file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/000000_0 [srcpartbucket]
+ file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/000000_0 [srcpartbucket]
+ file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/000000_0 [srcpartbucket]
+ file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/000000_0 [srcpartbucket]
Path -> Partition:
- file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/attempt_local_0001_r_000000_0
+ file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=11/000000_0
Partition
- base file name: attempt_local_0001_r_000000_0
+ base file name: 000000_0
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -122,13 +122,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+ location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
name srcpartbucket
partition_columns ds/hr
serialization.ddl struct srcpartbucket { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- transient_lastDdlTime 1277145923
+ transient_lastDdlTime 1279517872
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -140,19 +140,19 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+ location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
name srcpartbucket
partition_columns ds/hr
serialization.ddl struct srcpartbucket { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- transient_lastDdlTime 1277145923
+ transient_lastDdlTime 1279517872
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: srcpartbucket
name: srcpartbucket
- file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/attempt_local_0001_r_000000_0
+ file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-08/hr=12/000000_0
Partition
- base file name: attempt_local_0001_r_000000_0
+ base file name: 000000_0
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -165,13 +165,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+ location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
name srcpartbucket
partition_columns ds/hr
serialization.ddl struct srcpartbucket { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- transient_lastDdlTime 1277145923
+ transient_lastDdlTime 1279517872
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -183,19 +183,19 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+ location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
name srcpartbucket
partition_columns ds/hr
serialization.ddl struct srcpartbucket { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- transient_lastDdlTime 1277145923
+ transient_lastDdlTime 1279517872
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: srcpartbucket
name: srcpartbucket
- file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/attempt_local_0001_r_000000_0
+ file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=11/000000_0
Partition
- base file name: attempt_local_0001_r_000000_0
+ base file name: 000000_0
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -208,13 +208,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+ location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
name srcpartbucket
partition_columns ds/hr
serialization.ddl struct srcpartbucket { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- transient_lastDdlTime 1277145923
+ transient_lastDdlTime 1279517872
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -226,19 +226,19 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+ location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
name srcpartbucket
partition_columns ds/hr
serialization.ddl struct srcpartbucket { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- transient_lastDdlTime 1277145923
+ transient_lastDdlTime 1279517872
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: srcpartbucket
name: srcpartbucket
- file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/attempt_local_0001_r_000000_0
+ file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket/ds=2008-04-09/hr=12/000000_0
Partition
- base file name: attempt_local_0001_r_000000_0
+ base file name: 000000_0
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
partition values:
@@ -251,13 +251,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+ location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
name srcpartbucket
partition_columns ds/hr
serialization.ddl struct srcpartbucket { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- transient_lastDdlTime 1277145923
+ transient_lastDdlTime 1279517872
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -269,13 +269,13 @@ STAGE PLANS:
columns.types string:string
file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- location file:/data/users/nzhang/work/999/apache-hive/build/ql/test/data/warehouse/srcpartbucket
+ location file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/test/data/warehouse/srcpartbucket
name srcpartbucket
partition_columns ds/hr
serialization.ddl struct srcpartbucket { string key, string value}
serialization.format 1
serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- transient_lastDdlTime 1277145923
+ transient_lastDdlTime 1279517872
serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
name: srcpartbucket
name: srcpartbucket
@@ -299,7 +299,7 @@ STAGE PLANS:
File Output Operator
compressed: false
GlobalTableId: 0
- directory: file:/data/users/nzhang/work/999/apache-hive/build/ql/scratchdir/hive_2010-06-21_11-45-29_813_4438091765019255104/10001
+ directory: file:/mnt/vol/devrs004.snc1/jssarma/projects/hive_trunk/build/ql/scratchdir/hive_2010-07-18_22-37-59_060_3045357226899710132/10001
NumFilesPerFileSink: 1
table:
input format: org.apache.hadoop.mapred.TextInputFormat
@@ -322,14 +322,14 @@ PREHOOK: Input: default@srcpartbucket@ds
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-30_185_5515955290012905688/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-37-59_639_2719302052990534208/10000
POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-30_185_5515955290012905688/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-37-59_639_2719302052990534208/10000
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
@@ -346,14 +346,14 @@ PREHOOK: Input: default@srcpartbucket@ds
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-35_252_5650802559039809329/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-03_708_4461107599597555917/10000
POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-35_252_5650802559039809329/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-03_708_4461107599597555917/10000
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
@@ -370,14 +370,14 @@ PREHOOK: Input: default@srcpartbucket@ds
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-40_176_6924299231993417982/10000
+PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-07_920_2071428667805216792/10000
POSTHOOK: query: select * from srcpartbucket where ds is not null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-06-21_11-45-40_176_6924299231993417982/10000
+POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-18_22-38-07_920_2071428667805216792/10000
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]