You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jv...@apache.org on 2010/08/13 06:50:34 UTC
svn commit: r985094 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: jvs
Date: Fri Aug 13 04:50:34 2010
New Revision: 985094
URL: http://svn.apache.org/viewvc?rev=985094&view=rev
Log:
HIVE-1528. JSON UDTF function
(Ning Zhang via jvs)
Added:
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=985094&r1=985093&r2=985094&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Fri Aug 13 04:50:34 2010
@@ -39,6 +39,9 @@ Trunk - Unreleased
HIVE-1514. API to change fileformat and location of a partition
(He Yongqiang via namit)
+ HIVE-1528. JSON UDTF function
+ (Ning Zhang via jvs)
+
IMPROVEMENTS
HIVE-1394. Do not update transient_lastDdlTime if the partition is modified by a housekeeping
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=985094&r1=985093&r2=985094&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Fri Aug 13 04:50:34 2010
@@ -130,13 +130,12 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFUpper;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFHistogramNumeric;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
@@ -148,6 +147,7 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVariance;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFnGrams;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArray;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayContains;
@@ -173,6 +173,7 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple;
import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
import org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath;
import org.apache.hadoop.hive.ql.udf.xml.UDFXPathBoolean;
@@ -394,6 +395,7 @@ public final class FunctionRegistry {
// Generic UDTF's
registerGenericUDTF("explode", GenericUDTFExplode.class);
+ registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class);
}
public static void registerTemporaryUDF(String functionName,
@@ -734,8 +736,9 @@ public final class FunctionRegistry {
}
public static GenericUDAFResolver getGenericUDAFResolver(String functionName) {
- if (LOG.isDebugEnabled())
+ if (LOG.isDebugEnabled()) {
LOG.debug("Looking up GenericUDAF: " + functionName);
+ }
FunctionInfo finfo = mFunctions.get(functionName.toLowerCase());
if (finfo == null) {
return null;
@@ -873,10 +876,11 @@ public final class FunctionRegistry {
conversionCost += cost;
}
}
- if (LOG.isDebugEnabled())
+ if (LOG.isDebugEnabled()) {
LOG.debug("Method " + (match ? "did" : "didn't") + " match: passed = "
+ argumentsPassed + " accepted = " + argumentsAccepted +
" method = " + m);
+ }
if (match) {
// Always choose the function with least implicit conversions.
if (conversionCost < leastConversionCost) {
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java?rev=985094&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java Fri Aug 13 04:50:34 2010
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+import org.json.JSONException;
+import org.json.JSONObject;
+/**
+ * GenericUDTFJSONTuple: a UDTF that takes a JSON string followed by one or more key names and forwards
+ * one row whose string columns hold what JSONObject.getString returns for each key (null where isNull(key)).
+ */
+@Description(name = "json_tuple",
+ value = "_FUNC_(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. " +
+ "All the input parameters and output column types are string.")
+
+public class GenericUDTFJSONTuple extends GenericUDTF {
+
+ private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName());
+ // Per-instance state; every field below is (re)assigned by initialize().
+ int numCols; // number of output columns (argument count minus the JSON string argument)
+ String[] paths; // key looked up in the JSON object for each output column; filled in by process()
+ Text[] retCols; // row handed to forward(); an entry is null when JSONObject.isNull(key) is true
+ Text[] cols; // pool of reusable Text objects, one per column, so process() does not allocate per row
+ Object[] nullCols; // all-null row forwarded when the JSON string is null
+ ObjectInspector[] inputOIs; // argument inspectors saved by initialize(); process() casts them to StringObjectInspector
+ boolean pathParsed = false; // true once paths[] has been read from the arguments in process()
+ boolean seenErrors = false; // true once an invalid-JSON error has been logged; suppresses later log lines
+
+ @Override
+ public void close() throws HiveException {
+ }
+
+ @Override
+ public StructObjectInspector initialize(ObjectInspector[] args)
+ throws UDFArgumentException {
+ // Checks the argument list, (re)allocates per-column state, and returns a struct inspector with numCols string fields.
+ inputOIs = args;
+ numCols = args.length - 1; // args[0] is the JSON string; the remaining arguments are the keys
+
+ if (numCols < 1) {
+ throw new UDFArgumentException("json_tuple() takes at least two arguments: " +
+ "the json string and a path expression");
+ }
+ // every argument, including the JSON string itself, must be a primitive of type string
+ for (int i = 0; i < args.length; ++i) {
+ if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE ||
+ !args[i].getTypeName().equals(Constants.STRING_TYPE_NAME)) {
+ throw new UDFArgumentException("json_tuple()'s arguments have to be string type");
+ }
+ }
+ // reset the flags so that process() reads the keys again and logs the first error again
+ seenErrors = false;
+ pathParsed = false;
+ paths = new String[numCols];
+ cols = new Text[numCols];
+ retCols = new Text[numCols];
+ nullCols = new Object[numCols];
+
+ for (int i = 0; i < numCols; ++i) {
+ cols[i] = new Text();
+ retCols[i] = cols[i]; // start with every output slot pointing at its pooled Text
+ nullCols[i] = null;
+ }
+
+ // construct the output object inspector: a struct with one string field per requested key
+ ArrayList<String> fieldNames = new ArrayList<String>(numCols);
+ ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
+ for (int i = 0; i < numCols; ++i) {
+ // column name can be anything since it will be named by UDTF as clause
+ fieldNames.add("c" + i);
+ // all returned type will be Text
+ fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+ }
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
+ }
+
+ @Override
+ public void process(Object[] o) throws HiveException {
+ // Forwards at most one row per call: nullCols when the JSON string is null, otherwise one Text (or null) per key.
+ if (o[0] == null) {
+ forward(nullCols);
+ return;
+ }
+ // read the key arguments once, from the first row whose JSON argument is non-null, and reuse them afterwards
+ if (!pathParsed) {
+ for (int i = 0;i < numCols; ++i) {
+ paths[i] = ((StringObjectInspector) inputOIs[i+1]).getPrimitiveJavaObject(o[i+1]);
+ }
+ pathParsed = true;
+ }
+
+ String jsonStr = ((StringObjectInspector) inputOIs[0]).getPrimitiveJavaObject(o[0]);
+ if (jsonStr == null) {
+ forward(nullCols);
+ return;
+ }
+ try {
+ JSONObject jsonObj = new JSONObject(jsonStr);
+
+ for (int i = 0; i < numCols; ++i) {
+ if (jsonObj.isNull(paths[i])) {
+ retCols[i] = null;
+ } else {
+ if (retCols[i] == null) {
+ retCols[i] = cols[i]; // a previous row nulled this slot; point it back at the pooled Text instead of allocating
+ }
+ retCols[i].set(jsonObj.getString(paths[i]));
+ }
+ }
+ forward(retCols);
+ } catch (JSONException e) {
+ // JSONException (e.g. invalid JSON string): no row is forwarded for this input; only the first such error is logged
+ if (!seenErrors) {
+ LOG.error("The input is not a valid JSON string: " + jsonStr + ". Skipping such error messages in the future.");
+ seenErrors = true;
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "json_tuple";
+ }
+}
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q?rev=985094&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udtf_json_tuple.q Fri Aug 13 04:50:34 2010
@@ -0,0 +1,36 @@
+create table json_t (key string, jstring string);
+
+insert overwrite table json_t
+select * from (
+ select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1
+ union all
+ select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1
+ union all
+ select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1
+ union all
+ select '4', cast(null as string) from src limit 1
+ union all
+ select '5', '{"f1": "", "f5": null}' from src limit 1
+ union all
+ select '6', '[invalid JSON string]' from src limit 1
+) s;
+
+explain
+select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5;
+
+select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5;
+
+explain
+select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a;
+
+select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a;
+
+explain
+select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5;
+
+select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5;
+
+explain
+select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2;
+
+select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2;
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=985094&r1=985093&r2=985094&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Fri Aug 13 04:50:34 2010
@@ -69,6 +69,7 @@ instr
int
isnotnull
isnull
+json_tuple
lcase
length
like
@@ -175,6 +176,7 @@ double
e
explode
from_unixtime
+json_tuple
lcase
like
locate
Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out?rev=985094&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udtf_json_tuple.q.out Fri Aug 13 04:50:34 2010
@@ -0,0 +1,461 @@
+PREHOOK: query: create table json_t (key string, jstring string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table json_t (key string, jstring string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@json_t
+PREHOOK: query: insert overwrite table json_t
+select * from (
+ select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1
+ union all
+ select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1
+ union all
+ select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1
+ union all
+ select '4', cast(null as string) from src limit 1
+ union all
+ select '5', '{"f1": "", "f5": null}' from src limit 1
+ union all
+ select '6', '[invalid JSON string]' from src limit 1
+) s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@json_t
+POSTHOOK: query: insert overwrite table json_t
+select * from (
+ select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1
+ union all
+ select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1
+ union all
+ select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1
+ union all
+ select '4', cast(null as string) from src limit 1
+ union all
+ select '5', '{"f1": "", "f5": null}' from src limit 1
+ union all
+ select '6', '[invalid JSON string]' from src limit 1
+) s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@json_t
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+PREHOOK: query: explain
+select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF b)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Lateral View Forward
+ Select Operator
+ SELECT * : (no compute)
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ Select Operator
+ expressions:
+ expr: jstring
+ type: string
+ expr: 'f1'
+ type: string
+ expr: 'f2'
+ type: string
+ expr: 'f3'
+ type: string
+ expr: 'f4'
+ type: string
+ expr: 'f5'
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ UDTF Operator
+ function name: json_tuple
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: string
+ expr: _col3
+ type: string
+ expr: _col4
+ type: string
+ expr: _col5
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_t
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-09_840_8200365876234855592/-mr-10000
+POSTHOOK: query: select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_t
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-09_840_8200365876234855592/-mr-10000
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+4 NULL NULL NULL NULL NULL
+3 value13 2 value33 value44 5.01
+2 value12 2 value3 4.01 NULL
+1 value1 value2 3 NULL 5.23
+5 NULL NULL NULL NULL
+PREHOOK: query: explain
+select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF json_t a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Select Operator
+ expressions:
+ expr: jstring
+ type: string
+ expr: 'f1'
+ type: string
+ expr: 'f2'
+ type: string
+ expr: 'f3'
+ type: string
+ expr: 'f4'
+ type: string
+ expr: 'f5'
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ UDTF Operator
+ function name: json_tuple
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_t
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-13_794_8613454555471840841/-mr-10000
+POSTHOOK: query: select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_t
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-13_794_8613454555471840841/-mr-10000
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+NULL NULL NULL NULL NULL
+value13 2 value33 value44 5.01
+value12 2 value3 4.01 NULL
+value1 value2 3 NULL 5.23
+ NULL NULL NULL NULL
+PREHOOK: query: explain
+select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f5)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Lateral View Forward
+ Select Operator
+ SELECT * : (no compute)
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col3
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ Select Operator
+ expressions:
+ expr: jstring
+ type: string
+ expr: 'f1'
+ type: string
+ expr: 'f2'
+ type: string
+ expr: 'f3'
+ type: string
+ expr: 'f4'
+ type: string
+ expr: 'f5'
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ UDTF Operator
+ function name: json_tuple
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col3
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_t
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-17_260_2655690577880014370/-mr-10000
+POSTHOOK: query: select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_t
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-17_260_2655690577880014370/-mr-10000
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+4 NULL NULL
+3 2 5.01
+2 2 NULL
+1 value2 5.23
+5 NULL NULL
+PREHOOK: query: explain
+select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF json_t a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL f1))) (TOK_GROUPBY (TOK_TABLE_OR_COL f2))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ Lateral View Forward
+ Select Operator
+ SELECT * : (no compute)
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Filter Operator
+ predicate:
+ expr: _col2 is not null
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _col3
+ type: string
+ outputColumnNames: _col3
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col3
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Select Operator
+ expressions:
+ expr: jstring
+ type: string
+ expr: 'f1'
+ type: string
+ expr: 'f2'
+ type: string
+ expr: 'f3'
+ type: string
+ expr: 'f4'
+ type: string
+ expr: 'f5'
+ type: string
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ UDTF Operator
+ function name: json_tuple
+ Lateral View Join Operator
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Filter Operator
+ predicate:
+ expr: _col2 is not null
+ type: boolean
+ Select Operator
+ expressions:
+ expr: _col3
+ type: string
+ outputColumnNames: _col3
+ Group By Operator
+ aggregations:
+ expr: count()
+ bucketGroup: false
+ keys:
+ expr: _col3
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_t
+PREHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-20_835_55486591128179740/-mr-10000
+POSTHOOK: query: select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_t
+POSTHOOK: Output: file:/tmp/nzhang/hive_2010-08-12_17-58-20_835_55486591128179740/-mr-10000
+POSTHOOK: Lineage: json_t.jstring EXPRESSION []
+POSTHOOK: Lineage: json_t.key EXPRESSION []
+NULL 1
+2 2
+value2 1