Posted to commits@hive.apache.org by kg...@apache.org on 2021/09/22 09:31:45 UTC
[hive] branch master updated: HIVE-25485: Transform selects of literals under a UNION ALL to inline table scan (#2608) (Zoltan Haindrich reviewed by Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository.
kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 4efb565 HIVE-25485: Transform selects of literals under a UNION ALL to inline table scan (#2608) (Zoltan Haindrich reviewed by Krisztian Kasa)
4efb565 is described below
commit 4efb565b36ef740fd3a932cfcb07590fc3e93a40
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Wed Sep 22 11:31:30 2021 +0200
HIVE-25485: Transform selects of literals under a UNION ALL to inline table scan (#2608) (Zoltan Haindrich reviewed by Krisztian Kasa)
---
.../hive/jdbc/BaseJdbcWithMiniLlap.java.orig | 747 ---------------------
.../java/org/apache/hive/jdbc/TestJdbcDriver2.java | 2 +-
.../calcite/rules/HiveRelDecorrelator.java | 2 +-
.../HiveUnionSimpleSelectsToInlineTableRule.java | 244 +++++++
.../calcite/translator/SqlFunctionConverter.java | 2 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 4 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 6 +-
.../test/queries/clientpositive/union_literals.q | 103 +++
.../results/clientpositive/llap/udf_likeall.q.out | 4 +-
.../results/clientpositive/llap/udf_likeany.q.out | 4 +-
.../clientpositive/llap/udf_sort_array_by.q.out | 6 +-
.../clientpositive/llap/union_literals.q.out | 454 +++++++++++++
.../clientpositive/llap/vectorized_mapjoin3.q.out | 229 +++----
13 files changed, 925 insertions(+), 882 deletions(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig
deleted file mode 100644
index 4c46db9..0000000
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig
+++ /dev/null
@@ -1,747 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hive.jdbc;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.File;
-import java.math.BigDecimal;
-import java.net.URL;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.UUID;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.llap.FieldDesc;
-import org.apache.hadoop.hive.llap.Row;
-import org.apache.hadoop.hive.llap.Schema;
-import org.apache.hadoop.io.NullWritable;
-
-import org.apache.hive.jdbc.miniHS2.MiniHS2;
-import org.apache.hive.jdbc.miniHS2.MiniHS2.MiniClusterType;
-import org.apache.hadoop.hive.common.type.Date;
-import org.apache.hadoop.hive.common.type.Timestamp;
-import org.apache.hadoop.hive.llap.LlapBaseInputFormat;
-
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.Test;
-import org.apache.hadoop.mapred.InputFormat;
-
-/**
- * Specialize this base class for different serde's/formats
- * {@link #beforeTest(boolean) beforeTest} should be called
- * by sub-classes in a {@link org.junit.BeforeClass} initializer
- */
-public abstract class BaseJdbcWithMiniLlap {
-
- private static String dataFileDir;
- private static Path kvDataFilePath;
- private static Path dataTypesFilePath;
- private static Path over10KFilePath;
-
- protected static MiniHS2 miniHS2 = null;
- protected static HiveConf conf = null;
- protected static Connection hs2Conn = null;
-
- // This method should be called by sub-classes in a @BeforeClass initializer
- public static MiniHS2 beforeTest(HiveConf inputConf) throws Exception {
- conf = inputConf;
- Class.forName(MiniHS2.getJdbcDriverName());
- miniHS2 = new MiniHS2(conf, MiniClusterType.LLAP);
- dataFileDir = conf.get("test.data.files").replace('\\', '/').replace("c:", "");
- kvDataFilePath = new Path(dataFileDir, "kv1.txt");
- dataTypesFilePath = new Path(dataFileDir, "datatypes.txt");
- over10KFilePath = new Path(dataFileDir, "over10k");
- Map<String, String> confOverlay = new HashMap<String, String>();
- miniHS2.start(confOverlay);
- miniHS2.getDFS().getFileSystem().mkdirs(new Path("/apps_staging_dir/anonymous"));
- return miniHS2;
- }
-
- static HiveConf defaultConf() throws Exception {
- String confDir = "../../data/conf/llap/";
- if (confDir != null && !confDir.isEmpty()) {
- HiveConf.setHiveSiteLocation(new URL("file://"+ new File(confDir).toURI().getPath() + "/hive-site.xml"));
- System.out.println("Setting hive-site: " + HiveConf.getHiveSiteLocation());
- }
- HiveConf defaultConf = new HiveConf();
- defaultConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
- defaultConf.setBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS, false);
- defaultConf.addResource(new URL("file://" + new File(confDir).toURI().getPath() + "/tez-site.xml"));
- return defaultConf;
- }
-
- @Before
- public void setUp() throws Exception {
- hs2Conn = getConnection(miniHS2.getJdbcURL(), System.getProperty("user.name"), "bar");
- }
-
- public static Connection getConnection(String jdbcURL, String user, String pwd) throws SQLException {
- Connection conn = DriverManager.getConnection(jdbcURL, user, pwd);
- conn.createStatement().execute("set hive.support.concurrency = false");
- return conn;
- }
-
- @After
- public void tearDown() throws Exception {
- LlapBaseInputFormat.closeAll();
- hs2Conn.close();
- }
-
- @AfterClass
- public static void afterTest() throws Exception {
- if (miniHS2.isStarted()) {
- miniHS2.stop();
- }
- }
-
- protected void createTestTable(String tableName) throws Exception {
- createTestTable(hs2Conn, null, tableName, kvDataFilePath.toString());
- }
-
- public static void createTestTable(Connection connection, String database, String tableName, String srcFile) throws
- Exception {
- Statement stmt = connection.createStatement();
-
- if (database != null) {
- stmt.execute("CREATE DATABASE IF NOT EXISTS " + database);
- stmt.execute("USE " + database);
- }
-
- // create table
- stmt.execute("DROP TABLE IF EXISTS " + tableName);
- stmt.execute("CREATE TABLE " + tableName
- + " (under_col INT COMMENT 'the under column', value STRING) COMMENT ' test table'");
-
- // load data
- stmt.execute("load data local inpath '" + srcFile + "' into table " + tableName);
-
- ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName);
- assertTrue(res.next());
- assertEquals("val_238", res.getString(2));
- res.close();
- stmt.close();
- }
-
- protected void createDataTypesTable(String tableName) throws Exception {
- Statement stmt = hs2Conn.createStatement();
-
- // create table
- stmt.execute("DROP TABLE IF EXISTS " + tableName);
- // tables with various types
- stmt.execute("create table " + tableName
- + " (c1 int, c2 boolean, c3 double, c4 string,"
- + " c5 array<int>, c6 map<int,string>, c7 map<string,string>,"
- + " c8 struct<r:string,s:int,t:double>,"
- + " c9 tinyint, c10 smallint, c11 float, c12 bigint,"
- + " c13 array<array<string>>,"
- + " c14 map<int, map<int,int>>,"
- + " c15 struct<r:int,s:struct<a:int,b:string>>,"
- + " c16 array<struct<m:map<string,string>,n:int>>,"
- + " c17 timestamp, "
- + " c18 decimal(16,7), "
- + " c19 binary, "
- + " c20 date,"
- + " c21 varchar(20),"
- + " c22 char(15),"
- + " c23 binary"
- + ")");
- stmt.execute("load data local inpath '"
- + dataTypesFilePath.toString() + "' into table " + tableName);
- stmt.close();
- }
-
- protected void createOver10KTable(String tableName) throws Exception {
- try (Statement stmt = hs2Conn.createStatement()) {
-
- String createQuery =
- "create table " + tableName + " (t tinyint, si smallint, i int, b bigint, f float, d double, bo boolean, "
- + "s string, ts timestamp, `dec` decimal(4,2), bin binary) row format delimited fields terminated by '|'";
-
- // create table
- stmt.execute("DROP TABLE IF EXISTS " + tableName);
- stmt.execute(createQuery);
- // load data
- stmt.execute("load data local inpath '" + over10KFilePath.toString() + "' into table " + tableName);
- }
- }
-
- @Test(timeout = 60000)
- public void testLlapInputFormatEndToEnd() throws Exception {
- createTestTable("testtab1");
-
- int rowCount;
-
- RowCollector rowCollector = new RowCollector();
- String query = "select * from testtab1 where under_col = 0";
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(3, rowCount);
- assertArrayEquals(new String[] {"0", "val_0"}, rowCollector.rows.get(0));
- assertArrayEquals(new String[] {"0", "val_0"}, rowCollector.rows.get(1));
- assertArrayEquals(new String[] {"0", "val_0"}, rowCollector.rows.get(2));
-
- // Try empty rows query
- rowCollector.rows.clear();
- query = "select * from testtab1 where true = false";
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(0, rowCount);
- }
-
- @Test(timeout = 300000)
- public void testMultipleBatchesOfComplexTypes() throws Exception {
- final String tableName = "testMultipleBatchesOfComplexTypes";
- try (Statement stmt = hs2Conn.createStatement()) {
- String createQuery =
- "create table " + tableName + "(c1 array<struct<f1:string,f2:string>>, "
- + "c2 int, "
- + "c3 array<array<int>>, "
- + "c4 array<struct<f1:array<string>>>) STORED AS ORC";
-
- // create table
- stmt.execute("DROP TABLE IF EXISTS " + tableName);
- stmt.execute(createQuery);
- // load data
- stmt.execute("INSERT INTO " + tableName + " VALUES "
- // value 1
- + "(ARRAY(NAMED_STRUCT('f1','a1', 'f2','a2'), NAMED_STRUCT('f1','a3', 'f2','a4')), "
- + "1, ARRAY(ARRAY(1)), ARRAY(NAMED_STRUCT('f1',ARRAY('aa1')))), "
- // value 2
- + "(ARRAY(NAMED_STRUCT('f1','b1', 'f2','b2'), NAMED_STRUCT('f1','b3', 'f2','b4')), 2, "
- + "ARRAY(ARRAY(2,2), ARRAY(2,2)), "
- + "ARRAY(NAMED_STRUCT('f1',ARRAY('aa2','aa2')), NAMED_STRUCT('f1',ARRAY('aa2','aa2')))), "
- // value 3
- + "(ARRAY(NAMED_STRUCT('f1','c1', 'f2','c2'), NAMED_STRUCT('f1','c3', 'f2','c4'), "
- + "NAMED_STRUCT('f1','c5', 'f2','c6')), 3, " + "ARRAY(ARRAY(3,3,3), ARRAY(3,3,3), ARRAY(3,3,3)), "
- + "ARRAY(NAMED_STRUCT('f1',ARRAY('aa3','aa3','aa3')), "
- + "NAMED_STRUCT('f1',ARRAY('aa3','aa3', 'aa3')), NAMED_STRUCT('f1',ARRAY('aa3','aa3', 'aa3')))), "
- // value 4
- + "(ARRAY(NAMED_STRUCT('f1','d1', 'f2','d2'), NAMED_STRUCT('f1','d3', 'f2','d4'),"
- + " NAMED_STRUCT('f1','d5', 'f2','d6'), NAMED_STRUCT('f1','d7', 'f2','d8')), 4, "
- + "ARRAY(ARRAY(4,4,4,4),ARRAY(4,4,4,4),ARRAY(4,4,4,4),ARRAY(4,4,4,4)), "
- + "ARRAY(NAMED_STRUCT('f1',ARRAY('aa4','aa4','aa4', 'aa4')), "
- + "NAMED_STRUCT('f1',ARRAY('aa4','aa4','aa4', 'aa4')), NAMED_STRUCT('f1',ARRAY('aa4','aa4','aa4', 'aa4')),"
- + " NAMED_STRUCT('f1',ARRAY('aa4','aa4','aa4', 'aa4'))))");
-
- // generate 4096 rows from above records
- for (int i = 0; i < 10; i++) {
- stmt.execute(String.format("insert into %s select * from %s", tableName, tableName));
- }
- // validate test table
- ResultSet res = stmt.executeQuery("SELECT count(*) FROM " + tableName);
- assertTrue(res.next());
- assertEquals(4096, res.getInt(1));
- res.close();
- }
-
- RowCollector rowCollector = new RowCollector();
- String query = "select * from " + tableName;
- int rowCount = processQuery(query, 1, rowCollector);
- assertEquals(4096, rowCount);
-
- /*
- *
- * validate different rows
- * [[[a1, a2], [a3, a4]], 1, [[1]], [[[aa1]]]]
- * [[[b1, b2], [b3, b4]], 2, [[2, 2], [2, 2]], [[[aa2, aa2]], [[aa2, aa2]]]]
- * [[[c1, c2], [c3, c4], [c5, c6]], 3, [[3, 3, 3], [3, 3, 3], [3, 3, 3]], [[[aa3, aa3, aa3]], [[aa3, aa3, aa3]], [[aa3, aa3, aa3]]]]
- * [[[d1, d2], [d3, d4], [d5, d6], [d7, d8]], 4, [[4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4]], [[[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]]]]
- *
- */
- rowCollector.rows.clear();
- query = "select * from " + tableName + " where c2=1 limit 1";
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(1, rowCount);
- final String[] expected1 =
- { "[[a1, a2], [a3, a4]]",
- "1",
- "[[1]]",
- "[[[aa1]]]"
- };
- assertArrayEquals(expected1, rowCollector.rows.get(0));
-
- rowCollector.rows.clear();
- query = "select * from " + tableName + " where c2=2 limit 1";
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(1, rowCount);
- final String[] expected2 =
- { "[[b1, b2], [b3, b4]]",
- "2",
- "[[2, 2], [2, 2]]",
- "[[[aa2, aa2]], [[aa2, aa2]]]"
- };
- assertArrayEquals(expected2, rowCollector.rows.get(0));
-
- rowCollector.rows.clear();
- query = "select * from " + tableName + " where c2=3 limit 1";
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(1, rowCount);
- final String[] expected3 =
- { "[[c1, c2], [c3, c4], [c5, c6]]",
- "3",
- "[[3, 3, 3], [3, 3, 3], [3, 3, 3]]",
- "[[[aa3, aa3, aa3]], [[aa3, aa3, aa3]], [[aa3, aa3, aa3]]]"
- };
- assertArrayEquals(expected3, rowCollector.rows.get(0));
-
- rowCollector.rows.clear();
- query = "select * from " + tableName + " where c2=4 limit 1";
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(1, rowCount);
- final String[] expected4 =
- { "[[d1, d2], [d3, d4], [d5, d6], [d7, d8]]",
- "4",
- "[[4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4]]",
- "[[[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]]]"
- };
- assertArrayEquals(expected4, rowCollector.rows.get(0));
-
- }
-
- @Test(timeout = 300000)
- public void testLlapInputFormatEndToEndWithMultipleBatches() throws Exception {
- String tableName = "over10k_table";
-
- createOver10KTable(tableName);
-
- int rowCount;
-
- // Try with more than one batch
- RowCollector rowCollector = new RowCollector();
- String query = "select * from " + tableName;
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(9999, rowCount);
-
- // Try with less than one batch
- rowCollector.rows.clear();
- query = "select * from " + tableName + " where s = 'rachel brown'";
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(17, rowCount);
-
- // Try empty rows query
- rowCollector.rows.clear();
- query = "select * from " + tableName + " where false";
- rowCount = processQuery(query, 1, rowCollector);
- assertEquals(0, rowCount);
- }
-
-
- @Test(timeout = 60000)
- public void testNonAsciiStrings() throws Exception {
- createTestTable("testtab_nonascii");
-
- RowCollector rowCollector = new RowCollector();
- String nonAscii = "À côté du garçon";
- String query = "select value, '" + nonAscii + "' from testtab_nonascii where under_col=0";
- int rowCount = processQuery(query, 1, rowCollector);
- assertEquals(3, rowCount);
-
- assertArrayEquals(new String[] {"val_0", nonAscii}, rowCollector.rows.get(0));
- assertArrayEquals(new String[] {"val_0", nonAscii}, rowCollector.rows.get(1));
- assertArrayEquals(new String[] {"val_0", nonAscii}, rowCollector.rows.get(2));
- }
-
- @Test(timeout = 60000)
- public void testEscapedStrings() throws Exception {
- createTestTable("testtab1");
-
- RowCollector rowCollector = new RowCollector();
- String expectedVal1 = "'a',\"b\",\\c\\";
- String expectedVal2 = "multi\nline";
- String query = "select value, '\\'a\\',\"b\",\\\\c\\\\', 'multi\\nline' from testtab1 where under_col=0";
- int rowCount = processQuery(query, 1, rowCollector);
- assertEquals(3, rowCount);
-
- assertArrayEquals(new String[] {"val_0", expectedVal1, expectedVal2}, rowCollector.rows.get(0));
- assertArrayEquals(new String[] {"val_0", expectedVal1, expectedVal2}, rowCollector.rows.get(1));
- assertArrayEquals(new String[] {"val_0", expectedVal1, expectedVal2}, rowCollector.rows.get(2));
- }
-
- @Test(timeout = 60000)
- public void testDataTypes() throws Exception {
- createDataTypesTable("datatypes");
- RowCollector2 rowCollector = new RowCollector2();
- String query = "select * from datatypes";
- int rowCount = processQuery(query, 1, rowCollector);
- assertEquals(3, rowCount);
-
- // Verify schema
- String[][] colNameTypes = new String[][] {
- {"datatypes.c1", "int"},
- {"datatypes.c2", "boolean"},
- {"datatypes.c3", "double"},
- {"datatypes.c4", "string"},
- {"datatypes.c5", "array<int>"},
- {"datatypes.c6", "map<int,string>"},
- {"datatypes.c7", "map<string,string>"},
- {"datatypes.c8", "struct<r:string,s:int,t:double>"},
- {"datatypes.c9", "tinyint"},
- {"datatypes.c10", "smallint"},
- {"datatypes.c11", "float"},
- {"datatypes.c12", "bigint"},
- {"datatypes.c13", "array<array<string>>"},
- {"datatypes.c14", "map<int,map<int,int>>"},
- {"datatypes.c15", "struct<r:int,s:struct<a:int,b:string>>"},
- {"datatypes.c16", "array<struct<m:map<string,string>,n:int>>"},
- {"datatypes.c17", "timestamp"},
- {"datatypes.c18", "decimal(16,7)"},
- {"datatypes.c19", "binary"},
- {"datatypes.c20", "date"},
- {"datatypes.c21", "varchar(20)"},
- {"datatypes.c22", "char(15)"},
- {"datatypes.c23", "binary"},
- };
- FieldDesc fieldDesc;
- assertEquals(23, rowCollector.numColumns);
- for (int idx = 0; idx < rowCollector.numColumns; ++idx) {
- fieldDesc = rowCollector.schema.getColumns().get(idx);
- assertEquals("ColName idx=" + idx, colNameTypes[idx][0], fieldDesc.getName());
- assertEquals("ColType idx=" + idx, colNameTypes[idx][1], fieldDesc.getTypeInfo().getTypeName());
- }
-
- // First row is all nulls
- Object[] rowValues = rowCollector.rows.get(0);
- for (int idx = 0; idx < rowCollector.numColumns; ++idx) {
- assertEquals("idx=" + idx, null, rowValues[idx]);
- }
-
- // Second Row
- rowValues = rowCollector.rows.get(1);
- assertEquals(Integer.valueOf(-1), rowValues[0]);
- assertEquals(Boolean.FALSE, rowValues[1]);
- assertEquals(Double.valueOf(-1.1d), rowValues[2]);
- assertEquals("", rowValues[3]);
-
- List<?> c5Value = (List<?>) rowValues[4];
- assertEquals(0, c5Value.size());
-
- Map<?,?> c6Value = (Map<?,?>) rowValues[5];
- assertEquals(0, c6Value.size());
-
- Map<?,?> c7Value = (Map<?,?>) rowValues[6];
- assertEquals(0, c7Value.size());
-
- List<?> c8Value = (List<?>) rowValues[7];
- assertEquals(null, c8Value.get(0));
- assertEquals(null, c8Value.get(1));
- assertEquals(null, c8Value.get(2));
-
- assertEquals(Byte.valueOf((byte) -1), rowValues[8]);
- assertEquals(Short.valueOf((short) -1), rowValues[9]);
- assertEquals(Float.valueOf(-1.0f), rowValues[10]);
- assertEquals(Long.valueOf(-1l), rowValues[11]);
-
- List<?> c13Value = (List<?>) rowValues[12];
- assertEquals(0, c13Value.size());
-
- Map<?,?> c14Value = (Map<?,?>) rowValues[13];
- assertEquals(0, c14Value.size());
-
- List<?> c15Value = (List<?>) rowValues[14];
- assertEquals(null, c15Value.get(0));
- assertEquals(null, c15Value.get(1));
-
- List<?> c16Value = (List<?>) rowValues[15];
- assertEquals(0, c16Value.size());
-
- assertEquals(null, rowValues[16]);
- assertEquals(null, rowValues[17]);
- assertEquals(null, rowValues[18]);
- assertEquals(null, rowValues[19]);
- assertEquals(null, rowValues[20]);
- assertEquals(null, rowValues[21]);
- assertEquals(null, rowValues[22]);
-
- // Third row
- rowValues = rowCollector.rows.get(2);
- assertEquals(Integer.valueOf(1), rowValues[0]);
- assertEquals(Boolean.TRUE, rowValues[1]);
- assertEquals(Double.valueOf(1.1d), rowValues[2]);
- assertEquals("1", rowValues[3]);
-
- c5Value = (List<?>) rowValues[4];
- assertEquals(2, c5Value.size());
- assertEquals(Integer.valueOf(1), c5Value.get(0));
- assertEquals(Integer.valueOf(2), c5Value.get(1));
-
- c6Value = (Map<?,?>) rowValues[5];
- assertEquals(2, c6Value.size());
- assertEquals("x", c6Value.get(Integer.valueOf(1)));
- assertEquals("y", c6Value.get(Integer.valueOf(2)));
-
- c7Value = (Map<?,?>) rowValues[6];
- assertEquals(1, c7Value.size());
- assertEquals("v", c7Value.get("k"));
-
- c8Value = (List<?>) rowValues[7];
- assertEquals("a", c8Value.get(0));
- assertEquals(Integer.valueOf(9), c8Value.get(1));
- assertEquals(Double.valueOf(2.2d), c8Value.get(2));
-
- assertEquals(Byte.valueOf((byte) 1), rowValues[8]);
- assertEquals(Short.valueOf((short) 1), rowValues[9]);
- assertEquals(Float.valueOf(1.0f), rowValues[10]);
- assertEquals(Long.valueOf(1l), rowValues[11]);
-
- c13Value = (List<?>) rowValues[12];
- assertEquals(2, c13Value.size());
- List<?> listVal = (List<?>) c13Value.get(0);
- assertEquals("a", listVal.get(0));
- assertEquals("b", listVal.get(1));
- listVal = (List<?>) c13Value.get(1);
- assertEquals("c", listVal.get(0));
- assertEquals("d", listVal.get(1));
-
- c14Value = (Map<?,?>) rowValues[13];
- assertEquals(2, c14Value.size());
- Map<?,?> mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1));
- assertEquals(2, mapVal.size());
- assertEquals(Integer.valueOf(12), mapVal.get(Integer.valueOf(11)));
- assertEquals(Integer.valueOf(14), mapVal.get(Integer.valueOf(13)));
- mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(2));
- assertEquals(1, mapVal.size());
- assertEquals(Integer.valueOf(22), mapVal.get(Integer.valueOf(21)));
-
- c15Value = (List<?>) rowValues[14];
- assertEquals(Integer.valueOf(1), c15Value.get(0));
- listVal = (List<?>) c15Value.get(1);
- assertEquals(2, listVal.size());
- assertEquals(Integer.valueOf(2), listVal.get(0));
- assertEquals("x", listVal.get(1));
-
- c16Value = (List<?>) rowValues[15];
- assertEquals(2, c16Value.size());
- listVal = (List<?>) c16Value.get(0);
- assertEquals(2, listVal.size());
- mapVal = (Map<?,?>) listVal.get(0);
- assertEquals(0, mapVal.size());
- assertEquals(Integer.valueOf(1), listVal.get(1));
- listVal = (List<?>) c16Value.get(1);
- mapVal = (Map<?,?>) listVal.get(0);
- assertEquals(2, mapVal.size());
- assertEquals("b", mapVal.get("a"));
- assertEquals("d", mapVal.get("c"));
- assertEquals(Integer.valueOf(2), listVal.get(1));
-
- assertEquals(Timestamp.valueOf("2012-04-22 09:00:00.123456789"), rowValues[16]);
- assertEquals(new BigDecimal("123456789.123456"), rowValues[17]);
- assertArrayEquals("abcd".getBytes("UTF-8"), (byte[]) rowValues[18]);
- assertEquals(Date.valueOf("2013-01-01"), rowValues[19]);
- assertEquals("abc123", rowValues[20]);
- assertEquals("abc123 ", rowValues[21]);
- assertArrayEquals("X'01FF'".getBytes("UTF-8"), (byte[]) rowValues[22]);
- }
-
-
- @Test(timeout = 60000)
- public void testComplexQuery() throws Exception {
- createTestTable("testtab1");
-
- RowCollector rowCollector = new RowCollector();
- String query = "select value, count(*) from testtab1 where under_col=0 group by value";
- int rowCount = processQuery(query, 1, rowCollector);
- assertEquals(1, rowCount);
-
- assertArrayEquals(new String[] {"val_0", "3"}, rowCollector.rows.get(0));
- }
-
- protected interface RowProcessor {
- void process(Row row);
- }
-
- protected static class RowCollector implements RowProcessor {
- ArrayList<String[]> rows = new ArrayList<String[]>();
- Schema schema = null;
- int numColumns = 0;
-
- public void process(Row row) {
- if (schema == null) {
- schema = row.getSchema();
- numColumns = schema.getColumns().size();
- }
-
- String[] arr = new String[numColumns];
- for (int idx = 0; idx < numColumns; ++idx) {
- Object val = row.getValue(idx);
- arr[idx] = (val == null ? null : val.toString());
- }
- rows.add(arr);
- }
- }
-
- // Save the actual values from each row as opposed to the String representation.
- protected static class RowCollector2 implements RowProcessor {
- ArrayList<Object[]> rows = new ArrayList<Object[]>();
- Schema schema = null;
- int numColumns = 0;
-
- public void process(Row row) {
- if (schema == null) {
- schema = row.getSchema();
- numColumns = schema.getColumns().size();
- }
-
- Object[] arr = new Object[numColumns];
- for (int idx = 0; idx < numColumns; ++idx) {
- arr[idx] = row.getValue(idx);
- }
- rows.add(arr);
- }
- }
-
- protected int processQuery(String query, int numSplits, RowProcessor rowProcessor) throws Exception {
- return processQuery(null, query, numSplits, rowProcessor);
- }
-
- protected abstract InputFormat<NullWritable, Row> getInputFormat();
-
- protected int processQuery(String currentDatabase, String query, int numSplits, RowProcessor rowProcessor)
- throws Exception {
- String url = miniHS2.getJdbcURL();
- String user = System.getProperty("user.name");
- String pwd = user;
- String handleId = UUID.randomUUID().toString();
-
- InputFormat<NullWritable, Row> inputFormat = getInputFormat();
-
- // Get splits
- JobConf job = new JobConf(conf);
- job.set(LlapBaseInputFormat.URL_KEY, url);
- job.set(LlapBaseInputFormat.USER_KEY, user);
- job.set(LlapBaseInputFormat.PWD_KEY, pwd);
- job.set(LlapBaseInputFormat.QUERY_KEY, query);
- job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
- if (currentDatabase != null) {
- job.set(LlapBaseInputFormat.DB_KEY, currentDatabase);
- }
-
- InputSplit[] splits = inputFormat.getSplits(job, numSplits);
-
- // Fetch rows from splits
- int rowCount = 0;
- for (InputSplit split : splits) {
- System.out.println("Processing split " + split.getLocations());
-
- RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null);
- Row row = reader.createValue();
- while (reader.next(NullWritable.get(), row)) {
- rowProcessor.process(row);
- ++rowCount;
- }
- //In arrow-mode this will throw exception unless all buffers have been released
- //See org.apache.hadoop.hive.llap.LlapArrowBatchRecordReader
- reader.close();
- }
- LlapBaseInputFormat.close(handleId);
-
- return rowCount;
- }
-
- /**
- * Test CLI kill command of a query that is running.
- * We spawn 2 threads - one running the query and
- * the other attempting to cancel.
- * We're using a dummy udf to simulate a query,
- * that runs for a sufficiently long time.
- * @throws Exception
- */
- @Test
- public void testKillQuery() throws Exception {
- String tableName = "testtab1";
- createTestTable(tableName);
- Connection con = hs2Conn;
- Connection con2 = getConnection(miniHS2.getJdbcURL(), System.getProperty("user.name"), "bar");
-
- String udfName = TestJdbcWithMiniHS2.SleepMsUDF.class.getName();
- Statement stmt1 = con.createStatement();
- Statement stmt2 = con2.createStatement();
- stmt1.execute("create temporary function sleepMsUDF as '" + udfName + "'");
- stmt1.close();
- final Statement stmt = con.createStatement();
-
- ExceptionHolder tExecuteHolder = new ExceptionHolder();
- ExceptionHolder tKillHolder = new ExceptionHolder();
-
- // Thread executing the query
- Thread tExecute = new Thread(new Runnable() {
- @Override
- public void run() {
- try {
- System.out.println("Executing query: ");
- // The test table has 500 rows, so total query time should be ~ 500*500ms
- stmt.executeQuery("select sleepMsUDF(t1.under_col, 100), t1.under_col, t2.under_col " +
- "from " + tableName + " t1 join " + tableName + " t2 on t1.under_col = t2.under_col");
- fail("Expecting SQLException");
- } catch (SQLException e) {
- tExecuteHolder.throwable = e;
- }
- }
- });
- // Thread killing the query
- Thread tKill = new Thread(new Runnable() {
- @Override
- public void run() {
- try {
- Thread.sleep(2000);
- String queryId = ((HiveStatement) stmt).getQueryId();
- System.out.println("Killing query: " + queryId);
-
- stmt2.execute("kill query '" + queryId + "'");
- stmt2.close();
- } catch (Exception e) {
- tKillHolder.throwable = e;
- }
- }
- });
-
- tExecute.start();
- tKill.start();
- tExecute.join();
- tKill.join();
- stmt.close();
- con2.close();
-
- assertNotNull("tExecute", tExecuteHolder.throwable);
- assertNull("tCancel", tKillHolder.throwable);
- }
-
- private static class ExceptionHolder {
- Throwable throwable;
- }
-}
-
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
index bf941c5..b2583c0 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
@@ -3176,7 +3176,7 @@ public class TestJdbcDriver2 {
stmt.execute("SET hive.resultset.use.unique.column.names=true");
ResultSet rs = stmt.executeQuery("select 1 UNION ALL select 2");
ResultSetMetaData metaData = rs.getMetaData();
- assertEquals("_c0", metaData.getColumnLabel(1));
+ assertEquals("col1", metaData.getColumnLabel(1));
assertTrue("There's no . for the UNION column name", !metaData.getColumnLabel(1).contains("."));
stmt.close();
}
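
[Note: the expectation above changes because literal-only UNION branches now come from an inline table whose generated columns are named col1, col2, ...; a minimal sketch (not part of the commit) of observing the label over JDBC, assuming a reachable HiveServer2 at the given URL:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class UnionLabelCheck {
        public static void main(String[] args) throws Exception {
            try (Connection conn = DriverManager.getConnection(
                     "jdbc:hive2://localhost:10000/default", "anonymous", "");
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery("select 1 UNION ALL select 2")) {
                // With this change the label is "col1" (the inline table's
                // generated column name) rather than the old "_c0" auto-alias.
                System.out.println(rs.getMetaData().getColumnLabel(1));
            }
        }
    }
]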
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index 61da9f4..fd54c0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -1692,7 +1692,7 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
return ret.succeed();
}
- private static RelNode stripHep(RelNode rel) {
+ static RelNode stripHep(RelNode rel) {
if (rel instanceof HepRelVertex) {
HepRelVertex hepRelVertex = (HepRelVertex) rel;
rel = hepRelVertex.getCurrentRel();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionSimpleSelectsToInlineTableRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionSimpleSelectsToInlineTableRule.java
new file mode 100644
index 0000000..c5f316d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionSimpleSelectsToInlineTableRule.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelRecordType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Transforms SELECTS of literals under UNION ALL into inline table scans.
+ *
+ * This rule processes plain projects and inline tables below UNION ALL nodes.
+ *
+ *<pre>
+ * SELECT 1
+ * UNION ALL
+ * SELECT 2
+ * UNION ALL
+ * [...]
+ * </pre>
+ *
+ * <pre>
+ * HiveUnion(all=true)
+ * HiveProject(_o__c0=[1])
+ * HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+ * HiveProject(_o__c0=[2])
+ * HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+ * [...]
+ * </pre>
+ *
+ * will be transformed into
+ * <pre>
+ * HiveUnion(all=true)
+ * HiveProject(EXPR$0=[$0])
+ * HiveTableFunctionScan(invocation=[inline(ARRAY(ROW(1), ROW(2)))], rowType=[RecordType(INTEGER EXPR$0)])
+ * HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+ * [...]
+ * </pre>
+ *
+ *
+ */
+public class HiveUnionSimpleSelectsToInlineTableRule extends RelOptRule {
+
+ protected static final Logger LOG = LoggerFactory.getLogger(HiveUnionSimpleSelectsToInlineTableRule.class);
+
+ private RelNode dummyTable;
+
+ public HiveUnionSimpleSelectsToInlineTableRule(RelNode dummyTable) {
+ super(operand(HiveUnion.class, any()));
+ this.dummyTable = dummyTable;
+ }
+
+ static class RowStorage extends HashMap<RelRecordType, List<RexNode>> {
+
+ private static final long serialVersionUID = 1L;
+
+ public void addRow(RexNode row) {
+ RelRecordType type = (RelRecordType) row.getType();
+
+ List<RexNode> e = get(type);
+ if (e == null) {
+ put(type, e = new ArrayList<RexNode>());
+ }
+ e.add(row);
+ }
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ RexBuilder rexBuilder = call.builder().getRexBuilder();
+ final HiveUnion union = call.rel(0);
+ if (!union.all) {
+ return;
+ }
+ List<RelNode> inputs = new ArrayList<RelNode>();
+ List<Project> projects = new ArrayList<>();
+ List<HiveTableFunctionScan> inlineTables = new ArrayList<>();
+
+ for (RelNode input : union.getInputs()) {
+ input = HiveRelDecorrelator.stripHep(input);
+
+ if (isPlainProject(input)) {
+ projects.add((Project) input);
+ continue;
+ }
+
+ if (isInlineTableOperand(input)) {
+ inlineTables.add((HiveTableFunctionScan) input);
+ continue;
+ }
+ inputs.add(input);
+ }
+
+ if (projects.size() + inlineTables.size() <= 1) {
+ // nothing to do
+ return;
+ }
+
+ RowStorage newRows = new RowStorage();
+ for (HiveTableFunctionScan rel : inlineTables) {
+ // inline(array(row1,row2,...))
+ RexCall rex = (RexCall) ((RexCall) rel.getCall()).operands.get(0);
+ for (RexNode row : rex.operands) {
+ if (!(row.getType() instanceof RelRecordType)) {
+ return;
+ }
+ newRows.addRow(row);
+ }
+ }
+
+ for (Project proj : projects) {
+ RexNode row = rexBuilder.makeCall(SqlStdOperatorTable.ROW, proj.getProjects());
+ if (!(row.getType() instanceof RelRecordType)) {
+ return;
+ }
+ newRows.addRow(row);
+ }
+
+ if (newRows.keySet().size() + inputs.size() == union.getInputs().size()) {
+ // nothing to do
+ return;
+ }
+
+ if (dummyTable == null) {
+ LOG.warn("Unexpected; rule would match - but dummyTable is not available");
+ return;
+ }
+
+ for (RelRecordType type : newRows.keySet()) {
+ List<RexNode> rows = newRows.get(type);
+
+ RelDataType arrayType = rexBuilder.getTypeFactory().createArrayType(type, -1);
+ try {
+ SqlOperator inlineFn =
+ SqlFunctionConverter.getCalciteFn("inline", Collections.singletonList(arrayType), type, true, false);
+ SqlOperator arrayFn =
+ SqlFunctionConverter.getCalciteFn("array", Collections.nCopies(rows.size(), type), arrayType, true, false);
+
+ RexNode expr = rexBuilder.makeCall(arrayFn, rows);
+ expr = rexBuilder.makeCall(inlineFn, expr);
+
+ RelNode newInlineTable = buildTableFunctionScan(expr, union.getCluster());
+
+ inputs.add(newInlineTable);
+
+ } catch (CalciteSemanticException e) {
+ LOG.debug("Conversion failed with exception", e);
+ return;
+ }
+ }
+
+ if (inputs.size() > 1) {
+ HiveUnion newUnion = (HiveUnion) union.copy(union.getTraitSet(), inputs, true);
+ call.transformTo(newUnion);
+ } else {
+ call.transformTo(inputs.get(0));
+ }
+ }
+
+ private boolean isPlainProject(RelNode input) {
+ input = HiveRelDecorrelator.stripHep(input);
+ if (!(input instanceof Project)) {
+ return false;
+ }
+ if (input.getInputs().size() == 0) {
+ return true;
+ }
+ return isDummyTableScan(input.getInput(0));
+ }
+
+ private boolean isInlineTableOperand(RelNode input) {
+ input = HiveRelDecorrelator.stripHep(input);
+ if (!(input instanceof HiveTableFunctionScan)) {
+ return false;
+ }
+ if (input.getInputs().size() == 0) {
+ return true;
+ }
+ return isDummyTableScan(input.getInput(0));
+ }
+
+ private boolean isDummyTableScan(RelNode input) {
+ input = HiveRelDecorrelator.stripHep(input);
+ if (!(input instanceof HiveTableScan)) {
+ return false;
+ }
+ HiveTableScan ts = (HiveTableScan) input;
+ Table table = ((RelOptHiveTable) ts.getTable()).getHiveTableMD();
+ if (!SemanticAnalyzer.DUMMY_DATABASE.equals(table.getDbName())) {
+ return false;
+ }
+ return true;
+ }
+
+ private RelNode buildTableFunctionScan(RexNode expr, RelOptCluster cluster)
+ throws CalciteSemanticException {
+
+ return HiveTableFunctionScan.create(cluster, TraitsUtil.getDefaultTraitSet(cluster),
+ ImmutableList.of(dummyTable), expr, null, expr.getType(), null);
+
+ }
+}
\ No newline at end of file
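
[Note: within Hive the rule is registered through CalcitePlanner (see the hunk further below), but a rule like this can also be driven standalone through Calcite's HepPlanner. A minimal sketch, assuming a RelNode to optimize and a dummy-table scan are supplied by the caller:

    import org.apache.calcite.plan.hep.HepMatchOrder;
    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgram;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;

    // Minimal sketch: run just this rule over the plan rooted at unionRel.
    static RelNode foldLiteralUnions(RelNode unionRel, RelNode dummyTableScan) {
        HepProgram program = new HepProgramBuilder()
            .addMatchOrder(HepMatchOrder.DEPTH_FIRST)
            .addRuleInstance(new HiveUnionSimpleSelectsToInlineTableRule(dummyTableScan))
            .build();
        HepPlanner planner = new HepPlanner(program);
        planner.setRoot(unionRel);
        // Literal-only UNION ALL branches come back folded into inline table scans.
        return planner.findBestExp();
    }
]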
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 56fd6f5..5b147b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -563,7 +563,7 @@ public class SqlFunctionConverter {
}
public static SqlOperator getCalciteFn(String hiveUdfName,
- ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType,
+ List<RelDataType> calciteArgTypes, RelDataType calciteRetType,
boolean deterministic, boolean runtimeConstant)
throws CalciteSemanticException {
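
[Note: loosening the parameter from ImmutableList to List lets callers build argument-type lists with the standard java.util.Collections helpers, which is how the new rule constructs its array() and inline() operators. A trimmed fragment mirroring the rule's onMatch (rows, rowType, and arrayType assumed in scope):

    // argTypes for array(row, row, ...): the same record type repeated per row.
    List<RelDataType> argTypes = Collections.nCopies(rows.size(), rowType);
    SqlOperator arrayFn =
        SqlFunctionConverter.getCalciteFn("array", argTypes, arrayType, true, false);
    // inline() takes the single array argument and yields the record type back.
    SqlOperator inlineFn = SqlFunctionConverter.getCalciteFn(
        "inline", Collections.singletonList(arrayType), rowType, true, false);
]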
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 55b2fdd..472b3d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -252,6 +252,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortPullUpConstants
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionSimpleSelectsToInlineTableRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
@@ -1608,6 +1609,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
private final StatsSource statsSource;
+ private RelNode dummyTableScan;
protected CalcitePlannerAction(
Map<String, PrunedPartitionList> partitionCache,
@@ -2210,6 +2212,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
// 1. Run other optimizations that do not need stats
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
ProjectRemoveRule.Config.DEFAULT.toRule(), HiveUnionMergeRule.INSTANCE,
+ new HiveUnionSimpleSelectsToInlineTableRule(dummyTableScan),
HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE,
HiveJoinCommuteRule.INSTANCE);
@@ -5067,6 +5070,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
qb.addAlias(DUMMY_TABLE);
qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb);
+ dummyTableScan = op;
aliasToRel.put(DUMMY_TABLE, op);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 25a2731..909853e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -11884,8 +11884,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
opParseCtx.get(operator).setRowResolver(newRR);
}
+ Path dummyPath;
protected Table getDummyTable() throws SemanticException {
- Path dummyPath = createDummyFile();
+ if (dummyPath == null) {
+ dummyPath = createDummyFile();
+ }
+
Table desc = new Table(DUMMY_DATABASE, DUMMY_TABLE);
desc.getTTable().getSd().setLocation(dummyPath.toString());
desc.getTTable().getSd().getSerdeInfo().setSerializationLib(NullStructSerDe.class.getName());
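
[Note: the hunk above memoizes dummyPath so that getDummyTable() creates its backing file only once per analyzer instance rather than on every call. Stripped of Hive specifics, it is plain lazy initialization; a sketch with hypothetical names (deliberately unsynchronized, which suffices for a per-query SemanticAnalyzer):

    // Lazy init: pay the filesystem cost once, reuse the result afterwards.
    private Path dummyPath;

    private Path dummyPathOrCreate() throws SemanticException {
        if (dummyPath == null) {
            dummyPath = createDummyFile(); // expensive side effect, now once per analyzer
        }
        return dummyPath;
    }
]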
diff --git a/ql/src/test/queries/clientpositive/union_literals.q b/ql/src/test/queries/clientpositive/union_literals.q
new file mode 100644
index 0000000..dcecfb3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_literals.q
@@ -0,0 +1,103 @@
+
+
+explain
+SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'),
+ (10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+;
+
+explain
+SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'),
+ (10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+union all
+ select 2,'2'
+union all
+ select 2,'2'
+;
+
+SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'))
+ as colors1
+UNION ALL
+SELECT * FROM (
+ VALUES(10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+union all
+ select 2,'2'
+union all
+ select 2,'2';
+
+
+explain
+ select 1, 1
+union all
+ select 2, 2
+union all
+ select 3, 3
+;
+
+ select 1, 1
+union all
+ select 2, 2
+union all
+ select 3, 3
+;
+
+
+create table t (a string, b string);
+insert into t values(9,9);
+
+explain cbo
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+;
+
+explain cbo
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+;
+
+
+
+explain
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+union all
+ select cast(3 as integer)
+;
+
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+union all
+ select cast(3 as integer)
+;
+
+ select cast(a as integer) from t;
diff --git a/ql/src/test/results/clientpositive/llap/udf_likeall.q.out b/ql/src/test/results/clientpositive/llap/udf_likeall.q.out
index 7a089c9..3dbd6fd 100644
--- a/ql/src/test/results/clientpositive/llap/udf_likeall.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_likeall.q.out
@@ -71,8 +71,8 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: database:default
POSTHOOK: Output: default@like_all_table
-POSTHOOK: Lineage: like_all_table.company EXPRESSION []
-POSTHOOK: Lineage: like_all_table.pat EXPRESSION []
+POSTHOOK: Lineage: like_all_table.company SCRIPT []
+POSTHOOK: Lineage: like_all_table.pat SCRIPT []
PREHOOK: query: select company from like_all_table where company like all ('%oo%','%go%')
PREHOOK: type: QUERY
PREHOOK: Input: default@like_all_table
diff --git a/ql/src/test/results/clientpositive/llap/udf_likeany.q.out b/ql/src/test/results/clientpositive/llap/udf_likeany.q.out
index fc36686..d5bb45a 100644
--- a/ql/src/test/results/clientpositive/llap/udf_likeany.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_likeany.q.out
@@ -71,8 +71,8 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: database:default
POSTHOOK: Output: default@like_any_table
-POSTHOOK: Lineage: like_any_table.company EXPRESSION []
-POSTHOOK: Lineage: like_any_table.pat EXPRESSION []
+POSTHOOK: Lineage: like_any_table.company SCRIPT []
+POSTHOOK: Lineage: like_any_table.pat SCRIPT []
PREHOOK: query: select company from like_any_table where company like any ('%oo%','%in','fa%')
PREHOOK: type: QUERY
PREHOOK: Input: default@like_any_table
diff --git a/ql/src/test/results/clientpositive/llap/udf_sort_array_by.q.out b/ql/src/test/results/clientpositive/llap/udf_sort_array_by.q.out
index 4eda558..be80abb 100644
--- a/ql/src/test/results/clientpositive/llap/udf_sort_array_by.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_sort_array_by.q.out
@@ -101,9 +101,9 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: database:default
POSTHOOK: Output: default@sort_array_by_table
-POSTHOOK: Lineage: sort_array_by_table.company EXPRESSION []
-POSTHOOK: Lineage: sort_array_by_table.country EXPRESSION []
-POSTHOOK: Lineage: sort_array_by_table.employee EXPRESSION []
+POSTHOOK: Lineage: sort_array_by_table.company SCRIPT []
+POSTHOOK: Lineage: sort_array_by_table.country SCRIPT []
+POSTHOOK: Lineage: sort_array_by_table.employee SCRIPT []
PREHOOK: query: select company,country,sort_array_by(employee,'salary') as single_field_sort from sort_array_by_table
PREHOOK: type: QUERY
PREHOOK: Input: default@sort_array_by_table
diff --git a/ql/src/test/results/clientpositive/llap/union_literals.q.out b/ql/src/test/results/clientpositive/llap/union_literals.q.out
new file mode 100644
index 0000000..107cddc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/union_literals.q.out
@@ -0,0 +1,454 @@
+PREHOOK: query: explain
+SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'),
+ (10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'),
+ (10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Select Operator
+ expressions: array(const struct(1,'1'),const struct(2,'orange'),const struct(5,'yellow'),const struct(10,'green'),const struct(11,'blue'),const struct(12,'indigo'),const struct(20,'violet')) (type: array<struct<col1:int,col2:string>>)
+ outputColumnNames: _col0
+ UDTF Operator
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string)
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: explain
+SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'),
+ (10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+union all
+ select 2,'2'
+union all
+ select 2,'2'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'),
+ (10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+union all
+ select 2,'2'
+union all
+ select 2,'2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: array(const struct(2,'2'),const struct(2,'2')) (type: array<struct<col1:int,col2:string>>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: array(const struct(1,'1'),const struct(2,'orange'),const struct(5,'yellow'),const struct(10,'green'),const struct(11,'blue'),const struct(12,'indigo'),const struct(20,'violet')) (type: array<struct<col1:int,col2:string>>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'))
+ as colors1
+UNION ALL
+SELECT * FROM (
+ VALUES(10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+union all
+ select 2,'2'
+union all
+ select 2,'2'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM (
+ VALUES(1, '1'),
+ (2, 'orange'),
+ (5, 'yellow'))
+ as colors1
+UNION ALL
+SELECT * FROM (
+ VALUES(10, 'green'),
+ (11, 'blue'),
+ (12, 'indigo'),
+ (20, 'violet'))
+ AS Colors
+union all
+ select 2,'2'
+union all
+ select 2,'2'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+2 2
+2 2
+1 1
+2 orange
+5 yellow
+10 green
+11 blue
+12 indigo
+20 violet
+PREHOOK: query: explain
+ select 1, 1
+union all
+ select 2, 2
+union all
+ select 3, 3
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+ select 1, 1
+union all
+ select 2, 2
+union all
+ select 3, 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Select Operator
+ expressions: array(const struct(1,1),const struct(2,2),const struct(3,3)) (type: array<struct<col1:int,col2:int>>)
+ outputColumnNames: _col0
+ UDTF Operator
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int), col2 (type: int)
+ outputColumnNames: _col0, _col1
+ ListSink
+
+PREHOOK: query: select 1, 1
+union all
+ select 2, 2
+union all
+ select 3, 3
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select 1, 1
+union all
+ select 2, 2
+union all
+ select 3, 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1 1
+2 2
+3 3
+PREHOOK: query: create table t (a string, b string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (a string, b string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values(9,9)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values(9,9)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a SCRIPT []
+POSTHOOK: Lineage: t.b SCRIPT []
+PREHOOK: query: explain cbo
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+CBO PLAN:
+HiveUnion(all=[true])
+ HiveProject(a=[CAST($0):INTEGER])
+ HiveTableScan(table=[[default, t]], table:alias=[t])
+ HiveProject(_o__c0=[1])
+ HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+
+PREHOOK: query: explain cbo
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+CBO PLAN:
+HiveUnion(all=[true])
+ HiveProject(a=[CAST($0):INTEGER])
+ HiveTableScan(table=[[default, t]], table:alias=[t])
+ HiveProject(EXPR$0=[$0])
+ HiveTableFunctionScan(invocation=[inline(ARRAY(ROW(1), ROW(2)))], rowType=[RecordType(INTEGER EXPR$0)])
+ HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+
+PREHOOK: query: explain
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+union all
+ select cast(3 as integer)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+ select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+union all
+ select cast(3 as integer)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: UDFToInteger(a) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: array(const struct(1),const struct(2),const struct(3)) (type: array<struct<col1:int>>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: no inputs
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
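In the physical plan, all three literal branches collapse into one vertex (Map 3), which builds array(const struct(1),const struct(2),const struct(3)) over the single dummy-table row and explodes it with the inline UDTF, so the union needs two map vertices instead of four. The UDTF itself simply turns an array of structs into rows; a minimal standalone sketch of its behavior:

    SELECT inline(array(struct(1), struct(2), struct(3)));
    -- yields three rows with a single column col1: 1, 2, 3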
+PREHOOK: query: select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+union all
+ select cast(3 as integer)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select cast(a as integer) from t
+union all
+ select cast(1 as integer)
+union all
+ select cast(2 as integer)
+union all
+ select cast(3 as integer)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+9
+1
+2
+3
+PREHOOK: query: select cast(a as integer) from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select cast(a as integer) from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+9
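The rule also reshapes plans in pre-existing tests. In vectorized_mapjoin3.q.out below, a union of two single-row constant selects (roughly of the shape sketched here; the actual test query may differ) becomes an inline table scan, which in turn lets the planner swap the shuffle-join cross product for vectorized map joins:

    SELECT 'TEST-1'
    UNION ALL
    SELECT 'TEST-2';
    -- now planned as inline(array(struct('TEST-1'), struct('TEST-2')))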
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
index 15109b9..8ad8ddd 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
@@ -111,7 +111,7 @@ POSTHOOK: Input: default@table_19
POSTHOOK: Input: default@table_6
#### A masked pattern was here ####
418.9
-Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_0]] in Stage 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[55][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT
t1.t_id,
null
@@ -813,7 +813,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_0]] in Stage 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[52][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT
t1.t_id,
@@ -853,47 +853,124 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Union 2 (CONTAINS)
- Map 4 <- Union 2 (CONTAINS)
- Reducer 3 <- Map 5 (XPROD_EDGE), Reducer 6 (BROADCAST_EDGE), Union 2 (XPROD_EDGE)
- Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
- alias: _dummy_table
- Row Limit Per Split: 1
- Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ alias: t2
+ filterExpr: (t_id is not null and f_id is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:t_id:string, 1:f_id:string, 2:type:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 4:ROW__IS__DELETED:boolean]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string))
+ predicate: (t_id is not null and f_id is not null) (type: boolean)
+ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Execution mode: llap
- LLAP IO: no inputs
+ expressions: t_id (type: string), f_id (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ Map Join Vectorization:
+ bigTableRetainColumnNums: [0, 1]
+ bigTableValueColumns: 0:string, 1:string
+ className: VectorMapJoinInnerBigOnlyMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ nonOuterSmallTableKeyMapping: []
+ projectedOutput: 0:string, 1:string
+ hashTableImplementationType: OPTIMIZED
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 10 Data size: 4250 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:string, 1:string
+ bigTableRetainColumnNums: []
+ className: VectorMapJoinInnerBigOnlyMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ nonOuterSmallTableKeyMapping: [0]
+ projectedOutput: 0:string
+ hashTableImplementationType: OPTIMIZED
+ outputColumnNames: _col3
+ input vertices:
+ 1 Map 3
+ Statistics: Num rows: 11 Data size: 4675 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col3 (type: string), null (type: void)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 5]
+ selectExpressions: ConstantVectorExpression(val null) -> 5:void
+ Statistics: Num rows: 11 Data size: 4675 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 11 Data size: 4675 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
Map Vectorization:
- enabled: false
+ enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- enabledConditionsNotMet: Could not enable vectorization due to partition column names size 1 is greater than the number of table column names size 0 IS false
- inputFileFormats: org.apache.hadoop.hive.ql.io.NullRowsInputFormat
- Map 4
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1]
+ dataColumns: t_id:string, f_id:string, type:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [void]
+ Map 2
Map Operator Tree:
TableScan
alias: _dummy_table
Row Limit Per Split: 1
Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: array(const struct('TEST-1'),const struct('TEST-2')) (type: array<struct<col1:string>>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ function name: inline
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map Vectorization:
@@ -901,11 +978,11 @@ STAGE PLANS:
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
enabledConditionsNotMet: Could not enable vectorization due to partition column names size 1 is greater than the number of table column names size 0 IS false
inputFileFormats: org.apache.hadoop.hive.ql.io.NullRowsInputFormat
- Map 5
+ Map 3
Map Operator Tree:
TableScan
alias: t1
- filterExpr: (((type = 'TEST') and t_id is not null and f_id is not null) or (t_id is not null and f_id is not null)) (type: boolean)
+ filterExpr: ((type = 'TEST') and t_id is not null and f_id is not null) (type: boolean)
Statistics: Num rows: 10 Data size: 5520 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
@@ -936,31 +1013,6 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string))
- predicate: (t_id is not null and f_id is not null) (type: boolean)
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: t_id (type: string), f_id (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- null sort order:
- sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: 0:string, 1:string
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -978,77 +1030,6 @@ STAGE PLANS:
dataColumns: t_id:string, f_id:string, type:string
partitionColumnCount: 0
scratchColumnTypeNames: []
- Reducer 3
- Execution mode: llap
- Reduce Operator Tree:
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 7540 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: string), _col1 (type: string)
- 1 _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col3
- input vertices:
- 1 Reducer 6
- Statistics: Num rows: 22 Data size: 8294 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col3 (type: string), null (type: void)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 22 Data size: 8294 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 22 Data size: 8294 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- MergeJoin Vectorization:
- enabled: false
- enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
- Reducer 6
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: zz
- reduceColumnSortOrder: ++
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string
- partitionColumnCount: 0
- scratchColumnTypeNames: []
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
- outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- null sort order: zz
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumns: 0:string, 1:string
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
- Union 2
- Vertex: Union 2
Stage: Stage-0
Fetch Operator