Posted to commits@hive.apache.org by br...@apache.org on 2014/10/06 00:26:58 UTC
svn commit: r1629544 [2/33] - in /hive/branches/spark-new: ./
accumulo-handler/ beeline/ beeline/src/java/org/apache/hive/beeline/ bin/
bin/ext/ common/ common/src/java/org/apache/hadoop/hive/conf/
common/src/test/org/apache/hadoop/hive/common/type/ co...
Propchange: hive/branches/spark-new/
------------------------------------------------------------------------------
Merged /hive/branches/cbo:r1605012-1627125
Merged /hive/trunk:r1626482-1629477
Modified: hive/branches/spark-new/accumulo-handler/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/accumulo-handler/pom.xml?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/accumulo-handler/pom.xml (original)
+++ hive/branches/spark-new/accumulo-handler/pom.xml Sun Oct 5 22:26:43 2014
@@ -112,6 +112,12 @@
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop-20S.version}</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>${hadoop-20S.version}</version>
<optional>true</optional>
@@ -123,6 +129,12 @@
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop-23.version}</version>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop-23.version}</version>
<optional>true</optional>
Modified: hive/branches/spark-new/beeline/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/beeline/pom.xml?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/beeline/pom.xml (original)
+++ hive/branches/spark-new/beeline/pom.xml Sun Oct 5 22:26:43 2014
@@ -49,6 +49,11 @@
<artifactId>hive-shims</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>${project.version}</version>
+ </dependency>
<!-- inter-project -->
<dependency>
<groupId>commons-cli</groupId>
@@ -88,12 +93,6 @@
<!-- test intra-project -->
<dependency>
<groupId>org.apache.hive</groupId>
- <artifactId>hive-jdbc</artifactId>
- <version>${project.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
Modified: hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/BeeLine.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/BeeLine.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/BeeLine.java (original)
+++ hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/BeeLine.java Sun Oct 5 22:26:43 2014
@@ -692,10 +692,6 @@ public class BeeLine implements Closeabl
int code = 0;
if (!commands.isEmpty()) {
- // for single command execute, disable color
- getOpts().setColor(false);
- getOpts().setHeaderInterval(-1);
-
for (Iterator<String> i = commands.iterator(); i.hasNext();) {
String command = i.next().toString();
debug(loc("executing-command", command));
Modified: hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/Commands.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/Commands.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/Commands.java (original)
+++ hive/branches/spark-new/beeline/src/java/org/apache/hive/beeline/Commands.java Sun Oct 5 22:26:43 2014
@@ -38,6 +38,7 @@ import java.sql.Driver;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
+import java.sql.SQLWarning;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
@@ -47,10 +48,13 @@ import java.util.Set;
import java.util.TreeSet;
import org.apache.hadoop.hive.common.cli.ShellCmdExecutor;
+import org.apache.hive.jdbc.HiveStatement;
public class Commands {
private final BeeLine beeLine;
+ private static final int DEFAULT_QUERY_PROGRESS_INTERVAL = 1000;
+ private static final int DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT = 10 * 1000;
/**
* @param beeLine
@@ -728,7 +732,7 @@ public class Commands {
beeLine.handleException(e);
}
-
+ line = line.trim();
if (line.endsWith(";")) {
line = line.substring(0, line.length() - 1);
}
@@ -758,6 +762,7 @@ public class Commands {
try {
Statement stmnt = null;
boolean hasResults;
+ Thread logThread = null;
try {
long start = System.currentTimeMillis();
@@ -767,7 +772,15 @@ public class Commands {
hasResults = ((CallableStatement) stmnt).execute();
} else {
stmnt = beeLine.createStatement();
- hasResults = stmnt.execute(sql);
+ if (beeLine.getOpts().isSilent()) {
+ hasResults = stmnt.execute(sql);
+ } else {
+ logThread = new Thread(createLogRunnable(stmnt));
+ logThread.setDaemon(true);
+ logThread.start();
+ hasResults = stmnt.execute(sql);
+ logThread.interrupt();
+ }
}
beeLine.showWarnings();
@@ -782,6 +795,11 @@ public class Commands {
beeLine.info(beeLine.loc("rows-selected", count) + " "
+ beeLine.locElapsedTime(end - start));
} finally {
+ if (logThread != null) {
+ logThread.join(DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT);
+ showRemainingLogsIfAny(stmnt);
+ logThread = null;
+ }
rs.close();
}
} while (BeeLine.getMoreResults(stmnt));
@@ -792,6 +810,13 @@ public class Commands {
+ " " + beeLine.locElapsedTime(end - start));
}
} finally {
+ if (logThread != null) {
+ if (!logThread.isInterrupted()) {
+ logThread.interrupt();
+ }
+ logThread.join(DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT);
+ showRemainingLogsIfAny(stmnt);
+ }
if (stmnt != null) {
stmnt.close();
}
@@ -803,6 +828,61 @@ public class Commands {
return true;
}
+ private Runnable createLogRunnable(Statement statement) {
+ if (statement instanceof HiveStatement) {
+ final HiveStatement hiveStatement = (HiveStatement) statement;
+
+ Runnable runnable = new Runnable() {
+ @Override
+ public void run() {
+ while (hiveStatement.hasMoreLogs()) {
+ try {
+ // fetch the log periodically and output to beeline console
+ for (String log : hiveStatement.getQueryLog()) {
+ beeLine.info(log);
+ }
+ Thread.sleep(DEFAULT_QUERY_PROGRESS_INTERVAL);
+ } catch (SQLException e) {
+ beeLine.error(new SQLWarning(e));
+ return;
+ } catch (InterruptedException e) {
+ beeLine.debug("Getting log thread is interrupted, since query is done!");
+ return;
+ }
+ }
+ }
+ };
+ return runnable;
+ } else {
+ beeLine.debug("The statement instance is not HiveStatement type: " + statement.getClass());
+ return new Runnable() {
+ @Override
+ public void run() {
+ // do nothing.
+ }
+ };
+ }
+ }
+
+ private void showRemainingLogsIfAny(Statement statement) {
+ if (statement instanceof HiveStatement) {
+ HiveStatement hiveStatement = (HiveStatement) statement;
+ List<String> logs;
+ do {
+ try {
+ logs = hiveStatement.getQueryLog();
+ } catch (SQLException e) {
+ beeLine.error(new SQLWarning(e));
+ return;
+ }
+ for (String log : logs) {
+ beeLine.info(log);
+ }
+ } while (logs.size() > 0);
+ } else {
+ beeLine.debug("The statement instance is not HiveStatement type: " + statement.getClass());
+ }
+ }
public boolean quit(String line) {
beeLine.setExit(true);
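
The BeeLine change above boils down to: spawn a daemon thread that polls HiveStatement for operation logs while execute() blocks, interrupt it when the query returns, then drain any remaining log lines. A minimal standalone sketch of that pattern, assuming a reachable HiveServer2 (the JDBC URL, credentials, and query are placeholders, and error handling is condensed relative to the committed code):

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.List;
import org.apache.hive.jdbc.HiveStatement;

public class QueryLogDemo {
  public static void main(String[] args) throws Exception {
    Connection conn = DriverManager.getConnection(
        "jdbc:hive2://localhost:10000/default", "user", "");
    // A HiveConnection hands back a HiveStatement, which exposes the
    // log API used by createLogRunnable() above.
    final HiveStatement stmt = (HiveStatement) conn.createStatement();

    Thread logThread = new Thread(new Runnable() {
      @Override
      public void run() {
        while (stmt.hasMoreLogs()) {
          try {
            // Periodically fetch whatever the server has buffered.
            for (String line : stmt.getQueryLog()) {
              System.err.println(line);
            }
            Thread.sleep(1000); // DEFAULT_QUERY_PROGRESS_INTERVAL
          } catch (Exception e) {
            return; // SQLException, or interrupt once the query is done
          }
        }
      }
    });
    logThread.setDaemon(true); // never keeps the JVM alive
    logThread.start();

    stmt.execute("SELECT COUNT(*) FROM src"); // blocks until completion
    logThread.interrupt();
    logThread.join(10 * 1000); // DEFAULT_QUERY_PROGRESS_THREAD_TIMEOUT

    // Equivalent of showRemainingLogsIfAny(): drain anything still queued.
    List<String> rest;
    do {
      rest = stmt.getQueryLog();
      for (String line : rest) {
        System.err.println(line);
      }
    } while (!rest.isEmpty());
    conn.close();
  }
}
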
Modified: hive/branches/spark-new/bin/beeline.cmd
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/bin/beeline.cmd?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/bin/beeline.cmd (original)
+++ hive/branches/spark-new/bin/beeline.cmd Sun Oct 5 22:26:43 2014
@@ -43,7 +43,22 @@ if not exist %HADOOP_HOME%\libexec\hadoo
@rem supress the HADOOP_HOME warnings in 1.x.x
set HADOOP_HOME_WARN_SUPPRESS=true
call %HADOOP_HOME%\libexec\hadoop-config.cmd
-set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\*;
+
+@rem include only the beeline client jar and its dependencies
+pushd %HIVE_HOME%\lib
+for /f %%a IN ('dir /b hive-beeline-**.jar') do (
+ set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\%%a
+)
+for /f %%a IN ('dir /b super-csv-**.jar') do (
+ set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\%%a
+)
+for /f %%a IN ('dir /b jline-**.jar') do (
+ set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\%%a
+)
+for /f %%a IN ('dir /b hive-jdbc-**-standalone.jar') do (
+ set CLASSPATH=%CLASSPATH%;%HIVE_HOME%\lib\%%a
+)
+popd
call %JAVA_HOME%\bin\java %JAVA_HEAP_MAX% %HADOOP_OPTS% -classpath %CLASSPATH% org.apache.hive.beeline.BeeLine %*
Modified: hive/branches/spark-new/bin/ext/beeline.sh
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/bin/ext/beeline.sh?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/bin/ext/beeline.sh (original)
+++ hive/branches/spark-new/bin/ext/beeline.sh Sun Oct 5 22:26:43 2014
@@ -19,11 +19,17 @@ export SERVICE_LIST="${SERVICE_LIST}${TH
beeline () {
CLASS=org.apache.hive.beeline.BeeLine;
- execHiveCmd $CLASS "$@"
+
+ # include only the beeline client jar and its dependencies
+ beelineJarPath=`ls ${HIVE_LIB}/hive-beeline-*.jar`
+ superCsvJarPath=`ls ${HIVE_LIB}/super-csv-*.jar`
+ jlineJarPath=`ls ${HIVE_LIB}/jline-*.jar`
+ jdbcStandaloneJarPath=`ls ${HIVE_LIB}/hive-jdbc-*-standalone.jar`
+ export HADOOP_CLASSPATH=${beelineJarPath}:${superCsvJarPath}:${jlineJarPath}:${jdbcStandaloneJarPath}
+
+ exec $HADOOP jar ${beelineJarPath} $CLASS $HIVE_OPTS "$@"
}
beeline_help () {
- CLASS=org.apache.hive.beeline.BeeLine;
- execHiveCmd $CLASS "--help"
+ beeline "--help"
}
-
Modified: hive/branches/spark-new/common/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/pom.xml?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/common/pom.xml (original)
+++ hive/branches/spark-new/common/pom.xml Sun Oct 5 22:26:43 2014
@@ -72,6 +72,12 @@
</dependency>
<!-- test inter-project -->
<dependency>
+ <groupId>com.google.code.tempus-fugit</groupId>
+ <artifactId>tempus-fugit</artifactId>
+ <version>${tempus-fugit.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>${junit.version}</version>
Modified: hive/branches/spark-new/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/spark-new/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sun Oct 5 22:26:43 2014
@@ -649,6 +649,9 @@ public class HiveConf extends Configurat
HIVEJOINCACHESIZE("hive.join.cache.size", 25000,
"How many rows in the joining tables (except the streaming table) should be cached in memory."),
+ // CBO related
+ HIVE_CBO_ENABLED("hive.cbo.enable", false, "Flag to control enabling Cost Based Optimizations using Optiq framework."),
+
// hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row,
// need to remove by hive .13. Also, do not change default (see SMB operator)
HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""),
@@ -1051,7 +1054,7 @@ public class HiveConf extends Configurat
"That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR.\n" +
"The optimization will be automatically disabled if number of reducers would be less than specified value."),
- HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", true,
+ HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", false,
"When enabled dynamic partitioning column will be globally sorted.\n" +
"This way we can keep only one record writer open for each partition value\n" +
"in the reducer thereby reducing the memory pressure on reducers."),
@@ -1196,13 +1199,6 @@ public class HiveConf extends Configurat
"Average row size is computed from average column size of all columns in the row. In the absence\n" +
"of column statistics and for variable length complex columns like map, the average number of\n" +
"entries/values can be specified using this config."),
- // to accurately compute statistics for GROUPBY map side parallelism needs to be known
- HIVE_STATS_MAP_SIDE_PARALLELISM("hive.stats.map.parallelism", 1,
- "Hive/Tez optimizer estimates the data size flowing through each of the operators.\n" +
- "For GROUPBY operator, to accurately compute the data size map-side parallelism needs to\n" +
- "be known. By default, this value is set to 1 since optimizer is not aware of the number of\n" +
- "mappers during compile-time. This Hive config can be used to specify the number of mappers\n" +
- "to be used for data size computation of GROUPBY operator."),
// statistics annotation fetches stats for each partition, which can be expensive. turning
// this off will result in basic sizes being fetched from namenode instead
HIVE_STATS_FETCH_PARTITION_STATS("hive.stats.fetch.partition.stats", true,
@@ -1384,6 +1380,8 @@ public class HiveConf extends Configurat
"authorization manager class name to be used in the metastore for authorization.\n" +
"The user defined authorization class should implement interface \n" +
"org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider. "),
+ HIVE_METASTORE_AUTHORIZATION_AUTH_READS("hive.security.metastore.authorization.auth.reads", true,
+ "If this is true, metastore authorizer authorizes read actions on database, table"),
HIVE_METASTORE_AUTHENTICATOR_MANAGER("hive.security.metastore.authenticator.manager",
"org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator",
"authenticator manager class name to be used in the metastore for authentication. \n" +
@@ -1479,10 +1477,10 @@ public class HiveConf extends Configurat
HIVE_INSERT_INTO_MULTILEVEL_DIRS("hive.insert.into.multilevel.dirs", false,
"Where to insert into multilevel directories like\n" +
"\"insert directory '/HIVEFT25686/chinna/' from table\""),
- HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS("hive.warehouse.subdir.inherit.perms", false,
- "Set this to true if the the table directories should inherit the\n" +
- "permission of the warehouse or database directory instead of being created\n" +
- "with the permissions derived from dfs umask"),
+ HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS("hive.warehouse.subdir.inherit.perms", true,
+ "Set this to false if the table directories should be created\n" +
+ "with the permissions derived from dfs umask instead of\n" +
+ "inheriting the permission of the warehouse or database directory."),
HIVE_INSERT_INTO_EXTERNAL_TABLES("hive.insert.into.external.tables", true,
"whether insert into external tables is allowed"),
@@ -1513,8 +1511,8 @@ public class HiveConf extends Configurat
"The parent node in ZooKeeper used by HiveServer2 when supporting dynamic service discovery."),
// HiveServer2 global init file location
HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION("hive.server2.global.init.file.location", "${env:HIVE_CONF_DIR}",
- "The location of HS2 global init file (.hiverc).\n" +
- "If the property is reset, the value must be a valid path where the init file is located."),
+ "Either the location of a HS2 global init file or a directory containing a .hiverc file. If the \n" +
+ "property is set, the value must be a valid path to an init file or directory where the init file is located."),
HIVE_SERVER2_TRANSPORT_MODE("hive.server2.transport.mode", "binary", new StringSet("binary", "http"),
"Transport mode of HiveServer2."),
HIVE_SERVER2_THRIFT_BIND_HOST("hive.server2.thrift.bind.host", "",
@@ -1722,6 +1720,9 @@ public class HiveConf extends Configurat
HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true,
"This flag should be set to true to enable vectorized mode of the reduce-side of query execution.\n" +
"The default value is true."),
+ HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED("hive.vectorized.execution.reduce.groupby.enabled", true,
+ "This flag should be set to true to enable vectorized mode of the reduce-side GROUP BY query execution.\n" +
+ "The default value is true."),
HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000,
"Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed."),
HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000,
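
For reference, the ConfVars added or re-defaulted above are read and overridden through HiveConf's typed accessors; a small sketch using only the constants visible in this diff:

import org.apache.hadoop.hive.conf.HiveConf;

public class ConfDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // New in this commit; ships disabled.
    boolean cbo = conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED);
    // Default flipped from true to false above.
    boolean sortDynPart =
        conf.getBoolVar(HiveConf.ConfVars.HIVEOPTSORTDYNAMICPARTITION);
    // Default flipped from false to true above.
    boolean inheritPerms =
        conf.getBoolVar(HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
    System.out.printf("cbo=%b sortDynPart=%b inheritPerms=%b%n",
        cbo, sortDynPart, inheritPerms);
    // Overriding for a session is the usual setBoolVar call:
    conf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, true);
  }
}
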
Modified: hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java (original)
+++ hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java Sun Oct 5 22:26:43 2014
@@ -18,10 +18,19 @@
package org.apache.hadoop.hive.common.type;
-import junit.framework.TestCase;
-
-public class TestHiveChar extends TestCase {
-
+import com.google.code.tempusfugit.concurrency.annotations.*;
+import com.google.code.tempusfugit.concurrency.*;
+import org.junit.*;
+import static org.junit.Assert.*;
+
+public class TestHiveChar {
+
+ @Rule public ConcurrentRule concurrentRule = new ConcurrentRule();
+ @Rule public RepeatingRule repeatingRule = new RepeatingRule();
+
+ @Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testBasic() {
HiveChar hc = new HiveChar("abc", 10);
assertEquals("abc ", hc.toString());
@@ -47,6 +56,9 @@ public class TestHiveChar extends TestCa
assertEquals(3, hc.getCharacterLength());
}
+ @Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testStringLength() {
HiveChar hc = new HiveChar();
@@ -60,6 +72,9 @@ public class TestHiveChar extends TestCa
assertEquals("0123456789 ", hc.toString());
}
+ @Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testComparison() {
HiveChar hc1 = new HiveChar();
HiveChar hc2 = new HiveChar();
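
This and the two test classes below follow the same tempus-fugit recipe: drop the junit.framework.TestCase base class so JUnit 4 rules apply, register ConcurrentRule and RepeatingRule, and annotate each test. A self-contained sketch of the recipe, with a hypothetical test body:

import com.google.code.tempusfugit.concurrency.ConcurrentRule;
import com.google.code.tempusfugit.concurrency.RepeatingRule;
import com.google.code.tempusfugit.concurrency.annotations.Concurrent;
import com.google.code.tempusfugit.concurrency.annotations.Repeating;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.junit.Rule;
import org.junit.Test;
import static org.junit.Assert.assertEquals;

public class TestHiveCharConcurrency {
  // The rules wrap every @Test method and apply the annotations below.
  @Rule public ConcurrentRule concurrentRule = new ConcurrentRule();
  @Rule public RepeatingRule repeatingRule = new RepeatingRule();

  @Test
  @Concurrent(count = 4)        // run the body on 4 threads at once
  @Repeating(repetition = 100)  // and repeat the whole run 100 times
  public void paddingIsStable() {
    // "abc" padded with spaces to length 10; flushes out shared-state bugs.
    assertEquals("abc       ", new HiveChar("abc", 10).toString());
  }
}
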
Modified: hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java (original)
+++ hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveDecimal.java Sun Oct 5 22:26:43 2014
@@ -20,12 +20,19 @@ package org.apache.hadoop.hive.common.ty
import java.math.BigDecimal;
import java.math.BigInteger;
-import org.junit.Assert;
-import org.junit.Test;
+import com.google.code.tempusfugit.concurrency.annotations.*;
+import com.google.code.tempusfugit.concurrency.*;
+import org.junit.*;
+import static org.junit.Assert.*;
public class TestHiveDecimal {
+ @Rule public ConcurrentRule concurrentRule = new ConcurrentRule();
+ @Rule public RepeatingRule repeatingRule = new RepeatingRule();
+
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testPrecisionScaleEnforcement() {
String decStr = "1786135888657847525803324040144343378.09799306448796128931113691624";
HiveDecimal dec = HiveDecimal.create(decStr);
@@ -82,6 +89,8 @@ public class TestHiveDecimal {
}
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testMultiply() {
HiveDecimal dec1 = HiveDecimal.create("0.00001786135888657847525803");
HiveDecimal dec2 = HiveDecimal.create("3.0000123456789");
@@ -105,6 +114,8 @@ public class TestHiveDecimal {
}
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testPow() {
HiveDecimal dec = HiveDecimal.create("3.00001415926");
Assert.assertEquals(dec.pow(2), dec.multiply(dec));
@@ -118,6 +129,8 @@ public class TestHiveDecimal {
}
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testDivide() {
HiveDecimal dec1 = HiveDecimal.create("3.14");
HiveDecimal dec2 = HiveDecimal.create("3");
@@ -133,6 +146,8 @@ public class TestHiveDecimal {
}
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testPlus() {
HiveDecimal dec1 = HiveDecimal.create("99999999999999999999999999999999999");
HiveDecimal dec2 = HiveDecimal.create("1");
@@ -145,6 +160,8 @@ public class TestHiveDecimal {
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testSubtract() {
HiveDecimal dec1 = HiveDecimal.create("3.140");
HiveDecimal dec2 = HiveDecimal.create("1.00");
@@ -152,6 +169,8 @@ public class TestHiveDecimal {
}
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testPosMod() {
HiveDecimal hd1 = HiveDecimal.create("-100.91");
HiveDecimal hd2 = HiveDecimal.create("9.8");
@@ -160,12 +179,16 @@ public class TestHiveDecimal {
}
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testHashCode() {
Assert.assertEquals(HiveDecimal.create("9").hashCode(), HiveDecimal.create("9.00").hashCode());
Assert.assertEquals(HiveDecimal.create("0").hashCode(), HiveDecimal.create("0.00").hashCode());
}
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testException() {
HiveDecimal dec = HiveDecimal.create("3.1415.926");
Assert.assertNull(dec);
@@ -174,6 +197,8 @@ public class TestHiveDecimal {
}
@Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testBinaryConversion() {
testBinaryConversion("0.00");
testBinaryConversion("-12.25");
Modified: hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java (original)
+++ hive/branches/spark-new/common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java Sun Oct 5 22:26:43 2014
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hive.common.type;
-import junit.framework.TestCase;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.common.LogUtils;
@@ -28,8 +27,15 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Random;
+import com.google.code.tempusfugit.concurrency.annotations.*;
+import com.google.code.tempusfugit.concurrency.*;
+import org.junit.*;
+import static org.junit.Assert.*;
+
+public class TestHiveVarchar {
+ @Rule public ConcurrentRule concurrentRule = new ConcurrentRule();
+ @Rule public RepeatingRule repeatingRule = new RepeatingRule();
-public class TestHiveVarchar extends TestCase {
public TestHiveVarchar() {
super();
}
@@ -65,6 +71,9 @@ public class TestHiveVarchar extends Tes
}
}
+ @Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testStringLength() throws Exception {
int strLen = 20;
int[] lengths = { 15, 20, 25 };
@@ -124,6 +133,9 @@ public class TestHiveVarchar extends Tes
assertEquals(5, vc1.getCharacterLength());
}
+ @Test
+ @Concurrent(count=4)
+ @Repeating(repetition=100)
public void testComparison() throws Exception {
HiveVarchar hc1 = new HiveVarchar("abcd", 20);
HiveVarchar hc2 = new HiveVarchar("abcd", 20);
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleAvg.java Sun Oct 5 22:26:43 2014
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.contrib.udaf.example;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
@@ -32,6 +33,8 @@ import org.apache.hadoop.hive.ql.exec.UD
* more efficient.
*
*/
+@Description(name = "example_avg",
+value = "_FUNC_(col) - Example UDAF to compute average")
public final class UDAFExampleAvg extends UDAF {
/**
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleGroupConcat.java Sun Oct 5 22:26:43 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.contrib.u
import java.util.ArrayList;
import java.util.Collections;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
@@ -35,6 +36,8 @@ import org.apache.hadoop.hive.ql.exec.UD
* implement built-in aggregation functions, which are harder to program but
* more efficient.
*/
+@Description(name = "example_group_concat",
+value = "_FUNC_(col) - Example UDAF that concatenates all arguments from different rows into a single string")
public class UDAFExampleGroupConcat extends UDAF {
/**
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMaxN.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMaxN.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMaxN.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMaxN.java Sun Oct 5 22:26:43 2014
@@ -19,11 +19,13 @@
package org.apache.hadoop.hive.contrib.udaf.example;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
/**
* Returns the max N double values.
*/
+@Description(name = "example_max_n", value = "_FUNC_(expr) - Example UDAF that returns the max N double values")
public class UDAFExampleMaxN extends UDAF {
/**
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMinN.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMinN.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMinN.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udaf/example/UDAFExampleMinN.java Sun Oct 5 22:26:43 2014
@@ -19,11 +19,13 @@
package org.apache.hadoop.hive.contrib.udaf.example;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
/**
* Returns the min N double values.
*/
+@Description(name = "example_min_n", value = "_FUNC_(expr) - Example UDAF that returns the min N double values")
public class UDAFExampleMinN extends UDAF{
/**
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleAdd.java Sun Oct 5 22:26:43 2014
@@ -17,12 +17,14 @@
*/
package org.apache.hadoop.hive.contrib.udf.example;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleAdd.
*
*/
+@Description(name = "example_add", value = "_FUNC_(expr) - Example UDAF that returns the sum")
public class UDFExampleAdd extends UDF {
public Integer evaluate(Integer... a) {
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleArraySum.java Sun Oct 5 22:26:43 2014
@@ -19,12 +19,14 @@ package org.apache.hadoop.hive.contrib.u
import java.util.List;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleArraySum.
*
*/
+@Description(name = "example_arraysum", value = "_FUNC_(expr) - Example UDAF that returns the sum")
public class UDFExampleArraySum extends UDF {
public Double evaluate(List<Double> a) {
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleFormat.java Sun Oct 5 22:26:43 2014
@@ -17,12 +17,14 @@
*/
package org.apache.hadoop.hive.contrib.udf.example;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleFormat.
*
*/
+@Description(name = "example_format", value = "_FUNC_(expr) - Example UDAF that returns formated String")
public class UDFExampleFormat extends UDF {
public String evaluate(String format, Object... args) {
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleMapConcat.java Sun Oct 5 22:26:43 2014
@@ -21,12 +21,15 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleMapConcat.
*
*/
+@Description(name = "example_mapconcat",
+value = "_FUNC_(expr) - Example UDAF that returns contents of Map as a formated String")
public class UDFExampleMapConcat extends UDF {
public String evaluate(Map<String, String> a) {
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udf/example/UDFExampleStructPrint.java Sun Oct 5 22:26:43 2014
@@ -19,12 +19,15 @@ package org.apache.hadoop.hive.contrib.u
import java.util.List;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleStructPrint.
*
*/
+@Description(name = "example_structprint",
+value = "_FUNC_(obj) - Example UDAF that returns contents of an object")
public class UDFExampleStructPrint extends UDF {
public String evaluate(Object a) {
Modified: hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java (original)
+++ hive/branches/spark-new/contrib/src/java/org/apache/hadoop/hive/contrib/udtf/example/GenericUDTFCount2.java Sun Oct 5 22:26:43 2014
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.contrib.u
import java.util.ArrayList;
+import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
@@ -34,6 +35,8 @@ import org.apache.hadoop.hive.serde2.obj
* to test outputting of rows on close with lateral view.
*
*/
+@Description(name = "udtfCount2",
+value = "_FUNC_(col) - UDF outputs the number of rows seen, twice.")
public class GenericUDTFCount2 extends GenericUDTF {
private transient Integer count = Integer.valueOf(0);
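
The contrib changes above all share one pattern: tag each example function with @Description so that DESCRIBE FUNCTION can report it, with _FUNC_ expanded to the registered name. A minimal, hypothetical UDF showing the shape (not part of this commit):

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;

@Description(name = "example_twice",
    value = "_FUNC_(n) - returns n doubled")
public class UDFExampleTwice extends UDF {
  public Integer evaluate(Integer n) {
    // Hive passes SQL NULL as a Java null; preserve it.
    return n == null ? null : Integer.valueOf(n.intValue() * 2);
  }
}
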
Modified: hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_add.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_add.q.out?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_add.q.out (original)
+++ hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_add.q.out Sun Oct 5 22:26:43 2014
@@ -25,36 +25,24 @@ SELECT example_add(1, 2),
FROM src LIMIT 1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 500 Data size: 22000 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 1
- Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: 1
Processor Tree:
- ListSink
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 3 (type: int), 6 (type: int), 10 (type: int), 3.3000000000000003 (type: double), 6.6 (type: double), 11.0 (type: double), 10.4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 500 Data size: 22000 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
PREHOOK: query: SELECT example_add(1, 2),
example_add(1, 2, 3),
Modified: hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_format.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_format.q.out?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_format.q.out (original)
+++ hive/branches/spark-new/contrib/src/test/results/clientpositive/udf_example_format.q.out Sun Oct 5 22:26:43 2014
@@ -19,36 +19,24 @@ SELECT example_format("abc"),
FROM src LIMIT 1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: src
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: 'abc' (type: string), '1.1' (type: string), '1.1 1.200000e+00' (type: string), 'a 12 10' (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 182500 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 1
- Statistics: Num rows: 1 Data size: 365 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 365 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
limit: 1
Processor Tree:
- ListSink
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'abc' (type: string), '1.1' (type: string), '1.1 1.200000e+00' (type: string), 'a 12 10' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 500 Data size: 182500 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Statistics: Num rows: 1 Data size: 365 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
PREHOOK: query: SELECT example_format("abc"),
example_format("%1$s", 1.1),
Modified: hive/branches/spark-new/data/files/parquet_types.txt
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/data/files/parquet_types.txt?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/data/files/parquet_types.txt (original)
+++ hive/branches/spark-new/data/files/parquet_types.txt Sun Oct 5 22:26:43 2014
@@ -1,21 +1,21 @@
-100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a
-101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab
-102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc
-103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd
-104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde
-105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef
-106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg
-107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh
-108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop
-109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef
-110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede
-111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded
-112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd
-113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc
-114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b
-115|1|1|1.0|4.5|tuv|2026-04-04 16:16:16.161616161|rstuv|abcded
-116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded
-117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded
-118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede
-119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede
-120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde
+100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a |k1:v1|101,200|10,abc
+101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab |k2:v2|102,200|10,def
+102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|k3:v3|103,200|10,ghi
+103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|k4:v4|104,200|10,jkl
+104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|k5:v5|105,200|10,mno
+105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|k6:v6|106,200|10,pqr
+106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|k7:v7|107,200|10,stu
+107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|k8:v8|108,200|10,vwx
+108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop|k9:v9|109,200|10,yza
+109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|k10:v10|110,200|10,bcd
+110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|k11:v11|111,200|10,efg
+111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|k12:v12|112,200|10,hij
+112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|k13:v13|113,200|10,klm
+113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|k14:v14|114,200|10,nop
+114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|k15:v15|115,200|10,qrs
+115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|k16:v16|116,200|10,qrs
+116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|k17:v17|117,200|10,wxy
+117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|k18:v18|118,200|10,zab
+118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|k19:v19|119,200|10,cde
+119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|k20:v20|120,200|10,fgh
+120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|k21:v21|121,200|10,ijk
Propchange: hive/branches/spark-new/hbase-handler/pom.xml
------------------------------------------------------------------------------
Merged /hive/branches/cbo/hbase-handler/pom.xml:r1605012-1627125
Merged /hive/trunk/hbase-handler/pom.xml:r1625359-1629477
Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java (original)
+++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java Sun Oct 5 22:26:43 2014
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.hbase;
+import java.io.IOException;
+import java.util.Properties;
+
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
@@ -26,9 +29,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import java.io.IOException;
-import java.util.Properties;
-
public class DefaultHBaseKeyFactory extends AbstractHBaseKeyFactory implements HBaseKeyFactory {
protected LazySimpleSerDe.SerDeParameters serdeParams;
Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java (original)
+++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java Sun Oct 5 22:26:43 2014
@@ -53,6 +53,7 @@ public class HBaseSerDe extends Abstract
public static final String HBASE_COMPOSITE_KEY_CLASS = "hbase.composite.key.class";
public static final String HBASE_COMPOSITE_KEY_TYPES = "hbase.composite.key.types";
public static final String HBASE_COMPOSITE_KEY_FACTORY = "hbase.composite.key.factory";
+ public static final String HBASE_STRUCT_SERIALIZER_CLASS = "hbase.struct.serialization.class";
public static final String HBASE_SCAN_CACHE = "hbase.scan.cache";
public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock";
public static final String HBASE_SCAN_BATCH = "hbase.scan.batch";
@@ -98,7 +99,7 @@ public class HBaseSerDe extends Abstract
cachedHBaseRow = new LazyHBaseRow(
(LazySimpleStructObjectInspector) cachedObjectInspector,
- serdeParams.getKeyIndex(), serdeParams.getKeyFactory());
+ serdeParams.getKeyIndex(), serdeParams.getKeyFactory(), serdeParams.getValueFactories());
serializer = new HBaseRowSerializer(serdeParams);
Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java (original)
+++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java Sun Oct 5 22:26:43 2014
@@ -41,6 +41,10 @@ import org.apache.hadoop.hive.serde.serd
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.util.StringUtils;
@@ -371,6 +375,19 @@ public class HBaseSerDeHelper {
}
/**
+ * Create the {@link LazyObjectBase lazy field}
+ * */
+ public static LazyObjectBase createLazyField(ColumnMapping[] columnMappings, int fieldID,
+ ObjectInspector inspector) {
+ ColumnMapping colMap = columnMappings[fieldID];
+ if (colMap.getQualifierName() == null && !colMap.isHbaseRowKey()) {
+ // a column family
+ return new LazyHBaseCellMap((LazyMapObjectInspector) inspector);
+ }
+ return LazyFactory.createLazyObject(inspector, colMap.getBinaryStorage().get(0));
+ }
+
+ /**
* Auto-generates the key struct for composite keys
*
* @param compositeKeyParts map of composite key part name to its type. Usually this would be
Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java (original)
+++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java Sun Oct 5 22:26:43 2014
@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.hbase.Colu
import org.apache.hadoop.hive.hbase.struct.AvroHBaseValueFactory;
import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory;
import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.StructHBaseValueFactory;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
@@ -204,11 +205,21 @@ public class HBaseSerDeParameters {
for (int i = 0; i < columnMappings.size(); i++) {
String serType = getSerializationType(conf, tbl, columnMappings.getColumnsMapping()[i]);
- if (serType != null && serType.equals(AVRO_SERIALIZATION_TYPE)) {
+ if (AVRO_SERIALIZATION_TYPE.equals(serType)) {
Schema schema = getSchema(conf, tbl, columnMappings.getColumnsMapping()[i]);
- valueFactories.add(new AvroHBaseValueFactory(schema));
+ valueFactories.add(new AvroHBaseValueFactory(i, schema));
+ } else if (STRUCT_SERIALIZATION_TYPE.equals(serType)) {
+ String structValueClassName = tbl.getProperty(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS);
+
+ if (structValueClassName == null) {
+ throw new IllegalArgumentException(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS
+ + " must be set for hbase columns of type [" + STRUCT_SERIALIZATION_TYPE + "]");
+ }
+
+ Class<?> structValueClass = job.getClassByName(structValueClassName);
+ valueFactories.add(new StructHBaseValueFactory(i, structValueClass));
} else {
- valueFactories.add(new DefaultHBaseValueFactory());
+ valueFactories.add(new DefaultHBaseValueFactory(i));
}
}
} catch (Exception e) {
Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java (original)
+++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java Sun Oct 5 22:26:43 2014
@@ -20,15 +20,15 @@ package org.apache.hadoop.hive.hbase;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
-import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -47,18 +47,21 @@ public class LazyHBaseRow extends LazySt
private final int iKey;
private final HBaseKeyFactory keyFactory;
+ private final List<HBaseValueFactory> valueFactories;
public LazyHBaseRow(LazySimpleStructObjectInspector oi) {
- this(oi, -1, null);
+ this(oi, -1, null, null);
}
/**
* Construct a LazyHBaseRow object with the ObjectInspector.
*/
- public LazyHBaseRow(LazySimpleStructObjectInspector oi, int iKey, HBaseKeyFactory keyFactory) {
+ public LazyHBaseRow(LazySimpleStructObjectInspector oi, int iKey, HBaseKeyFactory keyFactory,
+ List<HBaseValueFactory> valueFactories) {
super(oi);
this.iKey = iKey;
this.keyFactory = keyFactory;
+ this.valueFactories = valueFactories;
}
/**
@@ -76,13 +79,14 @@ public class LazyHBaseRow extends LazySt
if (fieldID == iKey) {
return keyFactory.createKey(fieldRef.getFieldObjectInspector());
}
- ColumnMapping colMap = columnsMapping[fieldID];
- if (colMap.qualifierName == null && !colMap.hbaseRowKey) {
- // a column family
- return new LazyHBaseCellMap((LazyMapObjectInspector) fieldRef.getFieldObjectInspector());
+
+ if (valueFactories != null) {
+ return valueFactories.get(fieldID).createValueObject(fieldRef.getFieldObjectInspector());
}
- return LazyFactory.createLazyObject(fieldRef.getFieldObjectInspector(),
- colMap.binaryStorage.get(0));
+
+ // fallback to default
+ return HBaseSerDeHelper.createLazyField(columnsMapping, fieldID,
+ fieldRef.getFieldObjectInspector());
}
/**
Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java (original)
+++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java Sun Oct 5 22:26:43 2014
@@ -48,7 +48,8 @@ public class AvroHBaseValueFactory exten
*
* @param schema the associated {@link Schema schema}
* */
- public AvroHBaseValueFactory(Schema schema) {
+ public AvroHBaseValueFactory(int fieldID, Schema schema) {
+ super(fieldID);
this.schema = schema;
}
Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java (original)
+++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java Sun Oct 5 22:26:43 2014
@@ -21,9 +21,12 @@ import java.io.IOException;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.ColumnMappings;
+import org.apache.hadoop.hive.hbase.HBaseSerDeHelper;
import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -35,15 +38,23 @@ import org.apache.hadoop.hive.serde2.typ
public class DefaultHBaseValueFactory implements HBaseValueFactory{
protected LazySimpleSerDe.SerDeParameters serdeParams;
+ protected ColumnMappings columnMappings;
protected HBaseSerDeParameters hbaseParams;
protected Properties properties;
protected Configuration conf;
+ private int fieldID;
+
+ public DefaultHBaseValueFactory(int fieldID) {
+ this.fieldID = fieldID;
+ }
+
@Override
public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties)
throws SerDeException {
this.hbaseParams = hbaseParams;
this.serdeParams = hbaseParams.getSerdeParams();
+ this.columnMappings = hbaseParams.getColumnMappings();
this.properties = properties;
this.conf = conf;
}
@@ -55,6 +66,11 @@ public class DefaultHBaseValueFactory im
1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar());
}
+ @Override
+ public LazyObjectBase createValueObject(ObjectInspector inspector) throws SerDeException {
+ return HBaseSerDeHelper.createLazyField(columnMappings.getColumnsMapping(), fieldID, inspector);
+ }
+
@Override
public byte[] serializeValue(Object object, StructField field)
throws IOException {
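Taken together, the per-field wiring added in this file amounts to roughly the
following caller-side flow (a simplified sketch; hbaseParams, conf, properties, and
the ObjectInspector oi are assumed to be in scope, as during HBaseSerDe initialization):

    HBaseValueFactory factory = new DefaultHBaseValueFactory(fieldID); // remembers its column position
    factory.init(hbaseParams, conf, properties);          // captures the column mappings
    LazyObjectBase value = factory.createValueObject(oi); // lazy object for that one field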
Modified: hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java (original)
+++ hive/branches/spark-new/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java Sun Oct 5 22:26:43 2014
@@ -22,8 +22,10 @@ import java.io.IOException;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.HBaseKeyFactory;
import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -53,6 +55,13 @@ public interface HBaseValueFactory {
ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException;
/**
+ * Create a custom lazy object for the HBase value.
+ *
+ * @param inspector OI created by {@link HBaseKeyFactory#createKeyObjectInspector}
+ */
+ LazyObjectBase createValueObject(ObjectInspector inspector) throws SerDeException;
+
+ /**
* Serialize the given hive object
*
* @param object the object to be serialized
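The new createValueObject hook lets a factory control how each field's value is
materialized. A hedged sketch of what an implementation might look like (the class
name is hypothetical; only the constructor and methods shown in this diff are assumed):

    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    public class CustomStructValueFactory extends DefaultHBaseValueFactory {
      public CustomStructValueFactory(int fieldID) {
        super(fieldID);
      }

      @Override
      public LazyObjectBase createValueObject(ObjectInspector inspector) throws SerDeException {
        // A real factory could return any LazyObjectBase subclass here;
        // delegating keeps the default per-column-mapping behavior.
        return super.createValueObject(inspector);
      }
    }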
Modified: hive/branches/spark-new/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java (original)
+++ hive/branches/spark-new/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java Sun Oct 5 22:26:43 2014
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Map;
import java.util.Properties;
+import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.avro.Schema;
@@ -61,6 +62,7 @@ import org.apache.hadoop.hive.serde2.io.
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.BooleanWritable;
@@ -135,6 +137,27 @@ public class TestHBaseSerDe extends Test
" ]\n" +
"}";
+ private static final String EXPECTED_DESERIALIZED_AVRO_STRING =
+ "{\"key\":\"test-row1\",\"cola_avro\":{\"arecord\":{\"int1\":42,\"boolean1\":true,"
+ + "\"long1\":42432234234}}}";
+
+ private static final String EXPECTED_DESERIALIZED_AVRO_STRING_2 =
+ "{\"key\":\"test-row1\","
+ + "\"cola_avro\":{\"employeename\":\"Avro Employee1\","
+ + "\"employeeid\":11111,\"age\":25,\"gender\":\"FEMALE\","
+ + "\"contactinfo\":{\"address\":[{\"address1\":\"Avro First Address1\",\"address2\":"
+ + "\"Avro Second Address1\",\"city\":\"Avro City1\",\"zipcode\":123456,\"county\":"
+ + "{0:{\"areacode\":999,\"number\":1234567890}},\"aliases\":null,\"metadata\":"
+ + "{\"testkey\":\"testvalue\"}},{\"address1\":\"Avro First Address1\",\"address2\":"
+ + "\"Avro Second Address1\",\"city\":\"Avro City1\",\"zipcode\":123456,\"county\":"
+ + "{0:{\"areacode\":999,\"number\":1234567890}},\"aliases\":null,\"metadata\":"
+ + "{\"testkey\":\"testvalue\"}}],\"homephone\":{\"areacode\":999,\"number\":1234567890},"
+ + "\"officephone\":{\"areacode\":999,\"number\":1234455555}}}}";
+
+ private static final String EXPECTED_DESERIALIZED_AVRO_STRING_3 =
+ "{\"key\":\"test-row1\",\"cola_avro\":{\"arecord\":{\"int1\":42,\"string1\":\"test\","
+ + "\"boolean1\":true,\"long1\":42432234234}}}";
+
/**
* Test the default behavior of the Lazy family of objects and object inspectors.
*/
@@ -1047,7 +1070,8 @@ public class TestHBaseSerDe extends Test
Properties tbl = createPropertiesForHiveAvroSchemaInline();
serDe.initialize(conf, tbl);
- deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
+ EXPECTED_DESERIALIZED_AVRO_STRING);
}
private Properties createPropertiesForHiveAvroSchemaInline() {
@@ -1092,7 +1116,8 @@ public class TestHBaseSerDe extends Test
Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema();
serDe.initialize(conf, tbl);
- deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
+ EXPECTED_DESERIALIZED_AVRO_STRING_3);
}
private Properties createPropertiesForHiveAvroForwardEvolvedSchema() {
@@ -1136,7 +1161,8 @@ public class TestHBaseSerDe extends Test
Properties tbl = createPropertiesForHiveAvroBackwardEvolvedSchema();
serDe.initialize(conf, tbl);
- deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
+ EXPECTED_DESERIALIZED_AVRO_STRING);
}
private Properties createPropertiesForHiveAvroBackwardEvolvedSchema() {
@@ -1185,7 +1211,8 @@ public class TestHBaseSerDe extends Test
Properties tbl = createPropertiesForHiveAvroSerClass();
serDe.initialize(conf, tbl);
- deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
+ EXPECTED_DESERIALIZED_AVRO_STRING_2);
}
private Properties createPropertiesForHiveAvroSerClass() {
@@ -1243,7 +1270,8 @@ public class TestHBaseSerDe extends Test
Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS);
serDe.initialize(conf, tbl);
- deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
+ EXPECTED_DESERIALIZED_AVRO_STRING);
} finally {
// Teardown the cluster
if (miniDfs != null) {
@@ -1298,7 +1326,8 @@ public class TestHBaseSerDe extends Test
Properties tbl = createPropertiesForHiveAvroExternalSchema();
serDe.initialize(conf, tbl);
- deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+ deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData,
+ EXPECTED_DESERIALIZED_AVRO_STRING_2);
}
private Properties createPropertiesForHiveAvroExternalSchema() {
@@ -1389,8 +1418,87 @@ public class TestHBaseSerDe extends Test
return tbl;
}
+ public void testHBaseSerDeCustomStructValue() throws IOException, SerDeException {
+
+ byte[] cfa = "cola".getBytes();
+ byte[] qualStruct = "struct".getBytes();
+
+ TestStruct testStruct = new TestStruct("A", "B", "C", false, (byte) 0);
+ byte[] key = testStruct.getBytes();
+ // Data
+ List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+ byte[] testData = testStruct.getBytes();
+ kvs.add(new KeyValue(key, cfa, qualStruct, testData));
+
+ Result r = new Result(kvs);
+ byte[] putKey = testStruct.getBytesWithDelimiters();
+
+ Put p = new Put(putKey);
+
+ // After serialization, separators are automatically inserted between the fields of the
+ // struct, and currently there is no way to disable that. The workaround here is to pad
+ // the data with the separator bytes before creating the "Put" object.
+ p.add(new KeyValue(putKey, cfa, qualStruct, Bytes.padTail(testData, 2)));
+
+ // Create, initialize, and test the SerDe
+ HBaseSerDe serDe = new HBaseSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createPropertiesForValueStruct();
+ serDe.initialize(conf, tbl);
+
+ deserializeAndSerializeHBaseValueStruct(serDe, r, p);
+
+ }
+
+ private Properties createPropertiesForValueStruct() {
+ Properties tbl = new Properties();
+ tbl.setProperty("cola.struct.serialization.type", "struct");
+ tbl.setProperty("cola.struct.test.value", "test value");
+ tbl.setProperty(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS,
+ "org.apache.hadoop.hive.hbase.HBaseTestStructSerializer");
+ tbl.setProperty(serdeConstants.LIST_COLUMNS, "key,astring");
+ tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
+ "struct<col1:string,col2:string,col3:string>,struct<col1:string,col2:string,col3:string>");
+ tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:struct");
+ tbl.setProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS,
+ "org.apache.hadoop.hive.hbase.HBaseTestCompositeKey");
+ return tbl;
+ }
+
+ private void deserializeAndSerializeHBaseValueStruct(HBaseSerDe serDe, Result r, Put p)
+ throws SerDeException, IOException {
+ StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
+
+ List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
+
+ Object row = serDe.deserialize(new ResultWritable(r));
+
+ Object fieldData = null;
+ for (int j = 0; j < fieldRefs.size(); j++) {
+ fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
+ assertNotNull(fieldData);
+ if (fieldData instanceof LazyStruct) {
+ assertEquals(((LazyStruct) fieldData).getField(0).toString(), "A");
+ assertEquals(((LazyStruct) fieldData).getField(1).toString(), "B");
+ assertEquals(((LazyStruct) fieldData).getField(2).toString(), "C");
+ } else {
+ Assert.fail("fieldData should be an instance of LazyStruct");
+ }
+ }
+
+ assertEquals(
+ "{\"key\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"},\"astring\":{\"col1\":\"A\",\"col2\":\"B\",\"col3\":\"C\"}}",
+ SerDeUtils.getJSONString(row, soi));
+
+ // Now serialize
+ Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
+
+ assertEquals("Serialized put:", p.toString(), put.toString());
+ }
+
private void deserializeAndSerializeHiveAvro(HBaseSerDe serDe, Result r, Put p,
- Object[] expectedFieldsData)
+ Object[] expectedFieldsData, String expectedDeserializedAvroString)
throws SerDeException, IOException {
StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
@@ -1403,6 +1511,8 @@ public class TestHBaseSerDe extends Test
assertNotNull(fieldData);
assertEquals(expectedFieldsData[j], fieldData.toString().trim());
}
+
+ assertEquals(expectedDeserializedAvroString, SerDeUtils.getJSONString(row, soi));
// Now serialize
Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
Modified: hive/branches/spark-new/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java (original)
+++ hive/branches/spark-new/hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java Sun Oct 5 22:26:43 2014
@@ -23,6 +23,7 @@ import java.util.List;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -104,6 +105,7 @@ public class HCatSemanticAnalyzer extend
case HiveParser.TOK_ALTERVIEW_DROPPARTS:
case HiveParser.TOK_ALTERVIEW_PROPERTIES:
case HiveParser.TOK_ALTERVIEW_RENAME:
+ case HiveParser.TOK_ALTERVIEW:
case HiveParser.TOK_CREATEVIEW:
case HiveParser.TOK_DROPVIEW:
@@ -359,7 +361,7 @@ public class HCatSemanticAnalyzer extend
AlterTableDesc alterTable = work.getAlterTblDesc();
if (alterTable != null) {
Table table = hive.getTable(SessionState.get().getCurrentDatabase(),
- alterTable.getOldName(), false);
+ Utilities.getDbTableName(alterTable.getOldName())[1], false);
Partition part = null;
if (alterTable.getPartSpec() != null) {
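The fix above matters when ALTER TABLE/VIEW is issued with a db-qualified name:
hive.getTable(currentDb, name, ...) expects the bare table name, while getOldName()
may return "db.table". Assuming Utilities.getDbTableName splits a qualified name into
{db, table}, which is what this change relies on:

    String[] parts = Utilities.getDbTableName("db1.view1"); // hypothetical qualified name
    // parts[1] == "view1", the bare name the metastore lookup expects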
Modified: hive/branches/spark-new/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java (original)
+++ hive/branches/spark-new/hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java Sun Oct 5 22:26:43 2014
@@ -90,6 +90,7 @@ public class TestHCatPartitionPublish {
File workDir = handleWorkDir();
conf.set("yarn.scheduler.capacity.root.queues", "default");
conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
+ conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem");
fs = FileSystem.get(conf);
System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath());
Modified: hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/pom.xml?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/pom.xml (original)
+++ hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/pom.xml Sun Oct 5 22:26:43 2014
@@ -53,6 +53,13 @@
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
</dependencies>
Modified: hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java
URL: http://svn.apache.org/viewvc/hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java?rev=1629544&r1=1629543&r2=1629544&view=diff
==============================================================================
--- hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java (original)
+++ hive/branches/spark-new/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java Sun Oct 5 22:26:43 2014
@@ -28,10 +28,12 @@ import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.Set;
import org.apache.commons.io.FileUtils;
@@ -42,6 +44,8 @@ import org.apache.hadoop.hive.cli.CliSes
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CommandNeedRetryException;
import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.StorageFormats;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
@@ -69,12 +73,16 @@ import org.joda.time.DateTime;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.*;
+import static org.junit.Assume.assumeTrue;
+@RunWith(Parameterized.class)
public class TestHCatLoader {
private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoader.class);
private static final String TEST_DATA_DIR = HCatUtil.makePathASafeFileName(System.getProperty("java.io.tmpdir") +
@@ -91,9 +99,30 @@ public class TestHCatLoader {
private Driver driver;
private Map<Integer, Pair<Integer, String>> basicInputData;
- protected String storageFormat() {
- return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," +
- "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')";
+ private static final Map<String, Set<String>> DISABLED_STORAGE_FORMATS =
+ new HashMap<String, Set<String>>() {{
+ put(IOConstants.AVRO, new HashSet<String>() {{
+ add("testReadDataBasic");
+ add("testReadPartitionedBasic");
+ add("testProjectionsBasic");
+ add("testSchemaLoadPrimitiveTypes");
+ }});
+ put(IOConstants.PARQUETFILE, new HashSet<String>() {{
+ add("testReadDataBasic");
+ add("testReadPartitionedBasic");
+ add("testProjectionsBasic");
+ }});
+ }};
+
+ private String storageFormat;
+
+ @Parameterized.Parameters
+ public static Collection<Object[]> generateParameters() {
+ return StorageFormats.names();
+ }
+
+ public TestHCatLoader(String storageFormat) {
+ this.storageFormat = storageFormat;
}
private void dropTable(String tablename) throws IOException, CommandNeedRetryException {
@@ -105,7 +134,7 @@ public class TestHCatLoader {
}
private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException {
- createTable(tablename, schema, partitionedBy, driver, storageFormat());
+ createTable(tablename, schema, partitionedBy, driver, storageFormat);
}
static void createTable(String tablename, String schema, String partitionedBy, Driver driver, String storageFormat)
@@ -209,17 +238,18 @@ public class TestHCatLoader {
server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});", ++i);
server.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();", ++i);
server.executeBatch();
-
}
@After
public void tearDown() throws Exception {
try {
- dropTable(BASIC_TABLE);
- dropTable(COMPLEX_TABLE);
- dropTable(PARTITIONED_TABLE);
- dropTable(SPECIFIC_SIZE_TABLE);
- dropTable(AllTypesTable.ALL_PRIMITIVE_TYPES_TABLE);
+ if (driver != null) {
+ dropTable(BASIC_TABLE);
+ dropTable(COMPLEX_TABLE);
+ dropTable(PARTITIONED_TABLE);
+ dropTable(SPECIFIC_SIZE_TABLE);
+ dropTable(AllTypesTable.ALL_PRIMITIVE_TYPES_TABLE);
+ }
} finally {
FileUtils.deleteDirectory(new File(TEST_DATA_DIR));
}
@@ -227,6 +257,7 @@ public class TestHCatLoader {
@Test
public void testSchemaLoadBasic() throws IOException {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
PigServer server = new PigServer(ExecType.LOCAL);
@@ -241,23 +272,28 @@ public class TestHCatLoader {
assertTrue(Xfields.get(1).type == DataType.CHARARRAY);
}
+
/**
* Test that we properly translate data types in Hive/HCat table schema into Pig schema
*/
@Test
public void testSchemaLoadPrimitiveTypes() throws IOException {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
AllTypesTable.testSchemaLoadPrimitiveTypes();
}
+
/**
* Test that value from Hive table are read properly in Pig
*/
@Test
public void testReadDataPrimitiveTypes() throws Exception {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
AllTypesTable.testReadDataPrimitiveTypes();
}
@Test
public void testReadDataBasic() throws IOException {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
PigServer server = new PigServer(ExecType.LOCAL);
server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
@@ -279,6 +315,7 @@ public class TestHCatLoader {
@Test
public void testSchemaLoadComplex() throws IOException {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
PigServer server = new PigServer(ExecType.LOCAL);
@@ -337,6 +374,7 @@ public class TestHCatLoader {
@Test
public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
PigServer server = new PigServer(ExecType.LOCAL);
driver.run("select * from " + PARTITIONED_TABLE);
@@ -404,6 +442,7 @@ public class TestHCatLoader {
@Test
public void testProjectionsBasic() throws IOException {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
PigServer server = new PigServer(ExecType.LOCAL);
@@ -453,6 +492,7 @@ public class TestHCatLoader {
@Test
public void testColumnarStorePushdown() throws Exception {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
String PIGOUTPUT_DIR = TEST_DATA_DIR+ "/colpushdownop";
String PIG_FILE = "test.pig";
String expectedCols = "0,1";
@@ -486,6 +526,7 @@ public class TestHCatLoader {
@Test
public void testGetInputBytes() throws Exception {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
File file = new File(TEST_WAREHOUSE_DIR + "/" + SPECIFIC_SIZE_TABLE + "/part-m-00000");
file.deleteOnExit();
RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw");
@@ -501,6 +542,7 @@ public class TestHCatLoader {
@Test
public void testConvertBooleanToInt() throws Exception {
+ assumeTrue(!TestUtil.shouldSkip(storageFormat, DISABLED_STORAGE_FORMATS));
String tbl = "test_convert_boolean_to_int";
String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt";
File inputDataDir = new File(inputFileName).getParentFile();
@@ -600,7 +642,11 @@ public class TestHCatLoader {
* Test that value from Hive table are read properly in Pig
*/
private static void testReadDataPrimitiveTypes() throws Exception {
- PigServer server = new PigServer(ExecType.LOCAL);
+ // testConvertBooleanToInt() sets HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER=true and
+ // might be the last test to call HCatContext.INSTANCE.setConf(), so make sure the setting is false here.
+ Properties properties = new Properties();
+ properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "false");
+ PigServer server = new PigServer(ExecType.LOCAL, properties);
server.registerQuery("X = load '" + ALL_PRIMITIVE_TYPES_TABLE + "' using " + HCatLoader.class.getName() + "();");
Iterator<Tuple> XIter = server.openIterator("X");
int numTuplesRead = 0;
@@ -608,22 +654,26 @@ public class TestHCatLoader {
Tuple t = XIter.next();
assertEquals(HCatFieldSchema.Type.numPrimitiveTypes(), t.size());
int colPos = 0;
- for(Object referenceData : primitiveRows[numTuplesRead]) {
- if(referenceData == null) {
- assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data is null; actual " +
- t.get(colPos), t.get(colPos) == null);
- }
- else if(referenceData instanceof java.util.Date) {
- assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data=" + ((java.util.Date)referenceData).getTime() + " actual=" +
- ((DateTime)t.get(colPos)).getMillis() + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ")",
+ for (Object referenceData : primitiveRows[numTuplesRead]) {
+ if (referenceData == null) {
+ assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos
+ + " Reference data is null; actual "
+ + t.get(colPos), t.get(colPos) == null);
+ } else if (referenceData instanceof java.util.Date) {
+ // Note that we ignore the nanos part of the Hive Timestamp here, since nanos are
+ // dropped by design when reading Hive data from Pig.
+ assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos
+ + " Reference data=" + ((java.util.Date)referenceData).getTime()
+ + " actual=" + ((DateTime)t.get(colPos)).getMillis()
+ + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ")",
((java.util.Date)referenceData).getTime()== ((DateTime)t.get(colPos)).getMillis());
- //note that here we ignore nanos part of Hive Timestamp since nanos are dropped when reading Hive from Pig by design
- }
- else {
- assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos + " Reference data=" + referenceData + " actual=" +
- t.get(colPos) + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ")",
+ } else {
+ // Do String comparisons here, as the value objects in Hive and Pig differ, so
+ // equals() doesn't work.
+ assertTrue("rowNum=" + numTuplesRead + " colNum=" + colPos
+ + " Reference data=" + referenceData + " actual=" + t.get(colPos)
+ + "; types=(" + referenceData.getClass() + "," + t.get(colPos).getClass() + ") ",
referenceData.toString().equals(t.get(colPos).toString()));
- //doing String comps here as value objects in Hive in Pig are different so equals() doesn't work
}
colPos++;
}
@@ -633,10 +683,10 @@ public class TestHCatLoader {
}
private static void setupAllTypesTable(Driver driver) throws Exception {
String[] primitiveData = new String[primitiveRows.length];
- for(int i = 0; i < primitiveRows.length; i++) {
+ for (int i = 0; i < primitiveRows.length; i++) {
Object[] rowData = primitiveRows[i];
StringBuilder row = new StringBuilder();
- for(Object cell : rowData) {
+ for (Object cell : rowData) {
row.append(row.length() == 0 ? "" : "\t").append(cell == null ? null : cell);
}
primitiveData[i] = row.toString();
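The assumeTrue guards added throughout this test all follow one pattern: look the
current storage format up in DISABLED_STORAGE_FORMATS and skip when the test name is
listed. A minimal sketch of that contract (the real TestUtil.shouldSkip may derive
the test name differently; here it is passed explicitly for clarity):

    import java.util.Map;
    import java.util.Set;

    // Hypothetical standalone version of the guard used above.
    final class StorageFormatGuard {
      // Returns true when the named test is disabled for the given storage format.
      static boolean shouldSkip(String format, String testName,
                                Map<String, Set<String>> disabled) {
        Set<String> tests = disabled.get(format);
        return tests != null && tests.contains(testName);
      }
    }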