Posted to commits@hive.apache.org by se...@apache.org on 2015/10/30 22:52:50 UTC
[1/3] hive git commit: HIVE-11616 : DelegationTokenSecretManager reuses the same objectstore, which has concurrency issues (Cody Fu/Sergey Shelukhin, reviewed by Chaoyu Tang)
Repository: hive
Updated Branches:
refs/heads/master 19c42ac9b -> cd531e3f0
HIVE-11616 : DelegationTokenSecretManager reuses the same objectstore, which has concurrency issues (Cody Fu/Sergey Shelukhin, reviewed by Chaoyu Tang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cd531e3f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cd531e3f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cd531e3f
Branch: refs/heads/master
Commit: cd531e3f0f188b4d1295c7223dd4fefacb897c1d
Parents: 730fdb6
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Oct 30 14:28:13 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Oct 30 14:46:34 2015 -0700
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/thrift/TestDBTokenStore.java | 2 +-
.../java/org/apache/hadoop/hive/metastore/HiveMetaStore.java | 2 +-
.../java/org/apache/hive/service/auth/HiveAuthFactory.java | 8 +++-----
.../java/org/apache/hadoop/hive/thrift/DBTokenStore.java | 7 ++++---
.../apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java | 6 +++---
.../org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java | 2 +-
6 files changed, 13 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/cd531e3f/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestDBTokenStore.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestDBTokenStore.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestDBTokenStore.java
index 0b61a62..f5934ee 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestDBTokenStore.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/thrift/TestDBTokenStore.java
@@ -37,7 +37,7 @@ public class TestDBTokenStore extends TestCase{
public void testDBTokenStore() throws TokenStoreException, MetaException, IOException {
DelegationTokenStore ts = new DBTokenStore();
- ts.init(new HMSHandler("Test handler").getMS(), null);
+ ts.init(new HMSHandler("Test handler"), null);
assertEquals(0, ts.getMasterKeys().length);
assertEquals(false,ts.removeMasterKey(-1));
try{
http://git-wip-us.apache.org/repos/asf/hive/blob/cd531e3f/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index cf2e25b..2740e40 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -5974,7 +5974,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
conf.getVar(HiveConf.ConfVars.METASTORE_KERBEROS_KEYTAB_FILE),
conf.getVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL));
// start delegation token manager
- saslServer.startDelegationTokenSecretManager(conf, baseHandler.getMS(), ServerMode.METASTORE);
+ saslServer.startDelegationTokenSecretManager(conf, baseHandler, ServerMode.METASTORE);
transFactory = saslServer.createTransportFactory(
MetaStoreUtils.getMetaStoreSaslProperties(conf));
processor = saslServer.wrapProcessor(
http://git-wip-us.apache.org/repos/asf/hive/blob/cd531e3f/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java b/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java
index 1e6ac4f..3471f12 100644
--- a/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java
+++ b/service/src/java/org/apache/hive/service/auth/HiveAuthFactory.java
@@ -112,16 +112,14 @@ public class HiveAuthFactory {
// start delegation token manager
try {
// rawStore is only necessary for DBTokenStore
- Object rawStore = null;
+ HMSHandler baseHandler = null;
String tokenStoreClass = conf.getVar(HiveConf.ConfVars.METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS);
if (tokenStoreClass.equals(DBTokenStore.class.getName())) {
- HMSHandler baseHandler = new HiveMetaStore.HMSHandler(
- "new db based metaserver", conf, true);
- rawStore = baseHandler.getMS();
+ baseHandler = new HiveMetaStore.HMSHandler("new db based metaserver", conf, true);
}
- saslServer.startDelegationTokenSecretManager(conf, rawStore, ServerMode.HIVESERVER2);
+ saslServer.startDelegationTokenSecretManager(conf, baseHandler, ServerMode.HIVESERVER2);
}
catch (MetaException|IOException e) {
throw new TTransportException("Failed to start token manager", e);
http://git-wip-us.apache.org/repos/asf/hive/blob/cd531e3f/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java
index 21d18f5..de39d3d 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/DBTokenStore.java
@@ -132,17 +132,18 @@ public class DBTokenStore implements DelegationTokenStore {
return delTokenIdents;
}
- private Object rawStore;
+ private Object hmsHandler;
@Override
- public void init(Object rawStore, ServerMode smode) throws TokenStoreException {
- this.rawStore = rawStore;
+ public void init(Object hms, ServerMode smode) throws TokenStoreException {
+ this.hmsHandler = hms;
}
private Object invokeOnRawStore(String methName, Object[] params, Class<?> ... paramTypes)
throws TokenStoreException{
try {
+ Object rawStore = hmsHandler.getClass().getMethod("getMS").invoke(hmsHandler);
return rawStore.getClass().getMethod(methName, paramTypes).invoke(rawStore, params);
} catch (IllegalArgumentException e) {
throw new TokenStoreException(e);
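The crux of the fix is visible in this hunk: DBTokenStore now holds the HMSHandler instead of a single RawStore, and asks the handler for a store reflectively on every operation, so concurrent callers no longer share one ObjectStore. A minimal, self-contained sketch of that per-call pattern follows; only the getMS accessor name is taken from the diff, the class and method names around it are illustrative:

import java.lang.reflect.Method;

class ReflectiveStoreClient {
  private final Object handler; // stand-in for an HMSHandler-like object

  ReflectiveStoreClient(Object handler) {
    this.handler = handler;
  }

  Object invokeOnStore(String methName, Object[] params, Class<?>... paramTypes)
      throws ReflectiveOperationException {
    // Fetch a store from the handler on every invocation instead of caching
    // one at init time; each calling thread gets its own store object.
    Object store = handler.getClass().getMethod("getMS").invoke(handler);
    Method m = store.getClass().getMethod(methName, paramTypes);
    return m.invoke(store, params);
  }
}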
http://git-wip-us.apache.org/repos/asf/hive/blob/cd531e3f/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java
index 20dec9a..d2b47be 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge.java
@@ -425,8 +425,8 @@ public abstract class HadoopThriftAuthBridge {
}
- public void startDelegationTokenSecretManager(Configuration conf, Object rawStore, ServerMode smode)
- throws IOException{
+ public void startDelegationTokenSecretManager(Configuration conf, Object hms, ServerMode smode)
+ throws IOException {
long secretKeyInterval =
conf.getLong(DELEGATION_KEY_UPDATE_INTERVAL_KEY,
DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT);
@@ -440,7 +440,7 @@ public abstract class HadoopThriftAuthBridge {
DELEGATION_TOKEN_GC_INTERVAL_DEFAULT);
DelegationTokenStore dts = getTokenStore(conf);
- dts.init(rawStore, smode);
+ dts.init(hms, smode);
secretManager = new TokenStoreDelegationTokenSecretManager(secretKeyInterval,
tokenMaxLifetime,
tokenRenewInterval,
http://git-wip-us.apache.org/repos/asf/hive/blob/cd531e3f/shims/common/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java
----------------------------------------------------------------------
diff --git a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java
index e46f293..745e467 100644
--- a/shims/common/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java
+++ b/shims/common/src/main/java/org/apache/hadoop/hive/thrift/ZooKeeperTokenStore.java
@@ -429,7 +429,7 @@ public class ZooKeeperTokenStore implements DelegationTokenStore {
}
@Override
- public void init(Object objectStore, ServerMode smode) {
+ public void init(Object hmsHandler, ServerMode smode) {
this.serverMode = smode;
zkConnectString =
conf.get(HadoopThriftAuthBridge.Server.DELEGATION_TOKEN_STORE_ZK_CONNECT_STR, null);
[3/3] hive git commit: HIVE-12294 : log line "Duplicate ID <number> in column ID list" appears in the logs (Sergey Shelukhin, reviewed by Vikram Dixit K)
Posted by se...@apache.org.
HIVE-12294 : log line "Duplicate ID <number> in column ID list" appears in the logs (Sergey Shelukhin, reviewed by Vikram Dixit K)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/894a499f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/894a499f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/894a499f
Branch: refs/heads/master
Commit: 894a499f131b44fd039627fb9550f24dc7226124
Parents: 19c42ac
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Oct 30 14:24:49 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Oct 30 14:46:34 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 ++++++-
.../org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java | 8 ++++----
2 files changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/894a499f/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 73037ea..af40137 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -282,7 +282,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
assert idStrs == null || knownNames.length == idStrs.length;
HashMap<String, Integer> nameIdMap = new HashMap<>();
for (int i = 0; i < knownNames.length; ++i) {
- nameIdMap.put(knownNames[i], idStrs != null ? Integer.parseInt(idStrs[i]) : i);
+ Integer newId = (idStrs != null) ? Integer.parseInt(idStrs[i]) : i;
+ Integer oldId = nameIdMap.put(knownNames[i], newId);
+ if (oldId != null && oldId.intValue() != newId.intValue()) {
+ throw new RuntimeException("Multiple IDs for " + knownNames[i] + " in column strings: ["
+ + idStr + "], [" + nameStr + "]");
+ }
}
List<PredicateLeaf> leaves = sarg.getLeaves();
for (int i = 0; i < leaves.size(); ++i) {
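The guard added here leans on a HashMap detail: put returns the previous mapping for the key, so a non-null old value that differs from the new one means the same column name arrived with two conflicting IDs. A standalone sketch of that idiom (the input arrays are illustrative):

import java.util.HashMap;

public class DuplicateIdCheck {
  public static void main(String[] args) {
    String[] names = {"a", "b", "a"};
    String[] ids = {"1", "2", "3"};
    HashMap<String, Integer> nameIdMap = new HashMap<>();
    for (int i = 0; i < names.length; ++i) {
      Integer newId = Integer.parseInt(ids[i]);
      // put() hands back the previous mapping, if any.
      Integer oldId = nameIdMap.put(names[i], newId);
      if (oldId != null && oldId.intValue() != newId.intValue()) {
        throw new RuntimeException("Multiple IDs for " + names[i]
            + ": " + oldId + " and " + newId);
      }
    }
  }
}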
http://git-wip-us.apache.org/repos/asf/hive/blob/894a499f/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
index fc0a4b7..0c7ac30 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
@@ -148,14 +148,14 @@ public final class ColumnProjectionUtils {
List<Integer> result = new ArrayList<Integer>(list.length);
for (String element : list) {
// it may contain duplicates, remove duplicates
- // TODO: WTF? This would break many assumptions elsewhere if it did.
- // Column names' and column ids' lists are supposed to be correlated.
Integer toAdd = Integer.parseInt(element);
if (!result.contains(toAdd)) {
result.add(toAdd);
- } else if (LOG.isInfoEnabled()) {
- LOG.info("Duplicate ID " + toAdd + " in column ID list");
}
+ // NOTE: some code uses this list to correlate with column names, and yet these lists may
+ // contain duplicates, which this call will remove and the other won't. As far as I can
+ // tell, no code will actually use these two methods together; all is good if the code
+ // gets the ID list without relying on this method. Or maybe it just works by magic.
}
return result;
}
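For reference, the surviving behavior of this loop: IDs are parsed in order and duplicates are silently dropped, now without the INFO log line. A standalone sketch (the input string is illustrative):

import java.util.ArrayList;
import java.util.List;

public class ReadColumnIds {
  public static void main(String[] args) {
    String[] list = "3,1,3,2".split(",");
    List<Integer> result = new ArrayList<Integer>(list.length);
    for (String element : list) {
      Integer toAdd = Integer.parseInt(element);
      if (!result.contains(toAdd)) {
        result.add(toAdd);
      }
    }
    System.out.println(result); // [3, 1, 2] -- order kept, duplicate dropped
  }
}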
[2/3] hive git commit: HIVE-11985 : don't store type names in metastore when metastore type names are not used (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Posted by se...@apache.org.
HIVE-11985 : don't store type names in metastore when metastore type names are not used (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/730fdb61
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/730fdb61
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/730fdb61
Branch: refs/heads/master
Commit: 730fdb61ae334a457e01a74edcc141482fec40f8
Parents: 894a499
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Oct 30 14:25:54 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Oct 30 14:46:34 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 14 +++++---
.../hadoop/hive/metastore/MetaStoreUtils.java | 24 ++++++++++---
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 26 ++++++++++----
.../apache/hadoop/hive/ql/metadata/Hive.java | 17 +++++++--
.../hadoop/hive/ql/metadata/Partition.java | 15 +++++++-
.../apache/hadoop/hive/ql/metadata/Table.java | 38 +++++++++++++++++---
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 5 +++
.../hadoop/hive/serde2/AbstractSerDe.java | 9 +++++
.../hadoop/hive/serde2/avro/AvroSerDe.java | 19 ++++++++--
9 files changed, 142 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 59b66cd..936ef54 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -914,11 +914,15 @@ public class HiveConf extends Configuration {
"The default SerDe Hive will use for storage formats that do not specify a SerDe."),
SERDESUSINGMETASTOREFORSCHEMA("hive.serdes.using.metastore.for.schema",
- "org.apache.hadoop.hive.ql.io.orc.OrcSerde,org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," +
- "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe,org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," +
- "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," +
- "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe,org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe",
- "SerDes retriving schema from metastore. This an internal parameter. Check with the hive dev. team"),
+ "org.apache.hadoop.hive.ql.io.orc.OrcSerde," +
+ "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," +
+ "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," +
+ "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," +
+ "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," +
+ "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," +
+ "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," +
+ "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe",
+ "SerDes retrieving schema from metastore. This is an internal parameter."),
HIVEHISTORYFILELOC("hive.querylog.location",
"${system:java.io.tmpdir}" + File.separator + "${system:user.name}",
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 73b7574..bbaa1ce 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -395,6 +395,12 @@ public class MetaStoreUtils {
if (lib == null) {
return null;
}
+ return getDeserializer(conf, table, skipConfError, lib);
+ }
+
+ public static Deserializer getDeserializer(Configuration conf,
+ org.apache.hadoop.hive.metastore.api.Table table, boolean skipConfError,
+ String lib) throws MetaException {
try {
Deserializer deserializer = ReflectionUtil.newInstance(conf.getClassByName(lib).
asSubclass(Deserializer.class), conf);
@@ -570,8 +576,9 @@ public class MetaStoreUtils {
if (!validateColumnName(fieldSchema.getName())) {
return "name: " + fieldSchema.getName();
}
- if (!validateColumnType(fieldSchema.getType())) {
- return "type: " + fieldSchema.getType();
+ String typeError = validateColumnType(fieldSchema.getType());
+ if (typeError != null) {
+ return typeError;
}
}
return null;
@@ -646,6 +653,9 @@ public class MetaStoreUtils {
return false;
}
+ public static final int MAX_MS_TYPENAME_LENGTH = 2000; // 4000/2, for an unlikely unicode case
+
+ public static final String TYPE_FROM_DESERIALIZER = "<derived from deserializer>";
/**
* validate column type
*
@@ -653,7 +663,11 @@ public class MetaStoreUtils {
* @param name
* @return
*/
- static public boolean validateColumnType(String type) {
+ static public String validateColumnType(String type) {
+ if (type.equals(TYPE_FROM_DESERIALIZER)) return null;
+ if (type.length() > MAX_MS_TYPENAME_LENGTH) {
+ return "type name is too long: " + type;
+ }
int last = 0;
boolean lastAlphaDigit = isValidTypeChar(type.charAt(last));
for (int i = 1; i <= type.length(); i++) {
@@ -662,12 +676,12 @@ public class MetaStoreUtils {
String token = type.substring(last, i);
last = i;
if (!hiveThriftTypeMap.contains(token)) {
- return false;
+ return "type: " + type;
}
break;
}
}
- return true;
+ return null;
}
private static boolean isValidTypeChar(char c) {
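Note the changed contract of validateColumnType: it now returns null for a valid type and a human-readable error string otherwise, which is what lets the validateTblColumns hunk above propagate the message directly. A sketch of the same null-means-valid convention; the constant values mirror the diff, while the method name and main() harness are illustrative:

public class TypeNameCheck {
  static final int MAX_TYPENAME_LENGTH = 2000; // mirrors MAX_MS_TYPENAME_LENGTH
  static final String FROM_DESERIALIZER = "<derived from deserializer>";

  // Returns null when the type name is acceptable, else an error message.
  static String validateTypeName(String type) {
    if (FROM_DESERIALIZER.equals(type)) return null; // placeholder is always OK
    if (type.length() > MAX_TYPENAME_LENGTH) {
      return "type name is too long: " + type;
    }
    return null; // token-by-token validation against known types would go here
  }

  public static void main(String[] args) {
    System.out.println(validateTypeName("int"));             // null (valid)
    System.out.println(validateTypeName(FROM_DESERIALIZER)); // null (valid)
  }
}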
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index dcac9ca..ff86d6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -164,6 +164,7 @@ import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeSpec;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
@@ -3023,7 +3024,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
cols.addAll(tbl.getPartCols());
}
} else {
- cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
+ cols = Hive.getFieldsFromDeserializer(colPath, deserializer); // TODO#: here - desc
if (descTbl.isFormatted()) {
// when column name is specified in describe table DDL, colPath will
// will be table_name.column_name
@@ -3256,7 +3257,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName()));
tbl.setTableName(Utilities.getTableName(alterTbl.getNewName()));
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) {
- List<FieldSchema> oldCols = (part == null ? tbl.getCols() : part.getCols());
+ List<FieldSchema> oldCols = (part == null
+ ? tbl.getColsForMetastore() : part.getColsForMetastore());
StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd());
List<FieldSchema> newCols = alterTbl.getNewCols();
String serializationLib = sd.getSerdeInfo().getSerializationLib();
@@ -3284,7 +3286,8 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
sd.setCols(oldCols);
}
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) {
- List<FieldSchema> oldCols = (part == null ? tbl.getCols() : part.getCols());
+ List<FieldSchema> oldCols = (part == null
+ ? tbl.getColsForMetastore() : part.getColsForMetastore());
StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd());
List<FieldSchema> newCols = new ArrayList<FieldSchema>();
Iterator<FieldSchema> iterOldCols = oldCols.iterator();
@@ -3378,16 +3381,27 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) {
StorageDescriptor sd = (part == null ? tbl.getTTable().getSd() : part.getTPartition().getSd());
String serdeName = alterTbl.getSerdeName();
+ String oldSerdeName = sd.getSerdeInfo().getSerializationLib();
sd.getSerdeInfo().setSerializationLib(serdeName);
if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) {
sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
}
if (part != null) {
+ // TODO: wtf? This doesn't do anything.
part.getTPartition().getSd().setCols(part.getTPartition().getSd().getCols());
} else {
- if (!Table.hasMetastoreBasedSchema(conf, serdeName)) {
- tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getTableName(), tbl.
- getDeserializer()));
+ if (Table.shouldStoreFieldsInMetastore(conf, serdeName, tbl.getParameters())
+ && !Table.hasMetastoreBasedSchema(conf, oldSerdeName)) {
+ // If new SerDe needs to store fields in metastore, but the old serde doesn't, save
+ // the fields so that new SerDe could operate. Note that this may fail if some fields
+ // from old SerDe are too long to be stored in metastore, but there's nothing we can do.
+ try {
+ Deserializer oldSerde = MetaStoreUtils.getDeserializer(
+ conf, tbl.getTTable(), false, oldSerdeName);
+ tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getTableName(), oldSerde));
+ } catch (MetaException ex) {
+ throw new HiveException(ex);
+ }
}
}
} else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) {
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index cef297a..74b08d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -781,6 +781,16 @@ public class Hive {
}
}
+ public static List<FieldSchema> getFieldsFromDeserializerForMsStorage(
+ Table tbl, Deserializer deserializer) throws SerDeException, MetaException {
+ List<FieldSchema> schema = MetaStoreUtils.getFieldsFromDeserializer(
+ tbl.getTableName(), tbl.getDeserializer());
+ for (FieldSchema field : schema) {
+ field.setType(MetaStoreUtils.TYPE_FROM_DESERIALIZER);
+ }
+ return schema;
+ }
+
/**
*
* @param tableName
@@ -905,8 +915,11 @@ public class Hive {
List<Order> sortCols = new ArrayList<Order>();
int k = 0;
Table metaBaseTbl = new Table(baseTbl);
- for (int i = 0; i < metaBaseTbl.getCols().size(); i++) {
- FieldSchema col = metaBaseTbl.getCols().get(i);
+ // Even though we are storing these in metastore, get regular columns. Indexes on lengthy
+ // types from e.g. Avro schema will just fail to create the index table (by design).
+ List<FieldSchema> cols = metaBaseTbl.getCols();
+ for (int i = 0; i < cols.size(); i++) {
+ FieldSchema col = cols.get(i);
if (indexedCols.contains(col.getName())) {
indexTblCols.add(col);
sortCols.add(new Order(col.getName(), 1));
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
index 06f5223..c8895c2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
@@ -467,10 +467,23 @@ public class Partition implements Serializable {
}
public List<FieldSchema> getCols() {
+ return getColsInternal(false);
+ }
+
+ public List<FieldSchema> getColsForMetastore() {
+ return getColsInternal(true);
+ }
+
+ private List<FieldSchema> getColsInternal(boolean forMs) {
try {
- if (Table.hasMetastoreBasedSchema(SessionState.getSessionConf(), tPartition.getSd())) {
+ String serializationLib = tPartition.getSd().getSerdeInfo().getSerializationLib();
+ // Do the lightweight check for general case.
+ if (Table.hasMetastoreBasedSchema(SessionState.getSessionConf(), serializationLib)) {
return tPartition.getSd().getCols();
+ } else if (forMs && !Table.shouldStoreFieldsInMetastore(
+ SessionState.getSessionConf(), serializationLib, table.getParameters())) {
+ return Hive.getFieldsFromDeserializerForMsStorage(table, getDeserializer());
}
return MetaStoreUtils.getFieldsFromDeserializer(table.getTableName(), getDeserializer());
} catch (Exception e) {
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 68e0731..d2a5948 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -55,6 +56,7 @@ import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hive.common.util.ReflectionUtil;
import java.io.Serializable;
import java.util.ArrayList;
@@ -596,11 +598,22 @@ public class Table implements Serializable {
}
public List<FieldSchema> getCols() {
+ return getColsInternal(false);
+ }
+
+ public List<FieldSchema> getColsForMetastore() {
+ return getColsInternal(true);
+ }
+ private List<FieldSchema> getColsInternal(boolean forMs) {
String serializationLib = getSerializationLib();
try {
+ // Do the lightweight check for general case.
if (hasMetastoreBasedSchema(SessionState.getSessionConf(), serializationLib)) {
return tTable.getSd().getCols();
+ } else if (forMs && !shouldStoreFieldsInMetastore(
+ SessionState.getSessionConf(), serializationLib, tTable.getParameters())) {
+ return Hive.getFieldsFromDeserializerForMsStorage(this, getDeserializer());
} else {
return MetaStoreUtils.getFieldsFromDeserializer(getTableName(), getDeserializer());
}
@@ -888,15 +901,28 @@ public class Table implements Serializable {
return tTable.isTemporary();
}
- public static boolean hasMetastoreBasedSchema(HiveConf conf, StorageDescriptor serde) {
- return hasMetastoreBasedSchema(conf, serde.getSerdeInfo().getSerializationLib());
- }
-
public static boolean hasMetastoreBasedSchema(HiveConf conf, String serdeLib) {
return StringUtils.isEmpty(serdeLib) ||
conf.getStringCollection(ConfVars.SERDESUSINGMETASTOREFORSCHEMA.varname).contains(serdeLib);
}
+ public static boolean shouldStoreFieldsInMetastore(
+ HiveConf conf, String serdeLib, Map<String, String> tableParams) {
+ if (hasMetastoreBasedSchema(conf, serdeLib)) return true;
+ // Table may or may not be using metastore. Only the SerDe can tell us.
+ AbstractSerDe deserializer = null;
+ try {
+ Class<?> clazz = conf.getClassByName(serdeLib);
+ if (!AbstractSerDe.class.isAssignableFrom(clazz)) return true; // The default.
+ deserializer = ReflectionUtil.newInstance(
+ conf.getClassByName(serdeLib).asSubclass(AbstractSerDe.class), conf);
+ } catch (Exception ex) {
+ LOG.warn("Cannot initialize SerDe: " + serdeLib + ", ignoring", ex);
+ return true;
+ }
+ return deserializer.shouldStoreFieldsInMetastore(tableParams);
+ }
+
public static void validateColumns(List<FieldSchema> columns, List<FieldSchema> partCols)
throws HiveException {
List<String> colNames = new ArrayList<String>();
@@ -936,4 +962,8 @@ public class Table implements Serializable {
this.tableSpec = tableSpec;
}
+ public boolean hasDeserializer() {
+ return deserializer != null;
+ }
+
};
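Taken together, getCols/getColsForMetastore implement a two-step decision: a cheap check against the configured SerDe list first, and only on the metastore-write path an instantiated-SerDe check. A stand-in sketch of that flow, assuming simplified types that are not Hive's:

import java.util.Map;

interface SchemaAwareSerDe {
  boolean shouldStoreFieldsInMetastore(Map<String, String> tableParams);
}

final class ColumnSource {
  // Mirrors getColsInternal: lightweight check first; the SerDe is consulted
  // only when fields are about to be persisted into the metastore.
  static String decide(boolean serdeUsesMetastoreForSchema, boolean forMetastore,
      SchemaAwareSerDe serde, Map<String, String> tableParams) {
    if (serdeUsesMetastoreForSchema) {
      return "columns from the metastore StorageDescriptor";
    } else if (forMetastore && !serde.shouldStoreFieldsInMetastore(tableParams)) {
      return "placeholder types (<derived from deserializer>)";
    }
    return "fields derived from the deserializer";
  }
}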
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 70beff7..8a27577 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10289,6 +10289,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
FieldSchema fieldSchema = derivedSchema.get(i);
// Modify a copy, not the original
fieldSchema = new FieldSchema(fieldSchema);
+ // TODO: there's a potential problem here if some table uses external schema like Avro,
+ // with a very large type name. It seems like the view does not derive the SerDe from
+ // the table, so it won't be able to just get the type from the deserializer like the
+ // table does; we won't be able to properly store the type in the RDBMS metastore.
+ // Not sure if these large cols could be in resultSchema. Ignore this for now 0_o
derivedSchema.set(i, fieldSchema);
sb.append(HiveUtils.unparseIdentifier(fieldSchema.getName(), conf));
sb.append(" AS ");
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
index c5e78c5..9269ff4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.serde2;
+import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
@@ -115,4 +116,12 @@ public abstract class AbstractSerDe implements SerDe {
public String getConfigurationErrors() {
return configErrors == null ? "" : configErrors;
}
+
+ /**
+ * @return Whether the SerDe that can store schema both inside and outside of metastore
+ * does, in fact, store it inside metastore, based on table parameters.
+ */
+ public boolean shouldStoreFieldsInMetastore(Map<String, String> tableParams) {
+ return false; // The default, unless SerDe overrides it. TODO#
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/730fdb61/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
index 5035426..0be54e0 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.serde2.avro;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
import java.util.Properties;
import org.apache.avro.Schema;
@@ -96,8 +97,7 @@ public class AvroSerDe extends AbstractSerDe {
final String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
final String columnCommentProperty = properties.getProperty(LIST_COLUMN_COMMENTS,"");
- if (properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName()) != null
- || properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_URL.getPropName()) != null
+ if (hasExternalSchema(properties)
|| columnNameProperty == null || columnNameProperty.isEmpty()
|| columnTypeProperty == null || columnTypeProperty.isEmpty()) {
schema = determineSchemaOrReturnErrorSchema(configuration, properties);
@@ -129,6 +129,16 @@ public class AvroSerDe extends AbstractSerDe {
this.oi = aoig.getObjectInspector();
}
+ private boolean hasExternalSchema(Properties properties) {
+ return properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName()) != null
+ || properties.getProperty(AvroSerdeUtils.AvroTableProperties.SCHEMA_URL.getPropName()) != null;
+ }
+
+ private boolean hasExternalSchema(Map<String, String> tableParams) {
+ return tableParams.containsKey(AvroSerdeUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName())
+ || tableParams.containsKey(AvroSerdeUtils.AvroTableProperties.SCHEMA_URL.getPropName());
+ }
+
public static Schema getSchemaFromCols(Properties properties,
List<String> columnNames, List<TypeInfo> columnTypes, String columnCommentProperty) {
List<String> columnComments;
@@ -232,4 +242,9 @@ public class AvroSerDe extends AbstractSerDe {
return avroSerializer;
}
+
+ @Override
+ public boolean shouldStoreFieldsInMetastore(Map<String, String> tableParams) {
+ return !hasExternalSchema(tableParams);
+ }
}