You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/04 00:54:19 UTC
svn commit: r1622374 - in /hive/branches/tez: ./
common/src/java/org/apache/hadoop/hive/conf/
hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/
metastore/src/java/org/apache/hadoop/hive/metastore/
metastore/src/test/org/apache/h...
Author: gunther
Date: Wed Sep 3 22:54:18 2014
New Revision: 1622374
URL: http://svn.apache.org/r1622374
Log:
Merge latest trunk into branch. (Gunther Hagleitner)
Added:
hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
- copied unchanged from r1622373, hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestStringDictionary.java
Modified:
hive/branches/tez/ (props changed)
hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java
Propchange: hive/branches/tez/
------------------------------------------------------------------------------
Merged /hive/trunk:r1622081-1622373
Modified: hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/tez/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Sep 3 22:54:18 2014
@@ -835,6 +835,10 @@ public class HiveConf extends Configurat
"If the number of keys in a dictionary is greater than this fraction of the total number of\n" +
"non-null rows, turn off dictionary encoding. Use 1 to always use dictionary encoding."),
HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE("hive.exec.orc.default.row.index.stride", 10000, "Define the default ORC index stride"),
+ HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK("hive.orc.row.index.stride.dictionary.check", true,
+ "If enabled dictionary check will happen after first row index stride (default 10000 rows)\n" +
+ "else dictionary check will happen before writing first stripe. In both cases, the decision\n" +
+ "to use dictionary or not will be retained thereafter."),
HIVE_ORC_DEFAULT_BUFFER_SIZE("hive.exec.orc.default.buffer.size", 256 * 1024, "Define the default ORC buffer size"),
HIVE_ORC_DEFAULT_BLOCK_PADDING("hive.exec.orc.default.block.padding", true, "Define the default block padding"),
HIVE_ORC_BLOCK_PADDING_TOLERANCE("hive.exec.orc.block.padding.tolerance", 0.05f,
Modified: hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java (original)
+++ hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java Wed Sep 3 22:54:18 2014
@@ -199,7 +199,8 @@ public class HCatLoader extends HCatBase
throws IOException {
Table table = phutil.getTable(location,
hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job),
- PigHCatUtil.getHCatServerPrincipal(job));
+ PigHCatUtil.getHCatServerPrincipal(job),
+ job); // Pass job to initialize metastore conf overrides
List<FieldSchema> tablePartitionKeys = table.getPartitionKeys();
String[] partitionKeys = new String[tablePartitionKeys.size()];
for (int i = 0; i < tablePartitionKeys.size(); i++) {
@@ -215,7 +216,11 @@ public class HCatLoader extends HCatBase
Table table = phutil.getTable(location,
hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job),
- PigHCatUtil.getHCatServerPrincipal(job));
+ PigHCatUtil.getHCatServerPrincipal(job),
+
+ // Pass job to initialize metastore conf overrides for embedded metastore case
+ // (hive.metastore.uris = "").
+ job);
HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table);
try {
PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema);
Modified: hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java (original)
+++ hive/branches/tez/hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java Wed Sep 3 22:54:18 2014
@@ -142,8 +142,16 @@ class PigHCatUtil {
}
private static HiveMetaStoreClient getHiveMetaClient(String serverUri,
- String serverKerberosPrincipal, Class<?> clazz) throws Exception {
- HiveConf hiveConf = new HiveConf(clazz);
+ String serverKerberosPrincipal,
+ Class<?> clazz,
+ Job job) throws Exception {
+
+ // The job configuration is passed in so the configuration will be cloned
+ // from the pig job configuration. This is necessary for overriding
+ // metastore configuration arguments like the metastore jdbc connection string
+ // and password, in the case of an embedded metastore, which you get when
+ // hive.metastore.uris = "".
+ HiveConf hiveConf = new HiveConf(job.getConfiguration(), clazz);
if (serverUri != null) {
hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim());
@@ -178,7 +186,13 @@ class PigHCatUtil {
return new HCatSchema(fcols);
}
- public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal) throws IOException {
+ /*
+ * The job argument is passed so that configuration overrides can be used to initialize
+ * the metastore configuration in the special case of an embedded metastore
+ * (hive.metastore.uris = "").
+ */
+ public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal,
+ Job job) throws IOException {
Pair<String, String> loc_server = new Pair<String, String>(location, hcatServerUri);
Table hcatTable = hcatTableCache.get(loc_server);
if (hcatTable != null) {
@@ -191,7 +205,7 @@ class PigHCatUtil {
Table table = null;
HiveMetaStoreClient client = null;
try {
- client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class);
+ client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class, job);
table = HCatUtil.getTable(client, dbName, tableName);
} catch (NoSuchObjectException nsoe) {
throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE); // prettier error messages to frontend
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Wed Sep 3 22:54:18 2014
@@ -3721,19 +3721,6 @@ public class HiveMetaStore extends Thrif
endFunction("write_partition_column_statistics: ", ret != false, null, tableName);
}
}
- public boolean update_partition_column_statistics(
- SetPartitionsStatsRequest request) throws NoSuchObjectException,
- InvalidObjectException, MetaException, TException,
- InvalidInputException {
- boolean ret = false;
- try {
- ret = getMS().updatePartitionColumnStatistics(request);
- return ret;
- } finally {
- endFunction("write_partition_column_statistics: ", ret != false, null,
- null);
- }
- }
@Override
public boolean delete_partition_column_statistics(String dbName, String tableName,
@@ -5058,9 +5045,13 @@ public class HiveMetaStore extends Thrif
@Override
public boolean set_aggr_stats_for(SetPartitionsStatsRequest request)
- throws NoSuchObjectException, InvalidObjectException, MetaException,
- InvalidInputException, TException {
- return update_partition_column_statistics(request);
+ throws NoSuchObjectException, InvalidObjectException, MetaException, InvalidInputException,
+ TException {
+ boolean ret = true;
+ for (ColumnStatistics colStats : request.getColStats()) {
+ ret = ret && update_partition_column_statistics(colStats);
+ }
+ return ret;
}
}
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Wed Sep 3 22:54:18 2014
@@ -5779,34 +5779,6 @@ public class ObjectStore implements RawS
}
}
- @Override
- public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request)
- throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
- boolean committed = false;
- try {
- openTransaction();
- for (ColumnStatistics colStats : request.getColStats()) {
- ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
- statsDesc.setDbName(statsDesc.getDbName().toLowerCase());
- statsDesc.setTableName(statsDesc.getTableName().toLowerCase());
- List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
- for (ColumnStatisticsObj statsObj : statsObjs) {
- statsObj.setColName(statsObj.getColName().toLowerCase());
- statsObj.setColType(statsObj.getColType().toLowerCase());
- MPartitionColumnStatistics mStatsObj = StatObjectConverter
- .convertToMPartitionColumnStatistics(null, statsDesc, statsObj);
- pm.makePersistent(mStatsObj);
- }
- }
- committed = commitTransaction();
- return committed;
- } finally {
- if (!committed) {
- rollbackTransaction();
- }
- }
- }
-
private List<MTableColumnStatistics> getMTableColumnStatistics(
Table table, List<String> colNames) throws MetaException {
boolean committed = false;
Modified: hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/branches/tez/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Wed Sep 3 22:54:18 2014
@@ -552,8 +552,5 @@ public interface RawStore extends Config
public AggrStats get_aggr_stats_for(String dbName, String tblName,
List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
-
- boolean updatePartitionColumnStatistics(
- SetPartitionsStatsRequest request) throws NoSuchObjectException,
- MetaException, InvalidObjectException, InvalidInputException;
+
}
Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original)
+++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Wed Sep 3 22:54:18 2014
@@ -720,10 +720,4 @@ public class DummyRawStoreControlledComm
return null;
}
- @Override
- public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request)
- throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
- return objectStore.updatePartitionColumnStatistics(request);
- }
-
}
Modified: hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original)
+++ hive/branches/tez/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Wed Sep 3 22:54:18 2014
@@ -736,12 +736,7 @@ public class DummyRawStoreForJdoConnecti
throws MetaException {
return null;
}
-
- @Override
- public boolean updatePartitionColumnStatistics(SetPartitionsStatsRequest request)
- throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
- return false;
- }
+
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Wed Sep 3 22:54:18 2014
@@ -96,9 +96,6 @@ class WriterImpl implements Writer, Memo
private static final int HDFS_BUFFER_SIZE = 256 * 1024;
private static final int MIN_ROW_INDEX_STRIDE = 1000;
- // HDFS requires blocks < 2GB and multiples of 512, so pick 1.5GB
- private static final long MAX_BLOCK_SIZE = 1536 * 1024 * 1024;
-
// threshold above which buffer size will be automatically resized
private static final int COLUMN_COUNT_THRESHOLD = 1000;
@@ -135,8 +132,6 @@ class WriterImpl implements Writer, Memo
new TreeMap<String, ByteString>();
private final StreamFactory streamFactory = new StreamFactory();
private final TreeWriter treeWriter;
- private final OrcProto.RowIndex.Builder rowIndex =
- OrcProto.RowIndex.newBuilder();
private final boolean buildIndex;
private final MemoryManager memoryManager;
private final OrcFile.Version version;
@@ -678,7 +673,7 @@ class WriterImpl implements Writer, Memo
if (rowIndexStream != null) {
if (rowIndex.getEntryCount() != requiredIndexEntries) {
throw new IllegalArgumentException("Column has wrong number of " +
- "index entries found: " + rowIndexEntry + " expected: " +
+ "index entries found: " + rowIndex.getEntryCount() + " expected: " +
requiredIndexEntries);
}
rowIndex.build().writeTo(rowIndexStream);
@@ -1005,6 +1000,8 @@ class WriterImpl implements Writer, Memo
private final float dictionaryKeySizeThreshold;
private boolean useDictionaryEncoding = true;
private boolean isDirectV2 = true;
+ private boolean doneDictionaryCheck;
+ private final boolean strideDictionaryCheck;
StringTreeWriter(int columnId,
ObjectInspector inspector,
@@ -1025,9 +1022,14 @@ class WriterImpl implements Writer, Memo
directLengthOutput = createIntegerWriter(writer.createStream(id,
OrcProto.Stream.Kind.LENGTH), false, isDirectV2, writer);
dictionaryKeySizeThreshold = writer.getConfiguration().getFloat(
- HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname,
- HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.
- defaultFloatVal);
+ HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname,
+ HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.
+ defaultFloatVal);
+ strideDictionaryCheck = writer.getConfiguration().getBoolean(
+ HiveConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.varname,
+ HiveConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE_DICTIONARY_CHECK.
+ defaultBoolVal);
+ doneDictionaryCheck = false;
}
/**
@@ -1045,21 +1047,71 @@ class WriterImpl implements Writer, Memo
super.write(obj);
if (obj != null) {
Text val = getTextValue(obj);
- rows.add(dictionary.add(val));
+ if (useDictionaryEncoding || !strideDictionaryCheck) {
+ rows.add(dictionary.add(val));
+ } else {
+ // write data and length
+ directStreamOutput.write(val.getBytes(), 0, val.getLength());
+ directLengthOutput.write(val.getLength());
+ }
indexStatistics.updateString(val);
}
}
+ private boolean checkDictionaryEncoding() {
+ if (!doneDictionaryCheck) {
+ // Set the flag indicating whether or not to use dictionary encoding
+ // based on whether or not the fraction of distinct keys over number of
+ // non-null rows is less than the configured threshold
+ float ratio = rows.size() > 0 ? (float) (dictionary.size()) / rows.size() : 0.0f;
+ useDictionaryEncoding = !isDirectV2 || ratio <= dictionaryKeySizeThreshold;
+ doneDictionaryCheck = true;
+ }
+ return useDictionaryEncoding;
+ }
+
@Override
void writeStripe(OrcProto.StripeFooter.Builder builder,
int requiredIndexEntries) throws IOException {
- // Set the flag indicating whether or not to use dictionary encoding
- // based on whether or not the fraction of distinct keys over number of
- // non-null rows is less than the configured threshold
- useDictionaryEncoding =
- (!isDirectV2) || (rows.size() > 0 &&
- (float)(dictionary.size()) / rows.size() <=
- dictionaryKeySizeThreshold);
+ // if the number of rows in the stripe is less than dictionaryCheckAfterRows, the
+ // dictionary check would not have happened yet. So do it again here.
+ checkDictionaryEncoding();
+
+ if (useDictionaryEncoding) {
+ flushDictionary();
+ } else {
+ // flush out any leftover entries from the dictionary
+ if (rows.size() > 0) {
+ flushDictionary();
+ }
+
+ // suppress the stream for every stripe if dictionary is disabled
+ stringOutput.suppress();
+ }
+
+ // we need to build the rowindex before calling super, since it
+ // writes it out.
+ super.writeStripe(builder, requiredIndexEntries);
+ stringOutput.flush();
+ lengthOutput.flush();
+ rowOutput.flush();
+ directStreamOutput.flush();
+ directLengthOutput.flush();
+ // reset all of the fields to be ready for the next stripe.
+ dictionary.clear();
+ savedRowIndex.clear();
+ rowIndexValueCount.clear();
+ recordPosition(rowIndexPosition);
+ rowIndexValueCount.add(0L);
+
+ if (!useDictionaryEncoding) {
+ // record the start positions of the first index stride of the next stripe, i.e.
+ // the beginning of the direct streams, when the dictionary is disabled
+ recordDirectStreamPosition();
+ }
+ }
+
+ private void flushDictionary() throws IOException {
final int[] dumpOrder = new int[dictionary.size()];
if (useDictionaryEncoding) {
@@ -1113,21 +1165,7 @@ class WriterImpl implements Writer, Memo
}
}
}
- // we need to build the rowindex before calling super, since it
- // writes it out.
- super.writeStripe(builder, requiredIndexEntries);
- stringOutput.flush();
- lengthOutput.flush();
- rowOutput.flush();
- directStreamOutput.flush();
- directLengthOutput.flush();
- // reset all of the fields to be ready for the next stripe.
- dictionary.clear();
rows.clear();
- savedRowIndex.clear();
- rowIndexValueCount.clear();
- recordPosition(rowIndexPosition);
- rowIndexValueCount.add(0L);
}
@Override
@@ -1165,10 +1203,30 @@ class WriterImpl implements Writer, Memo
OrcProto.RowIndexEntry.Builder rowIndexEntry = getRowIndexEntry();
rowIndexEntry.setStatistics(indexStatistics.serialize());
indexStatistics.reset();
- savedRowIndex.add(rowIndexEntry.build());
+ OrcProto.RowIndexEntry base = rowIndexEntry.build();
+ savedRowIndex.add(base);
rowIndexEntry.clear();
recordPosition(rowIndexPosition);
rowIndexValueCount.add(Long.valueOf(rows.size()));
+ if (strideDictionaryCheck) {
+ checkDictionaryEncoding();
+ }
+ if (!useDictionaryEncoding) {
+ if (rows.size() > 0) {
+ flushDictionary();
+ // just record the start positions of next index stride
+ recordDirectStreamPosition();
+ } else {
+ // record the start positions of next index stride
+ recordDirectStreamPosition();
+ getRowIndex().addEntry(base);
+ }
+ }
+ }
+
+ private void recordDirectStreamPosition() throws IOException {
+ directStreamOutput.getPosition(rowIndexPosition);
+ directLengthOutput.getPosition(rowIndexPosition);
}
@Override
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Wed Sep 3 22:54:18 2014
@@ -109,6 +109,7 @@ import org.apache.hadoop.hive.shims.Hado
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.security.UserGroupInformation;
import org.apache.thrift.TException;
import com.google.common.collect.Sets;
@@ -128,6 +129,7 @@ public class Hive {
private HiveConf conf = null;
private IMetaStoreClient metaStoreClient;
+ private UserGroupInformation owner;
private static ThreadLocal<Hive> hiveDB = new ThreadLocal<Hive>() {
@Override
@@ -181,7 +183,11 @@ public class Hive {
*/
public static Hive get(HiveConf c, boolean needsRefresh) throws HiveException {
Hive db = hiveDB.get();
- if (db == null || needsRefresh) {
+ if (db == null || needsRefresh || !db.isCurrentUserOwner()) {
+ if (db != null) {
+ LOG.debug("Creating new db. db = " + db + ", needsRefresh = " + needsRefresh +
+ ", db.isCurrentUserOwner = " + db.isCurrentUserOwner());
+ }
closeCurrent();
c.set("fs.scheme.class", "dfs");
Hive newdb = new Hive(c);
@@ -194,6 +200,11 @@ public class Hive {
public static Hive get() throws HiveException {
Hive db = hiveDB.get();
+ if (db != null && !db.isCurrentUserOwner()) {
+ LOG.debug("Creating new db. db.isCurrentUserOwner = " + db.isCurrentUserOwner());
+ db.close();
+ db = null;
+ }
if (db == null) {
SessionState session = SessionState.get();
db = new Hive(session == null ? new HiveConf(Hive.class) : session.getConf());
@@ -220,6 +231,17 @@ public class Hive {
conf = c;
}
+
+ private boolean isCurrentUserOwner() throws HiveException {
+ try {
+ return owner == null || owner.equals(UserGroupInformation.getCurrentUser());
+ } catch(IOException e) {
+ throw new HiveException("Error getting current user: " + e.getMessage(), e);
+ }
+ }
+
+
+
/**
* closes the connection to metastore for the calling thread
*/
@@ -2496,6 +2518,13 @@ private void constructOneLBLocationMap(F
@Unstable
public IMetaStoreClient getMSC() throws MetaException {
if (metaStoreClient == null) {
+ try {
+ owner = UserGroupInformation.getCurrentUser();
+ } catch(IOException e) {
+ String msg = "Error getting current user: " + e.getMessage();
+ LOG.error(msg, e);
+ throw new MetaException(msg + "\n" + StringUtils.stringifyException(e));
+ }
metaStoreClient = createMetaStoreClient();
}
return metaStoreClient;
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java Wed Sep 3 22:54:18 2014
@@ -32,6 +32,7 @@ public class ColStatistics {
private double avgColLen;
private long numTrues;
private long numFalses;
+ private Range range;
public ColStatistics(String tabAlias, String colName, String colType) {
this.setTableAlias(tabAlias);
@@ -118,6 +119,17 @@ public class ColStatistics {
this.numFalses = numFalses;
}
+ public Range getRange() {
+ return range;
+ }
+
+ public void setRange(Number minVal, Number maxVal) {
+ this.range = new Range(minVal, maxVal);
+ }
+
+ public void setRange(Range r) {
+ this.range = r;
+ }
@Override
public String toString() {
@@ -150,7 +162,24 @@ public class ColStatistics {
clone.setNumNulls(numNulls);
clone.setNumTrues(numTrues);
clone.setNumFalses(numFalses);
+ if (range != null ) {
+ clone.setRange(range.clone());
+ }
return clone;
}
+ public static class Range {
+ public final Number minValue;
+ public final Number maxValue;
+ Range(Number minValue, Number maxValue) {
+ super();
+ this.minValue = minValue;
+ this.maxValue = maxValue;
+ }
+ @Override
+ public Range clone() {
+ return new Range(minValue, maxValue);
+ }
+ }
+
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Wed Sep 3 22:54:18 2014
@@ -25,10 +25,12 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -76,6 +78,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
import org.apache.hadoop.io.BytesWritable;
+import java.math.BigDecimal;
+import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -404,18 +408,22 @@ public class StatsUtils {
cs.setCountDistint(csd.getLongStats().getNumDVs());
cs.setNumNulls(csd.getLongStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
+ cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
} else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) {
cs.setCountDistint(csd.getLongStats().getNumDVs());
cs.setNumNulls(csd.getLongStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive2());
+ cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue());
} else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) {
cs.setCountDistint(csd.getDoubleStats().getNumDVs());
cs.setNumNulls(csd.getDoubleStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive1());
+ cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
} else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) {
cs.setCountDistint(csd.getDoubleStats().getNumDVs());
cs.setNumNulls(csd.getDoubleStats().getNumNulls());
cs.setAvgColLen(JavaDataModel.get().primitive2());
+ cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue());
} else if (colType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME)
|| colType.startsWith(serdeConstants.CHAR_TYPE_NAME)
|| colType.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
@@ -441,6 +449,13 @@ public class StatsUtils {
cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
cs.setCountDistint(csd.getDecimalStats().getNumDVs());
cs.setNumNulls(csd.getDecimalStats().getNumNulls());
+ Decimal val = csd.getDecimalStats().getHighValue();
+ BigDecimal maxVal = HiveDecimal.
+ create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
+ val = csd.getDecimalStats().getLowValue();
+ BigDecimal minVal = HiveDecimal.
+ create(new BigInteger(val.getUnscaled()), val.getScale()).bigDecimalValue();
+ cs.setRange(minVal, maxVal);
} else if (colType.equalsIgnoreCase(serdeConstants.DATE_TYPE_NAME)) {
cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
} else {
Modified: hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/branches/tez/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Wed Sep 3 22:54:18 2014
@@ -42,6 +42,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -1684,7 +1685,7 @@ public class TestOrcFile {
}
@Test
- public void testMemoryManagement() throws Exception {
+ public void testMemoryManagementV11() throws Exception {
ObjectInspector inspector;
synchronized (TestOrcFile.class) {
inspector = ObjectInspectorFactory.getReflectionObjectInspector
@@ -1699,7 +1700,8 @@ public class TestOrcFile {
.stripeSize(50000)
.bufferSize(100)
.rowIndexStride(0)
- .memory(memory));
+ .memory(memory)
+ .version(Version.V_0_11));
assertEquals(testFilePath, memory.path);
for(int i=0; i < 2500; ++i) {
writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));
@@ -1719,6 +1721,45 @@ public class TestOrcFile {
}
@Test
+ public void testMemoryManagementV12() throws Exception {
+ ObjectInspector inspector;
+ synchronized (TestOrcFile.class) {
+ inspector = ObjectInspectorFactory.getReflectionObjectInspector
+ (InnerStruct.class,
+ ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+ }
+ MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
+ Writer writer = OrcFile.createWriter(testFilePath,
+ OrcFile.writerOptions(conf)
+ .inspector(inspector)
+ .compress(CompressionKind.NONE)
+ .stripeSize(50000)
+ .bufferSize(100)
+ .rowIndexStride(0)
+ .memory(memory)
+ .version(Version.V_0_12));
+ assertEquals(testFilePath, memory.path);
+ for(int i=0; i < 2500; ++i) {
+ writer.addRow(new InnerStruct(i*300, Integer.toHexString(10*i)));
+ }
+ writer.close();
+ assertEquals(null, memory.path);
+ Reader reader = OrcFile.createReader(testFilePath,
+ OrcFile.readerOptions(conf).filesystem(fs));
+ int i = 0;
+ for(StripeInformation stripe: reader.getStripes()) {
+ i += 1;
+ assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
+ stripe.getDataLength() < 5000);
+ }
+ // with HIVE-7832, the dictionaries will be disabled after writing the first
+ // stripe as there are too many distinct values. Hence only 3 stripes as
+ // compared to 25 stripes in version 0.11 (above test case)
+ assertEquals(3, i);
+ assertEquals(2500, reader.getNumberOfRows());
+ }
+
+ @Test
public void testPredicatePushdown() throws Exception {
ObjectInspector inspector;
synchronized (TestOrcFile.class) {
Modified: hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java?rev=1622374&r1=1622373&r2=1622374&view=diff
==============================================================================
--- hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java (original)
+++ hive/branches/tez/shims/common-secure/src/main/java/org/apache/hadoop/hive/thrift/HadoopThriftAuthBridge20S.java Wed Sep 3 22:54:18 2014
@@ -24,6 +24,7 @@ import java.net.InetAddress;
import java.net.Socket;
import java.security.PrivilegedAction;
import java.security.PrivilegedExceptionAction;
+import java.util.Locale;
import java.util.Map;
import javax.security.auth.callback.Callback;
@@ -79,11 +80,23 @@ public class HadoopThriftAuthBridge20S e
}
@Override
- public Client createClientWithConf(String authType) {
- Configuration conf = new Configuration();
- conf.set(HADOOP_SECURITY_AUTHENTICATION, authType);
- UserGroupInformation.setConfiguration(conf);
- return new Client();
+ public Client createClientWithConf(String authMethod) {
+ UserGroupInformation ugi;
+ try {
+ ugi = UserGroupInformation.getLoginUser();
+ } catch(IOException e) {
+ throw new IllegalStateException("Unable to get current login user: " + e, e);
+ }
+ if (loginUserHasCurrentAuthMethod(ugi, authMethod)) {
+ LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current.");
+ return new Client();
+ } else {
+ LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current.");
+ Configuration conf = new Configuration();
+ conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod);
+ UserGroupInformation.setConfiguration(conf);
+ return new Client();
+ }
}
@Override
@@ -105,15 +118,48 @@ public class HadoopThriftAuthBridge20S e
}
@Override
- public UserGroupInformation getCurrentUGIWithConf(String authType)
+ public UserGroupInformation getCurrentUGIWithConf(String authMethod)
throws IOException {
- Configuration conf = new Configuration();
- conf.set(HADOOP_SECURITY_AUTHENTICATION, authType);
- UserGroupInformation.setConfiguration(conf);
- return UserGroupInformation.getCurrentUser();
+ UserGroupInformation ugi;
+ try {
+ ugi = UserGroupInformation.getCurrentUser();
+ } catch(IOException e) {
+ throw new IllegalStateException("Unable to get current user: " + e, e);
+ }
+ if (loginUserHasCurrentAuthMethod(ugi, authMethod)) {
+ LOG.debug("Not setting UGI conf as passed-in authMethod of " + authMethod + " = current.");
+ return ugi;
+ } else {
+ LOG.debug("Setting UGI conf as passed-in authMethod of " + authMethod + " != current.");
+ Configuration conf = new Configuration();
+ conf.set(HADOOP_SECURITY_AUTHENTICATION, authMethod);
+ UserGroupInformation.setConfiguration(conf);
+ return UserGroupInformation.getCurrentUser();
+ }
}
/**
+ * Return true if the current login user is already using the given authMethod.
+ *
+ * Used above to ensure we do not create a new Configuration object and as such
+ * lose other settings such as the cluster to which the JVM is connected. Required
+ * for oozie since it does not have a core-site.xml see HIVE-7682
+ */
+ private boolean loginUserHasCurrentAuthMethod(UserGroupInformation ugi, String sAuthMethod) {
+ AuthenticationMethod authMethod;
+ try {
+ // based on SecurityUtil.getAuthenticationMethod()
+ authMethod = Enum.valueOf(AuthenticationMethod.class, sAuthMethod.toUpperCase(Locale.ENGLISH));
+ } catch (IllegalArgumentException iae) {
+ throw new IllegalArgumentException("Invalid attribute value for " +
+ HADOOP_SECURITY_AUTHENTICATION + " of " + sAuthMethod, iae);
+ }
+ LOG.debug("Current authMethod = " + ugi.getAuthenticationMethod());
+ return ugi.getAuthenticationMethod().equals(authMethod);
+ }
+
+
+ /**
* Read and return Hadoop SASL configuration which can be configured using
* "hadoop.rpc.protection"
* @param conf