You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2017/02/16 18:14:04 UTC
hive git commit: HIVE-15161: migrate ColumnStats to use jackson
(Zoltan Haindrich, via Pengcheng Xiong)
Repository: hive
Updated Branches:
refs/heads/master e732aa27e -> 6e652a3b9
HIVE-15161: migrate ColumnStats to use jackson (Zoltan Haindrich, via Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6e652a3b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6e652a3b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6e652a3b
Branch: refs/heads/master
Commit: 6e652a3b990bc53e61970ddc1aa2c0b503cd13be
Parents: e732aa2
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Feb 16 10:13:56 2017 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Feb 16 10:13:56 2017 -0800
----------------------------------------------------------------------
.../hadoop/hive/common/StatsSetupConst.java | 223 ++++++++++---------
.../hadoop/hive/common/TestStatsSetupConst.java | 54 +++++
2 files changed, 171 insertions(+), 106 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6e652a3b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index c78f005..926b4a6 100644
--- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -17,19 +17,31 @@
*/
package org.apache.hadoop.hive.common;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-import java.util.LinkedHashMap;
+import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonDeserializer;
+import com.fasterxml.jackson.databind.JsonSerializer;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+
/**
* A class that defines the constant strings used by the statistics implementation.
@@ -144,35 +156,62 @@ public class StatsSetupConst {
public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] {
COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS};
+ private static class ColumnStatsAccurate {
+ private static ObjectReader objectReader;
+ private static ObjectWriter objectWriter;
+
+ static {
+ ObjectMapper objectMapper = new ObjectMapper();
+ objectReader = objectMapper.readerFor(ColumnStatsAccurate.class);
+ objectWriter = objectMapper.writerFor(ColumnStatsAccurate.class);
+ }
+
+ static class BooleanSerializer extends JsonSerializer<Boolean> {
+
+ @Override
+ public void serialize(Boolean value, JsonGenerator jsonGenerator,
+ SerializerProvider serializerProvider) throws IOException, JsonProcessingException {
+ jsonGenerator.writeString(value.toString());
+ }
+ }
+
+ static class BooleanDeserializer extends JsonDeserializer<Boolean> {
+
+ public Boolean deserialize(JsonParser jsonParser,
+ DeserializationContext deserializationContext)
+ throws IOException, JsonProcessingException {
+ return Boolean.valueOf(jsonParser.getValueAsString());
+ }
+ }
+
+ @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+ @JsonSerialize(using = BooleanSerializer.class)
+ @JsonDeserialize(using = BooleanDeserializer.class)
+ @JsonProperty(BASIC_STATS)
+ boolean basicStats;
+
+ @JsonInclude(JsonInclude.Include.NON_EMPTY)
+ @JsonProperty(COLUMN_STATS)
+ @JsonSerialize(contentUsing = BooleanSerializer.class)
+ @JsonDeserialize(contentUsing = BooleanDeserializer.class)
+ TreeMap<String, Boolean> columnStats = new TreeMap<>();
+
+ };
+
public static boolean areBasicStatsUptoDate(Map<String, String> params) {
- JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
- if (stats != null && stats.has(BASIC_STATS)) {
- return true;
- } else {
+ if (params == null) {
return false;
}
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ return stats.basicStats;
}
public static boolean areColumnStatsUptoDate(Map<String, String> params, String colName) {
- JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
- try {
- if (!stats.has(COLUMN_STATS)) {
- return false;
- } else {
- JSONObject columns = stats.getJSONObject(COLUMN_STATS);
- if (columns != null && columns.has(colName)) {
- return true;
- } else {
- return false;
- }
- }
- } catch (JSONException e) {
- // For backward compatibility, if previous value can not be parsed to a
- // json object, it will come here.
- LOG.debug("In StatsSetupConst, JsonParser can not parse COLUMN_STATS.");
+ if (params == null) {
return false;
}
-
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ return stats.columnStats.containsKey(colName);
}
// It will only throw JSONException when stats.put(BASIC_STATS, TRUE)
@@ -180,79 +219,67 @@ public class StatsSetupConst {
// note that set basic stats false will wipe out column stats too.
public static void setBasicStatsState(Map<String, String> params, String setting) {
if (setting.equals(FALSE)) {
- if (params != null && params.containsKey(COLUMN_STATS_ACCURATE)) {
+ if (params!=null && params.containsKey(COLUMN_STATS_ACCURATE)) {
params.remove(COLUMN_STATS_ACCURATE);
}
- } else {
- JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-
- try {
- stats.put(BASIC_STATS, TRUE);
- } catch (JSONException e) {
- // impossible to throw any json exceptions.
- LOG.trace(e.getMessage());
- }
- params.put(COLUMN_STATS_ACCURATE, stats.toString());
+ return;
+ }
+ if (params == null) {
+ throw new RuntimeException("params are null...cant set columnstatstate!");
+ }
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ stats.basicStats = true;
+ try {
+ params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+ } catch (JsonProcessingException e) {
+ throw new RuntimeException("can't serialize column stats", e);
}
}
public static void setColumnStatsState(Map<String, String> params, List<String> colNames) {
- try {
- JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ if (params == null) {
+ throw new RuntimeException("params are null...cant set columnstatstate!");
+ }
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
- JSONObject colStats;
- if (stats.has(COLUMN_STATS)) {
- colStats = stats.getJSONObject(COLUMN_STATS);
- } else {
- colStats = new JSONObject(new TreeMap<String,String>());
- }
- for (String colName : colNames) {
- if (!colStats.has(colName)) {
- colStats.put(colName, TRUE);
- }
+ for (String colName : colNames) {
+ if (!stats.columnStats.containsKey(colName)) {
+ stats.columnStats.put(colName, true);
}
- stats.put(COLUMN_STATS, colStats);
- params.put(COLUMN_STATS_ACCURATE, stats.toString());
- } catch (JSONException e) {
- // impossible to throw any json exceptions.
+ }
+ try {
+ params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+ } catch (JsonProcessingException e) {
LOG.trace(e.getMessage());
}
}
public static void clearColumnStatsState(Map<String, String> params) {
- String statsAcc;
- if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) {
- // statsAcc may not be jason format, which will throw exception
- JSONObject stats = parseStatsAcc(statsAcc);
-
- if (stats.has(COLUMN_STATS)) {
- stats.remove(COLUMN_STATS);
- }
- params.put(COLUMN_STATS_ACCURATE, stats.toString());
+ if (params == null) {
+ return;
+ }
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ stats.columnStats.clear();
+
+ try {
+ params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+ } catch (JsonProcessingException e) {
+ LOG.trace(e.getMessage());
}
}
public static void removeColumnStatsState(Map<String, String> params, List<String> colNames) {
- String statsAcc;
- if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) {
- // statsAcc may not be jason format, which will throw exception
- JSONObject stats = parseStatsAcc(statsAcc);
- try {
- JSONObject colStats = stats.getJSONObject(COLUMN_STATS);
- for (String colName : colNames) {
- if (colStats.has(colName)) {
- colStats.remove(colName);
- }
- }
- if (colStats.length() != 0) {
- stats.put(COLUMN_STATS, colStats);
- } else {
- stats.remove(COLUMN_STATS);
- }
- params.put(COLUMN_STATS_ACCURATE, stats.toString());
- } catch (JSONException e) {
- LOG.debug(e.getMessage());
+ if (params == null) {
+ return;
+ }
+ try {
+ ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+ for (String string : colNames) {
+ stats.columnStats.remove(string);
}
+ params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+ } catch (JsonProcessingException e) {
+ LOG.trace(e.getMessage());
}
}
@@ -265,34 +292,18 @@ public class StatsSetupConst {
setBasicStatsState(params, setting);
}
- private static JSONObject parseStatsAcc(String statsAcc) {
+ private static ColumnStatsAccurate parseStatsAcc(String statsAcc) {
if (statsAcc == null) {
- return new JSONObject(new LinkedHashMap<String,Object>());
- } else {
- try {
- return new JSONObject(statsAcc);
- } catch (JSONException e) {
- return statsAccUpgrade(statsAcc);
- }
+ return new ColumnStatsAccurate();
}
- }
-
- private static JSONObject statsAccUpgrade(String statsAcc) {
- JSONObject stats;
- // old format of statsAcc, e.g., TRUE or FALSE
- LOG.debug("In StatsSetupConst, JsonParser can not parse statsAcc.");
- stats = new JSONObject(new LinkedHashMap<String,Object>());
try {
- if (statsAcc.equals(TRUE)) {
- stats.put(BASIC_STATS, TRUE);
- } else {
- stats.put(BASIC_STATS, FALSE);
+ return ColumnStatsAccurate.objectReader.readValue(statsAcc);
+ } catch (Exception e) {
+ ColumnStatsAccurate ret = new ColumnStatsAccurate();
+ if (TRUE.equalsIgnoreCase(statsAcc)) {
+ ret.basicStats = true;
}
- } catch (JSONException e1) {
- // impossible to throw any json exceptions.
- LOG.trace(e1.getMessage());
+ return ret;
}
- return stats;
}
-
}
http://git-wip-us.apache.org/repos/asf/hive/blob/6e652a3b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
----------------------------------------------------------------------
diff --git a/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
index 7a7ad42..792b862 100644
--- a/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
+++ b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.common;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import java.util.HashMap;
import java.util.Map;
@@ -53,4 +54,57 @@ public class TestStatsSetupConst {
assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
}
+ @Test
+ public void testSetBasicStatsState_falseIsAbsent() {
+ Map<String, String> params=new HashMap<>();
+ StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
+ StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
+ assertNull(params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
+
+ // earlier implementation have quoted boolean values...so the new implementation should preserve this
+ @Test
+ public void testStatColumnEntriesCompat() {
+ Map<String, String> params0=new HashMap<>();
+ StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+ StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo"));
+
+ assertEquals("{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
+
+ @Test
+ public void testColumnEntries_orderIndependence() {
+ Map<String, String> params0=new HashMap<>();
+ StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+ StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo","Bar"));
+ Map<String, String> params1=new HashMap<>();
+ StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("Bar","Foo"));
+ StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));
+
+ assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
+
+ @Test
+ public void testColumnEntries_orderIndependence2() {
+ Map<String, String> params0=new HashMap<>();
+ // in case jackson is able to deserialize...it may use a different implementation for the map - which may not preserve order
+ StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+ StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year"));
+ StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year","month"));
+ Map<String, String> params1=new HashMap<>();
+ StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("month","year"));
+ StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));
+
+ System.out.println(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
+
+ // FIXME: current objective is to keep the previous outputs...but this is possibly bad..
+ @Test
+ public void testColumnEntries_areKept_whenBasicIsAbsent() {
+ Map<String, String> params=new HashMap<>();
+ StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
+ StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
+ assertEquals("{\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+ }
}