You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2017/02/16 18:14:04 UTC

hive git commit: HIVE-15161: migrate ColumnStats to use jackson (Zoltan Haindrich, via Pengcheng Xiong)

Repository: hive
Updated Branches:
  refs/heads/master e732aa27e -> 6e652a3b9


HIVE-15161: migrate ColumnStats to use jackson (Zoltan Haindrich, via Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6e652a3b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6e652a3b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6e652a3b

Branch: refs/heads/master
Commit: 6e652a3b990bc53e61970ddc1aa2c0b503cd13be
Parents: e732aa2
Author: Pengcheng Xiong <px...@apache.org>
Authored: Thu Feb 16 10:13:56 2017 -0800
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Thu Feb 16 10:13:56 2017 -0800

----------------------------------------------------------------------
 .../hadoop/hive/common/StatsSetupConst.java     | 223 ++++++++++---------
 .../hadoop/hive/common/TestStatsSetupConst.java |  54 +++++
 2 files changed, 171 insertions(+), 106 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6e652a3b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index c78f005..926b4a6 100644
--- a/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -17,19 +17,31 @@
  */
 package org.apache.hadoop.hive.common;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-import java.util.LinkedHashMap;
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.DeserializationContext;
+import com.fasterxml.jackson.databind.JsonDeserializer;
+import com.fasterxml.jackson.databind.JsonSerializer;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+
 
 /**
  * A class that defines the constant strings used by the statistics implementation.
@@ -144,35 +156,62 @@ public class StatsSetupConst {
   public static final String[] TABLE_PARAMS_STATS_KEYS = new String[] {
     COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE,ROW_COUNT, RAW_DATA_SIZE, NUM_PARTITIONS};
 
+  private static class ColumnStatsAccurate {
+    private static ObjectReader objectReader;
+    private static ObjectWriter objectWriter;
+
+    static {
+      ObjectMapper objectMapper = new ObjectMapper();
+      objectReader = objectMapper.readerFor(ColumnStatsAccurate.class);
+      objectWriter = objectMapper.writerFor(ColumnStatsAccurate.class);
+    }
+
+    static class BooleanSerializer extends JsonSerializer<Boolean> {
+
+      @Override
+      public void serialize(Boolean value, JsonGenerator jsonGenerator,
+          SerializerProvider serializerProvider) throws IOException, JsonProcessingException {
+        jsonGenerator.writeString(value.toString());
+      }
+    }
+
+    static class BooleanDeserializer extends JsonDeserializer<Boolean> {
+
+      public Boolean deserialize(JsonParser jsonParser,
+          DeserializationContext deserializationContext)
+              throws IOException, JsonProcessingException {
+        return Boolean.valueOf(jsonParser.getValueAsString());
+      }
+    }
+
+    @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+    @JsonSerialize(using = BooleanSerializer.class)
+    @JsonDeserialize(using = BooleanDeserializer.class)
+    @JsonProperty(BASIC_STATS)
+    boolean basicStats;
+
+    @JsonInclude(JsonInclude.Include.NON_EMPTY)
+    @JsonProperty(COLUMN_STATS)
+    @JsonSerialize(contentUsing = BooleanSerializer.class)
+    @JsonDeserialize(contentUsing = BooleanDeserializer.class)
+    TreeMap<String, Boolean> columnStats = new TreeMap<>();
+
+  };
+
   public static boolean areBasicStatsUptoDate(Map<String, String> params) {
-    JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-    if (stats != null && stats.has(BASIC_STATS)) {
-      return true;
-    } else {
+    if (params == null) {
       return false;
     }
+    ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+    return stats.basicStats;
   }
 
   public static boolean areColumnStatsUptoDate(Map<String, String> params, String colName) {
-    JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-    try {
-      if (!stats.has(COLUMN_STATS)) {
-        return false;
-      } else {
-        JSONObject columns = stats.getJSONObject(COLUMN_STATS);
-        if (columns != null && columns.has(colName)) {
-          return true;
-        } else {
-          return false;
-        }
-      }
-    } catch (JSONException e) {
-      // For backward compatibility, if previous value can not be parsed to a
-      // json object, it will come here.
-      LOG.debug("In StatsSetupConst, JsonParser can not parse COLUMN_STATS.");
+    if (params == null) {
       return false;
     }
-
+    ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+    return stats.columnStats.containsKey(colName);
   }
 
   // It will only throw JSONException when stats.put(BASIC_STATS, TRUE)
@@ -180,79 +219,67 @@ public class StatsSetupConst {
   // note that set basic stats false will wipe out column stats too.
   public static void setBasicStatsState(Map<String, String> params, String setting) {
     if (setting.equals(FALSE)) {
-      if (params != null && params.containsKey(COLUMN_STATS_ACCURATE)) {
+      if (params!=null && params.containsKey(COLUMN_STATS_ACCURATE)) {
         params.remove(COLUMN_STATS_ACCURATE);
       }
-    } else {
-      JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-      
-      try {
-        stats.put(BASIC_STATS, TRUE);
-      } catch (JSONException e) {
-        // impossible to throw any json exceptions.
-        LOG.trace(e.getMessage());
-      }
-      params.put(COLUMN_STATS_ACCURATE, stats.toString());
+      return;
+    }
+    if (params == null) {
+      throw new RuntimeException("params are null...cant set columnstatstate!");
+    }
+    ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+    stats.basicStats = true;
+    try {
+      params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+    } catch (JsonProcessingException e) {
+      throw new RuntimeException("can't serialize column stats", e);
     }
   }
 
   public static void setColumnStatsState(Map<String, String> params, List<String> colNames) {
-    try {
-      JSONObject stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+    if (params == null) {
+      throw new RuntimeException("params are null...cant set columnstatstate!");
+    }
+    ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
 
-      JSONObject colStats;
-      if (stats.has(COLUMN_STATS)) {
-        colStats = stats.getJSONObject(COLUMN_STATS);
-      } else {
-        colStats = new JSONObject(new TreeMap<String,String>());
-      }
-      for (String colName : colNames) {
-        if (!colStats.has(colName)) {
-          colStats.put(colName, TRUE);
-        }
+    for (String colName : colNames) {
+      if (!stats.columnStats.containsKey(colName)) {
+        stats.columnStats.put(colName, true);
       }
-      stats.put(COLUMN_STATS, colStats);
-      params.put(COLUMN_STATS_ACCURATE, stats.toString());
-    } catch (JSONException e) {
-      // impossible to throw any json exceptions.
+    }
+    try {
+      params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+    } catch (JsonProcessingException e) {
       LOG.trace(e.getMessage());
     }
   }
 
   public static void clearColumnStatsState(Map<String, String> params) {
-    String statsAcc;
-    if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) {
-      // statsAcc may not be jason format, which will throw exception
-      JSONObject stats = parseStatsAcc(statsAcc);
-      
-      if (stats.has(COLUMN_STATS)) {
-        stats.remove(COLUMN_STATS);
-      }
-      params.put(COLUMN_STATS_ACCURATE, stats.toString());
+    if (params == null) {
+      return;
+    }
+    ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+    stats.columnStats.clear();
+
+    try {
+      params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+    } catch (JsonProcessingException e) {
+      LOG.trace(e.getMessage());
     }
   }
 
   public static void removeColumnStatsState(Map<String, String> params, List<String> colNames) {
-    String statsAcc;
-    if (params != null && (statsAcc = params.get(COLUMN_STATS_ACCURATE)) != null) {
-      // statsAcc may not be jason format, which will throw exception
-      JSONObject stats = parseStatsAcc(statsAcc);
-      try {
-        JSONObject colStats = stats.getJSONObject(COLUMN_STATS);
-        for (String colName : colNames) {
-          if (colStats.has(colName)) {
-            colStats.remove(colName);
-          }
-        }
-        if (colStats.length() != 0) {
-          stats.put(COLUMN_STATS, colStats);
-        } else {
-          stats.remove(COLUMN_STATS);
-        }
-        params.put(COLUMN_STATS_ACCURATE, stats.toString());
-      } catch (JSONException e) {
-        LOG.debug(e.getMessage());
+    if (params == null) {
+      return;
+    }
+    try {
+      ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
+      for (String string : colNames) {
+        stats.columnStats.remove(string);
       }
+      params.put(COLUMN_STATS_ACCURATE, ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
+    } catch (JsonProcessingException e) {
+      LOG.trace(e.getMessage());
     }
   }
 
@@ -265,34 +292,18 @@ public class StatsSetupConst {
     setBasicStatsState(params, setting);
   }
   
-  private static JSONObject parseStatsAcc(String statsAcc) {
+  private static ColumnStatsAccurate parseStatsAcc(String statsAcc) {
     if (statsAcc == null) {
-      return new JSONObject(new LinkedHashMap<String,Object>());
-    } else {
-      try {
-        return new JSONObject(statsAcc);
-      } catch (JSONException e) {
-        return statsAccUpgrade(statsAcc);
-      }
+      return new ColumnStatsAccurate();
     }
-  }
-
-  private static JSONObject statsAccUpgrade(String statsAcc) {
-    JSONObject stats;
-    // old format of statsAcc, e.g., TRUE or FALSE
-    LOG.debug("In StatsSetupConst, JsonParser can not parse statsAcc.");
-    stats = new JSONObject(new LinkedHashMap<String,Object>());
     try {
-      if (statsAcc.equals(TRUE)) {
-        stats.put(BASIC_STATS, TRUE);
-      } else {
-        stats.put(BASIC_STATS, FALSE);
+      return ColumnStatsAccurate.objectReader.readValue(statsAcc);
+    } catch (Exception e) {
+      ColumnStatsAccurate ret = new ColumnStatsAccurate();
+      if (TRUE.equalsIgnoreCase(statsAcc)) {
+        ret.basicStats = true;
       }
-    } catch (JSONException e1) {
-      // impossible to throw any json exceptions.
-      LOG.trace(e1.getMessage());
+      return ret;
     }
-    return stats;
   }
-
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/6e652a3b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
----------------------------------------------------------------------
diff --git a/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
index 7a7ad42..792b862 100644
--- a/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
+++ b/common/src/test/org/apache/hadoop/hive/common/TestStatsSetupConst.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.common;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -53,4 +54,57 @@ public class TestStatsSetupConst {
     assertEquals("{\"BASIC_STATS\":\"true\"}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
   }
 
+  @Test
+  public void testSetBasicStatsState_falseIsAbsent() {
+    Map<String, String> params=new HashMap<>();
+    StatsSetupConst.setBasicStatsState(params, String.valueOf(true));
+    StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
+    assertNull(params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+  }
+
+  // earlier implementation have quoted boolean values...so the new implementation should preserve this
+  @Test
+  public void testStatColumnEntriesCompat() {
+    Map<String, String> params0=new HashMap<>();
+    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo"));
+
+    assertEquals("{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+  }
+
+  @Test
+  public void testColumnEntries_orderIndependence() {
+    Map<String, String> params0=new HashMap<>();
+    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("Foo","Bar"));
+    Map<String, String> params1=new HashMap<>();
+    StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("Bar","Foo"));
+    StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));
+
+    assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+  }
+
+  @Test
+  public void testColumnEntries_orderIndependence2() {
+    Map<String, String> params0=new HashMap<>();
+    // in case jackson is able to deserialize...it may use a different implementation for the map - which may not preserve order
+    StatsSetupConst.setBasicStatsState(params0, String.valueOf(true));
+    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year"));
+    StatsSetupConst.setColumnStatsState(params0, Lists.newArrayList("year","month"));
+    Map<String, String> params1=new HashMap<>();
+    StatsSetupConst.setColumnStatsState(params1, Lists.newArrayList("month","year"));
+    StatsSetupConst.setBasicStatsState(params1, String.valueOf(true));
+
+    System.out.println(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+    assertEquals(params0.get(StatsSetupConst.COLUMN_STATS_ACCURATE),params1.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+  }
+
+  // FIXME: current objective is to keep the previous outputs...but this is possibly bad..
+  @Test
+  public void testColumnEntries_areKept_whenBasicIsAbsent() {
+    Map<String, String> params=new HashMap<>();
+    StatsSetupConst.setBasicStatsState(params, String.valueOf(false));
+    StatsSetupConst.setColumnStatsState(params, Lists.newArrayList("Foo"));
+    assertEquals("{\"COLUMN_STATS\":{\"Foo\":\"true\"}}",params.get(StatsSetupConst.COLUMN_STATS_ACCURATE));
+  }
 }