You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/08/31 08:42:51 UTC
svn commit: r1379313 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/udf/UDFJson.java
java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
test/results/clientpositive/udf_get_json_object.q.out
Author: namit
Date: Fri Aug 31 06:42:51 2012
New Revision: 1379313
URL: http://svn.apache.org/viewvc?rev=1379313&view=rev
Log:
HIVE-3393 get_json_object and json_tuple should use Jackson library
(Kevin Wilfong via namit)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java?rev=1379313&r1=1379312&r2=1379313&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java Fri Aug 31 06:42:51 2012
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.udf;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -28,9 +29,9 @@ import java.util.regex.Pattern;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.type.TypeFactory;
+import org.codehaus.jackson.type.JavaType;
/**
* UDFJson.
@@ -58,6 +59,9 @@ public class UDFJson extends UDF {
private final Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-\\:\\s]+).*");
private final Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");
+ private static final ObjectMapper MAPPER = new ObjectMapper();
+ private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);
+
// An LRU cache using a linked hash map
static class HashCache<K, V> extends LinkedHashMap<K, V> {
@@ -80,7 +84,8 @@ public class UDFJson extends UDF {
static Map<String, Object> extractObjectCache = new HashCache<String, Object>();
static Map<String, String[]> pathExprCache = new HashCache<String, String[]>();
- static Map<String, ArrayList<String>> indexListCache = new HashCache<String, ArrayList<String>>();
+ static Map<String, ArrayList<String>> indexListCache =
+ new HashCache<String, ArrayList<String>>();
static Map<String, String> mKeyGroup1Cache = new HashCache<String, String>();
static Map<String, Boolean> mKeyMatchesCache = new HashCache<String, Boolean>();
@@ -115,34 +120,47 @@ public class UDFJson extends UDF {
return null;
}
- try {
- // Cache pathExpr
- String[] pathExpr = pathExprCache.get(pathString);
- if (pathExpr == null) {
- pathExpr = pathString.split("\\.", -1);
- pathExprCache.put(pathString, pathExpr);
- }
+ // Cache pathExpr
+ String[] pathExpr = pathExprCache.get(pathString);
+ if (pathExpr == null) {
+ pathExpr = pathString.split("\\.", -1);
+ pathExprCache.put(pathString, pathExpr);
+ }
- if (!pathExpr[0].equalsIgnoreCase("$")) {
+ if (!pathExpr[0].equalsIgnoreCase("$")) {
+ return null;
+ }
+ // Cache extractObject
+ Object extractObject = extractObjectCache.get(jsonString);
+ if (extractObject == null) {
+ try {
+ extractObject = MAPPER.readValue(jsonString, MAP_TYPE);
+ } catch (Exception e) {
return null;
}
- // Cache extractObject
- Object extractObject = extractObjectCache.get(jsonString);
+ extractObjectCache.put(jsonString, extractObject);
+ }
+ for (int i = 1; i < pathExpr.length; i++) {
if (extractObject == null) {
- extractObject = new JSONObject(jsonString);
- extractObjectCache.put(jsonString, extractObject);
+ return null;
}
- for (int i = 1; i < pathExpr.length; i++) {
- extractObject = extract(extractObject, pathExpr[i]);
+ extractObject = extract(extractObject, pathExpr[i]);
+ }
+ if (extractObject instanceof Map || extractObject instanceof List) {
+ try {
+ result.set(MAPPER.writeValueAsString(extractObject));
+ } catch (Exception e) {
+ return null;
}
+ } else if (extractObject != null) {
result.set(extractObject.toString());
- return result;
- } catch (Exception e) {
+ } else {
return null;
}
+ return result;
}
- private Object extract(Object json, String path) throws JSONException {
+ private Object extract(Object json, String path) {
// Cache patternkey.matcher(path).matches()
Matcher mKey = null;
@@ -185,68 +203,73 @@ public class UDFJson extends UDF {
return json;
}
- ArrayList<Object> jsonList = new ArrayList<Object>();
+ List<Object> jsonList = new ArrayList<Object>();
- private Object extract_json_withindex(Object json, ArrayList<String> indexList)
- throws JSONException {
+ @SuppressWarnings("unchecked")
+ private Object extract_json_withindex(Object json, ArrayList<String> indexList) {
jsonList.clear();
jsonList.add(json);
Iterator<String> itr = indexList.iterator();
while (itr.hasNext()) {
String index = itr.next();
- ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
+ List<Object> tmp_jsonList = new ArrayList<Object>();
if (index.equalsIgnoreCase("*")) {
- for (int i = 0; i < (jsonList).size(); i++) {
- try {
- JSONArray array = (JSONArray) (jsonList).get(i);
- for (int j = 0; j < array.length(); j++) {
- tmp_jsonList.add(array.get(j));
+ for (int i = 0; i < jsonList.size(); i++) {
+ Object array = jsonList.get(i);
+ if (array instanceof List) {
+ for (int j = 0; j < ((List<Object>)array).size(); j++) {
+ tmp_jsonList.add(((List<Object>)array).get(j));
}
- } catch (Exception e) {
- continue;
}
}
jsonList = tmp_jsonList;
} else {
for (int i = 0; i < (jsonList).size(); i++) {
- try {
- tmp_jsonList.add(((JSONArray) (jsonList).get(i)).get(Integer
- .parseInt(index)));
- } catch (ClassCastException e) {
+ Object array = jsonList.get(i);
+ int indexValue = Integer.parseInt(index);
+ if (!(array instanceof List)) {
continue;
- } catch (JSONException e) {
+ }
+ if (indexValue >= ((List<Object>)array).size()) {
return null;
}
+ tmp_jsonList.add(((List<Object>)array).get(indexValue));
jsonList = tmp_jsonList;
}
}
}
- return (jsonList.size() > 1) ? new JSONArray(jsonList) : jsonList.get(0);
+ if (jsonList.isEmpty()) {
+ return null;
+ }
+ return (jsonList.size() > 1) ? new ArrayList<Object>(jsonList) : jsonList.get(0);
}
- private Object extract_json_withkey(Object json, String path)
- throws JSONException {
- if (json.getClass() == org.json.JSONArray.class) {
- JSONArray jsonArray = new JSONArray();
- for (int i = 0; i < ((JSONArray) json).length(); i++) {
- Object josn_elem = ((JSONArray) json).get(i);
- try {
- Object json_obj = ((JSONObject) josn_elem).get(path);
- if (json_obj.getClass() == org.json.JSONArray.class) {
- for (int j = 0; j < ((JSONArray) json_obj).length(); j++) {
- jsonArray.put(((JSONArray) json_obj).get(j));
- }
- } else {
- jsonArray.put(json_obj);
- }
- } catch (Exception e) {
+ @SuppressWarnings("unchecked")
+ private Object extract_json_withkey(Object json, String path) {
+ if (json instanceof List) {
+ List<Object> jsonArray = new ArrayList<Object>();
+ for (int i = 0; i < ((List<Object>) json).size(); i++) {
+ Object json_elem = ((List<Object>) json).get(i);
+ Object json_obj = null;
+ if (json_elem instanceof Map) {
+ json_obj = ((Map<String, Object>) json_elem).get(path);
+ } else {
continue;
}
+ if (json_obj instanceof List) {
+ for (int j = 0; j < ((List<Object>) json_obj).size(); j++) {
+ jsonArray.add(((List<Object>) json_obj).get(j));
+ }
+ } else if (json_obj != null) {
+ jsonArray.add(json_obj);
+ }
}
- return (jsonArray.length() == 0) ? null : jsonArray;
+ return (jsonArray.size() == 0) ? null : jsonArray;
+ } else if (json instanceof Map) {
+ return ((Map<String, Object>) json).get(path);
} else {
- return ((JSONObject) json).get(path);
+ return null;
}
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java?rev=1379313&r1=1379312&r2=1379313&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFJSONTuple.java Fri Aug 31 06:42:51 2012
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf.ge
import java.util.ArrayList;
import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
@@ -34,8 +35,10 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;
-import org.json.JSONException;
-import org.json.JSONObject;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.map.type.TypeFactory;
+import org.codehaus.jackson.type.JavaType;
+
/**
* GenericUDTFJSONTuple: this
*
@@ -48,6 +51,9 @@ public class GenericUDTFJSONTuple extend
private static Log LOG = LogFactory.getLog(GenericUDTFJSONTuple.class.getName());
+ private static final ObjectMapper MAPPER = new ObjectMapper();
+ private static final JavaType MAP_TYPE = TypeFactory.fromClass(Map.class);
+
int numCols; // number of output columns
String[] paths; // array of path expressions, each of which corresponds to a column
Text[] retCols; // array of returned column values
@@ -77,7 +83,7 @@ public class GenericUDTFJSONTuple extend
}
- static Map<String, JSONObject> jsonObjectCache = new HashCache<String, JSONObject>();
+ static Map<String, Object> jsonObjectCache = new HashCache<String, Object>();
@Override
public void close() throws HiveException {
@@ -127,6 +133,7 @@ public class GenericUDTFJSONTuple extend
return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
+ @SuppressWarnings("unchecked")
@Override
public void process(Object[] o) throws HiveException {
@@ -148,32 +155,39 @@ public class GenericUDTFJSONTuple extend
return;
}
try {
- JSONObject jsonObj = jsonObjectCache.get(jsonStr);
+ Object jsonObj = jsonObjectCache.get(jsonStr);
if (jsonObj == null) {
- jsonObj = new JSONObject(jsonStr);
+ try {
+ jsonObj = MAPPER.readValue(jsonStr, MAP_TYPE);
+ } catch (Exception e) {
+ reportInvalidJson(jsonStr);
+ forward(nullCols);
+ return;
+ }
jsonObjectCache.put(jsonStr, jsonObj);
}
+ if (!(jsonObj instanceof Map)) {
+ reportInvalidJson(jsonStr);
+ forward(nullCols);
+ return;
+ }
+
for (int i = 0; i < numCols; ++i) {
- if (jsonObj.isNull(paths[i])) {
- retCols[i] = null;
+ if (retCols[i] == null) {
+ retCols[i] = cols[i]; // use the object pool rather than creating a new object
+ }
+ Object extractObject = ((Map<String, Object>)jsonObj).get(paths[i]);
+ if (extractObject instanceof Map || extractObject instanceof List) {
+ retCols[i].set(MAPPER.writeValueAsString(extractObject));
+ } else if (extractObject != null) {
+ retCols[i].set(extractObject.toString());
} else {
- if (retCols[i] == null) {
- retCols[i] = cols[i]; // use the object pool rather than creating a new object
- }
- retCols[i].set(jsonObj.getString(paths[i]));
+ retCols[i] = null;
}
}
forward(retCols);
return;
- } catch (JSONException e) {
- // parsing error, invalid JSON string
- if (!seenErrors) {
- LOG.error("The input is not a valid JSON string: " + jsonStr + ". Skipping such error messages in the future.");
- seenErrors = true;
- }
- forward(nullCols);
- return;
} catch (Throwable e) {
LOG.error("JSON parsing/evaluation exception" + e);
forward(nullCols);
@@ -184,4 +198,12 @@ public class GenericUDTFJSONTuple extend
public String toString() {
return "json_tuple";
}
+
+ private void reportInvalidJson(String jsonStr) {
+ if (!seenErrors) {
+ LOG.error("The input is not a valid JSON string: " + jsonStr +
+ ". Skipping such error messages in the future.");
+ seenErrors = true;
+ }
+ }
}
Modified: hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out?rev=1379313&r1=1379312&r2=1379313&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/udf_get_json_object.q.out Fri Aug 31 06:42:51 2012
@@ -92,7 +92,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src_json
#### A masked pattern was here ####
POSTHOOK: Lineage: dest1.c1 SIMPLE []
-amy {"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"book":[{"author":"Nigel Rees","category":"reference","title":"Sayings of the Century","price":8.95},{"author":"Herman Melville","category":"fiction","title":"Moby Dick","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","category":"fiction","title":"The Lord of the Rings","price":22.99,"reader":[{"name":"bob","age":25},{"name":"jack","age":26}],"isbn":"0-395-19395-8"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"bicycle":{"price":19.95,"color":"red"}}
+amy {"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],"basket":[[1,2,{"b":"y","a":"x"}],[3,4],[5,6]],"book":[{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}],"bicycle":{"price":19.95,"color":"red"}}
PREHOOK: query: SELECT get_json_object(src_json.json, '$.store.bicycle'), get_json_object(src_json.json, '$.store.book') FROM src_json
PREHOOK: type: QUERY
PREHOOK: Input: default@src_json
@@ -102,7 +102,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src_json
#### A masked pattern was here ####
POSTHOOK: Lineage: dest1.c1 SIMPLE []
-{"price":19.95,"color":"red"} [{"author":"Nigel Rees","category":"reference","title":"Sayings of the Century","price":8.95},{"author":"Herman Melville","category":"fiction","title":"Moby Dick","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","category":"fiction","title":"The Lord of the Rings","price":22.99,"reader":[{"name":"bob","age":25},{"name":"jack","age":26}],"isbn":"0-395-19395-8"}]
+{"price":19.95,"color":"red"} [{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]
PREHOOK: query: SELECT get_json_object(src_json.json, '$.store.book[0]'), get_json_object(src_json.json, '$.store.book[*]') FROM src_json
PREHOOK: type: QUERY
PREHOOK: Input: default@src_json
@@ -112,7 +112,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src_json
#### A masked pattern was here ####
POSTHOOK: Lineage: dest1.c1 SIMPLE []
-{"author":"Nigel Rees","category":"reference","title":"Sayings of the Century","price":8.95} [{"author":"Nigel Rees","category":"reference","title":"Sayings of the Century","price":8.95},{"author":"Herman Melville","category":"fiction","title":"Moby Dick","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","category":"fiction","title":"The Lord of the Rings","price":22.99,"reader":[{"name":"bob","age":25},{"name":"jack","age":26}],"isbn":"0-395-19395-8"}]
+{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95} [{"author":"Nigel Rees","title":"Sayings of the Century","category":"reference","price":8.95},{"author":"Herman Melville","title":"Moby Dick","category":"fiction","price":8.99,"isbn":"0-553-21311-3"},{"author":"J. R. R. Tolkien","title":"The Lord of the Rings","category":"fiction","reader":[{"age":25,"name":"bob"},{"age":26,"name":"jack"}],"price":22.99,"isbn":"0-395-19395-8"}]
PREHOOK: query: SELECT get_json_object(src_json.json, '$.store.book[0].category'), get_json_object(src_json.json, '$.store.book[*].category'), get_json_object(src_json.json, '$.store.book[*].isbn'), get_json_object(src_json.json, '$.store.book[*].reader') FROM src_json
PREHOOK: type: QUERY
PREHOOK: Input: default@src_json
@@ -122,7 +122,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src_json
#### A masked pattern was here ####
POSTHOOK: Lineage: dest1.c1 SIMPLE []
-reference ["reference","fiction","fiction"] ["0-553-21311-3","0-395-19395-8"] [{"name":"bob","age":25},{"name":"jack","age":26}]
+reference ["reference","fiction","fiction"] ["0-553-21311-3","0-395-19395-8"] [{"age":25,"name":"bob"},{"age":26,"name":"jack"}]
PREHOOK: query: SELECT get_json_object(src_json.json, '$.store.book[*].reader[0].age'), get_json_object(src_json.json, '$.store.book[*].reader[*].age') FROM src_json
PREHOOK: type: QUERY
PREHOOK: Input: default@src_json