You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/11/12 20:13:21 UTC
svn commit: r835503 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
Author: namit
Date: Thu Nov 12 19:13:21 2009
New Revision: 835503
URL: http://svn.apache.org/viewvc?rev=835503&view=rev
Log:
HIVE-914. Optimize UDFJson (Paul Yang via namit)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=835503&r1=835502&r2=835503&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Nov 12 19:13:21 2009
@@ -115,6 +115,8 @@
HIVE-855. UDF: Concat should accept multiple arguments.
(Paul Yang via zshao)
+ HIVE-914. Optimize UDFJson (Paul Yang via namit)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java?rev=835503&r1=835502&r2=835503&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFJson.java Thu Nov 12 19:13:21 2009
@@ -21,6 +21,8 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -56,10 +58,40 @@
)
public class UDFJson extends UDF {
private static Log LOG = LogFactory.getLog(UDFJson.class.getName());
- private Pattern pattern_key = Pattern.compile("^([a-zA-Z0-9_\\-]+).*");
- private Pattern pattern_index = Pattern.compile("\\[([0-9]+|\\*)\\]");
+ private Pattern patternKey = Pattern.compile("^([a-zA-Z0-9_\\-]+).*");
+ private Pattern patternIndex = Pattern.compile("\\[([0-9]+|\\*)\\]");
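+ // A size-bounded cache using a linked hash map (insertion-order eviction; accessOrder is not enabled, so this is FIFO rather than true LRU)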
+ static class HashCache<K,V> extends LinkedHashMap<K,V> {
+
+ private static final int CACHE_SIZE = 16;
+ private static final int INIT_SIZE = 32;
+ private static final float LOAD_FACTOR = 0.6f;
+
+ HashCache() {
+ super(INIT_SIZE,LOAD_FACTOR);
+ }
+ private static final long serialVersionUID = 1;
+ @Override
+ protected boolean removeEldestEntry (Map.Entry<K,V> eldest) {
+ return size() > CACHE_SIZE;
+ }
+
+ }
+
+ static Map<String, Object> extractObjectCache =
+ new HashCache<String, Object>();
+ static Map<String, String[]> pathExprCache =
+ new HashCache<String, String[]>();
+ static Map<String, ArrayList<String>> indexListCache =
+ new HashCache<String, ArrayList<String>>();
+ static Map<String, String> mKeyGroup1Cache =
+ new HashCache<String, String>();
+ static Map<String, Boolean> mKeyMatchesCache =
+ new HashCache<String, Boolean>();
+
Text result = new Text();
+
public UDFJson() {
}
@@ -83,24 +115,34 @@
* [,] : Union operator
* [start:end:step] : array slice operator
*
- * @param jsonText the json string.
- * @param pathText the json path expression.
- * @return json string or null when error happens.
+ * @param jsonString the json string.
+ * @param pathString the json path expression.
+ * @return json string or null when an error happens.
*/
- public Text evaluate(Text jsonText, Text pathText) {
- if (jsonText == null || pathText == null) {
+ public Text evaluate(String jsonString, String pathString) {
+
+ if(jsonString == null || jsonString == "" ||
+ pathString == null || pathString == "") {
return null;
}
-
- String jsonString = jsonText.toString();
- String pathString = pathText.toString();
try {
- String[] pathExpr = pathString.split("\\.", -1);
+ // Cache pathExpr
+ String[] pathExpr = pathExprCache.get(pathString);
+ if (pathExpr == null) {
+ pathExpr = pathString.split("\\.", -1);
+ pathExprCache.put(pathString, pathExpr);
+ }
+
if (!pathExpr[0].equalsIgnoreCase("$")) {
return null;
}
- Object extractObject = new JSONObject(jsonString);
+ // Cache extractObject
+ Object extractObject = extractObjectCache.get(jsonString);
+ if(extractObject == null) {
+ extractObject = new JSONObject(jsonString);
+ extractObjectCache.put(jsonString, extractObject);
+ }
for (int i = 1; i < pathExpr.length; i++) {
extractObject = extract(extractObject, pathExpr[i]);
}
@@ -112,33 +154,60 @@
}
private Object extract(Object json, String path) throws JSONException {
- Matcher m_key = pattern_key.matcher(path);
- if (!m_key.matches()) {
+
+ // Cache patternKey.matcher(path).matches()
+ Matcher mKey = null;
+ Boolean mKeyMatches = mKeyMatchesCache.get(path);
+ if (mKeyMatches == null) {
+ mKey = patternKey.matcher(path);
+ mKeyMatches = mKey.matches() ? Boolean.TRUE : Boolean.FALSE;
+ mKeyMatchesCache.put(path, mKeyMatches);
+ }
+ if (!mKeyMatches.booleanValue()) {
return null;
}
- json = extract_json_withkey(json, m_key.group(1));
-
- Matcher m_index = pattern_index.matcher(path);
- ArrayList<String> index_list = new ArrayList<String>();
- while (m_index.find()) {
- index_list.add(m_index.group(1));
+
+ // Cache mKey.group(1)
+ String mKeyGroup1 = mKeyGroup1Cache.get(path);
+ if (mKeyGroup1 == null) {
+ if (mKey == null) {
+ mKey = patternKey.matcher(path);
+ }
+ mKeyGroup1 = mKey.group(1);
+ mKeyGroup1Cache.put(path, mKeyGroup1);
}
- if (index_list.size() > 0) {
- json = extract_json_withindex(json, index_list);
+ json = extract_json_withkey(json, mKeyGroup1);
+
+ // Cache indexList
+ ArrayList<String> indexList = indexListCache.get(path);
+ if(indexList == null) {
+ Matcher mIndex = patternIndex.matcher(path);
+ indexList = new ArrayList<String>();
+ while (mIndex.find()) {
+ indexList.add(mIndex.group(1));
+ }
+ indexListCache.put(path, indexList);
}
+ if (indexList.size() > 0) {
+ json = extract_json_withindex(json, indexList);
+ }
+
return json;
}
+ ArrayList<Object> jsonList = new ArrayList<Object>();
+
private Object extract_json_withindex(Object json, ArrayList<String> indexList)
throws JSONException {
- ArrayList<Object> jsonList = new ArrayList<Object>();
+
+ jsonList.clear();
jsonList.add(json);
Iterator<String> itr = indexList.iterator();
while (itr.hasNext()) {
String index = itr.next();
+ ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
if (index.equalsIgnoreCase("*")) {
- ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
for (int i = 0; i < ((ArrayList<Object>) jsonList).size(); i++) {
try {
JSONArray array = (JSONArray) ((ArrayList<Object>) jsonList).get(i);
@@ -151,7 +220,6 @@
}
jsonList = tmp_jsonList;
} else {
- ArrayList<Object> tmp_jsonList = new ArrayList<Object>();
for (int i = 0; i < ((ArrayList<Object>) jsonList).size(); i++) {
try {
tmp_jsonList