You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by ji...@apache.org on 2014/12/12 09:22:57 UTC
[43/45] tajo git commit: TAJO-1242: Json scanner can not read some
case of truncated text. (jinho)
TAJO-1242: Json scanner can not read some case of truncated text. (jinho)
Closes #296
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/c665ae1f
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/c665ae1f
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/c665ae1f
Branch: refs/heads/index_support
Commit: c665ae1f6fc1e35e6a743e7e4e377c7885686b32
Parents: 5d9a130
Author: jhkim <jh...@apache.org>
Authored: Fri Dec 12 17:00:40 2014 +0900
Committer: jhkim <jh...@apache.org>
Committed: Fri Dec 12 17:00:40 2014 +0900
----------------------------------------------------------------------
CHANGES | 4 +-
.../testErrorTolerance3.json | 1 +
.../tajo/storage/json/JsonLineDeserializer.java | 39 ++++++++++----------
.../tajo/storage/TestDelimitedTextFile.java | 17 +++++++++
4 files changed, 41 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/c665ae1f/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index d758459..e41ea56 100644
--- a/CHANGES
+++ b/CHANGES
@@ -109,7 +109,9 @@ Release 0.9.1 - unreleased
BUG FIXES
- TAJO-1239 ORDER BY with null column desc miss some data.
+ TAJO-1242: Json scanner can not read some case of trucated text. (jinho)
+
+ TAJO-1239: ORDER BY with null column desc miss some data.
(Hyoungjun Kim via hyunsik)
TAJO-1244: tajo.worker.tmpdir.locations should use a validator for a list
http://git-wip-us.apache.org/repos/asf/tajo/blob/c665ae1f/tajo-storage/src/test/resources/dataset/TestDelimitedTextFile/testErrorTolerance3.json
----------------------------------------------------------------------
diff --git a/tajo-storage/src/test/resources/dataset/TestDelimitedTextFile/testErrorTolerance3.json b/tajo-storage/src/test/resources/dataset/TestDelimitedTextFile/testErrorTolerance3.json
new file mode 100644
index 0000000..a7fe424
--- /dev/null
+++ b/tajo-storage/src/test/resources/dataset/TestDelimitedTextFile/testErrorTolerance3.json
@@ -0,0 +1 @@
+{"id":[{"text":"json test
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/c665ae1f/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
index dfe36f6..a7e02a4 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/json/JsonLineDeserializer.java
@@ -32,7 +32,6 @@ import org.apache.tajo.common.exception.NotImplementedException;
import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.datum.NullDatum;
import org.apache.tajo.datum.TextDatum;
-import org.apache.tajo.datum.protobuf.ProtobufJsonFormat;
import org.apache.tajo.storage.Tuple;
import org.apache.tajo.storage.text.TextLineDeserializer;
import org.apache.tajo.storage.text.TextLineParsingError;
@@ -42,8 +41,8 @@ import java.util.Iterator;
public class JsonLineDeserializer extends TextLineDeserializer {
private JSONParser parser;
- private Type [] types;
- private String [] columnNames;
+ private Type[] types;
+ private String[] columnNames;
public JsonLineDeserializer(Schema schema, TableMeta meta, int[] targetColumnIndexes) {
super(schema, meta, targetColumnIndexes);
@@ -54,27 +53,34 @@ public class JsonLineDeserializer extends TextLineDeserializer {
types = SchemaUtil.toTypes(schema);
columnNames = SchemaUtil.toSimpleNames(schema);
- parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE);
+ parser = new JSONParser(JSONParser.MODE_JSON_SIMPLE | JSONParser.IGNORE_CONTROL_CHAR);
}
@Override
public void deserialize(ByteBuf buf, Tuple output) throws IOException, TextLineParsingError {
- byte [] line = new byte[buf.readableBytes()];
+ byte[] line = new byte[buf.readableBytes()];
buf.readBytes(line);
+ JSONObject object;
try {
- JSONObject object = (JSONObject) parser.parse(line);
+ object = (JSONObject) parser.parse(line);
+ } catch (ParseException pe) {
+ throw new TextLineParsingError(new String(line, TextDatum.DEFAULT_CHARSET), pe);
+ } catch (ArrayIndexOutOfBoundsException ae) {
+ // truncated value
+ throw new TextLineParsingError(new String(line, TextDatum.DEFAULT_CHARSET), ae);
+ }
- for (int i = 0; i < targetColumnIndexes.length; i++) {
- int actualIdx = targetColumnIndexes[i];
- String fieldName = columnNames[actualIdx];
+ for (int i = 0; i < targetColumnIndexes.length; i++) {
+ int actualIdx = targetColumnIndexes[i];
+ String fieldName = columnNames[actualIdx];
- if (!object.containsKey(fieldName)) {
- output.put(actualIdx, NullDatum.get());
- continue;
- }
+ if (!object.containsKey(fieldName)) {
+ output.put(actualIdx, NullDatum.get());
+ continue;
+ }
- switch (types[actualIdx]) {
+ switch (types[actualIdx]) {
case BOOLEAN:
String boolStr = object.getAsString(fieldName);
if (boolStr != null) {
@@ -210,12 +216,7 @@ public class JsonLineDeserializer extends TextLineDeserializer {
default:
throw new NotImplementedException(types[actualIdx].name() + " is not supported.");
- }
}
- } catch (ParseException pe) {
- throw new TextLineParsingError(new String(line, TextDatum.DEFAULT_CHARSET), pe);
- } catch (Throwable e) {
- throw new IOException(e);
}
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/c665ae1f/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestDelimitedTextFile.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestDelimitedTextFile.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestDelimitedTextFile.java
index 8749925..7e4b7aa 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestDelimitedTextFile.java
+++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestDelimitedTextFile.java
@@ -160,4 +160,21 @@ public class TestDelimitedTextFile {
}
fail();
}
+
+ @Test
+ public void testIgnoreTruncatedValueErrorTolerance() throws IOException {
+ TajoConf conf = new TajoConf();
+ TableMeta meta = CatalogUtil.newTableMeta(CatalogProtos.StoreType.JSON);
+ meta.putOption(StorageUtil.TEXT_ERROR_TOLERANCE_MAXNUM, "1");
+ FileFragment fragment = getFileFragment("testErrorTolerance3.json");
+ Scanner scanner = StorageManager.getStorageManager(conf).getScanner(meta, schema, fragment);
+ scanner.init();
+
+ try {
+ Tuple tuple = scanner.next();
+ assertNull(tuple);
+ } finally {
+ scanner.close();
+ }
+ }
}