You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2014/08/13 08:05:14 UTC
git commit: TAJO-1000: TextDatum.asChars() is incorrect
if the client charset is different. (jinho)
Repository: tajo
Updated Branches:
refs/heads/master 7e31a3201 -> fb4135a3b
TAJO-1000: TextDatum.asChars() is incorrect if the client charset is different. (jinho)
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fb4135a3
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fb4135a3
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fb4135a3
Branch: refs/heads/master
Commit: fb4135a3bf16e61345ca2a5b6a6fea5b516e7a3e
Parents: 7e31a32
Author: jhkim <jh...@apache.org>
Authored: Wed Aug 13 15:04:14 2014 +0900
Committer: jhkim <jh...@apache.org>
Committed: Wed Aug 13 15:04:14 2014 +0900
----------------------------------------------------------------------
CHANGES | 3 ++
tajo-common/pom.xml | 15 ++++++++
.../java/org/apache/tajo/datum/TextDatum.java | 8 +++--
.../org/apache/tajo/datum/TestTextDatum.java | 36 ++++++++++++++++----
tajo-project/pom.xml | 12 +++++++
5 files changed, 65 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 5d500a8..956db89 100644
--- a/CHANGES
+++ b/CHANGES
@@ -112,6 +112,9 @@ Release 0.9.0 - unreleased
BUG FIXES
+ TAJO-1000: TextDatum.asChar() is incorrect, if client charset is different.
+ (jinho)
+
TAJO-995: HiveMetaStoreClient wrapper should retry the connection. (jinho)
TAJO-947: ColPartitionStoreExec can cause URISyntaxException due
http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/tajo-common/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-common/pom.xml b/tajo-common/pom.xml
index c0f3402..da2a7d0 100644
--- a/tajo-common/pom.xml
+++ b/tajo-common/pom.xml
@@ -209,6 +209,21 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.powermock</groupId>
+ <artifactId>powermock-module-junit4</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.powermock</groupId>
+ <artifactId>powermock-api-mockito</artifactId>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<profiles>
http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java
index e8424b3..b642168 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java
@@ -25,10 +25,14 @@ import org.apache.tajo.exception.InvalidCastException;
import org.apache.tajo.exception.InvalidOperationException;
import org.apache.tajo.util.MurmurHash;
+import java.nio.charset.Charset;
import java.util.Comparator;
public class TextDatum extends Datum {
+ static Charset defaultCharset = Charset.forName("UTF-8");
+
@Expose private final int size;
+ /* encoded in UTF-8 */
@Expose private final byte[] bytes;
public static final TextDatum EMPTY_TEXT = new TextDatum("");
@@ -41,7 +45,7 @@ public class TextDatum extends Datum {
}
public TextDatum(String string) {
- this(string.getBytes());
+ this(string.getBytes(defaultCharset));
}
@Override
@@ -85,7 +89,7 @@ public class TextDatum extends Datum {
}
public String asChars() {
- return new String(this.bytes);
+ return new String(this.bytes, defaultCharset);
}
@Override
http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java b/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java
index 7feab46..bf48f78 100644
--- a/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java
+++ b/tajo-common/src/test/java/org/apache/tajo/datum/TestTextDatum.java
@@ -18,21 +18,22 @@
package org.apache.tajo.datum;
-import org.junit.Test;
import org.apache.tajo.common.TajoDataTypes.Type;
+import org.junit.Test;
+import org.powermock.reflect.Whitebox;
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import java.nio.charset.Charset;
+
+import static org.junit.Assert.*;
public class TestTextDatum {
-
+
@Test
public final void testType() {
Datum d = DatumFactory.createText("12345");
assertEquals(d.type(), Type.TEXT);
}
-
+
@Test
public final void testAsInt4() {
Datum d = DatumFactory.createText("12345");
@@ -62,7 +63,7 @@ public class TestTextDatum {
Datum d = DatumFactory.createText("12345");
assertEquals("12345", d.asChars());
}
-
+
@Test
public final void testSize() {
Datum d = DatumFactory.createText("12345");
@@ -74,4 +75,25 @@ public class TestTextDatum {
Datum d = DatumFactory.createText("12345");
assertArrayEquals(d.asByteArray(), d.asTextBytes());
}
+
+ @Test
+ public final void testTextEncoding() {
+ String text = "나랏말싸미 듕귁에 달아 문자와로 서르 사맛디 아니할쎄";
+ TextDatum test = new TextDatum(text);
+
+ TextDatum fromUTF8 = new TextDatum(text.getBytes(Charset.forName("UTF-8")));
+ assertEquals(test, fromUTF8);
+
+ Charset systemCharSet = Charset.defaultCharset();
+ //hack for testing
+ Whitebox.setInternalState(Charset.class, "defaultCharset", Charset.forName("EUC-KR"));
+ assertEquals(Charset.forName("EUC-KR"), Charset.defaultCharset());
+
+ assertEquals(text, test.asChars());
+ assertNotEquals(new String(test.asByteArray()), test.asChars());
+
+ //restore
+ Whitebox.setInternalState(Charset.class, "defaultCharset", systemCharSet);
+ assertEquals(systemCharSet, Charset.defaultCharset());
+ }
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/fb4135a3/tajo-project/pom.xml
----------------------------------------------------------------------
diff --git a/tajo-project/pom.xml b/tajo-project/pom.xml
index 7c0da53..b3c1fe2 100644
--- a/tajo-project/pom.xml
+++ b/tajo-project/pom.xml
@@ -951,6 +951,18 @@
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>org.powermock</groupId>
+ <artifactId>powermock-module-junit4</artifactId>
+ <version>1.5.5</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.powermock</groupId>
+ <artifactId>powermock-api-mockito</artifactId>
+ <version>1.5.5</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
<version>3.6.6.Final</version>