Posted to commits@asterixdb.apache.org by ji...@apache.org on 2015/10/29 01:25:03 UTC

[1/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray

Repository: incubator-asterixdb-hyracks
Updated Branches:
  refs/heads/master 492b6fea9 -> 26c3b5361


http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 8f00aac..368ba2a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -18,87 +18,88 @@
  !-->
 
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <groupId>org.apache.hyracks</groupId>
-  <artifactId>fullstack</artifactId>
-  <version>0.2.17-SNAPSHOT</version>
-  <packaging>pom</packaging>
-  <name>hyracks-ecosystem-full-stack</name>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>org.apache.hyracks</groupId>
+    <artifactId>fullstack</artifactId>
+    <version>0.2.17-SNAPSHOT</version>
+    <packaging>pom</packaging>
+    <name>hyracks-ecosystem-full-stack</name>
 
-  <parent>
-    <groupId>org.apache</groupId>
-    <artifactId>apache</artifactId>
-    <version>LATEST</version>
-  </parent>
+    <parent>
+        <groupId>org.apache</groupId>
+        <artifactId>apache</artifactId>
+        <version>LATEST</version>
+    </parent>
 
-  <licenses>
-    <license>
-      <name>Apache License, Version 2.0</name>
-      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
-      <distribution>repo</distribution>
-      <comments>A business-friendly OSS license</comments>
-    </license>
-  </licenses>
+    <licenses>
+        <license>
+            <name>Apache License, Version 2.0</name>
+            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+            <distribution>repo</distribution>
+            <comments>A business-friendly OSS license</comments>
+        </license>
+    </licenses>
 
-  <properties>
-    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    <jvm.extraargs />
-    <!-- Definition of tests in various categories which may be excluded -->
-    <hanging.pregelix.tests>**/pregelix/**/FailureRecovery*.java</hanging.pregelix.tests>
-    <jdk.version>1.8</jdk.version>
-    <hivesterix.perf.tests>**/hivesterix/perf/PerfTestSuite.java</hivesterix.perf.tests>
-    <global.test.includes>**/*TestSuite.java,**/*Test.java</global.test.includes>
-    <global.test.excludes>**/Abstract*.java,${hanging.pregelix.tests},${hivesterix.perf.tests}</global.test.excludes>
-    <!-- Versions under dependencymanagement or used in many projects via properties -->
-    <hadoop.version>2.2.0</hadoop.version>
-    <junit.version>4.8.1</junit.version>
-    <commons.io.version>2.4</commons.io.version>
-  </properties>
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>junit</groupId>
-        <artifactId>junit</artifactId>
-        <version>${junit.version}</version>
-      </dependency>
-          <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-yarn-client</artifactId>
-            <version>${hadoop.version}</version>
-          </dependency>
-          <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-client</artifactId>
-            <version>${hadoop.version}</version>
-          </dependency>
-          <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-common</artifactId>
-            <version>${hadoop.version}</version>
-          </dependency>
-          <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-hdfs</artifactId>
-            <version>${hadoop.version}</version>
-          </dependency>
-          <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-minicluster</artifactId>
-            <version>${hadoop.version}</version>
-          </dependency>
-          <dependency>
-            <groupId>org.apache.hadoop</groupId>
-            <artifactId>hadoop-mapreduce-client-core</artifactId>
-            <version>${hadoop.version}</version>
-          </dependency>
-        <dependency>
-            <groupId>commons-io</groupId>
-            <artifactId>commons-io</artifactId>
-            <version>${commons.io.version}</version>
-        </dependency>
-    </dependencies>
-  </dependencyManagement>
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <jvm.extraargs/>
+        <!-- Definition of tests in various categories which may be excluded -->
+        <hanging.pregelix.tests>**/pregelix/**/FailureRecovery*.java</hanging.pregelix.tests>
+        <hivesterix.perf.tests>**/hivesterix/perf/PerfTestSuite.java</hivesterix.perf.tests>
+        <global.test.includes>**/*TestSuite.java,**/*Test.java</global.test.includes>
+        <global.test.excludes>**/Abstract*.java,${hanging.pregelix.tests},${hivesterix.perf.tests}
+        </global.test.excludes>
+        <!-- Versions under dependencymanagement or used in many projects via properties -->
+        <hadoop.version>2.2.0</hadoop.version>
+        <junit.version>4.8.1</junit.version>
+        <commons.io.version>2.4</commons.io.version>
+    </properties>
+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>junit</groupId>
+                <artifactId>junit</artifactId>
+                <version>${junit.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-yarn-client</artifactId>
+                <version>${hadoop.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-client</artifactId>
+                <version>${hadoop.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-common</artifactId>
+                <version>${hadoop.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-hdfs</artifactId>
+                <version>${hadoop.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-minicluster</artifactId>
+                <version>${hadoop.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hadoop</groupId>
+                <artifactId>hadoop-mapreduce-client-core</artifactId>
+                <version>${hadoop.version}</version>
+            </dependency>
+            <dependency>
+                <groupId>commons-io</groupId>
+                <artifactId>commons-io</artifactId>
+                <version>${commons.io.version}</version>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
 
   <build>
     <plugins>
@@ -167,6 +168,7 @@
            <exclude>**/*.conf</exclude>
            <exclude>**/src/main/resources/*.cleaned</exclude>
            <exclude>**/ClusterControllerService/**</exclude>
+           <exclude>**/target/**</exclude>
            <exclude>**/output/**</exclude>
            <exclude>**/*.iml</exclude>
       </excludes>


[2/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
index f372dbe..6e764c3 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
@@ -20,21 +20,19 @@
 package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
 import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.DataInput;
 import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
-import org.apache.hyracks.data.std.util.GrowableArray;
-
 public class NGramTokenizerTest {
 
     private char PRECHAR = '#';
@@ -72,11 +70,7 @@ public class NGramTokenizerTest {
 
     @Before
     public void init() throws Exception {
-        // serialize string into bytes
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        DataOutput dos = new DataOutputStream(baos);
-        dos.writeUTF(str);
-        inputBuffer = baos.toByteArray();
+        inputBuffer = UTF8StringUtil.writeStringToBytes(str);
     }
 
     void runTestNGramTokenizerWithCountedHashedUTF8Tokens(boolean prePost) throws IOException {
@@ -192,7 +186,8 @@ public class NGramTokenizerTest {
             ByteArrayInputStream bais = new ByteArrayInputStream(tokenData.getByteArray());
             DataInput in = new DataInputStream(bais);
 
-            String strGram = in.readUTF();
+            UTF8StringReader reader = new UTF8StringReader();
+            String strGram = reader.readUTF(in);
 
             // System.out.println("\"" + strGram + "\"");
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
index c42022e..78ba6a3 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
@@ -20,21 +20,19 @@
 package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
 import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
 import java.io.DataInput;
 import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 
-import junit.framework.Assert;
-
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 import org.junit.Before;
 import org.junit.Test;
 
-import org.apache.hyracks.data.std.util.GrowableArray;
+import junit.framework.Assert;
 
 public class WordTokenizerTest {
 
@@ -46,7 +44,8 @@ public class WordTokenizerTest {
     private ArrayList<Integer> expectedCountedHashedUTF8Tokens = new ArrayList<Integer>();
 
     private boolean isSeparator(char c) {
-        return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER || Character.getType(c) == Character.OTHER_NUMBER);
+        return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER
+                || Character.getType(c) == Character.OTHER_NUMBER);
     }
 
     private void tokenize(String text, ArrayList<String> tokens) {
@@ -78,10 +77,7 @@ public class WordTokenizerTest {
     @Before
     public void init() throws IOException {
         // serialize text into bytes
-        ByteArrayOutputStream baos = new ByteArrayOutputStream();
-        DataOutput dos = new DataOutputStream(baos);
-        dos.writeUTF(text);
-        inputBuffer = baos.toByteArray();
+        inputBuffer = UTF8StringUtil.writeStringToBytes(text);
 
         // init expected string tokens
         tokenize(text, expectedUTF8Tokens);
@@ -144,7 +140,8 @@ public class WordTokenizerTest {
     public void testWordTokenizerWithHashedUTF8Tokens() throws IOException {
 
         HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
-        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false,
+                tokenFactory);
 
         tokenizer.reset(inputBuffer, 0, inputBuffer.length);
 
@@ -175,7 +172,8 @@ public class WordTokenizerTest {
     public void testWordTokenizerWithUTF8Tokens() throws IOException {
 
         UTF8WordTokenFactory tokenFactory = new UTF8WordTokenFactory();
-        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false,
+                tokenFactory);
 
         tokenizer.reset(inputBuffer, 0, inputBuffer.length);
 
@@ -194,7 +192,8 @@ public class WordTokenizerTest {
             ByteArrayInputStream bais = new ByteArrayInputStream(tokenData.getByteArray());
             DataInput in = new DataInputStream(bais);
 
-            String strToken = in.readUTF();
+            UTF8StringReader reader = new UTF8StringReader();
+            String strToken = reader.readUTF(in);
 
             Assert.assertEquals(expectedUTF8Tokens.get(tokenCount), strToken);
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
index 36f615f..fd94870 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
@@ -88,7 +88,7 @@ public class LSMInvertedIndexTestUtils {
         fieldGens[0] = new DocumentStringFieldValueGenerator(2, 10, 10000, rnd);
         fieldGens[1] = new SortedIntegerFieldValueGenerator(0);
         ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE };
         TupleGenerator tupleGen = new TupleGenerator(fieldGens, fieldSerdes, 0);
         return tupleGen;
     }
@@ -98,7 +98,7 @@ public class LSMInvertedIndexTestUtils {
         fieldGens[0] = new PersonNameFieldValueGenerator(rnd, 0.5f);
         fieldGens[1] = new SortedIntegerFieldValueGenerator(0);
         ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE };
         TupleGenerator tupleGen = new TupleGenerator(fieldGens, fieldSerdes, 0);
         return tupleGen;
     }
@@ -110,7 +110,7 @@ public class LSMInvertedIndexTestUtils {
             case INMEMORY:
             case ONDISK:
             case LSM: {
-                fieldSerdes = new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE,
+                fieldSerdes = new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(),
                         IntegerSerializerDeserializer.INSTANCE };
                 break;
             }
@@ -118,7 +118,7 @@ public class LSMInvertedIndexTestUtils {
             case PARTITIONED_ONDISK:
             case PARTITIONED_LSM: {
                 // Such indexes also include the set-size for partitioning.
-                fieldSerdes = new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE,
+                fieldSerdes = new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(),
                         ShortSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
                 break;
             }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/pom.xml b/hyracks/hyracks-util/pom.xml
new file mode 100644
index 0000000..ca38040
--- /dev/null
+++ b/hyracks/hyracks-util/pom.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one
+  ~ or more contributor license agreements.  See the NOTICE file
+  ~ distributed with this work for additional information
+  ~ regarding copyright ownership.  The ASF licenses this file
+  ~ to you under the Apache License, Version 2.0 (the
+  ~ "License"); you may not use this file except in compliance
+  ~ with the License.  You may obtain a copy of the License at
+  ~
+  ~   http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing,
+  ~ software distributed under the License is distributed on an
+  ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  ~ KIND, either express or implied.  See the License for the
+  ~ specific language governing permissions and limitations
+  ~ under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>hyracks</artifactId>
+        <groupId>org.apache.hyracks</groupId>
+        <version>0.2.17-SNAPSHOT</version>
+    </parent>
+
+    <modelVersion>4.0.0</modelVersion>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <version>2.6</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
+    <artifactId>hyracks-util</artifactId>
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+        </dependency>
+    </dependencies>
+
+
+</project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Parser.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Parser.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Parser.java
new file mode 100644
index 0000000..257daee
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Parser.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.bytes;
+
+import java.util.Arrays;
+
+public class Base64Parser {
+    private static final byte[] DECODE_MAP = initDecodeMap();
+    private static final byte PADDING = 127;
+
+    private static byte[] initDecodeMap() {
+        byte[] map = new byte[128];
+        Arrays.fill(map, (byte) -1);
+
+        int i;
+        for (i = 'A'; i <= 'Z'; i++) {
+            map[i] = (byte) (i - 'A');
+        }
+        for (i = 'a'; i <= 'z'; i++) {
+            map[i] = (byte) (i - 'a' + 26);
+        }
+        for (i = '0'; i <= '9'; i++) {
+            map[i] = (byte) (i - '0' + 52);
+        }
+        map['+'] = 62;
+        map['/'] = 63;
+        map['='] = PADDING;
+
+        return map;
+    }
+
+    private byte[] quadruplet = new byte[4];
+    private byte[] storage;
+    private int length = 0;
+
+    /**
+     * Parses the Base64 sequence from {@code input} into {@code out}.
+     * Note: {@code out} must have enough space; check {@link #guessLength(char[], int, int)} first.
+     *
+     * @param input
+     * @param start
+     * @param length
+     * @param out
+     * @param offset
+     * @return the number of bytes written
+     */
+    public int parseBase64String(char[] input, int start, int length, byte[] out, int offset) {
+        int outLength = 0;
+
+        int i;
+        int q = 0;
+
+        // convert each quadruplet to three bytes.
+        for (i = 0; i < length; i++) {
+            char ch = input[start + i];
+            byte v = DECODE_MAP[ch];
+
+            if (v == -1) {
+                throw new IllegalArgumentException("Invalid Base64 character");
+            }
+            quadruplet[q++] = v;
+
+            if (q == 4) {
+                outLength += dumpQuadruplet(out, offset + outLength);
+                q = 0;
+            }
+        }
+
+        return outLength;
+    }
+
+    /**
+     * Parses the Base64 sequence from {@code input} into {@code out}.
+     * Note: {@code out} must have enough space; check {@link #guessLength(byte[], int, int)} first.
+     *
+     * @param input
+     * @param start
+     * @param length
+     * @param out
+     * @param offset
+     * @return the number of written bytes
+     */
+    public int parseBase64String(byte[] input, int start, int length, byte[] out, int offset) {
+        int outLength = 0;
+
+        int i;
+        int q = 0;
+
+        // convert each quadruplet to three bytes.
+        for (i = 0; i < length; i++) {
+            char ch = (char) input[start + i];
+            byte v = DECODE_MAP[ch];
+
+            if (v == -1) {
+                throw new IllegalArgumentException("Invalid Base64 character");
+            }
+            quadruplet[q++] = v;
+
+            if (q == 4) {
+                outLength += dumpQuadruplet(out, offset + outLength);
+                q = 0;
+            }
+        }
+
+        return outLength;
+    }
+
+    /**
+     * Computes the length of the binary data speculatively.
+     * Our requirement is to create a byte[] of the exact length to store the binary data.
+     * If we do this in a straightforward way, it takes two passes over the data.
+     * Experiments show that this is a non-trivial overhead (35% or so is spent on
+     * the first pass in calculating the length.)
+     * So the approach here is that we compute the length speculatively, without looking
+     * at the whole contents. The obtained speculative value is never less than the
+     * actual length of the binary data, but it may be bigger. So if the speculation
+     * goes wrong, we'll pay the cost of reallocation and buffer copying.
+     * If the base64 text is tightly packed with no indentation or illegal characters
+     * (like what most web services produce), then the speculation of this method
+     * will be correct, so we get the performance benefit.
+     */
+    public static int guessLength(char[] chars, int start, int length) {
+
+        // compute the tail '=' chars
+        int j = length - 1;
+        for (; j >= 0; j--) {
+            byte code = DECODE_MAP[chars[start + j]];
+            if (code == PADDING) {
+                continue;
+            }
+            if (code == -1) // most likely this base64 text is indented. go with the upper bound
+            {
+                return length / 4 * 3;
+            }
+            break;
+        }
+
+        j++;    // text.charAt(j) is now at some base64 char, so +1 to make it the size
+        int padSize = length - j;
+        if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
+        {
+            return length / 4 * 3;
+        }
+
+        // so far this base64 looks like it's unindented tightly packed base64.
+        // take a chance and create an array with the expected size
+        return length / 4 * 3 - padSize;
+    }
+
+    public static int guessLength(byte[] chars, int start, int length) {
+
+        // compute the tail '=' chars
+        int j = length - 1;
+        for (; j >= 0; j--) {
+            byte code = DECODE_MAP[chars[start + j]];
+            if (code == PADDING) {
+                continue;
+            }
+            if (code == -1) // most likely this base64 text is indented. go with the upper bound
+            {
+                return length / 4 * 3;
+            }
+            break;
+        }
+
+        j++;    // text.charAt(j) is now at some base64 char, so +1 to make it the size
+        int padSize = length - j;
+        if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
+        {
+            return length / 4 * 3;
+        }
+
+        // so far this base64 looks like it's unindented tightly packed base64.
+        // take a chance and create an array with the expected size
+        return length / 4 * 3 - padSize;
+    }
+
+    public byte[] getByteArray() {
+        return storage;
+    }
+
+    public int getLength() {
+        return length;
+    }
+
+    /**
+     * Same as {@link #parseBase64String(byte[], int, int, byte[], int)}, but the storage is provided for the caller.
+     *
+     * @param input
+     * @param start
+     * @param length
+     */
+    public void generatePureByteArrayFromBase64String(byte[] input, int start, int length) {
+        // The Base64 character length equals the UTF-8 length
+        if (length % 4 != 0) {
+            throw new IllegalArgumentException(
+                    "Invalid Base64 string, the length of the string should be a multiple of 4");
+        }
+        final int buflen = guessLength(input, start, length);
+        ensureCapacity(buflen);
+        this.length = parseBase64String(input, start, length, storage, 0);
+    }
+
+    public void generatePureByteArrayFromBase64String(char[] input, int start, int length) {
+        if (length % 4 != 0) {
+            throw new IllegalArgumentException(
+                    "Invalid Base64 string, the length of the string should be a multiple of 4");
+        }
+        final int buflen = guessLength(input, start, length);
+        ensureCapacity(buflen);
+        this.length = parseBase64String(input, start, length, storage, 0);
+    }
+
+    private void ensureCapacity(int length) {
+        if (storage == null || storage.length < length) {
+            storage = new byte[length];
+        }
+    }
+
+    private int dumpQuadruplet(byte[] out, int offset) {
+        int outLength = 0;
+        // quadruplet is now filled.
+        out[offset + outLength++] = (byte) ((quadruplet[0] << 2) | (quadruplet[1] >> 4));
+        if (quadruplet[2] != PADDING) {
+            out[offset + outLength++] = (byte) ((quadruplet[1] << 4) | (quadruplet[2] >> 2));
+        }
+        if (quadruplet[3] != PADDING) {
+            out[offset + outLength++] = (byte) ((quadruplet[2] << 6) | (quadruplet[3]));
+        }
+        return outLength;
+    }
+
+}
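
For reference, a minimal usage sketch of the Base64Parser added above (the wrapper class name and sample string are illustrative only): guessLength() sizes the output buffer conservatively, and parseBase64String() fills it and returns the number of bytes written.

    import org.apache.hyracks.util.bytes.Base64Parser;

    public class Base64ParserExample {
        public static void main(String[] args) {
            char[] base64 = "aHlyYWNrcw==".toCharArray();          // Base64 of "hyracks"
            Base64Parser parser = new Base64Parser();
            // guessLength() never under-estimates, so the buffer is always big enough
            byte[] out = new byte[Base64Parser.guessLength(base64, 0, base64.length)];
            int written = parser.parseBase64String(base64, 0, base64.length, out, 0);
            System.out.println(new String(out, 0, written));       // hyracks
        }
    }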

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Printer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Printer.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Printer.java
new file mode 100644
index 0000000..0e1c078
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Printer.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.bytes;
+
+import java.io.IOException;
+
+public class Base64Printer {
+    /**
+     * Encodes a byte array into a {@code Appendable} stream by doing base64 encoding.
+     *
+     * @return the same input stream.
+     */
+    public static Appendable printBase64Binary(byte[] input, int offset, int len, Appendable appendable)
+            throws IOException {
+        // encode elements until only 1 or 2 elements are left to encode
+        int remaining = len;
+        int i;
+        for (i = offset; remaining >= 3; remaining -= 3, i += 3) {
+            appendable.append(encode(input[i] >> 2));
+            appendable.append(encode(
+                    ((input[i] & 0x3) << 4)
+                            | ((input[i + 1] >> 4) & 0xF)));
+            appendable.append(encode(
+                    ((input[i + 1] & 0xF) << 2)
+                            | ((input[i + 2] >> 6) & 0x3)));
+            appendable.append(encode(input[i + 2] & 0x3F));
+        }
+        // encode when exactly 1 element (left) to encode
+        if (remaining == 1) {
+            appendable.append(encode(input[i] >> 2));
+            appendable.append(encode(((input[i]) & 0x3) << 4));
+            appendable.append('=');
+            appendable.append('=');
+        }
+        // encode when exactly 2 elements (left) to encode
+        if (remaining == 2) {
+            appendable.append(encode(input[i] >> 2));
+            appendable.append(encode(((input[i] & 0x3) << 4)
+                    | ((input[i + 1] >> 4) & 0xF)));
+            appendable.append(encode((input[i + 1] & 0xF) << 2));
+            appendable.append('=');
+        }
+        return appendable;
+    }
+
+    /**
+     * Encodes a byte array into a char array by doing base64 encoding.
+     * The caller must supply a big enough buffer.
+     *
+     * @return the value of {@code ptr+((len+2)/3)*4}, which is the new offset
+     * in the output buffer where the further bytes should be placed.
+     */
+    public static int printBase64Binary(byte[] input, int offset, int len, char[] buf, int ptr) {
+        // encode elements until only 1 or 2 elements are left to encode
+        int remaining = len;
+        int i;
+        for (i = offset; remaining >= 3; remaining -= 3, i += 3) {
+            buf[ptr++] = encode(input[i] >> 2);
+            buf[ptr++] = encode(
+                    ((input[i] & 0x3) << 4)
+                            | ((input[i + 1] >> 4) & 0xF));
+            buf[ptr++] = encode(
+                    ((input[i + 1] & 0xF) << 2)
+                            | ((input[i + 2] >> 6) & 0x3));
+            buf[ptr++] = encode(input[i + 2] & 0x3F);
+        }
+        // encode when exactly 1 element (left) to encode
+        if (remaining == 1) {
+            buf[ptr++] = encode(input[i] >> 2);
+            buf[ptr++] = encode(((input[i]) & 0x3) << 4);
+            buf[ptr++] = '=';
+            buf[ptr++] = '=';
+        }
+        // encode when exactly 2 elements (left) to encode
+        if (remaining == 2) {
+            buf[ptr++] = encode(input[i] >> 2);
+            buf[ptr++] = encode(((input[i] & 0x3) << 4)
+                    | ((input[i + 1] >> 4) & 0xF));
+            buf[ptr++] = encode((input[i + 1] & 0xF) << 2);
+            buf[ptr++] = '=';
+        }
+        return ptr;
+    }
+
+    private static final char[] encodeMap = initEncodeMap();
+
+    private static char[] initEncodeMap() {
+        char[] map = new char[64];
+        int i;
+        for (i = 0; i < 26; i++) {
+            map[i] = (char) ('A' + i);
+        }
+        for (i = 26; i < 52; i++) {
+            map[i] = (char) ('a' + (i - 26));
+        }
+        for (i = 52; i < 62; i++) {
+            map[i] = (char) ('0' + (i - 52));
+        }
+        map[62] = '+';
+        map[63] = '/';
+
+        return map;
+    }
+
+    public static char encode(int i) {
+        return encodeMap[i & 0x3F];
+    }
+}
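
For reference, a minimal usage sketch of the Base64Printer counterpart (wrapper class name is illustrative only): the static printBase64Binary() helper appends the Base64 encoding of a byte range to any Appendable.

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    import org.apache.hyracks.util.bytes.Base64Printer;

    public class Base64PrinterExample {
        public static void main(String[] args) throws IOException {
            byte[] data = "hyracks".getBytes(StandardCharsets.UTF_8);
            StringBuilder sb = new StringBuilder();
            // appends the Base64 form of data[0, length) to any Appendable
            Base64Printer.printBase64Binary(data, 0, data.length, sb);
            System.out.println(sb);                                 // aHlyYWNrcw==
        }
    }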

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexParser.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexParser.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexParser.java
new file mode 100644
index 0000000..ba7276b
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexParser.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.bytes;
+
+public class HexParser {
+    public static boolean isValidHexChar(char c) {
+        if (c >= '0' && c <= '9'
+                || c >= 'a' && c <= 'f'
+                || c >= 'A' && c <= 'F') {
+            return true;
+        }
+        return false;
+    }
+
+    public static int getValueFromValidHexChar(char c) {
+        if (c >= '0' && c <= '9') {
+            return c - '0';
+        }
+        if (c >= 'a' && c <= 'f') {
+            return 10 + c - 'a';
+        }
+        if (c >= 'A' && c <= 'F') {
+            return 10 + c - 'A';
+        }
+        throw new IllegalArgumentException("Invalid hex character : " + c);
+    }
+
+    private byte[] storage;
+    private int length;
+
+    public byte[] getByteArray() {
+        return storage;
+    }
+
+    public int getLength() {
+        return length;
+    }
+
+    public void generateByteArrayFromHexString(char[] input, int start, int length) {
+        if (length % 2 != 0) {
+            throw new IllegalArgumentException(
+                    "Invalid hex string for binary type: the string length should be a muliple of 2.");
+        }
+        this.length = length / 2;
+        ensureCapacity(this.length);
+        generateByteArrayFromHexString(input, start, length, storage, 0);
+    }
+
+    public void generateByteArrayFromHexString(byte[] input, int start, int length) {
+        if (length % 2 != 0) {
+            throw new IllegalArgumentException(
+                    "Invalid hex string for binary type: the string length should be a muliple of 2.");
+        }
+        this.length = length / 2;
+        ensureCapacity(this.length);
+        generateByteArrayFromHexString(input, start, length, storage, 0);
+    }
+
+    private void ensureCapacity(int capacity) {
+        if (storage == null || storage.length < capacity) {
+            storage = new byte[capacity];
+        }
+    }
+
+    public static void generateByteArrayFromHexString(char[] input, int start, int length, byte[] output,
+            int offset) {
+        for (int i = 0; i < length; i += 2) {
+            output[offset + i / 2] = (byte) ((getValueFromValidHexChar(input[start + i]) << 4) +
+                    getValueFromValidHexChar(input[start + i + 1]));
+        }
+    }
+
+    public static void generateByteArrayFromHexString(byte[] input, int start, int length, byte[] output,
+            int offset) {
+        for (int i = 0; i < length; i += 2) {
+            output[offset + i / 2] = (byte) ((getValueFromValidHexChar((char) input[start + i]) << 4) +
+                    getValueFromValidHexChar((char) input[start + i + 1]));
+        }
+    }
+}
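
For reference, a minimal usage sketch of the HexParser added above (wrapper class name and sample input are illustrative only): the stateful variant reuses an internal byte buffer sized by ensureCapacity().

    import org.apache.hyracks.util.bytes.HexParser;

    public class HexParserExample {
        public static void main(String[] args) {
            char[] hex = "DEADBEEF".toCharArray();
            HexParser parser = new HexParser();
            // fills the parser's internal buffer; getByteArray()/getLength() expose the result
            parser.generateByteArrayFromHexString(hex, 0, hex.length);
            System.out.println(parser.getLength());                 // 4
            System.out.println(parser.getByteArray()[0] & 0xff);    // 222, i.e. 0xDE
        }
    }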

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexPrinter.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexPrinter.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexPrinter.java
new file mode 100644
index 0000000..5a9c064
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexPrinter.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.bytes;
+
+import java.io.IOException;
+
+public class HexPrinter {
+    public enum CASE {
+        LOWER_CASE,
+        UPPER_CASE,
+    }
+
+    public static byte hex(int i, CASE c) {
+        switch (c) {
+            case LOWER_CASE:
+                return (byte) (i < 10 ? i + '0' : i + ('a' - 10));
+            case UPPER_CASE:
+                return (byte) (i < 10 ? i + '0' : i + ('A' - 10));
+        }
+        return Byte.parseByte(null);
+    }
+
+    public static Appendable printHexString(byte[] bytes, int start, int length, Appendable appendable)
+            throws IOException {
+        for (int i = 0; i < length; ++i) {
+            appendable.append((char) hex((bytes[start + i] >>> 4) & 0x0f, CASE.UPPER_CASE));
+            appendable.append((char) hex((bytes[start + i] & 0x0f), CASE.UPPER_CASE));
+        }
+        return appendable;
+    }
+}
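
For reference, a minimal usage sketch of the HexPrinter added above (wrapper class name is illustrative only): printHexString() writes two upper-case hex characters per input byte.

    import java.io.IOException;

    import org.apache.hyracks.util.bytes.HexPrinter;

    public class HexPrinterExample {
        public static void main(String[] args) throws IOException {
            byte[] bytes = { (byte) 0xDE, (byte) 0xAD, (byte) 0xBE, (byte) 0xEF };
            StringBuilder sb = new StringBuilder();
            HexPrinter.printHexString(bytes, 0, bytes.length, sb);  // upper-case hex
            System.out.println(sb);                                 // DEADBEEF
        }
    }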

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoder.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoder.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoder.java
new file mode 100644
index 0000000..5a716b4
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoder.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.encoding;
+
+import java.io.DataInput;
+import java.io.IOException;
+
+/**
+ * Encodes positive integers in a variable-bytes format.
+ *
+ * Each byte stores seven bits of the number. The first bit of each byte indicates whether more bytes follow.
+ * Specifically, if the first bit is set, then we shift the current value by seven and
+ * continue to read the next byte until we meet a byte whose first bit is unset.
+ *
+ * e.g. if the number is < 128, it is stored in one byte and the byte value stays the same as the original.
+ * To store the number 255 (0xff), it is encoded as [0x81, 0x7f]. To decode that value, the decoder reads 0x81,
+ * takes the current value as (0x81 & 0x7f) = 0x01, and the set first bit tells it that there are more bytes to
+ * be read. When it meets 0x7f, whose first bit is unset, it knows that it is the final byte to decode.
+ * Finally it returns (0x01 << 7) + 0x7f = 255.
+ *
+ */
+public class VarLenIntEncoderDecoder {
+    // the decimal form sometimes makes it easier to get a sense of how big each bound is.
+    public static final int BOUND_ONE_BYTE = 128; // 1 << 7
+    public static final int BOUND_TWO_BYTE = 16384; // 1 << 14
+    public static final int BOUND_THREE_BYTE = 2097152; // 1 << 21
+    public static final int BOUND_FOUR_BYTE = 268435456; // 1 << 28
+    public static final int BOUND_FIVE_BYTE = Integer.MAX_VALUE;
+
+    public static final int ENCODE_MASK = 0x0000007F;
+    public static final byte CONTINUE_CHUNK = (byte) 0x80;
+    public static final byte DECODE_MASK = 0x7F;
+
+    // calculate the number of bytes needed for encoding
+    public static int getBytesRequired(int length) {
+        if (length < 0) {
+            throw new IllegalArgumentException("The length must be a non-negative value");
+        }
+
+        int byteCount = 0;
+        while (length > ENCODE_MASK) {
+            length = length >>> 7;
+            byteCount++;
+        }
+        return byteCount + 1;
+    }
+
+    public static int decode(DataInput in) throws IOException {
+        int sum = 0;
+        byte b = in.readByte();
+        while ((b & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
+            sum = (sum + (b & DECODE_MASK)) << 7;
+            b = in.readByte();
+        }
+        sum += b;
+        return sum;
+    }
+
+    public static int decode(byte[] srcBytes, int startPos) {
+        int sum = 0;
+        while ((srcBytes[startPos] & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
+            sum = (sum + (srcBytes[startPos] & DECODE_MASK)) << 7;
+            startPos++;
+        }
+        sum += srcBytes[startPos++];
+        return sum;
+    }
+
+    public static int encode(int lengthVal, byte[] destBytes, int startPos) {
+        if (lengthVal < 0) {
+            throw new IllegalArgumentException("The length must be a non-negative value");
+        }
+        int nextPos = startPos;
+        while (lengthVal > ENCODE_MASK) {
+            destBytes[nextPos++] = (byte) (lengthVal & ENCODE_MASK);
+            lengthVal = lengthVal >>> 7;
+        }
+        destBytes[nextPos++] = (byte) lengthVal;
+
+        // reverse order to optimize for decoding speed
+        int length = nextPos - startPos;
+        int i = 0;
+        for (; i < length / 2; i++) {
+            byte b = destBytes[startPos + i];
+            destBytes[startPos + i] = (byte) (destBytes[startPos + length - 1 - i] | CONTINUE_CHUNK);
+            destBytes[startPos + length - 1 - i] = (byte) (b | CONTINUE_CHUNK);
+        }
+        destBytes[startPos + i] |= CONTINUE_CHUNK;
+        destBytes[nextPos - 1] &= ENCODE_MASK;
+        return length;
+    }
+
+    public static VarLenIntDecoder createDecoder() {
+        return new VarLenIntDecoder();
+    }
+
+    // keep the stateful version for the ease of the continuously decoding behaviors.
+    public static class VarLenIntDecoder {
+
+        private byte[] bytes = null;
+        private int pos = 0;
+
+        public VarLenIntDecoder reset(byte[] bytes, int pos) {
+            this.bytes = bytes;
+            this.pos = pos;
+            return this;
+        }
+
+        /**
+         * @return the int value
+         */
+        public int decode() {
+            int sum = 0;
+            while ((bytes[pos] & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
+                sum = (sum + (bytes[pos] & DECODE_MASK)) << 7;
+                pos++;
+            }
+            sum += bytes[pos++];
+            return sum;
+        }
+
+        public int getPos() {
+            return pos;
+        }
+
+    }
+
+}
\ No newline at end of file
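
For reference, a minimal round-trip sketch of the encoder/decoder above (wrapper class name is illustrative only); it reproduces the 255 -> [0x81, 0x7f] example from the class comment.

    import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;

    public class VarLenIntExample {
        public static void main(String[] args) {
            // 255 needs two bytes: getBytesRequired(255) == 2
            byte[] buf = new byte[VarLenIntEncoderDecoder.getBytesRequired(255)];
            VarLenIntEncoderDecoder.encode(255, buf, 0);            // buf == { (byte) 0x81, 0x7f }
            System.out.println(VarLenIntEncoderDecoder.decode(buf, 0));   // 255

            // the stateful decoder keeps its position for continuous decoding
            VarLenIntEncoderDecoder.VarLenIntDecoder dec = VarLenIntEncoderDecoder.createDecoder();
            System.out.println(dec.reset(buf, 0).decode());         // 255
        }
    }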

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
new file mode 100644
index 0000000..3cd0300
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import java.io.DataInput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.UTFDataFormatException;
+
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+public class UTF8StringReader implements Serializable {
+
+    private byte[] bytearr = null;
+    private char[] chararr = null;
+
+    /**
+     * Reads from the
+     * stream <code>in</code> a representation
+     * of a Unicode  character string encoded in
+     * <a href="DataInput.html#modified-utf-8">modified UTF-8</a> format;
+     * this string of characters is then returned as a <code>String</code>.
+     * The details of the modified UTF-8 representation
+     * are  exactly the same as for the <code>readUTF</code>
+     * method of <code>DataInput</code>.
+     *
+     * @param in a data input stream.
+     * @return a Unicode string.
+     * @throws EOFException           if the input stream reaches the end
+     *                                before all the bytes.
+     * @throws IOException            the stream has been closed and the contained
+     *                                input stream does not support reading after close, or
+     *                                another I/O error occurs.
+     * @throws UTFDataFormatException if the bytes do not represent a
+     *                                valid modified UTF-8 encoding of a Unicode string.
+     * @see java.io.DataInputStream#readUnsignedShort()
+     */
+    public final String readUTF(DataInput in) throws IOException {
+        int utflen = VarLenIntEncoderDecoder.decode(in);
+
+        if (bytearr == null || bytearr.length < utflen) {
+            bytearr = new byte[utflen * 2];
+            chararr = new char[utflen * 2];
+        }
+
+        int c, char2, char3;
+        int count = 0;
+        int chararr_count = 0;
+
+        in.readFully(bytearr, 0, utflen);
+
+        while (count < utflen) {
+            c = (int) bytearr[count] & 0xff;
+            if (c > 127)
+                break;
+            count++;
+            chararr[chararr_count++] = (char) c;
+        }
+
+        while (count < utflen) {
+            c = (int) bytearr[count] & 0xff;
+            switch (c >> 4) {
+                case 0:
+                case 1:
+                case 2:
+                case 3:
+                case 4:
+                case 5:
+                case 6:
+                case 7:
+                    /* 0xxxxxxx*/
+                    count++;
+                    chararr[chararr_count++] = (char) c;
+                    break;
+                case 12:
+                case 13:
+                    /* 110x xxxx   10xx xxxx*/
+                    count += 2;
+                    if (count > utflen)
+                        throw new UTFDataFormatException(
+                                "malformed input: partial character at end");
+                    char2 = (int) bytearr[count - 1];
+                    if ((char2 & 0xC0) != 0x80)
+                        throw new UTFDataFormatException(
+                                "malformed input around byte " + count);
+                    chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
+                            (char2 & 0x3F));
+                    break;
+                case 14:
+                    /* 1110 xxxx  10xx xxxx  10xx xxxx */
+                    count += 3;
+                    if (count > utflen)
+                        throw new UTFDataFormatException(
+                                "malformed input: partial character at end");
+                    char2 = (int) bytearr[count - 2];
+                    char3 = (int) bytearr[count - 1];
+                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+                        throw new UTFDataFormatException(
+                                "malformed input around byte " + (count - 1));
+                    chararr[chararr_count++] = (char) (((c & 0x0F) << 12) |
+                            ((char2 & 0x3F) << 6) |
+                            ((char3 & 0x3F) << 0));
+                    break;
+                default:
+                    /* 10xx xxxx,  1111 xxxx */
+                    throw new UTFDataFormatException(
+                            "malformed input around byte " + count);
+            }
+        }
+        // The number of chars produced may be less than utflen
+        return new String(chararr, 0, chararr_count);
+    }
+}
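
For reference, a minimal round-trip sketch mirroring the tokenizer test changes earlier in this commit (wrapper class name is illustrative only): UTF8StringUtil.writeStringToBytes() produces the length-prefixed bytes that UTF8StringReader.readUTF() consumes.

    import java.io.ByteArrayInputStream;
    import java.io.DataInputStream;
    import java.io.IOException;

    import org.apache.hyracks.util.string.UTF8StringReader;
    import org.apache.hyracks.util.string.UTF8StringUtil;

    public class UTF8StringReaderExample {
        public static void main(String[] args) throws IOException {
            // writeStringToBytes() emits the VarLen length prefix followed by modified UTF-8
            byte[] bytes = UTF8StringUtil.writeStringToBytes("hello world");
            UTF8StringReader reader = new UTF8StringReader();
            String decoded = reader.readUTF(new DataInputStream(new ByteArrayInputStream(bytes)));
            System.out.println(decoded);                            // hello world
        }
    }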

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
new file mode 100644
index 0000000..7929691
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -0,0 +1,422 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+/**
+ * A helper class for operating on UTF8Strings in Hyracks.
+ * Most of the code was migrated from asterix-fuzzyjoin and hyracks-storage-am-invertedindex.
+ */
+public class UTF8StringUtil {
+    public static char charAt(byte[] b, int s) {
+        if (s >= b.length) {
+            throw new ArrayIndexOutOfBoundsException("Position " + s + " is out of bounds (array length " + b.length + ")");
+        }
+        int c = b[s] & 0xff;
+        switch (c >> 4) {
+            case 0:
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+            case 7:
+                return (char) c;
+
+            case 12:
+            case 13:
+                return (char) (((c & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
+
+            case 14:
+                return (char) (((c & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
+
+            default:
+                throw new IllegalArgumentException();
+        }
+    }
+
+    public static int charSize(byte[] b, int s) {
+        int c = b[s] & 0xff;
+        switch (c >> 4) {
+            case 0:
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 5:
+            case 6:
+            case 7:
+                return 1;
+
+            case 12:
+            case 13:
+                return 2;
+
+            case 14:
+                return 3;
+        }
+        throw new IllegalStateException();
+    }
+
+    public static int getModifiedUTF8Len(char c) {
+        if (c >= 0x0001 && c <= 0x007F) {
+            return 1;
+        } else if (c <= 0x07FF) {
+            return 2;
+        } else {
+            return 3;
+        }
+    }
+
+    public static int writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
+        if (c >= 0x0001 && c <= 0x007F) {
+            dos.writeByte(c);
+            return 1;
+        } else if (c <= 0x07FF) {
+            dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
+            dos.writeByte((byte) (0x80 | (c & 0x3F)));
+            return 2;
+        } else {
+            dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
+            dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
+            dos.writeByte((byte) (0x80 | (c & 0x3F)));
+            return 3;
+        }
+    }
+
+    public static int writeCharAsModifiedUTF8(char c, OutputStream dos) throws IOException {
+        if (c >= 0x0001 && c <= 0x007F) {
+            dos.write(c);
+            return 1;
+        } else if (c <= 0x07FF) {
+            dos.write((byte) (0xC0 | ((c >> 6) & 0x3F)));
+            dos.write((byte) (0x80 | (c & 0x3F)));
+            return 2;
+        } else {
+            dos.write((byte) (0xE0 | ((c >> 12) & 0x0F)));
+            dos.write((byte) (0x80 | ((c >> 6) & 0x3F)));
+            dos.write((byte) (0x80 | (c & 0x3F)));
+            return 3;
+        }
+    }
+
+    public static int getStringLength(byte[] b, int s) {
+        int len = getUTFLength(b, s);
+        int pos = s + getNumBytesToStoreLength(len);
+        int end = pos + len;
+        int charCount = 0;
+        while (pos < end) {
+            charCount++;
+            pos += charSize(b, pos);
+        }
+        return charCount;
+    }
+
+    public static int getUTFLength(byte[] b, int s) {
+        return VarLenIntEncoderDecoder.decode(b, s);
+    }
+
+    public static int getNumBytesToStoreLength(int strlen) {
+        return VarLenIntEncoderDecoder.getBytesRequired(strlen);
+    }
+
+    public static int UTF8ToCodePoint(byte[] b, int s) {
+        if (b[s] >> 7 == 0) {
+            // 1 byte
+            return b[s];
+        } else if ((b[s] & 0xe0) == 0xc0) { /*0xe0 = 0b1110000*/
+            // 2 bytes
+            return ((int) (b[s] & 0x1f)) << 6 | /*0x3f = 0b00111111*/
+                    ((int) (b[s + 1] & 0x3f));
+        } else if ((b[s] & 0xf0) == 0xe0) {
+            // 3bytes
+            return ((int) (b[s] & 0xf)) << 12 | ((int) (b[s + 1] & 0x3f)) << 6
+                    | ((int) (b[s + 2] & 0x3f));
+        } else if ((b[s] & 0xf8) == 0xf0) {
+            // 4bytes
+            return ((int) (b[s] & 0x7)) << 18 | ((int) (b[s + 1] & 0x3f)) << 12
+                    | ((int) (b[s + 2] & 0x3f)) << 6 | ((int) (b[s + 3] & 0x3f));
+        } else if ((b[s] & 0xfc) == 0xf8) {
+            // 5bytes
+            return ((int) (b[s] & 0x3)) << 24 | ((int) (b[s + 1] & 0x3f)) << 18
+                    | ((int) (b[s + 2] & 0x3f)) << 12 | ((int) (b[s + 3] & 0x3f)) << 6
+                    | ((int) (b[s + 4] & 0x3f));
+        } else if ((b[s] & 0xfe) == 0xfc) {
+            // 6bytes
+            return ((int) (b[s] & 0x1)) << 30 | ((int) (b[s + 1] & 0x3f)) << 24
+                    | ((int) (b[s + 2] & 0x3f)) << 18 | ((int) (b[s + 3] & 0x3f)) << 12
+                    | ((int) (b[s + 4] & 0x3f)) << 6 | ((int) (b[s + 5] & 0x3f));
+        }
+        return 0;
+    }
+
+    public static int codePointToUTF8(int c, byte[] outputUTF8) {
+        if (c < 0x80) {
+            outputUTF8[0] = (byte) (c & 0x7F /* mask 7 lsb: 0b1111111 */);
+            return 1;
+        } else if (c < 0x0800) {
+            outputUTF8[0] = (byte) (c >> 6 & 0x1F | 0xC0);
+            outputUTF8[1] = (byte) (c & 0x3F | 0x80);
+            return 2;
+        } else if (c < 0x010000) {
+            outputUTF8[0] = (byte) (c >> 12 & 0x0F | 0xE0);
+            outputUTF8[1] = (byte) (c >> 6 & 0x3F | 0x80);
+            outputUTF8[2] = (byte) (c & 0x3F | 0x80);
+            return 3;
+        } else if (c < 0x200000) {
+            outputUTF8[0] = (byte) (c >> 18 & 0x07 | 0xF0);
+            outputUTF8[1] = (byte) (c >> 12 & 0x3F | 0x80);
+            outputUTF8[2] = (byte) (c >> 6 & 0x3F | 0x80);
+            outputUTF8[3] = (byte) (c & 0x3F | 0x80);
+            return 4;
+        } else if (c < 0x4000000) {
+            outputUTF8[0] = (byte) (c >> 24 & 0x03 | 0xF8);
+            outputUTF8[1] = (byte) (c >> 18 & 0x3F | 0x80);
+            outputUTF8[2] = (byte) (c >> 12 & 0x3F | 0x80);
+            outputUTF8[3] = (byte) (c >> 6 & 0x3F | 0x80);
+            outputUTF8[4] = (byte) (c & 0x3F | 0x80);
+            return 5;
+        } else if (c < 0x80000000) {
+            outputUTF8[0] = (byte) (c >> 30 & 0x01 | 0xFC);
+            outputUTF8[1] = (byte) (c >> 24 & 0x3F | 0x80);
+            outputUTF8[2] = (byte) (c >> 18 & 0x3F | 0x80);
+            outputUTF8[3] = (byte) (c >> 12 & 0x3F | 0x80);
+            outputUTF8[4] = (byte) (c >> 6 & 0x3F | 0x80);
+            outputUTF8[5] = (byte) (c & 0x3F | 0x80);
+            return 6;
+        }
+        return 0;
+    }
+
+    /**
+     * Compute the normalized key of the UTF8 string.
+     * The normalized key in Hyracks is mainly used to speed up comparisons between pointable data.
+     * In the UTF8StringPointable case, we compute the integer value from the first two chars.
+     * The comparator first uses this integer to get the result (<, >, or =) and checks
+     * the actual bytes only if the normalized keys are equal. Thus the normalized key must be
+     * consistent with the comparison result.
+     */
+    public static int normalize(byte[] bytes, int start) {
+        int len = getUTFLength(bytes, start);
+        long nk = 0;
+        int offset = start + getNumBytesToStoreLength(len);
+        for (int i = 0; i < 2; ++i) {
+            nk <<= 16;
+            if (i < len) {
+                nk += ((int) charAt(bytes, offset)) & 0xffff;
+                offset += charSize(bytes, offset);
+            }
+        }
+        return (int) (nk >> 1); // make it always positive.
+    }
+
+    public static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
+        return compareTo(thisBytes, thisStart, thatBytes, thatStart, false, false);
+    }
+
+    /**
+     * This function provides a raw byte-based comparison for UTF8 strings.
+     * Note that it may not deliver the correct ordering for languages whose characters need 2 or 3 bytes,
+     * but it works for single-byte character languages.
+     */
+    public static int rawByteCompareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
+        return compareTo(thisBytes, thisStart, thatBytes, thatStart, false, true);
+    }
+
+    public static int lowerCaseCompareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
+        return compareTo(thisBytes, thisStart, thatBytes, thatStart, true, false);
+    }
+
+    public static int hash(byte[] bytes, int start, int coefficient, int r) {
+        return hash(bytes, start, false, false, coefficient, r);
+    }
+
+    public static int hash(byte[] bytes, int start) {
+        return hash(bytes, start, false, false, 31, Integer.MAX_VALUE);
+    }
+
+    /**
+     * This function provides a raw byte-based hash function for UTF8 strings.
+     * Note that the hash values may differ from the char-based hash for languages whose characters need 2 or 3 bytes,
+     * but the two are consistent for single-byte character languages.
+     */
+    public static int rawBytehash(byte[] bytes, int start) {
+        return hash(bytes, start, false, true, 31, Integer.MAX_VALUE);
+    }
+
+    public static int lowerCaseHash(byte[] bytes, int start) {
+        return hash(bytes, start, true, false, 31, Integer.MAX_VALUE);
+    }
+
+    public static StringBuilder toString(StringBuilder builder, byte[] bytes, int start) {
+        int utfLen = getUTFLength(bytes, start);
+        int offset = getNumBytesToStoreLength(utfLen);
+        while (utfLen > 0) {
+            char c = charAt(bytes, start + offset);
+            builder.append(c);
+            int cLen = getModifiedUTF8Len(c);
+            offset += cLen;
+            utfLen -= cLen;
+        }
+        return builder;
+    }
+
+    public static void printUTF8StringWithQuotes(byte[] b, int s, int l, OutputStream os) throws IOException {
+        printUTF8String(b, s, l, os, true);
+    }
+
+    public static void printUTF8StringNoQuotes(byte[] b, int s, int l, OutputStream os) throws IOException {
+        printUTF8String(b, s, l, os, false);
+    }
+
+    public static void printUTF8StringWithQuotes(String str, OutputStream os) throws IOException {
+        printUTF8String(str, os, true);
+    }
+
+    public static void printUTF8StringNoQuotes(String str, OutputStream os) throws IOException {
+        printUTF8String(str, os, false);
+    }
+
+    public static int encodeUTF8Length(int length, byte[] bytes, int start) {
+        return VarLenIntEncoderDecoder.encode(length, bytes, start);
+    }
+
+    public static int writeUTF8Length(int length, byte[] bytes, DataOutput out) throws IOException {
+        int nbytes = encodeUTF8Length(length, bytes, 0);
+        out.write(bytes, 0, nbytes);
+        return nbytes;
+    }
+
+    private static void printUTF8String(byte[] b, int s, int l, OutputStream os, boolean useQuotes) throws IOException {
+        int stringLength = getUTFLength(b, s);
+        int position = s + getNumBytesToStoreLength(stringLength);
+        int maxPosition = position + stringLength;
+        if (useQuotes) {
+            os.write('\"');
+        }
+        while (position < maxPosition) {
+            char c = charAt(b, position);
+            switch (c) {
+                // escape
+                case '\\':
+                case '"':
+                    os.write('\\');
+                    break;
+            }
+            int sz = charSize(b, position);
+            while (sz > 0) {
+                os.write(b[position]);
+                position++;
+                sz--;
+            }
+        }
+        if (useQuotes) {
+            os.write('\"');
+        }
+    }
+
+    private static void printUTF8String(String string, OutputStream os, boolean useQuotes) throws IOException {
+        if (useQuotes) {
+            os.write('\"');
+        }
+        for (int i = 0; i < string.length(); i++) {
+            char ch = string.charAt(i);
+            writeCharAsModifiedUTF8(ch, os);
+        }
+        if (useQuotes) {
+            os.write('\"');
+        }
+    }
+
+    private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart,
+            boolean useLowerCase, boolean useRawByte) {
+        int utflen1 = getUTFLength(thisBytes, thisStart);
+        int utflen2 = getUTFLength(thatBytes, thatStart);
+
+        int c1 = 0;
+        int c2 = 0;
+
+        int s1Start = thisStart + getNumBytesToStoreLength(utflen1);
+        int s2Start = thatStart + getNumBytesToStoreLength(utflen2);
+
+        while (c1 < utflen1 && c2 < utflen2) {
+            char ch1, ch2;
+            if (useRawByte) {
+                ch1 = (char) thisBytes[s1Start + c1];
+                ch2 = (char) thatBytes[s2Start + c2];
+            } else {
+                ch1 = (charAt(thisBytes, s1Start + c1));
+                ch2 = (charAt(thatBytes, s2Start + c2));
+
+                if (useLowerCase) {
+                    ch1 = Character.toLowerCase(ch1);
+                    ch2 = Character.toLowerCase(ch2);
+                }
+            }
+
+            if (ch1 != ch2) {
+                return ch1 - ch2;
+            }
+            c1 += charSize(thisBytes, s1Start + c1);
+            c2 += charSize(thatBytes, s2Start + c2);
+        }
+        return utflen1 - utflen2;
+    }
+
+    private static int hash(byte[] bytes, int start, boolean useLowerCase, boolean useRawByte, int coefficient, int r) {
+        int h = 0;
+        int utflen = getUTFLength(bytes, start);
+        int sStart = start + getNumBytesToStoreLength(utflen);
+        int c = 0;
+
+        while (c < utflen) {
+            char ch;
+            if (useRawByte) {
+                ch = (char) bytes[sStart + c];
+            } else {
+                ch = charAt(bytes, sStart + c);
+                if (useLowerCase) {
+                    ch = Character.toLowerCase(ch);
+                }
+            }
+            h = (coefficient * h + ch) % r;
+            c += charSize(bytes, sStart + c);
+        }
+        return h;
+    }
+
+    public static byte[] writeStringToBytes(String string) {
+        UTF8StringWriter writer = new UTF8StringWriter();
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream dos = new DataOutputStream(bos);
+        try {
+            writer.writeUTF8(string, dos);
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        return bos.toByteArray();
+    }
+}
\ No newline at end of file

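To make the normalize() contract above concrete, an illustrative sketch (not part of the patch): whenever two normalized keys differ, their order must match the full byte-wise comparison; equal keys only force the comparator to fall back to the actual bytes.

    import org.apache.hyracks.util.string.UTF8StringUtil;

    public class NormalizedKeySketch {
        public static void main(String[] args) {
            byte[] a = UTF8StringUtil.writeStringToBytes("apple");
            byte[] b = UTF8StringUtil.writeStringToBytes("banana");

            int fullCmp = UTF8StringUtil.compareTo(a, 0, b, 0);   // negative: "apple" < "banana"
            int nkA = UTF8StringUtil.normalize(a, 0);
            int nkB = UTF8StringUtil.normalize(b, 0);

            // Differing keys must order the same way as the full comparison.
            if (nkA != nkB && Integer.signum(nkA - nkB) != Integer.signum(fullCmp)) {
                throw new AssertionError("normalized key is inconsistent with compareTo");
            }
        }
    }
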
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
new file mode 100644
index 0000000..021c02f
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.util.string;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+public class UTF8StringWriter implements Serializable {
+    private byte[] tempBytes;
+
+    public final void writeUTF8(CharSequence str, DataOutput out) throws IOException {
+        int strlen = str.length();
+        int utflen = 0;
+        char c;
+        int count = 0;
+
+        for (int i = 0; i < strlen; i++) {
+            c = str.charAt(i);
+            utflen += UTF8StringUtil.getModifiedUTF8Len(c);
+        }
+
+        ensureTempSize(utflen);
+
+        count += VarLenIntEncoderDecoder.encode(utflen, tempBytes, count);
+
+        int i = 0;
+        for (; i < strlen; i++) {
+            c = str.charAt(i);
+            if (!((c >= 0x0001) && (c <= 0x007F))) {
+                break;
+            }
+            tempBytes[count++] = (byte) c;
+        }
+
+        for (; i < strlen; i++) {
+            c = str.charAt(i);
+            count += writeToBytes(tempBytes, count, c);
+        }
+        out.write(tempBytes, 0, count);
+    }
+
+    public final void writeUTF8(char[] buffer, int start, int length, DataOutput out) throws IOException {
+        int utflen = 0;
+        int count = 0;
+        char c;
+
+        for (int i = 0; i < length; i++) {
+            c = buffer[i + start];
+            utflen += UTF8StringUtil.getModifiedUTF8Len(c);
+        }
+
+        ensureTempSize(utflen);
+
+        count += VarLenIntEncoderDecoder.encode(utflen, tempBytes, count);
+
+        int i = 0;
+        for (; i < length; i++) {
+            c = buffer[i + start];
+            if (!((c >= 0x0001) && (c <= 0x007F))) {
+                break;
+            }
+            tempBytes[count++] = (byte) c;
+        }
+
+        for (; i < length; i++) {
+            c = buffer[i + start];
+            count += writeToBytes(tempBytes, count, c);
+        }
+        out.write(tempBytes, 0, count);
+    }
+
+    private static int writeToBytes(byte[] tempBytes, int count, char c) {
+        int orig = count;
+        if ((c >= 0x0001) && (c <= 0x007F)) {
+            tempBytes[count++] = (byte) c;
+        } else if (c > 0x07FF) {
+            tempBytes[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
+            tempBytes[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
+            tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+        } else {
+            tempBytes[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
+            tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+        }
+        return count - orig;
+    }
+
+    private void ensureTempSize(int utflen) {
+        if (tempBytes == null || tempBytes.length < utflen + 5) {
+            tempBytes = new byte[utflen + 5];
+        }
+
+    }
+
+}
\ No newline at end of file

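Both overloads above funnel through the same temp-buffer path and produce the same serialized form; a minimal usage sketch (the harness is illustrative, not part of the patch):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;

    import org.apache.hyracks.util.string.UTF8StringWriter;

    public class WriterOverloadsSketch {
        public static void main(String[] args) throws Exception {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream dos = new DataOutputStream(bos);
            UTF8StringWriter writer = new UTF8StringWriter();

            writer.writeUTF8("hello", dos);                  // CharSequence overload

            char[] chars = "hello world".toCharArray();
            writer.writeUTF8(chars, 6, 5, dos);              // char[] slice overload: writes "world"

            System.out.println(bos.size() + " bytes written");
        }
    }
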
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoderTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoderTest.java b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoderTest.java
new file mode 100644
index 0000000..193dca6
--- /dev/null
+++ b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoderTest.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.encoding;
+
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_FIVE_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_FOUR_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_ONE_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_THREE_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_TWO_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.VarLenIntDecoder;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.createDecoder;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.decode;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.encode;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.getBytesRequired;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class VarLenIntEncoderDecoderTest {
+
+    int[] bounds = new int[] { 0, BOUND_ONE_BYTE, BOUND_TWO_BYTE, BOUND_THREE_BYTE, BOUND_FOUR_BYTE, BOUND_FIVE_BYTE };
+
+    @Test
+    public void testGetBytesRequired() throws Exception {
+        for (int bound = 0; bound < bounds.length - 1; bound++) {
+            assertEquals(bound + 1, getBytesRequired(bounds[bound]));
+            assertEquals(bound + 1, getBytesRequired(bounds[bound + 1] - 1));
+        }
+    }
+
+    @Test
+    public void testEncodeDecode() throws Exception {
+        byte[] bytes = new byte[10];
+        int startPos = 3;
+        for (int i = 1; i < bounds.length - 1; i++) {
+            testEncodeDecode(i, bounds[i] - 1, bytes, startPos);
+            testEncodeDecode(i + 1, bounds[i], bytes, startPos);
+            testEncodeDecode(i + 1, bounds[i] + 1, bytes, startPos);
+        }
+        // Integer.Max
+        testEncodeDecode(5, BOUND_FIVE_BYTE, bytes, startPos);
+    }
+
+    @Test
+    public void testCreateDecoder() throws Exception {
+        VarLenIntDecoder decoder = createDecoder();
+        byte[] bytes = new byte[100];
+        int pos = 1;
+        for (int b : bounds) {
+            pos += encode(b, bytes, pos);
+        }
+        decoder.reset(bytes, 1);
+        for (int b : bounds) {
+            assertEquals(b, decoder.decode());
+        }
+    }
+
+    protected void testEncodeDecode(int expectedBytes, int value, byte[] bytes, int startPos) throws IOException {
+        assertEquals(expectedBytes, encode(value, bytes, startPos));
+        assertEquals(value, decode(bytes, startPos));
+
+        ByteArrayInputStream bis = new ByteArrayInputStream(bytes, startPos, bytes.length - startPos);
+        DataInputStream dis = new DataInputStream(bis);
+        assertEquals(value, decode(dis));
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java
new file mode 100644
index 0000000..bfc1fa8
--- /dev/null
+++ b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import static org.apache.hyracks.util.string.UTF8StringSample.EMPTY_STRING;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_127;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_128;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_3;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_LARGE;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_LARGE_SUB_1;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_MEDIUM;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_MEDIUM_SUB_1;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_3;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_MIX;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class UTF8StringReaderWriterTest {
+
+    UTF8StringWriter writer = new UTF8StringWriter();
+    UTF8StringReader reader = new UTF8StringReader();
+
+    @Test
+    public void testWriterReader() throws IOException {
+        writeAndReadOneString(EMPTY_STRING);
+        writeAndReadOneString(STRING_LEN_3);
+
+        writeAndReadOneString(STRING_LEN_127);
+        writeAndReadOneString(STRING_LEN_128);
+        writeAndReadOneString(STRING_LEN_MEDIUM_SUB_1);
+    }
+
+    @Test
+    public void testMedium() throws IOException {
+        writeAndReadOneString(STRING_LEN_MEDIUM);
+        writeAndReadOneString(STRING_LEN_LARGE_SUB_1);
+    }
+
+    @Test
+    public void testLarge() throws IOException {
+        writeAndReadOneString(STRING_LEN_LARGE);
+    }
+
+    @Test
+    public void testUTF8() throws IOException {
+        writeAndReadOneString(STRING_UTF8_3);
+        writeAndReadOneString(STRING_UTF8_MIX);
+    }
+
+    private void writeAndReadOneString(String testString) throws IOException {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream dos = new DataOutputStream(bos);
+        writer.writeUTF8(testString, dos);
+
+        ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray(), 0, bos.size());
+        assertEquals(testString, reader.readUTF(new DataInputStream(bis)));
+
+        int lastOffset = bos.size();
+        char[] charArray = testString.toCharArray();
+        writer.writeUTF8(charArray, 0, charArray.length, dos);
+
+        bis = new ByteArrayInputStream(bos.toByteArray(), lastOffset, bos.size());
+        assertEquals(testString, reader.readUTF(new DataInputStream(bis)));
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringSample.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringSample.java b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringSample.java
new file mode 100644
index 0000000..3e6e984
--- /dev/null
+++ b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringSample.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_THREE_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_TWO_BYTE;
+
+import java.util.Arrays;
+
+/**
+ * Utility class providing sample test strings.
+ */
+public class UTF8StringSample {
+    public static String EMPTY_STRING = "";
+
+    public static char ONE_ASCII_CHAR = 'x';
+    public static char ONE_UTF8_CHAR = 'à';
+
+    public static String STRING_LEN_3 = "xyz";
+    public static String STRING_UTF8_3 = "锟斤拷";
+    public static String STRING_UTF8_MIX = "\uD841\uDF0E\uD841\uDF31锟X斤Y拷Zà"; // one, two, three, and four bytes
+    public static String STRING_UTF8_MIX_LOWERCASE = "\uD841\uDF0E\uD841\uDF31锟x斤y拷zà";
+
+    public static String STRING_LEN_127 = generateStringRepeatBy(ONE_ASCII_CHAR, 127);
+    public static String STRING_LEN_128 = generateStringRepeatBy(ONE_ASCII_CHAR, 128);
+
+    public static String STRING_LEN_MEDIUM_SUB_1 = generateStringRepeatBy(ONE_ASCII_CHAR, BOUND_TWO_BYTE - 1);
+    public static String STRING_LEN_MEDIUM = generateStringRepeatBy(ONE_ASCII_CHAR, BOUND_TWO_BYTE);
+
+    public static String STRING_LEN_LARGE_SUB_1 = generateStringRepeatBy(ONE_ASCII_CHAR, BOUND_THREE_BYTE - 1);
+    public static String STRING_LEN_LARGE = generateStringRepeatBy(ONE_ASCII_CHAR, BOUND_THREE_BYTE);
+
+    public static String generateStringRepeatBy(char c, int times) {
+        char[] chars = new char[times];
+        Arrays.fill(chars, c);
+        return new String(chars);
+    }
+
+}

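The LEN_127/LEN_128 and BOUND_* samples above sit on the boundaries where the length prefix grows by one byte. A small sketch of that effect; the expected prefix sizes (1 byte for a UTF length of 127, 2 bytes for 128) are assumptions consistent with the BOUND_ONE_BYTE test earlier in this commit:

    import org.apache.hyracks.util.string.UTF8StringSample;
    import org.apache.hyracks.util.string.UTF8StringUtil;

    public class PrefixGrowthSketch {
        public static void main(String[] args) {
            byte[] len127 = UTF8StringUtil.writeStringToBytes(UTF8StringSample.STRING_LEN_127);
            byte[] len128 = UTF8StringUtil.writeStringToBytes(UTF8StringSample.STRING_LEN_128);

            // Expected: 1 for the 127-char string, 2 for the 128-char string.
            System.out.println(UTF8StringUtil.getNumBytesToStoreLength(UTF8StringUtil.getUTFLength(len127, 0)));
            System.out.println(UTF8StringUtil.getNumBytesToStoreLength(UTF8StringUtil.getUTFLength(len128, 0)));
        }
    }
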
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
new file mode 100644
index 0000000..0e3ed5c
--- /dev/null
+++ b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import static org.apache.hyracks.util.string.UTF8StringUtil.writeStringToBytes;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_127;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_128;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_MEDIUM;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_3;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_MIX;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_MIX_LOWERCASE;
+import static org.apache.hyracks.util.string.UTF8StringUtil.charAt;
+import static org.apache.hyracks.util.string.UTF8StringUtil.charSize;
+import static org.apache.hyracks.util.string.UTF8StringUtil.compareTo;
+import static org.apache.hyracks.util.string.UTF8StringUtil.getModifiedUTF8Len;
+import static org.apache.hyracks.util.string.UTF8StringUtil.getNumBytesToStoreLength;
+import static org.apache.hyracks.util.string.UTF8StringUtil.getStringLength;
+import static org.apache.hyracks.util.string.UTF8StringUtil.getUTFLength;
+import static org.apache.hyracks.util.string.UTF8StringUtil.lowerCaseCompareTo;
+import static org.apache.hyracks.util.string.UTF8StringUtil.lowerCaseHash;
+import static org.apache.hyracks.util.string.UTF8StringUtil.normalize;
+import static org.apache.hyracks.util.string.UTF8StringUtil.rawByteCompareTo;
+import static org.apache.hyracks.util.string.UTF8StringUtil.hash;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class UTF8StringUtilTest {
+
+    @Test
+    public void testCharAtCharSizeGetLen() throws Exception {
+        char[] utf8Mix = STRING_UTF8_MIX.toCharArray();
+        byte[] buffer = writeStringToBytes(STRING_UTF8_MIX);
+        int pos = getNumBytesToStoreLength(getUTFLength(buffer, 0));
+        for (char c : utf8Mix) {
+            assertEquals(c, charAt(buffer, pos));
+            assertEquals(getModifiedUTF8Len(c), charSize(buffer, pos));
+            pos += charSize(buffer, pos);
+        }
+    }
+
+    @Test
+    public void testGetStringLength() throws Exception {
+        byte[] buffer = writeStringToBytes(STRING_UTF8_MIX);
+        assertEquals(STRING_UTF8_MIX.length(), getStringLength(buffer, 0));
+    }
+
+    @Test
+    public void testCompareToAndNormalize() throws Exception {
+        testCompare(STRING_UTF8_MIX, STRING_UTF8_MIX, OPTION.STANDARD);
+        testCompare(STRING_UTF8_3, STRING_UTF8_MIX, OPTION.STANDARD);
+        testCompare(STRING_LEN_MEDIUM, STRING_UTF8_MIX, OPTION.STANDARD);
+    }
+
+    public boolean isSameSign(int r1, int r2) {
+        if (r1 > 0) {
+            return r2 > 0;
+        }
+        if (r1 < 0) {
+            return r2 < 0;
+        }
+        return r2 == 0;
+    }
+
+    enum OPTION {STANDARD, RAW_BYTE, LOWERCASE}
+
+    public void testCompare(String str1, String str2, OPTION option) throws IOException {
+        byte[] buffer1 = writeStringToBytes(str1);
+        byte[] buffer2 = writeStringToBytes(str2);
+
+        switch (option) {
+            case STANDARD:
+                assertEquals(str1.compareTo(str2), compareTo(buffer1, 0, buffer2, 0));
+                int n1 = normalize(buffer1, 0);
+                int n2 = normalize(buffer2, 0);
+                assertTrue(isSameSign(str1.compareTo(str2), n1 - n2));
+                break;
+            case RAW_BYTE:
+                assertEquals(str1.compareTo(str2), rawByteCompareTo(buffer1, 0, buffer2, 0));
+                break;
+            case LOWERCASE:
+                assertEquals(str1.compareToIgnoreCase(str2), lowerCaseCompareTo(buffer1, 0, buffer2, 0));
+                break;
+        }
+
+    }
+
+    @Test
+    public void testRawByteCompareTo() throws Exception {
+        testCompare(STRING_LEN_MEDIUM, STRING_LEN_MEDIUM, OPTION.RAW_BYTE);
+        testCompare(STRING_LEN_127, STRING_LEN_128, OPTION.RAW_BYTE);
+    }
+
+    @Test
+    public void testLowerCaseCompareTo() throws Exception {
+        testCompare(STRING_LEN_127, STRING_LEN_128, OPTION.LOWERCASE);
+        testCompare(STRING_LEN_127, STRING_UTF8_MIX, OPTION.LOWERCASE);
+        testCompare(STRING_UTF8_MIX, STRING_UTF8_MIX_LOWERCASE, OPTION.LOWERCASE);
+        testCompare(STRING_UTF8_MIX_LOWERCASE, STRING_UTF8_MIX, OPTION.LOWERCASE);
+    }
+
+    @Test
+    public void testToString() throws Exception {
+
+        StringBuilder sb = new StringBuilder();
+        byte[] buffer = writeStringToBytes(STRING_UTF8_MIX);
+        assertEquals(STRING_UTF8_MIX, UTF8StringUtil.toString(sb, buffer, 0).toString());
+    }
+
+    @Test
+    public void testHash() throws IOException {
+        byte[] buffer = writeStringToBytes(STRING_UTF8_MIX_LOWERCASE);
+        int lowerHash = hash(buffer, 0);
+
+        buffer = writeStringToBytes(STRING_UTF8_MIX);
+        int upperHash = lowerCaseHash(buffer, 0);
+        assertEquals(lowerHash, upperHash);
+
+        int familyOne = hash(buffer, 0, 7, 297);
+        int familyTwo = hash(buffer, 0, 8, 297);
+        assertTrue(familyOne != familyTwo);
+    }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/pom.xml b/hyracks/pom.xml
index c1af7b9..61e06e4 100644
--- a/hyracks/pom.xml
+++ b/hyracks/pom.xml
@@ -96,6 +96,7 @@
   </pluginRepositories>
 
   <modules>
+    <module>hyracks-util</module>
     <module>hyracks-ipc</module>
     <module>hyracks-api</module>
     <module>hyracks-comm</module>


[5/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
index 3f25c3c..3e12837 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
@@ -19,50 +19,48 @@
 
 package org.apache.hyracks.dataflow.common.data.parsers;
 
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
-import javax.xml.bind.DatatypeConverter;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
 import java.util.Arrays;
 
-import static org.junit.Assert.assertTrue;
+import javax.xml.bind.DatatypeConverter;
 
-public class ByteArrayHexParserFactoryTest {
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.junit.Test;
 
-    public static byte[] subArray(byte[] bytes, int start) {
-        return Arrays.copyOfRange(bytes, start, bytes.length);
-    }
+public class ByteArrayHexParserFactoryTest {
 
     @Test
     public void testExtractPointableArrayFromHexString() throws Exception {
-        byte[] cache = new byte[] { };
+        testOneString("");
+        testOneString("ABCDEF0123456789");
 
-        String empty = "";
-        cache = ByteArrayHexParserFactory
-                .extractPointableArrayFromHexString(empty.toCharArray(), 0, empty.length(), cache);
+        testOneString("0123456789abcdef");
 
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == 0);
-        assertTrue(DatatypeConverter.printHexBinary(subArray(cache, 2)).equalsIgnoreCase(empty));
+        char[] maxChars = new char[65540 * 2];
+        Arrays.fill(maxChars, 'f');
+        String maxString = new String(maxChars);
 
-        String everyChar = "ABCDEF0123456789";
-        cache = ByteArrayHexParserFactory
-                .extractPointableArrayFromHexString(everyChar.toCharArray(), 0, everyChar.length(), cache);
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == everyChar.length() / 2);
-        assertTrue(DatatypeConverter.printHexBinary(subArray(cache, 2)).equalsIgnoreCase(everyChar));
+        testOneString(maxString);
+    }
 
-        String lowercase = "0123456789abcdef";
-        cache = ByteArrayHexParserFactory
-                .extractPointableArrayFromHexString(lowercase.toCharArray(), 0, lowercase.length(), cache);
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == lowercase.length() / 2);
-        assertTrue(DatatypeConverter.printHexBinary(subArray(cache, 2)).equalsIgnoreCase(lowercase));
+    void testOneString(String test) throws HyracksDataException {
+        IValueParser parser = ByteArrayHexParserFactory.INSTANCE.createValueParser();
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream outputStream = new DataOutputStream(bos);
+        ByteArrayPointable bytePtr = new ByteArrayPointable();
 
-        char[] maxChars = new char[ByteArrayPointable.MAX_LENGTH  * 2];
-        Arrays.fill(maxChars, 'f');
-        String maxString = new String(maxChars);
-        cache = ByteArrayHexParserFactory
-                .extractPointableArrayFromHexString(maxString.toCharArray(), 0, maxString.length(), cache);
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == maxString.length() / 2);
-        assertTrue(DatatypeConverter.printHexBinary(subArray(cache, 2)).equalsIgnoreCase(maxString));
+        parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+
+        bytePtr.set(bos.toByteArray(), 0, bos.size());
+
+        assertTrue(bytePtr.getContentLength() == test.length() / 2);
+        assertEquals(DatatypeConverter.printHexBinary(ByteArrayPointable.copyContent(bytePtr)).toLowerCase(),
+                test.toLowerCase());
     }
 
 }
\ No newline at end of file

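The rewritten test also illustrates the new access pattern for variable-size byte arrays. A condensed sketch, with method names taken from the test above and an arbitrary hex literal:

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;

    import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
    import org.apache.hyracks.dataflow.common.data.parsers.ByteArrayHexParserFactory;
    import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;

    public class ByteArraySketch {
        public static void main(String[] args) throws Exception {
            IValueParser parser = ByteArrayHexParserFactory.INSTANCE.createValueParser();
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            parser.parse("cafebabe".toCharArray(), 0, 8, new DataOutputStream(bos));

            ByteArrayPointable bytePtr = new ByteArrayPointable();
            bytePtr.set(bos.toByteArray(), 0, bos.size());
            System.out.println(bytePtr.getContentLength());                      // 4 data bytes
            System.out.println(ByteArrayPointable.copyContent(bytePtr).length);  // same 4 bytes, prefix stripped
        }
    }
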
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java
index 53c60a3..4e69437 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java
@@ -29,7 +29,6 @@ import org.apache.hyracks.api.context.IHyracksTaskContext;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.primitive.IntegerPointable;
-import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
 import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
 import org.apache.hyracks.dataflow.std.group.AggregateState;
 import org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptor;
@@ -68,6 +67,8 @@ public class MinMaxStringFieldAggregatorFactory implements IFieldAggregateDescri
             RecordDescriptor outRecordDescriptor) throws HyracksDataException {
         return new IFieldAggregateDescriptor() {
 
+            UTF8StringSerializerDeserializer utf8SerializerDeserializer = new UTF8StringSerializerDeserializer();
+
             @Override
             public void reset() {
             }
@@ -112,7 +113,7 @@ public class MinMaxStringFieldAggregatorFactory implements IFieldAggregateDescri
                 int tupleOffset = accessor.getTupleStartOffset(tIndex);
                 int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
                 int fieldLength = accessor.getFieldLength(tIndex, aggField);
-                String strField = UTF8StringSerializerDeserializer.INSTANCE.deserialize(new DataInputStream(
+                String strField = utf8SerializerDeserializer.deserialize(new DataInputStream(
                         new ByteArrayInputStream(accessor.getBuffer().array(), tupleOffset
                                 + accessor.getFieldSlotsLength() + fieldStart, fieldLength)));
                 if (hasBinaryState) {
@@ -157,7 +158,7 @@ public class MinMaxStringFieldAggregatorFactory implements IFieldAggregateDescri
                 int tupleOffset = accessor.getTupleStartOffset(tIndex);
                 int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
                 int fieldLength = accessor.getFieldLength(tIndex, aggField);
-                String strField = UTF8StringSerializerDeserializer.INSTANCE.deserialize(new DataInputStream(
+                String strField = utf8SerializerDeserializer.deserialize(new DataInputStream(
                         new ByteArrayInputStream(accessor.getBuffer().array(), tupleOffset
                                 + accessor.getFieldSlotsLength() + fieldStart, fieldLength)));
                 if (hasBinaryState) {

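This hunk's switch from the shared UTF8StringSerializerDeserializer.INSTANCE to a per-operator instance repeats throughout the rest of the commit, presumably because the serializer now carries reusable writer/reader buffers and is no longer a stateless singleton. A condensed sketch of the new construction style, mirroring the hunks below:

    import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
    import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
    import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
    import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

    public class SerDeConstructionSketch {
        public static RecordDescriptor buildRecDesc() {
            return new RecordDescriptor(new ISerializerDeserializer[] {
                    IntegerSerializerDeserializer.INSTANCE,        // stateless, still a singleton
                    new UTF8StringSerializerDeserializer() });     // now constructed per use site
        }
    }
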
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java b/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java
index 61b29fd..9651529 100644
--- a/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java
+++ b/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java
@@ -46,7 +46,7 @@ import org.apache.hyracks.dataflow.std.structures.TuplePointer;
 
 public class VariableTupleMemoryManagerTest {
     ISerializerDeserializer[] fieldsSerDer = new ISerializerDeserializer[] {
-            IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
     RecordDescriptor recordDescriptor = new RecordDescriptor(fieldsSerDer);
     ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(recordDescriptor.getFieldCount());
     VariableTupleMemoryManager tupleMemoryManager;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java b/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java
index cc52b78..44a082f 100644
--- a/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java
+++ b/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java
@@ -25,9 +25,6 @@ import static org.junit.Assert.assertTrue;
 import java.nio.ByteBuffer;
 
 import org.apache.commons.lang3.ArrayUtils;
-import org.junit.Before;
-import org.junit.Test;
-
 import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
 import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -36,12 +33,15 @@ import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDese
 import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
 import org.apache.hyracks.dataflow.common.util.IntSerDeUtils;
 import org.apache.hyracks.dataflow.std.sort.Utility;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.junit.Before;
+import org.junit.Test;
 
 public class DeletableFrameTupleAppenderTest {
     DeletableFrameTupleAppender appender;
     ISerializerDeserializer[] fields = new ISerializerDeserializer[] {
             IntegerSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE,
+            new UTF8StringSerializerDeserializer(),
     };
     RecordDescriptor recordDescriptor = new RecordDescriptor(fields);
     ArrayTupleBuilder builder = new ArrayTupleBuilder(recordDescriptor.getFieldCount());
@@ -90,7 +90,8 @@ public class DeletableFrameTupleAppenderTest {
     }
 
     int assertTupleIsExpected(int i, int dataOffset) {
-        int tupleLength = 2 * 4 + 4 + 2 + i + 1;
+        int lenStrMeta = UTF8StringUtil.getNumBytesToStoreLength(i);
+        int tupleLength = 2 * 4 + 4 + lenStrMeta + i + 1;
         assertEquals(dataOffset, appender.getTupleStartOffset(i));
         assertEquals(tupleLength, appender.getTupleLength(i));
 
@@ -99,7 +100,7 @@ public class DeletableFrameTupleAppenderTest {
         assertEquals(i + 1,
                 IntSerDeUtils.getInt(appender.getBuffer().array(), appender.getAbsoluteFieldStartOffset(i, 0)));
         assertEquals(dataOffset + 2 * 4 + 4, appender.getAbsoluteFieldStartOffset(i, 1));
-        assertEquals(2 + i + 1, appender.getFieldLength(i, 1));
+        assertEquals(lenStrMeta + i + 1, appender.getFieldLength(i, 1));
         return tupleLength;
     }
 

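The lenStrMeta change above is the arithmetic consequence of the new encoding: a UTF8 field no longer pays a fixed 2-byte length, only getNumBytesToStoreLength(utfLen) bytes. A worked example (the one-byte prefix for a short string is an assumption consistent with the tests earlier in this commit):

    import org.apache.hyracks.util.string.UTF8StringUtil;

    public class FieldLengthSketch {
        public static void main(String[] args) {
            int utfLen = 5;                           // e.g. the UTF length of "hello"
            int oldFieldLen = 2 + utfLen;             // old format: fixed 2-byte prefix -> 7
            int newFieldLen = UTF8StringUtil.getNumBytesToStoreLength(utfLen) + utfLen; // -> 1 + 5 = 6
            System.out.println(oldFieldLen + " -> " + newFieldLen);
        }
    }
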
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java
index 202096a..ac521f5 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java
@@ -107,15 +107,15 @@ public class InsertPipelineExample {
         // string
         // we will use field 2 as primary key to fill a clustered index
         RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, // this field will
+                new UTF8StringSerializerDeserializer(), // this field will
                                                            // not go into B-Tree
-                UTF8StringSerializerDeserializer.INSTANCE, // we will use this
+                new UTF8StringSerializerDeserializer(), // we will use this
                                                            // as payload
                 IntegerSerializerDeserializer.INSTANCE, // we will use this
                                                         // field as key
                 IntegerSerializerDeserializer.INSTANCE, // we will use this as
                                                         // payload
-                UTF8StringSerializerDeserializer.INSTANCE // we will use this as
+                new UTF8StringSerializerDeserializer() // we will use this as
                                                           // payload
                 });
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
index a493aec..734feb4 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
@@ -105,15 +105,15 @@ public class PrimaryIndexBulkLoadExample {
         // int, string
         // we will use field-index 2 as primary key to fill a clustered index
         RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, // this field will
+                new UTF8StringSerializerDeserializer(), // this field will
                                                            // not go into B-Tree
-                UTF8StringSerializerDeserializer.INSTANCE, // we will use this
+                new UTF8StringSerializerDeserializer(), // we will use this
                                                            // as payload
                 IntegerSerializerDeserializer.INSTANCE, // we will use this
                                                         // field as key
                 IntegerSerializerDeserializer.INSTANCE, // we will use this as
                                                         // payload
-                UTF8StringSerializerDeserializer.INSTANCE // we will use this as
+                new UTF8StringSerializerDeserializer() // we will use this as
                                                           // payload
                 });
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
index 48b9942..df33132 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
@@ -111,8 +111,8 @@ public class PrimaryIndexSearchExample {
 
         // schema of tuples coming out of primary index
         RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE, });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), });
 
         // build tuple containing low and high search keys
         ArrayTupleBuilder tb = new ArrayTupleBuilder(comparatorFactories.length * 2); // high
@@ -129,8 +129,8 @@ public class PrimaryIndexSearchExample {
                                                                     // high key
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
index e8e2281..8d68021 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
@@ -105,10 +105,10 @@ public class SecondaryIndexBulkLoadExample {
                 IntegerSerializerDeserializer.INSTANCE, // we will use this as
                                                         // payload in secondary
                                                         // index
-                UTF8StringSerializerDeserializer.INSTANCE, // we will use this
+                new UTF8StringSerializerDeserializer(), // we will use this
                                                           // as key in
                                                            // secondary index
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
 
         int primaryFieldCount = 4;
         ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
index 1a1559f..0e80272 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
@@ -102,7 +102,7 @@ public class SecondaryIndexSearchExample {
 
         // schema of tuples coming out of secondary index
         RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
 
         int secondaryFieldCount = 2;
         ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
@@ -120,8 +120,8 @@ public class SecondaryIndexSearchExample {
 
         // schema of tuples coming out of primary index
         RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE, });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), });
 
         int primaryFieldCount = 4;
         ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
@@ -145,15 +145,15 @@ public class SecondaryIndexSearchExample {
         DataOutput dos = tb.getDataOutput();
 
         tb.reset();
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos); // low
+        new UTF8StringSerializerDeserializer().serialize("0", dos); // low
                                                                        // key
         tb.addFieldEndOffset();
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("f", dos); // high
+        new UTF8StringSerializerDeserializer().serialize("f", dos); // high
                                                                        // key
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
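
The same pattern applies when constant search keys are serialized into an ArrayTupleBuilder: instead of calling serialize(...) on the singleton, the examples now construct a serializer and call serialize on it. The following sketch is illustrative only (class and method names are made up, imports assumed as above); the patch itself constructs a new serializer per call, while the sketch reuses one instance within the method.

import java.io.DataOutput;

import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

public class KeyTupleSketch {
    // Sketch only: builds a two-field tuple holding a low and a high search key.
    public static ArrayTupleBuilder buildRangeKeys(String lowKey, String highKey) throws Exception {
        ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
        DataOutput dos = tb.getDataOutput();
        UTF8StringSerializerDeserializer strSerde = new UTF8StringSerializerDeserializer();

        tb.reset();
        strSerde.serialize(lowKey, dos);   // low key
        tb.addFieldEndOffset();
        strSerde.serialize(highKey, dos);  // high key
        tb.addFieldEndOffset();
        return tb;
    }
}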

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
index 4aeebe7..837a8a4 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
@@ -82,9 +82,9 @@ public abstract class AbstractBTreeOperatorTest extends AbstractIntegrationTest
     protected final int[] primaryBloomFilterKeyFields = new int[primaryKeyFieldCount];
 
     protected final RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
     // to be set by subclasses
     protected String primaryFileName;
@@ -98,7 +98,7 @@ public abstract class AbstractBTreeOperatorTest extends AbstractIntegrationTest
     protected final int[] secondaryBloomFilterKeyFields = new int[secondaryKeyFieldCount];
 
     protected final RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
     protected String secondaryFileName;
     protected IFileSplitProvider secondarySplitProvider;
@@ -160,11 +160,11 @@ public abstract class AbstractBTreeOperatorTest extends AbstractIntegrationTest
                 "data/tpch0.001/orders-part1.tbl"))) };
         IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -217,11 +217,11 @@ public abstract class AbstractBTreeOperatorTest extends AbstractIntegrationTest
         DataOutput dos = tb.getDataOutput();
 
         tb.reset();
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos);
+        new UTF8StringSerializerDeserializer().serialize("0", dos);
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
@@ -272,11 +272,11 @@ public abstract class AbstractBTreeOperatorTest extends AbstractIntegrationTest
                 "data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
index c4068a7..d4fb56d 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
@@ -59,11 +59,11 @@ public class BTreePrimaryIndexScanOperatorTest extends AbstractBTreeOperatorTest
         DataOutput dos = tb.getDataOutput();
 
         tb.reset();
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos);
+        new UTF8StringSerializerDeserializer().serialize("0", dos);
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
index 5429135..e690423 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
@@ -61,14 +61,14 @@ public class BTreePrimaryIndexSearchOperatorTest extends AbstractBTreeOperatorTe
 
         tb.reset();
         // low key
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("100", dos);
+        new UTF8StringSerializerDeserializer().serialize("100", dos);
         tb.addFieldEndOffset();
         // high key
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("200", dos);
+        new UTF8StringSerializerDeserializer().serialize("200", dos);
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
index 97175f2..6ebc177 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
@@ -64,14 +64,14 @@ public class BTreeSecondaryIndexInsertOperatorTest extends AbstractBTreeOperator
 
         tb.reset();
         // low key
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("1998-07-21", dos);
+        new UTF8StringSerializerDeserializer().serialize("1998-07-21", dos);
         tb.addFieldEndOffset();
         // high key
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("2000-10-18", dos);
+        new UTF8StringSerializerDeserializer().serialize("2000-10-18", dos);
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
index 11c060b..2bba010 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
@@ -63,14 +63,14 @@ public class BTreeSecondaryIndexSearchOperatorTest extends AbstractBTreeOperator
 
         tb.reset();
         // low key
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("1998-07-21", dos);
+        new UTF8StringSerializerDeserializer().serialize("1998-07-21", dos);
         tb.addFieldEndOffset();
         // high key
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("2000-10-18", dos);
+        new UTF8StringSerializerDeserializer().serialize("2000-10-18", dos);
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
index 7abd14c..031ef76 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
@@ -63,14 +63,14 @@ public class BTreeSecondaryIndexUpsertOperatorTest extends AbstractBTreeOperator
 
         tb.reset();
         // low key
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("1998-07-21", dos);
+        new UTF8StringSerializerDeserializer().serialize("1998-07-21", dos);
         tb.addFieldEndOffset();
         // high key
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("2000-10-18", dos);
+        new UTF8StringSerializerDeserializer().serialize("2000-10-18", dos);
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
index 7100920..9381727 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
@@ -100,9 +100,9 @@ public abstract class AbstractRTreeOperatorTest extends AbstractIntegrationTest
     protected final IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[primaryKeyFieldCount];
 
     protected final RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
             DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
             DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE });
 
@@ -119,7 +119,7 @@ public abstract class AbstractRTreeOperatorTest extends AbstractIntegrationTest
     protected final RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
             DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
             DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer() });
 
     // This is only used for the LSMRTree. We need a comparator Factories for
     // the BTree component of the LSMRTree.
@@ -220,11 +220,11 @@ public abstract class AbstractRTreeOperatorTest extends AbstractIntegrationTest
                 "data/orders-with-locations-part1.txt"))) };
         IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), DoubleSerializerDeserializer.INSTANCE,
                 DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
                 DoubleSerializerDeserializer.INSTANCE });
 
@@ -283,11 +283,11 @@ public abstract class AbstractRTreeOperatorTest extends AbstractIntegrationTest
         DataOutput dos = tb.getDataOutput();
 
         tb.reset();
-        UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos);
+        new UTF8StringSerializerDeserializer().serialize("0", dos);
         tb.addFieldEndOffset();
 
-        ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
 
         ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
@@ -331,11 +331,11 @@ public abstract class AbstractRTreeOperatorTest extends AbstractIntegrationTest
                 "data/orders-with-locations-part2.txt"))) };
         IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), DoubleSerializerDeserializer.INSTANCE,
                 DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
                 DoubleSerializerDeserializer.INSTANCE });
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java
index 8baf9e0..41e63a0 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java
@@ -144,7 +144,7 @@ public class SerializationDeserializationTest {
     @Test
     public void serdeser01() throws Exception {
         RecordDescriptor rDes = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
         LineProcessor processor = new LineProcessor() {
             @Override
             public void process(String line, IDataWriter<Object[]> writer) throws Exception {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
index 2457ee9..c330f8e 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
@@ -77,14 +77,14 @@ public class AggregationTest extends AbstractIntegrationTest {
             new FileReference(new File("data/tpch0.001/lineitem.tbl"))) });
 
     final RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
             IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
             IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
             FloatSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
     final ITupleParserFactory tupleParserFactory = new DelimitedDataTupleParserFactory(new IValueParserFactory[] {
             UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
@@ -115,7 +115,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 0 };
@@ -159,7 +159,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 0 };
@@ -199,7 +199,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 0 };
@@ -248,7 +248,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 0 };
@@ -292,7 +292,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 0 };
@@ -332,7 +332,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 0 };
@@ -381,8 +381,8 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer() });
 
         int[] keyFields = new int[] { 0 };
         int tableSize = 8;
@@ -425,8 +425,8 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer() });
 
         int[] keyFields = new int[] { 0 };
 
@@ -465,8 +465,8 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer() });
 
         int[] keyFields = new int[] { 0 };
         int frameLimits = 4;
@@ -513,7 +513,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 8, 0 };
@@ -558,7 +558,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 8, 0 };
@@ -599,7 +599,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 8, 0 };
@@ -648,7 +648,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
                 FloatSerializerDeserializer.INSTANCE });
 
@@ -694,7 +694,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
                 FloatSerializerDeserializer.INSTANCE });
 
@@ -736,7 +736,7 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
                 FloatSerializerDeserializer.INSTANCE });
 
@@ -788,8 +788,8 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
 
         int[] keyFields = new int[] { 8, 0 };
         int tableSize = 8;
@@ -833,8 +833,8 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
 
         int[] keyFields = new int[] { 8, 0 };
 
@@ -874,8 +874,8 @@ public class AggregationTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
 
         int[] keyFields = new int[] { 8, 0 };
         int frameLimits = 4;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java
index bb8627a..f3721e6 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java
@@ -66,7 +66,7 @@ public class CountOfCountsTest extends AbstractIntegrationTest {
         FileSplit[] splits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) };
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
         RecordDescriptor desc = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
                 spec,
@@ -81,7 +81,7 @@ public class CountOfCountsTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID);
 
         RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
         PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
                 new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
                 new MultiFieldsAggregatorFactory(
@@ -138,7 +138,7 @@ public class CountOfCountsTest extends AbstractIntegrationTest {
         FileSplit[] splits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) };
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
         RecordDescriptor desc = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
                 spec,
@@ -153,7 +153,7 @@ public class CountOfCountsTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
 
         RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
         PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
                 new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
                 new MultiFieldsAggregatorFactory(
@@ -211,7 +211,7 @@ public class CountOfCountsTest extends AbstractIntegrationTest {
         FileSplit[] splits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) };
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
         RecordDescriptor desc = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
                 spec,
@@ -226,7 +226,7 @@ public class CountOfCountsTest extends AbstractIntegrationTest {
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
 
         RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
         PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
                 new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
                 new MultiFieldsAggregatorFactory(

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java
index f8fbfc6..55689e9 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java
@@ -73,14 +73,14 @@ public class LocalityAwareConnectorTest extends AbstractMultiNCIntegrationTest {
             new FileSplit("asterix-004", new FileReference(new File("data/tpch0.001/lineitem.tbl"))) });
 
     final RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+            new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
             IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
             IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
             FloatSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
     final ITupleParserFactory tupleParserFactory = new DelimitedDataTupleParserFactory(new IValueParserFactory[] {
             UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
@@ -110,7 +110,7 @@ public class LocalityAwareConnectorTest extends AbstractMultiNCIntegrationTest {
                 "asterix-003", "asterix-004");
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 0 };
@@ -170,7 +170,7 @@ public class LocalityAwareConnectorTest extends AbstractMultiNCIntegrationTest {
                 "asterix-003", "asterix-004");
 
         RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
 
         int[] keyFields = new int[] { 0 };

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java
index 04194d8..c574ec8 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java
@@ -63,11 +63,11 @@ public class OptimizedSortMergeTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -120,11 +120,11 @@ public class OptimizedSortMergeTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java
index a7612e9..b19b47f 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java
@@ -60,7 +60,7 @@ public class ScanPrintTest extends AbstractIntegrationTest {
                 new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) });
 
         RecordDescriptor desc = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
                 spec,
@@ -91,11 +91,11 @@ public class ScanPrintTest extends AbstractIntegrationTest {
                 "data/tpch0.001/orders.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -131,10 +131,10 @@ public class ScanPrintTest extends AbstractIntegrationTest {
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { IntegerParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java
index 40b1687..0902da2 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java
@@ -61,11 +61,11 @@ public class SortMergeTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -109,11 +109,11 @@ public class SortMergeTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java
index c104ec5..d1e9c7a 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java
@@ -80,7 +80,7 @@ public class SplitOperatorTest extends AbstractIntegrationTest {
         DelimitedDataTupleParserFactory stringParser = new DelimitedDataTupleParserFactory(
                 new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '\u0000');
         RecordDescriptor stringRec = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE, });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), });
 
         FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, new ConstantFileSplitProvider(
                 inputSplits), stringParser, stringRec);


[6/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java
new file mode 100644
index 0000000..dcd5458
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import java.io.DataOutputStream;
+import java.io.FilterOutputStream;
+import java.io.OutputStream;
+
+public class RewindableDataOutputStream extends DataOutputStream {
+    /**
+     * Creates a new data output stream to write data to the specified
+     * underlying output stream. The counter <code>written</code> is
+     * set to zero.
+     *
+     * @param out the underlying output stream, to be saved for later
+     *            use.
+     * @see FilterOutputStream#out
+     */
+    public RewindableDataOutputStream(OutputStream out) {
+        super(out);
+    }
+
+    /**
+     * Rewinds the current position by {@code delta} bytes to a previous position.
+     * This is used to drop {@code delta} bytes that have already been written:
+     * in some cases we write bytes and only afterwards find that we have written more than expected,
+     * so the position needs to be moved back to the expected one.
+     * Currently it is used by the {@link AbstractVarLenObjectBuilder}, which may take more space than required
+     * at the beginning and will shift the data and fix the position whenever required.
+     *
+     * @param delta the number of bytes by which to rewind the written counter
+     */
+    public void rewindWrittenBy(int delta) {
+        if (written < delta) {
+            throw new IndexOutOfBoundsException();
+        }
+        written -= delta;
+    }
+}
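
For illustration, here is a minimal usage sketch of the rewindable counter (not part of the commit; the driver class and the ByteArrayOutputStream backing are assumptions made for this example):

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hyracks.data.std.util.RewindableDataOutputStream;

public class RewindableDataOutputStreamSketch {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream backing = new ByteArrayOutputStream();
        RewindableDataOutputStream out = new RewindableDataOutputStream(backing);

        out.writeByte(1);
        out.writeByte(2);
        out.writeByte(3);
        System.out.println(out.size());   // 3: the inherited "written" counter

        // Pretend the last two bytes were over-reserved metadata: drop them from the
        // counter only. The caller (e.g. the var-len object builder) is responsible
        // for shifting the actual bytes in its backing storage.
        out.rewindWrittenBy(2);
        System.out.println(out.size());   // 1
    }
}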

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
new file mode 100644
index 0000000..9dafef1
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.data.std.util;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class UTF8CharSequence implements CharSequence {
+
+    private char[] buf;
+    private int length;
+
+    @Override
+    public char charAt(int index) {
+        if (index >= length || index < 0) {
+            throw new IndexOutOfBoundsException("No index " + index + " for string of length " + length);
+        }
+        return buf[index];
+    }
+
+    @Override
+    public int length() {
+        return length;
+    }
+
+    @Override
+    public CharSequence subSequence(int start, int end) {
+        UTF8CharSequence charSeq = new UTF8CharSequence();
+        charSeq.length = end - start;
+        if (end != start) {
+            charSeq.buf = new char[charSeq.length];
+            System.arraycopy(buf, start, charSeq.buf, 0, charSeq.length);
+        }
+        return charSeq;
+    }
+
+    public void reset(UTF8StringPointable valuePtr) {
+        int utfLen = valuePtr.getUTF8Length();
+        if (buf == null || buf.length < utfLen) {
+            buf = new char[utfLen];
+        }
+        int bytePos = 0;
+        int charPos = 0;
+        while (bytePos < utfLen) {
+            buf[charPos++] = valuePtr.charAt(valuePtr.getMetaDataLength() + bytePos);
+            bytePos += valuePtr.charSize(valuePtr.getMetaDataLength() + bytePos);
+        }
+        this.length = charPos;
+    }
+
+    @Override
+    public String toString() {
+        return new String(buf, 0, length);
+    }
+
+}
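
A hypothetical usage sketch (not from the commit): the adapter lets Hyracks UTF-8 data be used anywhere a java.lang.CharSequence is accepted, e.g. by java.util.regex, without first materializing a String. generateUTF8Pointable is taken from the tests later in this patch; the driver class is an assumption.

import java.util.regex.Pattern;

import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.UTF8CharSequence;

public class UTF8CharSequenceSketch {
    public static void main(String[] args) {
        UTF8StringPointable ptr = UTF8StringPointable.generateUTF8Pointable("Mix中文123");

        // Decode the pointable's bytes into a reusable char buffer.
        UTF8CharSequence seq = new UTF8CharSequence();
        seq.reset(ptr);

        System.out.println(seq.length());                    // 8 decoded characters
        System.out.println(Pattern.matches(".*\\d+", seq));   // true: regex over the sequence
        System.out.println(seq.subSequence(0, 3));            // Mix
    }
}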

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java
new file mode 100644
index 0000000..eb29a98
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hyracks.data.std.util;
+
+import java.io.IOException;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class UTF8StringBuilder extends AbstractVarLenObjectBuilder {
+
+    public void appendChar(char ch) throws IOException {
+        UTF8StringUtil.writeCharAsModifiedUTF8(ch, out);
+    }
+
+    public void appendString(String string) throws IOException {
+        for (int i = 0; i < string.length(); i++) {
+            appendChar(string.charAt(i));
+        }
+    }
+
+    public void appendUtf8StringPointable(UTF8StringPointable src, int byteStartOffset, int byteLength) throws IOException {
+        out.write(src.getByteArray(), byteStartOffset, byteLength);
+    }
+
+    public void appendUtf8StringPointable(UTF8StringPointable src) throws IOException {
+        appendUtf8StringPointable(src, src.getCharStartOffset(), src.getUTF8Length());
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java
new file mode 100644
index 0000000..317527e
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class UTF8StringCharacterIterator implements ICharIterator {
+
+    private UTF8StringPointable utf8Ptr;
+    private int pos;
+
+    public UTF8StringCharacterIterator reset(UTF8StringPointable utf8Ptr) {
+        this.utf8Ptr = utf8Ptr;
+        return reset();
+    }
+
+    public UTF8StringCharacterIterator reset() {
+        this.pos = utf8Ptr.getMetaDataLength();
+        return this;
+    }
+
+    @Override
+    public boolean hasNext() {
+        return pos < utf8Ptr.getMetaDataLength() + utf8Ptr.getUTF8Length();
+    }
+
+    @Override
+    public char next() {
+        char ret = utf8Ptr.charAt(pos);
+        pos += utf8Ptr.charSize(pos);
+        return ret;
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
index f58c8da..1713467 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
@@ -19,51 +19,39 @@
 
 package org.apache.hyracks.data.std.primitive;
 
-import org.junit.Test;
-
-import javax.xml.bind.DatatypeConverter;
+import static org.junit.Assert.assertTrue;
 
-import static org.junit.Assert.*;
+import org.junit.Test;
 
 public class ByteArrayPointableTest {
 
-    public static byte[] generatePointableBytes(byte[] bytes){
-        byte[] ret = new byte[bytes.length + ByteArrayPointable.SIZE_OF_LENGTH];
-        for (int i = 0; i < bytes.length; ++i){
-            ret[i+ ByteArrayPointable.SIZE_OF_LENGTH] = bytes[i];
-        }
-        ByteArrayPointable.putLength(bytes.length, ret, 0);
-        return ret;
-    }
-
     @Test
     public void testCompareTo() throws Exception {
-        byte [] bytes = generatePointableBytes(new byte[] { 1, 2, 3, 4});
-        ByteArrayPointable byteArrayPointable = new ByteArrayPointable();
-        byteArrayPointable.set(bytes, 0, bytes.length);
+        ByteArrayPointable byteArrayPointable = ByteArrayPointable
+                .generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4 });
 
-        testEqual(byteArrayPointable, generatePointableBytes(new byte[] { 1,2 ,3,4}));
+        testEqual(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4 }));
 
-        testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {2}));
-        testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {1,2,3,5}));
-        testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {1,2,3,4,5}));
+        testLessThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 2 }, 0, 1));
+        testLessThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 5 }));
+        testLessThan(byteArrayPointable,
+                ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4, 5 }));
 
-        testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { }));
-        testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { 0}));
-        testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { 1,2,3}));
+        testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] {}));
+        testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 0 }));
+        testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3 }));
 
     }
 
-
-    void testEqual(ByteArrayPointable pointable, byte [] bytes){
-        assertTrue(pointable.compareTo(bytes, 0, bytes.length) == 0);
+    void testEqual(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+        assertTrue(pointable.compareTo(bytes) == 0);
     }
 
-    void testLessThan(ByteArrayPointable pointable, byte[] bytes){
-        assertTrue(pointable.compareTo(bytes, 0, bytes.length) < 0);
+    void testLessThan(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+        assertTrue(pointable.compareTo(bytes) < 0);
     }
 
-    void testGreaterThan(ByteArrayPointable pointable, byte[] bytes){
-        assertTrue(pointable.compareTo(bytes, 0, bytes.length) > 0);
+    void testGreaterThan(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+        assertTrue(pointable.compareTo(bytes) > 0);
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
new file mode 100644
index 0000000..f134718
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.junit.Test;
+
+public class UTF8StringPointableTest {
+    public static UTF8StringPointable STRING_EMPTY = UTF8StringPointable
+            .generateUTF8Pointable(UTF8StringSample.EMPTY_STRING);
+    public static UTF8StringPointable STRING_UTF8_MIX = UTF8StringPointable
+            .generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX);
+    public static UTF8StringPointable STRING_UTF8_MIX_LOWERCASE = UTF8StringPointable.generateUTF8Pointable(
+            UTF8StringSample.STRING_UTF8_MIX_LOWERCASE);
+
+    public static UTF8StringPointable STRING_LEN_127 = UTF8StringPointable
+            .generateUTF8Pointable(UTF8StringSample.STRING_LEN_127);
+    public static UTF8StringPointable STRING_LEN_128 = UTF8StringPointable
+            .generateUTF8Pointable(UTF8StringSample.STRING_LEN_128);
+
+    @Test
+    public void testGetStringLength() throws Exception {
+        UTF8StringPointable utf8Ptr = UTF8StringPointable.generateUTF8Pointable(UTF8StringSample.STRING_LEN_127);
+        assertEquals(127, utf8Ptr.getUTF8Length());
+        assertEquals(1, utf8Ptr.getMetaDataLength());
+        assertEquals(127, utf8Ptr.getStringLength());
+
+        byte[] bytes = UTF8StringUtil.writeStringToBytes(UTF8StringSample.STRING_LEN_128);
+        utf8Ptr.set(bytes, 0, bytes.length);
+        assertEquals(128, utf8Ptr.getUTF8Length());
+        assertEquals(2, utf8Ptr.getMetaDataLength());
+        assertEquals(128, utf8Ptr.getStringLength());
+    }
+
+    @Test
+    public void testContains() throws Exception {
+        assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX, false));
+        assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX, true));
+        assertTrue(STRING_UTF8_MIX.contains(STRING_EMPTY, true));
+
+        assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX_LOWERCASE, true));
+        assertTrue(STRING_UTF8_MIX_LOWERCASE.contains(STRING_UTF8_MIX, true));
+    }
+
+    @Test
+    public void testStartsWith() throws Exception {
+        assertTrue(STRING_LEN_128.startsWith(STRING_LEN_127, true));
+        assertFalse(STRING_LEN_127.startsWith(STRING_LEN_128, true));
+
+        assertTrue(STRING_LEN_127.startsWith(STRING_EMPTY, true));
+    }
+
+    @Test
+    public void testEndsWith() throws Exception {
+        assertTrue(STRING_LEN_128.endsWith(STRING_LEN_127, true));
+        assertFalse(STRING_LEN_127.endsWith(STRING_LEN_128, true));
+
+        assertTrue(STRING_LEN_127.startsWith(STRING_EMPTY, true));
+    }
+
+    @Test
+    public void testConcat() throws Exception {
+        UTF8StringPointable expected = UTF8StringPointable.generateUTF8Pointable(
+                UTF8StringSample.generateStringRepeatBy(UTF8StringSample.ONE_ASCII_CHAR, 127 + 128));
+
+        GrowableArray storage = new GrowableArray();
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        STRING_LEN_127.concat(STRING_LEN_128, builder, storage);
+
+        UTF8StringPointable actual = new UTF8StringPointable();
+        actual.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, expected.compareTo(actual));
+
+        storage.reset();
+        STRING_LEN_127.concat(STRING_EMPTY, builder, storage);
+        actual.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, STRING_LEN_127.compareTo(actual));
+    }
+
+    @Test
+    public void testSubstr() throws Exception {
+        GrowableArray storage = new GrowableArray();
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+
+        STRING_LEN_128.substr(1, 127, builder, storage);
+        UTF8StringPointable result = new UTF8StringPointable();
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, STRING_LEN_127.compareTo(result));
+
+        storage.reset();
+        STRING_UTF8_MIX.substr(0, UTF8StringSample.STRING_UTF8_MIX.length(), builder, storage);
+        result.set(storage.getByteArray(), 0, storage.getLength());
+        assertEquals(0, STRING_UTF8_MIX.compareTo(result));
+    }
+
+    @Test
+    public void testSubstrBefore() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray storage = new GrowableArray();
+
+        STRING_LEN_128.substrBefore(STRING_LEN_127, builder, storage);
+        UTF8StringPointable result = new UTF8StringPointable();
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, STRING_EMPTY.compareTo(result));
+
+        storage.reset();
+        UTF8StringPointable testPtr = UTF8StringPointable.generateUTF8Pointable("Mix中文123");
+        UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("文");
+        UTF8StringPointable expect = UTF8StringPointable.generateUTF8Pointable("Mix中");
+        testPtr.substrBefore(pattern, builder, storage);
+        result.set(storage.getByteArray(), 0, storage.getLength());
+        assertEquals(0, expect.compareTo(result));
+    }
+
+    @Test
+    public void testSubstrAfter() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray storage = new GrowableArray();
+
+        STRING_LEN_128.substrAfter(STRING_LEN_127, builder, storage);
+        UTF8StringPointable result = new UTF8StringPointable();
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        UTF8StringPointable expect = UTF8StringPointable
+                .generateUTF8Pointable(Character.toString(UTF8StringSample.ONE_ASCII_CHAR));
+        assertEquals(0, expect.compareTo(result));
+
+        storage.reset();
+        UTF8StringPointable testPtr = UTF8StringPointable.generateUTF8Pointable("Mix中文123");
+        UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("文");
+        expect = UTF8StringPointable.generateUTF8Pointable("123");
+        testPtr.substrAfter(pattern, builder, storage);
+        result.set(storage.getByteArray(), 0, storage.getLength());
+        assertEquals(0, expect.compareTo(result));
+    }
+
+    @Test
+    public void testLowercase() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray storage = new GrowableArray();
+
+        UTF8StringPointable result = new UTF8StringPointable();
+        STRING_UTF8_MIX.lowercase(builder, storage);
+
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        assertEquals(0, STRING_UTF8_MIX_LOWERCASE.compareTo(result));
+    }
+
+    @Test
+    public void testUppercase() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray storage = new GrowableArray();
+
+        UTF8StringPointable result = new UTF8StringPointable();
+        STRING_UTF8_MIX_LOWERCASE.uppercase(builder, storage);
+
+        result.set(storage.getByteArray(), 0, storage.getLength());
+
+        UTF8StringPointable expected = UTF8StringPointable
+                .generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX_LOWERCASE.toUpperCase());
+        assertEquals(0, expected.compareTo(result));
+
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java
new file mode 100644
index 0000000..bc0c629
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.junit.Test;
+
+public class UTF8StringBuilderTest {
+
+    UTF8StringBuilder utf8StringBuilder = new UTF8StringBuilder();
+    GrowableArray storage = new GrowableArray();
+
+    @Test
+    public void testNormalBuilder() throws IOException {
+        testOneString(UTF8StringSample.EMPTY_STRING, 0);
+        testOneString(UTF8StringSample.EMPTY_STRING, 127);
+
+        testOneString(UTF8StringSample.STRING_UTF8_MIX, 127);
+        testOneString(UTF8StringSample.STRING_LEN_128, 128);
+
+        testOneString(UTF8StringSample.STRING_LEN_MEDIUM, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_LARGE, VarLenIntEncoderDecoder.BOUND_THREE_BYTE);
+    }
+
+    @Test
+    public void testShrinkAfterFinish() throws IOException {
+        testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_THREE_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_FOUR_BYTE);
+    }
+
+    @Test
+    public void testIncreaseAfterFinish() throws IOException {
+        testOneString(UTF8StringSample.STRING_LEN_128, VarLenIntEncoderDecoder.BOUND_ONE_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_MEDIUM, VarLenIntEncoderDecoder.BOUND_ONE_BYTE);
+        testOneString(UTF8StringSample.STRING_LEN_LARGE, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+    }
+
+    public void testOneString(String testString, int estimateLength) throws IOException {
+        storage.reset();
+        utf8StringBuilder.reset(storage, estimateLength);
+        for (char c : testString.toCharArray()) {
+            utf8StringBuilder.appendChar(c);
+        }
+        utf8StringBuilder.finish();
+        assertEquals(testString, UTF8StringUtil.toString(new StringBuilder(), storage.getByteArray(), 0).toString());
+
+        UTF8StringPointable hyracksUtf = new UTF8StringPointable();
+        hyracksUtf.set(storage.getByteArray(), 0, storage.getLength());
+
+        GrowableArray storage2 = new GrowableArray();
+        utf8StringBuilder.reset(storage2, estimateLength);
+        utf8StringBuilder.appendUtf8StringPointable(hyracksUtf);
+        utf8StringBuilder.finish();
+        assertEquals(testString, UTF8StringUtil.toString(new StringBuilder(), storage.getByteArray(), 0).toString());
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java
new file mode 100644
index 0000000..5268c82
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.junit.Test;
+
+public class UTF8StringCharacterIteratorTest {
+
+    private UTF8StringCharacterIterator iterator = new UTF8StringCharacterIterator();
+
+    private void testEachIterator(String testString) {
+        UTF8StringPointable ptr = UTF8StringPointable.generateUTF8Pointable(testString);
+        iterator.reset(ptr);
+        for (char ch : testString.toCharArray()) {
+            assertTrue(iterator.hasNext());
+            assertEquals(ch, iterator.next());
+        }
+        assertFalse(iterator.hasNext());
+
+        iterator.reset();
+        for (char ch : testString.toCharArray()) {
+            assertTrue(iterator.hasNext());
+            assertEquals(ch, iterator.next());
+        }
+        assertFalse(iterator.hasNext());
+    }
+
+    @Test
+    public void testIterator(){
+        testEachIterator(UTF8StringSample.EMPTY_STRING);
+        testEachIterator(UTF8StringSample.STRING_UTF8_MIX);
+        testEachIterator(UTF8StringSample.STRING_LEN_128);
+        testEachIterator(UTF8StringSample.STRING_LEN_128);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/pom.xml b/hyracks/hyracks-dataflow-common/pom.xml
index c6e85cd..ad4dfa7 100644
--- a/hyracks/hyracks-dataflow-common/pom.xml
+++ b/hyracks/hyracks-dataflow-common/pom.xml
@@ -17,48 +17,61 @@
  ! under the License.
  !-->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <artifactId>hyracks-dataflow-common</artifactId>
-  <name>hyracks-dataflow-common</name>
-  <parent>
-    <groupId>org.apache.hyracks</groupId>
-    <artifactId>hyracks</artifactId>
-    <version>0.2.17-SNAPSHOT</version>
-  </parent>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>hyracks-dataflow-common</artifactId>
+    <name>hyracks-dataflow-common</name>
+    <parent>
+        <groupId>org.apache.hyracks</groupId>
+        <artifactId>hyracks</artifactId>
+        <version>0.2.17-SNAPSHOT</version>
+    </parent>
 
-  <licenses>
-    <license>
-      <name>Apache License, Version 2.0</name>
-      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
-      <distribution>repo</distribution>
-      <comments>A business-friendly OSS license</comments>
-    </license>
-  </licenses>
+    <licenses>
+        <license>
+            <name>Apache License, Version 2.0</name>
+            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+            <distribution>repo</distribution>
+            <comments>A business-friendly OSS license</comments>
+        </license>
+    </licenses>
 
 
-  <dependencies>
-  	<dependency>
-  		<groupId>org.apache.hyracks</groupId>
-  		<artifactId>hyracks-api</artifactId>
-  		<version>0.2.17-SNAPSHOT</version>
-  		<type>jar</type>
-  		<scope>compile</scope>
-  	</dependency>
-  	<dependency>
-  		<groupId>org.apache.hyracks</groupId>
-  		<artifactId>hyracks-data-std</artifactId>
-  		<version>0.2.17-SNAPSHOT</version>
-    </dependency>
-       <dependency>
-  		<groupId>org.apache.hyracks</groupId>
-  		<artifactId>hyracks-control-nc</artifactId>
-  		<version>0.2.17-SNAPSHOT</version>
-        <scope>test</scope>
-    </dependency>
-    <dependency>
-         <groupId>commons-io</groupId>
-         <artifactId>commons-io</artifactId>
-    </dependency>
-  </dependencies>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-util</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-util</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-api</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>jar</type>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-data-std</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-control-nc</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+        </dependency>
+    </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
index 4c8bc1e..d16fca7 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
@@ -19,57 +19,71 @@
 
 package org.apache.hyracks.dataflow.common.data.marshalling;
 
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
 public class ByteArraySerializerDeserializer implements ISerializerDeserializer<byte[]> {
 
     private static final long serialVersionUID = 1L;
 
-    public final static ByteArraySerializerDeserializer INSTANCE = new ByteArraySerializerDeserializer();
-
-    private ByteArraySerializerDeserializer() {
+    public ByteArraySerializerDeserializer() {
     }
 
+    private byte[] metaBuffer = new byte[5];
+
+    /**
+     * Returns a pure byte array which doesn't have the length-encoding prefix.
+     *
+     * @param in - Stream to read instance from.
+     * @return the content bytes, with the encoded length prefix stripped
+     * @throws HyracksDataException
+     */
     @Override
     public byte[] deserialize(DataInput in) throws HyracksDataException {
         try {
-            int length = in.readUnsignedShort();
-            byte[] bytes = new byte[length + ByteArrayPointable.SIZE_OF_LENGTH];
-            in.readFully(bytes, ByteArrayPointable.SIZE_OF_LENGTH, length);
-            ByteArrayPointable.putLength(length, bytes, 0);
+            int contentLength = VarLenIntEncoderDecoder.decode(in);
+            byte[] bytes = new byte[contentLength];
+            in.readFully(bytes, 0, contentLength);
             return bytes;
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
     }
 
+    /**
+     * Serializes a pure, content-only byte array which doesn't have the encoded length at the beginning.
+     * The encoded length followed by the entire array will be written to the output.
+     */
     @Override
     public void serialize(byte[] instance, DataOutput out) throws HyracksDataException {
-
-        if (instance.length > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException(
-                    "encoded byte array too long: " + instance.length + " bytes");
+        try {
+            int metaLength = VarLenIntEncoderDecoder.encode(instance.length, metaBuffer, 0);
+            out.write(metaBuffer, 0, metaLength);
+            out.write(instance);
+        } catch (IOException e) {
+            throw new HyracksDataException(e);
         }
+    }
+
+    public void serialize(ByteArrayPointable byteArrayPtr, DataOutput out) throws HyracksDataException {
         try {
-            int realLength = ByteArrayPointable.getFullLength(instance, 0);
-            out.write(instance, 0, realLength);
+            out.write(byteArrayPtr.getByteArray(), byteArrayPtr.getStartOffset(), byteArrayPtr.getLength());
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
     }
 
+    // A pure byte array, which doesn't have the length information encoded at the beginning
     public void serialize(byte[] instance, int start, int length, DataOutput out) throws HyracksDataException {
-        if (length > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException(
-                    "encoded byte array too long: " + instance.length + " bytes");
-        }
+        int metaLength = VarLenIntEncoderDecoder.encode(length, metaBuffer, 0);
         try {
+            out.write(metaBuffer, 0, metaLength);
             out.write(instance, start, length);
         } catch (IOException e) {
             throw new HyracksDataException(e);
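
To make the new wire format concrete, here is a hedged round-trip sketch (the driver class is hypothetical; the serializer methods are the ones added above). The serialized form is a variable-length encoded length (1 to 5 bytes, matching the metaBuffer above) followed by the raw content:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.Arrays;

import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializer;

public class ByteArraySerdeSketch {
    public static void main(String[] args) throws Exception {
        ByteArraySerializerDeserializer serde = new ByteArraySerializerDeserializer();

        byte[] content = new byte[] { 1, 2, 3, 4 };
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        serde.serialize(content, new DataOutputStream(bos));
        System.out.println(bos.size());   // 5: one length byte plus four content bytes

        // deserialize() strips the length prefix again and returns only the content.
        byte[] back = serde.deserialize(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println(Arrays.equals(content, back));   // true
    }
}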

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
index 2435672..aee11bc 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
@@ -24,19 +24,21 @@ import java.io.IOException;
 
 import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
 
 public class UTF8StringSerializerDeserializer implements ISerializerDeserializer<String> {
-    public static final UTF8StringSerializerDeserializer INSTANCE = new UTF8StringSerializerDeserializer();
 
     private static final long serialVersionUID = 1L;
+    private UTF8StringReader reader = new UTF8StringReader();
+    private UTF8StringWriter writer = new UTF8StringWriter();
 
-    private UTF8StringSerializerDeserializer() {
-    }
+    public UTF8StringSerializerDeserializer() {}
 
     @Override
     public String deserialize(DataInput in) throws HyracksDataException {
         try {
-            return in.readUTF();
+            return reader.readUTF(in);
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
@@ -45,7 +47,7 @@ public class UTF8StringSerializerDeserializer implements ISerializerDeserializer
     @Override
     public void serialize(String instance, DataOutput out) throws HyracksDataException {
         try {
-            out.writeUTF(instance);
+            writer.writeUTF8(instance, out);
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
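
Since the serde now carries stateful UTF8StringReader/UTF8StringWriter helpers, each consumer constructs its own instance (as the test diffs in this patch show). A minimal round-trip sketch, with a hypothetical driver class:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

public class UTF8StringSerdeSketch {
    public static void main(String[] args) throws Exception {
        // No more shared INSTANCE: every record descriptor builds its own serde.
        UTF8StringSerializerDeserializer serde = new UTF8StringSerializerDeserializer();

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        serde.serialize("Mix中文123", new DataOutputStream(bos));

        String back = serde.deserialize(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println("Mix中文123".equals(back));   // true
    }
}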

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
index b7d302b..3d081af 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
@@ -26,23 +26,12 @@ import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
 public class ByteArrayNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
     public static ByteArrayNormalizedKeyComputerFactory INSTANCE = new ByteArrayNormalizedKeyComputerFactory();
 
-    @Override public INormalizedKeyComputer createNormalizedKeyComputer() {
+    @Override
+    public INormalizedKeyComputer createNormalizedKeyComputer() {
         return new INormalizedKeyComputer() {
-            @Override public int normalize(byte[] bytes, int start, int length) {
-                int normalizedKey = 0;
-                int realLength = ByteArrayPointable.getLength(bytes, start);
-                for (int i = 0; i < 3; ++i) {
-                    normalizedKey <<= 8;
-                    if (i < realLength) {
-                        normalizedKey += bytes[start + ByteArrayPointable.SIZE_OF_LENGTH + i] & 0xff;
-                    }
-                }
-                // last byte, shift 7 instead of 8 to avoid negative number
-                normalizedKey <<= 7;
-                if (3 < realLength) {
-                    normalizedKey += (bytes[start + ByteArrayPointable.SIZE_OF_LENGTH + 3] & 0xfe) >> 1;
-                }
-                return normalizedKey;
+            @Override
+            public int normalize(byte[] bytes, int start, int length) {
+                return ByteArrayPointable.normalize(bytes, start);
             }
         };
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
index 941afda..79936de 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
@@ -20,7 +20,7 @@ package org.apache.hyracks.dataflow.common.data.normalizers;
 
 import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
 import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class UTF8StringNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
     private static final long serialVersionUID = 1L;
@@ -30,17 +30,7 @@ public class UTF8StringNormalizedKeyComputerFactory implements INormalizedKeyCom
         return new INormalizedKeyComputer() {
             @Override
             public int normalize(byte[] bytes, int start, int length) {
-                int len = UTF8StringPointable.getUTFLength(bytes, start);
-                int nk = 0;
-                int offset = start + 2;
-                for (int i = 0; i < 2; ++i) {
-                    nk <<= 16;
-                    if (i < len) {
-                        nk += ((int) UTF8StringPointable.charAt(bytes, offset)) & 0xffff;
-                        offset += UTF8StringPointable.charSize(bytes, offset);
-                    }
-                }
-                return nk;
+                return UTF8StringUtil.normalize(bytes, start);
             }
         };
     }
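
Both normalizer factories now delegate to the pointable/util normalize helpers. As a sketch of how a normalized key is used (a hypothetical driver class; INSTANCE, createNormalizedKeyComputer and generatePointableFromPureBytes all appear elsewhere in this patch): the key is a single int computed from the leading bytes, so a sort can compare keys first and fall back to a full byte comparison only when the keys tie.

import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
import org.apache.hyracks.dataflow.common.data.normalizers.ByteArrayNormalizedKeyComputerFactory;

public class NormalizedKeySketch {
    public static void main(String[] args) {
        INormalizedKeyComputer computer =
                ByteArrayNormalizedKeyComputerFactory.INSTANCE.createNormalizedKeyComputer();

        // generatePointableFromPureBytes prepends the var-len length, so the backing
        // array is already in the layout the normalizer expects.
        ByteArrayPointable a = ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4 });
        ByteArrayPointable b = ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 9 });

        int keyA = computer.normalize(a.getByteArray(), a.getStartOffset(), a.getLength());
        int keyB = computer.normalize(b.getByteArray(), b.getStartOffset(), b.getLength());

        // {1, 2, 3, 4} sorts before {9}, and the normalized keys preserve that order.
        System.out.println(keyA < keyB);   // expected: true
    }
}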

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
index c71950b..c85d1b2 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
@@ -19,12 +19,12 @@
 
 package org.apache.hyracks.dataflow.common.data.parsers;
 
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.Arrays;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializer;
+import org.apache.hyracks.util.bytes.Base64Parser;
 
 public class ByteArrayBase64ParserFactory implements IValueParserFactory {
 
@@ -33,20 +33,19 @@ public class ByteArrayBase64ParserFactory implements IValueParserFactory {
     private ByteArrayBase64ParserFactory() {
     }
 
-    @Override public IValueParser createValueParser() {
+    @Override
+    public IValueParser createValueParser() {
         return new IValueParser() {
-            private byte[] buffer;
-            private byte[] quadruplet = new byte[4];
+            Base64Parser parser = new Base64Parser();
+            ByteArraySerializerDeserializer serializer = new ByteArraySerializerDeserializer();
 
-            @Override public void parse(char[] input, int start, int length, DataOutput out)
+            @Override
+            public void parse(char[] input, int start, int length, DataOutput out)
                     throws HyracksDataException {
-                if (length % 4 != 0) {
-                    throw new HyracksDataException(
-                            "Invalid Base64 string, the length of the string should be a multiple of 4");
-                }
-                buffer = extractPointableArrayFromBase64String(input, start, length, buffer, quadruplet);
+
+                parser.generatePureByteArrayFromBase64String(input, start, length);
                 try {
-                    out.write(buffer, 0, ByteArrayPointable.getFullLength(buffer, 0));
+                    serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
                 } catch (IOException e) {
                     throw new HyracksDataException(e);
                 }
@@ -54,194 +53,4 @@ public class ByteArrayBase64ParserFactory implements IValueParserFactory {
         };
     }
 
-    // The following base64 related implementation is copied/changed base on javax.xml.bind.DatatypeConverterImpl.java
-    private static final byte[] decodeMap = initDecodeMap();
-    private static final byte PADDING = 127;
-
-    private static byte[] initDecodeMap() {
-        byte[] map = new byte[128];
-        Arrays.fill(map, (byte) -1);
-
-        int i;
-        for (i = 'A'; i <= 'Z'; i++) {
-            map[i] = (byte) (i - 'A');
-        }
-        for (i = 'a'; i <= 'z'; i++) {
-            map[i] = (byte) (i - 'a' + 26);
-        }
-        for (i = '0'; i <= '9'; i++) {
-            map[i] = (byte) (i - '0' + 52);
-        }
-        map['+'] = 62;
-        map['/'] = 63;
-        map['='] = PADDING;
-
-        return map;
-    }
-
-    /**
-     * computes the length of binary data speculatively.
-     * Our requirement is to create byte[] of the exact length to store the binary data.
-     * If we do this in a straight-forward way, it takes two passes over the data.
-     * Experiments show that this is a non-trivial overhead (35% or so is spent on
-     * the first pass in calculating the length.)
-     * So the approach here is that we compute the length speculatively, without looking
-     * at the whole contents. The obtained speculative value is never less than the
-     * actual length of the binary data, but it may be bigger. So if the speculation
-     * goes wrong, we'll pay the cost of reallocation and buffer copying.
-     * If the base64 text is tightly packed with no indentation nor illegal char
-     * (like what most web services produce), then the speculation of this method
-     * will be correct, so we get the performance benefit.
-     */
-    private static int guessLength(char[] chars, int start, int length) {
-
-        // compute the tail '=' chars
-        int j = length - 1;
-        for (; j >= 0; j--) {
-            byte code = decodeMap[chars[start + j]];
-            if (code == PADDING) {
-                continue;
-            }
-            if (code == -1) // most likely this base64 text is indented. go with the upper bound
-            {
-                return length / 4 * 3;
-            }
-            break;
-        }
-
-        j++;    // text.charAt(j) is now at some base64 char, so +1 to make it the size
-        int padSize = length - j;
-        if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
-        {
-            return length / 4 * 3;
-        }
-
-        // so far this base64 looks like it's unindented tightly packed base64.
-        // take a chance and create an array with the expected size
-        return length / 4 * 3 - padSize;
-    }
-
-    private static int guessLength(byte[] chars, int start, int length) {
-
-        // compute the tail '=' chars
-        int j = length - 1;
-        for (; j >= 0; j--) {
-            byte code = decodeMap[chars[start + j]];
-            if (code == PADDING) {
-                continue;
-            }
-            if (code == -1) // most likely this base64 text is indented. go with the upper bound
-            {
-                return length / 4 * 3;
-            }
-            break;
-        }
-
-        j++;    // text.charAt(j) is now at some base64 char, so +1 to make it the size
-        int padSize = length - j;
-        if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
-        {
-            return length / 4 * 3;
-        }
-
-        // so far this base64 looks like it's unindented tightly packed base64.
-        // take a chance and create an array with the expected size
-        return length / 4 * 3 - padSize;
-    }
-
-    public static byte[] extractPointableArrayFromBase64String(byte[] input, int start, int length,
-            byte[] bufferNeedToReset, byte[] quadruplet)
-            throws HyracksDataException {
-        int contentOffset = ByteArrayPointable.SIZE_OF_LENGTH;
-        final int buflen = guessLength(input, start, length) + contentOffset;
-        bufferNeedToReset = ByteArrayHexParserFactory.ensureCapacity(buflen, bufferNeedToReset);
-        int byteArrayLength = parseBase64String(input, start, length, bufferNeedToReset, contentOffset,
-                quadruplet);
-        if (byteArrayLength > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException("The decoded byte array is too long.");
-        }
-        ByteArrayPointable.putLength(byteArrayLength, bufferNeedToReset, 0);
-        return bufferNeedToReset;
-    }
-
-    public static byte[] extractPointableArrayFromBase64String(char[] input, int start, int length,
-            byte[] bufferNeedToReset, byte[] quadruplet)
-            throws HyracksDataException {
-        int contentOffset = ByteArrayPointable.SIZE_OF_LENGTH;
-        final int buflen = guessLength(input, start, length) + contentOffset;
-        bufferNeedToReset = ByteArrayHexParserFactory.ensureCapacity(buflen, bufferNeedToReset);
-        int byteArrayLength = parseBase64String(input, start, length, bufferNeedToReset, contentOffset,
-                quadruplet);
-        if (byteArrayLength > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException("The decoded byte array is too long.");
-        }
-        ByteArrayPointable.putLength(byteArrayLength, bufferNeedToReset, 0);
-        return bufferNeedToReset;
-    }
-
-    static int parseBase64String(char[] input, int start, int length, byte[] out, int offset,
-            byte[] quadruplet) throws HyracksDataException {
-        int outLength = 0;
-
-        int i;
-        int q = 0;
-
-        // convert each quadruplet to three bytes.
-        for (i = 0; i < length; i++) {
-            char ch = input[start + i];
-            byte v = decodeMap[ch];
-
-            if (v == -1) {
-                throw new HyracksDataException("Invalid Base64 character");
-            }
-            quadruplet[q++] = v;
-
-            if (q == 4) {
-                // quadruplet is now filled.
-                out[offset + outLength++] = (byte) ((quadruplet[0] << 2) | (quadruplet[1] >> 4));
-                if (quadruplet[2] != PADDING) {
-                    out[offset + outLength++] = (byte) ((quadruplet[1] << 4) | (quadruplet[2] >> 2));
-                }
-                if (quadruplet[3] != PADDING) {
-                    out[offset + outLength++] = (byte) ((quadruplet[2] << 6) | (quadruplet[3]));
-                }
-                q = 0;
-            }
-        }
-
-        return outLength;
-    }
-
-    static int parseBase64String(byte[] input, int start, int length, byte[] out, int offset,
-            byte[] quadruplet) throws HyracksDataException {
-        int outLength = 0;
-
-        int i;
-        int q = 0;
-
-        // convert each quadruplet to three bytes.
-        for (i = 0; i < length; i++) {
-            char ch = (char)input[start + i];
-            byte v = decodeMap[ch];
-
-            if (v == -1) {
-                throw new HyracksDataException("Invalid Base64 character");
-            }
-            quadruplet[q++] = v;
-
-            if (q == 4) {
-                // quadruplet is now filled.
-                out[offset + outLength++] = (byte) ((quadruplet[0] << 2) | (quadruplet[1] >> 4));
-                if (quadruplet[2] != PADDING) {
-                    out[offset + outLength++] = (byte) ((quadruplet[1] << 4) | (quadruplet[2] >> 2));
-                }
-                if (quadruplet[3] != PADDING) {
-                    out[offset + outLength++] = (byte) ((quadruplet[2] << 6) | (quadruplet[3]));
-                }
-                q = 0;
-            }
-        }
-
-        return outLength;
-    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
index ec249f3..f1f1eb1 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
@@ -19,12 +19,12 @@
 
 package org.apache.hyracks.dataflow.common.data.parsers;
 
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.Arrays;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializer;
+import org.apache.hyracks.util.bytes.HexParser;
 
 public class ByteArrayHexParserFactory implements IValueParserFactory {
     public static ByteArrayHexParserFactory INSTANCE = new ByteArrayHexParserFactory();
@@ -32,15 +32,18 @@ public class ByteArrayHexParserFactory implements IValueParserFactory {
     private ByteArrayHexParserFactory() {
     }
 
-    @Override public IValueParser createValueParser() {
+    @Override
+    public IValueParser createValueParser() {
         return new IValueParser() {
-            private byte[] buffer = new byte[] { };
+            HexParser parser = new HexParser();
+            ByteArraySerializerDeserializer serializer = new ByteArraySerializerDeserializer();
 
-            @Override public void parse(char[] input, int start, int length, DataOutput out)
+            @Override
+            public void parse(char[] input, int start, int length, DataOutput out)
                     throws HyracksDataException {
                 try {
-                    buffer = extractPointableArrayFromHexString(input, start, length, buffer);
-                    out.write(buffer, 0, ByteArrayPointable.getFullLength(buffer, 0));
+                    parser.generateByteArrayFromHexString(input, start, length);
+                    serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
                 } catch (IOException e) {
                     throw new HyracksDataException(e);
                 }
@@ -48,85 +51,4 @@ public class ByteArrayHexParserFactory implements IValueParserFactory {
         };
     }
 
-    public static boolean isValidHexChar(char c) {
-        if (c >= '0' && c <= '9'
-                || c >= 'a' && c <= 'f'
-                || c >= 'A' && c <= 'F') {
-            return true;
-        }
-        return false;
-    }
-
-    public static byte[] extractPointableArrayFromHexString(char[] input, int start, int length,
-            byte[] bufferNeedToReset) throws HyracksDataException {
-        if (length % 2 != 0) {
-            throw new HyracksDataException(
-                    "Invalid hex string for binary type: the string length should be a muliple of 2.");
-        }
-        int byteLength = length / 2;
-        bufferNeedToReset = ensureCapacity(byteLength + ByteArrayPointable.SIZE_OF_LENGTH, bufferNeedToReset);
-        extractByteArrayFromHexString(input, start, length, bufferNeedToReset,
-                ByteArrayPointable.SIZE_OF_LENGTH);
-        if (byteLength > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException("The decoded byte array is too long.");
-        }
-        ByteArrayPointable.putLength(byteLength, bufferNeedToReset, 0);
-        return bufferNeedToReset;
-    }
-
-    public static byte[] extractPointableArrayFromHexString(byte[] input, int start, int length,
-            byte[] bufferNeedToReset) throws HyracksDataException {
-        if (length % 2 != 0) {
-            throw new HyracksDataException(
-                    "Invalid hex string for binary type: the string length should be a muliple of 2.");
-        }
-        int byteLength = length / 2;
-        bufferNeedToReset = ensureCapacity(byteLength + ByteArrayPointable.SIZE_OF_LENGTH, bufferNeedToReset);
-        extractByteArrayFromHexString(input, start, length, bufferNeedToReset,
-                ByteArrayPointable.SIZE_OF_LENGTH);
-        if (byteLength > ByteArrayPointable.MAX_LENGTH) {
-            throw new HyracksDataException("The decoded byte array is too long.");
-        }
-        ByteArrayPointable.putLength(byteLength, bufferNeedToReset, 0);
-        return bufferNeedToReset;
-    }
-
-    static byte[] ensureCapacity(int capacity, byte[] original) {
-        if (original == null) {
-            return new byte[capacity];
-        }
-        if (original.length < capacity) {
-            return Arrays.copyOf(original, capacity);
-        }
-        return original;
-    }
-
-    private static int getValueFromValidHexChar(char c) throws HyracksDataException {
-        if (!isValidHexChar(c)) {
-            throw new HyracksDataException("Invalid hex character : " + c);
-        }
-        if (c >= '0' && c <= '9') {
-            return c - '0';
-        }
-        if (c >= 'a' && c <= 'f') {
-            return 10 + c - 'a';
-        }
-        return 10 + c - 'A';
-    }
-
-    private static void extractByteArrayFromHexString(char[] input, int start, int length, byte[] output,
-            int offset) throws HyracksDataException {
-        for (int i = 0; i < length; i += 2) {
-            output[offset + i / 2] = (byte) ((getValueFromValidHexChar(input[start + i]) << 4) +
-                    getValueFromValidHexChar(input[start + i + 1]));
-        }
-    }
-
-    private static void extractByteArrayFromHexString(byte[] input, int start, int length, byte[] output,
-            int offset) throws HyracksDataException {
-        for (int i = 0; i < length; i += 2) {
-            output[offset + i / 2] = (byte) ((getValueFromValidHexChar((char)input[start + i]) << 4) +
-                    getValueFromValidHexChar((char)input[start + i + 1]));
-        }
-    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
index 7294e2d..58ee687 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
@@ -22,6 +22,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringWriter;
 
 public class UTF8StringParserFactory implements IValueParserFactory {
     public static final IValueParserFactory INSTANCE = new UTF8StringParserFactory();
@@ -34,53 +35,12 @@ public class UTF8StringParserFactory implements IValueParserFactory {
     @Override
     public IValueParser createValueParser() {
         return new IValueParser() {
-            private byte[] utf8;
+            private UTF8StringWriter writer = new UTF8StringWriter();
 
             @Override
             public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
-                int utflen = 0;
-                for (int i = 0; i < length; i++) {
-                    char ch = buffer[i + start];
-                    if ((ch >= 0x0001) && (ch <= 0x007F)) {
-                        utflen++;
-                    } else if (ch > 0x07ff) {
-                        utflen += 3;
-                    } else {
-                        utflen += 2;
-                    }
-                }
-
-                if (utf8 == null || utf8.length < utflen + 2) {
-                    utf8 = new byte[utflen + 2];
-                }
-
-                int count = 0;
-                utf8[count++] = (byte) ((utflen >>> 8) & 0xff);
-                utf8[count++] = (byte) ((utflen >>> 0) & 0xff);
-
-                int i = 0;
-                for (i = 0; i < length; i++) {
-                    char ch = buffer[i + start];
-                    if (!((ch >= 0x0001) && (ch <= 0x007F)))
-                        break;
-                    utf8[count++] = (byte) ch;
-                }
-
-                for (; i < length; i++) {
-                    char ch = buffer[i + start];
-                    if ((ch >= 0x0001) && (ch <= 0x007F)) {
-                        utf8[count++] = (byte) ch;
-                    } else if (ch > 0x07FF) {
-                        utf8[count++] = (byte) (0xE0 | ((ch >> 12) & 0x0F));
-                        utf8[count++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
-                        utf8[count++] = (byte) (0x80 | ((ch >> 0) & 0x3F));
-                    } else {
-                        utf8[count++] = (byte) (0xC0 | ((ch >> 6) & 0x1F));
-                        utf8[count++] = (byte) (0x80 | ((ch >> 0) & 0x3F));
-                    }
-                }
                 try {
-                    out.write(utf8, 0, utflen + 2);
+                    writer.writeUTF8(buffer, start, length, out);
                 } catch (IOException e) {
                     throw new HyracksDataException(e);
                 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java
deleted file mode 100644
index 3b05824..0000000
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hyracks.dataflow.common.data.util;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-public class StringUtils {
-    public static int writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
-        if (c >= 0x0000 && c <= 0x007F) {
-            dos.writeByte(c);
-            return 1;
-        } else if (c <= 0x07FF) {
-            dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
-            dos.writeByte((byte) (0x80 | (c & 0x3F)));
-            return 2;
-        } else {
-            dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
-            dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
-            dos.writeByte((byte) (0x80 | (c & 0x3F)));
-            return 3;
-        }
-    }
-
-    public static void writeUTF8Len(int len, DataOutput dos) throws IOException {
-        dos.write((len >>> 8) & 0xFF);
-        dos.write((len >>> 0) & 0xFF);
-    }
-
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
index ad4461d..05710ad 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
@@ -53,9 +53,9 @@ public class FrameFixedFieldTupleAppenderTest {
     FrameFixedFieldAppender appender;
     static ISerializerDeserializer[] fields = new ISerializerDeserializer[] {
             IntegerSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE,
+            new UTF8StringSerializerDeserializer(),
             IntegerSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE,
+            new UTF8StringSerializerDeserializer(),
     };
     static RecordDescriptor recordDescriptor = new RecordDescriptor(fields);
     static ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(recordDescriptor.getFieldCount());

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
index 8534388..f0e831a 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
@@ -19,58 +19,44 @@
 
 package org.apache.hyracks.dataflow.common.data.marshalling;
 
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import org.junit.Test;
+import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.util.Arrays;
-import java.util.Random;
 
-import static org.junit.Assert.assertTrue;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.junit.Test;
 
 public class ByteArraySerializerDeserializerTest {
-    Random random = new Random();
 
-    public static byte[] generateRandomBytes(int maxSize, Random random) {
-        int size = random.nextInt(maxSize);
-        byte[] bytes = new byte[size + ByteArrayPointable.SIZE_OF_LENGTH];
-        random.nextBytes(bytes);
-        ByteArrayPointable.putLength(size, bytes, 0);
-        return bytes;
-    }
+    ByteArrayPointable bytePtr = new ByteArrayPointable();
+    ByteArraySerializerDeserializer serder = new ByteArraySerializerDeserializer();
 
     @Test
     public void testSerializeDeserializeRandomBytes() throws Exception {
-        for (int i = 0; i < 10; ++i) {
-            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
-            byte[] randomBytes = generateRandomBytes(ByteArrayPointable.MAX_LENGTH + 1, random);
+        testOneByteArray(UTF8StringSample.EMPTY_STRING.getBytes());
+        testOneByteArray(UTF8StringSample.STRING_UTF8_MIX.getBytes());
+        testOneByteArray(UTF8StringSample.STRING_LEN_128.getBytes());
+        testOneByteArray(UTF8StringSample.STRING_LEN_MEDIUM.getBytes());
+        testOneByteArray(UTF8StringSample.STRING_LEN_LARGE.getBytes());
+    }
 
-            ByteArraySerializerDeserializer.INSTANCE.serialize(randomBytes, new DataOutputStream(outputStream));
-            byte[] result = outputStream.toByteArray();
-            assertTrue(Arrays.equals(randomBytes, result));
+    void testOneByteArray(byte[] testBytes) throws HyracksDataException {
+        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
 
-            ByteArrayInputStream inputStream = new ByteArrayInputStream(result);
-            assertTrue(Arrays.equals(randomBytes,
-                    ByteArraySerializerDeserializer.INSTANCE.deserialize(new DataInputStream(inputStream))));
-        }
+        serder.serialize(testBytes, new DataOutputStream(outputStream));
 
-    }
+        bytePtr.set(outputStream.toByteArray(), 0, outputStream.size());
+        assertTrue(Arrays.equals(testBytes, ByteArrayPointable.copyContent(bytePtr)));
+
+        ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
+        assertTrue(Arrays.equals(testBytes, serder.deserialize(new DataInputStream(inputStream))));
 
-    @Test
-    public void testPutGetLength() throws Exception {
-        final int size = 5;
-        byte[] newBytes = new byte[size];
-        for (int i = 0; i < 10; ++i) {
-            int length = random.nextInt(ByteArrayPointable.MAX_LENGTH +1);
-            for (int j = 0; j < size - 1; ++j) {
-                ByteArrayPointable.putLength(length, newBytes, j);
-                int result = ByteArrayPointable.getLength(newBytes, j);
-                assertTrue(result == length);
-            }
-        }
     }
 
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
index 1645631..4d3eb49 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
@@ -19,14 +19,13 @@
 
 package org.apache.hyracks.dataflow.common.data.normalizers;
 
-import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializerTest;
-import org.junit.Test;
+import static junit.framework.Assert.assertTrue;
 
 import java.util.Random;
 
-import static junit.framework.Assert.assertTrue;
+import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.junit.Test;
 
 public class ByteArrayNormalizedKeyComputerFactoryTest {
 
@@ -34,33 +33,21 @@ public class ByteArrayNormalizedKeyComputerFactoryTest {
 
     INormalizedKeyComputer computer = ByteArrayNormalizedKeyComputerFactory.INSTANCE.createNormalizedKeyComputer();
 
-    public static ByteArrayPointable generateRandomByteArrayPointable(int maxSize, Random random) {
-        byte[] bytes = ByteArraySerializerDeserializerTest
-                .generateRandomBytes(maxSize, random);
-        ByteArrayPointable pointable = new ByteArrayPointable();
-        pointable.set(bytes, 0, bytes.length);
-        return pointable;
-    }
-
     @Test
     public void testRandomNormalizedKey() {
         for (int i = 0; i < 10; ++i) {
-            ByteArrayPointable pointable1 = generateRandomByteArrayPointable(ByteArrayPointable.MAX_LENGTH + 1,
-                    random);
-
-            ByteArrayPointable pointable2 = generateRandomByteArrayPointable(ByteArrayPointable.MAX_LENGTH + 1,
-                    random);
+            ByteArrayPointable pointable1 = generateRandomByteArrayPointableWithFixLength(
+                    Math.abs(random.nextInt((i + 1) * 10)), random);
+            ByteArrayPointable pointable2 = generateRandomByteArrayPointableWithFixLength(
+                    Math.abs(random.nextInt((i + 1) * 10)), random);
             assertNormalizeValue(pointable1, pointable2, computer);
         }
     }
 
     public static ByteArrayPointable generateRandomByteArrayPointableWithFixLength(int length, Random random) {
-        byte[] bytes = new byte[length + ByteArrayPointable.SIZE_OF_LENGTH];
+        byte[] bytes = new byte[length];
         random.nextBytes(bytes);
-        ByteArrayPointable pointable = new ByteArrayPointable();
-        ByteArrayPointable.putLength(length, bytes, 0);
-        pointable.set(bytes, 0, bytes.length);
-        return pointable;
+        return ByteArrayPointable.generatePointableFromPureBytes(bytes);
     }
 
     public static void assertNormalizeValue(ByteArrayPointable pointable1, ByteArrayPointable pointable2,
@@ -82,11 +69,12 @@ public class ByteArrayNormalizedKeyComputerFactoryTest {
             assertNormalizeValue(pointable1, pointable2, computer);
         }
 
-        byte[] bytes1 = new byte[] { 0, 4, 0, 25, 34, 42 };
-        byte[] bytes2 = new byte[] { 0, 4, (byte) 130, 25, 34, 42 };
+        ByteArrayPointable ptr1 = ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 0, 25, 34, 42 });
+        ByteArrayPointable ptr2 = ByteArrayPointable.generatePointableFromPureBytes(
+                new byte[] { (byte) 130, 25, 34, 42 });
 
-        int n1 = computer.normalize(bytes1, 0, bytes1.length);
-        int n2 = computer.normalize(bytes2, 0, bytes2.length);
+        int n1 = computer.normalize(ptr1.getByteArray(), ptr1.getStartOffset(), ptr1.getLength());
+        int n2 = computer.normalize(ptr2.getByteArray(), ptr2.getStartOffset(), ptr2.getLength());
         assertTrue(n1 < n2);
 
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
index fe8b03b..cec6add 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
@@ -19,32 +19,25 @@
 
 package org.apache.hyracks.dataflow.common.data.parsers;
 
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import junit.framework.TestCase;
-import org.junit.Test;
+import static org.apache.hyracks.data.std.primitive.ByteArrayPointable.copyContent;
 
-import javax.xml.bind.DatatypeConverter;
 import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;
 import java.util.Arrays;
 
-import static org.apache.hyracks.dataflow.common.data.parsers.ByteArrayHexParserFactoryTest.subArray;
+import javax.xml.bind.DatatypeConverter;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.junit.Test;
+
+import junit.framework.TestCase;
 
 public class ByteArrayBase64ParserFactoryTest extends TestCase {
 
     @Test
     public void testParseBase64String() throws HyracksDataException {
-        IValueParser parser = ByteArrayBase64ParserFactory.INSTANCE.createValueParser();
-        ByteArrayOutputStream bos = new ByteArrayOutputStream();
-        DataOutputStream outputStream = new DataOutputStream(bos);
-        String empty = "";
-
-        parser.parse(empty.toCharArray(), 0, empty.length(), outputStream);
-
-        byte[] cache = bos.toByteArray();
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == 0);
-        assertTrue(DatatypeConverter.printBase64Binary(subArray(cache, 2)).equalsIgnoreCase(empty));
+        testOneString("");
 
         StringBuilder everyChar = new StringBuilder();
         for (char c = 'a'; c <= 'z'; c++) {
@@ -58,21 +51,26 @@ public class ByteArrayBase64ParserFactoryTest extends TestCase {
         }
         everyChar.append("+/");
 
-        bos.reset();
-        parser.parse(everyChar.toString().toCharArray(), 0, everyChar.length(), outputStream);
-        cache = bos.toByteArray();
-        byte[] answer = DatatypeConverter.parseBase64Binary(everyChar.toString());
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == answer.length);
-        assertTrue(Arrays.equals(answer, subArray(cache, 2)));
+        testOneString(everyChar.toString());
+
+        byte[] longBytes = new byte[65536];
+        Arrays.fill(longBytes, (byte) 0xff);
+        String maxString = DatatypeConverter.printBase64Binary(longBytes);
 
-        byte[] maxBytes = new byte[ByteArrayPointable.MAX_LENGTH];
-        Arrays.fill(maxBytes, (byte) 0xff);
-        String maxString = DatatypeConverter.printBase64Binary(maxBytes);
-        bos.reset();
-        parser.parse(maxString.toCharArray(), 0, maxString.length(), outputStream);
-        cache = bos.toByteArray();
-        assertTrue(ByteArrayPointable.getLength(cache, 0) == maxBytes.length);
-        assertTrue(Arrays.equals(maxBytes, subArray(cache, 2)));
+        testOneString(maxString);
     }
 
+    void testOneString(String test) throws HyracksDataException {
+        IValueParser parser = ByteArrayBase64ParserFactory.INSTANCE.createValueParser();
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        DataOutputStream outputStream = new DataOutputStream(bos);
+        ByteArrayPointable bytePtr = new ByteArrayPointable();
+
+        parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+        bytePtr.set(bos.toByteArray(), 0, bos.size());
+
+        byte[] answer = DatatypeConverter.parseBase64Binary(test);
+        assertTrue(bytePtr.getContentLength() == answer.length);
+        assertTrue(Arrays.equals(answer, copyContent(bytePtr)));
+    }
 }
\ No newline at end of file


[7/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray

Posted by ji...@apache.org.
ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray

This patch changes the encoding format that stores the length of the
variable-length types (e.g. String, ByteArray) from a fixed-size encoding
(2 bytes) to a variable-size encoding (1 to 5 bytes).

It resolves issue ASTERIXDB-1102 by allowing a String longer than 64K to be
stored. For the common case of a short string (length <= 127), it also saves
one byte per string. A sketch of one possible variable-size encoding follows.
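
The actual VarLenIntEncoderDecoder added in hyracks-util is not reproduced
here; the following is only a minimal sketch, under the assumption of a
standard unsigned varint (7 data bits per byte, with the high bit as a
continuation flag), to illustrate why a length <= 127 fits in a single byte
while any int fits in at most 5 bytes. The class name VarLenIntSketch is ours.

    // Illustrative 1-to-5-byte length encoding (assumption: not the commit's actual scheme).
    public final class VarLenIntSketch {
        // Writes 'value' using 7 data bits per byte; the high bit means "more bytes follow".
        public static int encode(int value, byte[] dest, int offset) {
            int pos = offset;
            while ((value & ~0x7F) != 0) {
                dest[pos++] = (byte) ((value & 0x7F) | 0x80);
                value >>>= 7;
            }
            dest[pos++] = (byte) value;
            return pos - offset; // 1 byte for values <= 127, at most 5 bytes for any int
        }

        // Reads a value written by encode(); stops at the first byte whose high bit is clear.
        public static int decode(byte[] src, int offset) {
            int value = 0;
            int shift = 0;
            byte b;
            do {
                b = src[offset++];
                value |= (b & 0x7F) << shift;
                shift += 7;
            } while ((b & 0x80) != 0);
            return value;
        }

        public static void main(String[] args) {
            byte[] buf = new byte[5];
            int written = encode(100, buf, 0); // a 100-byte string needs only 1 length byte
            System.out.println(written + " -> " + decode(buf, 0)); // prints "1 -> 100"
        }
    }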

Some important changes include:
1. Add a hyracks-util package that consolidates all the Hyracks-independent
utility functions, reducing the chance of duplicate utils appearing in
different packages (a usage sketch of one of these utilities follows this
list).
2. Move parts of the Asterix string functions down to the Hyracks
UTF8StringPointable object, which will benefit other downstream projects,
such as VXQuery.
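
As a usage sketch of the new hyracks-util classes: the reworked
UTF8StringParserFactory above replaces its hand-rolled modified-UTF-8 loop
with UTF8StringWriter.writeUTF8(char[], int, int, DataOutput), as shown in
that hunk. A minimal standalone example of that call pattern is below; the
class name and sample input are ours, and the commented byte count assumes
the 1-byte length prefix described above for short strings.

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;

    import org.apache.hyracks.util.string.UTF8StringWriter;

    public class WriteUtf8Sketch {
        public static void main(String[] args) throws Exception {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(bos);
            char[] chars = "hello".toCharArray();
            // Serializes the string as its length prefix followed by the UTF-8 bytes.
            new UTF8StringWriter().writeUTF8(chars, 0, chars.length, out);
            System.out.println(bos.size()); // expected 6: 1 length byte + 5 content bytes
        }
    }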

Change-Id: I7e95df0f06984b784ebac2c84b97e56a50207d27
Reviewed-on: https://asterix-gerrit.ics.uci.edu/449
Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
Reviewed-by: Taewoo Kim <wa...@gmail.com>
Reviewed-by: Jianfeng Jia <ji...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/commit/26c3b536
Tree: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/tree/26c3b536
Diff: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/diff/26c3b536

Branch: refs/heads/master
Commit: 26c3b5361db6b2a3816ec6818e1be4d0b1ce1b3d
Parents: 492b6fe
Author: JavierJia <ji...@gmail.com>
Authored: Fri Oct 23 13:49:32 2015 -0700
Committer: Jianfeng Jia <ji...@gmail.com>
Committed: Wed Oct 28 17:20:56 2015 -0700

----------------------------------------------------------------------
 .../data/impl/UTF8StringPrinterFactory.java     |  22 +-
 .../algebricks/data/utils/WriteValueTools.java  |  53 +--
 .../algebricks-examples/piglet-example/pom.xml  |   5 +
 .../compiler/PigletPrinterFactoryProvider.java  |   4 +-
 .../piglet/metadata/PigletMetadataProvider.java |   2 +-
 .../piglet/runtime/PigletExpressionJobGen.java  |   4 +-
 algebricks/algebricks-examples/pom.xml          |   9 +-
 .../tests/pushruntime/PushRuntimeTest.java      |  46 +-
 .../algebricks/tests/tools/WriteValueTest.java  |   2 +-
 hyracks/hyracks-data/hyracks-data-std/pom.xml   |  46 +-
 .../UTF8StringBinaryHashFunctionFamily.java     |  14 +-
 .../hyracks/data/std/api/AbstractPointable.java |   8 +
 .../data/std/primitive/ByteArrayPointable.java  | 135 ++++--
 .../std/primitive/RawUTF8StringPointable.java   |  37 +-
 .../primitive/UTF8StringLowercasePointable.java |  73 +++
 .../data/std/primitive/UTF8StringPointable.java | 449 ++++++++++++++-----
 .../data/std/primitive/UTF8StringWriter.java    |  79 ----
 .../std/util/AbstractVarLenObjectBuilder.java   |  96 ++++
 .../util/ByteArrayAccessibleOutputStream.java   |  25 +-
 .../hyracks/data/std/util/ByteArrayBuilder.java |  33 ++
 .../hyracks/data/std/util/GrowableArray.java    |  21 +-
 .../hyracks/data/std/util/ICharIterator.java    |  27 ++
 .../std/util/RewindableDataOutputStream.java    |  56 +++
 .../hyracks/data/std/util/UTF8CharSequence.java |  71 +++
 .../data/std/util/UTF8StringBuilder.java        |  43 ++
 .../std/util/UTF8StringCharacterIterator.java   |  50 +++
 .../std/primitive/ByteArrayPointableTest.java   |  48 +-
 .../std/primitive/UTF8StringPointableTest.java  | 193 ++++++++
 .../data/std/util/UTF8StringBuilderTest.java    |  81 ++++
 .../util/UTF8StringCharacterIteratorTest.java   |  58 +++
 hyracks/hyracks-dataflow-common/pom.xml         |  95 ++--
 .../ByteArraySerializerDeserializer.java        |  56 ++-
 .../UTF8StringSerializerDeserializer.java       |  12 +-
 .../ByteArrayNormalizedKeyComputerFactory.java  |  21 +-
 .../UTF8StringNormalizedKeyComputerFactory.java |  14 +-
 .../parsers/ByteArrayBase64ParserFactory.java   | 217 +--------
 .../data/parsers/ByteArrayHexParserFactory.java | 102 +----
 .../data/parsers/UTF8StringParserFactory.java   |  46 +-
 .../dataflow/common/data/util/StringUtils.java  |  47 --
 .../FrameFixedFieldTupleAppenderTest.java       |   4 +-
 .../ByteArraySerializerDeserializerTest.java    |  56 +--
 ...teArrayNormalizedKeyComputerFactoryTest.java |  42 +-
 .../ByteArrayBase64ParserFactoryTest.java       |  58 ++-
 .../parsers/ByteArrayHexParserFactoryTest.java  |  60 ++-
 .../MinMaxStringFieldAggregatorFactory.java     |   7 +-
 .../VariableTupleMemoryManagerTest.java         |   2 +-
 .../util/DeletableFrameTupleAppenderTest.java   |  13 +-
 .../btree/client/InsertPipelineExample.java     |   6 +-
 .../client/PrimaryIndexBulkLoadExample.java     |   6 +-
 .../btree/client/PrimaryIndexSearchExample.java |   8 +-
 .../client/SecondaryIndexBulkLoadExample.java   |   4 +-
 .../client/SecondaryIndexSearchExample.java     |  14 +-
 .../am/btree/AbstractBTreeOperatorTest.java     |  34 +-
 .../BTreePrimaryIndexScanOperatorTest.java      |   6 +-
 .../BTreePrimaryIndexSearchOperatorTest.java    |   8 +-
 .../BTreeSecondaryIndexInsertOperatorTest.java  |   8 +-
 .../BTreeSecondaryIndexSearchOperatorTest.java  |   8 +-
 .../BTreeSecondaryIndexUpsertOperatorTest.java  |   8 +-
 .../am/rtree/AbstractRTreeOperatorTest.java     |  34 +-
 .../comm/SerializationDeserializationTest.java  |   2 +-
 .../tests/integration/AggregationTest.java      |  58 +--
 .../tests/integration/CountOfCountsTest.java    |  12 +-
 .../integration/LocalityAwareConnectorTest.java |  14 +-
 .../integration/OptimizedSortMergeTest.java     |  20 +-
 .../tests/integration/ScanPrintTest.java        |  20 +-
 .../tests/integration/SortMergeTest.java        |  20 +-
 .../tests/integration/SplitOperatorTest.java    |   2 +-
 ...TPCHCustomerOptimizedHybridHashJoinTest.java | 108 ++---
 .../TPCHCustomerOrderHashJoinTest.java          | 396 ++++++++--------
 .../TPCHCustomerOrderNestedLoopJoinTest.java    | 144 +++---
 .../hyracks/tests/integration/UnionTest.java    |   2 +-
 .../integration/VSizeFrameSortMergeTest.java    |  10 +-
 .../tests/unit/AbstractRunGeneratorTest.java    |   8 +-
 .../tests/unit/RunMergingFrameReaderTest.java   |   6 +-
 .../text/client/ExternalGroupClient.java        |   8 +-
 .../examples/text/client/WordCountMain.java     |   4 +-
 .../hyracks/examples/tpch/client/Common.java    |  36 +-
 .../hyracks/examples/tpch/client/Join.java      |   2 +-
 .../hyracks/hdfs/dataflow/DataflowTest.java     |   2 +-
 .../hyracks/hdfs2/dataflow/DataflowTest.java    |   2 +-
 hyracks/hyracks-storage-am-common/pom.xml       |   5 +
 .../TreeIndexStatsOperatorDescriptor.java       |   2 +-
 .../TreeIndexStatsOperatorNodePushable.java     |   3 +-
 .../common/tuples/TypeAwareTupleReference.java  |   5 +-
 .../am/common/tuples/TypeAwareTupleWriter.java  |  15 +-
 .../common/tuples/VarLenIntEncoderDecoder.java  |  92 ----
 .../pom.xml                                     |  73 +--
 .../search/AbstractTOccurrenceSearcher.java     |   2 +-
 .../AbstractUTF8StringBinaryTokenizer.java      |  45 +-
 .../tokenizers/AbstractUTF8Token.java           |  78 +++-
 .../DelimitedUTF8StringBinaryTokenizer.java     |  61 ++-
 .../tokenizers/HashedUTF8NGramToken.java        |   8 +-
 .../tokenizers/HashedUTF8WordToken.java         |  18 +-
 .../am/lsm/invertedindex/tokenizers/IToken.java |  24 +-
 .../NGramUTF8StringBinaryTokenizer.java         |  30 +-
 .../tokenizers/UTF8NGramToken.java              |  38 +-
 .../invertedindex/tokenizers/UTF8WordToken.java |  22 +-
 .../rtree/tuples/RTreeTypeAwareTupleWriter.java |   5 +-
 .../am/btree/OrderedIndexExamplesTest.java      |  16 +-
 .../am/btree/OrderedIndexMultiThreadTest.java   |   2 +-
 .../am/btree/OrderedIndexTestDriver.java        |  14 +-
 .../am/rtree/AbstractRTreeExamplesTest.java     |   4 +-
 .../storage/am/bloomfilter/BloomFilterTest.java |   6 +-
 .../MurmurHashForITupleReferenceTest.java       |   6 +-
 .../am/lsm/btree/tuples/LSMBTreeTuplesTest.java |  10 +-
 .../pom.xml                                     |  75 ++--
 .../tokenizers/NGramTokenizerTest.java          |  17 +-
 .../tokenizers/WordTokenizerTest.java           |  27 +-
 .../util/LSMInvertedIndexTestUtils.java         |   8 +-
 hyracks/hyracks-util/pom.xml                    |  58 +++
 .../apache/hyracks/util/bytes/Base64Parser.java | 250 +++++++++++
 .../hyracks/util/bytes/Base64Printer.java       | 125 ++++++
 .../apache/hyracks/util/bytes/HexParser.java    |  97 ++++
 .../apache/hyracks/util/bytes/HexPrinter.java   |  48 ++
 .../util/encoding/VarLenIntEncoderDecoder.java  | 145 ++++++
 .../hyracks/util/string/UTF8StringReader.java   | 131 ++++++
 .../hyracks/util/string/UTF8StringUtil.java     | 422 +++++++++++++++++
 .../hyracks/util/string/UTF8StringWriter.java   | 113 +++++
 .../encoding/VarLenIntEncoderDecoderTest.java   |  87 ++++
 .../util/string/UTF8StringReaderWriterTest.java |  90 ++++
 .../hyracks/util/string/UTF8StringSample.java   |  56 +++
 .../hyracks/util/string/UTF8StringUtilTest.java | 144 ++++++
 hyracks/pom.xml                                 |   1 +
 pom.xml                                         | 158 +++----
 124 files changed, 4310 insertions(+), 1988 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java b/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java
index 8aa646e..1aa3370 100644
--- a/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java
+++ b/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java
@@ -18,12 +18,13 @@
  */
 package org.apache.hyracks.algebricks.data.impl;
 
+import java.io.IOException;
 import java.io.PrintStream;
 
 import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
 import org.apache.hyracks.algebricks.data.IPrinter;
 import org.apache.hyracks.algebricks.data.IPrinterFactory;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class UTF8StringPrinterFactory implements IPrinterFactory {
 
@@ -40,22 +41,11 @@ public class UTF8StringPrinterFactory implements IPrinterFactory {
 
             @Override
             public void print(byte[] b, int s, int l, PrintStream ps) throws AlgebricksException {
-                int strlen = UTF8StringPointable.getUTFLength(b, s);
-                int pos = s + 2;
-                int maxPos = pos + strlen;
-                ps.print("\"");
-                while (pos < maxPos) {
-                    char c = UTF8StringPointable.charAt(b, pos);
-                    switch (c) {
-                        case '\\':
-                        case '"':
-                            ps.print('\\');
-                            break;
-                    }
-                    ps.print(c);
-                    pos += UTF8StringPointable.charSize(b, pos);
+                try {
+                    UTF8StringUtil.printUTF8StringWithQuotes(b, s, l, ps);
+                } catch (IOException e) {
+                    throw new AlgebricksException(e);
                 }
-                ps.print("\"");
             }
 
             @Override

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java b/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java
index 8a96ea6..97e7d95 100644
--- a/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java
+++ b/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java
@@ -20,14 +20,16 @@ package org.apache.hyracks.algebricks.data.utils;
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.PrintStream;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public final class WriteValueTools {
 
     private final static int[] INT_INTERVALS = { 9, 99, 999, 9999, 99999, 999999, 9999999, 99999999, 999999999,
             Integer.MAX_VALUE };
-    private final static int[] INT_DIVIDERS = { 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+    private final static int[] INT_DIVIDERS = { 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
+            1000000000 };
     private final static int[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
 
     public static void writeInt(int i, OutputStream os) throws IOException {
@@ -75,50 +77,11 @@ public final class WriteValueTools {
         os.write(DIGITS[(int) (d % 10)]);
     }
 
-    public static void writeUTF8String(byte[] b, int s, int l, OutputStream os) throws IOException {
-        int stringLength = UTF8StringPointable.getUTFLength(b, s);
-        int position = s + 2;
-        int maxPosition = position + stringLength;
-        os.write('\"');
-        while (position < maxPosition) {
-            char c = UTF8StringPointable.charAt(b, position);
-            switch (c) {
-            // escape
-                case '\\':
-                case '"':
-                    os.write('\\');
-                    break;
-            }
-            int sz = UTF8StringPointable.charSize(b, position);
-            while (sz > 0) {
-                os.write(b[position]);
-                position++;
-                sz--;
-            }
-        }
-        os.write('\"');
+    public static void writeUTF8StringWithQuotes(String string, OutputStream ps) throws IOException {
+        UTF8StringUtil.printUTF8StringWithQuotes(string, ps);
     }
 
-    public static void writeUTF8StringNoQuotes(byte[] b, int s, int l, OutputStream os) throws IOException {
-        int stringLength = UTF8StringPointable.getUTFLength(b, s);
-        int position = s + 2;
-        int maxPosition = position + stringLength;
-        while (position < maxPosition) {
-            char c = UTF8StringPointable.charAt(b, position);
-            switch (c) {
-            // escape
-                case '\\':
-                case '"':
-                    os.write('\\');
-                    break;
-            }
-            int sz = UTF8StringPointable.charSize(b, position);
-            while (sz > 0) {
-                os.write(b[position]);
-                position++;
-                sz--;
-            }
-        }
+    public static void writeUTF8StringNoQuotes(String string, OutputStream ps) throws IOException {
+        UTF8StringUtil.printUTF8StringNoQuotes(string, ps);
     }
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-examples/piglet-example/pom.xml
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-examples/piglet-example/pom.xml b/algebricks/algebricks-examples/piglet-example/pom.xml
index a037db5..ae2ec51 100644
--- a/algebricks/algebricks-examples/piglet-example/pom.xml
+++ b/algebricks/algebricks-examples/piglet-example/pom.xml
@@ -111,5 +111,10 @@
       <artifactId>algebricks-compiler</artifactId>
       <version>0.2.17-SNAPSHOT</version>
     </dependency>
+      <dependency>
+          <groupId>org.apache.hyracks</groupId>
+          <artifactId>hyracks-util</artifactId>
+          <version>0.2.17-SNAPSHOT</version>
+      </dependency>
   </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java
index 6d64741..8049594 100644
--- a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java
+++ b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java
@@ -29,7 +29,9 @@ import org.apache.hyracks.algebricks.data.impl.IntegerPrinterFactory;
 import org.apache.hyracks.algebricks.data.utils.WriteValueTools;
 import org.apache.hyracks.algebricks.examples.piglet.types.Type;
 import org.apache.hyracks.data.std.primitive.FloatPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class PigletPrinterFactoryProvider implements IPrinterFactoryProvider {
 
@@ -73,7 +75,7 @@ public class PigletPrinterFactoryProvider implements IPrinterFactoryProvider {
                 @Override
                 public void print(byte[] b, int s, int l, PrintStream ps) throws AlgebricksException {
                     try {
-                        WriteValueTools.writeUTF8String(b, s, l, ps);
+                        UTF8StringUtil.printUTF8StringWithQuotes(b, s, l, ps);
                     } catch (IOException e) {
                         throw new AlgebricksException(e);
                     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
index 7d9b3db..8f9ab9f 100644
--- a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
+++ b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
@@ -110,7 +110,7 @@ public class PigletMetadataProvider implements IMetadataProvider<String, String>
 
                 case CHAR_ARRAY:
                     vpf = UTF8StringParserFactory.INSTANCE;
-                    serDeser = UTF8StringSerializerDeserializer.INSTANCE;
+                    serDeser = new UTF8StringSerializerDeserializer();
                     break;
 
                 case FLOAT:

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java
index 6c173b2..1c3f9b8 100644
--- a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java
+++ b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java
@@ -53,6 +53,8 @@ import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDese
 import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
 
 public class PigletExpressionJobGen implements ILogicalExpressionJobGen {
+    private final UTF8StringSerializerDeserializer utf8SerDer = new UTF8StringSerializerDeserializer();
+
     @Override
     public ICopyEvaluatorFactory createEvaluatorFactory(ILogicalExpression expr, IVariableTypeEnvironment env,
             IOperatorSchema[] inputSchemas, JobGenContext context) throws AlgebricksException {
@@ -74,7 +76,7 @@ public class PigletExpressionJobGen implements ILogicalExpressionJobGen {
 
                     case CHAR_ARRAY:
                         try {
-                            UTF8StringSerializerDeserializer.INSTANCE.serialize(image, dos);
+                            utf8SerDer.serialize(image, dos);
                         } catch (Exception e) {
                             throw new AlgebricksException(e);
                         }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-examples/pom.xml
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-examples/pom.xml b/algebricks/algebricks-examples/pom.xml
index 7ba1b5b..968db33 100644
--- a/algebricks/algebricks-examples/pom.xml
+++ b/algebricks/algebricks-examples/pom.xml
@@ -22,8 +22,15 @@
   <artifactId>algebricks-examples</artifactId>
   <packaging>pom</packaging>
   <name>algebricks-examples</name>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>algebricks-core</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+        </dependency>
+    </dependencies>
 
-  <parent>
+    <parent>
     <groupId>org.apache.hyracks</groupId>
     <artifactId>algebricks</artifactId>
     <version>0.2.17-SNAPSHOT</version>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java b/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java
index 3c97878..7fcab17 100644
--- a/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java
+++ b/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java
@@ -275,10 +275,10 @@ public class PushRuntimeTest {
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
 
         RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
         IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
@@ -355,10 +355,10 @@ public class PushRuntimeTest {
                 "data/tpch0.001/customer-part1.tbl")));
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
         RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
         IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
@@ -407,10 +407,10 @@ public class PushRuntimeTest {
                 "data/tpch0.001/customer.tbl")));
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
         RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
         IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
@@ -492,10 +492,10 @@ public class PushRuntimeTest {
                 "data/tpch0.001/customer.tbl")));
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
         RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
         IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
@@ -663,7 +663,7 @@ public class PushRuntimeTest {
         DelimitedDataTupleParserFactory stringParser = new DelimitedDataTupleParserFactory(
                 new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '\u0000');
         RecordDescriptor stringRec = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE, });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), });
 
         FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, new ConstantFileSplitProvider(
                 inputSplits), stringParser, stringRec);
@@ -709,8 +709,8 @@ public class PushRuntimeTest {
                 "data/tpch0.001/nation.tbl")));
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
         RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
         IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
         FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider,
@@ -817,10 +817,10 @@ public class PushRuntimeTest {
                 "data/tpch0.001/customer.tbl")));
         IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
         RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
         IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
                 UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java
----------------------------------------------------------------------
diff --git a/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java b/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java
index 0968478..6770494 100644
--- a/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java
+++ b/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java
@@ -97,7 +97,7 @@ public class WriteValueTest {
         interm.reset();
         dout.writeUTF(str);
         baaos.reset();
-        WriteValueTools.writeUTF8String(interm.getByteArray(), 0, interm.size(), baaos);
+        WriteValueTools.writeUTF8StringWithQuotes(str, baaos);
         byte[] b = str.getBytes("UTF-8");
         if (baaos.size() != b.length + 2) {
             throw new Exception("Expecting to write " + b + " in " + b.length + " bytes, but found " + baaos.size()

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/pom.xml b/hyracks/hyracks-data/hyracks-data-std/pom.xml
index 8546bdb..20c30ef 100644
--- a/hyracks/hyracks-data/hyracks-data-std/pom.xml
+++ b/hyracks/hyracks-data/hyracks-data-std/pom.xml
@@ -17,23 +17,35 @@
  ! under the License.
  !-->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <artifactId>hyracks-data-std</artifactId>
-  <name>hyracks-data-std</name>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>hyracks-data-std</artifactId>
+    <name>hyracks-data-std</name>
 
-  <parent>
-    <groupId>org.apache.hyracks</groupId>
-    <artifactId>hyracks-data</artifactId>
-    <version>0.2.17-SNAPSHOT</version>
-  </parent>
+    <parent>
+        <groupId>org.apache.hyracks</groupId>
+        <artifactId>hyracks-data</artifactId>
+        <version>0.2.17-SNAPSHOT</version>
+    </parent>
 
-
-  <dependencies>
-  <dependency>
-  	<groupId>org.apache.hyracks</groupId>
-  	<artifactId>hyracks-api</artifactId>
-  	<version>0.2.17-SNAPSHOT</version>
-  </dependency>
-  </dependencies>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-util</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-api</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-util</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java
index affafea..ea661e3 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java
@@ -20,7 +20,7 @@ package org.apache.hyracks.data.std.accessors;
 
 import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
 import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class UTF8StringBinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
     public static final IBinaryHashFunctionFamily INSTANCE = new UTF8StringBinaryHashFunctionFamily();
@@ -40,17 +40,7 @@ public class UTF8StringBinaryHashFunctionFamily implements IBinaryHashFunctionFa
         return new IBinaryHashFunction() {
             @Override
             public int hash(byte[] bytes, int offset, int length) {
-                int h = 0;
-                int utflen = UTF8StringPointable.getUTFLength(bytes, offset);
-                int sStart = offset + 2;
-                int c = 0;
-
-                while (c < utflen) {
-                    char ch = UTF8StringPointable.charAt(bytes, sStart + c);
-                    h = (coefficient * h + ch) % r;
-                    c += UTF8StringPointable.charSize(bytes, sStart + c);
-                }
-                return h;
+                return UTF8StringUtil.hash(bytes, offset, coefficient, r);
             }
         };
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java
index a10b0da..549a136 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java
@@ -30,6 +30,7 @@ public abstract class AbstractPointable implements IPointable {
         this.bytes = bytes;
         this.start = start;
         this.length = length;
+        afterReset();
     }
 
     @Override
@@ -37,6 +38,13 @@ public abstract class AbstractPointable implements IPointable {
         set(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
     }
 
+    /**
+     * This method is called after the new byte values have been set.
+     * Subclasses can override it to reset any cached state of the Pointable.
+     */
+    protected void afterReset() {
+    }
+
     @Override
     public byte[] getByteArray() {
         return bytes;
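
A minimal sketch of how a subclass could use the new afterReset() hook; the subclass name and cached field are hypothetical, and only the AbstractPointable behaviour shown above (set() invoking afterReset()) is taken from this change:

    import org.apache.hyracks.data.std.api.AbstractPointable;

    // Hypothetical subclass illustrating the afterReset() hook.
    public class CachedLengthPointable extends AbstractPointable {
        private int cachedLength = -1; // derived value, refreshed whenever the pointable is re-pointed

        @Override
        protected void afterReset() {
            // bytes/start/length have already been updated by set() at this point
            cachedLength = getLength(); // placeholder for a more expensive decode (e.g. a var-size prefix)
        }

        public int getCachedLength() {
            return cachedLength;
        }
    }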

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java
index 0a2a723..af54c7e 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java
@@ -19,10 +19,33 @@
 
 package org.apache.hyracks.data.std.primitive;
 
+import java.io.Serializable;
+import java.util.Arrays;
+
 import org.apache.hyracks.api.dataflow.value.ITypeTraits;
-import org.apache.hyracks.data.std.api.*;
+import org.apache.hyracks.data.std.api.AbstractPointable;
+import org.apache.hyracks.data.std.api.IComparable;
+import org.apache.hyracks.data.std.api.IHashable;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+public class ByteArrayPointable extends AbstractPointable implements IHashable, IComparable, Serializable {
+
+    // These three values are cached to speed up access to the length data.
+    // Since we are using the variable-length encoding, this saves repeated decoding effort.
+    // They are refreshed in afterReset(), which is invoked automatically on every set().
+    private int contentLength = -1;
+    private int metaLength = -1;
+    private int hash = 0;
 
-public class ByteArrayPointable extends AbstractPointable implements IHashable, IComparable {
+    @Override
+    protected void afterReset() {
+        contentLength = getContentLength(getByteArray(), getStartOffset());
+        metaLength = getNumberBytesToStoreMeta(contentLength);
+        hash = 0;
+    }
 
     public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
         private static final long serialVersionUID = 1L;
@@ -58,48 +81,106 @@ public class ByteArrayPointable extends AbstractPointable implements IHashable,
     }
 
     @Override
-    public int compareTo(byte[] bytes, int start, int length) {
-        int thislen = getLength(this.bytes, this.start);
-        int thatlen = getLength(bytes, start);
-
-        for (int thisIndex = 0, thatIndex = 0; thisIndex < thislen && thatIndex < thatlen; ++thisIndex, ++thatIndex) {
-            if (this.bytes[this.start + SIZE_OF_LENGTH + thisIndex] != bytes[start + SIZE_OF_LENGTH + thatIndex]) {
-                return (0xff & this.bytes[this.start + SIZE_OF_LENGTH + thisIndex]) - (0xff & bytes[start + SIZE_OF_LENGTH
-                        + thatIndex]);
+    public int compareTo(byte[] thatBytes, int thatStart, int thatLength) {
+        int thisArrayLen = getContentLength(this.bytes, this.start);
+        int thatArrayLen = getContentLength(thatBytes, thatStart);
+
+        int thisArrayStart = this.getContentStartOffset();
+        int thatArrayStart = thatStart + getNumberBytesToStoreMeta(thatArrayLen);
+
+        for (int thisIndex = 0, thatIndex = 0;
+             thisIndex < thisArrayLen && thatIndex < thatArrayLen; ++thisIndex, ++thatIndex) {
+            if (this.bytes[thisArrayStart + thisIndex] != thatBytes[thatArrayStart + thatIndex]) {
+                return (0xff & this.bytes[thisArrayStart + thisIndex]) - (0xff & thatBytes[thatArrayStart + thatIndex]);
             }
         }
-        return thislen - thatlen;
+        return thisArrayLen - thatArrayLen;
+    }
+
+    public int getContentLength() {
+        return contentLength;
+    }
+
+    public int getMetaLength() {
+        return metaLength;
     }
 
     @Override
     public int hash() {
-        int h = 0;
-        int realLength = getLength(bytes, start);
-        for (int i = 0; i < realLength; ++i) {
-            h = 31 * h + bytes[start + SIZE_OF_LENGTH + i];
+        if (hash == 0) {
+            int h = 0;
+            int realLength = getContentLength();
+            int startOffset = getContentStartOffset();
+            for (int i = 0; i < realLength; ++i) {
+                h = 31 * h + bytes[startOffset + i];
+            }
+            hash = h;
         }
-        return h;
+        return hash;
     }
 
     @Override
-    public int getLength(){
-        return getFullLength(getByteArray(), getStartOffset());
+    public int getLength() {
+        return getContentLength() + getMetaLength();
     }
 
-    public static final int SIZE_OF_LENGTH = 2;
-    public static final int MAX_LENGTH = 65535;
+    public int getContentStartOffset() {
+        return getStartOffset() + getMetaLength();
+    }
+
+    ///////////////// helper functions ////////////////////////////////
+    public static byte[] copyContent(ByteArrayPointable bytePtr) {
+        return Arrays.copyOfRange(bytePtr.getByteArray(), bytePtr.getContentStartOffset(),
+                bytePtr.getContentStartOffset() + bytePtr.getContentLength());
+    }
+
+    public static ByteArrayPointable generatePointableFromPureBytes(byte[] bytes) {
+        return generatePointableFromPureBytes(bytes, 0, bytes.length);
+    }
 
-    public static int getLength(byte[] bytes, int offset) {
-        return ((0xFF & bytes[offset]) << 8) + (0xFF & bytes[offset + 1]);
+    public static ByteArrayPointable generatePointableFromPureBytes(byte[] bytes, int start, int length) {
+        int metaLen = getNumberBytesToStoreMeta(length);
+        byte[] ret = new byte[length + metaLen];
+        VarLenIntEncoderDecoder.encode(length, ret, 0);
+        for (int i = 0; i < length; ++i) {
+            ret[i + metaLen] = bytes[start + i];
+        }
+        ByteArrayPointable ptr = new ByteArrayPointable();
+        ptr.set(ret, 0, ret.length);
+        return ptr;
+    }
+
+    public static int getContentLength(byte[] bytes, int offset) {
+        return VarLenIntEncoderDecoder.decode(bytes, offset);
     }
 
-    public static int getFullLength(byte[] bytes, int offset){
-        return getLength(bytes, offset) + SIZE_OF_LENGTH;
+    public static int getNumberBytesToStoreMeta(int length) {
+        return VarLenIntEncoderDecoder.getBytesRequired(length);
     }
 
-    public static void putLength(int length, byte[] bytes, int offset) {
-        bytes[offset] = (byte) ((length >>> 8) & 0xFF);
-        bytes[offset + 1] = (byte) ((length >>> 0) & 0xFF);
+    /**
+     * Computes the normalized key of the byte array.
+     * In Hyracks, the normalized key is mainly used to speed up comparisons between pointable data.
+     * For a ByteArray, the integer value is computed from the first 4 content bytes.
+     * The comparator first uses this integer to decide the result (<, >, or =) and inspects
+     * the actual bytes only when the normalized keys are equal. The normalized key must therefore be
+     * consistent with the full comparison result.
+     *
+     * @param bytesPtr
+     * @param start
+     * @return
+     */
+    public static int normalize(byte[] bytesPtr, int start) {
+        int len = getContentLength(bytesPtr, start);
+        long nk = 0;
+        start = start + getNumberBytesToStoreMeta(len);
+        for (int i = 0; i < 4; ++i) {
+            nk <<= 8;
+            if (i < len) {
+                nk |= bytesPtr[start + i] & 0xff;
+            }
+        }
+        return (int) (nk >> 1); // make it always positive.
     }
 
 }
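
For reference, a short usage sketch of the reworked ByteArrayPointable layout (var-size length prefix followed by the content), built only from the helpers added above; the demo class wrapper is illustrative:

    import org.apache.hyracks.data.std.primitive.ByteArrayPointable;

    public class ByteArrayPointableDemo {
        public static void main(String[] args) {
            byte[] raw = new byte[] { 0x01, 0x02, 0x03 };
            // Wrap plain bytes: a var-size length prefix is prepended to the content.
            ByteArrayPointable ptr = ByteArrayPointable.generatePointableFromPureBytes(raw);

            int contentLen = ptr.getContentLength();        // 3 content bytes
            int metaLen = ptr.getMetaLength();              // bytes used by the var-size length prefix
            int contentStart = ptr.getContentStartOffset(); // start offset of the content bytes
            byte[] copy = ByteArrayPointable.copyContent(ptr);

            // Normalized key: derived from the first 4 content bytes, consistent with compareTo().
            int nk = ByteArrayPointable.normalize(ptr.getByteArray(), ptr.getStartOffset());

            System.out.println(contentLen + " " + metaLen + " " + contentStart + " " + copy.length + " " + nk);
        }
    }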

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java
index 2b1f557..70bac4d 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java
@@ -24,6 +24,7 @@ import org.apache.hyracks.data.std.api.IComparable;
 import org.apache.hyracks.data.std.api.IHashable;
 import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 /**
  * This class provides the raw bytes-based comparison and hash function for UTF8 strings.
@@ -66,44 +67,16 @@ public final class RawUTF8StringPointable extends AbstractPointable implements I
 
     @Override
     public int compareTo(byte[] bytes, int start, int length) {
-        int utflen1 = UTF8StringPointable.getUTFLength(this.bytes, this.start);
-        int utflen2 = UTF8StringPointable.getUTFLength(bytes, start);
-
-        int c1 = 0;
-        int c2 = 0;
-
-        int s1Start = this.start + 2;
-        int s2Start = start + 2;
-
-        while (c1 < utflen1 && c2 < utflen2) {
-            char ch1 = (char) this.bytes[s1Start + c1];
-            char ch2 = (char) bytes[s2Start + c2];
-
-            if (ch1 != ch2) {
-                return ch1 - ch2;
-            }
-            c1++;
-            c2++;
-        }
-        return utflen1 - utflen2;
+        return UTF8StringUtil.rawByteCompareTo(this.bytes, this.start, bytes, start);
     }
 
     @Override
     public int hash() {
-        int h = 0;
-        int utflen = UTF8StringPointable.getUTFLength(bytes, start);
-        int sStart = start + 2;
-        int c = 0;
-
-        while (c < utflen) {
-            char ch = (char) bytes[sStart + c];
-            h = 31 * h + ch;
-            c++;
-        }
-        return h;
+        return UTF8StringUtil.rawBytehash(this.bytes, this.start);
     }
 
     public void toString(StringBuilder buffer) {
-        UTF8StringPointable.toString(buffer, bytes, start);
+        UTF8StringUtil.toString(buffer, bytes, start);
     }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercasePointable.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercasePointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercasePointable.java
new file mode 100644
index 0000000..6e4810c
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercasePointable.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.data.std.primitive;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.data.std.api.AbstractPointable;
+import org.apache.hyracks.data.std.api.IComparable;
+import org.apache.hyracks.data.std.api.IHashable;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public final class UTF8StringLowercasePointable extends AbstractPointable implements IHashable, IComparable {
+    public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public boolean isFixedLength() {
+            return false;
+        }
+
+        @Override
+        public int getFixedLength() {
+            return 0;
+        }
+    };
+
+    public static final IPointableFactory FACTORY = new IPointableFactory() {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public IPointable createPointable() {
+            return new UTF8StringLowercasePointable();
+        }
+
+        @Override
+        public ITypeTraits getTypeTraits() {
+            return TYPE_TRAITS;
+        }
+    };
+
+    @Override
+    public int compareTo(IPointable pointer) {
+        return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
+    }
+
+    @Override
+    public int compareTo(byte[] bytes, int start, int length) {
+        return UTF8StringUtil.lowerCaseCompareTo(this.bytes, this.start, bytes, start);
+    }
+
+    @Override
+    public int hash() {
+        return UTF8StringUtil.lowerCaseHash(bytes, start);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 8b41206..e311fa6 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -18,14 +18,42 @@
  */
 package org.apache.hyracks.data.std.primitive;
 
+import java.io.IOException;
+import java.nio.charset.Charset;
+
 import org.apache.hyracks.api.dataflow.value.ITypeTraits;
 import org.apache.hyracks.data.std.api.AbstractPointable;
 import org.apache.hyracks.data.std.api.IComparable;
 import org.apache.hyracks.data.std.api.IHashable;
 import org.apache.hyracks.data.std.api.IPointable;
 import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public final class UTF8StringPointable extends AbstractPointable implements IHashable, IComparable {
+
+    // These values are cached to speed up the length data access.
+    // Since we are using the variable-length encoding, we can save the repeated decoding efforts.
+    // They are refreshed by afterReset(), which is invoked automatically on every set().
+    private int utf8Length;
+    private int metaLength;
+    private int hashValue;
+    private int stringLength;
+
+    /**
+     * Resets the cached length fields.
+     * Since {@code utf8Length} and {@code metaLength} are used frequently, they are computed eagerly here.
+     * {@code stringLength} and {@code hashValue} are initialized lazily on their first use.
+     */
+    @Override
+    protected void afterReset() {
+        utf8Length = UTF8StringUtil.getUTFLength(bytes, start);
+        metaLength = UTF8StringUtil.getNumBytesToStoreLength(getUTF8Length());
+        hashValue = 0;
+        stringLength = -1;
+    }
+
     public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
         private static final long serialVersionUID = 1L;
 
@@ -54,170 +82,367 @@ public final class UTF8StringPointable extends AbstractPointable implements IHas
         }
     };
 
+    public static UTF8StringPointable generateUTF8Pointable(String string) {
+        byte[] bytes;
+        bytes = UTF8StringUtil.writeStringToBytes(string);
+        UTF8StringPointable ptr = new UTF8StringPointable();
+        ptr.set(bytes, 0, bytes.length);
+        return ptr;
+    }
+
     /**
      * Returns the character at the given byte offset. The caller is responsible for making sure that
      * the provided offset is within bounds and points to the beginning of a valid UTF8 character.
-     * 
-     * @param offset
-     *            - Byte offset
+     *
+     * @param offset - Byte offset
      * @return Character at the given offset.
      */
     public char charAt(int offset) {
-        return charAt(bytes, start + offset);
+        return UTF8StringUtil.charAt(bytes, start + offset);
     }
 
-    public static char charAt(byte[] b, int s) {
-        int c = b[s] & 0xff;
-        switch (c >> 4) {
-            case 0:
-            case 1:
-            case 2:
-            case 3:
-            case 4:
-            case 5:
-            case 6:
-            case 7:
-                return (char) c;
+    public int charSize(int offset) {
+        return UTF8StringUtil.charSize(bytes, start + offset);
+    }
 
-            case 12:
-            case 13:
-                return (char) (((c & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
+    /**
+     * Gets the length of the string in characters.
+     * The first call scans the entire string; subsequent calls return the pre-calculated result.
+     *
+     * @return length of string in characters
+     */
+    public int getStringLength() {
+        if (stringLength < 0) {
+            stringLength = UTF8StringUtil.getStringLength(bytes, start);
+        }
+        return stringLength;
+    }
 
-            case 14:
-                return (char) (((c & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
+    /**
+     * Gets the length of the UTF-8 encoded string in bytes.
+     *
+     * @return length of UTF-8 encoded string in bytes
+     */
+    public int getUTF8Length() {
+        return utf8Length;
+    }
 
-            default:
-                throw new IllegalArgumentException();
-        }
+    public int getMetaDataLength() {
+        return metaLength;
     }
 
-    public int charSize(int offset) {
-        return charSize(bytes, start + offset);
+    public int getCharStartOffset() {
+        return getStartOffset() + getMetaDataLength();
     }
 
-    public static int charSize(byte[] b, int s) {
-        int c = b[s] & 0xff;
-        switch (c >> 4) {
-            case 0:
-            case 1:
-            case 2:
-            case 3:
-            case 4:
-            case 5:
-            case 6:
-            case 7:
-                return 1;
+    @Override
+    public int compareTo(IPointable pointer) {
+        return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
+    }
 
-            case 12:
-            case 13:
-                return 2;
+    @Override
+    public int compareTo(byte[] bytes, int start, int length) {
+        return UTF8StringUtil.compareTo(this.bytes, this.start, bytes, start);
+    }
 
-            case 14:
-                return 3;
+    @Override
+    public int hash() {
+        if (hashValue == 0) {
+            hashValue = UTF8StringUtil.hash(this.bytes, this.start);
         }
-        throw new IllegalStateException();
+        return hashValue;
     }
 
-    public static int getModifiedUTF8Len(char c) {
-        if (c >= 0x0000 && c <= 0x007F) {
-            return 1;
-        } else if (c <= 0x07FF) {
-            return 2;
-        } else {
-            return 3;
-        }
+    public void toString(StringBuilder buffer) {
+        UTF8StringUtil.toString(buffer, bytes, start);
     }
 
-    /**
-     * Gets the length of the string in characters.
-     * 
-     * @return length of string in characters
+    public String toString() {
+        return new String(this.bytes, this.getCharStartOffset(), this.getUTF8Length(), Charset.forName("UTF-8"));
+    }
+
+    /****
+     * String functions
      */
-    public int getStringLength() {
-        return getStringLength(bytes, start);
+
+    public int ignoreCaseCompareTo(UTF8StringPointable other) {
+        return UTF8StringUtil.lowerCaseCompareTo(this.getByteArray(), this.getStartOffset(),
+                other.getByteArray(), other.getStartOffset());
     }
 
-    public static int getStringLength(byte[] b, int s) {
-        int pos = s + 2;
-        int end = pos + getUTFLength(b, s);
-        int charCount = 0;
-        while (pos < end) {
-            charCount++;
-            pos += charSize(b, pos);
-        }
-        return charCount;
+    public int find(UTF8StringPointable pattern, boolean ignoreCase) {
+        return find(this, pattern, ignoreCase);
     }
 
     /**
-     * Gets the length of the UTF-8 encoded string in bytes.
-     * 
-     * @return length of UTF-8 encoded string in bytes
+     * Returns the byte offset of the first character of the match, relative to the character data (i.e. excluding the meta length), or -1 if there is no match.
+     *
+     * @param src
+     * @param pattern
+     * @param ignoreCase
+     * @return
      */
-    public int getUTFLength() {
-        return getUTFLength(bytes, start);
+    public static int find(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+        final int srcUtfLen = src.getUTF8Length();
+        final int pttnUtfLen = pattern.getUTF8Length();
+        final int srcStart = src.getMetaDataLength();
+        final int pttnStart = pattern.getMetaDataLength();
+
+        int startMatch = 0;
+        int maxStart = srcUtfLen - pttnUtfLen;
+        while (startMatch <= maxStart) {
+            int c1 = startMatch;
+            int c2 = 0;
+            while (c1 < srcUtfLen && c2 < pttnUtfLen) {
+                char ch1 = src.charAt(srcStart + c1);
+                char ch2 = pattern.charAt(pttnStart + c2);
+
+                if (ch1 != ch2) {
+                    if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+                        break;
+                    }
+                }
+                c1 += src.charSize(srcStart + c1);
+                c2 += pattern.charSize(pttnStart + c2);
+            }
+            if (c2 == pttnUtfLen) {
+                return startMatch;
+            }
+            startMatch += src.charSize(srcStart + startMatch);
+        }
+        return -1;
     }
 
-    public static int getUTFLength(byte[] b, int s) {
-        return ((b[s] & 0xff) << 8) + ((b[s + 1] & 0xff) << 0);
+    public boolean contains(UTF8StringPointable pattern, boolean ignoreCase) {
+        return contains(this, pattern, ignoreCase);
     }
 
-    @Override
-    public int compareTo(IPointable pointer) {
-        return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
+    public static boolean contains(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+        return find(src, pattern, ignoreCase) >= 0;
     }
 
-    @Override
-    public int compareTo(byte[] bytes, int start, int length) {
-        int utflen1 = getUTFLength(this.bytes, this.start);
-        int utflen2 = getUTFLength(bytes, start);
+    public boolean startsWith(UTF8StringPointable pattern, boolean ignoreCase) {
+        return startsWith(this, pattern, ignoreCase);
+    }
+
+    public static boolean startsWith(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+        int utflen1 = src.getUTF8Length();
+        int utflen2 = pattern.getUTF8Length();
+        if (utflen2 > utflen1)
+            return false;
+
+        int s1Start = src.getMetaDataLength();
+        int s2Start = pattern.getMetaDataLength();
 
         int c1 = 0;
         int c2 = 0;
+        while (c1 < utflen1 && c2 < utflen2) {
+            char ch1 = src.charAt(s1Start + c1);
+            char ch2 = pattern.charAt(s2Start + c2);
+            if (ch1 != ch2) {
+                if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+                    break;
+                }
+            }
+            c1 += src.charSize(s1Start + c1);
+            c2 += pattern.charSize(s2Start + c2);
+        }
+        return (c2 == utflen2);
+    }
 
-        int s1Start = this.start + 2;
-        int s2Start = start + 2;
+    public boolean endsWith(UTF8StringPointable pattern, boolean ignoreCase) {
+        return endsWith(this, pattern, ignoreCase);
+    }
 
-        while (c1 < utflen1 && c2 < utflen2) {
-            char ch1 = charAt(this.bytes, s1Start + c1);
-            char ch2 = charAt(bytes, s2Start + c2);
+    public static boolean endsWith(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+        int len1 = src.getUTF8Length();
+        int len2 = pattern.getUTF8Length();
+        if (len2 > len1)
+            return false;
+
+        int s1Start = src.getMetaDataLength();
+        int s2Start = pattern.getMetaDataLength();
+
+        int c1 = len1 - len2;
+        int c2 = 0;
+        while (c1 < len1 && c2 < len2) {
+            char ch1 = src.charAt(s1Start + c1);
+            char ch2 = pattern.charAt(s2Start + c2);
 
             if (ch1 != ch2) {
-                return ch1 - ch2;
+                if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+                    break;
+                }
             }
-            c1 += charSize(this.bytes, s1Start + c1);
-            c2 += charSize(bytes, s2Start + c2);
+            c1 += src.charSize(s1Start + c1);
+            c2 += pattern.charSize(s2Start + c2);
         }
-        return utflen1 - utflen2;
+        return (c2 == len2);
     }
 
-    @Override
-    public int hash() {
-        int h = 0;
-        int utflen = getUTFLength(bytes, start);
-        int sStart = start + 2;
-        int c = 0;
+    public void concat(UTF8StringPointable next, UTF8StringBuilder builder, GrowableArray out) throws IOException {
+        concat(this, next, builder, out);
+    }
+
+    public static void concat(UTF8StringPointable first, UTF8StringPointable next, UTF8StringBuilder builder,
+            GrowableArray out) throws IOException {
+        int firstUtfLen = first.getUTF8Length();
+        int nextUtfLen = next.getUTF8Length();
+
+        builder.reset(out, firstUtfLen + nextUtfLen);
+        builder.appendUtf8StringPointable(first);
+        builder.appendUtf8StringPointable(next);
+        builder.finish();
+    }
+
+    public void substr(int charOffset, int charLength, UTF8StringBuilder builder, GrowableArray out)
+            throws IOException {
+        substr(this, charOffset, charLength, builder, out);
+    }
+
+    public static void substr(UTF8StringPointable src, int charOffset, int charLength, UTF8StringBuilder builder,
+            GrowableArray out) throws IOException {
+        // A negative charOffset or charLength usually indicates a mistake on the caller's side;
+        // it is tolerated here and clamped to 0 rather than rejected.
+        // assert charOffset >= 0;
+        if (charOffset < 0) {
+            charOffset = 0;
+        }
+        if (charLength < 0) {
+            charLength = 0;
+        }
+
+        int utfLen = src.getUTF8Length();
+        int chIdx = 0;
+        int byteIdx = 0;
+        while (byteIdx < utfLen && chIdx < charOffset) {
+            byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
+            chIdx++;
+        }
+        if (byteIdx >= utfLen) {
+            // An out-of-range charOffset is likewise tolerated: an empty string is written instead of throwing.
+            // throw new StringIndexOutOfBoundsException(charOffset);
+            builder.reset(out, 0);
+            builder.finish();
+            return;
+        }
+
+        builder.reset(out, Math.min(utfLen - byteIdx, (int) (charLength * 1.0 * byteIdx / chIdx)));
+        chIdx = 0;
+        while (byteIdx < utfLen && chIdx < charLength) {
+            builder.appendChar(src.charAt(src.getMetaDataLength() + byteIdx));
+            chIdx++;
+            byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
+        }
+        builder.finish();
+    }
+
+    public void substrBefore(UTF8StringPointable match, UTF8StringBuilder builder, GrowableArray out)
+            throws IOException {
+        substrBefore(this, match, builder, out);
+    }
 
-        while (c < utflen) {
-            char ch = charAt(bytes, sStart + c);
-            h = 31 * h + ch;
-            c += charSize(bytes, sStart + c);
+    /**
+     * Writes the substring that precedes the given pattern. An empty string is written if no match is found.
+     *
+     * @param src
+     * @param match
+     * @param builder
+     * @param out
+     * @throws IOException
+     */
+    public static void substrBefore(
+            UTF8StringPointable src,
+            UTF8StringPointable match,
+            UTF8StringBuilder builder,
+            GrowableArray out) throws IOException {
+
+        int byteOffset = find(src, match, false);
+        if (byteOffset < 0) {
+            builder.reset(out, 0);
+            builder.finish();
+            return;
         }
-        return h;
+
+        final int srcMetaLen = src.getMetaDataLength();
+
+        builder.reset(out, byteOffset);
+        for (int idx = 0; idx < byteOffset; ) {
+            builder.appendChar(src.charAt(srcMetaLen + idx));
+            idx += src.charSize(srcMetaLen + idx);
+        }
+        builder.finish();
+    }
+
+    public void substrAfter(UTF8StringPointable match, UTF8StringBuilder builder, GrowableArray out)
+            throws IOException {
+        substrAfter(this, match, builder, out);
     }
 
-    public static void toString(StringBuilder buffer, byte[] bytes, int start) {
-        int utfLen = getUTFLength(bytes, start);
-        int offset = 2;
-        while (utfLen > 0) {
-            char c = charAt(bytes, start + offset);
-            buffer.append(c);
-            int cLen = UTF8StringPointable.getModifiedUTF8Len(c);
-            offset += cLen;
-            utfLen -= cLen;
+    /**
+     * Writes the substring that follows the given pattern. An empty string is written if no match is found.
+     *
+     * @param src
+     * @param match
+     * @param builder
+     * @param out
+     */
+    public static void substrAfter(
+            UTF8StringPointable src,
+            UTF8StringPointable match,
+            UTF8StringBuilder builder,
+            GrowableArray out) throws IOException {
+
+        int byteOffset = find(src, match, false);
+        if (byteOffset < 0) {
+            builder.reset(out, 0);
+            builder.finish();
+            return;
         }
+
+        final int srcUtfLen = src.getUTF8Length();
+        final int matchUtfLen = match.getUTF8Length();
+
+        final int resultLen = srcUtfLen - byteOffset - matchUtfLen;
+        builder.reset(out, resultLen);
+        builder.appendUtf8StringPointable(src, src.getCharStartOffset() + byteOffset + matchUtfLen, resultLen);
+        builder.finish();
     }
 
-    public void toString(StringBuilder buffer) {
-        toString(buffer, bytes, start);
+    public void lowercase(UTF8StringBuilder builder, GrowableArray out) throws IOException {
+        lowercase(this, builder, out);
+    }
+
+    public static void lowercase(UTF8StringPointable src, UTF8StringBuilder builder, GrowableArray out)
+            throws IOException {
+        final int srcUtfLen = src.getUTF8Length();
+        final int srcStart = src.getMetaDataLength();
+
+        builder.reset(out, srcUtfLen);
+        int byteIndex = 0;
+        while (byteIndex < srcUtfLen) {
+            builder.appendChar(Character.toLowerCase(src.charAt(srcStart + byteIndex)));
+            byteIndex += src.charSize(srcStart + byteIndex);
+        }
+        builder.finish();
     }
+
+    public void uppercase(UTF8StringBuilder builder, GrowableArray out) throws IOException {
+        uppercase(this, builder, out);
+    }
+
+    public static void uppercase(UTF8StringPointable src, UTF8StringBuilder builder, GrowableArray out)
+            throws IOException {
+        final int srcUtfLen = src.getUTF8Length();
+        final int srcStart = src.getMetaDataLength();
+
+        builder.reset(out, srcUtfLen);
+        int byteIndex = 0;
+        while (byteIndex < srcUtfLen) {
+            builder.appendChar(Character.toUpperCase(src.charAt(srcStart + byteIndex)));
+            byteIndex += src.charSize(srcStart + byteIndex);
+        }
+        builder.finish();
+    }
+
 }
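
A short usage sketch of the new UTF8StringPointable helpers, using only methods added above; the demo class wrapper and example strings are illustrative:

    import org.apache.hyracks.data.std.primitive.UTF8StringPointable;

    public class UTF8StringPointableDemo {
        public static void main(String[] args) {
            UTF8StringPointable src = UTF8StringPointable.generateUTF8Pointable("Hello Hyracks");
            UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("hyracks");

            int chars = src.getStringLength();      // character count, computed lazily and then cached
            int utfBytes = src.getUTF8Length();     // UTF-8 byte count, excluding the length prefix
            int prefix = src.getMetaDataLength();   // bytes used by the var-size length prefix

            boolean found = src.contains(pattern, true);                // true: case-insensitive match
            int offset = UTF8StringPointable.find(src, pattern, true);  // byte offset into the character data, or -1
            boolean starts = src.startsWith(pattern, false);            // false: case-sensitive mismatch

            System.out.println(chars + " " + utfBytes + " " + prefix + " " + found + " " + offset + " " + starts);
        }
    }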

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringWriter.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringWriter.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringWriter.java
deleted file mode 100644
index ae7e903..0000000
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringWriter.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hyracks.data.std.primitive;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.UTFDataFormatException;
-
-public class UTF8StringWriter {
-    private byte[] tempBytes;
-
-    public void writeUTF8String(CharSequence str, DataOutput out) throws IOException {
-        int strlen = str.length();
-        int utflen = 0;
-        int c, count = 0;
-
-        for (int i = 0; i < strlen; i++) {
-            c = str.charAt(i);
-            if ((c >= 0x0001) && (c <= 0x007F)) {
-                utflen++;
-            } else if (c > 0x07FF) {
-                utflen += 3;
-            } else {
-                utflen += 2;
-            }
-        }
-
-        if (utflen > 65535) {
-            throw new UTFDataFormatException("encoded string too long: " + utflen + " bytes");
-        }
-
-        if (tempBytes == null || tempBytes.length < utflen + 2) {
-            tempBytes = new byte[utflen + 2];
-        }
-
-        tempBytes[count++] = (byte) ((utflen >>> 8) & 0xFF);
-        tempBytes[count++] = (byte) ((utflen >>> 0) & 0xFF);
-
-        int i = 0;
-        for (i = 0; i < strlen; i++) {
-            c = str.charAt(i);
-            if (!((c >= 0x0001) && (c <= 0x007F))) {
-                break;
-            }
-            tempBytes[count++] = (byte) c;
-        }
-
-        for (; i < strlen; i++) {
-            c = str.charAt(i);
-            if ((c >= 0x0001) && (c <= 0x007F)) {
-                tempBytes[count++] = (byte) c;
-            } else if (c > 0x07FF) {
-                tempBytes[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
-                tempBytes[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
-                tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
-            } else {
-                tempBytes[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
-                tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
-            }
-        }
-        out.write(tempBytes, 0, utflen + 2);
-    }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java
new file mode 100644
index 0000000..452710e
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+/**
+ * This builder is used to build a variable-length encoded object (e.g. UTF8String or ByteArray).
+ * The caller needs to give an estimated length when calling {@link #reset(GrowableArray, int)}.
+ * It can then append the content byte by byte.
+ * Since the number of bytes needed to store the content length is not known precisely at the beginning, the caller
+ * needs to explicitly call {@link #finish()} to signal that the object is complete.
+ * The builder then takes care of storing the actual length field at the beginning of the
+ * given storage array.
+ */
+public abstract class AbstractVarLenObjectBuilder {
+    protected GrowableArray ary;
+    protected DataOutput out;
+    protected int startOffset;
+    protected int estimateMetaLen;
+
+    /**
+     * Start building a variable-length object.
+     *
+     * @param ary            the destination storage array
+     * @param estimateLength the estimated length of this object
+     * @throws IOException
+     */
+    public void reset(GrowableArray ary, int estimateLength) throws IOException {
+        this.ary = ary;
+        this.out = ary.getDataOutput();
+        this.startOffset = ary.getLength();
+        this.estimateMetaLen = VarLenIntEncoderDecoder.getBytesRequired(estimateLength);
+
+        // increase the offset
+        for (int i = 0; i < estimateMetaLen; i++) {
+            out.writeByte(0);
+        }
+    }
+
+    /**
+     * Finish building a variable-length object.
+     * It writes the correct length of the object at the beginning of the storage array.
+     * Since the number of bytes used to store the length may change (if the given estimated length differs too
+     * much from the actual length), we need to shift the data around in some cases.
+     * Specifically, if varlength(actual length) > varlength(estimated length), we need to grow the storage and
+     * shift the content rightward. Otherwise we shift the data leftward and tell the storage to rewind by the
+     * difference to mark the correct position.
+     *
+     * @throws IOException
+     */
+    public void finish() throws IOException {
+        int actualDataLength = ary.getLength() - startOffset - estimateMetaLen;
+        int actualMetaLen = VarLenIntEncoderDecoder.getBytesRequired(actualDataLength);
+        if (actualMetaLen != estimateMetaLen) { // ugly but rare situation: the estimate was far off the actual length
+            int diff = estimateMetaLen - actualMetaLen;
+            int actualDataStart = startOffset + actualMetaLen;
+            if (diff > 0) { // shrink
+                for (int i = 0; i < actualDataLength; i++) {
+                    ary.getByteArray()[actualDataStart + i] = ary.getByteArray()[actualDataStart + i + diff];
+                }
+                ary.rewindPositionBy(diff);
+            } else { // increase space
+                diff = -diff;
+                for (int i = 0; i < diff; i++) {
+                    out.writeByte(0);
+                }
+                for (int i = ary.getLength() - 1; i >= actualDataStart + diff; i--) {
+                    ary.getByteArray()[i] = ary.getByteArray()[i - diff];
+                }
+            }
+        }
+        VarLenIntEncoderDecoder.encode(actualDataLength, ary.getByteArray(), startOffset);
+    }
+
+}

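A minimal sketch of the builder protocol described in the Javadoc above, using the ByteArrayBuilder subclass added elsewhere in this commit (the content bytes and the estimate value are arbitrary, chosen only for illustration):

    import java.io.IOException;

    import org.apache.hyracks.data.std.util.ByteArrayBuilder;
    import org.apache.hyracks.data.std.util.GrowableArray;

    public class VarLenBuilderSketch {
        public static void main(String[] args) throws IOException {
            GrowableArray storage = new GrowableArray();
            ByteArrayBuilder builder = new ByteArrayBuilder();

            byte[] content = new byte[] { 1, 2, 3, 4, 5 };
            // reset() reserves as many meta bytes as the estimated length would need for its length field.
            builder.reset(storage, content.length);
            builder.appendBytes(content, 0, content.length);
            // finish() writes the actual length (var-size encoded) at the start of the object,
            // shifting the content if the reserved meta bytes turned out to be too many or too few.
            builder.finish();

            // storage now holds [varlen(5)][1, 2, 3, 4, 5]
            System.out.println("bytes written = " + storage.getLength());
        }
    }
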
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
index 2f1ad1d..287e2f2 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
@@ -41,6 +41,26 @@ public class ByteArrayAccessibleOutputStream extends ByteArrayOutputStream {
         count += 1;
     }
 
+    /**
+     * Rewind the current position by {@code delta} bytes to a previous position.
+     * This function is used to drop the last {@code delta} bytes that were already written.
+     * In some cases we write some bytes and afterward find that we have written more than expected;
+     * we then fix the position by rewinding the current position to the expected one.
+     *
+     * Currently it is used by the {@link AbstractVarLenObjectBuilder}, which may reserve more space than required
+     * at the beginning and will shift the data and fix the position whenever required.
+     *
+     * It throws an {@link IndexOutOfBoundsException} if {@code delta} is negative or larger than the current position.
+     * Evil function; use with caution.
+     * @param delta the number of bytes to rewind by
+     */
+    public void rewindPositionBy(int delta) {
+        if (delta < 0 || count < delta) {
+            throw new IndexOutOfBoundsException();
+        }
+        count -= delta;
+    }
+
     @Override
     public void write(byte[] b, int off, int len) {
         if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) - b.length > 0)) {
@@ -60,9 +80,8 @@ public class ByteArrayAccessibleOutputStream extends ByteArrayOutputStream {
     /**
      * Increases the capacity to ensure that it can hold at least the
      * number of elements specified by the minimum capacity argument.
-     * 
-     * @param minCapacity
-     *            the desired minimum capacity
+     *
+     * @param minCapacity the desired minimum capacity
      */
     private void grow(int minCapacity) {
         // overflow-conscious code

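For clarity, a small sketch of how rewindPositionBy() behaves (the values are made up for illustration; size() is inherited from ByteArrayOutputStream):

    import org.apache.hyracks.data.std.util.ByteArrayAccessibleOutputStream;

    public class RewindSketch {
        public static void main(String[] args) {
            ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
            baaos.write(new byte[] { 10, 20, 30, 40 }, 0, 4);
            // Suppose the last two bytes should not have been written:
            baaos.rewindPositionBy(2);
            // The logical size is back to 2; the next write overwrites the dropped bytes.
            System.out.println("size after rewind = " + baaos.size());
        }
    }
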
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayBuilder.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayBuilder.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayBuilder.java
new file mode 100644
index 0000000..61b15d4
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayBuilder.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import java.io.IOException;
+
+public class ByteArrayBuilder extends AbstractVarLenObjectBuilder {
+
+    public void appendByte(byte b) throws IOException {
+        out.writeByte(b);
+    }
+
+    public void appendBytes(byte[] bytes, int start, int length) throws IOException {
+        out.write(bytes, start, length);
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java
index d08412e..6e329ab 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java
@@ -20,7 +20,6 @@
 package org.apache.hyracks.data.std.util;
 
 import java.io.DataOutput;
-import java.io.DataOutputStream;
 import java.io.IOException;
 
 import org.apache.hyracks.data.std.api.IDataOutputProvider;
@@ -28,7 +27,7 @@ import org.apache.hyracks.data.std.api.IValueReference;
 
 public class GrowableArray implements IDataOutputProvider {
     private final ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
-    private final DataOutputStream dos = new DataOutputStream(baaos);
+    private final RewindableDataOutputStream dos = new RewindableDataOutputStream(baaos);
 
     @Override
     public DataOutput getDataOutput() {
@@ -39,6 +38,24 @@ public class GrowableArray implements IDataOutputProvider {
         baaos.reset();
     }
 
+    /**
+     * Rewind the current position by {@code delta} bytes to a previous position.
+     * This function is used to drop the last {@code delta} bytes that were already written.
+     * In some cases we write some bytes and afterward find that we have written more than expected;
+     * we then fix the position by rewinding the current position to the expected one.
+     *
+     * Currently it is used by the {@link AbstractVarLenObjectBuilder}, which may reserve more space than required
+     * at the beginning and will shift the data and fix the position whenever required.
+     * It throws an {@link IndexOutOfBoundsException} if {@code delta} is negative or larger than the current position.
+     * Evil function; use with caution.
+     *
+     * @param delta the number of bytes to rewind by
+     */
+    public void rewindPositionBy(int delta) {
+        baaos.rewindPositionBy(delta);
+        dos.rewindWrittenBy(delta);
+    }
+
     public byte[] getByteArray() {
         return baaos.getByteArray();
     }

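And the corresponding sketch at the GrowableArray level, where the rewind also keeps the wrapped DataOutput's written-byte count consistent (the placeholder bytes are only an example):

    import java.io.DataOutput;
    import java.io.IOException;

    import org.apache.hyracks.data.std.util.GrowableArray;

    public class GrowableArrayRewindSketch {
        public static void main(String[] args) throws IOException {
            GrowableArray array = new GrowableArray();
            DataOutput out = array.getDataOutput();
            out.writeByte(0); // reserve two placeholder bytes
            out.writeByte(0);
            // Only one placeholder turned out to be needed, so drop the extra byte:
            array.rewindPositionBy(1);
            System.out.println("length = " + array.getLength());
        }
    }
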
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ICharIterator.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ICharIterator.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ICharIterator.java
new file mode 100644
index 0000000..118893b
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ICharIterator.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+public interface ICharIterator {
+
+    boolean hasNext();
+
+    char next();
+}



[4/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
index 6fdc797..a10513a 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
@@ -59,32 +59,32 @@ public class TPCHCustomerOptimizedHybridHashJoinTest extends AbstractIntegration
                 "data/tpch0.001/customer4.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders4.tbl"))) };
 
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -137,32 +137,32 @@ public class TPCHCustomerOptimizedHybridHashJoinTest extends AbstractIntegration
                 "data/tpch0.001/customer3.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders4.tbl"))) };
 
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -216,32 +216,32 @@ public class TPCHCustomerOptimizedHybridHashJoinTest extends AbstractIntegration
                 "data/tpch0.001/customer3.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders1.tbl"))) };
 
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
index a9cf4ad..7ee3682 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
@@ -78,31 +78,31 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 "data/tpch0.001/customer.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -157,31 +157,31 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 "data/tpch0.001/customer.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -240,31 +240,31 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 "data/tpch0.001/customer.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -323,31 +323,31 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 "data/tpch0.001/customer.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -407,31 +407,31 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 "data/tpch0.001/customer.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -495,31 +495,31 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 "data/tpch0.001/customer.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
                 "data/tpch0.001/orders.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -584,32 +584,32 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] {
                 new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -671,32 +671,32 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] {
                 new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -762,32 +762,32 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] {
                 new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -853,32 +853,32 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] {
                 new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -940,32 +940,32 @@ public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] {
                 new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
index 039d936..985f974 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
@@ -132,31 +132,31 @@ public class TPCHCustomerOrderNestedLoopJoinTest extends AbstractIntegrationTest
                 "data/tpch0.001/customer.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
                 "data/tpch0.001/orders.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -208,32 +208,32 @@ public class TPCHCustomerOrderNestedLoopJoinTest extends AbstractIntegrationTest
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] {
                 new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -285,32 +285,32 @@ public class TPCHCustomerOrderNestedLoopJoinTest extends AbstractIntegrationTest
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] {
                 new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -362,32 +362,32 @@ public class TPCHCustomerOrderNestedLoopJoinTest extends AbstractIntegrationTest
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
         IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
         RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileSplit[] ordersSplits = new FileSplit[] {
                 new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
                 new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
         IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
         RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java
index 57bfad0..508db7c 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java
@@ -52,7 +52,7 @@ public class UnionTest extends AbstractIntegrationTest {
                 new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) });
 
         RecordDescriptor desc = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor csvScanner01 = new FileScanOperatorDescriptor(
                 spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java
index d019f16..b774e0e 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java
@@ -58,11 +58,11 @@ public class VSizeFrameSortMergeTest extends AbstractIntegrationTest {
             new FileSplit(NC2_ID, new FileReference(new File(INPUTS[1]))) };
     IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
     RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer() });
 
     @Test
     public void sortNormalMergeTest() throws Exception {
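
The change repeated throughout these test diffs swaps the shared UTF8StringSerializerDeserializer.INSTANCE singleton for a fresh instance per field, which suggests the serde now carries per-instance state for the variable-size length encoding. Below is a minimal sketch of the before/after shape, assuming the usual Hyracks API and dataflow-common package locations; the two-field layout is illustrative and not copied from any single test.

    // Sketch only: package locations assumed from the Hyracks API and dataflow-common modules.
    import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
    import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
    import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
    import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

    public class RecordDescriptorSketch {
        // Before this patch: every UTF8 field shared one singleton, e.g.
        // new RecordDescriptor(new ISerializerDeserializer[] {
        //         UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });

        // After this patch: each UTF8 field gets its own serializer instance.
        static RecordDescriptor twoFieldDesc() {
            return new RecordDescriptor(new ISerializerDeserializer[] {
                    new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
        }
    }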


[3/7] incubator-asterixdb-hyracks git commit: ASTERIXDB-1102: VarSize Encoding to store length of String and ByteArray

Posted by ji...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
index 1f33915..3cc2a23 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
@@ -54,8 +54,8 @@ import org.junit.Test;
 
 public abstract class AbstractRunGeneratorTest {
     static TestUtils testUtils = new TestUtils();
-    static ISerializerDeserializer[] SerDers = new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE };
+    static ISerializerDeserializer[] SerDers = new ISerializerDeserializer[] {
+            IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
     static RecordDescriptor RecordDesc = new RecordDescriptor(SerDers);
     static Random GRandom = new Random(System.currentTimeMillis());
     static int[] SortFields = new int[] { 0, 1 };
@@ -153,7 +153,7 @@ public abstract class AbstractRunGeneratorTest {
             for (Map.Entry<Integer, String> entry : specialData.entrySet()) {
                 tb.reset();
                 tb.addField(IntegerSerializerDeserializer.INSTANCE, entry.getKey());
-                tb.addField(UTF8StringSerializerDeserializer.INSTANCE, entry.getValue());
+                tb.addField(new UTF8StringSerializerDeserializer(), entry.getValue());
 
                 VSizeFrame frame = new VSizeFrame(ctx, FrameHelper.calcAlignedFrameSizeToStore(
                         tb.getFieldEndOffsets().length, tb.getSize(), ctx.getInitialFrameSize()));
@@ -173,7 +173,7 @@ public abstract class AbstractRunGeneratorTest {
             if (!keyValuePair.containsKey(key)) {
                 String value = generateRandomRecord(minRecordSize, maxRecordSize);
                 tb.addField(IntegerSerializerDeserializer.INSTANCE, key);
-                tb.addField(UTF8StringSerializerDeserializer.INSTANCE, value);
+                tb.addField(new UTF8StringSerializerDeserializer(), value);
 
                 if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                     frameList.add(frame);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
index ca0a6bb..e6d10f2 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
@@ -109,7 +109,7 @@ public class RunMergingFrameReaderTest {
             while (true) {
                 tb.reset();
                 tb.addField(IntegerSerializerDeserializer.INSTANCE, lastEntry.getKey());
-                tb.addField(UTF8StringSerializerDeserializer.INSTANCE, lastEntry.getValue());
+                tb.addField(new UTF8StringSerializerDeserializer(), lastEntry.getValue());
                 if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                     break;
                 } else {
@@ -148,7 +148,7 @@ public class RunMergingFrameReaderTest {
             for (Map.Entry<Integer, String> entry : specialData.entrySet()) {
                 tb.reset();
                 tb.addField(IntegerSerializerDeserializer.INSTANCE, entry.getKey());
-                tb.addField(UTF8StringSerializerDeserializer.INSTANCE, entry.getValue());
+                tb.addField(new UTF8StringSerializerDeserializer(), entry.getValue());
                 int size = tb.getSize() + tb.getFieldEndOffsets().length * 4;
                 datasize += size;
                 if (size > maxtuple) {
@@ -164,7 +164,7 @@ public class RunMergingFrameReaderTest {
             int key = GRandom.nextInt(datasize + 1);
             if (!result.containsKey(key)) {
                 tb.addField(IntegerSerializerDeserializer.INSTANCE, key);
-                tb.addField(UTF8StringSerializerDeserializer.INSTANCE, value);
+                tb.addField(new UTF8StringSerializerDeserializer(), value);
                 int size = tb.getSize() + tb.getFieldEndOffsets().length * 4;
                 datasize += size;
                 if (size > maxtuple) {
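
In the unit-test hunks above, each tuple-builder addField call now receives a fresh UTF8StringSerializerDeserializer rather than the singleton. The sketch below shows the same serde doing a standalone round trip through DataOutput/DataInput, which is the direction addField exercises; the package locations and the serialize/deserialize signatures are assumed from the Hyracks ISerializerDeserializer API.

    // Sketch only: a round trip with a per-use serde instance, mirroring the fresh-instance
    // style the tests now use. Signatures assumed from ISerializerDeserializer<String>.
    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;

    import org.apache.hyracks.api.exceptions.HyracksDataException;
    import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

    public class Utf8RoundTripSketch {
        public static void main(String[] args) throws HyracksDataException {
            UTF8StringSerializerDeserializer serde = new UTF8StringSerializerDeserializer();

            // Serialize one value; per the commit title, the string length is now stored
            // with a variable-size encoding rather than a fixed two-byte prefix.
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            serde.serialize("hello world", new DataOutputStream(bos));

            // Deserialize it back with the same (or another fresh) instance.
            String decoded = serde.deserialize(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
            System.out.println(decoded); // "hello world"
        }
    }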

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
index 3f9b0e9..965e194 100644
--- a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
+++ b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
@@ -168,10 +168,10 @@ public class ExternalGroupClient {
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
                 FloatSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor fileScanner = new FileScanOperatorDescriptor(spec, splitsProvider,
                 new DelimitedDataTupleParserFactory(new IValueParserFactory[] { IntegerParserFactory.INSTANCE,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
index a3e1ee0..cb6006b 100644
--- a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
+++ b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
@@ -132,14 +132,14 @@ public class WordCountMain {
 
         IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
         RecordDescriptor wordDesc = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
         FileScanOperatorDescriptor wordScanner = new FileScanOperatorDescriptor(spec, splitsProvider,
                 new WordTupleParserFactory(), wordDesc);
         createPartitionConstraint(spec, wordScanner, inSplits);
 
         RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
 
         IOperatorDescriptor gBy;
         int[] keys = new int[] { 0 };

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
index 28b62a7..ac172fd 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
@@ -34,27 +34,27 @@ import org.apache.hyracks.dataflow.std.file.FileSplit;
 
 public class Common {
     static RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
 
     static RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer() });
     static RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-            UTF8StringSerializerDeserializer.INSTANCE });
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+            new UTF8StringSerializerDeserializer() });
 
     static IValueParserFactory[] orderParserFactories = new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
                         UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,

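Note on the serde change above: UTF8StringSerializerDeserializer is no longer referenced through a shared INSTANCE singleton but constructed once per record descriptor, presumably because the serde now carries per-instance UTF8 reader/writer state for the variable-size length encoding. A minimal sketch of the resulting construction pattern follows; the helper class SerdeUtil is hypothetical and only condenses the repeated "new UTF8StringSerializerDeserializer()" entries above (package names are assumed to follow the usual Hyracks layout).

    import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
    import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
    import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

    public final class SerdeUtil {
        private SerdeUtil() {
        }

        // Builds a RecordDescriptor with n fresh UTF8 string serdes instead of
        // n references to a shared singleton.
        public static RecordDescriptor utf8RecordDescriptor(int n) {
            ISerializerDeserializer[] serdes = new ISerializerDeserializer[n];
            for (int i = 0; i < n; i++) {
                serdes[i] = new UTF8StringSerializerDeserializer();
            }
            return new RecordDescriptor(serdes);
        }
    }
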
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
index dadb935..b20d2b8 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
@@ -229,7 +229,7 @@ public class Join {
         if (hasGroupBy) {
 
             RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] {
-                    UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+                    new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
 
             HashGroupOperatorDescriptor gby = new HashGroupOperatorDescriptor(
                     spec,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
index e36dd06..1515037 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
@@ -141,7 +141,7 @@ public class DataflowTest extends TestCase {
         String[] readSchedule = scheduler.getLocationConstraints(splits);
         JobSpecification jobSpec = new JobSpecification();
         RecordDescriptor recordDesc = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
         String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID,
                 HyracksUtils.NC2_ID };

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
index a703e57..0b41b07 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
@@ -148,7 +148,7 @@ public class DataflowTest extends TestCase {
         String[] readSchedule = scheduler.getLocationConstraints(splits);
         JobSpecification jobSpec = new JobSpecification();
         RecordDescriptor recordDesc = new RecordDescriptor(
-                new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+                new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
         String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID,
                 HyracksUtils.NC2_ID };

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/pom.xml b/hyracks/hyracks-storage-am-common/pom.xml
index 727da41..86b0eac 100644
--- a/hyracks/hyracks-storage-am-common/pom.xml
+++ b/hyracks/hyracks-storage-am-common/pom.xml
@@ -68,5 +68,10 @@
   		<type>jar</type>
   		<scope>compile</scope>
   	</dependency>
+    <dependency>
+         <groupId>org.apache.hyracks</groupId>
+         <artifactId>hyracks-util</artifactId>
+         <version>0.2.17-SNAPSHOT</version>
+    </dependency>
   </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
index c99b1e5..554cd3f 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
@@ -38,7 +38,7 @@ public class TreeIndexStatsOperatorDescriptor extends AbstractTreeIndexOperatorD
 
     private static final long serialVersionUID = 1L;
     private static final RecordDescriptor recDesc = new RecordDescriptor(
-            new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
     public TreeIndexStatsOperatorDescriptor(IOperatorDescriptorRegistry spec, IStorageManagerInterface storageManager,
             IIndexLifecycleManagerProvider lifecycleManagerProvider, IFileSplitProvider fileSplitProvider,

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
index 51ca3c2..584418c 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
@@ -38,6 +38,7 @@ public class TreeIndexStatsOperatorNodePushable extends AbstractUnaryOutputSourc
     private final AbstractTreeIndexOperatorDescriptor opDesc;
     private final IHyracksTaskContext ctx;
     private final TreeIndexDataflowHelper treeIndexHelper;
+    private final UTF8StringSerializerDeserializer utf8SerDer = new UTF8StringSerializerDeserializer();
     private TreeIndexStatsGatherer statsGatherer;
 
     public TreeIndexStatsOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
@@ -77,7 +78,7 @@ public class TreeIndexStatsOperatorNodePushable extends AbstractUnaryOutputSourc
             ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
             DataOutput dos = tb.getDataOutput();
             tb.reset();
-            UTF8StringSerializerDeserializer.INSTANCE.serialize(stats.toString(), dos);
+            utf8SerDer.serialize(stats.toString(), dos);
             tb.addFieldEndOffset();
             if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                 throw new HyracksDataException(

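The pushable above now holds its own UTF8StringSerializerDeserializer and reuses it for every stats tuple, rather than calling the removed static INSTANCE. Below is a self-contained sketch of that serialize path, assuming the standard ArrayTupleBuilder API; the class StatsTupleSketch is illustrative and not part of the patch.

    import java.io.DataOutput;

    import org.apache.hyracks.api.exceptions.HyracksDataException;
    import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
    import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

    public class StatsTupleSketch {
        // one serde per operator instance, reused across tuples
        private final UTF8StringSerializerDeserializer utf8SerDer = new UTF8StringSerializerDeserializer();

        public ArrayTupleBuilder buildStatsTuple(String stats) throws HyracksDataException {
            ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
            DataOutput dos = tb.getDataOutput();
            tb.reset();
            utf8SerDer.serialize(stats, dos); // writes the var-size length header plus the UTF-8 bytes
            tb.addFieldEndOffset();
            return tb;
        }
    }
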
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
index 1a17a5a..c6a0035 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
@@ -19,11 +19,14 @@
 
 package org.apache.hyracks.storage.am.common.tuples;
 
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.VarLenIntDecoder;
+
 import java.nio.ByteBuffer;
 
 import org.apache.hyracks.api.dataflow.value.ITypeTraits;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
 
 public class TypeAwareTupleReference implements ITreeIndexTupleReference {
     protected ByteBuffer buf;
@@ -34,7 +37,7 @@ public class TypeAwareTupleReference implements ITreeIndexTupleReference {
     protected int dataStartOff;
 
     protected ITypeTraits[] typeTraits;
-    protected VarLenIntEncoderDecoder encDec = new VarLenIntEncoderDecoder();
+    protected VarLenIntDecoder encDec = VarLenIntEncoderDecoder.createDecoder();
     protected int[] decodedFieldSlots;
 
     public TypeAwareTupleReference(ITypeTraits[] typeTraits) {

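The tuple reference above now obtains a VarLenIntDecoder from the new hyracks-util VarLenIntEncoderDecoder instead of instantiating the old stateful class. A hedged sketch of how such a decoder is presumably driven when reading the variable-length field slots: the createDecoder() factory and the decoder type come from the hunk above, while the reset(byte[], int)/decode() call pattern is an assumption modeled on the old class that is deleted further down.

    import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
    import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.VarLenIntDecoder;

    public final class FieldSlotDecodeSketch {
        private FieldSlotDecodeSketch() {
        }

        // Reads numVarLenFields variable-length integers starting at slotStart.
        public static int[] decodeVarLenFieldSlots(byte[] tupleBytes, int slotStart, int numVarLenFields) {
            VarLenIntDecoder decoder = VarLenIntEncoderDecoder.createDecoder();
            decoder.reset(tupleBytes, slotStart); // assumed API, mirroring the old reset(bytes, pos)
            int[] fieldLengths = new int[numVarLenFields];
            for (int i = 0; i < numVarLenFields; i++) {
                fieldLengths[i] = decoder.decode(); // assumed API, mirroring the old decode()
            }
            return fieldLengths;
        }
    }
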
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
index 73e6e3e..c44cb6b 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
@@ -25,11 +25,12 @@ import org.apache.hyracks.api.dataflow.value.ITypeTraits;
 import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
 
 public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
 
     protected ITypeTraits[] typeTraits;
-    protected VarLenIntEncoderDecoder encDec = new VarLenIntEncoderDecoder();
+    protected VarLenIntEncoderDecoder.VarLenIntDecoder decoder = VarLenIntEncoderDecoder.createDecoder();
 
     public TypeAwareTupleWriter(ITypeTraits[] typeTraits) {
         this.typeTraits = typeTraits;
@@ -73,13 +74,11 @@ public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
         }
 
         // write field slots for variable length fields
-        encDec.reset(targetBuf, runner);
         for (int i = 0; i < tuple.getFieldCount(); i++) {
             if (!typeTraits[i].isFixedLength()) {
-                encDec.encode(tuple.getFieldLength(i));
+                runner += VarLenIntEncoderDecoder.encode(tuple.getFieldLength(i), targetBuf, runner);
             }
         }
-        runner = encDec.getPos();
 
         // write data fields
         for (int i = 0; i < tuple.getFieldCount(); i++) {
@@ -100,13 +99,11 @@ public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
         }
 
         // write field slots for variable length fields
-        encDec.reset(targetBuf, runner);
         for (int i = startField; i < startField + numFields; i++) {
             if (!typeTraits[i].isFixedLength()) {
-                encDec.encode(tuple.getFieldLength(i));
+                runner += VarLenIntEncoderDecoder.encode(tuple.getFieldLength(i), targetBuf, runner);
             }
         }
-        runner = encDec.getPos();
 
         for (int i = startField; i < startField + numFields; i++) {
             System.arraycopy(tuple.getFieldData(i), tuple.getFieldStart(i), targetBuf, runner, tuple.getFieldLength(i));
@@ -124,7 +121,7 @@ public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
         int fieldSlotBytes = 0;
         for (int i = 0; i < tuple.getFieldCount(); i++) {
             if (!typeTraits[i].isFixedLength()) {
-                fieldSlotBytes += encDec.getBytesRequired(tuple.getFieldLength(i));
+                fieldSlotBytes += VarLenIntEncoderDecoder.getBytesRequired(tuple.getFieldLength(i));
             }
         }
         return fieldSlotBytes;
@@ -138,7 +135,7 @@ public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
         int fieldSlotBytes = 0;
         for (int i = startField; i < startField + numFields; i++) {
             if (!typeTraits[i].isFixedLength()) {
-                fieldSlotBytes += encDec.getBytesRequired(tuple.getFieldLength(i));
+                fieldSlotBytes += VarLenIntEncoderDecoder.getBytesRequired(tuple.getFieldLength(i));
             }
         }
         return fieldSlotBytes;

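The writer above switches from the stateful reset()/encode()/getPos() sequence to a static encode(value, buffer, position) that returns the number of bytes written, so the caller advances its own cursor. A short sketch of that slot-writing pattern using only the two static methods visible in the hunk; the wrapper class is hypothetical.

    import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;

    public final class FieldSlotEncodeSketch {
        private FieldSlotEncodeSketch() {
        }

        // Writes one variable-length integer per field length and returns the
        // next free position in targetBuf.
        public static int writeVarLenFieldSlots(int[] fieldLengths, byte[] targetBuf, int start) {
            int runner = start;
            for (int fieldLength : fieldLengths) {
                runner += VarLenIntEncoderDecoder.encode(fieldLength, targetBuf, runner);
            }
            return runner;
        }

        // Space needed for the slots, matching the fieldSlotBytes computation above.
        public static int bytesRequired(int[] fieldLengths) {
            int total = 0;
            for (int fieldLength : fieldLengths) {
                total += VarLenIntEncoderDecoder.getBytesRequired(fieldLength);
            }
            return total;
        }
    }
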
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java
deleted file mode 100644
index cd3d366..0000000
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hyracks.storage.am.common.tuples;
-
-// encodes positive integers in a variable-byte format
-
-public class VarLenIntEncoderDecoder {
-	public static final int ENCODE_MASK = 0x0000007F;
-	public static final byte CONTINUE_CHUNK = (byte) 0x80;
-	public static final byte DECODE_MASK = (byte) 0x7F;
-
-	private byte[] encTmp = new byte[5];
-
-	private int pos;
-	private byte[] bytes;
-
-	public void reset(byte[] bytes, int pos) {
-		this.bytes = bytes;
-		this.pos = pos;
-	}
-
-	public int encode(int val) {
-		int origPos = 0;
-		int tmpPos = 0;
-		while (val > ENCODE_MASK) {
-			encTmp[tmpPos++] = (byte) (val & ENCODE_MASK);
-			val = val >>> 7;
-		}
-		encTmp[tmpPos++] = (byte) (val);
-
-		// reverse order to optimize for decoding speed
-		for (int i = 0; i < tmpPos - 1; i++) {
-			bytes[pos++] = (byte) (encTmp[tmpPos - 1 - i] | CONTINUE_CHUNK);
-		}
-		bytes[pos++] = encTmp[0];
-
-		return pos - origPos;
-	}
-
-	public int decode() {
-		int sum = 0;
-		while ((bytes[pos] & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
-			sum = (sum + (bytes[pos] & DECODE_MASK)) << 7;
-			pos++;
-		}
-		sum += bytes[pos++];
-		return sum;
-	}
-
-	// calculate the number of bytes needed for encoding
-	public int getBytesRequired(int val) {
-		int byteCount = 0;
-		while (val > ENCODE_MASK) {
-			val = val >>> 7;
-			byteCount++;
-		}
-		return byteCount + 1;
-	}
-
-	public int getPos() {
-		return pos;
-	}
-
-	// fast encoding, slow decoding version
-	/*
-	 * public void encode(int val) { while(val > ENCODE_MASK) { bytes[pos++] =
-	 * (byte)(((byte)(val & ENCODE_MASK)) | CONTINUE_CHUNK); val = val >>> 7; }
-	 * bytes[pos++] = (byte)(val); }
-	 * 
-	 * public int decode() { int sum = 0; int shift = 0; while( (bytes[pos] &
-	 * CONTINUE_CHUNK) == CONTINUE_CHUNK) { sum = (sum + (bytes[pos] &
-	 * DECODE_MASK)) << 7 * shift++; pos++; } sum += bytes[pos++] << 7 * shift;
-	 * return sum; }
-	 */
-}
\ No newline at end of file

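The deleted class above is superseded by org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder in the new hyracks-util module. The wire format itself is the one shown in the removed code: 7 payload bits per byte, the high bit marking a continuation byte, and the most-significant chunks written first so decoding can accumulate left to right. A standalone sketch of that format for reference; the class name is hypothetical, and unlike the removed encode() (whose unused origPos left it returning an absolute position), this version returns the number of bytes written.

    public final class VarLenIntCodecSketch {
        private static final int ENCODE_MASK = 0x0000007F;
        private static final byte CONTINUE_CHUNK = (byte) 0x80;
        private static final byte DECODE_MASK = (byte) 0x7F;

        private VarLenIntCodecSketch() {
        }

        // Encodes a non-negative int at bytes[start] and returns the byte count.
        public static int encode(int val, byte[] bytes, int start) {
            byte[] tmp = new byte[5];
            int tmpPos = 0;
            while (val > ENCODE_MASK) {
                tmp[tmpPos++] = (byte) (val & ENCODE_MASK);
                val = val >>> 7;
            }
            tmp[tmpPos++] = (byte) val;

            // most-significant chunks first, continuation bit on all but the last byte
            int pos = start;
            for (int i = 0; i < tmpPos - 1; i++) {
                bytes[pos++] = (byte) (tmp[tmpPos - 1 - i] | CONTINUE_CHUNK);
            }
            bytes[pos++] = tmp[0];
            return pos - start;
        }

        public static int decode(byte[] bytes, int start) {
            int pos = start;
            int sum = 0;
            while ((bytes[pos] & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
                sum = (sum + (bytes[pos] & DECODE_MASK)) << 7;
                pos++;
            }
            return sum + bytes[pos];
        }

        // Number of bytes encode() will emit for val.
        public static int getBytesRequired(int val) {
            int byteCount = 1;
            while (val > ENCODE_MASK) {
                val = val >>> 7;
                byteCount++;
            }
            return byteCount;
        }
    }
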
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml b/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
index af70253..dc5282e 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
@@ -17,41 +17,46 @@
  ! under the License.
  !-->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-	<modelVersion>4.0.0</modelVersion>
-	<artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
 
-	<parent>
-		<artifactId>hyracks</artifactId>
-		<groupId>org.apache.hyracks</groupId>
-		<version>0.2.17-SNAPSHOT</version>
-		<relativePath>..</relativePath>
-	</parent>
+    <parent>
+        <artifactId>hyracks</artifactId>
+        <groupId>org.apache.hyracks</groupId>
+        <version>0.2.17-SNAPSHOT</version>
+        <relativePath>..</relativePath>
+    </parent>
 
-  <licenses>
-    <license>
-      <name>Apache License, Version 2.0</name>
-      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
-      <distribution>repo</distribution>
-      <comments>A business-friendly OSS license</comments>
-    </license>
-  </licenses>
+    <licenses>
+        <license>
+            <name>Apache License, Version 2.0</name>
+            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+            <distribution>repo</distribution>
+            <comments>A business-friendly OSS license</comments>
+        </license>
+    </licenses>
 
-
-	<dependencies>
-		<dependency>
-			<groupId>org.apache.hyracks</groupId>
-			<artifactId>hyracks-storage-am-btree</artifactId>
-			<version>0.2.17-SNAPSHOT</version>
-			<type>jar</type>
-			<scope>compile</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.hyracks</groupId>
-			<artifactId>hyracks-storage-am-lsm-common</artifactId>
-			<version>0.2.17-SNAPSHOT</version>
-			<type>jar</type>
-			<scope>compile</scope>
-	    </dependency>
-	</dependencies>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-util</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-storage-am-btree</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>jar</type>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-storage-am-lsm-common</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>jar</type>
+            <scope>compile</scope>
+        </dependency>
+    </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
index f536a67..7d34198 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
@@ -50,7 +50,7 @@ import org.apache.hyracks.storage.am.lsm.invertedindex.util.ObjectCache;
 
 public abstract class AbstractTOccurrenceSearcher implements IInvertedIndexSearcher {
     protected static final RecordDescriptor QUERY_TOKEN_REC_DESC = new RecordDescriptor(
-            new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+            new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
 
     protected final int OBJECT_CACHE_INIT_SIZE = 10;
     protected final int OBJECT_CACHE_EXPAND_SIZE = 10;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
index 1460857..9d4446f 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
@@ -19,19 +19,16 @@
 
 package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokenizer {
 
-    protected byte[] data;
-    protected int start;
-    protected int length;
-    protected int tokenLength;
-    protected int index;
-    protected int originalIndex;
-    protected int utf8Length;
-    protected boolean tokenCountCalculated = false;
-    protected short tokenCount;
+    protected byte[] sentenceBytes;
+    protected int sentenceStartOffset;
+    protected int sentenceEndOffset;
+    protected int sentenceUtf8Length;
+
+    protected int byteIndex;
 
     protected final IntArray tokensStart;
     protected final IntArray tokensLength;
@@ -59,27 +56,27 @@ public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokeni
         return token;
     }
 
+    //TODO: This UTF8 tokenizer strongly relies on the Asterix data format,
+    // i.e. both the TypeTag check and the byteIndex advance assume the given byte[] sentence
+    // is an AString object. A better way (if we want to keep the byte[] interface) would be
+    // to give this tokenizer the pure UTF8 character sequence whose {@code start} is the start
+    // of the first character, and to move the offset shifting to the caller.
     @Override
-    public void reset(byte[] data, int start, int length) {
-        this.start = start;
-        index = this.start;
+    public void reset(byte[] sentenceData, int start, int length) {
+        this.sentenceBytes = sentenceData;
+        this.sentenceStartOffset = start;
+        this.sentenceEndOffset = length + start;
+
+        byteIndex = this.sentenceStartOffset;
         if (sourceHasTypeTag) {
-            index++; // skip type tag
+            byteIndex++; // skip type tag
         }
-        utf8Length = UTF8StringPointable.getUTFLength(data, index);
-        index += 2; // skip utf8 length indicator
-        this.data = data;
-        this.length = length + start;
+        sentenceUtf8Length = UTF8StringUtil.getUTFLength(sentenceData, byteIndex);
+        byteIndex += UTF8StringUtil.getNumBytesToStoreLength(sentenceUtf8Length); // skip utf8 length indicator
 
-        tokenLength = 0;
         if (!ignoreTokenCount) {
             tokensStart.reset();
             tokensLength.reset();
         }
-
-        // Needed for calculating the number of tokens
-        originalIndex = index;
-        tokenCountCalculated = false;
-        tokenCount = 0;
     }
 }

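The reset() above no longer hard-codes a 2-byte length prefix (the old "index += 2"); it asks UTF8StringUtil how many bytes the variable-size length header actually occupies. The idiom, extracted into a stand-alone sketch (the wrapper class is hypothetical; the two UTF8StringUtil calls are the ones used in the hunk):

    import org.apache.hyracks.util.string.UTF8StringUtil;

    public final class Utf8HeaderSketch {
        private Utf8HeaderSketch() {
        }

        // Given the offset of the length header, returns the offset of the
        // first character of the string.
        public static int skipLengthHeader(byte[] bytes, int offset) {
            int utf8Len = UTF8StringUtil.getUTFLength(bytes, offset);
            return offset + UTF8StringUtil.getNumBytesToStoreLength(utf8Len);
        }
    }
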
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
index 7aeb6fa..9613fb9 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
@@ -21,17 +21,18 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 import java.io.DataOutput;
 import java.io.IOException;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public abstract class AbstractUTF8Token implements IToken {
     public static final int GOLDEN_RATIO_32 = 0x09e3779b9;
 
-    protected int length;
+    protected byte[] data;
+    protected int startOffset;
+    protected int endOffset;
     protected int tokenLength;
-    protected int start;
     protected int tokenCount;
-    protected byte[] data;
     protected final byte tokenTypeTag;
     protected final byte countTypeTag;
 
@@ -51,24 +52,24 @@ public abstract class AbstractUTF8Token implements IToken {
     }
 
     @Override
-    public int getLength() {
-        return length;
+    public int getEndOffset() {
+        return endOffset;
     }
 
-    public int getLowerCaseUTF8Len(int size) {
+    public int getLowerCaseUTF8Len(int limit) {
         int lowerCaseUTF8Len = 0;
-        int pos = start;
-        for (int i = 0; i < size; i++) {
-            char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
-            lowerCaseUTF8Len += UTF8StringPointable.getModifiedUTF8Len(c);
-            pos += UTF8StringPointable.charSize(data, pos);
+        int pos = startOffset;
+        for (int i = 0; i < limit; i++) {
+            char c = Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
+            lowerCaseUTF8Len += UTF8StringUtil.getModifiedUTF8Len(c);
+            pos += UTF8StringUtil.charSize(data, pos);
         }
         return lowerCaseUTF8Len;
     }
 
     @Override
-    public int getStart() {
-        return start;
+    public int getStartOffset() {
+        return startOffset;
     }
 
     @Override
@@ -88,11 +89,20 @@ public abstract class AbstractUTF8Token implements IToken {
         }
     }
 
+    /**
+     * Note: the {@code startOffset} is the offset of the first character, not of the string length header
+     *
+     * @param data
+     * @param startOffset
+     * @param endOffset
+     * @param tokenLength
+     * @param tokenCount  the count of this token in a document, a record, or something similar.
+     */
     @Override
-    public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
+    public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount) {
         this.data = data;
-        this.start = start;
-        this.length = length;
+        this.startOffset = startOffset;
+        this.endOffset = endOffset;
         this.tokenLength = tokenLength;
         this.tokenCount = tokenCount;
     }
@@ -102,4 +112,38 @@ public abstract class AbstractUTF8Token implements IToken {
         handleCountTypeTag(out.getDataOutput());
         out.getDataOutput().writeInt(tokenCount);
     }
+
+    // The preChar and postChar are each required to be a single-byte UTF-8 char, e.g. an ASCII char.
+    protected void serializeToken(UTF8StringBuilder builder, GrowableArray out, int numPreChars, int numPostChars,
+            char preChar, char postChar)
+            throws IOException {
+
+        handleTokenTypeTag(out.getDataOutput());
+
+        assert UTF8StringUtil.getModifiedUTF8Len(preChar) == 1 && UTF8StringUtil.getModifiedUTF8Len(postChar) == 1;
+        int actualUtfLen = endOffset - startOffset;
+
+        builder.reset(out, actualUtfLen + numPreChars + numPostChars);
+        // pre chars
+        for (int i = 0; i < numPreChars; i++) {
+            builder.appendChar(preChar);
+        }
+
+        /// regular chars
+        int numRegChars = tokenLength - numPreChars - numPostChars;
+        int pos = startOffset;
+        for (int i = 0; i < numRegChars; i++) {
+            char c = Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
+            builder.appendChar(c);
+            pos += UTF8StringUtil.charSize(data, pos);
+        }
+
+        // post chars
+        for (int i = 0; i < numPostChars; i++) {
+            builder.appendChar(postChar);
+        }
+
+        builder.finish();
+    }
+
 }
\ No newline at end of file

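The new serializeToken(...) helper above routes token bytes through a UTF8StringBuilder, so the variable-size length header is produced by the builder instead of writing a dummy 2-byte length and patching it afterwards (the pattern removed from UTF8NGramToken and UTF8WordToken below). A reduced sketch of the builder usage, without the type tag and pre/post chars; the wrapper class is hypothetical, while reset(out, estimate), appendChar(c) and finish() are the calls visible in the hunk.

    import java.io.IOException;

    import org.apache.hyracks.data.std.util.GrowableArray;
    import org.apache.hyracks.data.std.util.UTF8StringBuilder;
    import org.apache.hyracks.util.string.UTF8StringUtil;

    public final class LowerCaseTokenSketch {
        private LowerCaseTokenSketch() {
        }

        // Appends charCount characters starting at startOffset, lower-cased,
        // as one UTF8 string (length header included) to out.
        public static void writeLowerCased(byte[] chars, int startOffset, int endOffset, int charCount,
                GrowableArray out) throws IOException {
            UTF8StringBuilder builder = new UTF8StringBuilder();
            builder.reset(out, endOffset - startOffset); // estimated byte length
            int pos = startOffset;
            for (int i = 0; i < charCount; i++) {
                builder.appendChar(Character.toLowerCase(UTF8StringUtil.charAt(chars, pos)));
                pos += UTF8StringUtil.charSize(chars, pos);
            }
            builder.finish(); // finalizes the variable-size length header
        }
    }
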
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
index ddf3a43..f6d6be4 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
@@ -19,54 +19,66 @@
 
 package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
 
+    protected short tokenCount;
+    private boolean tokenCountCalculated;
+    private int originalIndex;
+
     public DelimitedUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
             ITokenFactory tokenFactory) {
         super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
     }
 
     @Override
+    public void reset(byte[] sentenceData, int start, int length) {
+        super.reset(sentenceData, start, length);
+        // Needed for calculating the number of tokens
+        tokenCount = 0;
+        tokenCountCalculated = false;
+        originalIndex = byteIndex;
+    }
+
+    @Override
     public boolean hasNext() {
         // skip delimiters
-        while (index < length && isSeparator(UTF8StringPointable.charAt(data, index))) {
-            index += UTF8StringPointable.charSize(data, index);
+        while (byteIndex < sentenceEndOffset && isSeparator(UTF8StringUtil.charAt(sentenceBytes, byteIndex))) {
+            byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
         }
-        return index < length;
+        return byteIndex < sentenceEndOffset;
     }
 
-    private boolean isSeparator(char c) {
-        return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER || Character.getType(c) == Character.OTHER_NUMBER);
+    private static boolean isSeparator(char c) {
+        return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER
+                || Character.getType(c) == Character.OTHER_NUMBER);
     }
 
     @Override
     public void next() {
-        tokenLength = 0;
-        int currentTokenStart = index;
-        while (index < length && !isSeparator(UTF8StringPointable.charAt(data, index))) {
-            index += UTF8StringPointable.charSize(data, index);
+        int tokenLength = 0;
+        int currentTokenStart = byteIndex;
+        while (byteIndex < sentenceEndOffset && !isSeparator(UTF8StringUtil.charAt(sentenceBytes, byteIndex))) {
+            byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
             tokenLength++;
         }
-        int tokenCount = 1;
+        int curTokenCount = 1;
         if (tokenLength > 0 && !ignoreTokenCount) {
             // search if we got the same token before
             for (int i = 0; i < tokensStart.length(); ++i) {
                 if (tokenLength == tokensLength.get(i)) {
                     int tokenStart = tokensStart.get(i);
-                    tokenCount++; // assume we found it
+                    curTokenCount++; // assume we found it
                     int offset = 0;
-                    int currLength = 0;
-                    while (currLength < tokenLength) {
+                    for (int charPos= 0; charPos < tokenLength; charPos++) {
                         // case insensitive comparison
-                        if (Character.toLowerCase(UTF8StringPointable.charAt(data, currentTokenStart + offset)) != Character
-                                .toLowerCase(UTF8StringPointable.charAt(data, tokenStart + offset))) {
-                            tokenCount--;
+                        if (Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, currentTokenStart + offset))
+                                != Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, tokenStart + offset))) {
+                            curTokenCount--;
                             break;
                         }
-                        offset += UTF8StringPointable.charSize(data, currentTokenStart + offset);
-                        currLength++;
+                        offset += UTF8StringUtil.charSize(sentenceBytes, currentTokenStart + offset);
                     }
                 }
             }
@@ -76,16 +88,19 @@ public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinary
         }
 
         // set token
-        token.reset(data, currentTokenStart, index, tokenLength, tokenCount);
+        token.reset(sentenceBytes, currentTokenStart, byteIndex, tokenLength, curTokenCount);
+        tokenCount++;
     }
 
+
+    // TODO Why do we bother to get the tokenCount in advance? It seems to be the caller's problem.
     @Override
     public short getTokensCount() {
         if (!tokenCountCalculated) {
             tokenCount = 0;
             boolean previousCharIsSeparator = true;
-            while (originalIndex < length) {
-                if (isSeparator(UTF8StringPointable.charAt(data, originalIndex))) {
+            while (originalIndex < sentenceEndOffset) {
+                if (isSeparator(UTF8StringUtil.charAt(sentenceBytes, originalIndex))) {
                     previousCharIsSeparator = true;
                 } else {
                     if (previousCharIsSeparator) {
@@ -93,7 +108,7 @@ public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinary
                         previousCharIsSeparator = false;
                     }
                 }
-                originalIndex += UTF8StringPointable.charSize(data, originalIndex);
+                originalIndex += UTF8StringUtil.charSize(sentenceBytes, originalIndex);
             }
         }
         return tokenCount;

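The delimited tokenizer above keeps the same character-walking idiom as before, only routed through UTF8StringUtil and the renamed sentence*/byteIndex fields. That charAt/charSize loop is worth seeing in isolation; below is a simplified, hypothetical word counter over raw modified-UTF-8 bytes (it uses a plain isLetterOrDigit test rather than the tokenizer's fuller isSeparator, which also admits OTHER_LETTER and OTHER_NUMBER).

    import org.apache.hyracks.util.string.UTF8StringUtil;

    public final class WordCountSketch {
        private WordCountSketch() {
        }

        // Counts maximal runs of letter/digit characters in [start, end).
        public static int countWords(byte[] utf8Chars, int start, int end) {
            int count = 0;
            boolean prevWasSeparator = true;
            int pos = start;
            while (pos < end) {
                char c = UTF8StringUtil.charAt(utf8Chars, pos);
                boolean separator = !Character.isLetterOrDigit(c);
                if (!separator && prevWasSeparator) {
                    count++;
                }
                prevWasSeparator = separator;
                pos += UTF8StringUtil.charSize(utf8Chars, pos);
            }
            return count;
        }
    }
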
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
index 43ee3c0..8ffd355 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
@@ -21,8 +21,8 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
 import java.io.IOException;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class HashedUTF8NGramToken extends UTF8NGramToken {
     public HashedUTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
@@ -43,11 +43,11 @@ public class HashedUTF8NGramToken extends UTF8NGramToken {
 
         // regular chars
         int numRegGrams = tokenLength - numPreChars - numPostChars;
-        int pos = start;
+        int pos = startOffset;
         for (int i = 0; i < numRegGrams; i++) {
-            hash ^= Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
+            hash ^= Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
             hash *= GOLDEN_RATIO_32;
-            pos += UTF8StringPointable.charSize(data, pos);
+            pos += UTF8StringUtil.charSize(data, pos);
         }
 
         // post chars

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
index 18f958d..150ffd6 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
@@ -21,8 +21,8 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
 import java.io.IOException;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class HashedUTF8WordToken extends UTF8WordToken {
 
@@ -46,11 +46,11 @@ public class HashedUTF8WordToken extends UTF8WordToken {
         }
         int offset = 0;
         for (int i = 0; i < tokenLength; i++) {
-            if (UTF8StringPointable.charAt(t.getData(), t.getStart() + offset) != UTF8StringPointable.charAt(data,
-                    start + offset)) {
+            if (UTF8StringUtil.charAt(t.getData(), t.getStartOffset() + offset) != UTF8StringUtil.charAt(data,
+                    startOffset + offset)) {
                 return false;
             }
-            offset += UTF8StringPointable.charSize(data, start + offset);
+            offset += UTF8StringUtil.charSize(data, startOffset + offset);
         }
         return true;
     }
@@ -61,16 +61,16 @@ public class HashedUTF8WordToken extends UTF8WordToken {
     }
 
     @Override
-    public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
-        super.reset(data, start, length, tokenLength, tokenCount);
+    public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount) {
+        super.reset(data, startOffset, endOffset, tokenLength, tokenCount);
 
         // pre-compute hash value using JAQL-like string hashing
-        int pos = start;
+        int pos = startOffset;
         hash = GOLDEN_RATIO_32;
         for (int i = 0; i < tokenLength; i++) {
-            hash ^= Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
+            hash ^= Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
             hash *= GOLDEN_RATIO_32;
-            pos += UTF8StringPointable.charSize(data, pos);
+            pos += UTF8StringUtil.charSize(data, pos);
         }
         hash += tokenCount;
     }

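Both hashed token classes above compute the same "JAQL-like" rolling hash, now over UTF8StringUtil instead of UTF8StringPointable. Restated as a standalone helper for clarity (the wrapper class is hypothetical; the constant is the public GOLDEN_RATIO_32 from AbstractUTF8Token):

    import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.AbstractUTF8Token;
    import org.apache.hyracks.util.string.UTF8StringUtil;

    public final class TokenHashSketch {
        private TokenHashSketch() {
        }

        // Case-insensitive hash over tokenLength characters starting at startOffset,
        // folding in the token count at the end, as HashedUTF8WordToken.reset() does.
        public static int hash(byte[] chars, int startOffset, int tokenLength, int tokenCount) {
            int hash = AbstractUTF8Token.GOLDEN_RATIO_32;
            int pos = startOffset;
            for (int i = 0; i < tokenLength; i++) {
                hash ^= Character.toLowerCase(UTF8StringUtil.charAt(chars, pos));
                hash *= AbstractUTF8Token.GOLDEN_RATIO_32;
                pos += UTF8StringUtil.charSize(chars, pos);
            }
            return hash + tokenCount;
        }
    }
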
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
index d48af44..cb1b098 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
@@ -24,18 +24,26 @@ import java.io.IOException;
 import org.apache.hyracks.data.std.util.GrowableArray;
 
 public interface IToken {
-	public byte[] getData();
+    public byte[] getData();
 
-	public int getLength();
+    public int getEndOffset();
 
-	public int getStart();
+    public int getStartOffset();
 
-	public int getTokenLength();
+    public int getTokenLength();
 
-	public void reset(byte[] data, int start, int length, int tokenLength,
-			int tokenCount);
+    /**
+     * reset the storage byte array.
+     *
+     * @param data
+     * @param startOffset
+     * @param endOffset
+     * @param tokenLength
+     * @param tokenCount  the count of this token in a document, a record, or something similar.
+     */
+    public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount);
 
-	public void serializeToken(GrowableArray out) throws IOException;
+    public void serializeToken(GrowableArray out) throws IOException;
 
-	public void serializeTokenCount(GrowableArray out) throws IOException;
+    public void serializeTokenCount(GrowableArray out) throws IOException;
 }

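IToken now exposes a start offset and an end offset instead of a start and a length. A caller that still needs the token's size in bytes can derive it from the two getters; the tiny helper below is hypothetical.

    import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken;

    public final class TokenLengthSketch {
        private TokenLengthSketch() {
        }

        // The token's size in bytes, from the two offsets the interface exposes.
        public static int byteLengthOf(IToken token) {
            return token.getEndOffset() - token.getStartOffset();
        }
    }
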
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
index def7ad2..9161a54 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
@@ -19,7 +19,7 @@
 
 package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
 
 public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
 
@@ -50,7 +50,7 @@ public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryToke
 
     @Override
     public void next() {
-        int currentTokenStart = index;
+        int currentTokenStart = byteIndex;
         int tokenCount = 1;
         int numPreChars = 0;
         int numPostChars = 0;
@@ -62,46 +62,48 @@ public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryToke
 
         concreteToken.setNumPrePostChars(numPreChars, numPostChars);
         if (numPreChars == 0) {
-            index += UTF8StringPointable.charSize(data, index);
+            byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
         }
 
         // compute token count
         // ignore pre and post grams for duplicate detection
         if (!ignoreTokenCount && numPreChars == 0 && numPostChars == 0) {
-            int tmpIndex = start + 2; // skip utf8 length indicator
+            int tmpIndex = sentenceStartOffset;
             if (sourceHasTypeTag) {
                 tmpIndex++; // skip type tag
             }
+            int utfLength = UTF8StringUtil.getUTFLength(sentenceBytes, tmpIndex);
+            tmpIndex += UTF8StringUtil.getNumBytesToStoreLength(utfLength); // skip utf8 length indicator
             while (tmpIndex < currentTokenStart) {
                 tokenCount++; // assume found
                 int offset = 0;
                 for (int j = 0; j < gramLength; j++) {
-                    if (Character.toLowerCase(UTF8StringPointable.charAt(data, currentTokenStart + offset)) != Character
-                            .toLowerCase(UTF8StringPointable.charAt(data, tmpIndex + offset))) {
+                    if (Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, currentTokenStart + offset))
+                            != Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, tmpIndex + offset))) {
                         tokenCount--;
                         break;
                     }
-                    offset += UTF8StringPointable.charSize(data, tmpIndex + offset);
+                    offset += UTF8StringUtil.charSize(sentenceBytes, tmpIndex + offset);
                 }
-                tmpIndex += UTF8StringPointable.charSize(data, tmpIndex);
+                tmpIndex += UTF8StringUtil.charSize(sentenceBytes, tmpIndex);
             }
         }
 
         // set token
-        token.reset(data, currentTokenStart, length, gramLength, tokenCount);
+        token.reset(sentenceBytes, currentTokenStart, sentenceEndOffset, gramLength, tokenCount);
     }
 
     @Override
-    public void reset(byte[] data, int start, int length) {
-        super.reset(data, start, length);
+    public void reset(byte[] sentenceData, int start, int length) {
+        super.reset(sentenceData, start, length);
         gramNum = 0;
 
         int numChars = 0;
-        int pos = index;
-        int end = pos + utf8Length;
+        int pos = byteIndex;
+        int end = pos + sentenceUtf8Length;
         while (pos < end) {
             numChars++;
-            pos += UTF8StringPointable.charSize(data, pos);
+            pos += UTF8StringUtil.charSize(sentenceData, pos);
         }
 
         if (usePrePost) {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
index 7d68d6f..259288c 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
@@ -21,9 +21,8 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
 import java.io.IOException;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.util.GrowableArray;
-import org.apache.hyracks.dataflow.common.data.util.StringUtils;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
 
 public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
 
@@ -34,6 +33,8 @@ public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
     protected int numPreChars;
     protected int numPostChars;
 
+    private UTF8StringBuilder builder = new UTF8StringBuilder();
+
     public UTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
         super(tokenTypeTag, countTypeTag);
     }
@@ -50,38 +51,7 @@ public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
 
     @Override
     public void serializeToken(GrowableArray out) throws IOException {
-        handleTokenTypeTag(out.getDataOutput());
-        int tokenUTF8LenOff = out.getLength();
-
-        // regular chars
-        int numRegChars = tokenLength - numPreChars - numPostChars;
-
-        // assuming pre and post char need 1-byte each in utf8
-        int tokenUTF8Len = numPreChars + numPostChars;
-
-        // Write dummy UTF length which will be correctly set later.
-        out.getDataOutput().writeShort(0);
-
-        // pre chars
-        for (int i = 0; i < numPreChars; i++) {
-            StringUtils.writeCharAsModifiedUTF8(PRECHAR, out.getDataOutput());
-        }
-
-        int pos = start;
-        for (int i = 0; i < numRegChars; i++) {
-            char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
-            tokenUTF8Len += StringUtils.writeCharAsModifiedUTF8(c, out.getDataOutput());
-            pos += UTF8StringPointable.charSize(data, pos);
-        }
-
-        // post chars
-        for (int i = 0; i < numPostChars; i++) {
-            StringUtils.writeCharAsModifiedUTF8(POSTCHAR, out.getDataOutput());
-        }
-
-        // Set UTF length of token.
-        out.getByteArray()[tokenUTF8LenOff] = (byte) ((tokenUTF8Len >>> 8) & 0xFF);
-        out.getByteArray()[tokenUTF8LenOff + 1] = (byte) ((tokenUTF8Len >>> 0) & 0xFF);
+        super.serializeToken(builder, out, numPreChars, numPostChars, PRECHAR, POSTCHAR);
     }
 
     public void setNumPrePostChars(int numPreChars, int numPostChars) {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
index caaa682..bc7085c 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
@@ -21,31 +21,21 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
 import java.io.IOException;
 
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
 import org.apache.hyracks.data.std.util.GrowableArray;
-import org.apache.hyracks.dataflow.common.data.util.StringUtils;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
 
 public class UTF8WordToken extends AbstractUTF8Token {
 
+    private static char NULL_PLACEHOLDER = 1; // can't be 0, because modified UTF-8 encodes 0 with 2 bytes
+
+    private UTF8StringBuilder builder = new UTF8StringBuilder();
+
     public UTF8WordToken(byte tokenTypeTag, byte countTypeTag) {
         super(tokenTypeTag, countTypeTag);
     }
 
     @Override
     public void serializeToken(GrowableArray out) throws IOException {
-        handleTokenTypeTag(out.getDataOutput());
-        int tokenUTF8LenOff = out.getLength();
-        int tokenUTF8Len = 0;
-        // Write dummy UTF length which will be correctly set later.
-        out.getDataOutput().writeShort(0);
-        int pos = start;
-        for (int i = 0; i < tokenLength; i++) {
-            char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
-            tokenUTF8Len += StringUtils.writeCharAsModifiedUTF8(c, out.getDataOutput());
-            pos += UTF8StringPointable.charSize(data, pos);
-        }
-        // Set UTF length of token.
-        out.getByteArray()[tokenUTF8LenOff] = (byte) ((tokenUTF8Len >>> 8) & 0xFF);
-        out.getByteArray()[tokenUTF8LenOff + 1] = (byte) ((tokenUTF8Len >>> 0) & 0xFF);
+        super.serializeToken(builder, out, 0, 0, NULL_PLACEHOLDER, NULL_PLACEHOLDER);
     }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java b/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
index d2332f0..40b0481 100644
--- a/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
+++ b/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
@@ -24,6 +24,7 @@ import java.nio.ByteBuffer;
 import org.apache.hyracks.api.dataflow.value.ITypeTraits;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
 import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
 
 public class RTreeTypeAwareTupleWriter extends TypeAwareTupleWriter {
 
@@ -41,13 +42,11 @@ public class RTreeTypeAwareTupleWriter extends TypeAwareTupleWriter {
 
         // write field slots for variable length fields
         // since the r-tree has fixed length keys, we don't actually need this?
-        encDec.reset(targetBuf.array(), runner);
         for (int i = startField; i < startField + refs.length; i++) {
             if (!typeTraits[i].isFixedLength()) {
-                encDec.encode(refs[i].getFieldLength(i));
+                runner += VarLenIntEncoderDecoder.encode(refs[i].getFieldLength(i), targetBuf.array(), runner);
             }
         }
-        runner = encDec.getPos();
 
         // write data
         for (int i = 0; i < refs.length; i++) {
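
The hunk above swaps the stateful reset/encode/getPos sequence for a single static call whose return value is the number of bytes written, which the caller adds to its running offset. The sketch below only illustrates that calling convention with a generic LEB128-style varint; the actual byte layout implemented by VarLenIntEncoderDecoder is not shown in this diff and may differ.

    // Generic varint sketch: encode(value, bytes, start) returns how many
    // bytes it wrote, so the caller can keep a running write position.
    public final class VarIntSketch {
        public static int encode(int value, byte[] bytes, int start) {
            int pos = start;
            while ((value & ~0x7F) != 0) {                     // more than 7 bits remain
                bytes[pos++] = (byte) ((value & 0x7F) | 0x80); // payload + continuation bit
                value >>>= 7;
            }
            bytes[pos++] = (byte) value;                       // last byte, no continuation bit
            return pos - start;
        }

        public static void main(String[] args) {
            byte[] buf = new byte[8];
            int runner = 0;
            runner += encode(300, buf, runner); // 300 -> 2 bytes in this scheme
            runner += encode(5, buf, runner);   // 5   -> 1 byte
            System.out.println("bytes written: " + runner); // 3
        }
    }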

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
index f79997b..b8f2166 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
@@ -164,8 +164,8 @@ public abstract class OrderedIndexExamplesTest {
         typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
         typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
         // Declare field serdes.
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
 
         // Declare keys.
         int keyFieldCount = 1;
@@ -324,8 +324,8 @@ public abstract class OrderedIndexExamplesTest {
         typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
         typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
         // Declare field serdes.
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
 
         // Declare keys.
         int keyFieldCount = 1;
@@ -408,8 +408,8 @@ public abstract class OrderedIndexExamplesTest {
         typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
         typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
         // Declare field serdes.
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
 
         // Declare keys.
         int keyFieldCount = 1;
@@ -514,8 +514,8 @@ public abstract class OrderedIndexExamplesTest {
         typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
         typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
         // Declare field serdes.
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
 
         // Declare keys.
         int keyFieldCount = 1;
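
Throughout the test changes in this patch, the shared UTF8StringSerializerDeserializer.INSTANCE singleton is replaced with a fresh instance per serde array. The serde's internals are not part of this hunk, but the switch suggests it now carries per-instance state (for example, reusable buffers for the new variable-length length encoding), so a single shared instance would no longer be safe across tests or threads. A minimal fragment of the updated pattern, mirroring calls that already appear elsewhere in this diff (imports omitted, as in the surrounding test classes):

    // One serde instance per field slot; nothing is shared between arrays.
    ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
            new UTF8StringSerializerDeserializer() };
    // Build range-search keys the same way the BTree range-search tests below do.
    ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf");
    ITupleReference highKey = TupleUtils.createTuple(fieldSerdes, "cc7");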

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
index 160f9bf..e181710 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
@@ -120,7 +120,7 @@ public abstract class OrderedIndexMultiThreadTest {
     @Test
     public void oneStringKeyAndValue() throws InterruptedException, TreeIndexException, HyracksException {
         ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
         int numKeys = 1;
         String dataMsg = "One String Key And Value";
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
index 0ec313b..b1e8a8c 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
@@ -124,8 +124,8 @@ public abstract class OrderedIndexTestDriver {
             LOGGER.info("BTree " + getTestOpName() + " Test With One String Key And Value.");
         }
 
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
 
         // Range search in ["cbf", cc7"]
         ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf");
@@ -142,8 +142,8 @@ public abstract class OrderedIndexTestDriver {
             LOGGER.info("BTree " + getTestOpName() + " Test With Two String Keys.");
         }
 
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
 
         // Range search in ["cbf", "ddd", cc7", "eee"]
         ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf", "ddd");
@@ -164,9 +164,9 @@ public abstract class OrderedIndexTestDriver {
             LOGGER.info("BTree " + getTestOpName() + " Test With Two String Keys And Values.");
         }
 
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
 
         // Range search in ["cbf", "ddd", cc7", "eee"]
         ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf", "ddd");

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
index 6cd81c3..a3029f8 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
@@ -210,7 +210,7 @@ public abstract class AbstractRTreeExamplesTest {
         // Declare field serdes.
         ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
 
         // Declare RTree keys.
         int rtreeKeyFieldCount = 4;
@@ -350,7 +350,7 @@ public abstract class AbstractRTreeExamplesTest {
         // Declare field serdes.
         ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE,
                 IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+                IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
 
         // Declare RTree keys.
         int rtreeKeyFieldCount = 4;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
index 49df30f..80a69c4 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
@@ -135,9 +135,9 @@ public class BloomFilterTest extends AbstractBloomFilterTest {
                 bloomFilterSpec.getNumBucketsPerElements());
 
         int fieldCount = 5;
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
         ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
         ArrayTupleReference tuple = new ArrayTupleReference();
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
index 1d7aa90..3284f8d 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
@@ -109,7 +109,7 @@ public class MurmurHashForITupleReferenceTest extends AbstractBloomFilterTest {
         }
 
         int fieldCount = 2;
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer() };
         ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
         ArrayTupleReference tuple = new ArrayTupleReference();
         String s = randomString(100, rnd);
@@ -137,8 +137,8 @@ public class MurmurHashForITupleReferenceTest extends AbstractBloomFilterTest {
         }
 
         int fieldCount = 3;
-        ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+        ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
         ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
         ArrayTupleReference tuple = new ArrayTupleReference();
         String s1 = randomString(40, rnd);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
index a7215a5..d537bf9 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
@@ -161,14 +161,14 @@ public class LSMBTreeTuplesTest {
         testLSMBTreeTuple(intFields);
         
         ISerializerDeserializer[] stringFields = new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE };
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+                new UTF8StringSerializerDeserializer() };
         testLSMBTreeTuple(stringFields);
         
         ISerializerDeserializer[] mixedFields = new ISerializerDeserializer[] {
-                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
-                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+                new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
                 IntegerSerializerDeserializer.INSTANCE };
         testLSMBTreeTuple(mixedFields);
     }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb-hyracks/blob/26c3b536/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
----------------------------------------------------------------------
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
index f2896cb..11a57a2 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
@@ -17,40 +17,47 @@
  ! under the License.
  !-->
 
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-	<modelVersion>4.0.0</modelVersion>
-	<artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
 
-	<parent>
-		<artifactId>hyracks-tests</artifactId>
-		<groupId>org.apache.hyracks</groupId>
-		<version>0.2.17-SNAPSHOT</version>
-		<relativePath>..</relativePath>
-	</parent>
+    <parent>
+        <artifactId>hyracks-tests</artifactId>
+        <groupId>org.apache.hyracks</groupId>
+        <version>0.2.17-SNAPSHOT</version>
+        <relativePath>..</relativePath>
+    </parent>
 
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>jar</type>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-test-support</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-data-std</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hyracks</groupId>
+            <artifactId>hyracks-util</artifactId>
+            <version>0.2.17-SNAPSHOT</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
 
-	<dependencies>
-		<dependency>
-			<groupId>org.apache.hyracks</groupId>
-			<artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
-			<version>0.2.17-SNAPSHOT</version>
-			<type>jar</type>
-			<scope>compile</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.hyracks</groupId>
-			<artifactId>hyracks-test-support</artifactId>
-			<version>0.2.17-SNAPSHOT</version>
-			<type>jar</type>
-			<scope>test</scope>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.hyracks</groupId>
-			<artifactId>hyracks-data-std</artifactId>
-			<version>0.2.17-SNAPSHOT</version>
-			<type>jar</type>
-			<scope>test</scope>
-		</dependency>
-	</dependencies>
-
-</project>
+</project>
\ No newline at end of file