You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@spark.apache.org by rx...@apache.org on 2016/03/02 00:39:15 UTC

[1/4] spark git commit: [SPARK-13548][BUILD] Move tags and unsafe modules into common

Repository: spark
Updated Branches:
  refs/heads/master c27ba0d54 -> b0ee7d437


http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
----------------------------------------------------------------------
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
deleted file mode 100644
index 693ec6e..0000000
--- a/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-public class PlatformUtilSuite {
-
-  @Test
-  public void overlappingCopyMemory() {
-    byte[] data = new byte[3 * 1024 * 1024];
-    int size = 2 * 1024 * 1024;
-    for (int i = 0; i < data.length; ++i) {
-      data[i] = (byte)i;
-    }
-
-    Platform.copyMemory(data, Platform.BYTE_ARRAY_OFFSET, data, Platform.BYTE_ARRAY_OFFSET, size);
-    for (int i = 0; i < data.length; ++i) {
-      Assert.assertEquals((byte)i, data[i]);
-    }
-
-    Platform.copyMemory(
-        data,
-        Platform.BYTE_ARRAY_OFFSET + 1,
-        data,
-        Platform.BYTE_ARRAY_OFFSET,
-        size);
-    for (int i = 0; i < size; ++i) {
-      Assert.assertEquals((byte)(i + 1), data[i]);
-    }
-
-    for (int i = 0; i < data.length; ++i) {
-      data[i] = (byte)i;
-    }
-    Platform.copyMemory(
-        data,
-        Platform.BYTE_ARRAY_OFFSET,
-        data,
-        Platform.BYTE_ARRAY_OFFSET + 1,
-        size);
-    for (int i = 0; i < size; ++i) {
-      Assert.assertEquals((byte)i, data[i + 1]);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
----------------------------------------------------------------------
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
deleted file mode 100644
index fb8e53b..0000000
--- a/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.array;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import org.apache.spark.unsafe.memory.MemoryBlock;
-
-public class LongArraySuite {
-
-  @Test
-  public void basicTest() {
-    long[] bytes = new long[2];
-    LongArray arr = new LongArray(MemoryBlock.fromLongArray(bytes));
-    arr.set(0, 1L);
-    arr.set(1, 2L);
-    arr.set(1, 3L);
-    Assert.assertEquals(2, arr.size());
-    Assert.assertEquals(1L, arr.get(0));
-    Assert.assertEquals(3L, arr.get(1));
-
-    arr.zeroOut();
-    Assert.assertEquals(0L, arr.get(0));
-    Assert.assertEquals(0L, arr.get(1));
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
----------------------------------------------------------------------
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java b/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
deleted file mode 100644
index e759cb3..0000000
--- a/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.hash;
-
-import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Set;
-
-import org.apache.spark.unsafe.Platform;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Test file based on Guava's Murmur3Hash32Test.
- */
-public class Murmur3_x86_32Suite {
-
-  private static final Murmur3_x86_32 hasher = new Murmur3_x86_32(0);
-
-  @Test
-  public void testKnownIntegerInputs() {
-    Assert.assertEquals(593689054, hasher.hashInt(0));
-    Assert.assertEquals(-189366624, hasher.hashInt(-42));
-    Assert.assertEquals(-1134849565, hasher.hashInt(42));
-    Assert.assertEquals(-1718298732, hasher.hashInt(Integer.MIN_VALUE));
-    Assert.assertEquals(-1653689534, hasher.hashInt(Integer.MAX_VALUE));
-  }
-
-  @Test
-  public void testKnownLongInputs() {
-    Assert.assertEquals(1669671676, hasher.hashLong(0L));
-    Assert.assertEquals(-846261623, hasher.hashLong(-42L));
-    Assert.assertEquals(1871679806, hasher.hashLong(42L));
-    Assert.assertEquals(1366273829, hasher.hashLong(Long.MIN_VALUE));
-    Assert.assertEquals(-2106506049, hasher.hashLong(Long.MAX_VALUE));
-  }
-
-  @Test
-  public void randomizedStressTest() {
-    int size = 65536;
-    Random rand = new Random();
-
-    // A set used to track collision rate.
-    Set<Integer> hashcodes = new HashSet<>();
-    for (int i = 0; i < size; i++) {
-      int vint = rand.nextInt();
-      long lint = rand.nextLong();
-      Assert.assertEquals(hasher.hashInt(vint), hasher.hashInt(vint));
-      Assert.assertEquals(hasher.hashLong(lint), hasher.hashLong(lint));
-
-      hashcodes.add(hasher.hashLong(lint));
-    }
-
-    // A very loose bound.
-    Assert.assertTrue(hashcodes.size() > size * 0.95);
-  }
-
-  @Test
-  public void randomizedStressTestBytes() {
-    int size = 65536;
-    Random rand = new Random();
-
-    // A set used to track collision rate.
-    Set<Integer> hashcodes = new HashSet<>();
-    for (int i = 0; i < size; i++) {
-      int byteArrSize = rand.nextInt(100) * 8;
-      byte[] bytes = new byte[byteArrSize];
-      rand.nextBytes(bytes);
-
-      Assert.assertEquals(
-        hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
-        hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
-
-      hashcodes.add(hasher.hashUnsafeWords(
-        bytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
-    }
-
-    // A very loose bound.
-    Assert.assertTrue(hashcodes.size() > size * 0.95);
-  }
-
-  @Test
-  public void randomizedStressTestPaddedStrings() {
-    int size = 64000;
-    // A set used to track collision rate.
-    Set<Integer> hashcodes = new HashSet<>();
-    for (int i = 0; i < size; i++) {
-      int byteArrSize = 8;
-      byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
-      byte[] paddedBytes = new byte[byteArrSize];
-      System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length);
-
-      Assert.assertEquals(
-        hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize),
-        hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
-
-      hashcodes.add(hasher.hashUnsafeWords(
-        paddedBytes, Platform.BYTE_ARRAY_OFFSET, byteArrSize));
-    }
-
-    // A very loose bound.
-    Assert.assertTrue(hashcodes.size() > size * 0.95);
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
----------------------------------------------------------------------
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
deleted file mode 100644
index 9e69e26..0000000
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*    http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.spark.unsafe.types;
-
-import org.junit.Test;
-
-import static org.junit.Assert.*;
-import static org.apache.spark.unsafe.types.CalendarInterval.*;
-
-public class CalendarIntervalSuite {
-
-  @Test
-  public void equalsTest() {
-    CalendarInterval i1 = new CalendarInterval(3, 123);
-    CalendarInterval i2 = new CalendarInterval(3, 321);
-    CalendarInterval i3 = new CalendarInterval(1, 123);
-    CalendarInterval i4 = new CalendarInterval(3, 123);
-
-    assertNotSame(i1, i2);
-    assertNotSame(i1, i3);
-    assertNotSame(i2, i3);
-    assertEquals(i1, i4);
-  }
-
-  @Test
-  public void toStringTest() {
-    CalendarInterval i;
-
-    i = new CalendarInterval(34, 0);
-    assertEquals("interval 2 years 10 months", i.toString());
-
-    i = new CalendarInterval(-34, 0);
-    assertEquals("interval -2 years -10 months", i.toString());
-
-    i = new CalendarInterval(0, 3 * MICROS_PER_WEEK + 13 * MICROS_PER_HOUR + 123);
-    assertEquals("interval 3 weeks 13 hours 123 microseconds", i.toString());
-
-    i = new CalendarInterval(0, -3 * MICROS_PER_WEEK - 13 * MICROS_PER_HOUR - 123);
-    assertEquals("interval -3 weeks -13 hours -123 microseconds", i.toString());
-
-    i = new CalendarInterval(34, 3 * MICROS_PER_WEEK + 13 * MICROS_PER_HOUR + 123);
-    assertEquals("interval 2 years 10 months 3 weeks 13 hours 123 microseconds", i.toString());
-  }
-
-  @Test
-  public void fromStringTest() {
-    testSingleUnit("year", 3, 36, 0);
-    testSingleUnit("month", 3, 3, 0);
-    testSingleUnit("week", 3, 0, 3 * MICROS_PER_WEEK);
-    testSingleUnit("day", 3, 0, 3 * MICROS_PER_DAY);
-    testSingleUnit("hour", 3, 0, 3 * MICROS_PER_HOUR);
-    testSingleUnit("minute", 3, 0, 3 * MICROS_PER_MINUTE);
-    testSingleUnit("second", 3, 0, 3 * MICROS_PER_SECOND);
-    testSingleUnit("millisecond", 3, 0, 3 * MICROS_PER_MILLI);
-    testSingleUnit("microsecond", 3, 0, 3);
-
-    String input;
-
-    input = "interval   -5  years  23   month";
-    CalendarInterval result = new CalendarInterval(-5 * 12 + 23, 0);
-    assertEquals(fromString(input), result);
-
-    input = "interval   -5  years  23   month   ";
-    assertEquals(fromString(input), result);
-
-    input = "  interval   -5  years  23   month   ";
-    assertEquals(fromString(input), result);
-
-    // Error cases
-    input = "interval   3month 1 hour";
-    assertNull(fromString(input));
-
-    input = "interval 3 moth 1 hour";
-    assertNull(fromString(input));
-
-    input = "interval";
-    assertNull(fromString(input));
-
-    input = "int";
-    assertNull(fromString(input));
-
-    input = "";
-    assertNull(fromString(input));
-
-    input = null;
-    assertNull(fromString(input));
-  }
-
-  @Test
-  public void fromYearMonthStringTest() {
-    String input;
-    CalendarInterval i;
-
-    input = "99-10";
-    i = new CalendarInterval(99 * 12 + 10, 0L);
-    assertEquals(fromYearMonthString(input), i);
-
-    input = "-8-10";
-    i = new CalendarInterval(-8 * 12 - 10, 0L);
-    assertEquals(fromYearMonthString(input), i);
-
-    try {
-      input = "99-15";
-      fromYearMonthString(input);
-      fail("Expected to throw an exception for the invalid input");
-    } catch (IllegalArgumentException e) {
-      assertTrue(e.getMessage().contains("month 15 outside range"));
-    }
-  }
-
-  @Test
-  public void fromDayTimeStringTest() {
-    String input;
-    CalendarInterval i;
-
-    input = "5 12:40:30.999999999";
-    i = new CalendarInterval(0, 5 * MICROS_PER_DAY + 12 * MICROS_PER_HOUR +
-      40 * MICROS_PER_MINUTE + 30 * MICROS_PER_SECOND + 999999L);
-    assertEquals(fromDayTimeString(input), i);
-
-    input = "10 0:12:0.888";
-    i = new CalendarInterval(0, 10 * MICROS_PER_DAY + 12 * MICROS_PER_MINUTE);
-    assertEquals(fromDayTimeString(input), i);
-
-    input = "-3 0:0:0";
-    i = new CalendarInterval(0, -3 * MICROS_PER_DAY);
-    assertEquals(fromDayTimeString(input), i);
-
-    try {
-      input = "5 30:12:20";
-      fromDayTimeString(input);
-      fail("Expected to throw an exception for the invalid input");
-    } catch (IllegalArgumentException e) {
-      assertTrue(e.getMessage().contains("hour 30 outside range"));
-    }
-
-    try {
-      input = "5 30-12";
-      fromDayTimeString(input);
-      fail("Expected to throw an exception for the invalid input");
-    } catch (IllegalArgumentException e) {
-      assertTrue(e.getMessage().contains("not match day-time format"));
-    }
-  }
-
-  @Test
-  public void fromSingleUnitStringTest() {
-    String input;
-    CalendarInterval i;
-
-    input = "12";
-    i = new CalendarInterval(12 * 12, 0L);
-    assertEquals(fromSingleUnitString("year", input), i);
-
-    input = "100";
-    i = new CalendarInterval(0, 100 * MICROS_PER_DAY);
-    assertEquals(fromSingleUnitString("day", input), i);
-
-    input = "1999.38888";
-    i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 38);
-    assertEquals(fromSingleUnitString("second", input), i);
-
-    try {
-      input = String.valueOf(Integer.MAX_VALUE);
-      fromSingleUnitString("year", input);
-      fail("Expected to throw an exception for the invalid input");
-    } catch (IllegalArgumentException e) {
-      assertTrue(e.getMessage().contains("outside range"));
-    }
-
-    try {
-      input = String.valueOf(Long.MAX_VALUE / MICROS_PER_HOUR + 1);
-      fromSingleUnitString("hour", input);
-      fail("Expected to throw an exception for the invalid input");
-    } catch (IllegalArgumentException e) {
-      assertTrue(e.getMessage().contains("outside range"));
-    }
-  }
-
-  @Test
-  public void addTest() {
-    String input = "interval 3 month 1 hour";
-    String input2 = "interval 2 month 100 hour";
-
-    CalendarInterval interval = fromString(input);
-    CalendarInterval interval2 = fromString(input2);
-
-    assertEquals(interval.add(interval2), new CalendarInterval(5, 101 * MICROS_PER_HOUR));
-
-    input = "interval -10 month -81 hour";
-    input2 = "interval 75 month 200 hour";
-
-    interval = fromString(input);
-    interval2 = fromString(input2);
-
-    assertEquals(interval.add(interval2), new CalendarInterval(65, 119 * MICROS_PER_HOUR));
-  }
-
-  @Test
-  public void subtractTest() {
-    String input = "interval 3 month 1 hour";
-    String input2 = "interval 2 month 100 hour";
-
-    CalendarInterval interval = fromString(input);
-    CalendarInterval interval2 = fromString(input2);
-
-    assertEquals(interval.subtract(interval2), new CalendarInterval(1, -99 * MICROS_PER_HOUR));
-
-    input = "interval -10 month -81 hour";
-    input2 = "interval 75 month 200 hour";
-
-    interval = fromString(input);
-    interval2 = fromString(input2);
-
-    assertEquals(interval.subtract(interval2), new CalendarInterval(-85, -281 * MICROS_PER_HOUR));
-  }
-
-  private static void testSingleUnit(String unit, int number, int months, long microseconds) {
-    String input1 = "interval " + number + " " + unit;
-    String input2 = "interval " + number + " " + unit + "s";
-    CalendarInterval result = new CalendarInterval(months, microseconds);
-    assertEquals(fromString(input1), result);
-    assertEquals(fromString(input2), result);
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
----------------------------------------------------------------------
diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
deleted file mode 100644
index bef5d71..0000000
--- a/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ /dev/null
@@ -1,492 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*    http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.spark.unsafe.types;
-
-import java.io.UnsupportedEncodingException;
-import java.util.Arrays;
-import java.util.HashMap;
-
-import com.google.common.collect.ImmutableMap;
-import org.junit.Test;
-
-import static org.junit.Assert.*;
-
-import static org.apache.spark.unsafe.types.UTF8String.*;
-
-public class UTF8StringSuite {
-
-  private static void checkBasic(String str, int len) throws UnsupportedEncodingException {
-    UTF8String s1 = fromString(str);
-    UTF8String s2 = fromBytes(str.getBytes("utf8"));
-    assertEquals(s1.numChars(), len);
-    assertEquals(s2.numChars(), len);
-
-    assertEquals(s1.toString(), str);
-    assertEquals(s2.toString(), str);
-    assertEquals(s1, s2);
-
-    assertEquals(s1.hashCode(), s2.hashCode());
-
-    assertEquals(0, s1.compareTo(s2));
-
-    assertTrue(s1.contains(s2));
-    assertTrue(s2.contains(s1));
-    assertTrue(s1.startsWith(s1));
-    assertTrue(s1.endsWith(s1));
-  }
-
-  @Test
-  public void basicTest() throws UnsupportedEncodingException {
-    checkBasic("", 0);
-    checkBasic("hello", 5);
-    checkBasic("大 千 世 界", 7);
-  }
-
-  @Test
-  public void emptyStringTest() {
-    assertEquals(EMPTY_UTF8, fromString(""));
-    assertEquals(EMPTY_UTF8, fromBytes(new byte[0]));
-    assertEquals(0, EMPTY_UTF8.numChars());
-    assertEquals(0, EMPTY_UTF8.numBytes());
-  }
-
-  @Test
-  public void prefix() {
-    assertTrue(fromString("a").getPrefix() - fromString("b").getPrefix() < 0);
-    assertTrue(fromString("ab").getPrefix() - fromString("b").getPrefix() < 0);
-    assertTrue(
-      fromString("abbbbbbbbbbbasdf").getPrefix() - fromString("bbbbbbbbbbbbasdf").getPrefix() < 0);
-    assertTrue(fromString("").getPrefix() - fromString("a").getPrefix() < 0);
-    assertTrue(fromString("你好").getPrefix() - fromString("世界").getPrefix() > 0);
-
-    byte[] buf1 = {1, 2, 3, 4, 5, 6, 7, 8, 9};
-    byte[] buf2 = {1, 2, 3};
-    UTF8String str1 = fromBytes(buf1, 0, 3);
-    UTF8String str2 = fromBytes(buf1, 0, 8);
-    UTF8String str3 = fromBytes(buf2);
-    assertTrue(str1.getPrefix() - str2.getPrefix() < 0);
-    assertEquals(str1.getPrefix(), str3.getPrefix());
-  }
-
-  @Test
-  public void compareTo() {
-    assertTrue(fromString("").compareTo(fromString("a")) < 0);
-    assertTrue(fromString("abc").compareTo(fromString("ABC")) > 0);
-    assertTrue(fromString("abc0").compareTo(fromString("abc")) > 0);
-    assertTrue(fromString("abcabcabc").compareTo(fromString("abcabcabc")) == 0);
-    assertTrue(fromString("aBcabcabc").compareTo(fromString("Abcabcabc")) > 0);
-    assertTrue(fromString("Abcabcabc").compareTo(fromString("abcabcabC")) < 0);
-    assertTrue(fromString("abcabcabc").compareTo(fromString("abcabcabC")) > 0);
-
-    assertTrue(fromString("abc").compareTo(fromString("世界")) < 0);
-    assertTrue(fromString("你好").compareTo(fromString("世界")) > 0);
-    assertTrue(fromString("你好123").compareTo(fromString("你好122")) > 0);
-  }
-
-  protected static void testUpperandLower(String upper, String lower) {
-    UTF8String us = fromString(upper);
-    UTF8String ls = fromString(lower);
-    assertEquals(ls, us.toLowerCase());
-    assertEquals(us, ls.toUpperCase());
-    assertEquals(us, us.toUpperCase());
-    assertEquals(ls, ls.toLowerCase());
-  }
-
-  @Test
-  public void upperAndLower() {
-    testUpperandLower("", "");
-    testUpperandLower("0123456", "0123456");
-    testUpperandLower("ABCXYZ", "abcxyz");
-    testUpperandLower("ЀЁЂѺΏỀ", "ѐёђѻώề");
-    testUpperandLower("大千世界 数据砖头", "大千世界 数据砖头");
-  }
-
-  @Test
-  public void titleCase() {
-    assertEquals(fromString(""), fromString("").toTitleCase());
-    assertEquals(fromString("Ab Bc Cd"), fromString("ab bc cd").toTitleCase());
-    assertEquals(fromString("Ѐ Ё Ђ Ѻ Ώ Ề"), fromString("ѐ ё ђ ѻ ώ ề").toTitleCase());
-    assertEquals(fromString("大千世界 数据砖头"), fromString("大千世界 数据砖头").toTitleCase());
-  }
-
-  @Test
-  public void concatTest() {
-    assertEquals(EMPTY_UTF8, concat());
-    assertNull(concat((UTF8String) null));
-    assertEquals(EMPTY_UTF8, concat(EMPTY_UTF8));
-    assertEquals(fromString("ab"), concat(fromString("ab")));
-    assertEquals(fromString("ab"), concat(fromString("a"), fromString("b")));
-    assertEquals(fromString("abc"), concat(fromString("a"), fromString("b"), fromString("c")));
-    assertNull(concat(fromString("a"), null, fromString("c")));
-    assertNull(concat(fromString("a"), null, null));
-    assertNull(concat(null, null, null));
-    assertEquals(fromString("数据砖头"), concat(fromString("数据"), fromString("砖头")));
-  }
-
-  @Test
-  public void concatWsTest() {
-    // Returns null if the separator is null
-    assertNull(concatWs(null, (UTF8String) null));
-    assertNull(concatWs(null, fromString("a")));
-
-    // If separator is not null, concatWs should skip all null inputs and never return null.
-    UTF8String sep = fromString("哈哈");
-    assertEquals(
-      EMPTY_UTF8,
-      concatWs(sep, EMPTY_UTF8));
-    assertEquals(
-      fromString("ab"),
-      concatWs(sep, fromString("ab")));
-    assertEquals(
-      fromString("a哈哈b"),
-      concatWs(sep, fromString("a"), fromString("b")));
-    assertEquals(
-      fromString("a哈哈b哈哈c"),
-      concatWs(sep, fromString("a"), fromString("b"), fromString("c")));
-    assertEquals(
-      fromString("a哈哈c"),
-      concatWs(sep, fromString("a"), null, fromString("c")));
-    assertEquals(
-      fromString("a"),
-      concatWs(sep, fromString("a"), null, null));
-    assertEquals(
-      EMPTY_UTF8,
-      concatWs(sep, null, null, null));
-    assertEquals(
-      fromString("数据哈哈砖头"),
-      concatWs(sep, fromString("数据"), fromString("砖头")));
-  }
-
-  @Test
-  public void contains() {
-    assertTrue(EMPTY_UTF8.contains(EMPTY_UTF8));
-    assertTrue(fromString("hello").contains(fromString("ello")));
-    assertFalse(fromString("hello").contains(fromString("vello")));
-    assertFalse(fromString("hello").contains(fromString("hellooo")));
-    assertTrue(fromString("大千世界").contains(fromString("千世界")));
-    assertFalse(fromString("大千世界").contains(fromString("世千")));
-    assertFalse(fromString("大千世界").contains(fromString("大千世界好")));
-  }
-
-  @Test
-  public void startsWith() {
-    assertTrue(EMPTY_UTF8.startsWith(EMPTY_UTF8));
-    assertTrue(fromString("hello").startsWith(fromString("hell")));
-    assertFalse(fromString("hello").startsWith(fromString("ell")));
-    assertFalse(fromString("hello").startsWith(fromString("hellooo")));
-    assertTrue(fromString("数据砖头").startsWith(fromString("数据")));
-    assertFalse(fromString("大千世界").startsWith(fromString("千")));
-    assertFalse(fromString("大千世界").startsWith(fromString("大千世界好")));
-  }
-
-  @Test
-  public void endsWith() {
-    assertTrue(EMPTY_UTF8.endsWith(EMPTY_UTF8));
-    assertTrue(fromString("hello").endsWith(fromString("ello")));
-    assertFalse(fromString("hello").endsWith(fromString("ellov")));
-    assertFalse(fromString("hello").endsWith(fromString("hhhello")));
-    assertTrue(fromString("大千世界").endsWith(fromString("世界")));
-    assertFalse(fromString("大千世界").endsWith(fromString("世")));
-    assertFalse(fromString("数据砖头").endsWith(fromString("我的数据砖头")));
-  }
-
-  @Test
-  public void substring() {
-    assertEquals(EMPTY_UTF8, fromString("hello").substring(0, 0));
-    assertEquals(fromString("el"), fromString("hello").substring(1, 3));
-    assertEquals(fromString("数"), fromString("数据砖头").substring(0, 1));
-    assertEquals(fromString("据砖"), fromString("数据砖头").substring(1, 3));
-    assertEquals(fromString("头"), fromString("数据砖头").substring(3, 5));
-    assertEquals(fromString("ߵ梷"), fromString("ߵ梷").substring(0, 2));
-  }
-
-  @Test
-  public void trims() {
-    assertEquals(fromString("hello"), fromString("  hello ").trim());
-    assertEquals(fromString("hello "), fromString("  hello ").trimLeft());
-    assertEquals(fromString("  hello"), fromString("  hello ").trimRight());
-
-    assertEquals(EMPTY_UTF8, fromString("  ").trim());
-    assertEquals(EMPTY_UTF8, fromString("  ").trimLeft());
-    assertEquals(EMPTY_UTF8, fromString("  ").trimRight());
-
-    assertEquals(fromString("数据砖头"), fromString("  数据砖头 ").trim());
-    assertEquals(fromString("数据砖头 "), fromString("  数据砖头 ").trimLeft());
-    assertEquals(fromString("  数据砖头"), fromString("  数据砖头 ").trimRight());
-
-    assertEquals(fromString("数据砖头"), fromString("数据砖头").trim());
-    assertEquals(fromString("数据砖头"), fromString("数据砖头").trimLeft());
-    assertEquals(fromString("数据砖头"), fromString("数据砖头").trimRight());
-  }
-
-  @Test
-  public void indexOf() {
-    assertEquals(0, EMPTY_UTF8.indexOf(EMPTY_UTF8, 0));
-    assertEquals(-1, EMPTY_UTF8.indexOf(fromString("l"), 0));
-    assertEquals(0, fromString("hello").indexOf(EMPTY_UTF8, 0));
-    assertEquals(2, fromString("hello").indexOf(fromString("l"), 0));
-    assertEquals(3, fromString("hello").indexOf(fromString("l"), 3));
-    assertEquals(-1, fromString("hello").indexOf(fromString("a"), 0));
-    assertEquals(2, fromString("hello").indexOf(fromString("ll"), 0));
-    assertEquals(-1, fromString("hello").indexOf(fromString("ll"), 4));
-    assertEquals(1, fromString("数据砖头").indexOf(fromString("据砖"), 0));
-    assertEquals(-1, fromString("数据砖头").indexOf(fromString("数"), 3));
-    assertEquals(0, fromString("数据砖头").indexOf(fromString("数"), 0));
-    assertEquals(3, fromString("数据砖头").indexOf(fromString("头"), 0));
-  }
-
-  @Test
-  public void substring_index() {
-    assertEquals(fromString("www.apache.org"),
-      fromString("www.apache.org").subStringIndex(fromString("."), 3));
-    assertEquals(fromString("www.apache"),
-      fromString("www.apache.org").subStringIndex(fromString("."), 2));
-    assertEquals(fromString("www"),
-      fromString("www.apache.org").subStringIndex(fromString("."), 1));
-    assertEquals(fromString(""),
-      fromString("www.apache.org").subStringIndex(fromString("."), 0));
-    assertEquals(fromString("org"),
-      fromString("www.apache.org").subStringIndex(fromString("."), -1));
-    assertEquals(fromString("apache.org"),
-      fromString("www.apache.org").subStringIndex(fromString("."), -2));
-    assertEquals(fromString("www.apache.org"),
-      fromString("www.apache.org").subStringIndex(fromString("."), -3));
-    // str is empty string
-    assertEquals(fromString(""),
-      fromString("").subStringIndex(fromString("."), 1));
-    // empty string delim
-    assertEquals(fromString(""),
-      fromString("www.apache.org").subStringIndex(fromString(""), 1));
-    // delim does not exist in str
-    assertEquals(fromString("www.apache.org"),
-      fromString("www.apache.org").subStringIndex(fromString("#"), 2));
-    // delim is 2 chars
-    assertEquals(fromString("www||apache"),
-      fromString("www||apache||org").subStringIndex(fromString("||"), 2));
-    assertEquals(fromString("apache||org"),
-      fromString("www||apache||org").subStringIndex(fromString("||"), -2));
-    // non ascii chars
-    assertEquals(fromString("大千世界大"),
-      fromString("大千世界大千世界").subStringIndex(fromString("千"), 2));
-    // overlapped delim
-    assertEquals(fromString("||"), fromString("||||||").subStringIndex(fromString("|||"), 3));
-    assertEquals(fromString("|||"), fromString("||||||").subStringIndex(fromString("|||"), -4));
-  }
-
-  @Test
-  public void reverse() {
-    assertEquals(fromString("olleh"), fromString("hello").reverse());
-    assertEquals(EMPTY_UTF8, EMPTY_UTF8.reverse());
-    assertEquals(fromString("者行孙"), fromString("孙行者").reverse());
-    assertEquals(fromString("者行孙 olleh"), fromString("hello 孙行者").reverse());
-  }
-
-  @Test
-  public void repeat() {
-    assertEquals(fromString("数d数d数d数d数d"), fromString("数d").repeat(5));
-    assertEquals(fromString("数d"), fromString("数d").repeat(1));
-    assertEquals(EMPTY_UTF8, fromString("数d").repeat(-1));
-  }
-
-  @Test
-  public void pad() {
-    assertEquals(fromString("hel"), fromString("hello").lpad(3, fromString("????")));
-    assertEquals(fromString("hello"), fromString("hello").lpad(5, fromString("????")));
-    assertEquals(fromString("?hello"), fromString("hello").lpad(6, fromString("????")));
-    assertEquals(fromString("???????hello"), fromString("hello").lpad(12, fromString("????")));
-    assertEquals(fromString("?????hello"), fromString("hello").lpad(10, fromString("?????")));
-    assertEquals(fromString("???????"), EMPTY_UTF8.lpad(7, fromString("?????")));
-
-    assertEquals(fromString("hel"), fromString("hello").rpad(3, fromString("????")));
-    assertEquals(fromString("hello"), fromString("hello").rpad(5, fromString("????")));
-    assertEquals(fromString("hello?"), fromString("hello").rpad(6, fromString("????")));
-    assertEquals(fromString("hello???????"), fromString("hello").rpad(12, fromString("????")));
-    assertEquals(fromString("hello?????"), fromString("hello").rpad(10, fromString("?????")));
-    assertEquals(fromString("???????"), EMPTY_UTF8.rpad(7, fromString("?????")));
-
-    assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, fromString("????")));
-    assertEquals(fromString("?数据砖头"), fromString("数据砖头").lpad(5, fromString("????")));
-    assertEquals(fromString("??数据砖头"), fromString("数据砖头").lpad(6, fromString("????")));
-    assertEquals(fromString("孙行数据砖头"), fromString("数据砖头").lpad(6, fromString("孙行者")));
-    assertEquals(fromString("孙行者数据砖头"), fromString("数据砖头").lpad(7, fromString("孙行者")));
-    assertEquals(
-      fromString("孙行者孙行者孙行数据砖头"),
-      fromString("数据砖头").lpad(12, fromString("孙行者")));
-
-    assertEquals(fromString("数据砖"), fromString("数据砖头").rpad(3, fromString("????")));
-    assertEquals(fromString("数据砖头?"), fromString("数据砖头").rpad(5, fromString("????")));
-    assertEquals(fromString("数据砖头??"), fromString("数据砖头").rpad(6, fromString("????")));
-    assertEquals(fromString("数据砖头孙行"), fromString("数据砖头").rpad(6, fromString("孙行者")));
-    assertEquals(fromString("数据砖头孙行者"), fromString("数据砖头").rpad(7, fromString("孙行者")));
-    assertEquals(
-      fromString("数据砖头孙行者孙行者孙行"),
-      fromString("数据砖头").rpad(12, fromString("孙行者")));
-
-    assertEquals(EMPTY_UTF8, fromString("数据砖头").lpad(-10, fromString("孙行者")));
-    assertEquals(EMPTY_UTF8, fromString("数据砖头").lpad(-10, EMPTY_UTF8));
-    assertEquals(fromString("数据砖头"), fromString("数据砖头").lpad(5, EMPTY_UTF8));
-    assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, EMPTY_UTF8));
-    assertEquals(EMPTY_UTF8, EMPTY_UTF8.lpad(3, EMPTY_UTF8));
-
-    assertEquals(EMPTY_UTF8, fromString("数据砖头").rpad(-10, fromString("孙行者")));
-    assertEquals(EMPTY_UTF8, fromString("数据砖头").rpad(-10, EMPTY_UTF8));
-    assertEquals(fromString("数据砖头"), fromString("数据砖头").rpad(5, EMPTY_UTF8));
-    assertEquals(fromString("数据砖"), fromString("数据砖头").rpad(3, EMPTY_UTF8));
-    assertEquals(EMPTY_UTF8, EMPTY_UTF8.rpad(3, EMPTY_UTF8));
-  }
-
-  @Test
-  public void substringSQL() {
-    UTF8String e = fromString("example");
-    assertEquals(e.substringSQL(0, 2), fromString("ex"));
-    assertEquals(e.substringSQL(1, 2), fromString("ex"));
-    assertEquals(e.substringSQL(0, 7), fromString("example"));
-    assertEquals(e.substringSQL(1, 2), fromString("ex"));
-    assertEquals(e.substringSQL(0, 100), fromString("example"));
-    assertEquals(e.substringSQL(1, 100), fromString("example"));
-    assertEquals(e.substringSQL(2, 2), fromString("xa"));
-    assertEquals(e.substringSQL(1, 6), fromString("exampl"));
-    assertEquals(e.substringSQL(2, 100), fromString("xample"));
-    assertEquals(e.substringSQL(0, 0), fromString(""));
-    assertEquals(e.substringSQL(100, 4), EMPTY_UTF8);
-    assertEquals(e.substringSQL(0, Integer.MAX_VALUE), fromString("example"));
-    assertEquals(e.substringSQL(1, Integer.MAX_VALUE), fromString("example"));
-    assertEquals(e.substringSQL(2, Integer.MAX_VALUE), fromString("xample"));
-  }
-
-  @Test
-  public void split() {
-    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), -1),
-      new UTF8String[]{fromString("ab"), fromString("def"), fromString("ghi")}));
-    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2),
-      new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
-    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2),
-      new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
-  }
-
-  @Test
-  public void levenshteinDistance() {
-    assertEquals(0, EMPTY_UTF8.levenshteinDistance(EMPTY_UTF8));
-    assertEquals(1, EMPTY_UTF8.levenshteinDistance(fromString("a")));
-    assertEquals(7, fromString("aaapppp").levenshteinDistance(EMPTY_UTF8));
-    assertEquals(1, fromString("frog").levenshteinDistance(fromString("fog")));
-    assertEquals(3, fromString("fly").levenshteinDistance(fromString("ant")));
-    assertEquals(7, fromString("elephant").levenshteinDistance(fromString("hippo")));
-    assertEquals(7, fromString("hippo").levenshteinDistance(fromString("elephant")));
-    assertEquals(8, fromString("hippo").levenshteinDistance(fromString("zzzzzzzz")));
-    assertEquals(1, fromString("hello").levenshteinDistance(fromString("hallo")));
-    assertEquals(4, fromString("世界千世").levenshteinDistance(fromString("千a世b")));
-  }
-
-  @Test
-  public void translate() {
-    assertEquals(
-      fromString("1a2s3ae"),
-      fromString("translate").translate(ImmutableMap.of(
-        'r', '1',
-        'n', '2',
-        'l', '3',
-        't', '\0'
-      )));
-    assertEquals(
-      fromString("translate"),
-      fromString("translate").translate(new HashMap<Character, Character>()));
-    assertEquals(
-      fromString("asae"),
-      fromString("translate").translate(ImmutableMap.of(
-        'r', '\0',
-        'n', '\0',
-        'l', '\0',
-        't', '\0'
-      )));
-    assertEquals(
-      fromString("aa世b"),
-      fromString("花花世界").translate(ImmutableMap.of(
-        '花', 'a',
-        '界', 'b'
-      )));
-  }
-
-  @Test
-  public void createBlankString() {
-    assertEquals(fromString(" "), blankString(1));
-    assertEquals(fromString("  "), blankString(2));
-    assertEquals(fromString("   "), blankString(3));
-    assertEquals(fromString(""), blankString(0));
-  }
-
-  @Test
-  public void findInSet() {
-    assertEquals(1, fromString("ab").findInSet(fromString("ab")));
-    assertEquals(2, fromString("a,b").findInSet(fromString("b")));
-    assertEquals(3, fromString("abc,b,ab,c,def").findInSet(fromString("ab")));
-    assertEquals(1, fromString("ab,abc,b,ab,c,def").findInSet(fromString("ab")));
-    assertEquals(4, fromString(",,,ab,abc,b,ab,c,def").findInSet(fromString("ab")));
-    assertEquals(1, fromString(",ab,abc,b,ab,c,def").findInSet(fromString("")));
-    assertEquals(4, fromString("数据砖头,abc,b,ab,c,def").findInSet(fromString("ab")));
-    assertEquals(6, fromString("数据砖头,abc,b,ab,c,def").findInSet(fromString("def")));
-  }
-
-  @Test
-  public void soundex() {
-    assertEquals(fromString("Robert").soundex(), fromString("R163"));
-    assertEquals(fromString("Rupert").soundex(), fromString("R163"));
-    assertEquals(fromString("Rubin").soundex(), fromString("R150"));
-    assertEquals(fromString("Ashcraft").soundex(), fromString("A261"));
-    assertEquals(fromString("Ashcroft").soundex(), fromString("A261"));
-    assertEquals(fromString("Burroughs").soundex(), fromString("B620"));
-    assertEquals(fromString("Burrows").soundex(), fromString("B620"));
-    assertEquals(fromString("Ekzampul").soundex(), fromString("E251"));
-    assertEquals(fromString("Example").soundex(), fromString("E251"));
-    assertEquals(fromString("Ellery").soundex(), fromString("E460"));
-    assertEquals(fromString("Euler").soundex(), fromString("E460"));
-    assertEquals(fromString("Ghosh").soundex(), fromString("G200"));
-    assertEquals(fromString("Gauss").soundex(), fromString("G200"));
-    assertEquals(fromString("Gutierrez").soundex(), fromString("G362"));
-    assertEquals(fromString("Heilbronn").soundex(), fromString("H416"));
-    assertEquals(fromString("Hilbert").soundex(), fromString("H416"));
-    assertEquals(fromString("Jackson").soundex(), fromString("J250"));
-    assertEquals(fromString("Kant").soundex(), fromString("K530"));
-    assertEquals(fromString("Knuth").soundex(), fromString("K530"));
-    assertEquals(fromString("Lee").soundex(), fromString("L000"));
-    assertEquals(fromString("Lukasiewicz").soundex(), fromString("L222"));
-    assertEquals(fromString("Lissajous").soundex(), fromString("L222"));
-    assertEquals(fromString("Ladd").soundex(), fromString("L300"));
-    assertEquals(fromString("Lloyd").soundex(), fromString("L300"));
-    assertEquals(fromString("Moses").soundex(), fromString("M220"));
-    assertEquals(fromString("O'Hara").soundex(), fromString("O600"));
-    assertEquals(fromString("Pfister").soundex(), fromString("P236"));
-    assertEquals(fromString("Rubin").soundex(), fromString("R150"));
-    assertEquals(fromString("Robert").soundex(), fromString("R163"));
-    assertEquals(fromString("Rupert").soundex(), fromString("R163"));
-    assertEquals(fromString("Soundex").soundex(), fromString("S532"));
-    assertEquals(fromString("Sownteks").soundex(), fromString("S532"));
-    assertEquals(fromString("Tymczak").soundex(), fromString("T522"));
-    assertEquals(fromString("VanDeusen").soundex(), fromString("V532"));
-    assertEquals(fromString("Washington").soundex(), fromString("W252"));
-    assertEquals(fromString("Wheaton").soundex(), fromString("W350"));
-
-    assertEquals(fromString("a").soundex(), fromString("A000"));
-    assertEquals(fromString("ab").soundex(), fromString("A100"));
-    assertEquals(fromString("abc").soundex(), fromString("A120"));
-    assertEquals(fromString("abcd").soundex(), fromString("A123"));
-    assertEquals(fromString("").soundex(), fromString(""));
-    assertEquals(fromString("123").soundex(), fromString("123"));
-    assertEquals(fromString("世界千世").soundex(), fromString("世界千世"));
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
----------------------------------------------------------------------
diff --git a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
deleted file mode 100644
index b3bbd68..0000000
--- a/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.types
-
-import org.apache.commons.lang3.StringUtils
-import org.scalacheck.{Arbitrary, Gen}
-import org.scalatest.prop.GeneratorDrivenPropertyChecks
-// scalastyle:off
-import org.scalatest.{FunSuite, Matchers}
-
-import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8}
-
-/**
- * This TestSuite utilize ScalaCheck to generate randomized inputs for UTF8String testing.
- */
-class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenPropertyChecks with Matchers {
-// scalastyle:on
-
-  test("toString") {
-    forAll { (s: String) =>
-      assert(toUTF8(s).toString() === s)
-    }
-  }
-
-  test("numChars") {
-    forAll { (s: String) =>
-      assert(toUTF8(s).numChars() === s.length)
-    }
-  }
-
-  test("startsWith") {
-    forAll { (s: String) =>
-      val utf8 = toUTF8(s)
-      assert(utf8.startsWith(utf8))
-      for (i <- 1 to s.length) {
-        assert(utf8.startsWith(toUTF8(s.dropRight(i))))
-      }
-    }
-  }
-
-  test("endsWith") {
-    forAll { (s: String) =>
-      val utf8 = toUTF8(s)
-      assert(utf8.endsWith(utf8))
-      for (i <- 1 to s.length) {
-        assert(utf8.endsWith(toUTF8(s.drop(i))))
-      }
-    }
-  }
-
-  test("toUpperCase") {
-    forAll { (s: String) =>
-      assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase))
-    }
-  }
-
-  test("toLowerCase") {
-    forAll { (s: String) =>
-      assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase))
-    }
-  }
-
-  test("compare") {
-    forAll { (s1: String, s2: String) =>
-      assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) === Math.signum(s1.compareTo(s2)))
-    }
-  }
-
-  test("substring") {
-    forAll { (s: String) =>
-      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
-        assert(toUTF8(s).substring(start, end).toString === s.substring(start, end))
-      }
-    }
-  }
-
-  test("contains") {
-    forAll { (s: String) =>
-      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
-        val substring = s.substring(start, end)
-        assert(toUTF8(s).contains(toUTF8(substring)) === s.contains(substring))
-      }
-    }
-  }
-
-  val whitespaceChar: Gen[Char] = Gen.choose(0x00, 0x20).map(_.toChar)
-  val whitespaceString: Gen[String] = Gen.listOf(whitespaceChar).map(_.mkString)
-  val randomString: Gen[String] = Arbitrary.arbString.arbitrary
-
-  test("trim, trimLeft, trimRight") {
-    // lTrim and rTrim are both modified from java.lang.String.trim
-    def lTrim(s: String): String = {
-      var st = 0
-      val array: Array[Char] = s.toCharArray
-      while ((st < s.length) && (array(st) <= ' ')) {
-        st += 1
-      }
-      if (st > 0) s.substring(st, s.length) else s
-    }
-    def rTrim(s: String): String = {
-      var len = s.length
-      val array: Array[Char] = s.toCharArray
-      while ((len > 0) && (array(len - 1) <= ' ')) {
-        len -= 1
-      }
-      if (len < s.length) s.substring(0, len) else s
-    }
-
-    forAll(
-        whitespaceString,
-        randomString,
-        whitespaceString
-    ) { (start: String, middle: String, end: String) =>
-      val s = start + middle + end
-      assert(toUTF8(s).trim() === toUTF8(s.trim()))
-      assert(toUTF8(s).trimLeft() === toUTF8(lTrim(s)))
-      assert(toUTF8(s).trimRight() === toUTF8(rTrim(s)))
-    }
-  }
-
-  test("reverse") {
-    forAll { (s: String) =>
-      assert(toUTF8(s).reverse === toUTF8(s.reverse))
-    }
-  }
-
-  test("indexOf") {
-    forAll { (s: String) =>
-      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
-        val substring = s.substring(start, end)
-        assert(toUTF8(s).indexOf(toUTF8(substring), 0) === s.indexOf(substring))
-      }
-    }
-  }
-
-  val randomInt = Gen.choose(-100, 100)
-
-  test("repeat") {
-    def repeat(str: String, times: Int): String = {
-      if (times > 0) str * times else ""
-    }
-    // ScalaCheck always generating too large repeat times which might hang the test forever.
-    forAll(randomString, randomInt) { (s: String, times: Int) =>
-      assert(toUTF8(s).repeat(times) === toUTF8(repeat(s, times)))
-    }
-  }
-
-  test("lpad, rpad") {
-    def padding(origin: String, pad: String, length: Int, isLPad: Boolean): String = {
-      if (length <= 0) return ""
-      if (length <= origin.length) {
-        if (length <= 0) "" else origin.substring(0, length)
-      } else {
-        if (pad.length == 0) return origin
-        val toPad = length - origin.length
-        val partPad = if (toPad % pad.length == 0) "" else pad.substring(0, toPad % pad.length)
-        if (isLPad) {
-          pad * (toPad / pad.length) + partPad + origin
-        } else {
-          origin + pad * (toPad / pad.length) + partPad
-        }
-      }
-    }
-
-    forAll (
-      randomString,
-      randomString,
-      randomInt
-    ) { (s: String, pad: String, length: Int) =>
-      assert(toUTF8(s).lpad(length, toUTF8(pad)) ===
-        toUTF8(padding(s, pad, length, true)))
-      assert(toUTF8(s).rpad(length, toUTF8(pad)) ===
-        toUTF8(padding(s, pad, length, false)))
-    }
-  }
-
-  val nullalbeSeq = Gen.listOf(Gen.oneOf[String](null: String, randomString))
-
-  test("concat") {
-    def concat(orgin: Seq[String]): String =
-      if (orgin.exists(_ == null)) null else orgin.mkString
-
-    forAll { (inputs: Seq[String]) =>
-      assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(inputs.mkString))
-    }
-    forAll (nullalbeSeq) { (inputs: Seq[String]) =>
-      assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(concat(inputs)))
-    }
-  }
-
-  test("concatWs") {
-    def concatWs(sep: String, inputs: Seq[String]): String = {
-      if (sep == null) return null
-      inputs.filter(_ != null).mkString(sep)
-    }
-
-    forAll { (sep: String, inputs: Seq[String]) =>
-      assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) ===
-        toUTF8(inputs.mkString(sep)))
-    }
-    forAll(randomString, nullalbeSeq) {(sep: String, inputs: Seq[String]) =>
-      assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) ===
-        toUTF8(concatWs(sep, inputs)))
-    }
-  }
-
-  // TODO: enable this when we find a proper way to generate valid patterns
-  ignore("split") {
-    forAll { (s: String, pattern: String, limit: Int) =>
-      assert(toUTF8(s).split(toUTF8(pattern), limit) ===
-        s.split(pattern, limit).map(toUTF8(_)))
-    }
-  }
-
-  test("levenshteinDistance") {
-    forAll { (one: String, another: String) =>
-      assert(toUTF8(one).levenshteinDistance(toUTF8(another)) ===
-        StringUtils.getLevenshteinDistance(one, another))
-    }
-  }
-
-  test("hashCode") {
-    forAll { (s: String) =>
-      assert(toUTF8(s).hashCode() === toUTF8(s).hashCode())
-    }
-  }
-
-  test("equals") {
-    forAll { (one: String, another: String) =>
-      assert(toUTF8(one).equals(toUTF8(another)) === one.equals(another))
-    }
-  }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org

[2/4] spark git commit: [SPARK-13548][BUILD] Move tags and unsafe modules into common

Posted by rx...@apache.org.

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/tags/src/main/java/org/apache/spark/tags/ExtendedYarnTest.java
----------------------------------------------------------------------
diff --git a/tags/src/main/java/org/apache/spark/tags/ExtendedYarnTest.java b/tags/src/main/java/org/apache/spark/tags/ExtendedYarnTest.java
deleted file mode 100644
index 1083001..0000000
--- a/tags/src/main/java/org/apache/spark/tags/ExtendedYarnTest.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.tags;
-
-import java.lang.annotation.*;
-
-import org.scalatest.TagAnnotation;
-
-@TagAnnotation
-@Retention(RetentionPolicy.RUNTIME)
-@Target({ElementType.METHOD, ElementType.TYPE})
-public @interface ExtendedYarnTest { }

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/pom.xml
----------------------------------------------------------------------
diff --git a/unsafe/pom.xml b/unsafe/pom.xml
deleted file mode 100644
index 75fea55..0000000
--- a/unsafe/pom.xml
+++ /dev/null
@@ -1,110 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  ~ Licensed to the Apache Software Foundation (ASF) under one or more
-  ~ contributor license agreements.  See the NOTICE file distributed with
-  ~ this work for additional information regarding copyright ownership.
-  ~ The ASF licenses this file to You under the Apache License, Version 2.0
-  ~ (the "License"); you may not use this file except in compliance with
-  ~ the License.  You may obtain a copy of the License at
-  ~
-  ~    http://www.apache.org/licenses/LICENSE-2.0
-  ~
-  ~ Unless required by applicable law or agreed to in writing, software
-  ~ distributed under the License is distributed on an "AS IS" BASIS,
-  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  ~ See the License for the specific language governing permissions and
-  ~ limitations under the License.
-  -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <groupId>org.apache.spark</groupId>
-  <artifactId>spark-unsafe_2.11</artifactId>
-  <packaging>jar</packaging>
-  <name>Spark Project Unsafe</name>
-  <url>http://spark.apache.org/</url>
-  <properties>
-    <sbt.project.name>unsafe</sbt.project.name>
-  </properties>
-
-  <dependencies>
-    <dependency>
-      <groupId>com.twitter</groupId>
-      <artifactId>chill_${scala.binary.version}</artifactId>
-    </dependency>
-
-    <!-- Core dependencies -->
-    <dependency>
-      <groupId>com.google.code.findbugs</groupId>
-      <artifactId>jsr305</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-    </dependency>
-
-    <!-- Provided dependencies -->
-    <dependency>
-      <groupId>org.slf4j</groupId>
-      <artifactId>slf4j-api</artifactId>
-      <scope>provided</scope>
-    </dependency>
-
-    <!-- Test dependencies -->
-    <dependency>
-      <groupId>org.apache.spark</groupId>
-      <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.mockito</groupId>
-      <artifactId>mockito-core</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.scalacheck</groupId>
-      <artifactId>scalacheck_${scala.binary.version}</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-  <build>
-    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <pluginManagement>
-      <plugins>
-        <plugin>
-          <groupId>net.alchim31.maven</groupId>
-          <artifactId>scala-maven-plugin</artifactId>
-          <configuration>
-            <javacArgs combine.children="append">
-              <!-- This option is needed to suppress warnings from sun.misc.Unsafe usage -->
-              <javacArg>-XDignore.symbol.file</javacArg>
-            </javacArgs>
-          </configuration>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-compiler-plugin</artifactId>
-          <configuration>
-            <compilerArgs>
-              <!-- This option is needed to suppress warnings from sun.misc.Unsafe usage -->
-              <arg>-XDignore.symbol.file</arg>
-            </compilerArgs>
-          </configuration>
-        </plugin>
-      </plugins>
-    </pluginManagement>
-  </build>
-</project>

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/KVIterator.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/KVIterator.java b/unsafe/src/main/java/org/apache/spark/unsafe/KVIterator.java
deleted file mode 100644
index 5c9d5d9..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/KVIterator.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe;
-
-import java.io.IOException;
-
-public abstract class KVIterator<K, V> {
-
-  public abstract boolean next() throws IOException;
-
-  public abstract K getKey();
-
-  public abstract V getValue();
-
-  public abstract void close();
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
deleted file mode 100644
index 18761bf..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
+++ /dev/null
@@ -1,189 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe;
-
-import java.lang.reflect.Field;
-
-import sun.misc.Unsafe;
-
-public final class Platform {
-
-  private static final Unsafe _UNSAFE;
-
-  public static final int BYTE_ARRAY_OFFSET;
-
-  public static final int SHORT_ARRAY_OFFSET;
-
-  public static final int INT_ARRAY_OFFSET;
-
-  public static final int LONG_ARRAY_OFFSET;
-
-  public static final int FLOAT_ARRAY_OFFSET;
-
-  public static final int DOUBLE_ARRAY_OFFSET;
-
-  public static int getInt(Object object, long offset) {
-    return _UNSAFE.getInt(object, offset);
-  }
-
-  public static void putInt(Object object, long offset, int value) {
-    _UNSAFE.putInt(object, offset, value);
-  }
-
-  public static boolean getBoolean(Object object, long offset) {
-    return _UNSAFE.getBoolean(object, offset);
-  }
-
-  public static void putBoolean(Object object, long offset, boolean value) {
-    _UNSAFE.putBoolean(object, offset, value);
-  }
-
-  public static byte getByte(Object object, long offset) {
-    return _UNSAFE.getByte(object, offset);
-  }
-
-  public static void putByte(Object object, long offset, byte value) {
-    _UNSAFE.putByte(object, offset, value);
-  }
-
-  public static short getShort(Object object, long offset) {
-    return _UNSAFE.getShort(object, offset);
-  }
-
-  public static void putShort(Object object, long offset, short value) {
-    _UNSAFE.putShort(object, offset, value);
-  }
-
-  public static long getLong(Object object, long offset) {
-    return _UNSAFE.getLong(object, offset);
-  }
-
-  public static void putLong(Object object, long offset, long value) {
-    _UNSAFE.putLong(object, offset, value);
-  }
-
-  public static float getFloat(Object object, long offset) {
-    return _UNSAFE.getFloat(object, offset);
-  }
-
-  public static void putFloat(Object object, long offset, float value) {
-    _UNSAFE.putFloat(object, offset, value);
-  }
-
-  public static double getDouble(Object object, long offset) {
-    return _UNSAFE.getDouble(object, offset);
-  }
-
-  public static void putDouble(Object object, long offset, double value) {
-    _UNSAFE.putDouble(object, offset, value);
-  }
-
-  public static Object getObjectVolatile(Object object, long offset) {
-    return _UNSAFE.getObjectVolatile(object, offset);
-  }
-
-  public static void putObjectVolatile(Object object, long offset, Object value) {
-    _UNSAFE.putObjectVolatile(object, offset, value);
-  }
-
-  public static long allocateMemory(long size) {
-    return _UNSAFE.allocateMemory(size);
-  }
-
-  public static void freeMemory(long address) {
-    _UNSAFE.freeMemory(address);
-  }
-
-  public static long reallocateMemory(long address, long oldSize, long newSize) {
-    long newMemory = _UNSAFE.allocateMemory(newSize);
-    copyMemory(null, address, null, newMemory, oldSize);
-    freeMemory(address);
-    return newMemory;
-  }
-
-  public static void setMemory(long address, byte value, long size) {
-    _UNSAFE.setMemory(address, size, value);
-  }
-
-  public static void copyMemory(
-    Object src, long srcOffset, Object dst, long dstOffset, long length) {
-    // Check if dstOffset is before or after srcOffset to determine if we should copy
-    // forward or backwards. This is necessary in case src and dst overlap.
-    if (dstOffset < srcOffset) {
-      while (length > 0) {
-        long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
-        _UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
-        length -= size;
-        srcOffset += size;
-        dstOffset += size;
-      }
-    } else {
-      srcOffset += length;
-      dstOffset += length;
-      while (length > 0) {
-        long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
-        srcOffset -= size;
-        dstOffset -= size;
-        _UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
-        length -= size;
-      }
-
-    }
-  }
-
-  /**
-   * Raises an exception bypassing compiler checks for checked exceptions.
-   */
-  public static void throwException(Throwable t) {
-    _UNSAFE.throwException(t);
-  }
-
-  /**
-   * Limits the number of bytes to copy per {@link Unsafe#copyMemory(long, long, long)} to
-   * allow safepoint polling during a large copy.
-   */
-  private static final long UNSAFE_COPY_THRESHOLD = 1024L * 1024L;
-
-  static {
-    sun.misc.Unsafe unsafe;
-    try {
-      Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
-      unsafeField.setAccessible(true);
-      unsafe = (sun.misc.Unsafe) unsafeField.get(null);
-    } catch (Throwable cause) {
-      unsafe = null;
-    }
-    _UNSAFE = unsafe;
-
-    if (_UNSAFE != null) {
-      BYTE_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(byte[].class);
-      SHORT_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(short[].class);
-      INT_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(int[].class);
-      LONG_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(long[].class);
-      FLOAT_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(float[].class);
-      DOUBLE_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(double[].class);
-    } else {
-      BYTE_ARRAY_OFFSET = 0;
-      SHORT_ARRAY_OFFSET = 0;
-      INT_ARRAY_OFFSET = 0;
-      LONG_ARRAY_OFFSET = 0;
-      FLOAT_ARRAY_OFFSET = 0;
-      DOUBLE_ARRAY_OFFSET = 0;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
deleted file mode 100644
index cf42877..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.array;
-
-import org.apache.spark.unsafe.Platform;
-
-public class ByteArrayMethods {
-
-  private ByteArrayMethods() {
-    // Private constructor, since this class only contains static methods.
-  }
-
-  /** Returns the next number greater or equal num that is power of 2. */
-  public static long nextPowerOf2(long num) {
-    final long highBit = Long.highestOneBit(num);
-    return (highBit == num) ? num : highBit << 1;
-  }
-
-  public static int roundNumberOfBytesToNearestWord(int numBytes) {
-    int remainder = numBytes & 0x07;  // This is equivalent to `numBytes % 8`
-    if (remainder == 0) {
-      return numBytes;
-    } else {
-      return numBytes + (8 - remainder);
-    }
-  }
-
-  /**
-   * Optimized byte array equality check for byte arrays.
-   * @return true if the arrays are equal, false otherwise
-   */
-  public static boolean arrayEquals(
-      Object leftBase, long leftOffset, Object rightBase, long rightOffset, final long length) {
-    int i = 0;
-    while (i <= length - 8) {
-      if (Platform.getLong(leftBase, leftOffset + i) !=
-        Platform.getLong(rightBase, rightOffset + i)) {
-        return false;
-      }
-      i += 8;
-    }
-    while (i < length) {
-      if (Platform.getByte(leftBase, leftOffset + i) !=
-        Platform.getByte(rightBase, rightOffset + i)) {
-        return false;
-      }
-      i += 1;
-    }
-    return true;
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java b/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
deleted file mode 100644
index 1a3cdff..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.array;
-
-import org.apache.spark.unsafe.Platform;
-import org.apache.spark.unsafe.memory.MemoryBlock;
-
-/**
- * An array of long values. Compared with native JVM arrays, this:
- * <ul>
- *   <li>supports using both in-heap and off-heap memory</li>
- *   <li>has no bound checking, and thus can crash the JVM process when assert is turned off</li>
- * </ul>
- */
-public final class LongArray {
-
-  // This is a long so that we perform long multiplications when computing offsets.
-  private static final long WIDTH = 8;
-
-  private final MemoryBlock memory;
-  private final Object baseObj;
-  private final long baseOffset;
-
-  private final long length;
-
-  public LongArray(MemoryBlock memory) {
-    assert memory.size() < (long) Integer.MAX_VALUE * 8: "Array size > 4 billion elements";
-    this.memory = memory;
-    this.baseObj = memory.getBaseObject();
-    this.baseOffset = memory.getBaseOffset();
-    this.length = memory.size() / WIDTH;
-  }
-
-  public MemoryBlock memoryBlock() {
-    return memory;
-  }
-
-  public Object getBaseObject() {
-    return baseObj;
-  }
-
-  public long getBaseOffset() {
-    return baseOffset;
-  }
-
-  /**
-   * Returns the number of elements this array can hold.
-   */
-  public long size() {
-    return length;
-  }
-
-  /**
-   * Fill this all with 0L.
-   */
-  public void zeroOut() {
-    for (long off = baseOffset; off < baseOffset + length * WIDTH; off += WIDTH) {
-      Platform.putLong(baseObj, off, 0);
-    }
-  }
-
-  /**
-   * Sets the value at position {@code index}.
-   */
-  public void set(int index, long value) {
-    assert index >= 0 : "index (" + index + ") should >= 0";
-    assert index < length : "index (" + index + ") should < length (" + length + ")";
-    Platform.putLong(baseObj, baseOffset + index * WIDTH, value);
-  }
-
-  /**
-   * Returns the value at position {@code index}.
-   */
-  public long get(int index) {
-    assert index >= 0 : "index (" + index + ") should >= 0";
-    assert index < length : "index (" + index + ") should < length (" + length + ")";
-    return Platform.getLong(baseObj, baseOffset + index * WIDTH);
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java
deleted file mode 100644
index 7857bf6..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.bitset;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * Methods for working with fixed-size uncompressed bitsets.
- *
- * We assume that the bitset data is word-aligned (that is, a multiple of 8 bytes in length).
- *
- * Each bit occupies exactly one bit of storage.
- */
-public final class BitSetMethods {
-
-  private static final long WORD_SIZE = 8;
-
-  private BitSetMethods() {
-    // Make the default constructor private, since this only holds static methods.
-  }
-
-  /**
-   * Sets the bit at the specified index to {@code true}.
-   */
-  public static void set(Object baseObject, long baseOffset, int index) {
-    assert index >= 0 : "index (" + index + ") should >= 0";
-    final long mask = 1L << (index & 0x3f);  // mod 64 and shift
-    final long wordOffset = baseOffset + (index >> 6) * WORD_SIZE;
-    final long word = Platform.getLong(baseObject, wordOffset);
-    Platform.putLong(baseObject, wordOffset, word | mask);
-  }
-
-  /**
-   * Sets the bit at the specified index to {@code false}.
-   */
-  public static void unset(Object baseObject, long baseOffset, int index) {
-    assert index >= 0 : "index (" + index + ") should >= 0";
-    final long mask = 1L << (index & 0x3f);  // mod 64 and shift
-    final long wordOffset = baseOffset + (index >> 6) * WORD_SIZE;
-    final long word = Platform.getLong(baseObject, wordOffset);
-    Platform.putLong(baseObject, wordOffset, word & ~mask);
-  }
-
-  /**
-   * Returns {@code true} if the bit is set at the specified index.
-   */
-  public static boolean isSet(Object baseObject, long baseOffset, int index) {
-    assert index >= 0 : "index (" + index + ") should >= 0";
-    final long mask = 1L << (index & 0x3f);  // mod 64 and shift
-    final long wordOffset = baseOffset + (index >> 6) * WORD_SIZE;
-    final long word = Platform.getLong(baseObject, wordOffset);
-    return (word & mask) != 0;
-  }
-
-  /**
-   * Returns {@code true} if any bit is set.
-   */
-  public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInWords) {
-    long addr = baseOffset;
-    for (int i = 0; i < bitSetWidthInWords; i++, addr += WORD_SIZE) {
-      if (Platform.getLong(baseObject, addr) != 0) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Returns the index of the first bit that is set to true that occurs on or after the
-   * specified starting index. If no such bit exists then {@code -1} is returned.
-   * <p>
-   * To iterate over the true bits in a BitSet, use the following loop:
-   * <pre>
-   * <code>
-   *  for (long i = bs.nextSetBit(0, sizeInWords); i &gt;= 0; i = bs.nextSetBit(i + 1, sizeInWords)) {
-   *    // operate on index i here
-   *  }
-   * </code>
-   * </pre>
-   *
-   * @param fromIndex the index to start checking from (inclusive)
-   * @param bitsetSizeInWords the size of the bitset, measured in 8-byte words
-   * @return the index of the next set bit, or -1 if there is no such bit
-   */
-  public static int nextSetBit(
-      Object baseObject,
-      long baseOffset,
-      int fromIndex,
-      int bitsetSizeInWords) {
-    int wi = fromIndex >> 6;
-    if (wi >= bitsetSizeInWords) {
-      return -1;
-    }
-
-    // Try to find the next set bit in the current word
-    final int subIndex = fromIndex & 0x3f;
-    long word = Platform.getLong(baseObject, baseOffset + wi * WORD_SIZE) >> subIndex;
-    if (word != 0) {
-      return (wi << 6) + subIndex + java.lang.Long.numberOfTrailingZeros(word);
-    }
-
-    // Find the next set bit in the rest of the words
-    wi += 1;
-    while (wi < bitsetSizeInWords) {
-      word = Platform.getLong(baseObject, baseOffset + wi * WORD_SIZE);
-      if (word != 0) {
-        return (wi << 6) + java.lang.Long.numberOfTrailingZeros(word);
-      }
-      wi += 1;
-    }
-
-    return -1;
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java b/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
deleted file mode 100644
index 5e7ee48..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.hash;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * 32-bit Murmur3 hasher.  This is based on Guava's Murmur3_32HashFunction.
- */
-public final class Murmur3_x86_32 {
-  private static final int C1 = 0xcc9e2d51;
-  private static final int C2 = 0x1b873593;
-
-  private final int seed;
-
-  public Murmur3_x86_32(int seed) {
-    this.seed = seed;
-  }
-
-  @Override
-  public String toString() {
-    return "Murmur3_32(seed=" + seed + ")";
-  }
-
-  public int hashInt(int input) {
-    return hashInt(input, seed);
-  }
-
-  public static int hashInt(int input, int seed) {
-    int k1 = mixK1(input);
-    int h1 = mixH1(seed, k1);
-
-    return fmix(h1, 4);
-  }
-
-  public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
-    return hashUnsafeWords(base, offset, lengthInBytes, seed);
-  }
-
-  public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
-    // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
-    assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
-    int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
-    return fmix(h1, lengthInBytes);
-  }
-
-  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
-    assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
-    int lengthAligned = lengthInBytes - lengthInBytes % 4;
-    int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
-    for (int i = lengthAligned; i < lengthInBytes; i++) {
-      int halfWord = Platform.getByte(base, offset + i);
-      int k1 = mixK1(halfWord);
-      h1 = mixH1(h1, k1);
-    }
-    return fmix(h1, lengthInBytes);
-  }
-
-  private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
-    assert (lengthInBytes % 4 == 0);
-    int h1 = seed;
-    for (int i = 0; i < lengthInBytes; i += 4) {
-      int halfWord = Platform.getInt(base, offset + i);
-      int k1 = mixK1(halfWord);
-      h1 = mixH1(h1, k1);
-    }
-    return h1;
-  }
-
-  public int hashLong(long input) {
-    return hashLong(input, seed);
-  }
-
-  public static int hashLong(long input, int seed) {
-    int low = (int) input;
-    int high = (int) (input >>> 32);
-
-    int k1 = mixK1(low);
-    int h1 = mixH1(seed, k1);
-
-    k1 = mixK1(high);
-    h1 = mixH1(h1, k1);
-
-    return fmix(h1, 8);
-  }
-
-  private static int mixK1(int k1) {
-    k1 *= C1;
-    k1 = Integer.rotateLeft(k1, 15);
-    k1 *= C2;
-    return k1;
-  }
-
-  private static int mixH1(int h1, int k1) {
-    h1 ^= k1;
-    h1 = Integer.rotateLeft(h1, 13);
-    h1 = h1 * 5 + 0xe6546b64;
-    return h1;
-  }
-
-  // Finalization mix - force all bits of a hash block to avalanche
-  private static int fmix(int h1, int length) {
-    h1 ^= length;
-    h1 ^= h1 >>> 16;
-    h1 *= 0x85ebca6b;
-    h1 ^= h1 >>> 13;
-    h1 *= 0xc2b2ae35;
-    h1 ^= h1 >>> 16;
-    return h1;
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
deleted file mode 100644
index 09847ce..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import javax.annotation.concurrent.GuardedBy;
-import java.lang.ref.WeakReference;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.Map;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * A simple {@link MemoryAllocator} that can allocate up to 16GB using a JVM long primitive array.
- */
-public class HeapMemoryAllocator implements MemoryAllocator {
-
-  @GuardedBy("this")
-  private final Map<Long, LinkedList<WeakReference<MemoryBlock>>> bufferPoolsBySize =
-    new HashMap<>();
-
-  private static final int POOLING_THRESHOLD_BYTES = 1024 * 1024;
-
-  /**
-   * Returns true if allocations of the given size should go through the pooling mechanism and
-   * false otherwise.
-   */
-  private boolean shouldPool(long size) {
-    // Very small allocations are less likely to benefit from pooling.
-    return size >= POOLING_THRESHOLD_BYTES;
-  }
-
-  @Override
-  public MemoryBlock allocate(long size) throws OutOfMemoryError {
-    if (shouldPool(size)) {
-      synchronized (this) {
-        final LinkedList<WeakReference<MemoryBlock>> pool = bufferPoolsBySize.get(size);
-        if (pool != null) {
-          while (!pool.isEmpty()) {
-            final WeakReference<MemoryBlock> blockReference = pool.pop();
-            final MemoryBlock memory = blockReference.get();
-            if (memory != null) {
-              assert (memory.size() == size);
-              return memory;
-            }
-          }
-          bufferPoolsBySize.remove(size);
-        }
-      }
-    }
-    long[] array = new long[(int) ((size + 7) / 8)];
-    return new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
-  }
-
-  @Override
-  public void free(MemoryBlock memory) {
-    final long size = memory.size();
-    if (shouldPool(size)) {
-      synchronized (this) {
-        LinkedList<WeakReference<MemoryBlock>> pool = bufferPoolsBySize.get(size);
-        if (pool == null) {
-          pool = new LinkedList<>();
-          bufferPoolsBySize.put(size, pool);
-        }
-        pool.add(new WeakReference<>(memory));
-      }
-    } else {
-      // Do nothing
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
deleted file mode 100644
index 5192f68..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-public interface MemoryAllocator {
-
-  /**
-   * Allocates a contiguous block of memory. Note that the allocated memory is not guaranteed
-   * to be zeroed out (call `zero()` on the result if this is necessary).
-   */
-  MemoryBlock allocate(long size) throws OutOfMemoryError;
-
-  void free(MemoryBlock memory);
-
-  MemoryAllocator UNSAFE = new UnsafeMemoryAllocator();
-
-  MemoryAllocator HEAP = new HeapMemoryAllocator();
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
deleted file mode 100644
index e3e7947..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import javax.annotation.Nullable;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * A consecutive block of memory, starting at a {@link MemoryLocation} with a fixed size.
- */
-public class MemoryBlock extends MemoryLocation {
-
-  private final long length;
-
-  /**
-   * Optional page number; used when this MemoryBlock represents a page allocated by a
-   * TaskMemoryManager. This field is public so that it can be modified by the TaskMemoryManager,
-   * which lives in a different package.
-   */
-  public int pageNumber = -1;
-
-  public MemoryBlock(@Nullable Object obj, long offset, long length) {
-    super(obj, offset);
-    this.length = length;
-  }
-
-  /**
-   * Returns the size of the memory block.
-   */
-  public long size() {
-    return length;
-  }
-
-  /**
-   * Creates a memory block pointing to the memory used by the long array.
-   */
-  public static MemoryBlock fromLongArray(final long[] array) {
-    return new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, array.length * 8);
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
deleted file mode 100644
index 74ebc87..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import javax.annotation.Nullable;
-
-/**
- * A memory location. Tracked either by a memory address (with off-heap allocation),
- * or by an offset from a JVM object (in-heap allocation).
- */
-public class MemoryLocation {
-
-  @Nullable
-  Object obj;
-
-  long offset;
-
-  public MemoryLocation(@Nullable Object obj, long offset) {
-    this.obj = obj;
-    this.offset = offset;
-  }
-
-  public MemoryLocation() {
-    this(null, 0);
-  }
-
-  public void setObjAndOffset(Object newObj, long newOffset) {
-    this.obj = newObj;
-    this.offset = newOffset;
-  }
-
-  public final Object getBaseObject() {
-    return obj;
-  }
-
-  public final long getBaseOffset() {
-    return offset;
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
deleted file mode 100644
index 98ce711..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.memory;
-
-import org.apache.spark.unsafe.Platform;
-
-/**
- * A simple {@link MemoryAllocator} that uses {@code Unsafe} to allocate off-heap memory.
- */
-public class UnsafeMemoryAllocator implements MemoryAllocator {
-
-  @Override
-  public MemoryBlock allocate(long size) throws OutOfMemoryError {
-    long address = Platform.allocateMemory(size);
-    return new MemoryBlock(null, address, size);
-  }
-
-  @Override
-  public void free(MemoryBlock memory) {
-    assert (memory.obj == null) :
-      "baseObject not null; are you trying to use the off-heap allocator to free on-heap memory?";
-    Platform.freeMemory(memory.offset);
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
deleted file mode 100644
index 3ced209..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.types;
-
-import org.apache.spark.unsafe.Platform;
-
-import java.util.Arrays;
-
-public final class ByteArray {
-
-  public static final byte[] EMPTY_BYTE = new byte[0];
-
-  /**
-   * Writes the content of a byte array into a memory address, identified by an object and an
-   * offset. The target memory address must already been allocated, and have enough space to
-   * hold all the bytes in this string.
-   */
-  public static void writeToMemory(byte[] src, Object target, long targetOffset) {
-    Platform.copyMemory(src, Platform.BYTE_ARRAY_OFFSET, target, targetOffset, src.length);
-  }
-
-  /**
-   * Returns a 64-bit integer that can be used as the prefix used in sorting.
-   */
-  public static long getPrefix(byte[] bytes) {
-    if (bytes == null) {
-      return 0L;
-    } else {
-      final int minLen = Math.min(bytes.length, 8);
-      long p = 0;
-      for (int i = 0; i < minLen; ++i) {
-        p |= (128L + Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i))
-            << (56 - 8 * i);
-      }
-      return p;
-    }
-  }
-
-  public static byte[] subStringSQL(byte[] bytes, int pos, int len) {
-    // This pos calculation is according to UTF8String#subStringSQL
-    if (pos > bytes.length) {
-      return EMPTY_BYTE;
-    }
-    int start = 0;
-    int end;
-    if (pos > 0) {
-      start = pos - 1;
-    } else if (pos < 0) {
-      start = bytes.length + pos;
-    }
-    if ((bytes.length - start) < len) {
-      end = bytes.length;
-    } else {
-      end = start + len;
-    }
-    start = Math.max(start, 0); // underflow
-    if (start >= end) {
-      return EMPTY_BYTE;
-    }
-    return Arrays.copyOfRange(bytes, start, end);
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
deleted file mode 100644
index 62edf6c..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.types;
-
-import java.io.Serializable;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * The internal representation of interval type.
- *
- * An interval is held as a number of months plus a number of microseconds. The static
- * factory methods parse the textual forms handled by this class.
- */
-public final class CalendarInterval implements Serializable {
-  public static final long MICROS_PER_MILLI = 1000L;
-  public static final long MICROS_PER_SECOND = MICROS_PER_MILLI * 1000;
-  public static final long MICROS_PER_MINUTE = MICROS_PER_SECOND * 60;
-  public static final long MICROS_PER_HOUR = MICROS_PER_MINUTE * 60;
-  public static final long MICROS_PER_DAY = MICROS_PER_HOUR * 24;
-  public static final long MICROS_PER_WEEK = MICROS_PER_DAY * 7;
-
-  /**
-   * A function to generate regex which matches interval string's unit part like "3 years".
-   *
-   * First, we can leave out some units in interval string, and we only care about the value of
-   * unit, so here we use non-capturing group to wrap the actual regex.
-   * At the beginning of the actual regex, we should match spaces before the unit part.
-   * Next is the number part, starts with an optional "-" to represent negative value. We use
-   * capturing group to wrap this part as we need the value later.
-   * Finally is the unit name, ends with an optional "s".
-   */
-  private static String unitRegex(String unit) {
-    return "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?";
-  }
-
-  // Matches "interval" followed by the optional unit parts, in this fixed order.
-  private static final Pattern p = Pattern.compile("interval" + unitRegex("year") +
-    unitRegex("month") + unitRegex("week") + unitRegex("day") + unitRegex("hour") +
-    unitRegex("minute") + unitRegex("second") + unitRegex("millisecond") +
-    unitRegex("microsecond"));
-
-  private static final Pattern yearMonthPattern =
-    Pattern.compile("^(?:['|\"])?([+|-])?(\\d+)-(\\d+)(?:['|\"])?$");
-
-  private static final Pattern dayTimePattern =
-    Pattern.compile("^(?:['|\"])?([+|-])?(\\d+) (\\d+):(\\d+):(\\d+)(\\.(\\d+))?(?:['|\"])?$");
-
-  private static final Pattern quoteTrimPattern =
-    Pattern.compile("^(?:['|\"])?(.*?)(?:['|\"])?$");
-
-  /**
-   * Parses a captured group as a long, treating a missing (null) group as zero.
-   */
-  private static long toLong(String s) {
-    return (s == null) ? 0L : Long.parseLong(s);
-  }
-
-  /**
-   * Parses a string of the form "interval [n years] [n months] ... [n microseconds]".
-   *
-   * @param s the string to parse, may be null
-   * @return the parsed interval, or null when `s` is null, does not match, or names no unit
-   */
-  public static CalendarInterval fromString(String s) {
-    if (s == null) {
-      return null;
-    }
-    s = s.trim();
-    Matcher m = p.matcher(s);
-    if (!m.matches() || s.equals("interval")) {
-      return null;
-    } else {
-      long months = toLong(m.group(1)) * 12 + toLong(m.group(2));
-      long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK;
-      microseconds += toLong(m.group(4)) * MICROS_PER_DAY;
-      microseconds += toLong(m.group(5)) * MICROS_PER_HOUR;
-      microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE;
-      microseconds += toLong(m.group(7)) * MICROS_PER_SECOND;
-      microseconds += toLong(m.group(8)) * MICROS_PER_MILLI;
-      microseconds += toLong(m.group(9));
-      // NOTE(review): the narrowing cast keeps only the low 32 bits of `months`; values
-      // outside the int range are not rejected here.
-      return new CalendarInterval((int) months, microseconds);
-    }
-  }
-
-  /**
-   * Parses `s` as a long and checks that it lies within [minValue, maxValue].
-   *
-   * @param fieldName name of the field, used in the error message
-   * @param s the digits to parse; null is treated as zero and is not range-checked
-   * @param minValue smallest accepted value, inclusive
-   * @param maxValue largest accepted value, inclusive
-   * @return the parsed value, or 0 when `s` is null
-   * @throws IllegalArgumentException if the value is out of range (a NumberFormatException,
-   *         which is a subclass, is raised for text that is not a valid long)
-   */
-  public static long toLongWithRange(String fieldName,
-      String s, long minValue, long maxValue) throws IllegalArgumentException {
-    long result = 0;
-    if (s != null) {
-      result = Long.parseLong(s);
-      if (result < minValue || result > maxValue) {
-        throw new IllegalArgumentException(String.format("%s %d outside range [%d, %d]",
-          fieldName, result, minValue, maxValue));
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Parse YearMonth string in form: [-]YYYY-MM
-   *
-   * adapted from HiveIntervalYearMonth.valueOf
-   *
-   * @throws IllegalArgumentException if `s` is null, malformed, or the total number of
-   *         months does not fit in an int
-   */
-  public static CalendarInterval fromYearMonthString(String s) throws IllegalArgumentException {
-    CalendarInterval result = null;
-    if (s == null) {
-      throw new IllegalArgumentException("Interval year-month string was null");
-    }
-    s = s.trim();
-    Matcher m = yearMonthPattern.matcher(s);
-    if (!m.matches()) {
-      throw new IllegalArgumentException(
-        "Interval string does not match year-month format of 'y-m': " + s);
-    } else {
-      try {
-        int sign = m.group(1) != null && m.group(1).equals("-") ? -1 : 1;
-        long years = toLongWithRange("year", m.group(2), 0, Integer.MAX_VALUE);
-        long months = toLongWithRange("month", m.group(3), 0, 11);
-        // Combine in 64-bit arithmetic: years * 12 can exceed the int range even though
-        // `years` itself fits, and a wrapped product would silently flip the sign.
-        long totalMonths = sign * (years * 12 + months);
-        if (totalMonths < Integer.MIN_VALUE || totalMonths > Integer.MAX_VALUE) {
-          throw new IllegalArgumentException(String.format(
-            "total months %d outside range [%d, %d]",
-            totalMonths, Integer.MIN_VALUE, Integer.MAX_VALUE));
-        }
-        result = new CalendarInterval((int) totalMonths, 0);
-      } catch (Exception e) {
-        throw new IllegalArgumentException(
-          "Error parsing interval year-month string: " + e.getMessage(), e);
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn
-   *
-   * adapted from HiveIntervalDayTime.valueOf
-   *
-   * @throws IllegalArgumentException if `s` is null, malformed, or the total number of
-   *         microseconds does not fit in a long
-   */
-  public static CalendarInterval fromDayTimeString(String s) throws IllegalArgumentException {
-    CalendarInterval result = null;
-    if (s == null) {
-      throw new IllegalArgumentException("Interval day-time string was null");
-    }
-    s = s.trim();
-    Matcher m = dayTimePattern.matcher(s);
-    if (!m.matches()) {
-      throw new IllegalArgumentException(
-        "Interval string does not match day-time format of 'd h:m:s.n': " + s);
-    } else {
-      try {
-        int sign = m.group(1) != null && m.group(1).equals("-") ? -1 : 1;
-        long days = toLongWithRange("day", m.group(2), 0, Integer.MAX_VALUE);
-        long hours = toLongWithRange("hour", m.group(3), 0, 23);
-        long minutes = toLongWithRange("minute", m.group(4), 0, 59);
-        long seconds = toLongWithRange("second", m.group(5), 0, 59);
-        // Hive allow nanosecond precision interval
-        long nanos = toLongWithRange("nanosecond", m.group(7), 0L, 999999999L);
-        // The time-of-day part is always below one day, so only the day count can push
-        // the total past Long.MAX_VALUE; reject it instead of letting the product wrap.
-        long subDayMicros = hours * MICROS_PER_HOUR + minutes * MICROS_PER_MINUTE +
-          seconds * MICROS_PER_SECOND + nanos / 1000L;
-        long maxDays = (Long.MAX_VALUE - subDayMicros) / MICROS_PER_DAY;
-        if (days > maxDays) {
-          throw new IllegalArgumentException(String.format("%s %d outside range [%d, %d]",
-            "day", days, 0L, maxDays));
-        }
-        result = new CalendarInterval(0, sign * (days * MICROS_PER_DAY + subDayMicros));
-      } catch (Exception e) {
-        throw new IllegalArgumentException(
-          "Error parsing interval day-time string: " + e.getMessage(), e);
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Parses an optionally quoted number as an interval of a single unit.
-   *
-   * @param unit one of "year", "month", "week", "day", "hour", "minute", "second",
-   *        "millisecond" or "microsecond"
-   * @param s the value, optionally wrapped in quotes
-   * @return the parsed interval, or null when `unit` is none of the names above
-   * @throws IllegalArgumentException if `s` is null or cannot be parsed for the unit
-   */
-  public static CalendarInterval fromSingleUnitString(String unit, String s)
-      throws IllegalArgumentException {
-
-    CalendarInterval result = null;
-    if (s == null) {
-      throw new IllegalArgumentException(String.format("Interval %s string was null", unit));
-    }
-    s = s.trim();
-    Matcher m = quoteTrimPattern.matcher(s);
-    if (!m.matches()) {
-      throw new IllegalArgumentException(
-        "Interval string does not match single-unit format of an optionally quoted value: " + s);
-    } else {
-      try {
-        if (unit.equals("year")) {
-          int year = (int) toLongWithRange("year", m.group(1),
-            Integer.MIN_VALUE / 12, Integer.MAX_VALUE / 12);
-          result = new CalendarInterval(year * 12, 0L);
-
-        } else if (unit.equals("month")) {
-          int month = (int) toLongWithRange("month", m.group(1),
-            Integer.MIN_VALUE, Integer.MAX_VALUE);
-          result = new CalendarInterval(month, 0L);
-
-        } else if (unit.equals("week")) {
-          long week = toLongWithRange("week", m.group(1),
-            Long.MIN_VALUE / MICROS_PER_WEEK, Long.MAX_VALUE / MICROS_PER_WEEK);
-          result = new CalendarInterval(0, week * MICROS_PER_WEEK);
-
-        } else if (unit.equals("day")) {
-          long day = toLongWithRange("day", m.group(1),
-            Long.MIN_VALUE / MICROS_PER_DAY, Long.MAX_VALUE / MICROS_PER_DAY);
-          result = new CalendarInterval(0, day * MICROS_PER_DAY);
-
-        } else if (unit.equals("hour")) {
-          long hour = toLongWithRange("hour", m.group(1),
-            Long.MIN_VALUE / MICROS_PER_HOUR, Long.MAX_VALUE / MICROS_PER_HOUR);
-          result = new CalendarInterval(0, hour * MICROS_PER_HOUR);
-
-        } else if (unit.equals("minute")) {
-          long minute = toLongWithRange("minute", m.group(1),
-            Long.MIN_VALUE / MICROS_PER_MINUTE, Long.MAX_VALUE / MICROS_PER_MINUTE);
-          result = new CalendarInterval(0, minute * MICROS_PER_MINUTE);
-
-        } else if (unit.equals("second")) {
-          long micros = parseSecondNano(m.group(1));
-          result = new CalendarInterval(0, micros);
-
-        } else if (unit.equals("millisecond")) {
-          long millisecond = toLongWithRange("millisecond", m.group(1),
-            Long.MIN_VALUE / MICROS_PER_MILLI, Long.MAX_VALUE / MICROS_PER_MILLI);
-          result = new CalendarInterval(0, millisecond * MICROS_PER_MILLI);
-
-        } else if (unit.equals("microsecond")) {
-          long micros = Long.parseLong(m.group(1));
-          result = new CalendarInterval(0, micros);
-        }
-      } catch (Exception e) {
-        throw new IllegalArgumentException("Error parsing interval string: " + e.getMessage(), e);
-      }
-    }
-    return result;
-  }
-
-  /**
-   * Parse second_nano string in ss.nnnnnnnnn format to microseconds
-   *
-   * The part after the dot is read as a count of nanoseconds and divided by 1000.
-   *
-   * @throws IllegalArgumentException if a part is out of range or there is more than one dot
-   */
-  public static long parseSecondNano(String secondNano) throws IllegalArgumentException {
-    String[] parts = secondNano.split("\\.");
-    if (parts.length == 1) {
-      return toLongWithRange("second", parts[0], Long.MIN_VALUE / MICROS_PER_SECOND,
-        Long.MAX_VALUE / MICROS_PER_SECOND) * MICROS_PER_SECOND;
-
-    } else if (parts.length == 2) {
-      long seconds = parts[0].equals("") ? 0L : toLongWithRange("second", parts[0],
-        Long.MIN_VALUE / MICROS_PER_SECOND, Long.MAX_VALUE / MICROS_PER_SECOND);
-      long nanos = toLongWithRange("nanosecond", parts[1], 0L, 999999999L);
-      return seconds * MICROS_PER_SECOND + nanos / 1000L;
-
-    } else {
-      throw new IllegalArgumentException(
-        "Interval string does not match second-nano format of ss.nnnnnnnnn");
-    }
-  }
-
-  public final int months;
-  public final long microseconds;
-
-  public CalendarInterval(int months, long microseconds) {
-    this.months = months;
-    this.microseconds = microseconds;
-  }
-
-  /** Returns a new interval that is the field-wise sum of this and `that`. */
-  public CalendarInterval add(CalendarInterval that) {
-    int months = this.months + that.months;
-    long microseconds = this.microseconds + that.microseconds;
-    return new CalendarInterval(months, microseconds);
-  }
-
-  /** Returns a new interval that is the field-wise difference of this and `that`. */
-  public CalendarInterval subtract(CalendarInterval that) {
-    int months = this.months - that.months;
-    long microseconds = this.microseconds - that.microseconds;
-    return new CalendarInterval(months, microseconds);
-  }
-
-  /** Returns a new interval with both fields negated. */
-  public CalendarInterval negate() {
-    return new CalendarInterval(-this.months, -this.microseconds);
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    if (this == other) return true;
-    // instanceof is false for null, so no separate null check is needed.
-    if (!(other instanceof CalendarInterval)) return false;
-
-    CalendarInterval o = (CalendarInterval) other;
-    return this.months == o.months && this.microseconds == o.microseconds;
-  }
-
-  @Override
-  public int hashCode() {
-    return 31 * months + (int) microseconds;
-  }
-
-  /**
-   * Renders the interval as "interval" followed by every non-zero unit, largest first.
-   * Unit names are always written in the plural.
-   */
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder("interval");
-
-    if (months != 0) {
-      appendUnit(sb, months / 12, "year");
-      appendUnit(sb, months % 12, "month");
-    }
-
-    if (microseconds != 0) {
-      long rest = microseconds;
-      appendUnit(sb, rest / MICROS_PER_WEEK, "week");
-      rest %= MICROS_PER_WEEK;
-      appendUnit(sb, rest / MICROS_PER_DAY, "day");
-      rest %= MICROS_PER_DAY;
-      appendUnit(sb, rest / MICROS_PER_HOUR, "hour");
-      rest %= MICROS_PER_HOUR;
-      appendUnit(sb, rest / MICROS_PER_MINUTE, "minute");
-      rest %= MICROS_PER_MINUTE;
-      appendUnit(sb, rest / MICROS_PER_SECOND, "second");
-      rest %= MICROS_PER_SECOND;
-      appendUnit(sb, rest / MICROS_PER_MILLI, "millisecond");
-      rest %= MICROS_PER_MILLI;
-      appendUnit(sb, rest, "microsecond");
-    }
-
-    return sb.toString();
-  }
-
-  /** Appends " &lt;value&gt; &lt;unit&gt;s" to `sb` unless `value` is zero. */
-  private static void appendUnit(StringBuilder sb, long value, String unit) {
-    if (value != 0) {
-      sb.append(' ').append(value).append(' ').append(unit).append('s');
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
deleted file mode 100644
index 87706d0..0000000
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ /dev/null
@@ -1,1023 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.unsafe.types;
-
-import javax.annotation.Nonnull;
-import java.io.*;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.Map;
-
-import com.esotericsoftware.kryo.Kryo;
-import com.esotericsoftware.kryo.KryoSerializable;
-import com.esotericsoftware.kryo.io.Input;
-import com.esotericsoftware.kryo.io.Output;
-
-import org.apache.spark.unsafe.Platform;
-import org.apache.spark.unsafe.array.ByteArrayMethods;
-import org.apache.spark.unsafe.hash.Murmur3_x86_32;
-
-import static org.apache.spark.unsafe.Platform.*;
-
-
-/**
- * A UTF-8 String for internal Spark use.
- * <p>
- * A String encoded in UTF-8 as an Array[Byte], which can be used for comparison,
- * search, see http://en.wikipedia.org/wiki/UTF-8 for details.
- * <p>
- * Note: This is not designed for general use cases, should not be used outside SQL.
- */
-public final class UTF8String implements Comparable<UTF8String>, Externalizable, KryoSerializable {
-
-  // These are only updated by readExternal() or read()
-  @Nonnull
-  private Object base;
-  private long offset;
-  private int numBytes;
-
-  /** Returns the object that, together with the base offset, locates the string's bytes. */
-  public Object getBaseObject() { return base; }
-  /** Returns the offset, relative to the base object, of the string's first byte. */
-  public long getBaseOffset() { return offset; }
-
-  // Sequence length in bytes, looked up by (firstByte & 0xFF) - 192 (see
-  // numBytesForFirstByte). Index 0 therefore corresponds to a first byte of 0xC0; the rows
-  // below hold 32 twos, 16 threes, 8 fours, 4 fives and 2 sixes.
-  private static int[] bytesOfCodePointInUTF8 = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    4, 4, 4, 4, 4, 4, 4, 4,
-    5, 5, 5, 5,
-    6, 6};
-
-  private static boolean isLittleEndian = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
-
-  private static final UTF8String COMMA_UTF8 = UTF8String.fromString(",");
-  public static final UTF8String EMPTY_UTF8 = UTF8String.fromString("");
-
-  /**
-   * Wraps a byte array, which should be encoded in UTF-8, in an UTF8String.
-   *
-   * Note: the array is not copied; the returned UTF8String keeps a reference to `bytes`.
-   *
-   * @param bytes the UTF-8 encoded bytes, may be null
-   * @return an UTF8String covering the whole array, or null when `bytes` is null
-   */
-  public static UTF8String fromBytes(byte[] bytes) {
-    if (bytes == null) {
-      return null;
-    }
-    return new UTF8String(bytes, BYTE_ARRAY_OFFSET, bytes.length);
-  }
-
-  /**
-   * Wraps a slice of a byte array, which should be encoded in UTF-8, in an UTF8String.
-   *
-   * Note: the array is not copied; the returned UTF8String keeps a reference to `bytes`.
-   *
-   * @param bytes the UTF-8 encoded bytes, may be null
-   * @param offset index in `bytes` of the first byte of the string
-   * @param numBytes number of bytes belonging to the string
-   * @return an UTF8String covering the slice, or null when `bytes` is null
-   */
-  public static UTF8String fromBytes(byte[] bytes, int offset, int numBytes) {
-    if (bytes == null) {
-      return null;
-    }
-    return new UTF8String(bytes, BYTE_ARRAY_OFFSET + offset, numBytes);
-  }
-
-  /**
-   * Creates an UTF8String from given address (base and offset) and length.
-   *
-   * The arguments are stored as given; no bytes are copied.
-   *
-   * @param base the object the offset is relative to
-   * @param offset the offset of the first byte
-   * @param numBytes the length of the string in bytes
-   */
-  public static UTF8String fromAddress(Object base, long offset, int numBytes) {
-    return new UTF8String(base, offset, numBytes);
-  }
-
-  /**
-   * Creates an UTF8String from String.
-   *
-   * @param str the string to encode as UTF-8, may be null
-   * @return the encoded string, or null when `str` is null
-   */
-  public static UTF8String fromString(String str) {
-    if (str == null) {
-      return null;
-    }
-    // The Charset overload of getBytes declares no checked exception, so no handler is
-    // needed for a charset that every Java platform is required to provide.
-    return fromBytes(str.getBytes(StandardCharsets.UTF_8));
-  }
-
-  /**
-   * Creates an UTF8String that contains `length` spaces.
-   *
-   * @param length the number of space characters in the result
-   */
-  public static UTF8String blankString(int length) {
-    final byte[] padding = new byte[length];
-    Arrays.fill(padding, 0, length, (byte) ' ');
-    return new UTF8String(padding, BYTE_ARRAY_OFFSET, length);
-  }
-
-  /**
-   * Creates a string over `numBytes` bytes located at `offset` relative to `base`. The
-   * arguments are stored as given; nothing is copied.
-   */
-  protected UTF8String(Object base, long offset, int numBytes) {
-    this.base = base;
-    this.offset = offset;
-    this.numBytes = numBytes;
-  }
-
-  // for serialization
-  // Starts out with a null base and zero length.
-  public UTF8String() {
-    this(null, 0, 0);
-  }
-
-  /**
-   * Writes the content of this string into a memory address, identified by an object and an offset.
-   * The target memory address must already be allocated, and have enough space to hold all the
-   * bytes in this string.
-   *
-   * @param target the object the target offset is relative to
-   * @param targetOffset the offset at which the first byte is written
-   */
-  public void writeToMemory(Object target, long targetOffset) {
-    Platform.copyMemory(base, offset, target, targetOffset, numBytes);
-  }
-
-  /**
-   * Writes the bytes of this string into `buffer` at its current position and advances the
-   * position past them. The buffer is expected to be backed by an accessible array.
-   */
-  public void writeTo(ByteBuffer buffer) {
-    assert(buffer.hasArray());
-    final byte[] backing = buffer.array();
-    final int arrayStart = buffer.arrayOffset();
-    final int position = buffer.position();
-    writeToMemory(backing, Platform.BYTE_ARRAY_OFFSET + arrayStart + position);
-    buffer.position(position + numBytes);
-  }
-
-  /**
-   * Returns the number of bytes for a code point with the first byte as `b`
-   *
-   * First bytes below 0xC0, and first bytes that have no entry in the lookup table, are
-   * counted as a single byte so that callers always advance.
-   *
-   * @param b The first byte of a code point
-   */
-  private static int numBytesForFirstByte(final byte b) {
-    final int offset = (b & 0xFF) - 192;
-    // The upper bound guards the largest byte values, which would otherwise index past
-    // the end of the table.
-    if (offset < 0 || offset >= bytesOfCodePointInUTF8.length) {
-      return 1;
-    }
-    return bytesOfCodePointInUTF8[offset];
-  }
-
-  /**
-   * Returns the number of bytes in this string (not the number of code points; see
-   * numChars() for that).
-   */
-  public int numBytes() {
-    return numBytes;
-  }
-
-  /**
-   * Returns the number of code points in it.
-   *
-   * The string is walked from the start, skipping ahead by the sequence length implied by
-   * each first byte.
-   */
-  public int numChars() {
-    int count = 0;
-    int pos = 0;
-    while (pos < numBytes) {
-      pos += numBytesForFirstByte(getByte(pos));
-      count++;
-    }
-    return count;
-  }
-
-  /**
-   * Returns a 64-bit integer that can be used as the prefix used in sorting.
-   *
-   * The result holds up to the first eight bytes of the string, with the first byte in the
-   * most significant position and unused low-order bytes set to zero.
-   */
-  public long getPrefix() {
-    // Since JVMs are either 4-byte aligned or 8-byte aligned, we check the size of the string.
-    // If size is 0, just return 0.
-    // If size is between 0 and 4 (inclusive), assume data is 4-byte aligned under the hood and
-    // use a getInt to fetch the prefix.
-    // If size is greater than 4, assume we have at least 8 bytes of data to fetch.
-    // After getting the data, we use a mask to mask out data that is not part of the string.
-    long p;
-    long mask = 0;
-    if (isLittleEndian) {
-      if (numBytes >= 8) {
-        p = Platform.getLong(base, offset);
-      } else if (numBytes > 4) {
-        p = Platform.getLong(base, offset);
-        // covers the low (8 - numBytes) bytes, which do not belong to the string
-        mask = (1L << (8 - numBytes) * 8) - 1;
-      } else if (numBytes > 0) {
-        p = (long) Platform.getInt(base, offset);
-        mask = (1L << (8 - numBytes) * 8) - 1;
-      } else {
-        p = 0;
-      }
-      // move the first byte of the string into the most significant position
-      p = java.lang.Long.reverseBytes(p);
-    } else {
-      // byteOrder == ByteOrder.BIG_ENDIAN
-      if (numBytes >= 8) {
-        p = Platform.getLong(base, offset);
-      } else if (numBytes > 4) {
-        p = Platform.getLong(base, offset);
-        mask = (1L << (8 - numBytes) * 8) - 1;
-      } else if (numBytes > 0) {
-        // shift the four bytes read into the upper half of the prefix
-        p = ((long) Platform.getInt(base, offset)) << 32;
-        mask = (1L << (8 - numBytes) * 8) - 1;
-      } else {
-        p = 0;
-      }
-    }
-    // clear everything that was read beyond the end of the string
-    p &= ~mask;
-    return p;
-  }
-
-  /**
-   * Returns the underlying bytes. The backing array itself is returned when this string
-   * spans exactly the whole of it; otherwise the bytes are copied into a new array.
-   */
-  public byte[] getBytes() {
-    // avoid copy if `base` is a `byte[]` that this string covers completely
-    final boolean coversWholeArray = offset == BYTE_ARRAY_OFFSET && base instanceof byte[]
-      && ((byte[]) base).length == numBytes;
-    if (coversWholeArray) {
-      return (byte[]) base;
-    }
-    final byte[] copy = new byte[numBytes];
-    copyMemory(base, offset, copy, BYTE_ARRAY_OFFSET, numBytes);
-    return copy;
-  }
-
-  /**
-   * Returns a substring of this.
-   *
-   * Positions are counted in code points; the selected bytes are copied into a new string.
-   *
-   * @param start the position of first code point
-   * @param until the position after last code point, exclusive.
-   */
-  public UTF8String substring(final int start, final int until) {
-    // A code point occupies at least one byte, so a start at or past numBytes is
-    // certainly past the last code point.
-    if (until <= start || start >= numBytes) {
-      return EMPTY_UTF8;
-    }
-
-    int bytePos = 0;
-    int charPos = 0;
-    // advance to the byte where code point `start` begins
-    for (; bytePos < numBytes && charPos < start; charPos++) {
-      bytePos += numBytesForFirstByte(getByte(bytePos));
-    }
-    final int from = bytePos;
-
-    // advance to the byte just after code point `until - 1`
-    for (; bytePos < numBytes && charPos < until; charPos++) {
-      bytePos += numBytesForFirstByte(getByte(bytePos));
-    }
-
-    final int length = bytePos - from;
-    if (length <= 0) {
-      return EMPTY_UTF8;
-    }
-    final byte[] slice = new byte[length];
-    copyMemory(base, offset + from, slice, BYTE_ARRAY_OFFSET, length);
-    return fromBytes(slice);
-  }
-
-  /**
-   * Returns the substring selected by a SQL-style (position, length) pair, both counted
-   * in code points.
-   *
-   * @param pos SQL-style start position (see below)
-   * @param length the maximum number of code points to return; Integer.MAX_VALUE means
-   *        "up to the end of the string"
-   */
-  public UTF8String substringSQL(int pos, int length) {
-    // Information regarding the pos calculation:
-    // Hive and SQL use one-based indexing for SUBSTR arguments but also accept zero and
-    // negative indices for start positions. If a start index i is greater than 0, it
-    // refers to element i-1 in the sequence. If a start index i is less than 0, it refers
-    // to the -ith element before the end of the sequence. If a start index i is 0, it
-    // refers to the first element.
-    int len = numChars();
-    int start = (pos > 0) ? pos - 1 : ((pos < 0) ? len + pos : 0);
-    int end;
-    if (length == Integer.MAX_VALUE) {
-      end = len;
-    } else {
-      // Add in 64-bit and saturate: with a large `length` the int sum would wrap around to
-      // a negative end and the result would wrongly be empty.
-      long sum = (long) start + length;
-      end = (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, sum));
-    }
-    return substring(start, end);
-  }
-
-  /**
-   * Returns whether this contains `substring` or not. An empty `substring` is always
-   * contained.
-   */
-  public boolean contains(final UTF8String substring) {
-    final int needleLen = substring.numBytes;
-    if (needleLen == 0) {
-      return true;
-    }
-
-    final byte lead = substring.getByte(0);
-    final int lastStart = numBytes - needleLen;
-    for (int pos = 0; pos <= lastStart; pos++) {
-      // cheap first-byte test before the full comparison
-      if (getByte(pos) != lead) {
-        continue;
-      }
-      if (matchAt(substring, pos)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * Returns the byte at position `i`, counted in bytes from the start of this string.
-   * No bounds check is made here.
-   */
-  private byte getByte(int i) {
-    return Platform.getByte(base, offset + i);
-  }
-
-  private boolean matchAt(final UTF8String s, int pos) {
-    if (s.numBytes + pos > numBytes || pos < 0) {
-      return false;
-    }
-    return ByteArrayMethods.arrayEquals(base, offset + pos, s.base, s.offset, s.numBytes);
-  }
-
-  /** Returns whether this string begins with the bytes of `prefix`. */
-  public boolean startsWith(final UTF8String prefix) {
-    return matchAt(prefix, 0);
-  }
-
-  /** Returns whether this string ends with the bytes of `suffix`. */
-  public boolean endsWith(final UTF8String suffix) {
-    return matchAt(suffix, numBytes - suffix.numBytes);
-  }
-
-  /**
-   * Returns the upper case of this string
-   *
-   * Single-byte characters are converted in place; as soon as a multi-byte sequence or a
-   * conversion result above 127 is met, the whole string is converted by the slow path.
-   */
-  public UTF8String toUpperCase() {
-    if (numBytes == 0) {
-      return EMPTY_UTF8;
-    }
-
-    byte[] bytes = new byte[numBytes];
-    for (int i = 0; i < numBytes; i++) {
-      byte b = getByte(i);
-      if (numBytesForFirstByte(b) != 1) {
-        // fallback
-        return toUpperCaseSlow();
-      }
-      int upper = Character.toUpperCase((int) b);
-      if (upper > 127) {
-        // fallback
-        return toUpperCaseSlow();
-      }
-      bytes[i] = (byte) upper;
-    }
-    return fromBytes(bytes);
-  }
-
-  /** Upper-cases by converting to a java.lang.String, upper-casing that, and re-encoding. */
-  private UTF8String toUpperCaseSlow() {
-    return fromString(toString().toUpperCase());
-  }
-
-  /**
-   * Returns the lower case of this string
-   *
-   * Single-byte characters are converted in place; as soon as a multi-byte sequence or a
-   * conversion result above 127 is met, the whole string is converted by the slow path.
-   */
-  public UTF8String toLowerCase() {
-    if (numBytes == 0) {
-      return EMPTY_UTF8;
-    }
-
-    byte[] bytes = new byte[numBytes];
-    for (int i = 0; i < numBytes; i++) {
-      byte b = getByte(i);
-      if (numBytesForFirstByte(b) != 1) {
-        // fallback
-        return toLowerCaseSlow();
-      }
-      int lower = Character.toLowerCase((int) b);
-      if (lower > 127) {
-        // fallback
-        return toLowerCaseSlow();
-      }
-      bytes[i] = (byte) lower;
-    }
-    return fromBytes(bytes);
-  }
-
-  /** Lower-cases by converting to a java.lang.String, lower-casing that, and re-encoding. */
-  private UTF8String toLowerCaseSlow() {
-    return fromString(toString().toLowerCase());
-  }
-
-  /**
-   * Returns the title case of this string, that could be used as title.
-   *
-   * The first byte, and every byte that follows a space, is title-cased; all other bytes
-   * are copied unchanged. The slow path is taken when such a byte starts a multi-byte
-   * sequence or its title-case form is above 127.
-   */
-  public UTF8String toTitleCase() {
-    if (numBytes == 0) {
-      return EMPTY_UTF8;
-    }
-
-    final byte[] result = new byte[numBytes];
-    for (int i = 0; i < numBytes; i++) {
-      final byte current = getByte(i);
-      final boolean wordStart = (i == 0) || getByte(i - 1) == ' ';
-      if (!wordStart) {
-        result[i] = current;
-        continue;
-      }
-      if (numBytesForFirstByte(current) != 1) {
-        // fallback
-        return toTitleCaseSlow();
-      }
-      final int titled = Character.toTitleCase(current);
-      if (titled > 127) {
-        // fallback
-        return toTitleCaseSlow();
-      }
-      result[i] = (byte) titled;
-    }
-    return fromBytes(result);
-  }
-
-  /**
-   * Title-cases via java.lang.String: the first char, and every char that follows a space,
-   * is replaced by its title-case form. Callers only reach this with a non-empty string.
-   */
-  private UTF8String toTitleCaseSlow() {
-    String s = toString();
-    // The buffer never leaves this method, so the unsynchronized builder is sufficient.
-    StringBuilder sb = new StringBuilder(s);
-    sb.setCharAt(0, Character.toTitleCase(sb.charAt(0)));
-    for (int i = 1; i < s.length(); i++) {
-      if (sb.charAt(i - 1) == ' ') {
-        sb.setCharAt(i, Character.toTitleCase(sb.charAt(i)));
-      }
-    }
-    return fromString(sb.toString());
-  }
-
-  /**
-   * Returns the index of the string `match` in this String. This string has to be a comma
-   * separated list. If `match` contains a comma 0 will be returned. If the `match` isn't
-   * part of this String, 0 will be returned, else the index of match (1-based index)
-   */
-  public int findInSet(UTF8String match) {
-    if (match.contains(COMMA_UTF8)) {
-      return 0;
-    }
-
-    int position = 1;
-    int elementStart = 0;
-    // The end of the string closes the last element exactly as a comma would.
-    for (int i = 0; i <= numBytes; i++) {
-      final boolean elementEnds = (i == numBytes) || getByte(i) == (byte) ',';
-      if (!elementEnds) {
-        continue;
-      }
-      if (i - elementStart == match.numBytes &&
-        ByteArrayMethods.arrayEquals(base, offset + elementStart, match.base, match.offset,
-          match.numBytes)) {
-        return position;
-      }
-      elementStart = i + 1;
-      position++;
-    }
-    return 0;
-  }
-
-  /**
-   * Copy the bytes from the current UTF8String, and make a new UTF8String.
-   * @param start the start position of the current UTF8String in bytes.
-   * @param end the end position of the current UTF8String in bytes.
-   * @return a new UTF8String in the position of [start, end] of current UTF8String bytes.
-   */
-  private UTF8String copyUTF8String(int start, int end) {
-    int len = end - start + 1;
-    byte[] newBytes = new byte[len];
-    copyMemory(base, offset + start, newBytes, BYTE_ARRAY_OFFSET, len);
-    return UTF8String.fromBytes(newBytes);
-  }
-
-  /**
-   * Returns a copy of this string with leading and trailing bytes in the range
-   * [0x00, 0x20] (the space and everything below it) removed.
-   */
-  public UTF8String trim() {
-    // first byte to keep, scanning from the left
-    int first = 0;
-    for (; first < this.numBytes; first++) {
-      final byte b = getByte(first);
-      if (b > 0x20 || b < 0x00) {
-        break;
-      }
-    }
-    // last byte to keep, scanning from the right
-    int last = this.numBytes - 1;
-    for (; last >= 0; last--) {
-      final byte b = getByte(last);
-      if (b > 0x20 || b < 0x00) {
-        break;
-      }
-    }
-    if (first > last) {
-      // empty string
-      return UTF8String.fromBytes(new byte[0]);
-    }
-    return copyUTF8String(first, last);
-  }
-
-  /**
-   * Returns a copy of this string with leading bytes in the range [0x00, 0x20] (the space
-   * and everything below it) removed.
-   */
-  public UTF8String trimLeft() {
-    int first = 0;
-    for (; first < this.numBytes; first++) {
-      final byte b = getByte(first);
-      if (b > 0x20 || b < 0x00) {
-        break;
-      }
-    }
-    if (first == this.numBytes) {
-      // empty string
-      return UTF8String.fromBytes(new byte[0]);
-    }
-    return copyUTF8String(first, this.numBytes - 1);
-  }
-
-  /**
-   * Returns a copy of this string with trailing bytes in the range [0x00, 0x20] (the space
-   * and everything below it) removed.
-   */
-  public UTF8String trimRight() {
-    int last = numBytes - 1;
-    for (; last >= 0; last--) {
-      final byte b = getByte(last);
-      if (b > 0x20 || b < 0x00) {
-        break;
-      }
-    }
-
-    if (last < 0) {
-      // empty string
-      return UTF8String.fromBytes(new byte[0]);
-    }
-    return copyUTF8String(0, last);
-  }
-
-  /**
-   * Returns a new string with the code points of this string in reverse order. The bytes
-   * within each code point keep their order.
-   */
-  public UTF8String reverse() {
-    byte[] result = new byte[this.numBytes];
-
-    int i = 0; // position in byte
-    while (i < numBytes) {
-      // Never take more bytes than are left: a first byte announcing a longer sequence
-      // than the string still holds would otherwise put the destination before the start
-      // of `result`.
-      int len = Math.min(numBytesForFirstByte(getByte(i)), numBytes - i);
-      copyMemory(this.base, this.offset + i, result,
-        BYTE_ARRAY_OFFSET + result.length - i - len, len);
-
-      i += len;
-    }
-
-    return UTF8String.fromBytes(result);
-  }
-
-  /**
-   * Returns this string concatenated with itself `times` times.
-   *
-   * @param times the number of repetitions; zero or a negative value gives the empty string
-   * @throws IllegalArgumentException if the result would need more than Integer.MAX_VALUE
-   *         bytes
-   */
-  public UTF8String repeat(int times) {
-    if (times <= 0) {
-      return EMPTY_UTF8;
-    }
-
-    // Size the result in 64-bit: the int product can wrap around and yield an array that
-    // is too small for the copies below.
-    long totalBytes = (long) numBytes * times;
-    if (totalBytes > Integer.MAX_VALUE) {
-      throw new IllegalArgumentException("Cannot repeat a string of " + numBytes +
-        " bytes " + times + " times: the result exceeds the maximum array size");
-    }
-
-    byte[] newBytes = new byte[(int) totalBytes];
-    copyMemory(this.base, this.offset, newBytes, BYTE_ARRAY_OFFSET, numBytes);
-
-    // Double the filled region on every round instead of copying one repetition at a time.
-    int copied = 1;
-    while (copied < times) {
-      int toCopy = Math.min(copied, times - copied);
-      System.arraycopy(newBytes, 0, newBytes, copied * numBytes, numBytes * toCopy);
-      copied += toCopy;
-    }
-
-    return UTF8String.fromBytes(newBytes);
-  }
-
-  /**
-   * Returns the position of the first occurrence of `v` in the current string, searching
-   * from the specified position (0-based index). Positions are counted in code points.
-   *
-   * @param v the string to be searched
-   * @param start the start position of the current string for searching
-   * @return the position of the first occurrence of `v`; -1 if it is not found; 0 if `v`
-   *         is empty
-   */
-  public int indexOf(UTF8String v, int start) {
-    final int needleLen = v.numBytes();
-    if (needleLen == 0) {
-      return 0;
-    }
-
-    // locate to the start position.
-    int bytePos = 0;
-    int charPos = 0;
-    for (; bytePos < numBytes && charPos < start; charPos++) {
-      bytePos += numBytesForFirstByte(getByte(bytePos));
-    }
-
-    // try every code point boundary that still leaves room for `v`
-    while (bytePos + needleLen <= numBytes) {
-      if (ByteArrayMethods.arrayEquals(base, offset + bytePos, v.base, v.offset, needleLen)) {
-        return charPos;
-      }
-      bytePos += numBytesForFirstByte(getByte(bytePos));
-      charPos++;
-    }
-
-    return -1;
-  }
-
-  /**
-   * Find the `str` from left to right.
-   */
-  private int find(UTF8String str, int start) {
-    assert (str.numBytes > 0);
-    while (start <= numBytes - str.numBytes) {
-      if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) {
-        return start;
-      }
-      start += 1;
-    }
-    return -1;
-  }
-
-  /**
-   * Find the `str` from right to left.
-   *
-   * @param str the bytes to look for, must not be empty
-   * @param start the byte position at which the search begins
-   * @return the byte position of the first match at or before `start`, or -1 if none
-   */
-  private int rfind(UTF8String str, int start) {
-    assert (str.numBytes > 0);
-    for (int pos = start; pos >= 0; pos--) {
-      if (ByteArrayMethods.arrayEquals(base, offset + pos, str.base, str.offset, str.numBytes)) {
-        return pos;
-      }
-    }
-    return -1;
-  }
-
-  /**
-   * Returns the substring from this string before `count` occurrences of the delimiter
-   * `delim`. If `count` is positive, everything to the left of the final delimiter
-   * (counting from the left) is returned. If `count` is negative, everything to the right
-   * of the final delimiter (counting from the right) is returned. The match against
-   * `delim` is byte-wise and therefore case-sensitive.
-   *
-   * An empty `delim` or a zero `count` gives the empty string; when fewer delimiters than
-   * requested are present, this string itself is returned.
-   */
-  public UTF8String subStringIndex(UTF8String delim, int count) {
-    if (delim.numBytes == 0 || count == 0) {
-      return EMPTY_UTF8;
-    }
-
-    if (count > 0) {
-      int idx = -1;
-      for (int remaining = count; remaining > 0; remaining--) {
-        idx = find(delim, idx + 1);
-        if (idx < 0) {
-          // can not find enough delim
-          return this;
-        }
-      }
-      if (idx == 0) {
-        return EMPTY_UTF8;
-      }
-      // keep everything before the last delimiter that was found
-      final byte[] head = new byte[idx];
-      copyMemory(base, offset, head, BYTE_ARRAY_OFFSET, idx);
-      return fromBytes(head);
-    }
-
-    int idx = numBytes - delim.numBytes + 1;
-    for (int remaining = -count; remaining > 0; remaining--) {
-      idx = rfind(delim, idx - 1);
-      if (idx < 0) {
-        // can not find enough delim
-        return this;
-      }
-    }
-    if (idx + delim.numBytes == numBytes) {
-      return EMPTY_UTF8;
-    }
-    // keep everything after the last delimiter that was found
-    final int size = numBytes - delim.numBytes - idx;
-    final byte[] tail = new byte[size];
-    copyMemory(base, offset + idx + delim.numBytes, tail, BYTE_ARRAY_OFFSET, size);
-    return fromBytes(tail);
-  }
-
-  /**
-   * Returns str, right-padded with pad to a length of len
-   * For example:
-   *   ('hi', 5, '??') =&gt; 'hi???'
-   *   ('hi', 1, '??') =&gt; 'h'
-   */
-  public UTF8String rpad(int len, UTF8String pad) {
-    int spaces = len - this.numChars(); // number of char need to pad
-    if (spaces <= 0 || pad.numBytes() == 0) {
-      // no padding at all, return the substring of the current string
-      return substring(0, len);
-    } else {
-      int padChars = pad.numChars();
-      int count = spaces / padChars; // how many padding string needed
-      // the partial string of the padding
-      UTF8String remain = pad.substring(0, spaces - padChars * count);
-
-      byte[] data = new byte[this.numBytes + pad.numBytes * count + remain.numBytes];
-      copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET, this.numBytes);
-      int offset = this.numBytes;
-      int idx = 0;
-      while (idx < count) {
-        copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
-        ++ idx;
-        offset += pad.numBytes;
-      }
-      copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
-
-      return UTF8String.fromBytes(data);
-    }
-  }
-
-  /**
-   * Returns str, left-padded with pad to a length of len.
-   * For example:
-   *   ('hi', 5, '??') =&gt; '???hi'
-   *   ('hi', 1, '??') =&gt; 'h'
-   */
-  public UTF8String lpad(int len, UTF8String pad) {
-    int spaces = len - this.numChars(); // number of char need to pad
-    if (spaces <= 0 || pad.numBytes() == 0) {
-      // no padding at all, return the substring of the current string
-      return substring(0, len);
-    } else {
-      int padChars = pad.numChars();
-      int count = spaces / padChars; // how many padding string needed
-      // the partial string of the padding
-      UTF8String remain = pad.substring(0, spaces - padChars * count);
-
-      byte[] data = new byte[this.numBytes + pad.numBytes * count + remain.numBytes];
-
-      int offset = 0;
-      int idx = 0;
-      while (idx < count) {
-        copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + offset, pad.numBytes);
-        ++ idx;
-        offset += pad.numBytes;
-      }
-      copyMemory(remain.base, remain.offset, data, BYTE_ARRAY_OFFSET + offset, remain.numBytes);
-      offset += remain.numBytes;
-      copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET + offset, numBytes());
-
-      return UTF8String.fromBytes(data);
-    }
-  }
-
-  /**
-   * Concatenates input strings together into a single string. Returns null if any input is null.
-   */
-  public static UTF8String concat(UTF8String... inputs) {
-    // Compute the total length of the result.
-    int totalLength = 0;
-    for (int i = 0; i < inputs.length; i++) {
-      if (inputs[i] != null) {
-        totalLength += inputs[i].numBytes;
-      } else {
-        return null;
-      }
-    }
-
-    // Allocate a new byte array, and copy the inputs one by one into it.
-    final byte[] result = new byte[totalLength];
-    int offset = 0;
-    for (int i = 0; i < inputs.length; i++) {
-      int len = inputs[i].numBytes;
-      copyMemory(
-        inputs[i].base, inputs[i].offset,
-        result, BYTE_ARRAY_OFFSET + offset,
-        len);
-      offset += len;
-    }
-    return fromBytes(result);
-  }
-
-  /**
-   * Concatenates input strings together into a single string using the separator.
-   * A null input is skipped. For example, concat(",", "a", null, "c") would yield "a,c".
-   */
-  public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {
-    if (separator == null) {
-      return null;
-    }
-
-    int numInputBytes = 0;  // total number of bytes from the inputs
-    int numInputs = 0;      // number of non-null inputs
-    for (int i = 0; i < inputs.length; i++) {
-      if (inputs[i] != null) {
-        numInputBytes += inputs[i].numBytes;
-        numInputs++;
-      }
-    }
-
-    if (numInputs == 0) {
-      // Return an empty string if there is no input, or all the inputs are null.
-      return fromBytes(new byte[0]);
-    }
-
-    // Allocate a new byte array, and copy the inputs one by one into it.
-    // The size of the new array is the size of all inputs, plus the separators.
-    final byte[] result = new byte[numInputBytes + (numInputs - 1) * separator.numBytes];
-    int offset = 0;
-
-    for (int i = 0, j = 0; i < inputs.length; i++) {
-      if (inputs[i] != null) {
-        int len = inputs[i].numBytes;
-        copyMemory(
-          inputs[i].base, inputs[i].offset,
-          result, BYTE_ARRAY_OFFSET + offset,
-          len);
-        offset += len;
-
-        j++;
-        // Add separator if this is not the last input.
-        if (j < numInputs) {
-          copyMemory(
-            separator.base, separator.offset,
-            result, BYTE_ARRAY_OFFSET + offset,
-            separator.numBytes);
-          offset += separator.numBytes;
-        }
-      }
-    }
-    return fromBytes(result);
-  }
-
-  public UTF8String[] split(UTF8String pattern, int limit) {
-    String[] splits = toString().split(pattern.toString(), limit);
-    UTF8String[] res = new UTF8String[splits.length];
-    for (int i = 0; i < res.length; i++) {
-      res[i] = fromString(splits[i]);
-    }
-    return res;
-  }
-
-  // TODO: Need to use `Code Point` here instead of Char in case the character longer than 2 bytes
-  public UTF8String translate(Map<Character, Character> dict) {
-    String srcStr = this.toString();
-
-    StringBuilder sb = new StringBuilder();
-    for(int k = 0; k< srcStr.length(); k++) {
-      if (null == dict.get(srcStr.charAt(k))) {
-        sb.append(srcStr.charAt(k));
-      } else if ('\0' != dict.get(srcStr.charAt(k))){
-        sb.append(dict.get(srcStr.charAt(k)));
-      }
-    }
-    return fromString(sb.toString());
-  }
-
-  @Override
-  public String toString() {
-    try {
-      return new String(getBytes(), "utf-8");
-    } catch (UnsupportedEncodingException e) {
-      // Turn the exception into unchecked so we can find out about it at runtime, but
-      // don't need to add lots of boilerplate code everywhere.
-      throwException(e);
-      return "unknown";  // we will never reach here.
-    }
-  }
-
-  @Override
-  public UTF8String clone() {
-    return fromBytes(getBytes());
-  }
-
-  @Override
-  public int compareTo(@Nonnull final UTF8String other) {
-    int len = Math.min(numBytes, other.numBytes);
-    // TODO: compare 8 bytes as unsigned long
-    for (int i = 0; i < len; i ++) {
-      // In UTF-8, the byte should be unsigned, so we should compare them as unsigned int.
-      int res = (getByte(i) & 0xFF) - (other.getByte(i) & 0xFF);
-      if (res != 0) {
-        return res;
-      }
-    }
-    return numBytes - other.numBytes;
-  }
-
-  public int compare(final UTF8String other) {
-    return compareTo(other);
-  }
-
-  @Override
-  public boolean equals(final Object other) {
-    if (other instanceof UTF8String) {
-      UTF8String o = (UTF8String) other;
-      if (numBytes != o.numBytes) {
-        return false;
-      }
-      return ByteArrayMethods.arrayEquals(base, offset, o.base, o.offset, numBytes);
-    } else {
-      return false;
-    }
-  }
-
-  /**
-   * Levenshtein distance is a metric for measuring the distance of two strings. The distance is
-   * defined by the minimum number of single-character edits (i.e. insertions, deletions or
-   * substitutions) that are required to change one of the strings into the other.
-   */
-  public int levenshteinDistance(UTF8String other) {
-    // Implementation adopted from org.apache.common.lang3.StringUtils.getLevenshteinDistance
-
-    int n = numChars();
-    int m = other.numChars();
-
-    if (n == 0) {
-      return m;
-    } else if (m == 0) {
-      return n;
-    }
-
-    UTF8String s, t;
-
-    if (n <= m) {
-      s = this;
-      t = other;
-    } else {
-      s = other;
-      t = this;
-      int swap;
-      swap = n;
-      n = m;
-      m = swap;
-    }
-
-    int[] p = new int[n + 1];
-    int[] d = new int[n + 1];
-    int[] swap;
-
-    int i, i_bytes, j, j_bytes, num_bytes_j, cost;
-
-    for (i = 0; i <= n; i++) {
-      p[i] = i;
-    }
-
-    for (j = 0, j_bytes = 0; j < m; j_bytes += num_bytes_j, j++) {
-      num_bytes_j = numBytesForFirstByte(t.getByte(j_bytes));
-      d[0] = j + 1;
-
-      for (i = 0, i_bytes = 0; i < n; i_bytes += numBytesForFirstByte(s.getByte(i_bytes)), i++) {
-        if (s.getByte(i_bytes) != t.getByte(j_bytes) ||
-              num_bytes_j != numBytesForFirstByte(s.getByte(i_bytes))) {
-          cost = 1;
-        } else {
-          cost = (ByteArrayMethods.arrayEquals(t.base, t.offset + j_bytes, s.base,
-              s.offset + i_bytes, num_bytes_j)) ? 0 : 1;
-        }
-        d[i + 1] = Math.min(Math.min(d[i] + 1, p[i + 1] + 1), p[i] + cost);
-      }
-
-      swap = p;
-      p = d;
-      d = swap;
-    }
-
-    return p[n];
-  }
-
-  @Override
-  public int hashCode() {
-    return Murmur3_x86_32.hashUnsafeBytes(base, offset, numBytes, 42);
-  }
-
-  /**
-   * Soundex mapping table
-   */
-  private static final byte[] US_ENGLISH_MAPPING = {'0', '1', '2', '3', '0', '1', '2', '7',
-    '0', '2', '2', '4', '5', '5', '0', '1', '2', '6', '2', '3', '0', '1', '7', '2', '0', '2'};
-
-  /**
-   * Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names,
-   * but can also be used as a general purpose scheme to find word with similar phonemes.
-   * https://en.wikipedia.org/wiki/Soundex
-   */
-  public UTF8String soundex() {
-    if (numBytes == 0) {
-      return EMPTY_UTF8;
-    }
-
-    byte b = getByte(0);
-    if ('a' <= b && b <= 'z') {
-      b -= 32;
-    } else if (b < 'A' || 'Z' < b) {
-      // first character must be a letter
-      return this;
-    }
-    byte[] sx = {'0', '0', '0', '0'};
-    sx[0] = b;
-    int sxi = 1;
-    int idx = b - 'A';
-    byte lastCode = US_ENGLISH_MAPPING[idx];
-
-    for (int i = 1; i < numBytes; i++) {
-      b = getByte(i);
-      if ('a' <= b && b <= 'z') {
-        b -= 32;
-      } else if (b < 'A' || 'Z' < b) {
-        // not a letter, skip it
-        lastCode = '0';
-        continue;
-      }
-      idx = b - 'A';
-      byte code = US_ENGLISH_MAPPING[idx];
-      if (code == '7') {
-        // ignore it
-      } else {
-        if (code != '0' && code != lastCode) {
-          sx[sxi++] = code;
-          if (sxi > 3) break;
-        }
-        lastCode = code;
-      }
-    }
-    return UTF8String.fromBytes(sx);
-  }
-
-  public void writeExternal(ObjectOutput out) throws IOException {
-    byte[] bytes = getBytes();
-    out.writeInt(bytes.length);
-    out.write(bytes);
-  }
-
-  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
-    offset = BYTE_ARRAY_OFFSET;
-    numBytes = in.readInt();
-    base = new byte[numBytes];
-    in.readFully((byte[]) base);
-  }
-
-  @Override
-  public void write(Kryo kryo, Output out) {
-    byte[] bytes = getBytes();
-    out.writeInt(bytes.length);
-    out.write(bytes);
-  }
-
-  @Override
-  public void read(Kryo kryo, Input in) {
-    this.offset = BYTE_ARRAY_OFFSET;
-    this.numBytes = in.readInt();
-    this.base = new byte[numBytes];
-    in.read((byte[]) base);
-  }
-
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org

[3/4] spark git commit: [SPARK-13548][BUILD] Move tags and unsafe modules into common

Posted by rx...@apache.org.

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
new file mode 100644
index 0000000..87706d0
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -0,0 +1,1023 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import javax.annotation.Nonnull;
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import java.util.Map;
+
+import com.esotericsoftware.kryo.Kryo;
+import com.esotericsoftware.kryo.KryoSerializable;
+import com.esotericsoftware.kryo.io.Input;
+import com.esotericsoftware.kryo.io.Output;
+
+import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.array.ByteArrayMethods;
+import org.apache.spark.unsafe.hash.Murmur3_x86_32;
+
+import static org.apache.spark.unsafe.Platform.*;
+
+
+/**
+ * A UTF-8 String for internal Spark use.
+ * <p>
+ * A String encoded in UTF-8 as an Array[Byte], which can be used for comparison,
+ * search, see http://en.wikipedia.org/wiki/UTF-8 for details.
+ * <p>
+ * Note: This is not designed for general use cases, should not be used outside SQL.
+ */
+public final class UTF8String implements Comparable<UTF8String>, Externalizable, KryoSerializable {
+
+  // These are only updated by readExternal() or read()
+  @Nonnull
+  private Object base;
+  private long offset;
+  private int numBytes;
+
+  public Object getBaseObject() { return base; }
+  public long getBaseOffset() { return offset; }
+
+  private static int[] bytesOfCodePointInUTF8 = {2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5,
+    6, 6};
+
+  private static boolean isLittleEndian = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+
+  private static final UTF8String COMMA_UTF8 = UTF8String.fromString(",");
+  public static final UTF8String EMPTY_UTF8 = UTF8String.fromString("");
+
+  /**
+   * Creates an UTF8String from byte array, which should be encoded in UTF-8.
+   *
+   * Note: `bytes` will be hold by returned UTF8String.
+   */
+  public static UTF8String fromBytes(byte[] bytes) {
+    if (bytes != null) {
+      return new UTF8String(bytes, BYTE_ARRAY_OFFSET, bytes.length);
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Creates an UTF8String from byte array, which should be encoded in UTF-8.
+   *
+   * Note: `bytes` will be hold by returned UTF8String.
+   */
+  public static UTF8String fromBytes(byte[] bytes, int offset, int numBytes) {
+    if (bytes != null) {
+      return new UTF8String(bytes, BYTE_ARRAY_OFFSET + offset, numBytes);
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Creates an UTF8String from given address (base and offset) and length.
+   */
+  public static UTF8String fromAddress(Object base, long offset, int numBytes) {
+    return new UTF8String(base, offset, numBytes);
+  }
+
+  /**
+   * Creates an UTF8String from String.
+   */
+  public static UTF8String fromString(String str) {
+    if (str == null) return null;
+    try {
+      return fromBytes(str.getBytes("utf-8"));
+    } catch (UnsupportedEncodingException e) {
+      // Turn the exception into unchecked so we can find out about it at runtime, but
+      // don't need to add lots of boilerplate code everywhere.
+      throwException(e);
+      return null;
+    }
+  }
+
+  /**
+   * Creates an UTF8String that contains `length` spaces.
+   */
+  public static UTF8String blankString(int length) {
+    byte[] spaces = new byte[length];
+    Arrays.fill(spaces, (byte) ' ');
+    return fromBytes(spaces);
+  }
+
+  protected UTF8String(Object base, long offset, int numBytes) {
+    this.base = base;
+    this.offset = offset;
+    this.numBytes = numBytes;
+  }
+
+  // for serialization
+  public UTF8String() {
+    this(null, 0, 0);
+  }
+
+  /**
+   * Writes the content of this string into a memory address, identified by an object and an offset.
+   * The target memory address must already been allocated, and have enough space to hold all the
+   * bytes in this string.
+   */
+  public void writeToMemory(Object target, long targetOffset) {
+    Platform.copyMemory(base, offset, target, targetOffset, numBytes);
+  }
+
+  public void writeTo(ByteBuffer buffer) {
+    assert(buffer.hasArray());
+    byte[] target = buffer.array();
+    int offset = buffer.arrayOffset();
+    int pos = buffer.position();
+    writeToMemory(target, Platform.BYTE_ARRAY_OFFSET + offset + pos);
+    buffer.position(pos + numBytes);
+  }
+
+  /**
+   * Returns the number of bytes for a code point with the first byte as `b`
+   * @param b The first byte of a code point
+   */
+  private static int numBytesForFirstByte(final byte b) {
+    final int offset = (b & 0xFF) - 192;
+    return (offset >= 0) ? bytesOfCodePointInUTF8[offset] : 1;
+  }
+
+  /**
+   * Returns the number of bytes
+   */
+  public int numBytes() {
+    return numBytes;
+  }
+
+  /**
+   * Returns the number of code points in it.
+   */
+  public int numChars() {
+    int len = 0;
+    for (int i = 0; i < numBytes; i += numBytesForFirstByte(getByte(i))) {
+      len += 1;
+    }
+    return len;
+  }
+
+  /**
+   * Returns a 64-bit integer that can be used as the prefix used in sorting.
+   */
+  public long getPrefix() {
+    // Since JVMs are either 4-byte aligned or 8-byte aligned, we check the size of the string.
+    // If size is 0, just return 0.
+    // If size is between 0 and 4 (inclusive), assume data is 4-byte aligned under the hood and
+    // use a getInt to fetch the prefix.
+    // If size is greater than 4, assume we have at least 8 bytes of data to fetch.
+    // After getting the data, we use a mask to mask out data that is not part of the string.
+    long p;
+    long mask = 0;
+    if (isLittleEndian) {
+      if (numBytes >= 8) {
+        p = Platform.getLong(base, offset);
+      } else if (numBytes > 4) {
+        p = Platform.getLong(base, offset);
+        mask = (1L << (8 - numBytes) * 8) - 1;
+      } else if (numBytes > 0) {
+        p = (long) Platform.getInt(base, offset);
+        mask = (1L << (8 - numBytes) * 8) - 1;
+      } else {
+        p = 0;
+      }
+      p = java.lang.Long.reverseBytes(p);
+    } else {
+      // byteOrder == ByteOrder.BIG_ENDIAN
+      if (numBytes >= 8) {
+        p = Platform.getLong(base, offset);
+      } else if (numBytes > 4) {
+        p = Platform.getLong(base, offset);
+        mask = (1L << (8 - numBytes) * 8) - 1;
+      } else if (numBytes > 0) {
+        p = ((long) Platform.getInt(base, offset)) << 32;
+        mask = (1L << (8 - numBytes) * 8) - 1;
+      } else {
+        p = 0;
+      }
+    }
+    p &= ~mask;
+    return p;
+  }
+
+  /**
+   * Returns the underline bytes, will be a copy of it if it's part of another array.
+   */
+  public byte[] getBytes() {
+    // avoid copy if `base` is `byte[]`
+    if (offset == BYTE_ARRAY_OFFSET && base instanceof byte[]
+      && ((byte[]) base).length == numBytes) {
+      return (byte[]) base;
+    } else {
+      byte[] bytes = new byte[numBytes];
+      copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, numBytes);
+      return bytes;
+    }
+  }
+
+  /**
+   * Returns a substring of this.
+   * @param start the position of first code point
+   * @param until the position after last code point, exclusive.
+   */
+  public UTF8String substring(final int start, final int until) {
+    if (until <= start || start >= numBytes) {
+      return EMPTY_UTF8;
+    }
+
+    int i = 0;
+    int c = 0;
+    while (i < numBytes && c < start) {
+      i += numBytesForFirstByte(getByte(i));
+      c += 1;
+    }
+
+    int j = i;
+    while (i < numBytes && c < until) {
+      i += numBytesForFirstByte(getByte(i));
+      c += 1;
+    }
+
+    if (i > j) {
+      byte[] bytes = new byte[i - j];
+      copyMemory(base, offset + j, bytes, BYTE_ARRAY_OFFSET, i - j);
+      return fromBytes(bytes);
+    } else {
+      return EMPTY_UTF8;
+    }
+  }
+
+  public UTF8String substringSQL(int pos, int length) {
+    // Information regarding the pos calculation:
+    // Hive and SQL use one-based indexing for SUBSTR arguments but also accept zero and
+    // negative indices for start positions. If a start index i is greater than 0, it
+    // refers to element i-1 in the sequence. If a start index i is less than 0, it refers
+    // to the -ith element before the end of the sequence. If a start index i is 0, it
+    // refers to the first element.
+    int len = numChars();
+    int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0);
+    int end = (length == Integer.MAX_VALUE) ? len : start + length;
+    return substring(start, end);
+  }
+
+  /**
+   * Returns whether this contains `substring` or not.
+   */
+  public boolean contains(final UTF8String substring) {
+    if (substring.numBytes == 0) {
+      return true;
+    }
+
+    byte first = substring.getByte(0);
+    for (int i = 0; i <= numBytes - substring.numBytes; i++) {
+      if (getByte(i) == first && matchAt(substring, i)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns the byte at position `i`.
+   */
+  private byte getByte(int i) {
+    return Platform.getByte(base, offset + i);
+  }
+
+  private boolean matchAt(final UTF8String s, int pos) {
+    if (s.numBytes + pos > numBytes || pos < 0) {
+      return false;
+    }
+    return ByteArrayMethods.arrayEquals(base, offset + pos, s.base, s.offset, s.numBytes);
+  }
+
+  public boolean startsWith(final UTF8String prefix) {
+    return matchAt(prefix, 0);
+  }
+
+  public boolean endsWith(final UTF8String suffix) {
+    return matchAt(suffix, numBytes - suffix.numBytes);
+  }
+
+  /**
+   * Returns the upper case of this string
+   */
+  public UTF8String toUpperCase() {
+    if (numBytes == 0) {
+      return EMPTY_UTF8;
+    }
+
+    byte[] bytes = new byte[numBytes];
+    bytes[0] = (byte) Character.toTitleCase(getByte(0));
+    for (int i = 0; i < numBytes; i++) {
+      byte b = getByte(i);
+      if (numBytesForFirstByte(b) != 1) {
+        // fallback
+        return toUpperCaseSlow();
+      }
+      int upper = Character.toUpperCase((int) b);
+      if (upper > 127) {
+        // fallback
+        return toUpperCaseSlow();
+      }
+      bytes[i] = (byte) upper;
+    }
+    return fromBytes(bytes);
+  }
+
+  private UTF8String toUpperCaseSlow() {
+    return fromString(toString().toUpperCase());
+  }
+
+  /**
+   * Returns the lower case of this string
+   */
+  public UTF8String toLowerCase() {
+    if (numBytes == 0) {
+      return EMPTY_UTF8;
+    }
+
+    byte[] bytes = new byte[numBytes];
+    bytes[0] = (byte) Character.toTitleCase(getByte(0));
+    for (int i = 0; i < numBytes; i++) {
+      byte b = getByte(i);
+      if (numBytesForFirstByte(b) != 1) {
+        // fallback
+        return toLowerCaseSlow();
+      }
+      int lower = Character.toLowerCase((int) b);
+      if (lower > 127) {
+        // fallback
+        return toLowerCaseSlow();
+      }
+      bytes[i] = (byte) lower;
+    }
+    return fromBytes(bytes);
+  }
+
+  private UTF8String toLowerCaseSlow() {
+    return fromString(toString().toLowerCase());
+  }
+
+  /**
+   * Returns the title case of this string, that could be used as title.
+   */
+  public UTF8String toTitleCase() {
+    if (numBytes == 0) {
+      return EMPTY_UTF8;
+    }
+
+    byte[] bytes = new byte[numBytes];
+    for (int i = 0; i < numBytes; i++) {
+      byte b = getByte(i);
+      if (i == 0 || getByte(i - 1) == ' ') {
+        if (numBytesForFirstByte(b) != 1) {
+          // fallback
+          return toTitleCaseSlow();
+        }
+        int upper = Character.toTitleCase(b);
+        if (upper > 127) {
+          // fallback
+          return toTitleCaseSlow();
+        }
+        bytes[i] = (byte) upper;
+      } else {
+        bytes[i] = b;
+      }
+    }
+    return fromBytes(bytes);
+  }
+
+  private UTF8String toTitleCaseSlow() {
+    StringBuffer sb = new StringBuffer();
+    String s = toString();
+    sb.append(s);
+    sb.setCharAt(0, Character.toTitleCase(sb.charAt(0)));
+    for (int i = 1; i < s.length(); i++) {
+      if (sb.charAt(i - 1) == ' ') {
+        sb.setCharAt(i, Character.toTitleCase(sb.charAt(i)));
+      }
+    }
+    return fromString(sb.toString());
+  }
+
+  /*
+   * Returns the index of the string `match` in this String. This string has to be a comma separated
+   * list. If `match` contains a comma 0 will be returned. If the `match` isn't part of this String,
+   * 0 will be returned, else the index of match (1-based index)
+   */
+  public int findInSet(UTF8String match) {
+    if (match.contains(COMMA_UTF8)) {
+      return 0;
+    }
+
+    int n = 1, lastComma = -1;
+    for (int i = 0; i < numBytes; i++) {
+      if (getByte(i) == (byte) ',') {
+        if (i - (lastComma + 1) == match.numBytes &&
+          ByteArrayMethods.arrayEquals(base, offset + (lastComma + 1), match.base, match.offset,
+            match.numBytes)) {
+          return n;
+        }
+        lastComma = i;
+        n++;
+      }
+    }
+    if (numBytes - (lastComma + 1) == match.numBytes &&
+      ByteArrayMethods.arrayEquals(base, offset + (lastComma + 1), match.base, match.offset,
+        match.numBytes)) {
+      return n;
+    }
+    return 0;
+  }
+
+  /**
+   * Copy the bytes from the current UTF8String, and make a new UTF8String.
+   * @param start the start position of the current UTF8String in bytes.
+   * @param end the end position of the current UTF8String in bytes.
+   * @return a new UTF8String in the position of [start, end] of current UTF8String bytes.
+   */
+  private UTF8String copyUTF8String(int start, int end) {
+    int len = end - start + 1;
+    byte[] newBytes = new byte[len];
+    copyMemory(base, offset + start, newBytes, BYTE_ARRAY_OFFSET, len);
+    return UTF8String.fromBytes(newBytes);
+  }
+
+  public UTF8String trim() {
+    int s = 0;
+    int e = this.numBytes - 1;
+    // skip all of the space (0x20) in the left side
+    while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++;
+    // skip all of the space (0x20) in the right side
+    while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--;
+    if (s > e) {
+      // empty string
+      return UTF8String.fromBytes(new byte[0]);
+    } else {
+      return copyUTF8String(s, e);
+    }
+  }
+
+  public UTF8String trimLeft() {
+    int s = 0;
+    // skip all of the space (0x20) in the left side
+    while (s < this.numBytes && getByte(s) <= 0x20 && getByte(s) >= 0x00) s++;
+    if (s == this.numBytes) {
+      // empty string
+      return UTF8String.fromBytes(new byte[0]);
+    } else {
+      return copyUTF8String(s, this.numBytes - 1);
+    }
+  }
+
+  public UTF8String trimRight() {
+    int e = numBytes - 1;
+    // skip all of the space (0x20) in the right side
+    while (e >= 0 && getByte(e) <= 0x20 && getByte(e) >= 0x00) e--;
+
+    if (e < 0) {
+      // empty string
+      return UTF8String.fromBytes(new byte[0]);
+    } else {
+      return copyUTF8String(0, e);
+    }
+  }
+
+  public UTF8String reverse() {
+    byte[] result = new byte[this.numBytes];
+
+    int i = 0; // position in byte
+    while (i < numBytes) {
+      int len = numBytesForFirstByte(getByte(i));
+      copyMemory(this.base, this.offset + i, result,
+        BYTE_ARRAY_OFFSET + result.length - i - len, len);
+
+      i += len;
+    }
+
+    return UTF8String.fromBytes(result);
+  }
+
+  public UTF8String repeat(int times) {
+    if (times <= 0) {
+      return EMPTY_UTF8;
+    }
+
+    byte[] newBytes = new byte[numBytes * times];
+    copyMemory(this.base, this.offset, newBytes, BYTE_ARRAY_OFFSET, numBytes);
+
+    int copied = 1;
+    while (copied < times) {
+      int toCopy = Math.min(copied, times - copied);
+      System.arraycopy(newBytes, 0, newBytes, copied * numBytes, numBytes * toCopy);
+      copied += toCopy;
+    }
+
+    return UTF8String.fromBytes(newBytes);
+  }
+
+  /**
+   * Returns the position of the first occurrence of substr in
+   * current string from the specified position (0-based index).
+   *
+   * @param v the string to be searched
+   * @param start the start position of the current string for searching
+   * @return the position of the first occurrence of substr, if not found, -1 returned.
+   */
+  public int indexOf(UTF8String v, int start) {
+    if (v.numBytes() == 0) {
+      return 0;
+    }
+
+    // locate to the start position.
+    int i = 0; // position in byte
+    int c = 0; // position in character
+    while (i < numBytes && c < start) {
+      i += numBytesForFirstByte(getByte(i));
+      c += 1;
+    }
+
+    do {
+      if (i + v.numBytes > numBytes) {
+        return -1;
+      }
+      if (ByteArrayMethods.arrayEquals(base, offset + i, v.base, v.offset, v.numBytes)) {
+        return c;
+      }
+      i += numBytesForFirstByte(getByte(i));
+      c += 1;
+    } while (i < numBytes);
+
+    return -1;
+  }
+
+  /**
+   * Find the `str` from left to right.
+   */
+  private int find(UTF8String str, int start) {
+    assert (str.numBytes > 0);
+    while (start <= numBytes - str.numBytes) {
+      if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) {
+        return start;
+      }
+      start += 1;
+    }
+    return -1;
+  }
+
+  /**
+   * Find the `str` from right to left.
+   */
+  private int rfind(UTF8String str, int start) {
+    assert (str.numBytes > 0);
+    while (start >= 0) {
+      if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) {
+        return start;
+      }
+      start -= 1;
+    }
+    return -1;
+  }
+
+  /**
+   * Returns the substring of this string before {@code count} occurrences of the delimiter
+   * {@code delim}.
+   *
+   * If {@code count} is positive, everything to the left of the final delimiter (counting from
+   * the left) is returned. If {@code count} is negative, everything to the right of the final
+   * delimiter (counting from the right) is returned. The match is an exact, case-sensitive
+   * byte comparison.
+   *
+   * @param delim the delimiter to search for
+   * @param count how many delimiters to skip; the sign selects the search direction
+   * @return the empty string if {@code delim} is empty or {@code count} is 0; this string itself
+   *         if it contains fewer than {@code |count|} delimiters; otherwise the selected part
+   */
+  public UTF8String subStringIndex(UTF8String delim, int count) {
+    if (delim.numBytes == 0 || count == 0) {
+      return EMPTY_UTF8;
+    }
+    if (count > 0) {
+      int idx = -1;
+      while (count > 0) {
+        idx = find(delim, idx + 1);
+        if (idx < 0) {
+          // can not find enough delim
+          return this;
+        }
+        count--;
+      }
+      if (idx == 0) {
+        return EMPTY_UTF8;
+      }
+      byte[] bytes = new byte[idx];
+      copyMemory(base, offset, bytes, BYTE_ARRAY_OFFSET, idx);
+      return fromBytes(bytes);
+
+    } else {
+      int idx = numBytes - delim.numBytes + 1;
+      // Count upwards towards zero instead of negating `count`: -Integer.MIN_VALUE overflows
+      // back to Integer.MIN_VALUE, which would skip the search entirely and end in an attempt
+      // to allocate an array of negative size.
+      while (count < 0) {
+        idx = rfind(delim, idx - 1);
+        if (idx < 0) {
+          // can not find enough delim
+          return this;
+        }
+        count++;
+      }
+      if (idx + delim.numBytes == numBytes) {
+        return EMPTY_UTF8;
+      }
+      int size = numBytes - delim.numBytes - idx;
+      byte[] bytes = new byte[size];
+      copyMemory(base, offset + idx + delim.numBytes, bytes, BYTE_ARRAY_OFFSET, size);
+      return fromBytes(bytes);
+    }
+  }
+
+  /**
+   * Returns this string right-padded with {@code pad} to a length of {@code len} characters.
+   * When no padding is needed (or {@code pad} is empty) the result is {@code substring(0, len)}.
+   * For example:
+   *   ('hi', 5, '??') =&gt; 'hi???'
+   *   ('hi', 1, '??') =&gt; 'h'
+   *
+   * @param len the requested length in characters
+   * @param pad the string repeated (and cut off if necessary) to fill the missing characters
+   */
+  public UTF8String rpad(int len, UTF8String pad) {
+    final int missing = len - this.numChars(); // characters still to be filled
+    if (missing <= 0 || pad.numBytes() == 0) {
+      // nothing to pad with or nothing to pad: just cut the current string
+      return substring(0, len);
+    }
+
+    final int padChars = pad.numChars();
+    final int fullCopies = missing / padChars; // whole repetitions of the padding
+    // leading part of the padding that fills whatever the whole repetitions leave over
+    final UTF8String tail = pad.substring(0, missing - padChars * fullCopies);
+
+    final byte[] data = new byte[this.numBytes + pad.numBytes * fullCopies + tail.numBytes];
+    copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET, this.numBytes);
+
+    int written = this.numBytes; // bytes of `data` filled so far
+    for (int n = 0; n < fullCopies; n++) {
+      copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + written, pad.numBytes);
+      written += pad.numBytes;
+    }
+    copyMemory(tail.base, tail.offset, data, BYTE_ARRAY_OFFSET + written, tail.numBytes);
+
+    return UTF8String.fromBytes(data);
+  }
+
+  /**
+   * Returns this string left-padded with {@code pad} to a length of {@code len} characters.
+   * When no padding is needed (or {@code pad} is empty) the result is {@code substring(0, len)}.
+   * For example:
+   *   ('hi', 5, '??') =&gt; '???hi'
+   *   ('hi', 1, '??') =&gt; 'h'
+   *
+   * @param len the requested length in characters
+   * @param pad the string repeated (and cut off if necessary) to fill the missing characters
+   */
+  public UTF8String lpad(int len, UTF8String pad) {
+    final int missing = len - this.numChars(); // characters still to be filled
+    if (missing <= 0 || pad.numBytes() == 0) {
+      // nothing to pad with or nothing to pad: just cut the current string
+      return substring(0, len);
+    }
+
+    final int padChars = pad.numChars();
+    final int fullCopies = missing / padChars; // whole repetitions of the padding
+    // leading part of the padding that fills whatever the whole repetitions leave over
+    final UTF8String tail = pad.substring(0, missing - padChars * fullCopies);
+
+    final byte[] data = new byte[this.numBytes + pad.numBytes * fullCopies + tail.numBytes];
+
+    int written = 0; // bytes of `data` filled so far
+    for (int n = 0; n < fullCopies; n++) {
+      copyMemory(pad.base, pad.offset, data, BYTE_ARRAY_OFFSET + written, pad.numBytes);
+      written += pad.numBytes;
+    }
+    copyMemory(tail.base, tail.offset, data, BYTE_ARRAY_OFFSET + written, tail.numBytes);
+    written += tail.numBytes;
+    // the original content goes last, right after the padding
+    copyMemory(this.base, this.offset, data, BYTE_ARRAY_OFFSET + written, numBytes());
+
+    return UTF8String.fromBytes(data);
+  }
+
+  /**
+   * Joins the given strings, in order, into one string.
+   *
+   * @param inputs the strings to join
+   * @return the concatenation, or null as soon as any input is null
+   */
+  public static UTF8String concat(UTF8String... inputs) {
+    // First pass: size of the result; bail out on the first null input.
+    int totalLength = 0;
+    for (UTF8String input : inputs) {
+      if (input == null) {
+        return null;
+      }
+      totalLength += input.numBytes;
+    }
+
+    // Second pass: copy every input behind the previous one.
+    final byte[] result = new byte[totalLength];
+    int written = 0;
+    for (UTF8String input : inputs) {
+      copyMemory(input.base, input.offset, result, BYTE_ARRAY_OFFSET + written, input.numBytes);
+      written += input.numBytes;
+    }
+    return fromBytes(result);
+  }
+
+  /**
+   * Joins the given strings into one string, placing {@code separator} between neighbours.
+   * Null inputs are skipped. For example, concatWs(",", "a", null, "c") would yield "a,c".
+   *
+   * @param separator the string inserted between two consecutive non-null inputs
+   * @param inputs the strings to join
+   * @return null if {@code separator} is null; an empty string if there is no non-null input;
+   *         otherwise the joined string
+   */
+  public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {
+    if (separator == null) {
+      return null;
+    }
+
+    int payloadBytes = 0;  // total number of bytes contributed by the inputs
+    int present = 0;       // number of non-null inputs
+    for (UTF8String input : inputs) {
+      if (input != null) {
+        payloadBytes += input.numBytes;
+        present++;
+      }
+    }
+
+    if (present == 0) {
+      // No input at all, or nothing but nulls.
+      return fromBytes(new byte[0]);
+    }
+
+    // All inputs plus one separator between each pair of neighbours.
+    final byte[] result = new byte[payloadBytes + (present - 1) * separator.numBytes];
+    int written = 0;
+    int remaining = present; // non-null inputs not copied yet
+
+    for (UTF8String input : inputs) {
+      if (input == null) {
+        continue;
+      }
+      copyMemory(input.base, input.offset, result, BYTE_ARRAY_OFFSET + written, input.numBytes);
+      written += input.numBytes;
+
+      remaining--;
+      // A separator follows every input except the last one.
+      if (remaining > 0) {
+        copyMemory(
+          separator.base, separator.offset,
+          result, BYTE_ARRAY_OFFSET + written,
+          separator.numBytes);
+        written += separator.numBytes;
+      }
+    }
+    return fromBytes(result);
+  }
+
+  /**
+   * Splits this string by converting it to a {@link String} and delegating to
+   * {@link String#split(String, int)}, so {@code pattern} is interpreted as a regular
+   * expression and {@code limit} has the meaning documented there.
+   *
+   * @param pattern the regular expression to split around
+   * @param limit the limit passed through to {@code String.split}
+   * @return the pieces, each converted back to a UTF8String
+   */
+  public UTF8String[] split(UTF8String pattern, int limit) {
+    final String[] parts = toString().split(pattern.toString(), limit);
+    final UTF8String[] converted = new UTF8String[parts.length];
+    int next = 0;
+    for (String part : parts) {
+      converted[next++] = fromString(part);
+    }
+    return converted;
+  }
+
+  /**
+   * Replaces characters of this string according to {@code dict}.
+   *
+   * A character without an entry in {@code dict} is kept as is, a character mapped to
+   * {@code '\0'} is dropped, and any other character is replaced by its mapped value.
+   * The mapping works on UTF-16 {@code char} units of the decoded string.
+   *
+   * @param dict the character mapping to apply
+   * @return the translated string
+   */
+  // TODO: Need to use `Code Point` here instead of Char in case the character longer than 2 bytes
+  public UTF8String translate(Map<Character, Character> dict) {
+    final String srcStr = this.toString();
+
+    // The result can never be longer than the source: each char maps to at most one char.
+    final StringBuilder sb = new StringBuilder(srcStr.length());
+    for (int k = 0; k < srcStr.length(); k++) {
+      final char ch = srcStr.charAt(k);
+      // Look the character up once instead of once per branch.
+      final Character mapped = dict.get(ch);
+      if (mapped == null) {
+        sb.append(ch);
+      } else if (mapped != '\0') {
+        sb.append(mapped.charValue());
+      }
+    }
+    return fromString(sb.toString());
+  }
+
+  @Override
+  public String toString() {
+    try {
+      return new String(getBytes(), "utf-8");
+    } catch (UnsupportedEncodingException e) {
+      // Turn the exception into unchecked so we can find out about it at runtime, but
+      // don't need to add lots of boilerplate code everywhere.
+      throwException(e);
+      return "unknown";  // we will never reach here.
+    }
+  }
+
+  /**
+   * Returns a new UTF8String built from the bytes {@code getBytes()} returns for this string.
+   */
+  @Override
+  public UTF8String clone() {
+    final byte[] content = getBytes();
+    return fromBytes(content);
+  }
+
+  /**
+   * Orders strings by their bytes, each byte taken as an unsigned value; if one string is a
+   * prefix of the other, the shorter one sorts first.
+   *
+   * @return a negative value, zero or a positive value if this string is less than, equal to
+   *         or greater than {@code other}
+   */
+  @Override
+  public int compareTo(@Nonnull final UTF8String other) {
+    final int common = Math.min(numBytes, other.numBytes);
+    // TODO: compare 8 bytes as unsigned long
+    int pos = 0;
+    while (pos < common) {
+      // UTF-8 bytes are unsigned, so widen them without sign extension before comparing.
+      final int mine = getByte(pos) & 0xFF;
+      final int theirs = other.getByte(pos) & 0xFF;
+      if (mine != theirs) {
+        return mine - theirs;
+      }
+      pos++;
+    }
+    return numBytes - other.numBytes;
+  }
+
+  /**
+   * Same as {@link #compareTo(UTF8String)}; this method simply delegates to it.
+   */
+  public int compare(final UTF8String other) {
+    final int ordering = compareTo(other);
+    return ordering;
+  }
+
+  /**
+   * Two UTF8Strings are equal when they have the same number of bytes and identical content.
+   * Anything that is not a UTF8String (including null) is never equal.
+   */
+  @Override
+  public boolean equals(final Object other) {
+    if (!(other instanceof UTF8String)) {
+      return false;
+    }
+    final UTF8String that = (UTF8String) other;
+    // Cheap length check first; the byte comparison only runs for equally long strings.
+    return numBytes == that.numBytes &&
+      ByteArrayMethods.arrayEquals(base, offset, that.base, that.offset, numBytes);
+  }
+
+  /**
+   * Levenshtein distance is a metric for measuring the distance of two strings. The distance is
+   * defined by the minimum number of single-character edits (i.e. insertions, deletions or
+   * substitutions) that are required to change one of the strings into the other.
+   *
+   * Characters are compared as whole UTF-8 sequences, and only two rows of the distance table
+   * are kept at any time.
+   *
+   * @param other the string to compare this string with
+   * @return the edit distance, counted in characters
+   */
+  public int levenshteinDistance(UTF8String other) {
+    // Implementation adopted from org.apache.common.lang3.StringUtils.getLevenshteinDistance
+
+    int n = numChars();
+    int m = other.numChars();
+
+    // If one side is empty the distance is the length of the other side.
+    if (n == 0) {
+      return m;
+    } else if (m == 0) {
+      return n;
+    }
+
+    UTF8String s, t;
+
+    // Make `s` the string with fewer characters (n <= m) so the two rows below are as short
+    // as possible.
+    if (n <= m) {
+      s = this;
+      t = other;
+    } else {
+      s = other;
+      t = this;
+      int swap;
+      swap = n;
+      n = m;
+      m = swap;
+    }
+
+    int[] p = new int[n + 1]; // previous row of the distance table
+    int[] d = new int[n + 1]; // row currently being filled
+    int[] swap;
+
+    // i/j are character positions in s/t; i_bytes/j_bytes are the matching byte positions.
+    int i, i_bytes, j, j_bytes, num_bytes_j, cost;
+
+    // Row 0: turning the first i characters of s into the empty string takes i edits.
+    for (i = 0; i <= n; i++) {
+      p[i] = i;
+    }
+
+    for (j = 0, j_bytes = 0; j < m; j_bytes += num_bytes_j, j++) {
+      num_bytes_j = numBytesForFirstByte(t.getByte(j_bytes));
+      d[0] = j + 1;
+
+      for (i = 0, i_bytes = 0; i < n; i_bytes += numBytesForFirstByte(s.getByte(i_bytes)), i++) {
+        // Quick rejection: a different first byte or a different encoded length means the
+        // characters differ. Only otherwise are the full byte sequences compared.
+        if (s.getByte(i_bytes) != t.getByte(j_bytes) ||
+              num_bytes_j != numBytesForFirstByte(s.getByte(i_bytes))) {
+          cost = 1;
+        } else {
+          cost = (ByteArrayMethods.arrayEquals(t.base, t.offset + j_bytes, s.base,
+              s.offset + i_bytes, num_bytes_j)) ? 0 : 1;
+        }
+        // Cheapest of: d[i] + 1, p[i + 1] + 1, and p[i] + cost (match or substitution).
+        d[i + 1] = Math.min(Math.min(d[i] + 1, p[i + 1] + 1), p[i] + cost);
+      }
+
+      // The row just filled becomes the previous row of the next iteration.
+      swap = p;
+      p = d;
+      d = swap;
+    }
+
+    // After the final swap `p` holds the last computed row.
+    return p[n];
+  }
+
+  /**
+   * Hashes the bytes of this string with {@code Murmur3_x86_32.hashUnsafeBytes}.
+   */
+  @Override
+  public int hashCode() {
+    // Fixed last argument of the hash call; presumably the Murmur3 seed (signature not
+    // visible here - confirm against Murmur3_x86_32).
+    final int seed = 42;
+    return Murmur3_x86_32.hashUnsafeBytes(base, offset, numBytes, seed);
+  }
+
+  /**
+   * Soundex mapping table, indexed by {@code letter - 'A'} for the upper-case letters A to Z.
+   * Each entry is the code digit of that letter as an ASCII character: '0' marks letters that
+   * produce no digit, and '7' (used for H and W) marks letters that soundex() ignores.
+   */
+  private static final byte[] US_ENGLISH_MAPPING = {'0', '1', '2', '3', '0', '1', '2', '7',
+    '0', '2', '2', '4', '5', '5', '0', '1', '2', '6', '2', '3', '0', '1', '7', '2', '0', '2'};
+
+  /**
+   * Encodes this string into its Soundex value. Soundex is an encoding used to relate similar
+   * names, but can also be used as a general purpose scheme to find words with similar phonemes.
+   * https://en.wikipedia.org/wiki/Soundex
+   *
+   * @return the empty string for an empty input; this string unchanged if its first byte is not
+   *         an ASCII letter; otherwise the upper-cased first letter followed by three digits
+   */
+  public UTF8String soundex() {
+    if (numBytes == 0) {
+      return EMPTY_UTF8;
+    }
+
+    byte first = getByte(0);
+    if ('a' <= first && first <= 'z') {
+      first -= 32; // to upper case
+    } else if (first < 'A' || 'Z' < first) {
+      // first character must be a letter
+      return this;
+    }
+
+    // The first letter is kept verbatim; unused digit slots stay '0'.
+    final byte[] encoded = {first, '0', '0', '0'};
+    int filled = 1;
+    byte previous = US_ENGLISH_MAPPING[first - 'A'];
+
+    // Stop as soon as all three digit slots are taken.
+    for (int i = 1; i < numBytes && filled <= 3; i++) {
+      byte letter = getByte(i);
+      if ('a' <= letter && letter <= 'z') {
+        letter -= 32; // to upper case
+      } else if (letter < 'A' || 'Z' < letter) {
+        // not a letter: skip it, but let it separate equal codes
+        previous = '0';
+        continue;
+      }
+
+      final byte code = US_ENGLISH_MAPPING[letter - 'A'];
+      if (code == '7') {
+        // ignored entirely: does not even update the previous code
+        continue;
+      }
+      if (code != '0' && code != previous) {
+        encoded[filled++] = code;
+      }
+      previous = code;
+    }
+    return UTF8String.fromBytes(encoded);
+  }
+
+  /**
+   * Writes this string as a 4-byte length followed by that many bytes of content.
+   */
+  public void writeExternal(ObjectOutput out) throws IOException {
+    final byte[] payload = getBytes();
+    final int length = payload.length;
+    out.writeInt(length);
+    out.write(payload);
+  }
+
+  /**
+   * Restores this string from the form produced by writeExternal: a 4-byte length followed by
+   * that many bytes, which are read into a freshly allocated on-heap byte array.
+   */
+  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+    offset = BYTE_ARRAY_OFFSET;
+    numBytes = in.readInt();
+    final byte[] payload = new byte[numBytes];
+    base = payload;
+    in.readFully(payload);
+  }
+
+  /**
+   * Kryo serialization: writes a 4-byte length followed by that many bytes of content.
+   */
+  @Override
+  public void write(Kryo kryo, Output out) {
+    final byte[] payload = getBytes();
+    final int length = payload.length;
+    out.writeInt(length);
+    out.write(payload);
+  }
+
+  /**
+   * Kryo deserialization: reads a 4-byte length followed by that many bytes of content into a
+   * freshly allocated on-heap byte array.
+   */
+  @Override
+  public void read(Kryo kryo, Input in) {
+    this.offset = BYTE_ARRAY_OFFSET;
+    this.numBytes = in.readInt();
+    this.base = new byte[numBytes];
+    // readBytes() either fills the whole array or throws, whereas read() may stop early and
+    // only report that through its return value - which would silently truncate the string.
+    in.readBytes((byte[]) base);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
new file mode 100644
index 0000000..693ec6e
--- /dev/null
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Checks that Platform.copyMemory copies correctly when source and destination overlap.
+ */
+public class PlatformUtilSuite {
+
+  /** Fills the array so that every element holds its own index, truncated to a byte. */
+  private static void fillWithIndexPattern(byte[] data) {
+    for (int i = 0; i < data.length; ++i) {
+      data[i] = (byte)i;
+    }
+  }
+
+  @Test
+  public void overlappingCopyMemory() {
+    final int size = 2 * 1024 * 1024;
+    final byte[] data = new byte[3 * 1024 * 1024];
+    fillWithIndexPattern(data);
+
+    // Source and destination are the very same region: nothing may change.
+    Platform.copyMemory(data, Platform.BYTE_ARRAY_OFFSET, data, Platform.BYTE_ARRAY_OFFSET, size);
+    for (int i = 0; i < data.length; ++i) {
+      Assert.assertEquals((byte)i, data[i]);
+    }
+
+    // Destination starts one byte before the source: content moves one position down.
+    final long shifted = Platform.BYTE_ARRAY_OFFSET + 1;
+    Platform.copyMemory(data, shifted, data, Platform.BYTE_ARRAY_OFFSET, size);
+    for (int i = 0; i < size; ++i) {
+      Assert.assertEquals((byte)(i + 1), data[i]);
+    }
+
+    // Destination starts one byte after the source: content moves one position up.
+    fillWithIndexPattern(data);
+    Platform.copyMemory(data, Platform.BYTE_ARRAY_OFFSET, data, shifted, size);
+    for (int i = 0; i < size; ++i) {
+      Assert.assertEquals((byte)i, data[i + 1]);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
new file mode 100644
index 0000000..fb8e53b
--- /dev/null
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/array/LongArraySuite.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.array;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.spark.unsafe.memory.MemoryBlock;
+
+/**
+ * Basic set/get/size/zeroOut checks for LongArray on top of an on-heap long[].
+ */
+public class LongArraySuite {
+
+  @Test
+  public void basicTest() {
+    final long[] backing = new long[2];
+    final LongArray arr = new LongArray(MemoryBlock.fromLongArray(backing));
+
+    // The second slot is written twice; the later value must win.
+    arr.set(0, 1L);
+    arr.set(1, 2L);
+    arr.set(1, 3L);
+
+    Assert.assertEquals(2, arr.size());
+    Assert.assertEquals(1L, arr.get(0));
+    Assert.assertEquals(3L, arr.get(1));
+
+    arr.zeroOut();
+    for (int slot = 0; slot < 2; slot++) {
+      Assert.assertEquals(0L, arr.get(slot));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
new file mode 100644
index 0000000..e759cb3
--- /dev/null
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/hash/Murmur3_x86_32Suite.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.hash;
+
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.spark.unsafe.Platform;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test file based on Guava's Murmur3Hash32Test.
+ */
+public class Murmur3_x86_32Suite {
+
+  private static final Murmur3_x86_32 hasher = new Murmur3_x86_32(0);
+
+  /**
+   * Fails unless more than 95% of the hashed inputs produced distinct hash codes.
+   * This is a very loose bound.
+   */
+  private static void assertFewCollisions(Set<Integer> distinctHashes, int attempts) {
+    Assert.assertTrue(distinctHashes.size() > attempts * 0.95);
+  }
+
+  @Test
+  public void testKnownIntegerInputs() {
+    Assert.assertEquals(593689054, hasher.hashInt(0));
+    Assert.assertEquals(-189366624, hasher.hashInt(-42));
+    Assert.assertEquals(-1134849565, hasher.hashInt(42));
+    Assert.assertEquals(-1718298732, hasher.hashInt(Integer.MIN_VALUE));
+    Assert.assertEquals(-1653689534, hasher.hashInt(Integer.MAX_VALUE));
+  }
+
+  @Test
+  public void testKnownLongInputs() {
+    Assert.assertEquals(1669671676, hasher.hashLong(0L));
+    Assert.assertEquals(-846261623, hasher.hashLong(-42L));
+    Assert.assertEquals(1871679806, hasher.hashLong(42L));
+    Assert.assertEquals(1366273829, hasher.hashLong(Long.MIN_VALUE));
+    Assert.assertEquals(-2106506049, hasher.hashLong(Long.MAX_VALUE));
+  }
+
+  @Test
+  public void randomizedStressTest() {
+    final int attempts = 65536;
+    final Random rand = new Random();
+
+    // Distinct hash codes seen so far, used to judge the collision rate.
+    final Set<Integer> seen = new HashSet<>();
+    for (int i = 0; i < attempts; i++) {
+      final int intValue = rand.nextInt();
+      final long longValue = rand.nextLong();
+
+      // Hashing the same value twice must give the same result.
+      Assert.assertEquals(hasher.hashInt(intValue), hasher.hashInt(intValue));
+      final int longHash = hasher.hashLong(longValue);
+      Assert.assertEquals(longHash, hasher.hashLong(longValue));
+
+      seen.add(longHash);
+    }
+
+    assertFewCollisions(seen, attempts);
+  }
+
+  @Test
+  public void randomizedStressTestBytes() {
+    final int attempts = 65536;
+    final Random rand = new Random();
+
+    // Distinct hash codes seen so far, used to judge the collision rate.
+    final Set<Integer> seen = new HashSet<>();
+    for (int i = 0; i < attempts; i++) {
+      // Random content whose length is a multiple of 8 bytes (possibly 0).
+      final int length = rand.nextInt(100) * 8;
+      final byte[] bytes = new byte[length];
+      rand.nextBytes(bytes);
+
+      final int hash = hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, length);
+      Assert.assertEquals(
+        hash,
+        hasher.hashUnsafeWords(bytes, Platform.BYTE_ARRAY_OFFSET, length));
+
+      seen.add(hash);
+    }
+
+    assertFewCollisions(seen, attempts);
+  }
+
+  @Test
+  public void randomizedStressTestPaddedStrings() {
+    final int attempts = 64000;
+    final int paddedLength = 8;
+
+    // Distinct hash codes seen so far, used to judge the collision rate.
+    final Set<Integer> seen = new HashSet<>();
+    for (int i = 0; i < attempts; i++) {
+      // The decimal text of i, zero-padded on the right to 8 bytes.
+      final byte[] strBytes = String.valueOf(i).getBytes(StandardCharsets.UTF_8);
+      final byte[] paddedBytes = new byte[paddedLength];
+      System.arraycopy(strBytes, 0, paddedBytes, 0, strBytes.length);
+
+      final int hash =
+        hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, paddedLength);
+      Assert.assertEquals(
+        hash,
+        hasher.hashUnsafeWords(paddedBytes, Platform.BYTE_ARRAY_OFFSET, paddedLength));
+
+      seen.add(hash);
+    }
+
+    assertFewCollisions(seen, attempts);
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
new file mode 100644
index 0000000..9e69e26
--- /dev/null
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java
@@ -0,0 +1,240 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.spark.unsafe.types;
+
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+import static org.apache.spark.unsafe.types.CalendarInterval.*;
+
+/**
+ * Tests for CalendarInterval: equality, string formatting, the various string parsers and
+ * interval arithmetic.
+ */
+public class CalendarIntervalSuite {
+
+  @Test
+  public void equalsTest() {
+    CalendarInterval i1 = new CalendarInterval(3, 123);
+    CalendarInterval i2 = new CalendarInterval(3, 321);
+    CalendarInterval i3 = new CalendarInterval(1, 123);
+    CalendarInterval i4 = new CalendarInterval(3, 123);
+
+    // assertNotSame would only compare references, which trivially differ for separately
+    // constructed objects; assertNotEquals actually exercises equals().
+    assertNotEquals(i1, i2);
+    assertNotEquals(i1, i3);
+    assertNotEquals(i2, i3);
+    assertEquals(i1, i4);
+  }
+
+  @Test
+  public void toStringTest() {
+    CalendarInterval i;
+
+    i = new CalendarInterval(34, 0);
+    assertEquals("interval 2 years 10 months", i.toString());
+
+    i = new CalendarInterval(-34, 0);
+    assertEquals("interval -2 years -10 months", i.toString());
+
+    i = new CalendarInterval(0, 3 * MICROS_PER_WEEK + 13 * MICROS_PER_HOUR + 123);
+    assertEquals("interval 3 weeks 13 hours 123 microseconds", i.toString());
+
+    i = new CalendarInterval(0, -3 * MICROS_PER_WEEK - 13 * MICROS_PER_HOUR - 123);
+    assertEquals("interval -3 weeks -13 hours -123 microseconds", i.toString());
+
+    i = new CalendarInterval(34, 3 * MICROS_PER_WEEK + 13 * MICROS_PER_HOUR + 123);
+    assertEquals("interval 2 years 10 months 3 weeks 13 hours 123 microseconds", i.toString());
+  }
+
+  @Test
+  public void fromStringTest() {
+    testSingleUnit("year", 3, 36, 0);
+    testSingleUnit("month", 3, 3, 0);
+    testSingleUnit("week", 3, 0, 3 * MICROS_PER_WEEK);
+    testSingleUnit("day", 3, 0, 3 * MICROS_PER_DAY);
+    testSingleUnit("hour", 3, 0, 3 * MICROS_PER_HOUR);
+    testSingleUnit("minute", 3, 0, 3 * MICROS_PER_MINUTE);
+    testSingleUnit("second", 3, 0, 3 * MICROS_PER_SECOND);
+    testSingleUnit("millisecond", 3, 0, 3 * MICROS_PER_MILLI);
+    testSingleUnit("microsecond", 3, 0, 3);
+
+    String input;
+
+    // Extra whitespace between, before and after the tokens is accepted.
+    input = "interval   -5  years  23   month";
+    CalendarInterval result = new CalendarInterval(-5 * 12 + 23, 0);
+    assertEquals(result, fromString(input));
+
+    input = "interval   -5  years  23   month   ";
+    assertEquals(result, fromString(input));
+
+    input = "  interval   -5  years  23   month   ";
+    assertEquals(result, fromString(input));
+
+    // Error cases
+    input = "interval   3month 1 hour";
+    assertNull(fromString(input));
+
+    input = "interval 3 moth 1 hour";
+    assertNull(fromString(input));
+
+    input = "interval";
+    assertNull(fromString(input));
+
+    input = "int";
+    assertNull(fromString(input));
+
+    input = "";
+    assertNull(fromString(input));
+
+    input = null;
+    assertNull(fromString(input));
+  }
+
+  @Test
+  public void fromYearMonthStringTest() {
+    String input;
+    CalendarInterval i;
+
+    input = "99-10";
+    i = new CalendarInterval(99 * 12 + 10, 0L);
+    assertEquals(i, fromYearMonthString(input));
+
+    input = "-8-10";
+    i = new CalendarInterval(-8 * 12 - 10, 0L);
+    assertEquals(i, fromYearMonthString(input));
+
+    try {
+      input = "99-15";
+      fromYearMonthString(input);
+      fail("Expected to throw an exception for the invalid input");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getMessage().contains("month 15 outside range"));
+    }
+  }
+
+  @Test
+  public void fromDayTimeStringTest() {
+    String input;
+    CalendarInterval i;
+
+    input = "5 12:40:30.999999999";
+    i = new CalendarInterval(0, 5 * MICROS_PER_DAY + 12 * MICROS_PER_HOUR +
+      40 * MICROS_PER_MINUTE + 30 * MICROS_PER_SECOND + 999999L);
+    assertEquals(i, fromDayTimeString(input));
+
+    input = "10 0:12:0.888";
+    i = new CalendarInterval(0, 10 * MICROS_PER_DAY + 12 * MICROS_PER_MINUTE);
+    assertEquals(i, fromDayTimeString(input));
+
+    input = "-3 0:0:0";
+    i = new CalendarInterval(0, -3 * MICROS_PER_DAY);
+    assertEquals(i, fromDayTimeString(input));
+
+    try {
+      input = "5 30:12:20";
+      fromDayTimeString(input);
+      fail("Expected to throw an exception for the invalid input");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getMessage().contains("hour 30 outside range"));
+    }
+
+    try {
+      input = "5 30-12";
+      fromDayTimeString(input);
+      fail("Expected to throw an exception for the invalid input");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getMessage().contains("not match day-time format"));
+    }
+  }
+
+  @Test
+  public void fromSingleUnitStringTest() {
+    String input;
+    CalendarInterval i;
+
+    input = "12";
+    i = new CalendarInterval(12 * 12, 0L);
+    assertEquals(i, fromSingleUnitString("year", input));
+
+    input = "100";
+    i = new CalendarInterval(0, 100 * MICROS_PER_DAY);
+    assertEquals(i, fromSingleUnitString("day", input));
+
+    input = "1999.38888";
+    i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 38);
+    assertEquals(i, fromSingleUnitString("second", input));
+
+    try {
+      input = String.valueOf(Integer.MAX_VALUE);
+      fromSingleUnitString("year", input);
+      fail("Expected to throw an exception for the invalid input");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getMessage().contains("outside range"));
+    }
+
+    try {
+      input = String.valueOf(Long.MAX_VALUE / MICROS_PER_HOUR + 1);
+      fromSingleUnitString("hour", input);
+      fail("Expected to throw an exception for the invalid input");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getMessage().contains("outside range"));
+    }
+  }
+
+  @Test
+  public void addTest() {
+    String input = "interval 3 month 1 hour";
+    String input2 = "interval 2 month 100 hour";
+
+    CalendarInterval interval = fromString(input);
+    CalendarInterval interval2 = fromString(input2);
+
+    assertEquals(new CalendarInterval(5, 101 * MICROS_PER_HOUR), interval.add(interval2));
+
+    input = "interval -10 month -81 hour";
+    input2 = "interval 75 month 200 hour";
+
+    interval = fromString(input);
+    interval2 = fromString(input2);
+
+    assertEquals(new CalendarInterval(65, 119 * MICROS_PER_HOUR), interval.add(interval2));
+  }
+
+  @Test
+  public void subtractTest() {
+    String input = "interval 3 month 1 hour";
+    String input2 = "interval 2 month 100 hour";
+
+    CalendarInterval interval = fromString(input);
+    CalendarInterval interval2 = fromString(input2);
+
+    assertEquals(new CalendarInterval(1, -99 * MICROS_PER_HOUR), interval.subtract(interval2));
+
+    input = "interval -10 month -81 hour";
+    input2 = "interval 75 month 200 hour";
+
+    interval = fromString(input);
+    interval2 = fromString(input2);
+
+    assertEquals(new CalendarInterval(-85, -281 * MICROS_PER_HOUR), interval.subtract(interval2));
+  }
+
+  /**
+   * Checks that "interval &lt;number&gt; &lt;unit&gt;" parses to the expected interval, both
+   * with the singular unit name and with an "s" appended to it.
+   */
+  private static void testSingleUnit(String unit, int number, int months, long microseconds) {
+    String input1 = "interval " + number + " " + unit;
+    String input2 = "interval " + number + " " + unit + "s";
+    CalendarInterval result = new CalendarInterval(months, microseconds);
+    assertEquals(result, fromString(input1));
+    assertEquals(result, fromString(input2));
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
new file mode 100644
index 0000000..bef5d71
--- /dev/null
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -0,0 +1,492 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.spark.unsafe.types;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import com.google.common.collect.ImmutableMap;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+import static org.apache.spark.unsafe.types.UTF8String.*;
+
+public class UTF8StringSuite {
+
+  private static void checkBasic(String str, int len) throws UnsupportedEncodingException {
+    UTF8String s1 = fromString(str);
+    UTF8String s2 = fromBytes(str.getBytes("utf8"));
+    assertEquals(s1.numChars(), len);
+    assertEquals(s2.numChars(), len);
+
+    assertEquals(s1.toString(), str);
+    assertEquals(s2.toString(), str);
+    assertEquals(s1, s2);
+
+    assertEquals(s1.hashCode(), s2.hashCode());
+
+    assertEquals(0, s1.compareTo(s2));
+
+    assertTrue(s1.contains(s2));
+    assertTrue(s2.contains(s1));
+    assertTrue(s1.startsWith(s1));
+    assertTrue(s1.endsWith(s1));
+  }
+
+  @Test
+  public void basicTest() throws UnsupportedEncodingException {
+    checkBasic("", 0);
+    checkBasic("hello", 5);
+    checkBasic("大 千 世 界", 7);
+  }
+
+  @Test
+  public void emptyStringTest() {
+    assertEquals(EMPTY_UTF8, fromString(""));
+    assertEquals(EMPTY_UTF8, fromBytes(new byte[0]));
+    assertEquals(0, EMPTY_UTF8.numChars());
+    assertEquals(0, EMPTY_UTF8.numBytes());
+  }
+
+  @Test
+  public void prefix() {
+    assertTrue(fromString("a").getPrefix() - fromString("b").getPrefix() < 0);
+    assertTrue(fromString("ab").getPrefix() - fromString("b").getPrefix() < 0);
+    assertTrue(
+      fromString("abbbbbbbbbbbasdf").getPrefix() - fromString("bbbbbbbbbbbbasdf").getPrefix() < 0);
+    assertTrue(fromString("").getPrefix() - fromString("a").getPrefix() < 0);
+    assertTrue(fromString("你好").getPrefix() - fromString("世界").getPrefix() > 0);
+
+    byte[] buf1 = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    byte[] buf2 = {1, 2, 3};
+    UTF8String str1 = fromBytes(buf1, 0, 3);
+    UTF8String str2 = fromBytes(buf1, 0, 8);
+    UTF8String str3 = fromBytes(buf2);
+    assertTrue(str1.getPrefix() - str2.getPrefix() < 0);
+    assertEquals(str1.getPrefix(), str3.getPrefix());
+  }
+
+  @Test
+  public void compareTo() {
+    assertTrue(fromString("").compareTo(fromString("a")) < 0);
+    assertTrue(fromString("abc").compareTo(fromString("ABC")) > 0);
+    assertTrue(fromString("abc0").compareTo(fromString("abc")) > 0);
+    assertTrue(fromString("abcabcabc").compareTo(fromString("abcabcabc")) == 0);
+    assertTrue(fromString("aBcabcabc").compareTo(fromString("Abcabcabc")) > 0);
+    assertTrue(fromString("Abcabcabc").compareTo(fromString("abcabcabC")) < 0);
+    assertTrue(fromString("abcabcabc").compareTo(fromString("abcabcabC")) > 0);
+
+    assertTrue(fromString("abc").compareTo(fromString("世界")) < 0);
+    assertTrue(fromString("你好").compareTo(fromString("世界")) > 0);
+    assertTrue(fromString("你好123").compareTo(fromString("你好122")) > 0);
+  }
+
+  protected static void testUpperandLower(String upper, String lower) {
+    UTF8String us = fromString(upper);
+    UTF8String ls = fromString(lower);
+    assertEquals(ls, us.toLowerCase());
+    assertEquals(us, ls.toUpperCase());
+    assertEquals(us, us.toUpperCase());
+    assertEquals(ls, ls.toLowerCase());
+  }
+
+  @Test
+  public void upperAndLower() {
+    testUpperandLower("", "");
+    testUpperandLower("0123456", "0123456");
+    testUpperandLower("ABCXYZ", "abcxyz");
+    testUpperandLower("ЀЁЂѺΏỀ", "ѐёђѻώề");
+    testUpperandLower("大千世界 数据砖头", "大千世界 数据砖头");
+  }
+
+  @Test
+  public void titleCase() {
+    assertEquals(fromString(""), fromString("").toTitleCase());
+    assertEquals(fromString("Ab Bc Cd"), fromString("ab bc cd").toTitleCase());
+    assertEquals(fromString("Ѐ Ё Ђ Ѻ Ώ Ề"), fromString("ѐ ё ђ ѻ ώ ề").toTitleCase());
+    assertEquals(fromString("大千世界 数据砖头"), fromString("大千世界 数据砖头").toTitleCase());
+  }
+
+  @Test
+  public void concatTest() {
+    assertEquals(EMPTY_UTF8, concat());
+    assertNull(concat((UTF8String) null));
+    assertEquals(EMPTY_UTF8, concat(EMPTY_UTF8));
+    assertEquals(fromString("ab"), concat(fromString("ab")));
+    assertEquals(fromString("ab"), concat(fromString("a"), fromString("b")));
+    assertEquals(fromString("abc"), concat(fromString("a"), fromString("b"), fromString("c")));
+    assertNull(concat(fromString("a"), null, fromString("c")));
+    assertNull(concat(fromString("a"), null, null));
+    assertNull(concat(null, null, null));
+    assertEquals(fromString("数据砖头"), concat(fromString("数据"), fromString("砖头")));
+  }
+
+  @Test
+  public void concatWsTest() {
+    // Returns null if the separator is null
+    assertNull(concatWs(null, (UTF8String) null));
+    assertNull(concatWs(null, fromString("a")));
+
+    // If separator is not null, concatWs should skip all null inputs and never return null.
+    UTF8String sep = fromString("哈哈");
+    assertEquals(
+      EMPTY_UTF8,
+      concatWs(sep, EMPTY_UTF8));
+    assertEquals(
+      fromString("ab"),
+      concatWs(sep, fromString("ab")));
+    assertEquals(
+      fromString("a哈哈b"),
+      concatWs(sep, fromString("a"), fromString("b")));
+    assertEquals(
+      fromString("a哈哈b哈哈c"),
+      concatWs(sep, fromString("a"), fromString("b"), fromString("c")));
+    assertEquals(
+      fromString("a哈哈c"),
+      concatWs(sep, fromString("a"), null, fromString("c")));
+    assertEquals(
+      fromString("a"),
+      concatWs(sep, fromString("a"), null, null));
+    assertEquals(
+      EMPTY_UTF8,
+      concatWs(sep, null, null, null));
+    assertEquals(
+      fromString("数据哈哈砖头"),
+      concatWs(sep, fromString("数据"), fromString("砖头")));
+  }
+
+  @Test
+  public void contains() {
+    assertTrue(EMPTY_UTF8.contains(EMPTY_UTF8));
+    assertTrue(fromString("hello").contains(fromString("ello")));
+    assertFalse(fromString("hello").contains(fromString("vello")));
+    assertFalse(fromString("hello").contains(fromString("hellooo")));
+    assertTrue(fromString("大千世界").contains(fromString("千世界")));
+    assertFalse(fromString("大千世界").contains(fromString("世千")));
+    assertFalse(fromString("大千世界").contains(fromString("大千世界好")));
+  }
+
+  @Test
+  public void startsWith() {
+    assertTrue(EMPTY_UTF8.startsWith(EMPTY_UTF8));
+    assertTrue(fromString("hello").startsWith(fromString("hell")));
+    assertFalse(fromString("hello").startsWith(fromString("ell")));
+    assertFalse(fromString("hello").startsWith(fromString("hellooo")));
+    assertTrue(fromString("数据砖头").startsWith(fromString("数据")));
+    assertFalse(fromString("大千世界").startsWith(fromString("千")));
+    assertFalse(fromString("大千世界").startsWith(fromString("大千世界好")));
+  }
+
+  @Test
+  public void endsWith() {
+    assertTrue(EMPTY_UTF8.endsWith(EMPTY_UTF8));
+    assertTrue(fromString("hello").endsWith(fromString("ello")));
+    assertFalse(fromString("hello").endsWith(fromString("ellov")));
+    assertFalse(fromString("hello").endsWith(fromString("hhhello")));
+    assertTrue(fromString("大千世界").endsWith(fromString("世界")));
+    assertFalse(fromString("大千世界").endsWith(fromString("世")));
+    assertFalse(fromString("数据砖头").endsWith(fromString("我的数据砖头")));
+  }
+
+  @Test
+  public void substring() {
+    assertEquals(EMPTY_UTF8, fromString("hello").substring(0, 0));
+    assertEquals(fromString("el"), fromString("hello").substring(1, 3));
+    assertEquals(fromString("数"), fromString("数据砖头").substring(0, 1));
+    assertEquals(fromString("据砖"), fromString("数据砖头").substring(1, 3));
+    assertEquals(fromString("头"), fromString("数据砖头").substring(3, 5));
+    assertEquals(fromString("ߵ梷"), fromString("ߵ梷").substring(0, 2));
+  }
+
+  @Test
+  public void trims() {
+    assertEquals(fromString("hello"), fromString("  hello ").trim());
+    assertEquals(fromString("hello "), fromString("  hello ").trimLeft());
+    assertEquals(fromString("  hello"), fromString("  hello ").trimRight());
+
+    assertEquals(EMPTY_UTF8, fromString("  ").trim());
+    assertEquals(EMPTY_UTF8, fromString("  ").trimLeft());
+    assertEquals(EMPTY_UTF8, fromString("  ").trimRight());
+
+    assertEquals(fromString("数据砖头"), fromString("  数据砖头 ").trim());
+    assertEquals(fromString("数据砖头 "), fromString("  数据砖头 ").trimLeft());
+    assertEquals(fromString("  数据砖头"), fromString("  数据砖头 ").trimRight());
+
+    assertEquals(fromString("数据砖头"), fromString("数据砖头").trim());
+    assertEquals(fromString("数据砖头"), fromString("数据砖头").trimLeft());
+    assertEquals(fromString("数据砖头"), fromString("数据砖头").trimRight());
+  }
+
+  @Test
+  public void indexOf() {
+    assertEquals(0, EMPTY_UTF8.indexOf(EMPTY_UTF8, 0));
+    assertEquals(-1, EMPTY_UTF8.indexOf(fromString("l"), 0));
+    assertEquals(0, fromString("hello").indexOf(EMPTY_UTF8, 0));
+    assertEquals(2, fromString("hello").indexOf(fromString("l"), 0));
+    assertEquals(3, fromString("hello").indexOf(fromString("l"), 3));
+    assertEquals(-1, fromString("hello").indexOf(fromString("a"), 0));
+    assertEquals(2, fromString("hello").indexOf(fromString("ll"), 0));
+    assertEquals(-1, fromString("hello").indexOf(fromString("ll"), 4));
+    assertEquals(1, fromString("数据砖头").indexOf(fromString("据砖"), 0));
+    assertEquals(-1, fromString("数据砖头").indexOf(fromString("数"), 3));
+    assertEquals(0, fromString("数据砖头").indexOf(fromString("数"), 0));
+    assertEquals(3, fromString("数据砖头").indexOf(fromString("头"), 0));
+  }
+
+  @Test
+  public void substring_index() {
+    assertEquals(fromString("www.apache.org"),
+      fromString("www.apache.org").subStringIndex(fromString("."), 3));
+    assertEquals(fromString("www.apache"),
+      fromString("www.apache.org").subStringIndex(fromString("."), 2));
+    assertEquals(fromString("www"),
+      fromString("www.apache.org").subStringIndex(fromString("."), 1));
+    assertEquals(fromString(""),
+      fromString("www.apache.org").subStringIndex(fromString("."), 0));
+    assertEquals(fromString("org"),
+      fromString("www.apache.org").subStringIndex(fromString("."), -1));
+    assertEquals(fromString("apache.org"),
+      fromString("www.apache.org").subStringIndex(fromString("."), -2));
+    assertEquals(fromString("www.apache.org"),
+      fromString("www.apache.org").subStringIndex(fromString("."), -3));
+    // str is empty string
+    assertEquals(fromString(""),
+      fromString("").subStringIndex(fromString("."), 1));
+    // empty string delim
+    assertEquals(fromString(""),
+      fromString("www.apache.org").subStringIndex(fromString(""), 1));
+    // delim does not exist in str
+    assertEquals(fromString("www.apache.org"),
+      fromString("www.apache.org").subStringIndex(fromString("#"), 2));
+    // delim is 2 chars
+    assertEquals(fromString("www||apache"),
+      fromString("www||apache||org").subStringIndex(fromString("||"), 2));
+    assertEquals(fromString("apache||org"),
+      fromString("www||apache||org").subStringIndex(fromString("||"), -2));
+    // non ascii chars
+    assertEquals(fromString("大千世界大"),
+      fromString("大千世界大千世界").subStringIndex(fromString("千"), 2));
+    // overlapped delim
+    assertEquals(fromString("||"), fromString("||||||").subStringIndex(fromString("|||"), 3));
+    assertEquals(fromString("|||"), fromString("||||||").subStringIndex(fromString("|||"), -4));
+  }
+
+  @Test
+  public void reverse() {
+    assertEquals(fromString("olleh"), fromString("hello").reverse());
+    assertEquals(EMPTY_UTF8, EMPTY_UTF8.reverse());
+    assertEquals(fromString("者行孙"), fromString("孙行者").reverse());
+    assertEquals(fromString("者行孙 olleh"), fromString("hello 孙行者").reverse());
+  }
+
+  @Test
+  public void repeat() {
+    assertEquals(fromString("数d数d数d数d数d"), fromString("数d").repeat(5));
+    assertEquals(fromString("数d"), fromString("数d").repeat(1));
+    assertEquals(EMPTY_UTF8, fromString("数d").repeat(-1));
+  }
+
+  @Test
+  public void pad() {
+    assertEquals(fromString("hel"), fromString("hello").lpad(3, fromString("????")));
+    assertEquals(fromString("hello"), fromString("hello").lpad(5, fromString("????")));
+    assertEquals(fromString("?hello"), fromString("hello").lpad(6, fromString("????")));
+    assertEquals(fromString("???????hello"), fromString("hello").lpad(12, fromString("????")));
+    assertEquals(fromString("?????hello"), fromString("hello").lpad(10, fromString("?????")));
+    assertEquals(fromString("???????"), EMPTY_UTF8.lpad(7, fromString("?????")));
+
+    assertEquals(fromString("hel"), fromString("hello").rpad(3, fromString("????")));
+    assertEquals(fromString("hello"), fromString("hello").rpad(5, fromString("????")));
+    assertEquals(fromString("hello?"), fromString("hello").rpad(6, fromString("????")));
+    assertEquals(fromString("hello???????"), fromString("hello").rpad(12, fromString("????")));
+    assertEquals(fromString("hello?????"), fromString("hello").rpad(10, fromString("?????")));
+    assertEquals(fromString("???????"), EMPTY_UTF8.rpad(7, fromString("?????")));
+
+    assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, fromString("????")));
+    assertEquals(fromString("?数据砖头"), fromString("数据砖头").lpad(5, fromString("????")));
+    assertEquals(fromString("??数据砖头"), fromString("数据砖头").lpad(6, fromString("????")));
+    assertEquals(fromString("孙行数据砖头"), fromString("数据砖头").lpad(6, fromString("孙行者")));
+    assertEquals(fromString("孙行者数据砖头"), fromString("数据砖头").lpad(7, fromString("孙行者")));
+    assertEquals(
+      fromString("孙行者孙行者孙行数据砖头"),
+      fromString("数据砖头").lpad(12, fromString("孙行者")));
+
+    assertEquals(fromString("数据砖"), fromString("数据砖头").rpad(3, fromString("????")));
+    assertEquals(fromString("数据砖头?"), fromString("数据砖头").rpad(5, fromString("????")));
+    assertEquals(fromString("数据砖头??"), fromString("数据砖头").rpad(6, fromString("????")));
+    assertEquals(fromString("数据砖头孙行"), fromString("数据砖头").rpad(6, fromString("孙行者")));
+    assertEquals(fromString("数据砖头孙行者"), fromString("数据砖头").rpad(7, fromString("孙行者")));
+    assertEquals(
+      fromString("数据砖头孙行者孙行者孙行"),
+      fromString("数据砖头").rpad(12, fromString("孙行者")));
+
+    assertEquals(EMPTY_UTF8, fromString("数据砖头").lpad(-10, fromString("孙行者")));
+    assertEquals(EMPTY_UTF8, fromString("数据砖头").lpad(-10, EMPTY_UTF8));
+    assertEquals(fromString("数据砖头"), fromString("数据砖头").lpad(5, EMPTY_UTF8));
+    assertEquals(fromString("数据砖"), fromString("数据砖头").lpad(3, EMPTY_UTF8));
+    assertEquals(EMPTY_UTF8, EMPTY_UTF8.lpad(3, EMPTY_UTF8));
+
+    assertEquals(EMPTY_UTF8, fromString("数据砖头").rpad(-10, fromString("孙行者")));
+    assertEquals(EMPTY_UTF8, fromString("数据砖头").rpad(-10, EMPTY_UTF8));
+    assertEquals(fromString("数据砖头"), fromString("数据砖头").rpad(5, EMPTY_UTF8));
+    assertEquals(fromString("数据砖"), fromString("数据砖头").rpad(3, EMPTY_UTF8));
+    assertEquals(EMPTY_UTF8, EMPTY_UTF8.rpad(3, EMPTY_UTF8));
+  }
+
+  @Test
+  public void substringSQL() {
+    UTF8String e = fromString("example");
+    assertEquals(e.substringSQL(0, 2), fromString("ex"));
+    assertEquals(e.substringSQL(1, 2), fromString("ex"));
+    assertEquals(e.substringSQL(0, 7), fromString("example"));
+    assertEquals(e.substringSQL(1, 2), fromString("ex"));
+    assertEquals(e.substringSQL(0, 100), fromString("example"));
+    assertEquals(e.substringSQL(1, 100), fromString("example"));
+    assertEquals(e.substringSQL(2, 2), fromString("xa"));
+    assertEquals(e.substringSQL(1, 6), fromString("exampl"));
+    assertEquals(e.substringSQL(2, 100), fromString("xample"));
+    assertEquals(e.substringSQL(0, 0), fromString(""));
+    assertEquals(e.substringSQL(100, 4), EMPTY_UTF8);
+    assertEquals(e.substringSQL(0, Integer.MAX_VALUE), fromString("example"));
+    assertEquals(e.substringSQL(1, Integer.MAX_VALUE), fromString("example"));
+    assertEquals(e.substringSQL(2, Integer.MAX_VALUE), fromString("xample"));
+  }
+
+  @Test
+  public void split() {
+    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), -1),
+      new UTF8String[]{fromString("ab"), fromString("def"), fromString("ghi")}));
+    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2),
+      new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
+    assertTrue(Arrays.equals(fromString("ab,def,ghi").split(fromString(","), 2),
+      new UTF8String[]{fromString("ab"), fromString("def,ghi")}));
+  }
+
+  @Test
+  public void levenshteinDistance() {
+    assertEquals(0, EMPTY_UTF8.levenshteinDistance(EMPTY_UTF8));
+    assertEquals(1, EMPTY_UTF8.levenshteinDistance(fromString("a")));
+    assertEquals(7, fromString("aaapppp").levenshteinDistance(EMPTY_UTF8));
+    assertEquals(1, fromString("frog").levenshteinDistance(fromString("fog")));
+    assertEquals(3, fromString("fly").levenshteinDistance(fromString("ant")));
+    assertEquals(7, fromString("elephant").levenshteinDistance(fromString("hippo")));
+    assertEquals(7, fromString("hippo").levenshteinDistance(fromString("elephant")));
+    assertEquals(8, fromString("hippo").levenshteinDistance(fromString("zzzzzzzz")));
+    assertEquals(1, fromString("hello").levenshteinDistance(fromString("hallo")));
+    assertEquals(4, fromString("世界千世").levenshteinDistance(fromString("千a世b")));
+  }
+
+  @Test
+  public void translate() {
+    assertEquals(
+      fromString("1a2s3ae"),
+      fromString("translate").translate(ImmutableMap.of(
+        'r', '1',
+        'n', '2',
+        'l', '3',
+        't', '\0'
+      )));
+    assertEquals(
+      fromString("translate"),
+      fromString("translate").translate(new HashMap<Character, Character>()));
+    assertEquals(
+      fromString("asae"),
+      fromString("translate").translate(ImmutableMap.of(
+        'r', '\0',
+        'n', '\0',
+        'l', '\0',
+        't', '\0'
+      )));
+    assertEquals(
+      fromString("aa世b"),
+      fromString("花花世界").translate(ImmutableMap.of(
+        '花', 'a',
+        '界', 'b'
+      )));
+  }
+
+  @Test
+  public void createBlankString() {
+    assertEquals(fromString(" "), blankString(1));
+    assertEquals(fromString("  "), blankString(2));
+    assertEquals(fromString("   "), blankString(3));
+    assertEquals(fromString(""), blankString(0));
+  }
+
+  @Test
+  public void findInSet() {
+    assertEquals(1, fromString("ab").findInSet(fromString("ab")));
+    assertEquals(2, fromString("a,b").findInSet(fromString("b")));
+    assertEquals(3, fromString("abc,b,ab,c,def").findInSet(fromString("ab")));
+    assertEquals(1, fromString("ab,abc,b,ab,c,def").findInSet(fromString("ab")));
+    assertEquals(4, fromString(",,,ab,abc,b,ab,c,def").findInSet(fromString("ab")));
+    assertEquals(1, fromString(",ab,abc,b,ab,c,def").findInSet(fromString("")));
+    assertEquals(4, fromString("数据砖头,abc,b,ab,c,def").findInSet(fromString("ab")));
+    assertEquals(6, fromString("数据砖头,abc,b,ab,c,def").findInSet(fromString("def")));
+  }
+
+  @Test
+  public void soundex() {
+    assertEquals(fromString("Robert").soundex(), fromString("R163"));
+    assertEquals(fromString("Rupert").soundex(), fromString("R163"));
+    assertEquals(fromString("Rubin").soundex(), fromString("R150"));
+    assertEquals(fromString("Ashcraft").soundex(), fromString("A261"));
+    assertEquals(fromString("Ashcroft").soundex(), fromString("A261"));
+    assertEquals(fromString("Burroughs").soundex(), fromString("B620"));
+    assertEquals(fromString("Burrows").soundex(), fromString("B620"));
+    assertEquals(fromString("Ekzampul").soundex(), fromString("E251"));
+    assertEquals(fromString("Example").soundex(), fromString("E251"));
+    assertEquals(fromString("Ellery").soundex(), fromString("E460"));
+    assertEquals(fromString("Euler").soundex(), fromString("E460"));
+    assertEquals(fromString("Ghosh").soundex(), fromString("G200"));
+    assertEquals(fromString("Gauss").soundex(), fromString("G200"));
+    assertEquals(fromString("Gutierrez").soundex(), fromString("G362"));
+    assertEquals(fromString("Heilbronn").soundex(), fromString("H416"));
+    assertEquals(fromString("Hilbert").soundex(), fromString("H416"));
+    assertEquals(fromString("Jackson").soundex(), fromString("J250"));
+    assertEquals(fromString("Kant").soundex(), fromString("K530"));
+    assertEquals(fromString("Knuth").soundex(), fromString("K530"));
+    assertEquals(fromString("Lee").soundex(), fromString("L000"));
+    assertEquals(fromString("Lukasiewicz").soundex(), fromString("L222"));
+    assertEquals(fromString("Lissajous").soundex(), fromString("L222"));
+    assertEquals(fromString("Ladd").soundex(), fromString("L300"));
+    assertEquals(fromString("Lloyd").soundex(), fromString("L300"));
+    assertEquals(fromString("Moses").soundex(), fromString("M220"));
+    assertEquals(fromString("O'Hara").soundex(), fromString("O600"));
+    assertEquals(fromString("Pfister").soundex(), fromString("P236"));
+    assertEquals(fromString("Rubin").soundex(), fromString("R150"));
+    assertEquals(fromString("Robert").soundex(), fromString("R163"));
+    assertEquals(fromString("Rupert").soundex(), fromString("R163"));
+    assertEquals(fromString("Soundex").soundex(), fromString("S532"));
+    assertEquals(fromString("Sownteks").soundex(), fromString("S532"));
+    assertEquals(fromString("Tymczak").soundex(), fromString("T522"));
+    assertEquals(fromString("VanDeusen").soundex(), fromString("V532"));
+    assertEquals(fromString("Washington").soundex(), fromString("W252"));
+    assertEquals(fromString("Wheaton").soundex(), fromString("W350"));
+
+    assertEquals(fromString("a").soundex(), fromString("A000"));
+    assertEquals(fromString("ab").soundex(), fromString("A100"));
+    assertEquals(fromString("abc").soundex(), fromString("A120"));
+    assertEquals(fromString("abcd").soundex(), fromString("A123"));
+    assertEquals(fromString("").soundex(), fromString(""));
+    assertEquals(fromString("123").soundex(), fromString("123"));
+    assertEquals(fromString("世界千世").soundex(), fromString("世界千世"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
----------------------------------------------------------------------
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
new file mode 100644
index 0000000..b3bbd68
--- /dev/null
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types
+
+import org.apache.commons.lang3.StringUtils
+import org.scalacheck.{Arbitrary, Gen}
+import org.scalatest.prop.GeneratorDrivenPropertyChecks
+// scalastyle:off
+import org.scalatest.{FunSuite, Matchers}
+
+import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8}
+
+/**
+ * This test suite utilizes ScalaCheck to generate randomized inputs for UTF8String testing.
+ */
+class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenPropertyChecks with Matchers {
+// scalastyle:on
+
+  test("toString") {
+    forAll { (s: String) =>
+      assert(toUTF8(s).toString() === s)
+    }
+  }
+
+  test("numChars") {
+    forAll { (s: String) =>
+      assert(toUTF8(s).numChars() === s.length)
+    }
+  }
+
+  test("startsWith") {
+    forAll { (s: String) =>
+      val utf8 = toUTF8(s)
+      assert(utf8.startsWith(utf8))
+      for (i <- 1 to s.length) {
+        assert(utf8.startsWith(toUTF8(s.dropRight(i))))
+      }
+    }
+  }
+
+  test("endsWith") {
+    forAll { (s: String) =>
+      val utf8 = toUTF8(s)
+      assert(utf8.endsWith(utf8))
+      for (i <- 1 to s.length) {
+        assert(utf8.endsWith(toUTF8(s.drop(i))))
+      }
+    }
+  }
+
+  test("toUpperCase") {
+    forAll { (s: String) =>
+      assert(toUTF8(s).toUpperCase === toUTF8(s.toUpperCase))
+    }
+  }
+
+  test("toLowerCase") {
+    forAll { (s: String) =>
+      assert(toUTF8(s).toLowerCase === toUTF8(s.toLowerCase))
+    }
+  }
+
+  test("compare") {
+    forAll { (s1: String, s2: String) =>
+      assert(Math.signum(toUTF8(s1).compareTo(toUTF8(s2))) === Math.signum(s1.compareTo(s2)))
+    }
+  }
+
+  test("substring") {
+    forAll { (s: String) =>
+      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
+        assert(toUTF8(s).substring(start, end).toString === s.substring(start, end))
+      }
+    }
+  }
+
+  test("contains") {
+    forAll { (s: String) =>
+      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
+        val substring = s.substring(start, end)
+        assert(toUTF8(s).contains(toUTF8(substring)) === s.contains(substring))
+      }
+    }
+  }
+
+  val whitespaceChar: Gen[Char] = Gen.choose(0x00, 0x20).map(_.toChar)
+  val whitespaceString: Gen[String] = Gen.listOf(whitespaceChar).map(_.mkString)
+  val randomString: Gen[String] = Arbitrary.arbString.arbitrary
+
+  test("trim, trimLeft, trimRight") {
+    // lTrim and rTrim are both modified from java.lang.String.trim
+    def lTrim(s: String): String = {
+      var st = 0
+      val array: Array[Char] = s.toCharArray
+      while ((st < s.length) && (array(st) <= ' ')) {
+        st += 1
+      }
+      if (st > 0) s.substring(st, s.length) else s
+    }
+    def rTrim(s: String): String = {
+      var len = s.length
+      val array: Array[Char] = s.toCharArray
+      while ((len > 0) && (array(len - 1) <= ' ')) {
+        len -= 1
+      }
+      if (len < s.length) s.substring(0, len) else s
+    }
+
+    forAll(
+        whitespaceString,
+        randomString,
+        whitespaceString
+    ) { (start: String, middle: String, end: String) =>
+      val s = start + middle + end
+      assert(toUTF8(s).trim() === toUTF8(s.trim()))
+      assert(toUTF8(s).trimLeft() === toUTF8(lTrim(s)))
+      assert(toUTF8(s).trimRight() === toUTF8(rTrim(s)))
+    }
+  }
+
+  test("reverse") {
+    forAll { (s: String) =>
+      assert(toUTF8(s).reverse === toUTF8(s.reverse))
+    }
+  }
+
+  test("indexOf") {
+    forAll { (s: String) =>
+      for (start <- 0 to s.length; end <- 0 to s.length; if start <= end) {
+        val substring = s.substring(start, end)
+        assert(toUTF8(s).indexOf(toUTF8(substring), 0) === s.indexOf(substring))
+      }
+    }
+  }
+
+  val randomInt = Gen.choose(-100, 100)
+
+  test("repeat") {
+    def repeat(str: String, times: Int): String = {
+      if (times > 0) str * times else ""
+    }
+    // ScalaCheck often generates repeat counts so large that the test might hang forever.
+    forAll(randomString, randomInt) { (s: String, times: Int) =>
+      assert(toUTF8(s).repeat(times) === toUTF8(repeat(s, times)))
+    }
+  }
+
+  test("lpad, rpad") {
+    def padding(origin: String, pad: String, length: Int, isLPad: Boolean): String = {
+      if (length <= 0) return ""
+      if (length <= origin.length) {
+        if (length <= 0) "" else origin.substring(0, length)
+      } else {
+        if (pad.length == 0) return origin
+        val toPad = length - origin.length
+        val partPad = if (toPad % pad.length == 0) "" else pad.substring(0, toPad % pad.length)
+        if (isLPad) {
+          pad * (toPad / pad.length) + partPad + origin
+        } else {
+          origin + pad * (toPad / pad.length) + partPad
+        }
+      }
+    }
+
+    forAll (
+      randomString,
+      randomString,
+      randomInt
+    ) { (s: String, pad: String, length: Int) =>
+      assert(toUTF8(s).lpad(length, toUTF8(pad)) ===
+        toUTF8(padding(s, pad, length, true)))
+      assert(toUTF8(s).rpad(length, toUTF8(pad)) ===
+        toUTF8(padding(s, pad, length, false)))
+    }
+  }
+
+  val nullalbeSeq = Gen.listOf(Gen.oneOf[String](null: String, randomString))
+
+  test("concat") {
+    def concat(orgin: Seq[String]): String =
+      if (orgin.exists(_ == null)) null else orgin.mkString
+
+    forAll { (inputs: Seq[String]) =>
+      assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(inputs.mkString))
+    }
+    forAll (nullalbeSeq) { (inputs: Seq[String]) =>
+      assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(concat(inputs)))
+    }
+  }
+
+  test("concatWs") {
+    def concatWs(sep: String, inputs: Seq[String]): String = {
+      if (sep == null) return null
+      inputs.filter(_ != null).mkString(sep)
+    }
+
+    forAll { (sep: String, inputs: Seq[String]) =>
+      assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) ===
+        toUTF8(inputs.mkString(sep)))
+    }
+    forAll(randomString, nullalbeSeq) {(sep: String, inputs: Seq[String]) =>
+      assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) ===
+        toUTF8(concatWs(sep, inputs)))
+    }
+  }
+
+  // TODO: enable this when we find a proper way to generate valid patterns
+  ignore("split") {
+    forAll { (s: String, pattern: String, limit: Int) =>
+      assert(toUTF8(s).split(toUTF8(pattern), limit) ===
+        s.split(pattern, limit).map(toUTF8(_)))
+    }
+  }
+
+  test("levenshteinDistance") {
+    forAll { (one: String, another: String) =>
+      assert(toUTF8(one).levenshteinDistance(toUTF8(another)) ===
+        StringUtils.getLevenshteinDistance(one, another))
+    }
+  }
+
+  test("hashCode") {
+    forAll { (s: String) =>
+      assert(toUTF8(s).hashCode() === toUTF8(s).hashCode())
+    }
+  }
+
+  test("equals") {
+    forAll { (one: String, another: String) =>
+      assert(toUTF8(one).equals(toUTF8(another)) === one.equals(another))
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2376e30..2148379 100644
--- a/pom.xml
+++ b/pom.xml
@@ -89,7 +89,8 @@
     <module>common/sketch</module>
     <module>common/network-common</module>
     <module>common/network-shuffle</module>
-    <module>tags</module>
+    <module>common/unsafe</module>
+    <module>common/tags</module>
     <module>core</module>
     <module>graphx</module>
     <module>mllib</module>
@@ -99,7 +100,6 @@
     <module>sql/core</module>
     <module>sql/hive</module>
     <module>docker-integration-tests</module>
-    <module>unsafe</module>
     <module>assembly</module>
     <module>external/twitter</module>
     <module>external/flume</module>

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/tags/README.md
----------------------------------------------------------------------
diff --git a/tags/README.md b/tags/README.md
deleted file mode 100644
index 01e5126..0000000
--- a/tags/README.md
+++ /dev/null
@@ -1 +0,0 @@
-This module includes annotations in Java that are used to annotate test suites.

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/tags/pom.xml
----------------------------------------------------------------------
diff --git a/tags/pom.xml b/tags/pom.xml
deleted file mode 100644
index 3e8e6f6..0000000
--- a/tags/pom.xml
+++ /dev/null
@@ -1,50 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-  ~ Licensed to the Apache Software Foundation (ASF) under one or more
-  ~ contributor license agreements.  See the NOTICE file distributed with
-  ~ this work for additional information regarding copyright ownership.
-  ~ The ASF licenses this file to You under the Apache License, Version 2.0
-  ~ (the "License"); you may not use this file except in compliance with
-  ~ the License.  You may obtain a copy of the License at
-  ~
-  ~    http://www.apache.org/licenses/LICENSE-2.0
-  ~
-  ~ Unless required by applicable law or agreed to in writing, software
-  ~ distributed under the License is distributed on an "AS IS" BASIS,
-  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  ~ See the License for the specific language governing permissions and
-  ~ limitations under the License.
-  -->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>org.apache.spark</groupId>
-    <artifactId>spark-parent_2.11</artifactId>
-    <version>2.0.0-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-
-  <groupId>org.apache.spark</groupId>
-  <artifactId>spark-test-tags_2.11</artifactId>
-  <packaging>jar</packaging>
-  <name>Spark Project Test Tags</name>
-  <url>http://spark.apache.org/</url>
-  <properties>
-    <sbt.project.name>test-tags</sbt.project.name>
-  </properties>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.scalatest</groupId>
-      <artifactId>scalatest_${scala.binary.version}</artifactId>
-      <scope>compile</scope>
-    </dependency>
-  </dependencies>
-
-  <build>
-    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-  </build>
-</project>

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/tags/src/main/java/org/apache/spark/tags/DockerTest.java
----------------------------------------------------------------------
diff --git a/tags/src/main/java/org/apache/spark/tags/DockerTest.java b/tags/src/main/java/org/apache/spark/tags/DockerTest.java
deleted file mode 100644
index 0fecf3b..0000000
--- a/tags/src/main/java/org/apache/spark/tags/DockerTest.java
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.tags;
-
-import java.lang.annotation.*;
-import org.scalatest.TagAnnotation;
-
-@TagAnnotation
-@Retention(RetentionPolicy.RUNTIME)
-@Target({ElementType.METHOD, ElementType.TYPE})
-public @interface DockerTest { }

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/tags/src/main/java/org/apache/spark/tags/ExtendedHiveTest.java
----------------------------------------------------------------------
diff --git a/tags/src/main/java/org/apache/spark/tags/ExtendedHiveTest.java b/tags/src/main/java/org/apache/spark/tags/ExtendedHiveTest.java
deleted file mode 100644
index 83279e5..0000000
--- a/tags/src/main/java/org/apache/spark/tags/ExtendedHiveTest.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.tags;
-
-import java.lang.annotation.*;
-
-import org.scalatest.TagAnnotation;
-
-@TagAnnotation
-@Retention(RetentionPolicy.RUNTIME)
-@Target({ElementType.METHOD, ElementType.TYPE})
-public @interface ExtendedHiveTest { }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org

[4/4] spark git commit: [SPARK-13548][BUILD] Move tags and unsafe modules into common

Posted by rx...@apache.org.

[SPARK-13548][BUILD] Move tags and unsafe modules into common

## What changes were proposed in this pull request?
This patch moves tags and unsafe modules into common directory to remove 2 top level non-user-facing directories.

## How was this patch tested?
Jenkins should suffice.

Author: Reynold Xin <rx...@databricks.com>

Closes #11426 from rxin/SPARK-13548.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b0ee7d43
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b0ee7d43
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b0ee7d43

Branch: refs/heads/master
Commit: b0ee7d43730469ad61fdf6b7b75cc1b1efb62c31
Parents: c27ba0d
Author: Reynold Xin <rx...@databricks.com>
Authored: Tue Mar 1 15:39:13 2016 -0800
Committer: Reynold Xin <rx...@databricks.com>
Committed: Tue Mar 1 15:39:13 2016 -0800

----------------------------------------------------------------------
 common/tags/README.md                           |    1 +
 common/tags/pom.xml                             |   50 +
 .../java/org/apache/spark/tags/DockerTest.java  |   26 +
 .../org/apache/spark/tags/ExtendedHiveTest.java |   27 +
 .../org/apache/spark/tags/ExtendedYarnTest.java |   27 +
 common/unsafe/pom.xml                           |  110 ++
 .../org/apache/spark/unsafe/KVIterator.java     |   31 +
 .../java/org/apache/spark/unsafe/Platform.java  |  189 ++++
 .../spark/unsafe/array/ByteArrayMethods.java    |   66 ++
 .../apache/spark/unsafe/array/LongArray.java    |   94 ++
 .../spark/unsafe/bitset/BitSetMethods.java      |  129 +++
 .../spark/unsafe/hash/Murmur3_x86_32.java       |  126 +++
 .../unsafe/memory/HeapMemoryAllocator.java      |   86 ++
 .../spark/unsafe/memory/MemoryAllocator.java    |   33 +
 .../apache/spark/unsafe/memory/MemoryBlock.java |   56 +
 .../spark/unsafe/memory/MemoryLocation.java     |   54 +
 .../unsafe/memory/UnsafeMemoryAllocator.java    |   39 +
 .../apache/spark/unsafe/types/ByteArray.java    |   77 ++
 .../spark/unsafe/types/CalendarInterval.java    |  324 ++++++
 .../apache/spark/unsafe/types/UTF8String.java   | 1023 ++++++++++++++++++
 .../apache/spark/unsafe/PlatformUtilSuite.java  |   61 ++
 .../spark/unsafe/array/LongArraySuite.java      |   42 +
 .../spark/unsafe/hash/Murmur3_x86_32Suite.java  |  120 ++
 .../unsafe/types/CalendarIntervalSuite.java     |  240 ++++
 .../spark/unsafe/types/UTF8StringSuite.java     |  492 +++++++++
 .../types/UTF8StringPropertyCheckSuite.scala    |  248 +++++
 pom.xml                                         |    4 +-
 tags/README.md                                  |    1 -
 tags/pom.xml                                    |   50 -
 .../java/org/apache/spark/tags/DockerTest.java  |   26 -
 .../org/apache/spark/tags/ExtendedHiveTest.java |   27 -
 .../org/apache/spark/tags/ExtendedYarnTest.java |   27 -
 unsafe/pom.xml                                  |  110 --
 .../org/apache/spark/unsafe/KVIterator.java     |   31 -
 .../java/org/apache/spark/unsafe/Platform.java  |  189 ----
 .../spark/unsafe/array/ByteArrayMethods.java    |   66 --
 .../apache/spark/unsafe/array/LongArray.java    |   94 --
 .../spark/unsafe/bitset/BitSetMethods.java      |  129 ---
 .../spark/unsafe/hash/Murmur3_x86_32.java       |  126 ---
 .../unsafe/memory/HeapMemoryAllocator.java      |   86 --
 .../spark/unsafe/memory/MemoryAllocator.java    |   33 -
 .../apache/spark/unsafe/memory/MemoryBlock.java |   56 -
 .../spark/unsafe/memory/MemoryLocation.java     |   54 -
 .../unsafe/memory/UnsafeMemoryAllocator.java    |   39 -
 .../apache/spark/unsafe/types/ByteArray.java    |   77 --
 .../spark/unsafe/types/CalendarInterval.java    |  324 ------
 .../apache/spark/unsafe/types/UTF8String.java   | 1023 ------------------
 .../apache/spark/unsafe/PlatformUtilSuite.java  |   61 --
 .../spark/unsafe/array/LongArraySuite.java      |   42 -
 .../spark/unsafe/hash/Murmur3_x86_32Suite.java  |  120 --
 .../unsafe/types/CalendarIntervalSuite.java     |  240 ----
 .../spark/unsafe/types/UTF8StringSuite.java     |  492 ---------
 .../types/UTF8StringPropertyCheckSuite.scala    |  248 -----
 53 files changed, 3773 insertions(+), 3773 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/tags/README.md
----------------------------------------------------------------------
diff --git a/common/tags/README.md b/common/tags/README.md
new file mode 100644
index 0000000..01e5126
--- /dev/null
+++ b/common/tags/README.md
@@ -0,0 +1 @@
+This module includes annotations in Java that are used to annotate test suites.

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/tags/pom.xml
----------------------------------------------------------------------
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
new file mode 100644
index 0000000..8e702b4
--- /dev/null
+++ b/common/tags/pom.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.11</artifactId>
+    <version>2.0.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-test-tags_2.11</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Test Tags</name>
+  <url>http://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>test-tags</sbt.project.name>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.scalatest</groupId>
+      <artifactId>scalatest_${scala.binary.version}</artifactId>
+      <scope>compile</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/tags/src/main/java/org/apache/spark/tags/DockerTest.java
----------------------------------------------------------------------
diff --git a/common/tags/src/main/java/org/apache/spark/tags/DockerTest.java b/common/tags/src/main/java/org/apache/spark/tags/DockerTest.java
new file mode 100644
index 0000000..0fecf3b
--- /dev/null
+++ b/common/tags/src/main/java/org/apache/spark/tags/DockerTest.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import java.lang.annotation.*;
+import org.scalatest.TagAnnotation;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface DockerTest { }

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/tags/src/main/java/org/apache/spark/tags/ExtendedHiveTest.java
----------------------------------------------------------------------
diff --git a/common/tags/src/main/java/org/apache/spark/tags/ExtendedHiveTest.java b/common/tags/src/main/java/org/apache/spark/tags/ExtendedHiveTest.java
new file mode 100644
index 0000000..83279e5
--- /dev/null
+++ b/common/tags/src/main/java/org/apache/spark/tags/ExtendedHiveTest.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import java.lang.annotation.*;
+
+import org.scalatest.TagAnnotation;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface ExtendedHiveTest { }

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/tags/src/main/java/org/apache/spark/tags/ExtendedYarnTest.java
----------------------------------------------------------------------
diff --git a/common/tags/src/main/java/org/apache/spark/tags/ExtendedYarnTest.java b/common/tags/src/main/java/org/apache/spark/tags/ExtendedYarnTest.java
new file mode 100644
index 0000000..1083001
--- /dev/null
+++ b/common/tags/src/main/java/org/apache/spark/tags/ExtendedYarnTest.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import java.lang.annotation.*;
+
+import org.scalatest.TagAnnotation;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface ExtendedYarnTest { }

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/pom.xml
----------------------------------------------------------------------
diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
new file mode 100644
index 0000000..5250014
--- /dev/null
+++ b/common/unsafe/pom.xml
@@ -0,0 +1,110 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.11</artifactId>
+    <version>2.0.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>org.apache.spark</groupId>
+  <artifactId>spark-unsafe_2.11</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Unsafe</name>
+  <url>http://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>unsafe</sbt.project.name>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>com.twitter</groupId>
+      <artifactId>chill_${scala.binary.version}</artifactId>
+    </dependency>
+
+    <!-- Core dependencies -->
+    <dependency>
+      <groupId>com.google.code.findbugs</groupId>
+      <artifactId>jsr305</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+
+    <!-- Provided dependencies -->
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- Test dependencies -->
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-test-tags_${scala.binary.version}</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>net.alchim31.maven</groupId>
+          <artifactId>scala-maven-plugin</artifactId>
+          <configuration>
+            <javacArgs combine.children="append">
+              <!-- This option is needed to suppress warnings from sun.misc.Unsafe usage -->
+              <javacArg>-XDignore.symbol.file</javacArg>
+            </javacArgs>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <configuration>
+            <compilerArgs>
+              <!-- This option is needed to suppress warnings from sun.misc.Unsafe usage -->
+              <arg>-XDignore.symbol.file</arg>
+            </compilerArgs>
+          </configuration>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/KVIterator.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/KVIterator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/KVIterator.java
new file mode 100644
index 0000000..5c9d5d9
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/KVIterator.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe;
+
+import java.io.IOException;
+
+public abstract class KVIterator<K, V> {
+
+  public abstract boolean next() throws IOException;
+
+  public abstract K getKey();
+
+  public abstract V getValue();
+
+  public abstract void close();
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
new file mode 100644
index 0000000..18761bf
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe;
+
+import java.lang.reflect.Field;
+
+import sun.misc.Unsafe;
+
+public final class Platform {
+
+  private static final Unsafe _UNSAFE;
+
+  public static final int BYTE_ARRAY_OFFSET;
+
+  public static final int SHORT_ARRAY_OFFSET;
+
+  public static final int INT_ARRAY_OFFSET;
+
+  public static final int LONG_ARRAY_OFFSET;
+
+  public static final int FLOAT_ARRAY_OFFSET;
+
+  public static final int DOUBLE_ARRAY_OFFSET;
+
+  public static int getInt(Object object, long offset) {
+    return _UNSAFE.getInt(object, offset);
+  }
+
+  public static void putInt(Object object, long offset, int value) {
+    _UNSAFE.putInt(object, offset, value);
+  }
+
+  public static boolean getBoolean(Object object, long offset) {
+    return _UNSAFE.getBoolean(object, offset);
+  }
+
+  public static void putBoolean(Object object, long offset, boolean value) {
+    _UNSAFE.putBoolean(object, offset, value);
+  }
+
+  public static byte getByte(Object object, long offset) {
+    return _UNSAFE.getByte(object, offset);
+  }
+
+  public static void putByte(Object object, long offset, byte value) {
+    _UNSAFE.putByte(object, offset, value);
+  }
+
+  public static short getShort(Object object, long offset) {
+    return _UNSAFE.getShort(object, offset);
+  }
+
+  public static void putShort(Object object, long offset, short value) {
+    _UNSAFE.putShort(object, offset, value);
+  }
+
+  public static long getLong(Object object, long offset) {
+    return _UNSAFE.getLong(object, offset);
+  }
+
+  public static void putLong(Object object, long offset, long value) {
+    _UNSAFE.putLong(object, offset, value);
+  }
+
+  public static float getFloat(Object object, long offset) {
+    return _UNSAFE.getFloat(object, offset);
+  }
+
+  public static void putFloat(Object object, long offset, float value) {
+    _UNSAFE.putFloat(object, offset, value);
+  }
+
+  public static double getDouble(Object object, long offset) {
+    return _UNSAFE.getDouble(object, offset);
+  }
+
+  public static void putDouble(Object object, long offset, double value) {
+    _UNSAFE.putDouble(object, offset, value);
+  }
+
+  public static Object getObjectVolatile(Object object, long offset) {
+    return _UNSAFE.getObjectVolatile(object, offset);
+  }
+
+  public static void putObjectVolatile(Object object, long offset, Object value) {
+    _UNSAFE.putObjectVolatile(object, offset, value);
+  }
+
+  public static long allocateMemory(long size) {
+    return _UNSAFE.allocateMemory(size);
+  }
+
+  public static void freeMemory(long address) {
+    _UNSAFE.freeMemory(address);
+  }
+
+  public static long reallocateMemory(long address, long oldSize, long newSize) {
+    long newMemory = _UNSAFE.allocateMemory(newSize);
+    copyMemory(null, address, null, newMemory, oldSize);
+    freeMemory(address);
+    return newMemory;
+  }
+
+  public static void setMemory(long address, byte value, long size) {
+    _UNSAFE.setMemory(address, size, value);
+  }
+
+  public static void copyMemory(
+    Object src, long srcOffset, Object dst, long dstOffset, long length) {
+    // Check if dstOffset is before or after srcOffset to determine if we should copy
+    // forward or backwards. This is necessary in case src and dst overlap.
+    if (dstOffset < srcOffset) {
+      while (length > 0) {
+        long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
+        _UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
+        length -= size;
+        srcOffset += size;
+        dstOffset += size;
+      }
+    } else {
+      srcOffset += length;
+      dstOffset += length;
+      while (length > 0) {
+        long size = Math.min(length, UNSAFE_COPY_THRESHOLD);
+        srcOffset -= size;
+        dstOffset -= size;
+        _UNSAFE.copyMemory(src, srcOffset, dst, dstOffset, size);
+        length -= size;
+      }
+
+    }
+  }
+
+  /**
+   * Raises an exception bypassing compiler checks for checked exceptions.
+   */
+  public static void throwException(Throwable t) {
+    _UNSAFE.throwException(t);
+  }
+
+  /**
+   * Limits the number of bytes to copy per {@link Unsafe#copyMemory(long, long, long)} to
+   * allow safepoint polling during a large copy.
+   */
+  private static final long UNSAFE_COPY_THRESHOLD = 1024L * 1024L;
+
+  static {
+    sun.misc.Unsafe unsafe;
+    try {
+      Field unsafeField = Unsafe.class.getDeclaredField("theUnsafe");
+      unsafeField.setAccessible(true);
+      unsafe = (sun.misc.Unsafe) unsafeField.get(null);
+    } catch (Throwable cause) {
+      unsafe = null;
+    }
+    _UNSAFE = unsafe;
+
+    if (_UNSAFE != null) {
+      BYTE_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(byte[].class);
+      SHORT_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(short[].class);
+      INT_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(int[].class);
+      LONG_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(long[].class);
+      FLOAT_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(float[].class);
+      DOUBLE_ARRAY_OFFSET = _UNSAFE.arrayBaseOffset(double[].class);
+    } else {
+      BYTE_ARRAY_OFFSET = 0;
+      SHORT_ARRAY_OFFSET = 0;
+      INT_ARRAY_OFFSET = 0;
+      LONG_ARRAY_OFFSET = 0;
+      FLOAT_ARRAY_OFFSET = 0;
+      DOUBLE_ARRAY_OFFSET = 0;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
new file mode 100644
index 0000000..cf42877
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.array;
+
+import org.apache.spark.unsafe.Platform;
+
+public class ByteArrayMethods {
+
+  private ByteArrayMethods() {
+    // Private constructor, since this class only contains static methods.
+  }
+
+  /** Returns the next number greater than or equal to num that is a power of 2. */
+  public static long nextPowerOf2(long num) {
+    final long highBit = Long.highestOneBit(num);
+    return (highBit == num) ? num : highBit << 1;
+  }
+
+  public static int roundNumberOfBytesToNearestWord(int numBytes) {
+    int remainder = numBytes & 0x07;  // This is equivalent to `numBytes % 8`
+    if (remainder == 0) {
+      return numBytes;
+    } else {
+      return numBytes + (8 - remainder);
+    }
+  }
+
+  /**
+   * Optimized equality check for two byte ranges, comparing 8 bytes at a time where possible.
+   * @return true if the arrays are equal, false otherwise
+   */
+  public static boolean arrayEquals(
+      Object leftBase, long leftOffset, Object rightBase, long rightOffset, final long length) {
+    long i = 0;  // long, like length: an int index would overflow for ranges of 2GB or more
+    while (i <= length - 8) {
+      if (Platform.getLong(leftBase, leftOffset + i) !=
+        Platform.getLong(rightBase, rightOffset + i)) {
+        return false;
+      }
+      i += 8;
+    }
+    while (i < length) {  // compare the remaining (fewer than 8) tail bytes one at a time
+      if (Platform.getByte(leftBase, leftOffset + i) !=
+        Platform.getByte(rightBase, rightOffset + i)) {
+        return false;
+      }
+      i += 1;
+    }
+    return true;
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
new file mode 100644
index 0000000..1a3cdff
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.array;
+
+import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.memory.MemoryBlock;
+
+/**
+ * An array of long values. Compared with native JVM arrays, this:
+ * <ul>
+ *   <li>supports using both in-heap and off-heap memory</li>
+ *   <li>has no bound checking, and thus can crash the JVM process when assert is turned off</li>
+ * </ul>
+ */
+public final class LongArray {
+
+  // This is a long so that we perform long multiplications when computing offsets.
+  private static final long WIDTH = 8;
+
+  private final MemoryBlock memory;
+  private final Object baseObj;
+  private final long baseOffset;
+
+  private final long length;
+
+  public LongArray(MemoryBlock memory) {
+    assert memory.size() < (long) Integer.MAX_VALUE * 8: "Array size >= Integer.MAX_VALUE elements";
+    this.memory = memory;
+    this.baseObj = memory.getBaseObject();
+    this.baseOffset = memory.getBaseOffset();
+    this.length = memory.size() / WIDTH;  // capacity in 8-byte elements (integer division)
+  }
+
+  public MemoryBlock memoryBlock() {
+    return memory;
+  }
+
+  public Object getBaseObject() {
+    return baseObj;
+  }
+
+  public long getBaseOffset() {
+    return baseOffset;
+  }
+
+  /**
+   * Returns the number of elements this array can hold.
+   */
+  public long size() {
+    return length;
+  }
+
+  /**
+   * Fills this array with 0L.
+   */
+  public void zeroOut() {
+    for (long off = baseOffset; off < baseOffset + length * WIDTH; off += WIDTH) {
+      Platform.putLong(baseObj, off, 0);
+    }
+  }
+
+  /**
+   * Sets the value at position {@code index}.
+   */
+  public void set(int index, long value) {
+    assert index >= 0 : "index (" + index + ") should >= 0";
+    assert index < length : "index (" + index + ") should < length (" + length + ")";
+    Platform.putLong(baseObj, baseOffset + index * WIDTH, value);
+  }
+
+  /**
+   * Returns the value at position {@code index}.
+   */
+  public long get(int index) {
+    assert index >= 0 : "index (" + index + ") should >= 0";
+    assert index < length : "index (" + index + ") should < length (" + length + ")";
+    return Platform.getLong(baseObj, baseOffset + index * WIDTH);
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java
new file mode 100644
index 0000000..7857bf6
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.bitset;
+
+import org.apache.spark.unsafe.Platform;
+
+/**
+ * Methods for working with fixed-size uncompressed bitsets.
+ *
+ * We assume that the bitset data is word-aligned (that is, a multiple of 8 bytes in length).
+ *
+ * Each bit occupies exactly one bit of storage.
+ */
+public final class BitSetMethods {
+
+  private static final long WORD_SIZE = 8;
+
+  private BitSetMethods() {
+    // Make the default constructor private, since this only holds static methods.
+  }
+
+  /**
+   * Sets the bit at the specified index to {@code true}.
+   */
+  public static void set(Object baseObject, long baseOffset, int index) {
+    assert index >= 0 : "index (" + index + ") should >= 0";
+    final long mask = 1L << (index & 0x3f);  // mod 64 and shift
+    final long wordOffset = baseOffset + (index >> 6) * WORD_SIZE;
+    final long word = Platform.getLong(baseObject, wordOffset);
+    Platform.putLong(baseObject, wordOffset, word | mask);
+  }
+
+  /**
+   * Sets the bit at the specified index to {@code false}.
+   */
+  public static void unset(Object baseObject, long baseOffset, int index) {
+    assert index >= 0 : "index (" + index + ") should >= 0";
+    final long mask = 1L << (index & 0x3f);  // mod 64 and shift
+    final long wordOffset = baseOffset + (index >> 6) * WORD_SIZE;
+    final long word = Platform.getLong(baseObject, wordOffset);
+    Platform.putLong(baseObject, wordOffset, word & ~mask);
+  }
+
+  /**
+   * Returns {@code true} if the bit is set at the specified index.
+   */
+  public static boolean isSet(Object baseObject, long baseOffset, int index) {
+    assert index >= 0 : "index (" + index + ") should >= 0";
+    final long mask = 1L << (index & 0x3f);  // mod 64 and shift
+    final long wordOffset = baseOffset + (index >> 6) * WORD_SIZE;
+    final long word = Platform.getLong(baseObject, wordOffset);
+    return (word & mask) != 0;
+  }
+
+  /**
+   * Returns {@code true} if any bit is set.
+   */
+  public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInWords) {
+    long addr = baseOffset;
+    for (int i = 0; i < bitSetWidthInWords; i++, addr += WORD_SIZE) {
+      if (Platform.getLong(baseObject, addr) != 0) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns the index of the first bit that is set to true that occurs on or after the
+   * specified starting index. If no such bit exists then {@code -1} is returned.
+   * <p>
+   * To iterate over the true bits in a BitSet, use the following loop:
+   * <pre>
+   * <code>
+   *  for (long i = bs.nextSetBit(0, sizeInWords); i &gt;= 0; i = bs.nextSetBit(i + 1, sizeInWords)) {
+   *    // operate on index i here
+   *  }
+   * </code>
+   * </pre>
+   *
+   * @param fromIndex the index to start checking from (inclusive)
+   * @param bitsetSizeInWords the size of the bitset, measured in 8-byte words
+   * @return the index of the next set bit, or -1 if there is no such bit
+   */
+  public static int nextSetBit(
+      Object baseObject,
+      long baseOffset,
+      int fromIndex,
+      int bitsetSizeInWords) {
+    int wi = fromIndex >> 6;
+    if (wi >= bitsetSizeInWords) {
+      return -1;
+    }
+
+    // Try to find the next set bit in the current word
+    final int subIndex = fromIndex & 0x3f;
+    long word = Platform.getLong(baseObject, baseOffset + wi * WORD_SIZE) >> subIndex;
+    if (word != 0) {
+      return (wi << 6) + subIndex + java.lang.Long.numberOfTrailingZeros(word);
+    }
+
+    // Find the next set bit in the rest of the words
+    wi += 1;
+    while (wi < bitsetSizeInWords) {
+      word = Platform.getLong(baseObject, baseOffset + wi * WORD_SIZE);
+      if (word != 0) {
+        return (wi << 6) + java.lang.Long.numberOfTrailingZeros(word);
+      }
+      wi += 1;
+    }
+
+    return -1;
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
new file mode 100644
index 0000000..5e7ee48
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.hash;
+
+import org.apache.spark.unsafe.Platform;
+
+/**
+ * 32-bit Murmur3 hasher.  This is based on Guava's Murmur3_32HashFunction.
+ */
+public final class Murmur3_x86_32 {
+  private static final int C1 = 0xcc9e2d51;
+  private static final int C2 = 0x1b873593;
+
+  private final int seed;
+
+  public Murmur3_x86_32(int seed) {
+    this.seed = seed;
+  }
+
+  @Override
+  public String toString() {
+    return "Murmur3_32(seed=" + seed + ")";
+  }
+
+  public int hashInt(int input) {
+    return hashInt(input, seed);
+  }
+
+  public static int hashInt(int input, int seed) {
+    int k1 = mixK1(input);
+    int h1 = mixH1(seed, k1);
+
+    return fmix(h1, 4);
+  }
+
+  public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
+    return hashUnsafeWords(base, offset, lengthInBytes, seed);
+  }
+
+  public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
+    // This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
+    assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
+    int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
+    return fmix(h1, lengthInBytes);
+  }
+
+  public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
+    assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
+    int lengthAligned = lengthInBytes - lengthInBytes % 4;
+    int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
+    for (int i = lengthAligned; i < lengthInBytes; i++) {
+      int halfWord = Platform.getByte(base, offset + i);
+      int k1 = mixK1(halfWord);
+      h1 = mixH1(h1, k1);
+    }
+    return fmix(h1, lengthInBytes);
+  }
+
+  private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
+    assert (lengthInBytes % 4 == 0);
+    int h1 = seed;
+    for (int i = 0; i < lengthInBytes; i += 4) {
+      int halfWord = Platform.getInt(base, offset + i);
+      int k1 = mixK1(halfWord);
+      h1 = mixH1(h1, k1);
+    }
+    return h1;
+  }
+
+  public int hashLong(long input) {
+    return hashLong(input, seed);
+  }
+
+  public static int hashLong(long input, int seed) {
+    int low = (int) input;
+    int high = (int) (input >>> 32);
+
+    int k1 = mixK1(low);
+    int h1 = mixH1(seed, k1);
+
+    k1 = mixK1(high);
+    h1 = mixH1(h1, k1);
+
+    return fmix(h1, 8);
+  }
+
+  private static int mixK1(int k1) {
+    k1 *= C1;
+    k1 = Integer.rotateLeft(k1, 15);
+    k1 *= C2;
+    return k1;
+  }
+
+  private static int mixH1(int h1, int k1) {
+    h1 ^= k1;
+    h1 = Integer.rotateLeft(h1, 13);
+    h1 = h1 * 5 + 0xe6546b64;
+    return h1;
+  }
+
+  // Finalization mix - force all bits of a hash block to avalanche
+  private static int fmix(int h1, int length) {
+    h1 ^= length;
+    h1 ^= h1 >>> 16;
+    h1 *= 0x85ebca6b;
+    h1 ^= h1 >>> 13;
+    h1 *= 0xc2b2ae35;
+    h1 ^= h1 >>> 16;
+    return h1;
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
new file mode 100644
index 0000000..09847ce
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/HeapMemoryAllocator.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.memory;
+
+import javax.annotation.concurrent.GuardedBy;
+import java.lang.ref.WeakReference;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+
+import org.apache.spark.unsafe.Platform;
+
+/**
+ * A simple {@link MemoryAllocator} that can allocate up to 16GB using a JVM long primitive array.
+ */
+public class HeapMemoryAllocator implements MemoryAllocator {
+
+  @GuardedBy("this")
+  private final Map<Long, LinkedList<WeakReference<MemoryBlock>>> bufferPoolsBySize =
+    new HashMap<>();
+
+  private static final int POOLING_THRESHOLD_BYTES = 1024 * 1024;
+
+  /**
+   * Returns true if allocations of the given size should go through the pooling mechanism and
+   * false otherwise.
+   */
+  private boolean shouldPool(long size) {
+    // Very small allocations are less likely to benefit from pooling.
+    return size >= POOLING_THRESHOLD_BYTES;
+  }
+
+  @Override
+  public MemoryBlock allocate(long size) throws OutOfMemoryError {
+    if (shouldPool(size)) {
+      synchronized (this) {
+        final LinkedList<WeakReference<MemoryBlock>> pool = bufferPoolsBySize.get(size);
+        if (pool != null) {
+          while (!pool.isEmpty()) {
+            final WeakReference<MemoryBlock> blockReference = pool.pop();
+            final MemoryBlock memory = blockReference.get();
+            if (memory != null) {
+              assert (memory.size() == size);
+              return memory;
+            }
+          }
+          bufferPoolsBySize.remove(size);
+        }
+      }
+    }
+    long[] array = new long[(int) ((size + 7) / 8)];
+    return new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, size);
+  }
+
+  @Override
+  public void free(MemoryBlock memory) {
+    final long size = memory.size();
+    if (shouldPool(size)) {
+      synchronized (this) {
+        LinkedList<WeakReference<MemoryBlock>> pool = bufferPoolsBySize.get(size);
+        if (pool == null) {
+          pool = new LinkedList<>();
+          bufferPoolsBySize.put(size, pool);
+        }
+        pool.add(new WeakReference<>(memory));
+      }
+    } else {
+      // Do nothing
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
new file mode 100644
index 0000000..5192f68
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryAllocator.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.memory;
+
+public interface MemoryAllocator {
+
+  /**
+   * Allocates a contiguous block of memory. Note that the allocated memory is not guaranteed
+   * to be zeroed out (call `zero()` on the result if this is necessary).
+   */
+  MemoryBlock allocate(long size) throws OutOfMemoryError;
+
+  void free(MemoryBlock memory);
+
+  MemoryAllocator UNSAFE = new UnsafeMemoryAllocator();
+
+  MemoryAllocator HEAP = new HeapMemoryAllocator();
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
new file mode 100644
index 0000000..e3e7947
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryBlock.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.memory;
+
+import javax.annotation.Nullable;
+
+import org.apache.spark.unsafe.Platform;
+
+/**
+ * A consecutive block of memory, starting at a {@link MemoryLocation} with a fixed size.
+ */
+public class MemoryBlock extends MemoryLocation {
+
+  private final long length;
+
+  /**
+   * Optional page number; used when this MemoryBlock represents a page allocated by a
+   * TaskMemoryManager. This field is public so that it can be modified by the TaskMemoryManager,
+   * which lives in a different package.
+   */
+  public int pageNumber = -1;
+
+  public MemoryBlock(@Nullable Object obj, long offset, long length) {
+    super(obj, offset);
+    this.length = length;
+  }
+
+  /**
+   * Returns the size of the memory block.
+   */
+  public long size() {
+    return length;
+  }
+
+  /**
+   * Creates a memory block pointing to the memory used by the long array.
+   */
+  public static MemoryBlock fromLongArray(final long[] array) {
+    return new MemoryBlock(array, Platform.LONG_ARRAY_OFFSET, array.length * 8L);
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
new file mode 100644
index 0000000..74ebc87
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/MemoryLocation.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.memory;
+
+import javax.annotation.Nullable;
+
+/**
+ * A memory location. Tracked either by a memory address (with off-heap allocation),
+ * or by an offset from a JVM object (in-heap allocation).
+ */
+public class MemoryLocation {
+
+  @Nullable
+  Object obj;
+
+  long offset;
+
+  public MemoryLocation(@Nullable Object obj, long offset) {
+    this.obj = obj;
+    this.offset = offset;
+  }
+
+  public MemoryLocation() {
+    this(null, 0);
+  }
+
+  public void setObjAndOffset(Object newObj, long newOffset) {
+    this.obj = newObj;
+    this.offset = newOffset;
+  }
+
+  public final Object getBaseObject() {
+    return obj;
+  }
+
+  public final long getBaseOffset() {
+    return offset;
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
new file mode 100644
index 0000000..98ce711
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/memory/UnsafeMemoryAllocator.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.memory;
+
+import org.apache.spark.unsafe.Platform;
+
+/**
+ * A simple {@link MemoryAllocator} that uses {@code Unsafe} to allocate off-heap memory.
+ */
+public class UnsafeMemoryAllocator implements MemoryAllocator {
+
+  @Override
+  public MemoryBlock allocate(long size) throws OutOfMemoryError {
+    long address = Platform.allocateMemory(size);
+    return new MemoryBlock(null, address, size);
+  }
+
+  @Override
+  public void free(MemoryBlock memory) {
+    assert (memory.obj == null) :
+      "baseObject not null; are you trying to use the off-heap allocator to free on-heap memory?";
+    Platform.freeMemory(memory.offset);
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
new file mode 100644
index 0000000..3ced209
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import org.apache.spark.unsafe.Platform;
+
+import java.util.Arrays;
+
+public final class ByteArray {
+
+  public static final byte[] EMPTY_BYTE = new byte[0];
+
+  /**
+   * Writes the content of a byte array into a memory address, identified by an object and an
+   * offset. The target memory address must already have been allocated, and have enough space
+   * to hold all the bytes in the source array.
+   */
+  public static void writeToMemory(byte[] src, Object target, long targetOffset) {
+    Platform.copyMemory(src, Platform.BYTE_ARRAY_OFFSET, target, targetOffset, src.length);
+  }
+
+  /**
+   * Returns a 64-bit integer that can be used as the prefix used in sorting.
+   */
+  public static long getPrefix(byte[] bytes) {
+    if (bytes == null) {
+      return 0L;
+    } else {
+      final int minLen = Math.min(bytes.length, 8);
+      long p = 0;
+      for (int i = 0; i < minLen; ++i) {
+        p |= (128L + Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i))
+            << (56 - 8 * i);
+      }
+      return p;
+    }
+  }
+
+  public static byte[] subStringSQL(byte[] bytes, int pos, int len) {
+    // This pos calculation is according to UTF8String#subStringSQL
+    if (pos > bytes.length) {
+      return EMPTY_BYTE;
+    }
+    int start = 0;
+    int end;
+    if (pos > 0) {
+      start = pos - 1;
+    } else if (pos < 0) {
+      start = bytes.length + pos;
+    }
+    if ((bytes.length - start) < len) {
+      end = bytes.length;
+    } else {
+      end = start + len;
+    }
+    start = Math.max(start, 0); // underflow
+    if (start >= end) {
+      return EMPTY_BYTE;
+    }
+    return Arrays.copyOfRange(bytes, start, end);
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b0ee7d43/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
----------------------------------------------------------------------
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
new file mode 100644
index 0000000..62edf6c
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
@@ -0,0 +1,324 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import java.io.Serializable;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * The internal representation of interval type.
+ *
+ * An interval is held as two independent fields: a number of months and a number of
+ * microseconds. Both fields are final; the arithmetic methods return new instances.
+ */
+public final class CalendarInterval implements Serializable {
+  public static final long MICROS_PER_MILLI = 1000L;
+  public static final long MICROS_PER_SECOND = MICROS_PER_MILLI * 1000;
+  public static final long MICROS_PER_MINUTE = MICROS_PER_SECOND * 60;
+  public static final long MICROS_PER_HOUR = MICROS_PER_MINUTE * 60;
+  public static final long MICROS_PER_DAY = MICROS_PER_HOUR * 24;
+  public static final long MICROS_PER_WEEK = MICROS_PER_DAY * 7;
+
+  /**
+   * A function to generate regex which matches interval string's unit part like "3 years".
+   *
+   * First, we can leave out some units in interval string, and we only care about the value of
+   * unit, so here we use non-capturing group to wrap the actual regex.
+   * At the beginning of the actual regex, we should match spaces before the unit part.
+   * Next is the number part, starts with an optional "-" to represent negative value. We use
+   * capturing group to wrap this part as we need the value later.
+   * Finally is the unit name, ends with an optional "s".
+   */
+  private static String unitRegex(String unit) {
+    return "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?";
+  }
+
+  // Matches "interval" followed by the optional unit parts, in this fixed order.
+  private static final Pattern p = Pattern.compile("interval" + unitRegex("year") +
+    unitRegex("month") + unitRegex("week") + unitRegex("day") + unitRegex("hour") +
+    unitRegex("minute") + unitRegex("second") + unitRegex("millisecond") +
+    unitRegex("microsecond"));
+
+  // Inside a character class "|" is a literal, not alternation, so the classes below list
+  // only the quote and sign characters that are actually meant to be accepted.
+  private static final Pattern yearMonthPattern =
+    Pattern.compile("^(?:['\"])?([+-])?(\\d+)-(\\d+)(?:['\"])?$");
+
+  private static final Pattern dayTimePattern =
+    Pattern.compile("^(?:['\"])?([+-])?(\\d+) (\\d+):(\\d+):(\\d+)(\\.(\\d+))?(?:['\"])?$");
+
+  private static final Pattern quoteTrimPattern = Pattern.compile("^(?:['\"])?(.*?)(?:['\"])?$");
+
+  /**
+   * Parses a decimal long, treating null (an unmatched regex group) as 0.
+   */
+  private static long toLong(String s) {
+    return s == null ? 0L : Long.parseLong(s);
+  }
+
+  /**
+   * Parses a string such as "interval 1 year 2 months 3 days". Every unit is optional, but the
+   * units that are present must follow the order year, month, week, day, hour, minute, second,
+   * millisecond, microsecond.
+   *
+   * @param s the text to parse; leading and trailing whitespace is ignored
+   * @return the parsed interval, or null if s is null, does not match the format, or is the
+   *         bare word "interval"
+   * @throws NumberFormatException if a matched number does not fit in a long
+   */
+  public static CalendarInterval fromString(String s) {
+    if (s == null) {
+      return null;
+    }
+    s = s.trim();
+    Matcher m = p.matcher(s);
+    if (!m.matches() || s.equals("interval")) {
+      return null;
+    } else {
+      // NOTE(review): these sums are not overflow-checked and months is narrowed to int below.
+      long months = toLong(m.group(1)) * 12 + toLong(m.group(2));
+      long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK;
+      microseconds += toLong(m.group(4)) * MICROS_PER_DAY;
+      microseconds += toLong(m.group(5)) * MICROS_PER_HOUR;
+      microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE;
+      microseconds += toLong(m.group(7)) * MICROS_PER_SECOND;
+      microseconds += toLong(m.group(8)) * MICROS_PER_MILLI;
+      microseconds += toLong(m.group(9));
+      return new CalendarInterval((int) months, microseconds);
+    }
+  }
+
+  /**
+   * Parses a decimal long and checks that it lies within an inclusive range.
+   *
+   * @param fieldName name of the field, used only in the error message
+   * @param s the text to parse; null yields 0 without any range check
+   * @param minValue smallest accepted value
+   * @param maxValue largest accepted value
+   * @return the parsed value, or 0 when s is null
+   * @throws IllegalArgumentException if the value is outside the range, or (as a
+   *         NumberFormatException) if s is not a valid long
+   */
+  public static long toLongWithRange(String fieldName,
+      String s, long minValue, long maxValue) throws IllegalArgumentException {
+    long result = 0;
+    if (s != null) {
+      result = Long.parseLong(s);
+      if (result < minValue || result > maxValue) {
+        throw new IllegalArgumentException(String.format("%s %d outside range [%d, %d]",
+          fieldName, result, minValue, maxValue));
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Parse YearMonth string in form: [-]YYYY-MM
+   *
+   * adapted from HiveIntervalYearMonth.valueOf
+   *
+   * @param s the text to parse, optionally wrapped in single or double quotes
+   * @return an interval holding only months
+   * @throws IllegalArgumentException if s is null, does not match the format, has a month part
+   *         above 11, or describes more months than fit in an int
+   */
+  public static CalendarInterval fromYearMonthString(String s) throws IllegalArgumentException {
+    CalendarInterval result = null;
+    if (s == null) {
+      throw new IllegalArgumentException("Interval year-month string was null");
+    }
+    s = s.trim();
+    Matcher m = yearMonthPattern.matcher(s);
+    if (!m.matches()) {
+      throw new IllegalArgumentException(
+        "Interval string does not match year-month format of 'y-m': " + s);
+    } else {
+      try {
+        int sign = "-".equals(m.group(1)) ? -1 : 1;
+        long years = toLongWithRange("year", m.group(2), 0, Integer.MAX_VALUE);
+        long months = toLongWithRange("month", m.group(3), 0, 11);
+        // Sum in long and range-check the total: years * 12 does not fit in an int for
+        // large year values and would otherwise wrap silently.
+        long totalMonths = years * 12 + months;
+        if (totalMonths > Integer.MAX_VALUE) {
+          throw new IllegalArgumentException(String.format(
+            "total months %d outside range [0, %d]", totalMonths, Integer.MAX_VALUE));
+        }
+        result = new CalendarInterval(sign * (int) totalMonths, 0);
+      } catch (Exception e) {
+        throw new IllegalArgumentException(
+          "Error parsing interval year-month string: " + e.getMessage(), e);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn
+   *
+   * adapted from HiveIntervalDayTime.valueOf
+   *
+   * The digits after the decimal point are read as an integer count of nanoseconds (so ".888"
+   * means 888 nanoseconds) and are truncated to whole microseconds.
+   *
+   * @param s the text to parse, optionally wrapped in single or double quotes
+   * @return an interval holding only microseconds
+   * @throws IllegalArgumentException if s is null, does not match the format, has a field out
+   *         of range, or describes more microseconds than fit in a long
+   */
+  public static CalendarInterval fromDayTimeString(String s) throws IllegalArgumentException {
+    CalendarInterval result = null;
+    if (s == null) {
+      throw new IllegalArgumentException("Interval day-time string was null");
+    }
+    s = s.trim();
+    Matcher m = dayTimePattern.matcher(s);
+    if (!m.matches()) {
+      throw new IllegalArgumentException(
+        "Interval string does not match day-time format of 'd h:m:s.n': " + s);
+    } else {
+      try {
+        int sign = "-".equals(m.group(1)) ? -1 : 1;
+        // Bound days so that days * MICROS_PER_DAY cannot overflow a long.
+        long days = toLongWithRange("day", m.group(2), 0, Long.MAX_VALUE / MICROS_PER_DAY);
+        long hours = toLongWithRange("hour", m.group(3), 0, 23);
+        long minutes = toLongWithRange("minute", m.group(4), 0, 59);
+        long seconds = toLongWithRange("second", m.group(5), 0, 59);
+        // Hive allows nanosecond precision interval
+        long nanos = toLongWithRange("nanosecond", m.group(7), 0L, 999999999L);
+        long micros = days * MICROS_PER_DAY + hours * MICROS_PER_HOUR +
+          minutes * MICROS_PER_MINUTE + seconds * MICROS_PER_SECOND + nanos / 1000L;
+        // Every term is non-negative, so a negative sum can only mean the addition wrapped.
+        if (micros < 0) {
+          throw new IllegalArgumentException("day-time interval overflows microseconds range");
+        }
+        result = new CalendarInterval(0, sign * micros);
+      } catch (Exception e) {
+        throw new IllegalArgumentException(
+          "Error parsing interval day-time string: " + e.getMessage(), e);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Parses the value of a single-unit interval, such as "3" for unit "day".
+   *
+   * @param unit one of "year", "month", "week", "day", "hour", "minute", "second",
+   *             "millisecond" or "microsecond"
+   * @param s the numeric text, optionally wrapped in single or double quotes
+   * @return the parsed interval, or null when unit is none of the names listed above
+   * @throws IllegalArgumentException if s is null, cannot be parsed, or is out of range for
+   *         the unit
+   */
+  public static CalendarInterval fromSingleUnitString(String unit, String s)
+      throws IllegalArgumentException {
+
+    CalendarInterval result = null;
+    if (s == null) {
+      throw new IllegalArgumentException(String.format("Interval %s string was null", unit));
+    }
+    s = s.trim();
+    Matcher m = quoteTrimPattern.matcher(s);
+    if (!m.matches()) {
+      throw new IllegalArgumentException(
+        "Interval string does not match " + unit + " format: " + s);
+    } else {
+      try {
+        if (unit.equals("year")) {
+          int year = (int) toLongWithRange("year", m.group(1),
+            Integer.MIN_VALUE / 12, Integer.MAX_VALUE / 12);
+          result = new CalendarInterval(year * 12, 0L);
+
+        } else if (unit.equals("month")) {
+          int month = (int) toLongWithRange("month", m.group(1),
+            Integer.MIN_VALUE, Integer.MAX_VALUE);
+          result = new CalendarInterval(month, 0L);
+
+        } else if (unit.equals("week")) {
+          long week = toLongWithRange("week", m.group(1),
+            Long.MIN_VALUE / MICROS_PER_WEEK, Long.MAX_VALUE / MICROS_PER_WEEK);
+          result = new CalendarInterval(0, week * MICROS_PER_WEEK);
+
+        } else if (unit.equals("day")) {
+          long day = toLongWithRange("day", m.group(1),
+            Long.MIN_VALUE / MICROS_PER_DAY, Long.MAX_VALUE / MICROS_PER_DAY);
+          result = new CalendarInterval(0, day * MICROS_PER_DAY);
+
+        } else if (unit.equals("hour")) {
+          long hour = toLongWithRange("hour", m.group(1),
+            Long.MIN_VALUE / MICROS_PER_HOUR, Long.MAX_VALUE / MICROS_PER_HOUR);
+          result = new CalendarInterval(0, hour * MICROS_PER_HOUR);
+
+        } else if (unit.equals("minute")) {
+          long minute = toLongWithRange("minute", m.group(1),
+            Long.MIN_VALUE / MICROS_PER_MINUTE, Long.MAX_VALUE / MICROS_PER_MINUTE);
+          result = new CalendarInterval(0, minute * MICROS_PER_MINUTE);
+
+        } else if (unit.equals("second")) {
+          long micros = parseSecondNano(m.group(1));
+          result = new CalendarInterval(0, micros);
+
+        } else if (unit.equals("millisecond")) {
+          long millisecond = toLongWithRange("millisecond", m.group(1),
+            Long.MIN_VALUE / MICROS_PER_MILLI, Long.MAX_VALUE / MICROS_PER_MILLI);
+          result = new CalendarInterval(0, millisecond * MICROS_PER_MILLI);
+
+        } else if (unit.equals("microsecond")) {
+          long micros = Long.parseLong(m.group(1));
+          result = new CalendarInterval(0, micros);
+        }
+      } catch (Exception e) {
+        throw new IllegalArgumentException("Error parsing interval string: " + e.getMessage(), e);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Parse second_nano string in ss.nnnnnnnnn format to microseconds
+   *
+   * The part after the dot is read as an integer count of nanoseconds and truncated to whole
+   * microseconds. It carries the sign of the seconds part, so "-1.500000000" is -1500000.
+   *
+   * @param secondNano the text to parse; must not be null
+   * @return the value in microseconds
+   * @throws IllegalArgumentException if the text is malformed, a part is out of range, or the
+   *         result does not fit in a long
+   */
+  public static long parseSecondNano(String secondNano) throws IllegalArgumentException {
+    String[] parts = secondNano.split("\\.");
+    if (parts.length == 1) {
+      return toLongWithRange("second", parts[0], Long.MIN_VALUE / MICROS_PER_SECOND,
+        Long.MAX_VALUE / MICROS_PER_SECOND) * MICROS_PER_SECOND;
+
+    } else if (parts.length == 2) {
+      long seconds = parts[0].equals("") ? 0L : toLongWithRange("second", parts[0],
+        Long.MIN_VALUE / MICROS_PER_SECOND, Long.MAX_VALUE / MICROS_PER_SECOND);
+      long nanos = toLongWithRange("nanosecond", parts[1], 0L, 999999999L);
+      long fraction = nanos / 1000L;
+      // Look at the text rather than the parsed value so that "-0.5" is negative too.
+      boolean negative = parts[0].startsWith("-");
+      long base = seconds * MICROS_PER_SECOND;
+      long micros = negative ? base - fraction : base + fraction;
+      // Moving away from zero must not bring the value back past base; if it did, it wrapped.
+      if (negative ? micros > base : micros < base) {
+        throw new IllegalArgumentException(
+          "Interval string overflows microseconds range: " + secondNano);
+      }
+      return micros;
+
+    } else {
+      throw new IllegalArgumentException(
+        "Interval string does not match second-nano format of ss.nnnnnnnnn");
+    }
+  }
+
+  public final int months;
+  public final long microseconds;
+
+  public CalendarInterval(int months, long microseconds) {
+    this.months = months;
+    this.microseconds = microseconds;
+  }
+
+  /**
+   * Returns the field-wise sum of this interval and that; the sums are not overflow-checked.
+   */
+  public CalendarInterval add(CalendarInterval that) {
+    int months = this.months + that.months;
+    long microseconds = this.microseconds + that.microseconds;
+    return new CalendarInterval(months, microseconds);
+  }
+
+  /**
+   * Returns the field-wise difference of this interval and that; not overflow-checked.
+   */
+  public CalendarInterval subtract(CalendarInterval that) {
+    int months = this.months - that.months;
+    long microseconds = this.microseconds - that.microseconds;
+    return new CalendarInterval(months, microseconds);
+  }
+
+  /**
+   * Returns an interval with both fields negated.
+   */
+  public CalendarInterval negate() {
+    return new CalendarInterval(-this.months, -this.microseconds);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+    // instanceof is false for null, so no separate null check is needed.
+    if (!(other instanceof CalendarInterval)) {
+      return false;
+    }
+    CalendarInterval o = (CalendarInterval) other;
+    return this.months == o.months && this.microseconds == o.microseconds;
+  }
+
+  @Override
+  public int hashCode() {
+    return 31 * months + (int) microseconds;
+  }
+
+  /**
+   * Renders the interval as "interval" followed by each non-zero unit, for example
+   * "interval 1 years 2 months". Unit names are always written with a trailing "s".
+   */
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder("interval");
+
+    if (months != 0) {
+      appendUnit(sb, months / 12, "year");
+      appendUnit(sb, months % 12, "month");
+    }
+
+    if (microseconds != 0) {
+      long rest = microseconds;
+      appendUnit(sb, rest / MICROS_PER_WEEK, "week");
+      rest %= MICROS_PER_WEEK;
+      appendUnit(sb, rest / MICROS_PER_DAY, "day");
+      rest %= MICROS_PER_DAY;
+      appendUnit(sb, rest / MICROS_PER_HOUR, "hour");
+      rest %= MICROS_PER_HOUR;
+      appendUnit(sb, rest / MICROS_PER_MINUTE, "minute");
+      rest %= MICROS_PER_MINUTE;
+      appendUnit(sb, rest / MICROS_PER_SECOND, "second");
+      rest %= MICROS_PER_SECOND;
+      appendUnit(sb, rest / MICROS_PER_MILLI, "millisecond");
+      rest %= MICROS_PER_MILLI;
+      appendUnit(sb, rest, "microsecond");
+    }
+
+    return sb.toString();
+  }
+
+  /**
+   * Appends " value units" to sb, skipping units whose value is zero.
+   */
+  private static void appendUnit(StringBuilder sb, long value, String unit) {
+    if (value != 0) {
+      sb.append(' ').append(value).append(' ').append(unit).append('s');
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org