You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2015/12/30 17:33:57 UTC

hive git commit: HIVE-12372 : Support multibyte characters in lpad and rpad (Shinichi Yamashita via Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 7b7d288d6 -> 1a460b503


HIVE-12372 : Support multibyte characters in lpad and rpad (Shinichi Yamashita via Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a460b50
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a460b50
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a460b50

Branch: refs/heads/master
Commit: 1a460b503788a5329dc374aaafc1eb4eb79f6f46
Parents: 7b7d288
Author: Shinichi Yamashita <ya...@oss.nttdata.co.jp>
Authored: Tue Dec 29 16:23:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Wed Dec 30 08:33:35 2015 -0800

----------------------------------------------------------------------
 .../hive/ql/udf/generic/GenericUDFBasePad.java  | 19 +++++++------------
 .../hive/ql/udf/generic/GenericUDFLpad.java     | 20 ++++++++------------
 .../hive/ql/udf/generic/GenericUDFRpad.java     | 16 +++++++---------
 .../hive/ql/udf/generic/TestGenericUDFLpad.java |  2 ++
 .../hive/ql/udf/generic/TestGenericUDFRpad.java | 10 ++++++----
 5 files changed, 30 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1a460b50/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java
index 33839f7..f0b8e3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java
@@ -36,9 +36,11 @@ public abstract class GenericUDFBasePad extends GenericUDF {
   private transient Converter converter3;
   private Text result = new Text();
   private String udfName;
+  private StringBuilder builder;
 
   public GenericUDFBasePad(String _udfName) {
     this.udfName = _udfName;
+    this.builder = new StringBuilder();
   }
 
   @Override
@@ -68,17 +70,10 @@ public abstract class GenericUDFBasePad extends GenericUDF {
       return null;
     }
     int len = lenW.get();
+    builder.setLength(0);
 
-    byte[] data = result.getBytes();
-    if (data.length < len) {
-      data = new byte[len];
-    }
-
-    byte[] txt = str.getBytes();
-    byte[] padTxt = pad.getBytes();
-
-    performOp(data, txt, padTxt, len, str, pad);
-    result.set(data, 0, len);
+    performOp(builder, len, str.toString(), pad.toString());
+    result.set(builder.toString());
     return result;
   }
 
@@ -87,8 +82,8 @@ public abstract class GenericUDFBasePad extends GenericUDF {
     return getStandardDisplayString(udfName, children);
   }
 
-  protected abstract void performOp(byte[] data, byte[] txt, byte[] padTxt, int len, Text str,
-      Text pad);
+  protected abstract void performOp(
+      StringBuilder builder, int len, String str, String pad);
 
   // Convert input arguments to Text, if necessary.
   private Converter checkTextArguments(ObjectInspector[] arguments, int i)

http://git-wip-us.apache.org/repos/asf/hive/blob/1a460b50/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java
index f15a869..32b2ea2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLpad.java
@@ -39,21 +39,17 @@ public class GenericUDFLpad extends GenericUDFBasePad {
   }
 
   @Override
-  protected void performOp(byte[] data, byte[] txt, byte[] padTxt, int len, Text str, Text pad) {
-    // The length of the padding needed
-    int pos = Math.max(len - str.getLength(), 0);
+  protected void performOp(
+      StringBuilder builder, int len, String str, String pad) {
+    int pos = Math.max(len - str.length(), 0);
 
-    // Copy the padding
-    for (int i = 0; i < pos; i += pad.getLength()) {
-      for (int j = 0; j < pad.getLength() && j < pos - i; j++) {
-        data[i + j] = padTxt[j];
-      }
+    for (int i = 0; i < pos; i += pad.length()) {
+      builder.append(pad);
     }
+    builder.setLength(pos);
 
     // Copy the text
-    for (int i = 0; pos + i < len && i < str.getLength(); i++) {
-      data[pos + i] = txt[i];
-    }
+    builder.append(str, 0, str.length());
+    builder.setLength(len);
   }
-
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/1a460b50/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java
index b512332..a063b37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFRpad.java
@@ -38,19 +38,17 @@ public class GenericUDFRpad extends GenericUDFBasePad {
   }
 
   @Override
-  protected void performOp(byte[] data, byte[] txt, byte[] padTxt, int len, Text str, Text pad) {
-    int pos;
+  protected void performOp(
+      StringBuilder builder, int len, String str, String pad) {
+    int pos = str.length();
     // Copy the text
-    for (pos = 0; pos < str.getLength() && pos < len; pos++) {
-      data[pos] = txt[pos];
-    }
+    builder.append(str, 0, pos);
 
     // Copy the padding
     while (pos < len) {
-      for (int i = 0; i < pad.getLength() && i < len - pos; i++) {
-        data[pos + i] = padTxt[i];
-      }
-      pos += pad.getLength();
+      builder.append(pad);
+      pos += pad.length();
     }
+    builder.setLength(len);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/1a460b50/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLpad.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLpad.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLpad.java
index 6344417..f2d1fa9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLpad.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFLpad.java
@@ -41,6 +41,8 @@ public class TestGenericUDFLpad extends TestCase {
     udf.initialize(arguments);
     runAndVerify("hi", 5, "??", "???hi", udf);
     runAndVerify("hi", 1, "??", "h", udf);
+    runAndVerify("ｈｉ", 5, "？？", "？？？ｈｉ", udf);
+    runAndVerify("ｈｉ", 1, "？？", "ｈ", udf);
   }
 
   private void runAndVerify(String str, int len, String pad, String expResult, GenericUDF udf)

http://git-wip-us.apache.org/repos/asf/hive/blob/1a460b50/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRpad.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRpad.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRpad.java
index 71ffcb9..62908fd 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRpad.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFRpad.java
@@ -31,16 +31,18 @@ import junit.framework.TestCase;
 
 public class TestGenericUDFRpad extends TestCase {
 
-  public void testLpad() throws HiveException {
-    GenericUDFLpad udf = new GenericUDFLpad();
+  public void testRpad() throws HiveException {
+    GenericUDFRpad udf = new GenericUDFRpad();
     ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
     ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
     ObjectInspector valueOI3 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
     ObjectInspector[] arguments = { valueOI1, valueOI2, valueOI3 };
 
     udf.initialize(arguments);
-    runAndVerify("hi", 5, "??", "???hi", udf);
+    runAndVerify("hi", 5, "??", "hi???", udf);
     runAndVerify("hi", 1, "??", "h", udf);
+    runAndVerify("ｈｉ", 5, "？？", "ｈｉ？？？", udf);
+    runAndVerify("ｈｉ", 1, "？？", "ｈ", udf);
   }
 
   private void runAndVerify(String str, int len, String pad, String expResult, GenericUDF udf)
@@ -50,6 +52,6 @@ public class TestGenericUDFRpad extends TestCase {
     DeferredObject valueObj3 = new DeferredJavaObject(new Text(pad));
     DeferredObject[] args = { valueObj1, valueObj2, valueObj3 };
     Text output = (Text) udf.evaluate(args);
-    assertEquals("lpad() test ", expResult, output.toString());
+    assertEquals("rpad() test ", expResult, output.toString());
   }
 }