You are viewing a plain text version of this content. The canonical link was not preserved in this plain text version.
Posted to commits@drill.apache.org by pa...@apache.org on 2017/05/13 17:39:04 UTC

[6/7] drill git commit: DRILL-5450: Fix initcap function to convert upper case characters correctly

DRILL-5450: Fix initcap function to convert upper case characters correctly

This closes #821


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/cb9547a6
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/cb9547a6
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/cb9547a6

Branch: refs/heads/master
Commit: cb9547a67ada2f848b145b4611c463cd25154295
Parents: 27c5f45
Author: Arina Ielchiieva <ar...@gmail.com>
Authored: Thu Apr 27 11:44:18 2017 +0000
Committer: Parth Chandra <pc...@maprtech.com>
Committed: Fri May 12 17:07:51 2017 -0700

----------------------------------------------------------------------
 .../expr/fn/impl/StringFunctionHelpers.java     | 51 ++++-------
 .../exec/expr/fn/impl/StringFunctions.java      | 16 +---
 .../exec/expr/fn/impl/TestStringFunctions.java  | 93 +++++++++++++++++++-
 3 files changed, 112 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/cb9547a6/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
index 88f3417..207d96c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/*
 
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -15,7 +15,7 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- ******************************************************************************/
+*/
 package org.apache.drill.exec.expr.fn.impl;
 
 import io.netty.buffer.DrillBuf;
@@ -144,41 +144,28 @@ public class StringFunctionHelpers {
     return result;
   }
 
-  // Assumes Alpha as [A-Za-z0-9]
-  // white space is treated as everything else.
+  /**
+   * Upper-cases the first character of each word and lower-cases the rest of the word.
+   * Any character other than a letter or a digit is treated as a word delimiter.
+   *
+   * @param start start position in input buffer
+   * @param end end position in input buffer
+   * @param inBuf buffer with input characters
+   * @param outBuf buffer with output characters
+   */
   public static void initCap(int start, int end, DrillBuf inBuf, DrillBuf outBuf) {
-    boolean capNext = true;
+    boolean capitalizeNext = true;
     int out = 0;
     for (int id = start; id < end; id++, out++) {
-      byte currentByte = inBuf.getByte(id);
-
-      // 'A - Z' : 0x41 - 0x5A
-      // 'a - z' : 0x61 - 0x7A
-      // '0-9' : 0x30 - 0x39
-      if (capNext) { // curCh is whitespace or first character of word.
-        if (currentByte >= 0x30 && currentByte <= 0x39) { // 0-9
-          capNext = false;
-        } else if (currentByte >= 0x41 && currentByte <= 0x5A) { // A-Z
-          capNext = false;
-        } else if (currentByte >= 0x61 && currentByte <= 0x7A) { // a-z
-          capNext = false;
-          currentByte -= 0x20; // Uppercase this character
-        }
-        // else {} whitespace
-      } else { // Inside of a word or white space after end of word.
-        if (currentByte >= 0x30 && currentByte <= 0x39) { // 0-9
-          // noop
-        } else if (currentByte >= 0x41 && currentByte <= 0x5A) { // A-Z
-          currentByte -= 0x20; // Lowercase this character
-        } else if (currentByte >= 0x61 && currentByte <= 0x7A) { // a-z
-          // noop
-        } else { // whitespace
-          capNext = true;
-        }
+      int currentByte = inBuf.getByte(id);
+      if (Character.isLetterOrDigit(currentByte)) {
+        currentByte = capitalizeNext ? Character.toUpperCase(currentByte) : Character.toLowerCase(currentByte);
+        capitalizeNext = false;
+      } else {
+        capitalizeNext = true;
       }
-
       outBuf.setByte(out, currentByte);
-    } // end of for_loop
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/drill/blob/cb9547a6/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index a6fa255..e5fe957 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -505,13 +505,7 @@ public class StringFunctions{
 
       for (int id = input.start; id < input.end; id++) {
         byte  currentByte = input.buffer.getByte(id);
-
-        // 'A - Z' : 0x41 - 0x5A
-        // 'a - z' : 0x61 - 0x7A
-        if (currentByte >= 0x41 && currentByte <= 0x5A) {
-          currentByte += 0x20;
-        }
-        out.buffer.setByte(id - input.start, currentByte) ;
+        out.buffer.setByte(id - input.start, Character.toLowerCase(currentByte)) ;
       }
     }
   }
@@ -541,13 +535,7 @@ public class StringFunctions{
 
       for (int id = input.start; id < input.end; id++) {
         byte currentByte = input.buffer.getByte(id);
-
-        // 'A - Z' : 0x41 - 0x5A
-        // 'a - z' : 0x61 - 0x7A
-        if (currentByte >= 0x61 && currentByte <= 0x7A) {
-          currentByte -= 0x20;
-        }
-        out.buffer.setByte(id - input.start, currentByte) ;
+        out.buffer.setByte(id - input.start, Character.toUpperCase(currentByte)) ;
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/cb9547a6/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
index fe099d7..4723d20 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -19,17 +19,26 @@ package org.apache.drill.exec.expr.fn.impl;
 
 import static org.junit.Assert.assertTrue;
 
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.integration.junit4.JMockit;
+import org.apache.calcite.util.ConversionUtil;
+import org.apache.calcite.util.Util;
 import org.apache.commons.io.FileUtils;
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.exec.util.Text;
+import org.junit.Ignore;
 import org.junit.Test;
 
 import com.google.common.collect.ImmutableList;
+import org.junit.runner.RunWith;
 
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
+import java.nio.charset.Charset;
 
+@RunWith(JMockit.class)
 public class TestStringFunctions extends BaseTestQuery {
 
   @Test
@@ -38,14 +47,14 @@ public class TestStringFunctions extends BaseTestQuery {
         .sqlQuery("select `position`('a', 'abc') res1 from (values(1))")
         .ordered()
         .baselineColumns("res1")
-        .baselineValues(1l)
+        .baselineValues(1L)
         .go();
 
     testBuilder()
         .sqlQuery("select `position`('\\u11E9', '\\u11E9\\u0031') res1 from (values(1))")
         .ordered()
         .baselineColumns("res1")
-        .baselineValues(1l)
+        .baselineValues(1L)
         .go();
   }
 
@@ -308,4 +317,84 @@ public class TestStringFunctions extends BaseTestQuery {
       FileUtils.deleteQuietly(path);
     }
   }
+
+  @Test
+  public void testLower() throws Exception {
+    testBuilder()
+        .sqlQuery("select\n" +
+            "lower('ABC') col_upper,\n" +
+            "lower('abc') col_lower,\n" +
+            "lower('AbC aBc') col_space,\n" +
+            "lower('123ABC$!abc123.') as col_special,\n" +
+            "lower('') as col_empty,\n" +
+            "lower(cast(null as varchar(10))) as col_null\n" +
+            "from (values(1))")
+        .unOrdered()
+        .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+        .baselineValues("abc", "abc", "abc abc", "123abc$!abc123.", "", null)
+        .build()
+        .run();
+  }
+
+  @Test
+  public void testUpper() throws Exception {
+    testBuilder()
+        .sqlQuery("select\n" +
+            "upper('ABC')as col_upper,\n" +
+            "upper('abc') as col_lower,\n" +
+            "upper('AbC aBc') as col_space,\n" +
+            "upper('123ABC$!abc123.') as col_special,\n" +
+            "upper('') as col_empty,\n" +
+            "upper(cast(null as varchar(10))) as col_null\n" +
+            "from (values(1))")
+        .unOrdered()
+        .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+        .baselineValues("ABC", "ABC", "ABC ABC", "123ABC$!ABC123.", "", null)
+        .build()
+        .run();
+  }
+
+  @Test
+  public void testInitcap() throws Exception {
+    testBuilder()
+        .sqlQuery("select\n" +
+            "initcap('ABC')as col_upper,\n" +
+            "initcap('abc') as col_lower,\n" +
+            "initcap('AbC aBc') as col_space,\n" +
+            "initcap('123ABC$!abc123.') as col_special,\n" +
+            "initcap('') as col_empty,\n" +
+            "initcap(cast(null as varchar(10))) as col_null\n" +
+            "from (values(1))")
+        .unOrdered()
+        .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+        .baselineValues("Abc", "Abc", "Abc Abc", "123abc$!Abc123.", "", null)
+        .build()
+        .run();
+  }
+
+  @Ignore("DRILL-5477")
+  @Test
+  public void testMultiByteEncoding() throws Exception {
+    // mock calcite util method to return utf charset
+    // instead of setting saffron.default.charset at system level
+    new MockUp<Util>()
+    {
+      @Mock
+      Charset getDefaultCharset() {
+        return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME);
+      }
+    };
+
+    testBuilder()
+        .sqlQuery("select\n" +
+            "upper('привет')as col_upper,\n" +
+            "lower('ПРИВЕТ') as col_lower,\n" +
+            "initcap('приВЕТ') as col_initcap\n" +
+            "from (values(1))")
+        .unOrdered()
+        .baselineColumns("col_upper", "col_lower", "col_initcap")
+        .baselineValues("ПРИВЕТ", "привет", "Привет")
+        .build()
+        .run();
+  }
 }