You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by pa...@apache.org on 2017/05/13 17:39:04 UTC
[6/7] drill git commit: DRILL-5450: Fix initcap function to convert upper case characters correctly
DRILL-5450: Fix initcap function to convert upper case characters correctly
This closes #821
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/cb9547a6
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/cb9547a6
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/cb9547a6
Branch: refs/heads/master
Commit: cb9547a67ada2f848b145b4611c463cd25154295
Parents: 27c5f45
Author: Arina Ielchiieva <ar...@gmail.com>
Authored: Thu Apr 27 11:44:18 2017 +0000
Committer: Parth Chandra <pc...@maprtech.com>
Committed: Fri May 12 17:07:51 2017 -0700
----------------------------------------------------------------------
.../expr/fn/impl/StringFunctionHelpers.java | 51 ++++-------
.../exec/expr/fn/impl/StringFunctions.java | 16 +---
.../exec/expr/fn/impl/TestStringFunctions.java | 93 +++++++++++++++++++-
3 files changed, 112 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/cb9547a6/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
index 88f3417..207d96c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -15,7 +15,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- ******************************************************************************/
+*/
package org.apache.drill.exec.expr.fn.impl;
import io.netty.buffer.DrillBuf;
@@ -144,41 +144,28 @@ public class StringFunctionHelpers {
return result;
}
- // Assumes Alpha as [A-Za-z0-9]
- // white space is treated as everything else.
+ /**
+ * Capitalizes the first character of each word and lowercases the rest of the word.
+ * Any character other than a letter or a digit is treated as a word delimiter.
+ *
+ * @param start start position in input buffer
+ * @param end end position in input buffer
+ * @param inBuf buffer with input characters
+ * @param outBuf buffer with output characters
+ */
public static void initCap(int start, int end, DrillBuf inBuf, DrillBuf outBuf) {
- boolean capNext = true;
+ boolean capitalizeNext = true;
int out = 0;
for (int id = start; id < end; id++, out++) {
- byte currentByte = inBuf.getByte(id);
-
- // 'A - Z' : 0x41 - 0x5A
- // 'a - z' : 0x61 - 0x7A
- // '0-9' : 0x30 - 0x39
- if (capNext) { // curCh is whitespace or first character of word.
- if (currentByte >= 0x30 && currentByte <= 0x39) { // 0-9
- capNext = false;
- } else if (currentByte >= 0x41 && currentByte <= 0x5A) { // A-Z
- capNext = false;
- } else if (currentByte >= 0x61 && currentByte <= 0x7A) { // a-z
- capNext = false;
- currentByte -= 0x20; // Uppercase this character
- }
- // else {} whitespace
- } else { // Inside of a word or white space after end of word.
- if (currentByte >= 0x30 && currentByte <= 0x39) { // 0-9
- // noop
- } else if (currentByte >= 0x41 && currentByte <= 0x5A) { // A-Z
- currentByte -= 0x20; // Lowercase this character
- } else if (currentByte >= 0x61 && currentByte <= 0x7A) { // a-z
- // noop
- } else { // whitespace
- capNext = true;
- }
+ int currentByte = inBuf.getByte(id);
+ if (Character.isLetterOrDigit(currentByte)) {
+ currentByte = capitalizeNext ? Character.toUpperCase(currentByte) : Character.toLowerCase(currentByte);
+ capitalizeNext = false;
+ } else {
+ capitalizeNext = true;
}
-
outBuf.setByte(out, currentByte);
- } // end of for_loop
+ }
}
/**
http://git-wip-us.apache.org/repos/asf/drill/blob/cb9547a6/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index a6fa255..e5fe957 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -505,13 +505,7 @@ public class StringFunctions{
for (int id = input.start; id < input.end; id++) {
byte currentByte = input.buffer.getByte(id);
-
- // 'A - Z' : 0x41 - 0x5A
- // 'a - z' : 0x61 - 0x7A
- if (currentByte >= 0x41 && currentByte <= 0x5A) {
- currentByte += 0x20;
- }
- out.buffer.setByte(id - input.start, currentByte) ;
+ out.buffer.setByte(id - input.start, Character.toLowerCase(currentByte)) ;
}
}
}
@@ -541,13 +535,7 @@ public class StringFunctions{
for (int id = input.start; id < input.end; id++) {
byte currentByte = input.buffer.getByte(id);
-
- // 'A - Z' : 0x41 - 0x5A
- // 'a - z' : 0x61 - 0x7A
- if (currentByte >= 0x61 && currentByte <= 0x7A) {
- currentByte -= 0x20;
- }
- out.buffer.setByte(id - input.start, currentByte) ;
+ out.buffer.setByte(id - input.start, Character.toUpperCase(currentByte)) ;
}
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/cb9547a6/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
index fe099d7..4723d20 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -19,17 +19,26 @@ package org.apache.drill.exec.expr.fn.impl;
import static org.junit.Assert.assertTrue;
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.integration.junit4.JMockit;
+import org.apache.calcite.util.ConversionUtil;
+import org.apache.calcite.util.Util;
import org.apache.commons.io.FileUtils;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.exec.util.Text;
+import org.junit.Ignore;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
+import org.junit.runner.RunWith;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
+import java.nio.charset.Charset;
+@RunWith(JMockit.class)
public class TestStringFunctions extends BaseTestQuery {
@Test
@@ -38,14 +47,14 @@ public class TestStringFunctions extends BaseTestQuery {
.sqlQuery("select `position`('a', 'abc') res1 from (values(1))")
.ordered()
.baselineColumns("res1")
- .baselineValues(1l)
+ .baselineValues(1L)
.go();
testBuilder()
.sqlQuery("select `position`('\\u11E9', '\\u11E9\\u0031') res1 from (values(1))")
.ordered()
.baselineColumns("res1")
- .baselineValues(1l)
+ .baselineValues(1L)
.go();
}
@@ -308,4 +317,84 @@ public class TestStringFunctions extends BaseTestQuery {
FileUtils.deleteQuietly(path);
}
}
+
+ @Test
+ public void testLower() throws Exception {
+ testBuilder()
+ .sqlQuery("select\n" +
+ "lower('ABC') col_upper,\n" +
+ "lower('abc') col_lower,\n" +
+ "lower('AbC aBc') col_space,\n" +
+ "lower('123ABC$!abc123.') as col_special,\n" +
+ "lower('') as col_empty,\n" +
+ "lower(cast(null as varchar(10))) as col_null\n" +
+ "from (values(1))")
+ .unOrdered()
+ .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+ .baselineValues("abc", "abc", "abc abc", "123abc$!abc123.", "", null)
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testUpper() throws Exception {
+ testBuilder()
+ .sqlQuery("select\n" +
+ "upper('ABC')as col_upper,\n" +
+ "upper('abc') as col_lower,\n" +
+ "upper('AbC aBc') as col_space,\n" +
+ "upper('123ABC$!abc123.') as col_special,\n" +
+ "upper('') as col_empty,\n" +
+ "upper(cast(null as varchar(10))) as col_null\n" +
+ "from (values(1))")
+ .unOrdered()
+ .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+ .baselineValues("ABC", "ABC", "ABC ABC", "123ABC$!ABC123.", "", null)
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testInitcap() throws Exception {
+ testBuilder()
+ .sqlQuery("select\n" +
+ "initcap('ABC')as col_upper,\n" +
+ "initcap('abc') as col_lower,\n" +
+ "initcap('AbC aBc') as col_space,\n" +
+ "initcap('123ABC$!abc123.') as col_special,\n" +
+ "initcap('') as col_empty,\n" +
+ "initcap(cast(null as varchar(10))) as col_null\n" +
+ "from (values(1))")
+ .unOrdered()
+ .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+ .baselineValues("Abc", "Abc", "Abc Abc", "123abc$!Abc123.", "", null)
+ .build()
+ .run();
+ }
+
+ @Ignore("DRILL-5477")
+ @Test
+ public void testMultiByteEncoding() throws Exception {
+ // Mock the Calcite Util method to return the UTF-16 charset
+ // instead of setting saffron.default.charset at the system level.
+ new MockUp<Util>()
+ {
+ @Mock
+ Charset getDefaultCharset() {
+ return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME);
+ }
+ };
+
+ testBuilder()
+ .sqlQuery("select\n" +
+ "upper('привет')as col_upper,\n" +
+ "lower('ПРИВЕТ') as col_lower,\n" +
+ "initcap('приВЕТ') as col_initcap\n" +
+ "from (values(1))")
+ .unOrdered()
+ .baselineColumns("col_upper", "col_lower", "col_initcap")
+ .baselineValues("ПРИВЕТ", "привет", "Привет")
+ .build()
+ .run();
+ }
}