You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by ja...@apache.org on 2017/11/15 18:48:25 UTC
[30/37] phoenix git commit: PHOENIX-4237 Allow sorting on (Java)
collation keys for non-English locales (Shehzaad Nakhoda)
PHOENIX-4237 Allow sorting on (Java) collation keys for non-English locales (Shehzaad Nakhoda)
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/81019c64
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/81019c64
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/81019c64
Branch: refs/heads/4.x-HBase-1.1
Commit: 81019c644a1085f81ab1f84af46e411660320171
Parents: 5820ff4
Author: James Taylor <jt...@salesforce.com>
Authored: Fri Nov 3 09:17:29 2017 -0700
Committer: James Taylor <jt...@salesforce.com>
Committed: Wed Nov 15 10:46:41 2017 -0800
----------------------------------------------------------------------
LICENSE | 43 ++--
phoenix-core/pom.xml | 5 +
.../phoenix/end2end/CollationKeyFunctionIT.java | 181 ++++++++++++++
.../phoenix/expression/ExpressionType.java | 4 +-
.../function/CollationKeyFunction.java | 199 +++++++++++++++
.../apache/phoenix/jdbc/PhoenixConnection.java | 3 +
.../apache/phoenix/util/VarBinaryFormatter.java | 52 ++++
.../function/CollationKeyFunctionTest.java | 243 +++++++++++++++++++
phoenix-server/pom.xml | 1 +
9 files changed, 713 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/LICENSE
----------------------------------------------------------------------
diff --git a/LICENSE b/LICENSE
index 08e5e10..7bd8ad1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -236,23 +236,32 @@ Font Awesome fonts (http://fontawesome.io/)
3-Clause BSD License:
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/phoenix-core/pom.xml
----------------------------------------------------------------------
diff --git a/phoenix-core/pom.xml b/phoenix-core/pom.xml
index 6f3adb4..f82cddc 100644
--- a/phoenix-core/pom.xml
+++ b/phoenix-core/pom.xml
@@ -471,5 +471,10 @@
<artifactId>stream</artifactId>
<version>${stream.version}</version>
</dependency>
+ <dependency>
+ <groupId>com.salesforce.i18n</groupId>
+ <artifactId>i18n-util</artifactId>
+ <version>1.0.1</version>
+ </dependency>
</dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/phoenix-core/src/it/java/org/apache/phoenix/end2end/CollationKeyFunctionIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/CollationKeyFunctionIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/CollationKeyFunctionIT.java
new file mode 100644
index 0000000..efbab64
--- /dev/null
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/CollationKeyFunctionIT.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.end2end;
+
+import static org.apache.phoenix.util.TestUtil.closeStmtAndConn;
+import static org.junit.Assert.assertEquals;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.text.Collator;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * End2End test that tests the COLLATION_KEY function in an ORDER BY clause.
+ *
+ * Before every test a uniquely named table is created and loaded with the
+ * contents of dataArray; the ID of each row is the index of its value in that
+ * array, so an expected ordering can be written as an array of indexes.
+ */
+public class CollationKeyFunctionIT extends ParallelStatsDisabledIT {
+
+ private String tableName;
+ private String[] dataArray = new String[] {
+ // (0-6) chinese characters
+ "\u963f", "\u55c4", "\u963e", "\u554a", "\u4ec8", "\u3d9a", "\u9f51",
+ // (7-13) western characters, some with accent
+ "a", "b", "ä", "A", "a", "ä", "A" };
+
+ /**
+ * Creates a new table with a generated name and upserts every entry of
+ * dataArray into it, using the array index as the ID.
+ */
+ @Before
+ public void initAndPopulateTable() throws Exception {
+ Connection conn = null;
+ PreparedStatement stmt = null;
+ tableName = generateUniqueName();
+ try {
+ conn = DriverManager.getConnection(getUrl());
+ String ddl = "CREATE TABLE " + tableName + " (id INTEGER PRIMARY KEY, data VARCHAR)";
+ conn.createStatement().execute(ddl);
+
+ // Prepare the upsert once and reuse it for every row. Keeping it in
+ // stmt means the finally block actually closes it (previously stmt
+ // was never assigned and a new statement was prepared per row).
+ stmt = conn.prepareStatement("upsert into " + tableName + " values(?, ?)");
+ for (int i = 0; i < dataArray.length; i++) {
+ stmt.setInt(1, i);
+ stmt.setString(2, dataArray[i]);
+ stmt.executeUpdate();
+ }
+ conn.commit();
+ } finally {
+ closeStmtAndConn(stmt, conn);
+ }
+ }
+
+ @Test
+ public void testZhSort() throws Exception {
+ queryWithCollKeyDefaultArgsWithExpectedOrder("zh", 0, 6, new Integer[] { 3, 0, 1, 6, 5, 4, 2 });
+ }
+
+ @Test
+ public void testZhTwSort() throws Exception {
+ queryWithCollKeyDefaultArgsWithExpectedOrder("zh_TW", 0, 6, new Integer[] { 0, 3, 4, 1, 5, 2, 6 });
+ }
+
+ @Test
+ public void testZhTwStrokeSort() throws Exception {
+ queryWithCollKeyDefaultArgsWithExpectedOrder("zh_TW_STROKE", 0, 6, new Integer[] { 4, 2, 0, 3, 1, 6, 5 });
+ }
+
+ @Test
+ public void testZhStrokeSort() throws Exception {
+ queryWithCollKeyDefaultArgsWithExpectedOrder("zh__STROKE", 0, 6, new Integer[] { 0, 1, 3, 4, 6, 2, 5 });
+ }
+
+ @Test
+ public void testZhPinyinSort() throws Exception {
+ queryWithCollKeyDefaultArgsWithExpectedOrder("zh__PINYIN", 0, 6, new Integer[] { 0, 1, 3, 4, 6, 2, 5 });
+ }
+
+ @Test
+ public void testUpperCaseSort() throws Exception {
+ queryWithCollKeyUpperCaseWithExpectedOrder("en", 7, 13, new Integer[] { 7, 10, 11, 13, 9, 12, 8 });
+ }
+
+ @Test
+ public void testPrimaryStrengthSort() throws Exception {
+ queryWithCollKeyWithStrengthWithExpectedOrder("en", Collator.PRIMARY, false, 7, 13,
+ new Integer[] { 7, 9, 10, 11, 12, 13, 8 });
+ }
+
+ @Test
+ public void testSecondaryStrengthSort() throws Exception {
+ queryWithCollKeyWithStrengthWithExpectedOrder("en", Collator.SECONDARY, false, 7, 13,
+ new Integer[] { 7, 10, 11, 13, 9, 12, 8 });
+ }
+
+ @Test
+ public void testTertiaryStrengthSort() throws Exception {
+ queryWithCollKeyWithStrengthWithExpectedOrder("en", Collator.TERTIARY, false, 7, 13,
+ new Integer[] { 7, 11, 10, 13, 9, 12, 8 });
+ }
+
+ @Test
+ public void testTertiaryStrengthSortDesc() throws Exception {
+ queryWithCollKeyWithStrengthWithExpectedOrder("en", Collator.TERTIARY, true, 7, 13,
+ new Integer[] { 8, 12, 9, 13, 10, 11, 7 });
+ }
+
+
+ /**
+ * Issue a query ordered by the collation key (with COLLATION_KEY called
+ * with default args) of the data column according to the provided
+ * localeString, and compare the ID and data columns to the expected order.
+ *
+ * @param localeString
+ * the locale passed as the second argument of COLLATION_KEY
+ * @param beginIndex
+ * lowest ID (inclusive) selected by the query
+ * @param endIndex
+ * highest ID (inclusive) selected by the query
+ * @param expectedIndexOrder
+ * an array of indexes into the dataArray in the order we expect.
+ * This is the same as the ID column
+ * @throws Exception
+ * if the query fails or the rows come back in a different order
+ */
+ private void queryWithCollKeyDefaultArgsWithExpectedOrder(String localeString, Integer beginIndex, Integer endIndex,
+ Integer[] expectedIndexOrder) throws Exception {
+ String query = String.format(
+ "SELECT id, data FROM %s WHERE ID BETWEEN %d AND %d ORDER BY COLLATION_KEY(data, '%s')", tableName,
+ beginIndex, endIndex, localeString);
+ queryWithExpectedOrder(query, expectedIndexOrder);
+ }
+
+ /**
+ * Same as queryWithCollKeyDefaultArgsWithExpectedOrder, except the
+ * upperCase collator argument is set to true and rows with equal collation
+ * keys are ordered by id.
+ */
+ private void queryWithCollKeyUpperCaseWithExpectedOrder(String localeString, Integer beginIndex, Integer endIndex,
+ Integer[] expectedIndexOrder) throws Exception {
+ String query = String.format(
+ "SELECT id, data FROM %s WHERE ID BETWEEN %d AND %d ORDER BY COLLATION_KEY(data, '%s', true), id",
+ tableName, beginIndex, endIndex, localeString);
+ queryWithExpectedOrder(query, expectedIndexOrder);
+ }
+
+ /**
+ * Same as queryWithCollKeyDefaultArgsWithExpectedOrder, except the collator
+ * strength is set explicitly and rows with equal collation keys are ordered
+ * by id. When isDescending is true both sort columns are sorted DESC.
+ */
+ private void queryWithCollKeyWithStrengthWithExpectedOrder(String localeString, Integer strength, boolean isDescending,
+ Integer beginIndex, Integer endIndex, Integer[] expectedIndexOrder) throws Exception {
+ String sortOrder = isDescending ? "DESC" : "";
+
+ String query = String.format(
+ "SELECT id, data FROM %s WHERE ID BETWEEN %d AND %d ORDER BY COLLATION_KEY(data, '%s', false, %d) %s, id %s",
+ tableName, beginIndex, endIndex, localeString, strength, sortOrder, sortOrder);
+ queryWithExpectedOrder(query, expectedIndexOrder);
+ }
+
+ /**
+ * Runs the query and asserts that the rows come back in exactly the order
+ * given by expectedIndexOrder, checking both the ID and the data column of
+ * every row as well as the total number of rows.
+ *
+ * @param query
+ * the SQL to execute; column 1 must be the id, column 2 the data
+ * @param expectedIndexOrder
+ * the expected sequence of IDs (indexes into dataArray)
+ * @throws Exception
+ * if the query fails or an assertion does not hold
+ */
+ private void queryWithExpectedOrder(String query, Integer[] expectedIndexOrder) throws Exception {
+ Connection conn = null;
+ PreparedStatement ps = null;
+ try {
+ conn = DriverManager.getConnection(getUrl());
+ ps = conn.prepareStatement(query);
+ ResultSet rs = ps.executeQuery();
+ int i = 0;
+ while (rs.next()) {
+ int expectedId = expectedIndexOrder[i];
+ assertEquals("For row " + i + ": The ID did not match the expected index", expectedId, rs.getInt(1));
+ assertEquals("For row " + i + ": The data did not match the expected entry from the data array",
+ dataArray[expectedId], rs.getString(2));
+ i++;
+ }
+ assertEquals("The result set returned a different number of rows from the data array", expectedIndexOrder.length, i);
+ } finally {
+ // Release the statement (and with it the result set) and the
+ // connection even when an assertion fails; previously neither was
+ // ever closed.
+ closeStmtAndConn(ps, conn);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/phoenix-core/src/main/java/org/apache/phoenix/expression/ExpressionType.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/ExpressionType.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/ExpressionType.java
index 4f26e87..9a53eb1 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/expression/ExpressionType.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/ExpressionType.java
@@ -42,6 +42,7 @@ import org.apache.phoenix.expression.function.CeilTimestampExpression;
import org.apache.phoenix.expression.function.CeilWeekExpression;
import org.apache.phoenix.expression.function.CeilYearExpression;
import org.apache.phoenix.expression.function.CoalesceFunction;
+import org.apache.phoenix.expression.function.CollationKeyFunction;
import org.apache.phoenix.expression.function.ConvertTimezoneFunction;
import org.apache.phoenix.expression.function.CountAggregateFunction;
import org.apache.phoenix.expression.function.DayOfMonthFunction;
@@ -294,7 +295,8 @@ public enum ExpressionType {
ArrayColumnExpression(SingleCellColumnExpression.class),
FirstValuesFunction(FirstValuesFunction.class),
LastValuesFunction(LastValuesFunction.class),
- DistinctCountHyperLogLogAggregateFunction(DistinctCountHyperLogLogAggregateFunction.class);
+ DistinctCountHyperLogLogAggregateFunction(DistinctCountHyperLogLogAggregateFunction.class),
+ CollationKeyFunction(CollationKeyFunction.class);
ExpressionType(Class<? extends Expression> clazz) {
this.clazz = clazz;
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java
new file mode 100644
index 0000000..827f70a
--- /dev/null
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/CollationKeyFunction.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression.function;
+
+import java.io.DataInput;
+import java.io.IOException;
+import java.sql.SQLException;
+import java.text.Collator;
+import java.util.List;
+import java.util.Locale;
+
+import org.apache.commons.lang.BooleanUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.LiteralExpression;
+import org.apache.phoenix.parse.FunctionParseNode;
+import org.apache.phoenix.schema.tuple.Tuple;
+import org.apache.phoenix.schema.types.PBoolean;
+import org.apache.phoenix.schema.types.PDataType;
+import org.apache.phoenix.schema.types.PInteger;
+import org.apache.phoenix.schema.types.PVarbinary;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.apache.phoenix.util.VarBinaryFormatter;
+
+import com.force.db.i18n.LinguisticSort;
+import com.force.i18n.LocaleUtils;
+
+/**
+ * A Phoenix Function that calculates a collation key for an input string based
+ * on a caller-provided locale and collator strength and decomposition settings.
+ *
+ * The locale should be specified as xx_yy_variant where xx is the ISO 639-1
+ * 2-letter language code, yy is the ISO 3166 2-letter country code. Both
+ * countryCode and variant are optional. For example, zh_TW_STROKE, zh_TW and zh
+ * are all valid locale representations. Note the language code, country code
+ * and variant are used as arguments to the constructor of java.util.Locale.
+ *
+ * This function uses the open-source i18n-util package to obtain the collators
+ * it needs from the provided locale.
+ *
+ * The LinguisticSort implementation in i18n-util encapsulates sort-related
+ * functionality for a substantive list of locales. For each locale, it provides
+ * a collator and an Oracle-specific database function that can be used to sort
+ * strings according to the natural language rules of that locale.
+ *
+ * This function uses the collator returned by LinguisticSort.getCollator to
+ * produce a collation key for its input string. A user can expect that the
+ * sorting semantics of this function for a given locale is equivalent to the
+ * sorting behaviour of an Oracle query that is constructed using the Oracle
+ * functions returned by LinguisticSort for that locale.
+ *
+ * The optional third argument to the function is a boolean that specifies
+ * whether to use the upper-case collator (case-insensitive) returned by
+ * LinguisticSort.getUpperCaseCollator.
+ *
+ * The optional fourth and fifth arguments are used to set respectively the
+ * strength and decomposition of the collator returned by LinguisticSort using
+ * the setStrength and setDecomposition methods of java.text.Collator.
+ *
+ * When the input string evaluates to null no collation key is computed and the
+ * result pointer is left as the input expression produced it.
+ */
+@FunctionParseNode.BuiltInFunction(name = CollationKeyFunction.NAME, args = {
+ // input string
+ @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }),
+ // ISO Code for Locale
+ @FunctionParseNode.Argument(allowedTypes = { PVarchar.class }, isConstant = true),
+ // whether to use special upper case collator
+ @FunctionParseNode.Argument(allowedTypes = { PBoolean.class }, defaultValue = "false", isConstant = true),
+ // collator strength
+ @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true),
+ // collator decomposition
+ @FunctionParseNode.Argument(allowedTypes = { PInteger.class }, defaultValue = "null", isConstant = true) })
+public class CollationKeyFunction extends ScalarFunction {
+
+ private static final Log LOG = LogFactory.getLog(CollationKeyFunction.class);
+
+ public static final String NAME = "COLLATION_KEY";
+
+ // Built by initialize() from the constant arguments (children 1 to 4).
+ private Collator collator;
+
+ /**
+ * No-arg constructor; the collator is set up later, when readFields runs.
+ */
+ public CollationKeyFunction() {
+ }
+
+ /**
+ * @param children
+ * the five function arguments: input string, locale, upper-case
+ * flag, strength and decomposition
+ */
+ public CollationKeyFunction(List<Expression> children) throws SQLException {
+ super(children);
+ initialize();
+ }
+
+ /**
+ * Reads the serialized state via the superclass and then rebuilds the
+ * collator from the literal arguments.
+ */
+ @Override
+ public void readFields(DataInput input) throws IOException {
+ super.readFields(input);
+ initialize();
+ }
+
+ /**
+ * Evaluates the first child as a string and replaces the contents of ptr
+ * with the bytes of that string's collation key.
+ *
+ * @return false if the first child could not be evaluated, true otherwise
+ */
+ @Override
+ public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) {
+ Expression expression = getChildren().get(0);
+ if (!expression.evaluate(tuple, ptr)) {
+ return false;
+ }
+ String inputString = (String) PVarchar.INSTANCE.toObject(ptr, expression.getSortOrder());
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("CollationKey inputString: " + inputString);
+ }
+ if (inputString == null) {
+ // Collator.getCollationKey(null) returns null, so dereferencing it
+ // below would throw a NullPointerException. Leave ptr exactly as
+ // the child expression set it and report a successful evaluation.
+ return true;
+ }
+ byte[] collationKeyByteArray = collator.getCollationKey(inputString).toByteArray();
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("CollationKey bytes: " + VarBinaryFormatter.INSTANCE.format(collationKeyByteArray));
+ }
+
+ ptr.set(collationKeyByteArray);
+ return true;
+ }
+
+ /**
+ * Reads the constant arguments and builds the collator used by evaluate:
+ * resolves the locale, picks the regular or the upper-case collator and
+ * applies the strength and decomposition when they were supplied.
+ */
+ private void initialize() {
+ String localeISOCode = getLiteralValue(1, String.class);
+ Boolean useSpecialUpperCaseCollator = getLiteralValue(2, Boolean.class);
+ Integer collatorStrength = getLiteralValue(3, Integer.class);
+ Integer collatorDecomposition = getLiteralValue(4, Integer.class);
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Input (literal) arguments:" + "localeISOCode: " + localeISOCode
+ + ", useSpecialUpperCaseCollator: " + useSpecialUpperCaseCollator
+ + ", collatorStrength: " + collatorStrength
+ + ", collatorDecomposition: " + collatorDecomposition);
+ }
+
+ Locale locale = LocaleUtils.get().getLocaleByIsoCode(localeISOCode);
+
+ if (LOG.isTraceEnabled()) {
+ // Plain concatenation: the text is not a format pattern, so it must
+ // not be passed through String.format.
+ LOG.trace("Locale: " + locale.toLanguageTag());
+ }
+
+ LinguisticSort linguisticSort = LinguisticSort.get(locale);
+
+ collator = BooleanUtils.isTrue(useSpecialUpperCaseCollator) ? linguisticSort.getUpperCaseCollator(false)
+ : linguisticSort.getCollator();
+
+ // A null strength/decomposition means "not supplied": keep whatever the
+ // collator returned by LinguisticSort is already configured with.
+ if (collatorStrength != null) {
+ collator.setStrength(collatorStrength);
+ }
+
+ if (collatorDecomposition != null) {
+ collator.setDecomposition(collatorDecomposition);
+ }
+
+ if (LOG.isTraceEnabled()) {
+ LOG.trace(String.format("Collator: [strength: %d, decomposition: %d], Special-Upper-Case: %s",
+ collator.getStrength(), collator.getDecomposition(),
+ BooleanUtils.isTrue(useSpecialUpperCaseCollator)));
+ }
+ }
+
+ /**
+ * @return the VARBINARY type; the result is the raw collation key bytes
+ */
+ @Override
+ public PDataType getDataType() {
+ return PVarbinary.INSTANCE;
+ }
+
+ @Override
+ public String getName() {
+ return NAME;
+ }
+
+ @Override
+ public boolean isThreadSafe() {
+ // ICU4J Collators are not thread-safe unless they are frozen.
+ // TODO: Look into calling freeze() on them to be able return true here.
+ return false;
+ }
+
+ /**
+ * Returns the value of the literal argument at childIndex cast to type.
+ *
+ * @param childIndex
+ * position of the argument in the list of children
+ * @param type
+ * the class the literal value is cast to
+ * @return the literal value, which may be null
+ */
+ private <T> T getLiteralValue(int childIndex, Class<T> type) {
+ Expression expression = getChildren().get(childIndex);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("child: " + childIndex + ", expression: " + expression);
+ }
+ // It's safe to assume expression is a LiteralExpression since
+ // only arguments marked as isConstant = true should be handled through
+ // this method.
+ return type.cast(((LiteralExpression) expression).getValue());
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/phoenix-core/src/main/java/org/apache/phoenix/jdbc/PhoenixConnection.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/jdbc/PhoenixConnection.java b/phoenix-core/src/main/java/org/apache/phoenix/jdbc/PhoenixConnection.java
index 730f754..4555190 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/jdbc/PhoenixConnection.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/jdbc/PhoenixConnection.java
@@ -105,6 +105,7 @@ import org.apache.phoenix.schema.types.PTimestamp;
import org.apache.phoenix.schema.types.PUnsignedDate;
import org.apache.phoenix.schema.types.PUnsignedTime;
import org.apache.phoenix.schema.types.PUnsignedTimestamp;
+import org.apache.phoenix.schema.types.PVarbinary;
import org.apache.phoenix.trace.util.Tracing;
import org.apache.phoenix.transaction.PhoenixTransactionContext;
import org.apache.phoenix.util.DateUtil;
@@ -116,6 +117,7 @@ import org.apache.phoenix.util.ReadOnlyProps;
import org.apache.phoenix.util.SQLCloseable;
import org.apache.phoenix.util.SQLCloseables;
import org.apache.phoenix.util.SchemaUtil;
+import org.apache.phoenix.util.VarBinaryFormatter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Objects;
@@ -336,6 +338,7 @@ public class PhoenixConnection implements Connection, MetaDataMutated, SQLClosea
formatters.put(PUnsignedTimestamp.INSTANCE, timestampFormat);
formatters.put(PDecimal.INSTANCE,
FunctionArgumentType.NUMERIC.getFormatter(numberPattern));
+ formatters.put(PVarbinary.INSTANCE, VarBinaryFormatter.INSTANCE);
// We do not limit the metaData on a connection less than the global
// one,
// as there's not much that will be cached here.
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/phoenix-core/src/main/java/org/apache/phoenix/util/VarBinaryFormatter.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/util/VarBinaryFormatter.java b/phoenix-core/src/main/java/org/apache/phoenix/util/VarBinaryFormatter.java
new file mode 100644
index 0000000..7f0d030
--- /dev/null
+++ b/phoenix-core/src/main/java/org/apache/phoenix/util/VarBinaryFormatter.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.util;
+
+import java.text.FieldPosition;
+import java.text.Format;
+import java.text.ParsePosition;
+
+import org.apache.commons.codec.binary.Hex;
+
+/**
+ * A formatter that formats a byte array to a hexadecimal string
+ * (with each byte converted to a 2-digit hex sequence).
+ *
+ * Only formatting is supported; parsing a hex string back into a byte array is
+ * not implemented.
+ *
+ * @author snakhoda-sfdc
+ */
+public class VarBinaryFormatter extends Format {
+
+ private static final long serialVersionUID = -7940880118392024750L;
+
+ public static final VarBinaryFormatter INSTANCE = new VarBinaryFormatter();
+
+ /**
+ * Appends the hexadecimal representation of the given byte array to
+ * toAppendTo.
+ *
+ * @param obj
+ * the value to format; must be a byte[]
+ * @param toAppendTo
+ * the buffer the hex string is appended to
+ * @param pos
+ * not used by this formatter
+ * @return toAppendTo, with the hex string appended
+ * @throws IllegalArgumentException
+ * if obj is not a byte[]
+ */
+ @Override
+ public StringBuffer format(Object obj, StringBuffer toAppendTo, FieldPosition pos) {
+ if (!(obj instanceof byte[])) {
+ throw new IllegalArgumentException("VarBinaryFormatter can only format byte arrays");
+ }
+ String hexString = Hex.encodeHexString((byte[]) obj);
+ toAppendTo.append(hexString);
+ return toAppendTo;
+ }
+
+ /**
+ * Parsing is not supported.
+ *
+ * @throws UnsupportedOperationException
+ * always
+ */
+ @Override
+ public Object parseObject(String source, ParsePosition pos) {
+ // Throw the exception; returning it would hand callers an exception
+ // object as if it were the parsed value.
+ throw new UnsupportedOperationException("VarBinaryFormatter does not support parsing");
+ }
+}
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/phoenix-core/src/test/java/org/apache/phoenix/expression/function/CollationKeyFunctionTest.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/expression/function/CollationKeyFunctionTest.java b/phoenix-core/src/test/java/org/apache/phoenix/expression/function/CollationKeyFunctionTest.java
new file mode 100644
index 0000000..f57a937
--- /dev/null
+++ b/phoenix-core/src/test/java/org/apache/phoenix/expression/function/CollationKeyFunctionTest.java
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.expression.function;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.fail;
+
+import java.text.Collator;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang.builder.ToStringBuilder;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.phoenix.expression.Expression;
+import org.apache.phoenix.expression.LiteralExpression;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.types.PBoolean;
+import org.apache.phoenix.schema.types.PInteger;
+import org.apache.phoenix.schema.types.PVarchar;
+import org.junit.Test;
+
+import com.google.common.collect.Lists;
+import com.google.common.primitives.UnsignedBytes;
+
+/**
+ * "Unit" tests for CollationKeyFunction
+ *
+ */
+public class CollationKeyFunctionTest {
+
+ private static String[] chineseChars = new String[] { "\u963f", "\u55c4", "\u963e", "\u554a", "\u4ec8", "\u3d9a",
+ "\u9f51" };
+
+ private static Comparator<byte[]> collationKeyComparator = UnsignedBytes.lexicographicalComparator();
+
+ // Orders entries by their collation key bytes (using collationKeyComparator)
+ // and breaks ties between equal keys with the integer.
+ private static Comparator<ByteArrayAndInteger> collationKeyAndIndexComparator = new Comparator<ByteArrayAndInteger>() {
+ @Override
+ public int compare(ByteArrayAndInteger o1, ByteArrayAndInteger o2) {
+ int byKey = collationKeyComparator.compare(o1.byteArray, o2.byteArray);
+ if (byKey != 0) {
+ return byKey;
+ }
+ return o1.integer.compareTo(o2.integer);
+ }
+ };
+
+ /**
+ * Simple holder pairing a byte array with an integer.
+ */
+ private static class ByteArrayAndInteger {
+
+ byte[] byteArray;
+ Integer integer;
+
+ private ByteArrayAndInteger(byte[] byteArray, Integer integer) {
+ this.byteArray = byteArray;
+ this.integer = integer;
+ }
+
+ @Override
+ public String toString() {
+ return ToStringBuilder.reflectionToString(this);
+ }
+
+ /**
+ * Returns the first entry of list whose integer equals matchingInteger,
+ * or null when there is no such entry.
+ */
+ public static ByteArrayAndInteger findFirstIntegerMatch(List<ByteArrayAndInteger> list,
+ Integer matchingInteger) {
+ ByteArrayAndInteger found = null;
+ for (ByteArrayAndInteger candidate : list) {
+ if (candidate.integer.equals(matchingInteger)) {
+ found = candidate;
+ break;
+ }
+ }
+ return found;
+ }
+ }
+
+ @Test
+ public void testZhSort() throws Exception {
+ testSortOrderNoEquals(chineseChars, "zh", Boolean.FALSE, null, null, new Integer[] { 3, 0, 1, 6, 5, 4, 2 });
+ }
+
+ @Test
+ public void testZhTwSort() throws Exception {
+ testSortOrderNoEquals(chineseChars, "zh_TW", Boolean.FALSE, null, null, new Integer[] { 0, 3, 4, 1, 5, 2, 6 });
+ }
+
+ @Test
+ public void testZhTwStrokeSort() throws Exception {
+ testSortOrderNoEquals(chineseChars, "zh_TW_STROKE", Boolean.FALSE, null, null,
+ new Integer[] { 4, 2, 0, 3, 1, 6, 5 });
+ }
+
+ @Test
+ public void testZhStrokeSort() throws Exception {
+ testSortOrderNoEquals(chineseChars, "zh__STROKE", Boolean.FALSE, null, null,
+ new Integer[] { 0, 1, 3, 4, 6, 2, 5 });
+ }
+
+ @Test
+ public void testZhPinyinSort() throws Exception {
+ testSortOrderNoEquals(chineseChars, "zh__PINYIN", Boolean.FALSE, null, null,
+ new Integer[] { 0, 1, 3, 4, 6, 2, 5 });
+ }
+
+ @Test
+ public void testUpperCaseCollationKeyBytes() throws Exception {
+ testCollationKeysEqual(new String[] { "abcdef", "ABCDEF", "aBcDeF" }, "en", Boolean.TRUE, null, null);
+ }
+
+ @Test
+ public void testEqualCollationKeysForPrimaryStrength() throws Exception {
+ // "a", "A", "ä" are considered equivalent
+ testCollationKeysEqual(new String[] { "a", "A", "ä" }, "en", Boolean.FALSE, Collator.PRIMARY, null);
+ testSortOrderNoEquals(new String[] { "b", "a" }, "en", Boolean.FALSE, Collator.PRIMARY, null,
+ new Integer[] { 1, 0 });
+
+ }
+
+ @Test
+ public void testCollationKeyBytesForSecondaryStrength() throws Exception {
+ // "a" and "A" are considered equivalent but not "ä"
+ testCollationKeysEqual(new String[] { "a", "A" }, "en", Boolean.FALSE, Collator.SECONDARY, null);
+ testSortOrderNoEquals(new String[] { "b", "a", "ä" }, "en", Boolean.FALSE, Collator.SECONDARY, null,
+ new Integer[] { 1, 2, 0 });
+ }
+
+ @Test
+ public void testCollationKeyBytesForTertiaryStrength() throws Exception {
+     // none of these are considered equivalent
+     String[] distinctInputs = { "b", "a", "ä", "A" };
+     Integer[] expectedOrder = { 1, 3, 2, 0 };
+     testSortOrderNoEquals(distinctInputs, "en", Boolean.FALSE, Collator.TERTIARY, null, expectedOrder);
+ }
+
+ /**
+  * Checks that collation keys can be computed with {@link Collator#FULL_DECOMPOSITION}: "a" and "A" are
+  * expected to produce equal keys for locale "en" when no strength is passed.
+  * TODO: Actually test accented characters and languages where the decomposition mode makes a difference.
+  */
+ @Test
+ public void testCollationKeyBytesForFullDecomposition() throws Exception {
+     String[] equivalentInputs = { "a", "A" };
+     Integer decomposition = Collator.FULL_DECOMPOSITION;
+     testCollationKeysEqual(equivalentInputs, "en", Boolean.FALSE, null, decomposition);
+ }
+
+ // ---- HELPER METHODS ----
+
+ /**
+  * Computes a collation key for every input string, sorts the keys with
+  * {@code collationKeyAndIndexComparator}, verifies that no two inputs share a key, and finally asserts
+  * that the original input indexes, read in sorted order, match {@code expectedOrder}.
+  *
+  * @param inputStrings      strings to compute collation keys for
+  * @param locale            locale code handed to the collation key function
+  * @param uppercaseCollator upper-case flag handed to the collation key function
+  * @param strength          collator strength handed to the function; callers in this class may pass null
+  * @param decomposition     collator decomposition mode handed to the function; callers may pass null
+  * @param expectedOrder     expected input indexes in sorted order
+  */
+ private void testSortOrderNoEquals(String[] inputStrings, String locale, Boolean uppercaseCollator,
+         Integer strength, Integer decomposition, Integer[] expectedOrder) throws Exception {
+     List<ByteArrayAndInteger> keysWithIndexes = calculateCollationKeys(inputStrings, locale, uppercaseCollator,
+             strength, decomposition);
+     Collections.sort(keysWithIndexes, collationKeyAndIndexComparator);
+     testCollationKeysNotEqual(inputStrings, keysWithIndexes);
+
+     // Read back the original index carried by each entry, now in sorted position.
+     Integer[] actualOrder = new Integer[keysWithIndexes.size()];
+     int position = 0;
+     for (ByteArrayAndInteger keyWithIndex : keysWithIndexes) {
+         actualOrder[position++] = keyWithIndex.integer;
+     }
+     assertArrayEquals(expectedOrder, actualOrder);
+ }
+
+ /**
+  * Evaluates the collation key function (ascending sort order) once per input string and pairs each
+  * resulting byte array with the index of the input it came from.
+  *
+  * @return one entry per input string, in input order
+  */
+ private List<ByteArrayAndInteger> calculateCollationKeys(String[] inputStrings, String locale,
+         Boolean upperCaseCollator, Integer strength, Integer decomposition) throws Exception {
+     List<ByteArrayAndInteger> keysWithIndexes = Lists.newArrayList();
+     int index = 0;
+     for (String input : inputStrings) {
+         byte[] keyBytes = callFunction(input, locale, upperCaseCollator, strength, decomposition,
+                 SortOrder.ASC);
+         keysWithIndexes.add(new ByteArrayAndInteger(keyBytes, index));
+         index++;
+     }
+     return keysWithIndexes;
+ }
+
+ /**
+  * Computes a collation key for every input string and fails the test at the first pair of neighbouring
+  * inputs whose keys do not compare as equal under {@code collationKeyComparator}.
+  */
+ private void testCollationKeysEqual(String[] inputStrings, String locale, Boolean upperCaseCollator,
+         Integer strength, Integer decomposition) throws Exception {
+     List<ByteArrayAndInteger> keysWithIndexes = calculateCollationKeys(inputStrings, locale, upperCaseCollator,
+             strength, decomposition);
+
+     // Each input is compared with its immediate successor only.
+     for (int next = 1; next < inputStrings.length; next++) {
+         int current = next - 1;
+         byte[] currentKey = ByteArrayAndInteger.findFirstIntegerMatch(keysWithIndexes, current).byteArray;
+         byte[] nextKey = ByteArrayAndInteger.findFirstIntegerMatch(keysWithIndexes, next).byteArray;
+         if (collationKeyComparator.compare(currentKey, nextKey) == 0) {
+             continue;
+         }
+         fail(String.format("Collation keys for inputStrings [%s] and [%s] ([%s], [%s]) were not equal",
+                 inputStrings[current], inputStrings[next], Hex.encodeHexString(currentKey),
+                 Hex.encodeHexString(nextKey)));
+     }
+ }
+
+ /**
+  * Fails the test at the first pair of distinct input strings whose collation keys compare as equal
+  * under {@code collationKeyComparator}.
+  *
+  * @param inputStrings            the original inputs; used for the pair count and the failure message
+  * @param collationKeysAndIndexes keys paired with the index of the input they were computed from; the
+  *                                list may be in any order because entries are looked up by index
+  */
+ private void testCollationKeysNotEqual(String[] inputStrings, List<ByteArrayAndInteger> collationKeysAndIndexes)
+         throws Exception {
+     // The last input has no later partner, so the outer loop stops one short of the end.
+     for (int i = 0; i < inputStrings.length - 1; i++) {
+         // Looked up once per i instead of once per (i, j) pair: the lookup is a linear scan of the list.
+         byte[] iByteArray = ByteArrayAndInteger.findFirstIntegerMatch(collationKeysAndIndexes, i).byteArray;
+         for (int j = i + 1; j < inputStrings.length; j++) {
+             byte[] jByteArray = ByteArrayAndInteger.findFirstIntegerMatch(collationKeysAndIndexes, j).byteArray;
+             if (collationKeyComparator.compare(iByteArray, jByteArray) == 0) {
+                 fail(String.format("Collation keys for inputStrings [%s] and [%s] ([%s], [%s]) were equal",
+                         inputStrings[i], inputStrings[j], Hex.encodeHexString(iByteArray),
+                         Hex.encodeHexString(jByteArray)));
+             }
+         }
+     }
+ }
+
+ /**
+  * Wraps each argument in a {@code LiteralExpression} constant (varchar for the two strings, boolean for
+  * the upper-case flag, integer for strength and decomposition), all created with the given sort order,
+  * and delegates to the overload that takes literals.
+  *
+  * @return whatever the literal-based overload returns
+  */
+ private static byte[] callFunction(String inputStr, String localeIsoCode, Boolean upperCaseCollator,
+         Integer strength, Integer decomposition, SortOrder sortOrder) throws Exception {
+     LiteralExpression inputArg = LiteralExpression.newConstant(inputStr, PVarchar.INSTANCE, sortOrder);
+     LiteralExpression localeArg = LiteralExpression.newConstant(localeIsoCode, PVarchar.INSTANCE, sortOrder);
+     LiteralExpression upperCaseArg = LiteralExpression.newConstant(upperCaseCollator, PBoolean.INSTANCE,
+             sortOrder);
+     LiteralExpression strengthArg = LiteralExpression.newConstant(strength, PInteger.INSTANCE, sortOrder);
+     LiteralExpression decompositionArg = LiteralExpression.newConstant(decomposition, PInteger.INSTANCE,
+             sortOrder);
+     return callFunction(inputArg, localeArg, upperCaseArg, strengthArg, decompositionArg);
+ }
+
+ /**
+  * Builds a {@code CollationKeyFunction} from the five literals (in the order given), evaluates it with
+  * {@code null} as its first argument and a fresh {@code ImmutableBytesWritable}, and converts the
+  * evaluated bytes using the function's own data type and sort order.
+  *
+  * @return the evaluated collation key bytes, or {@code null} when {@code evaluate} returns false
+  */
+ private static byte[] callFunction(LiteralExpression inputStrLiteral, LiteralExpression localeIsoCodeLiteral,
+         LiteralExpression upperCaseBooleanLiteral, LiteralExpression strengthLiteral,
+         LiteralExpression decompositionLiteral) throws Exception {
+     // The explicit type witness makes this a List<Expression> without casting every argument.
+     List<Expression> arguments = Lists.<Expression>newArrayList(inputStrLiteral, localeIsoCodeLiteral,
+             upperCaseBooleanLiteral, strengthLiteral, decompositionLiteral);
+     Expression function = new CollationKeyFunction(arguments);
+     ImmutableBytesWritable ptr = new ImmutableBytesWritable();
+     if (!function.evaluate(null, ptr)) {
+         return null;
+     }
+     return (byte[]) function.getDataType().toObject(ptr, function.getSortOrder());
+ }
+}
http://git-wip-us.apache.org/repos/asf/phoenix/blob/81019c64/phoenix-server/pom.xml
----------------------------------------------------------------------
diff --git a/phoenix-server/pom.xml b/phoenix-server/pom.xml
index 3576425..67832ad 100644
--- a/phoenix-server/pom.xml
+++ b/phoenix-server/pom.xml
@@ -134,6 +134,7 @@
<include>io.dropwizard.metrics:metrics-core</include>
<include>org.apache.thrift:libthrift</include>
<include>com.clearspring.analytics:stream</include>
+ <include>com.salesforce.i18n:i18n-util</include>
</includes>
<excludes>
<exclude>org.apache.phoenix:phoenix-server</exclude>