You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mh...@apache.org on 2021/09/08 19:10:13 UTC
[asterixdb] 02/03: Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat'
This is an automated email from the ASF dual-hosted git repository.
mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 20314d118059da634847e1ddb1b1d744e552e713
Merge: 2521ade cc6143b
Author: Ali Alsuliman <al...@gmail.com>
AuthorDate: Wed Sep 8 00:19:26 2021 +0300
Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat'
Change-Id: I3e700b07781bec8fc5b9eabf15a1249ce2be0272
.../substr-ASTERIXDB-2949.0.query.sqlpp | 25 ++++++++++++++++
.../substr-ASTERIXDB-2949.0.adm | 1 +
.../test/resources/runtimets/testsuite_sqlpp.xml | 5 ++++
.../data/std/primitive/UTF8StringPointable.java | 3 +-
.../data/std/util/AbstractVarLenObjectBuilder.java | 5 ++--
.../std/primitive/UTF8StringPointableTest.java | 33 ++++++++++++++++++++++
6 files changed, 69 insertions(+), 3 deletions(-)
diff --cc asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index f0b20bf,600dde8..1e142a2
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@@ -9926,8 -9301,13 +9926,13 @@@
</compilation-unit>
</test-case>
<test-case FilePath="string">
- <compilation-unit name="substring-after-1">
- <output-dir compare="Text">substring-after-1</output-dir>
++ <compilation-unit name="substr-ASTERIXDB-2949">
++ <output-dir compare="Text">substr-ASTERIXDB-2949</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="string">
- <compilation-unit name="substring-after-2">
- <output-dir compare="Text">substring-after-2</output-dir>
+ <compilation-unit name="regexp_position_with_flag/offset0/regex_position0_with_flag">
+ <output-dir compare="Text">regexp_position_with_flag/offset0/regex_position0_with_flag</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="string">
diff --cc hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 828de18,eff71de..49f6221
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@@ -463,12 -368,14 +463,13 @@@ public final class UTF8StringPointable
return false;
}
- builder.reset(out, Math.min(utfLen - byteIdx, (int) (codePointLength * 1.0 * byteIdx / codePointIdx)));
- // for byteIdx = 0, this estimate assumes that every char size = 1 byte
- int estimateOutBytes = byteIdx == 0 ? charLength : (int) (charLength * 1.0 * byteIdx / chIdx);
++ int estimateOutBytes = byteIdx == 0 ? codePointLength : (int) (codePointLength * 1.0 * byteIdx / codePointIdx);
+ builder.reset(out, Math.min(utfLen - byteIdx, estimateOutBytes));
- chIdx = 0;
- while (byteIdx < utfLen && chIdx < charLength) {
- builder.appendChar(src.charAt(src.getMetaDataLength() + byteIdx));
- chIdx++;
- byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
+ codePointIdx = 0;
+ while (byteIdx < utfLen && codePointIdx < codePointLength) {
+ builder.appendCodePoint(src.codePointAt(src.getMetaDataLength() + byteIdx));
+ codePointIdx++;
+ byteIdx += src.codePointSize(src.getMetaDataLength() + byteIdx);
}
builder.finish();
return true;
diff --cc hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
index dcf9a10,ed439cb..f088c7e
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@@ -306,60 -255,39 +306,93 @@@ public class UTF8StringPointableTest
result.set(storage.getByteArray(), 0, storage.getLength());
expected = generateUTF8Pointable("is is it.i am;here. ");
assertEquals(0, expected.compareTo(result));
+
+ // Test Emoji trim
+ input = STRING_POINTABLE_EMOJI_FAMILY_OF_4;
+ pattern = "👨👦";
+ patternPointable = generateUTF8Pointable(pattern);
+ codePointSet.clear();
+ patternPointable.getCodePoints(codePointSet);
+
+ // Trim left
+ storage.reset();
+ input.trim(builder, storage, true, false, codePointSet);
+ result.set(storage.getByteArray(), 0, storage.getLength());
+ expected = generateUTF8Pointable("\u200D" + "👨👦👦");
+ assertEquals(0, expected.compareTo(result));
+
+ // Trim right
+ storage.reset();
+ input.trim(builder, storage, false, true, codePointSet);
+ result.set(storage.getByteArray(), 0, storage.getLength());
+ expected = generateUTF8Pointable("👨👨👦" + "\u200D");
+ assertEquals(0, expected.compareTo(result));
+
+ // Trim left and right
+ storage.reset();
+ input.trim(builder, storage, true, true, codePointSet);
+ result.set(storage.getByteArray(), 0, storage.getLength());
+ expected = generateUTF8Pointable("\u200D" + "👨👦" + "\u200D");
+ assertEquals(0, expected.compareTo(result));
+ }
+
+ @Test
+ public void testReverse() throws Exception {
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+ GrowableArray storage = new GrowableArray();
+ UTF8StringPointable result = new UTF8StringPointable();
+ UTF8StringPointable input = generateUTF8Pointable(" I'd like to reverse ");
+ UTF8StringPointable expected = generateUTF8Pointable(" esrever ot ekil d'I ");
+
+ UTF8StringPointable.reverse(input, builder, storage);
+ result.set(storage.getByteArray(), 0, storage.getLength());
+ assertEquals(0, expected.compareTo(result));
+ }
+
+ @Test
+ public void testReverseWithEmoji() throws IOException {
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+ GrowableArray storage = new GrowableArray();
+ UTF8StringPointable result = new UTF8StringPointable();
+ UTF8StringPointable input = generateUTF8Pointable("\uD83C\uDDE8\uD83C\uDDF3"); // CN flag
+ UTF8StringPointable expected = generateUTF8Pointable("\uD83C\uDDF3\uD83C\uDDE8"); // NC flag
+
+ UTF8StringPointable.reverse(input, builder, storage);
+ result.set(storage.getByteArray(), 0, storage.getLength());
+ assertEquals(0, expected.compareTo(result));
}
+ @Test
+ public void testStringBuilder() throws Exception {
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+ GrowableArray array = new GrowableArray();
+ UTF8StringPointable stringPointable = new UTF8StringPointable();
+ String writtenString;
+ int startIdx;
+
+ array.append(STRING_UTF8_MIX.getByteArray(), STRING_UTF8_MIX.getStartOffset(), STRING_UTF8_MIX.getLength());
+ String chunk = "ABC";
+ String originalString = chunk.repeat(699051);
+
+ // test grow path
+ startIdx = array.getLength();
+ builder.reset(array, 2);
+ builder.appendString(originalString);
+ builder.finish();
+ stringPointable.set(array.getByteArray(), startIdx, array.getLength());
+ writtenString = stringPointable.toString();
+ assertEquals(originalString, writtenString);
+
+ // test shrink path
+ array.reset();
+ array.append(STRING_UTF8_MIX.getByteArray(), STRING_UTF8_MIX.getStartOffset(), STRING_UTF8_MIX.getLength());
+ startIdx = array.getLength();
+ builder.reset(array, 699051);
+ builder.appendString(chunk);
+ builder.finish();
+ stringPointable.set(array.getByteArray(), startIdx, array.getLength());
+ writtenString = stringPointable.toString();
+ assertEquals(chunk, writtenString);
+ }
+
}