You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mh...@apache.org on 2021/09/08 19:10:13 UTC

[asterixdb] 02/03: Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat'

This is an automated email from the ASF dual-hosted git repository.

mhubail pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 20314d118059da634847e1ddb1b1d744e552e713
Merge: 2521ade cc6143b
Author: Ali Alsuliman <al...@gmail.com>
AuthorDate: Wed Sep 8 00:19:26 2021 +0300

    Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat'
    
    Change-Id: I3e700b07781bec8fc5b9eabf15a1249ce2be0272

 .../substr-ASTERIXDB-2949.0.query.sqlpp            | 25 ++++++++++++++++
 .../substr-ASTERIXDB-2949.0.adm                    |  1 +
 .../test/resources/runtimets/testsuite_sqlpp.xml   |  5 ++++
 .../data/std/primitive/UTF8StringPointable.java    |  3 +-
 .../data/std/util/AbstractVarLenObjectBuilder.java |  5 ++--
 .../std/primitive/UTF8StringPointableTest.java     | 33 ++++++++++++++++++++++
 6 files changed, 69 insertions(+), 3 deletions(-)

diff --cc asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index f0b20bf,600dde8..1e142a2
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@@ -9926,8 -9301,13 +9926,13 @@@
        </compilation-unit>
      </test-case>
      <test-case FilePath="string">
 -      <compilation-unit name="substring-after-1">
 -        <output-dir compare="Text">substring-after-1</output-dir>
++      <compilation-unit name="substr-ASTERIXDB-2949">
++        <output-dir compare="Text">substr-ASTERIXDB-2949</output-dir>
+       </compilation-unit>
+     </test-case>
+     <test-case FilePath="string">
 -      <compilation-unit name="substring-after-2">
 -        <output-dir compare="Text">substring-after-2</output-dir>
 +      <compilation-unit name="regexp_position_with_flag/offset0/regex_position0_with_flag">
 +        <output-dir compare="Text">regexp_position_with_flag/offset0/regex_position0_with_flag</output-dir>
        </compilation-unit>
      </test-case>
      <test-case FilePath="string">
diff --cc hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 828de18,eff71de..49f6221
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@@ -463,12 -368,14 +463,13 @@@ public final class UTF8StringPointable 
              return false;
          }
  
-         builder.reset(out, Math.min(utfLen - byteIdx, (int) (codePointLength * 1.0 * byteIdx / codePointIdx)));
 -        // for byteIdx = 0, this estimate assumes that every char size = 1 byte
 -        int estimateOutBytes = byteIdx == 0 ? charLength : (int) (charLength * 1.0 * byteIdx / chIdx);
++        int estimateOutBytes = byteIdx == 0 ? codePointLength : (int) (codePointLength * 1.0 * byteIdx / codePointIdx);
+         builder.reset(out, Math.min(utfLen - byteIdx, estimateOutBytes));
 -        chIdx = 0;
 -        while (byteIdx < utfLen && chIdx < charLength) {
 -            builder.appendChar(src.charAt(src.getMetaDataLength() + byteIdx));
 -            chIdx++;
 -            byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
 +        codePointIdx = 0;
 +        while (byteIdx < utfLen && codePointIdx < codePointLength) {
 +            builder.appendCodePoint(src.codePointAt(src.getMetaDataLength() + byteIdx));
 +            codePointIdx++;
 +            byteIdx += src.codePointSize(src.getMetaDataLength() + byteIdx);
          }
          builder.finish();
          return true;
diff --cc hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
index dcf9a10,ed439cb..f088c7e
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@@ -306,60 -255,39 +306,93 @@@ public class UTF8StringPointableTest 
          result.set(storage.getByteArray(), 0, storage.getLength());
          expected = generateUTF8Pointable("is is it.i am;here.  ");
          assertEquals(0, expected.compareTo(result));
 +
 +        // Test Emoji trim
 +        input = STRING_POINTABLE_EMOJI_FAMILY_OF_4;
 +        pattern = "👨👦";
 +        patternPointable = generateUTF8Pointable(pattern);
 +        codePointSet.clear();
 +        patternPointable.getCodePoints(codePointSet);
 +
 +        // Trim left
 +        storage.reset();
 +        input.trim(builder, storage, true, false, codePointSet);
 +        result.set(storage.getByteArray(), 0, storage.getLength());
 +        expected = generateUTF8Pointable("\u200D" + "👨‍👦‍👦");
 +        assertEquals(0, expected.compareTo(result));
 +
 +        // Trim right
 +        storage.reset();
 +        input.trim(builder, storage, false, true, codePointSet);
 +        result.set(storage.getByteArray(), 0, storage.getLength());
 +        expected = generateUTF8Pointable("👨‍👨‍👦" + "\u200D");
 +        assertEquals(0, expected.compareTo(result));
 +
 +        // Trim left and right
 +        storage.reset();
 +        input.trim(builder, storage, true, true, codePointSet);
 +        result.set(storage.getByteArray(), 0, storage.getLength());
 +        expected = generateUTF8Pointable("\u200D" + "👨‍👦" + "\u200D");
 +        assertEquals(0, expected.compareTo(result));
 +    }
 +
 +    @Test
 +    public void testReverse() throws Exception {
 +        UTF8StringBuilder builder = new UTF8StringBuilder();
 +        GrowableArray storage = new GrowableArray();
 +        UTF8StringPointable result = new UTF8StringPointable();
 +        UTF8StringPointable input = generateUTF8Pointable(" I'd like to reverse ");
 +        UTF8StringPointable expected = generateUTF8Pointable(" esrever ot ekil d'I ");
 +
 +        UTF8StringPointable.reverse(input, builder, storage);
 +        result.set(storage.getByteArray(), 0, storage.getLength());
 +        assertEquals(0, expected.compareTo(result));
 +    }
 +
 +    @Test
 +    public void testReverseWithEmoji() throws IOException {
 +        UTF8StringBuilder builder = new UTF8StringBuilder();
 +        GrowableArray storage = new GrowableArray();
 +        UTF8StringPointable result = new UTF8StringPointable();
 +        UTF8StringPointable input = generateUTF8Pointable("\uD83C\uDDE8\uD83C\uDDF3"); // CN flag
 +        UTF8StringPointable expected = generateUTF8Pointable("\uD83C\uDDF3\uD83C\uDDE8"); // NC flag
 +
 +        UTF8StringPointable.reverse(input, builder, storage);
 +        result.set(storage.getByteArray(), 0, storage.getLength());
 +        assertEquals(0, expected.compareTo(result));
      }
  
+     @Test
+     public void testStringBuilder() throws Exception {
+         UTF8StringBuilder builder = new UTF8StringBuilder();
+         GrowableArray array = new GrowableArray();
+         UTF8StringPointable stringPointable = new UTF8StringPointable();
+         String writtenString;
+         int startIdx;
+ 
+         array.append(STRING_UTF8_MIX.getByteArray(), STRING_UTF8_MIX.getStartOffset(), STRING_UTF8_MIX.getLength());
+         String chunk = "ABC";
+         String originalString = chunk.repeat(699051);
+ 
+         // test grow path
+         startIdx = array.getLength();
+         builder.reset(array, 2);
+         builder.appendString(originalString);
+         builder.finish();
+         stringPointable.set(array.getByteArray(), startIdx, array.getLength());
+         writtenString = stringPointable.toString();
+         assertEquals(originalString, writtenString);
+ 
+         // test shrink path
+         array.reset();
+         array.append(STRING_UTF8_MIX.getByteArray(), STRING_UTF8_MIX.getStartOffset(), STRING_UTF8_MIX.getLength());
+         startIdx = array.getLength();
+         builder.reset(array, 699051);
+         builder.appendString(chunk);
+         builder.finish();
+         stringPointable.set(array.getByteArray(), startIdx, array.getLength());
+         writtenString = stringPointable.toString();
+         assertEquals(chunk, writtenString);
+     }
+ 
  }