You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ch...@apache.org on 2019/07/02 20:31:54 UTC

[couchdb] 01/01: Expose ICU ucol_getSortKey

This is an automated email from the ASF dual-hosted git repository.

chewbranca pushed a commit to branch 2067-add-get-sort-key
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit ef0612175f54f84b617c57122f8b55a1ab3b9a8e
Author: Russell Branca <ch...@apache.org>
AuthorDate: Tue Jul 2 13:31:33 2019 -0700

    Expose ICU ucol_getSortKey
---
 src/couch/priv/icu_driver/couch_icu_driver.c | 38 ++++++++++++++++++++++++++++
 src/couch/src/couch_util.erl                 |  8 +++++-
 src/couch/test/couch_util_tests.erl          | 35 +++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/src/couch/priv/icu_driver/couch_icu_driver.c b/src/couch/priv/icu_driver/couch_icu_driver.c
index 4d9bb98..b9141ae 100644
--- a/src/couch/priv/icu_driver/couch_icu_driver.c
+++ b/src/couch/priv/icu_driver/couch_icu_driver.c
@@ -147,6 +147,44 @@ couch_drv_control(ErlDrvData drv_data, unsigned int command,
 
         return return_control_result(&response, sizeof(response), rbuf, rlen);
         }
+    case 2: /* GET_SORT_KEY: */
+        {
+        uint8_t buf[1000];
+        uint8_t* currBuf = buf;
+        int32_t bufferLen = sizeof(buf);
+        int32_t expectedLen = 0;
+        int32_t len;
+        ErlDrvSSizeT res;
+        UChar sourceBuf[1000];
+
+        /* first 32bits are the length */
+        memcpy(&len, pBuf, sizeof(len));
+        pBuf += sizeof(len);
+        u_uastrcpy(sourceBuf, pBuf);
+
+        expectedLen = ucol_getSortKey(pData->coll, sourceBuf, len, currBuf, bufferLen);
+
+        /*
+         * TODO: handle case when expectedLen > bufferLen
+         * TODO: we need similar logic for when the input is bigger than the sourceBuf buffer
+         * TODO: free sourceBuf buffer in the same manner as currBuf
+        if (expectedLen > bufferLen) {
+            printf("EXPECTEDLEN[%i] > BUFFERLEN[%i]\n", expectedLen, bufferLen);
+            if (currBuf == buf) {
+                currBuf = (uint8_t*)malloc(expectedLen);
+            } else {
+                currBuf = (uint8_t*)realloc(currBuf, expectedLen);
+            }
+            expectedLen = ucol_getSortKey(pData->coll, sourceBuf, len, currBuf, expectedLen);
+        }
+        */
+
+        res = return_control_result(currBuf, expectedLen, rbuf, rlen);
+        if (currBuf != buf && currBuf != NULL) {
+            free(currBuf);
+        }
+        return res;
+        }
 
     default:
         return -1;
diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl
index 62e17ce..8c72355 100644
--- a/src/couch/src/couch_util.erl
+++ b/src/couch/src/couch_util.erl
@@ -14,7 +14,7 @@
 
 -export([priv_dir/0, normpath/1, fold_files/5]).
 -export([should_flush/0, should_flush/1, to_existing_atom/1]).
--export([rand32/0, implode/2, collate/2, collate/3]).
+-export([rand32/0, implode/2, collate/2, collate/3, get_sort_key/1]).
 -export([abs_pathname/1,abs_pathname/2, trim/1, drop_dot_couch_ext/1]).
 -export([encodeBase64Url/1, decodeBase64Url/1]).
 -export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
@@ -411,6 +411,12 @@ collate(A, B, Options) when is_binary(A), is_binary(B) ->
     % expected typical -1, 0, 1
     Result - 1.
 
+get_sort_key(Str) when is_binary(Str), byte_size(Str) < 1000 ->
+    % Driver op 2 (GET_SORT_KEY) copies the payload into a 1000-element
+    % buffer up to the first NUL byte: cap the size until the driver grows
+    % its buffers, and append the terminator a binary does not carry.
+    erlang:port_control(drv_port(), 2, <<(byte_size(Str)):32/native, Str/binary, 0>>).
+
 should_flush() ->
     should_flush(?FLUSH_MAX_MEM).
 
diff --git a/src/couch/test/couch_util_tests.erl b/src/couch/test/couch_util_tests.erl
index 3e145c4..b993dbd 100644
--- a/src/couch/test/couch_util_tests.erl
+++ b/src/couch/test/couch_util_tests.erl
@@ -168,3 +168,38 @@ to_hex_test_() ->
         ?_assertEqual("", couch_util:to_hex(<<>>)),
         ?_assertEqual("010203faff", couch_util:to_hex(<<1, 2, 3, 250, 255>>))
     ].
+
+%% EUnit generator for the sort-key suite. The foreach fixture runs
+%% setup/0 before, and teardown/1 after, every instantiator in the list.
+sort_key_test_() ->
+    Instantiators = [
+        fun test_get_sort_key/1
+    ],
+    Fixture = {
+        foreach,
+        fun setup/0,
+        fun teardown/1,
+        Instantiators
+    },
+    {"Sort Key tests", [Fixture]}.
+
+%% Instantiator: one titled test per ordered pair of sample strings. The sort
+%% keys are compared eagerly; only the collate/2 call is deferred by the macro.
+test_get_sort_key(_) ->
+    Samples = [<<"foo">>, <<"bar">>, <<"Bar">>, <<"baz">>, <<"BAZ">>, <<"quaz">>, <<"pizza">>],
+    [
+        begin
+            LeftKey = couch_util:get_sort_key(Left),
+            RightKey = couch_util:get_sort_key(Right),
+            Expected =
+                if
+                    LeftKey < RightKey -> -1;
+                    LeftKey =:= RightKey -> 0;
+                    true -> 1
+                end,
+            Title = list_to_binary(io_lib:format("strcmp(~p, ~p)", [Left, Right])),
+            {Title, ?_assertEqual(Expected, couch_util:collate(Left, Right))}
+        end
+     || Left <- Samples, Right <- Samples
+    ].
+