You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ch...@apache.org on 2019/07/10 23:23:37 UTC

[couchdb] branch 2067-add-get-sort-key updated (05d4fb4 -> 015fe58)

This is an automated email from the ASF dual-hosted git repository.

chewbranca pushed a change to branch 2067-add-get-sort-key
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


 discard 05d4fb4  Expose ICU ucol_getSortKey
     new 015fe58  Expose ICU ucol_getSortKey

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (05d4fb4)
            \
             N -- N -- N   refs/heads/2067-add-get-sort-key (015fe58)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/couch/test/couch_util_tests.erl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)


[couchdb] 01/01: Expose ICU ucol_getSortKey

Posted by ch...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

chewbranca pushed a commit to branch 2067-add-get-sort-key
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 015fe58c71555553e913cc40cdb9713aefe5fea3
Author: Russell Branca <ch...@apache.org>
AuthorDate: Tue Jul 2 13:31:33 2019 -0700

    Expose ICU ucol_getSortKey
---
 src/couch/priv/icu_driver/couch_icu_driver.c | 57 ++++++++++++++++++++++
 src/couch/src/couch_util.erl                 | 13 ++++-
 src/couch/test/couch_util_tests.erl          | 72 ++++++++++++++++++++++++++++
 3 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/src/couch/priv/icu_driver/couch_icu_driver.c b/src/couch/priv/icu_driver/couch_icu_driver.c
index 4d9bb98..cfa7682 100644
--- a/src/couch/priv/icu_driver/couch_icu_driver.c
+++ b/src/couch/priv/icu_driver/couch_icu_driver.c
@@ -30,6 +30,8 @@ specific language governing permissions and limitations under the License.
 #include <string.h> /* for memcpy */
 #endif
 
+#define BUFFER_SIZE 1000
+
 
 typedef struct {
     ErlDrvPort port;
@@ -147,6 +149,61 @@ couch_drv_control(ErlDrvData drv_data, unsigned int command,
 
         return return_control_result(&response, sizeof(response), rbuf, rlen);
         }
+    case 2: /* GET_SORT_KEY: */
+        {
+        /* Request: a native int32 byte length, then that many UTF-8 bytes.
+         * Reply: the collator's sort key; 0 (empty reply) when the input
+         * cannot be converted or a heap buffer cannot be allocated. */
+        UChar source[BUFFER_SIZE];
+        UChar* sourcePtr = source;
+        int32_t sourceLen = 0;
+
+        uint8_t sortKey[BUFFER_SIZE];
+        uint8_t* sortKeyPtr = sortKey;
+        int32_t sortKeyLen;
+        int32_t inputLen;
+        UErrorCode status = U_ZERO_ERROR;
+        ErlDrvSSizeT res = 0;
+
+        /* first 32bits are the length */
+        memcpy(&inputLen, pBuf, sizeof(inputLen));
+        pBuf += sizeof(inputLen);
+
+        u_strFromUTF8(sourcePtr, BUFFER_SIZE, &sourceLen, pBuf, inputLen, &status);
+
+        if (status == U_BUFFER_OVERFLOW_ERROR) {
+            /* stack buffer too small; sourceLen holds the needed length.
+             * reset status or next u_strFromUTF8 call will auto-fail */
+            status = U_ZERO_ERROR;
+            sourcePtr = (UChar*) malloc(sourceLen * sizeof(UChar));
+            if (sourcePtr == NULL) {
+                return 0;
+            }
+            u_strFromUTF8(sourcePtr, sourceLen, NULL, pBuf, inputLen, &status);
+        }
+
+        if (U_SUCCESS(status)) {
+            sortKeyLen = ucol_getSortKey(pData->coll, sourcePtr, sourceLen, sortKeyPtr, BUFFER_SIZE);
+            if (sortKeyLen > BUFFER_SIZE) {
+                sortKeyPtr = (uint8_t*) malloc(sortKeyLen);
+                if (sortKeyPtr != NULL) {
+                    ucol_getSortKey(pData->coll, sourcePtr, sourceLen, sortKeyPtr, sortKeyLen);
+                }
+            }
+            if (sortKeyPtr != NULL) {
+                res = return_control_result(sortKeyPtr, sortKeyLen, rbuf, rlen);
+            }
+        }
+
+        /* single exit so heap buffers are freed on failure paths too */
+        if (sourcePtr != source) {
+            free(sourcePtr);
+        }
+        if (sortKeyPtr != sortKey) {
+            free(sortKeyPtr);
+        }
+        return res;
+    }
 
     default:
         return -1;
diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl
index 62e17ce..adcc3e8 100644
--- a/src/couch/src/couch_util.erl
+++ b/src/couch/src/couch_util.erl
@@ -14,7 +14,7 @@
 
 -export([priv_dir/0, normpath/1, fold_files/5]).
 -export([should_flush/0, should_flush/1, to_existing_atom/1]).
--export([rand32/0, implode/2, collate/2, collate/3]).
+-export([rand32/0, implode/2, collate/2, collate/3, get_sort_key/1]).
 -export([abs_pathname/1,abs_pathname/2, trim/1, drop_dot_couch_ext/1]).
 -export([encodeBase64Url/1, decodeBase64Url/1]).
 -export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
@@ -411,6 +411,17 @@ collate(A, B, Options) when is_binary(A), is_binary(B) ->
     % expected typical -1, 0, 1
     Result - 1.
 
+%% Return the ICU sort key for a non-empty binary, or `error`.
+%% The request is a native-endian 32-bit byte length followed by the bytes.
+get_sort_key(<<>>) -> error;
+get_sort_key(Str) when is_binary(Str) ->
+    Payload = <<(byte_size(Str)):32/native, Str/binary>>,
+    % control command 2 is get_sort_key; an empty reply is mapped to error
+    case erlang:port_control(drv_port(), 2, Payload) of
+        [] -> error;
+        SortKey -> SortKey
+    end.
+
 should_flush() ->
     should_flush(?FLUSH_MAX_MEM).
 
diff --git a/src/couch/test/couch_util_tests.erl b/src/couch/test/couch_util_tests.erl
index 3e145c4..43baab6 100644
--- a/src/couch/test/couch_util_tests.erl
+++ b/src/couch/test/couch_util_tests.erl
@@ -168,3 +168,75 @@ to_hex_test_() ->
         ?_assertEqual("", couch_util:to_hex(<<>>)),
         ?_assertEqual("010203faff", couch_util:to_hex(<<1, 2, 3, 250, 255>>))
     ].
+
+%% EUnit generator for the couch_util:get_sort_key/1 cases below.
+%% foreach wraps every case in its own setup/0 and teardown/1 call.
+%% Each case receives the value returned by setup/0 and ignores it.
+sort_key_test_() ->
+    Cases = [
+        fun test_get_sort_key/1,
+        fun test_get_sort_key_jiffy_string/1,
+        fun test_get_sort_key_fails_on_bad_input/1,
+        fun test_get_sort_key_longer_than_buffer/1
+    ],
+    {
+        "Sort Key tests",
+        [
+            {foreach, fun setup/0, fun teardown/1, Cases}
+        ]
+    }.
+
+%% For every ordered pair of samples, comparing the two sort keys must
+%% give the same answer as couch_util:collate/2 on the strings themselves.
+%% Returns one titled assertion per pair.
+test_get_sort_key(_) ->
+    Samples = [<<"foo">>, <<"bar">>, <<"Bar">>, <<"baz">>, <<"BAZ">>, <<"quaz">>,
+        <<"1234fdsa">>, <<"1234">>, <<"pizza">>],
+    [begin
+        KeyOrder = sort_keys(couch_util:get_sort_key(A), couch_util:get_sort_key(B)),
+        Title = list_to_binary(io_lib:format("strcmp(~p, ~p)", [A, B])),
+        Collated = couch_util:collate(A, B),
+        {Title, ?_assertEqual(KeyOrder, Collated)}
+    end || A <- Samples, B <- Samples].
+
+%% A key/value pair decoded by jiffy must order the same by sort key as it
+%% does through couch_util:collate/2.
+test_get_sort_key_jiffy_string(_) ->
+    %% per the original note, jiffy:decode does not null terminate strings,
+    %% which makes it a source of unterminated input for the driver
+    {[{Name, Value}]} = jiffy:decode(<<"{\"foo\": \"bar\"}">>),
+    KeyOrder = sort_keys(couch_util:get_sort_key(Name), couch_util:get_sort_key(Value)),
+    Collated = couch_util:collate(Name, Value),
+    ?_assertEqual(KeyOrder, Collated).
+
+test_get_sort_key_fails_on_bad_input(_) ->
+    %% random bytes (originally from crypto:strong_rand_bytes) that are not
+    %% valid UTF-8, so get_sort_key/1 is expected to answer `error`
+    Invalid = <<209,98,222,144,60,163,72,134,206,157>>,
+    Outcome = couch_util:get_sort_key(Invalid),
+    ?_assertEqual(error, Outcome).
+
+%% Inputs longer than the driver's 1000-unit stack buffer must take the
+%% resize path and still order like couch_util:collate/2.
+test_get_sort_key_longer_than_buffer(_) ->
+    Padding = binary:copy(<<"a">>, 1200),
+    Long1 = <<"foo", Padding/binary>>,
+    Long2 = <<"bar", Padding/binary>>,
+    Key1 = couch_util:get_sort_key(Long1),
+    Key2 = couch_util:get_sort_key(Long2),
+    KeyOrder = sort_keys(Key1, Key2),
+    Collated = couch_util:collate(Long1, Long2),
+    ?_assertEqual(KeyOrder, Collated).
+
+%% Three-way comparison of two sort keys by Erlang term order.
+%% Returns -1 when S1 < S2, 0 when the two terms are exactly equal
+%% (=:=), and 1 in every other case, so the result can be compared
+%% directly with what couch_util:collate/2 returns.
+%% The repeated variable in the second clause performs the =:= check.
+sort_keys(S1, S2) when S1 < S2 ->
+    -1;
+sort_keys(S, S) ->
+    0;
+sort_keys(_S1, _S2) ->
+    1.
+