You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ch...@apache.org on 2019/07/02 20:31:54 UTC
[couchdb] 01/01: Expose ICU ucol_getSortKey
This is an automated email from the ASF dual-hosted git repository.
chewbranca pushed a commit to branch 2067-add-get-sort-key
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit ef0612175f54f84b617c57122f8b55a1ab3b9a8e
Author: Russell Branca <ch...@apache.org>
AuthorDate: Tue Jul 2 13:31:33 2019 -0700
Expose ICU ucol_getSortKey
---
src/couch/priv/icu_driver/couch_icu_driver.c | 38 ++++++++++++++++++++++++++++
src/couch/src/couch_util.erl | 8 +++++-
src/couch/test/couch_util_tests.erl | 35 +++++++++++++++++++++++++
3 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/src/couch/priv/icu_driver/couch_icu_driver.c b/src/couch/priv/icu_driver/couch_icu_driver.c
index 4d9bb98..b9141ae 100644
--- a/src/couch/priv/icu_driver/couch_icu_driver.c
+++ b/src/couch/priv/icu_driver/couch_icu_driver.c
@@ -147,6 +147,44 @@ couch_drv_control(ErlDrvData drv_data, unsigned int command,
return return_control_result(&response, sizeof(response), rbuf, rlen);
}
+ case 2: /* GET_SORT_KEY: */
+ {
+ uint8_t buf[1000];
+ uint8_t* currBuf = buf;
+ int32_t bufferLen = sizeof(buf);
+ int32_t expectedLen = 0;
+ int32_t len;
+ ErlDrvSSizeT res;
+ UChar sourceBuf[1000];
+
+ /* the first 32 bits are the length */
+ memcpy(&len, pBuf, sizeof(len));
+ pBuf += sizeof(len);
+ u_uastrcpy(sourceBuf, pBuf);
+
+ expectedLen = ucol_getSortKey(pData->coll, sourceBuf, len, currBuf, bufferLen);
+
+ /*
+ * TODO: handle case when expectedLen > bufferLen
+ * TODO: we need similar logic for when the input is bigger than the sourceBuf buffer
+ * TODO: free sourceBuf buffer in the same manner as currBuf
+ if (expectedLen > bufferLen) {
+ printf("EXPECTEDLEN[%i] > BUFFERLEN[%i]\n", expectedLen, bufferLen);
+ if (currBuf == buf) {
+ currBuf = (uint8_t*)malloc(expectedLen);
+ } else {
+ currBuf = (uint8_t*)realloc(currBuf, expectedLen);
+ }
+ expectedLen = ucol_getSortKey(pData->coll, sourceBuf, len, currBuf, expectedLen);
+ }
+ */
+
+ res = return_control_result(currBuf, expectedLen, rbuf, rlen);
+ if (currBuf != buf && currBuf != NULL) {
+ free(currBuf);
+ }
+ return res;
+ }
default:
return -1;
diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl
index 62e17ce..8c72355 100644
--- a/src/couch/src/couch_util.erl
+++ b/src/couch/src/couch_util.erl
@@ -14,7 +14,7 @@
-export([priv_dir/0, normpath/1, fold_files/5]).
-export([should_flush/0, should_flush/1, to_existing_atom/1]).
--export([rand32/0, implode/2, collate/2, collate/3]).
+-export([rand32/0, implode/2, collate/2, collate/3, get_sort_key/1]).
-export([abs_pathname/1,abs_pathname/2, trim/1, drop_dot_couch_ext/1]).
-export([encodeBase64Url/1, decodeBase64Url/1]).
-export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
@@ -411,6 +411,12 @@ collate(A, B, Options) when is_binary(A), is_binary(B) ->
% expected typical -1, 0, 1
Result - 1.
+%% Returns the ICU collation sort key for the binary Str, as produced by
+%% operation 2 (GET_SORT_KEY) of the ICU port driver.
+%%
+%% Wire format: a native-endian 32-bit length, the bytes of Str, then a single
+%% NUL byte. The driver copies the payload with u_uastrcpy(), which reads up
+%% to a terminating NUL, so the terminator keeps it from reading past the end
+%% of the control buffer. The length field still carries byte_size(Str).
+%%
+%% The driver copies into a fixed UChar[1000] buffer, so inputs that would not
+%% fit together with the terminator are rejected here (function_clause).
+%% NOTE(review): drop the size guard once the driver allocates dynamically.
+get_sort_key(Str) when is_binary(Str), byte_size(Str) < 1000 ->
+    Operation = 2, % get_sort_key
+    Size = byte_size(Str),
+    Bin = <<Size:32/native, Str/binary, 0>>,
+    erlang:port_control(drv_port(), Operation, Bin).
+
should_flush() ->
should_flush(?FLUSH_MAX_MEM).
diff --git a/src/couch/test/couch_util_tests.erl b/src/couch/test/couch_util_tests.erl
index 3e145c4..b993dbd 100644
--- a/src/couch/test/couch_util_tests.erl
+++ b/src/couch/test/couch_util_tests.erl
@@ -168,3 +168,38 @@ to_hex_test_() ->
?_assertEqual("", couch_util:to_hex(<<>>)),
?_assertEqual("010203faff", couch_util:to_hex(<<1, 2, 3, 250, 255>>))
].
+
+%% EUnit generator for the sort key tests: wraps test_get_sort_key/1 in a
+%% foreach fixture using setup/0 and teardown/1.
+sort_key_test_() ->
+    Fixture = {foreach, fun setup/0, fun teardown/1, [fun test_get_sort_key/1]},
+    {"Sort Key tests", [Fixture]}.
+
+%% Builds one titled EUnit test per ordered pair of sample strings. Each test
+%% asserts that comparing the two sort keys (-1, 0 or 1) gives the same answer
+%% as couch_util:collate/2 for the original strings.
+test_get_sort_key(_) ->
+    Strs = [<<"foo">>, <<"bar">>, <<"Bar">>, <<"baz">>, <<"BAZ">>, <<"quaz">>, <<"pizza">>],
+    % Three-way comparison of two sort keys using Erlang term ordering.
+    CompareKeys = fun
+        (K1, K2) when K1 < K2 -> -1;
+        (K1, K2) when K1 =:= K2 -> 0;
+        (_, _) -> 1
+    end,
+    [
+        begin
+            S1K = couch_util:get_sort_key(S1),
+            S2K = couch_util:get_sort_key(S2),
+            Res = CompareKeys(S1K, S2K),
+            Comment = list_to_binary(io_lib:format("strcmp(~p, ~p)", [S1, S2])),
+            {Comment, ?_assertEqual(Res, couch_util:collate(S1, S2))}
+        end
+     || S1 <- Strs, S2 <- Strs
+    ].
+