You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2021/09/11 06:31:12 UTC
[couchdb] 01/01: Remove couch_icu_driver module
This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch remove-couch-icu-driver
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 418f99ee3e7f5ef44a78ffadd67b7d3831c20fd6
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Sat Sep 11 02:04:23 2021 -0400
Remove couch_icu_driver module
couch_icu_driver is only used for binary strings comparison in
couch_ejson_compare when expression depth becomes greater than 10.
The logic for the comparison is identical to what couch_ejson_compare
uses, so opt to just use couch_ejson_compare only instead of keeping a
whole other binary collation driver around.
To avoid a possible infinite loop if couch_ejson_compare NIF fails to
load, throw a nif loading error as is common for NIF modules.
To avoid another case of a possible infinite retry from of badarg
generated from by max depth, and an actual bad ejson term, use a
specific max depth error so we don't have to guess when we catch it
and retry term traversal in erlang.
---
.../priv/couch_ejson_compare/couch_ejson_compare.c | 39 +++--
src/couch/priv/icu_driver/couch_icu_driver.c | 184 ---------------------
src/couch/rebar.config.script | 7 -
src/couch/src/couch_drv.erl | 63 -------
src/couch/src/couch_ejson_compare.erl | 10 +-
src/couch/src/couch_primary_sup.erl | 6 -
src/couch/src/couch_util.erl | 29 +---
src/couch/test/eunit/couch_util_tests.erl | 23 ---
.../test/eunit/couch_mrview_collation_tests.erl | 6 +-
9 files changed, 37 insertions(+), 330 deletions(-)
diff --git a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
index 49d6cd8..b13d0f0 100644
--- a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
+++ b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
@@ -21,6 +21,10 @@
#define MAX_DEPTH 10
+#define NO_ERROR 0
+#define BAD_ARG_ERROR 1
+#define MAX_DEPTH_ERROR 2
+
#if (ERL_NIF_MAJOR_VERSION > 2) || \
(ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION >= 3)
/* OTP R15B or higher */
@@ -41,6 +45,7 @@
static ERL_NIF_TERM ATOM_TRUE;
static ERL_NIF_TERM ATOM_FALSE;
static ERL_NIF_TERM ATOM_NULL;
+static ERL_NIF_TERM ATOM_MAX_DEPTH_ERROR;
typedef struct {
ErlNifEnv* env;
@@ -107,7 +112,7 @@ less_json_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
int result;
ctx.env = env;
- ctx.error = 0;
+ ctx.error = NO_ERROR;
ctx.coll = get_collator();
result = less_json(1, &ctx, argv[0], argv[1]);
@@ -122,6 +127,13 @@ less_json_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
* and do the comparison in Erlang land. In practice, views keys are
* EJSON structures with very little nesting.
*/
+ if (ctx.error == NO_ERROR) {
+ return enif_make_int(env, result);
+ } else if (ctx.error == MAX_DEPTH_ERROR) {
+ return enif_raise_exception(env, ATOM_MAX_DEPTH_ERROR);
+ } else {
+ return enif_make_badarg(env);
+ }
return ctx.error ? enif_make_badarg(env) : enif_make_int(env, result);
}
@@ -143,7 +155,7 @@ less_json(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
* via an exception and do the EJSON comparison in Erlang land.
*/
if (depth > MAX_DEPTH) {
- ctx->error = 1;
+ ctx->error = MAX_DEPTH_ERROR;
return 0;
}
@@ -155,12 +167,12 @@ less_json(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
int aSortOrd, bSortOrd;
if ((aSortOrd = atom_sort_order(ctx->env, a)) == -1) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
if ((bSortOrd = atom_sort_order(ctx->env, b)) == -1) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
@@ -225,20 +237,20 @@ less_json(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
}
if (!enif_get_tuple(ctx->env, a, &aArity, &aProps)) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
if ((aArity != 1) || !enif_is_list(ctx->env, aProps[0])) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
if (!enif_get_tuple(ctx->env, b, &bArity, &bProps)) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
if ((bArity != 1) || !enif_is_list(ctx->env, bProps[0])) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
@@ -325,20 +337,20 @@ compare_props(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
}
if (!enif_get_tuple(ctx->env, headA, &aArity, &aKV)) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
if ((aArity != 2) || !enif_inspect_binary(ctx->env, aKV[0], &keyA)) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
if (!enif_get_tuple(ctx->env, headB, &bArity, &bKV)) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
if ((bArity != 2) || !enif_inspect_binary(ctx->env, bKV[0], &keyB)) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
@@ -409,7 +421,7 @@ compare_strings(ctx_t* ctx, ErlNifBinary a, ErlNifBinary b)
result = ucol_strcollIter(ctx->coll, &iterA, &iterB, &status);
if (U_FAILURE(status)) {
- ctx->error = 1;
+ ctx->error = BAD_ARG_ERROR;
return 0;
}
@@ -449,6 +461,7 @@ on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
ATOM_TRUE = enif_make_atom(env, "true");
ATOM_FALSE = enif_make_atom(env, "false");
ATOM_NULL = enif_make_atom(env, "null");
+ ATOM_MAX_DEPTH_ERROR = enif_make_atom(env, "max_depth_error");
return 0;
}
diff --git a/src/couch/priv/icu_driver/couch_icu_driver.c b/src/couch/priv/icu_driver/couch_icu_driver.c
deleted file mode 100644
index 4d9bb98..0000000
--- a/src/couch/priv/icu_driver/couch_icu_driver.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use
-this file except in compliance with the License. You may obtain a copy of the
-License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed
-under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-*/
-
-/* This file is the C port driver for Erlang. It provides a low overhead
- * means of calling into C code, however coding errors in this module can
- * crash the entire Erlang server.
- */
-
-#ifdef DARWIN
-#define U_HIDE_DRAFT_API 1
-#define U_DISABLE_RENAMING 1
-#endif
-
-#include "erl_driver.h"
-#include "unicode/ucol.h"
-#include "unicode/ucasemap.h"
-#ifndef WIN32
-#include <string.h> /* for memcpy */
-#endif
-
-
-typedef struct {
- ErlDrvPort port;
- UCollator* collNoCase;
- UCollator* coll;
-} couch_drv_data;
-
-static void couch_drv_stop(ErlDrvData data)
-{
- couch_drv_data* pData = (couch_drv_data*)data;
- if (pData->coll) {
- ucol_close(pData->coll);
- }
- if (pData->collNoCase) {
- ucol_close(pData->collNoCase);
- }
- driver_free((void*)pData);
-}
-
-static ErlDrvData couch_drv_start(ErlDrvPort port, char *buff)
-{
- UErrorCode status = U_ZERO_ERROR;
- couch_drv_data* pData = (couch_drv_data*)driver_alloc(sizeof(couch_drv_data));
-
- if (pData == NULL)
- return ERL_DRV_ERROR_GENERAL;
-
- pData->port = port;
-
- pData->coll = ucol_open("", &status);
- if (U_FAILURE(status)) {
- couch_drv_stop((ErlDrvData)pData);
- return ERL_DRV_ERROR_GENERAL;
- }
-
- pData->collNoCase = ucol_open("", &status);
- if (U_FAILURE(status)) {
- couch_drv_stop((ErlDrvData)pData);
- return ERL_DRV_ERROR_GENERAL;
- }
-
- ucol_setAttribute(pData->collNoCase, UCOL_STRENGTH, UCOL_PRIMARY, &status);
- if (U_FAILURE(status)) {
- couch_drv_stop((ErlDrvData)pData);
- return ERL_DRV_ERROR_GENERAL;
- }
-
- return (ErlDrvData)pData;
-}
-
-ErlDrvSSizeT
-return_control_result(void* pLocalResult, int localLen,
- char **ppRetBuf, ErlDrvSizeT returnLen)
-{
- if (*ppRetBuf == NULL || localLen > returnLen) {
- *ppRetBuf = (char*)driver_alloc_binary(localLen);
- if(*ppRetBuf == NULL) {
- return -1;
- }
- }
- memcpy(*ppRetBuf, pLocalResult, localLen);
- return localLen;
-}
-
-static ErlDrvSSizeT
-couch_drv_control(ErlDrvData drv_data, unsigned int command,
- char *pBuf, ErlDrvSizeT bufLen,
- char **rbuf, ErlDrvSizeT rlen)
-{
-
- couch_drv_data* pData = (couch_drv_data*)drv_data;
- switch(command) {
- case 0: /* COLLATE */
- case 1: /* COLLATE_NO_CASE: */
- {
- UErrorCode status = U_ZERO_ERROR;
- int collResult;
- char response;
- UCharIterator iterA;
- UCharIterator iterB;
- int32_t length;
-
- /* 2 strings are in the buffer, consecutively
- * The strings begin first with a 32 bit integer byte length, then the actual
- * string bytes follow.
- */
-
- /* first 32bits are the length */
- memcpy(&length, pBuf, sizeof(length));
- pBuf += sizeof(length);
-
- /* point the iterator at it. */
- uiter_setUTF8(&iterA, pBuf, length);
-
- pBuf += length; /* now on to string b */
-
- /* first 32bits are the length */
- memcpy(&length, pBuf, sizeof(length));
- pBuf += sizeof(length);
-
- /* point the iterator at it. */
- uiter_setUTF8(&iterB, pBuf, length);
-
- if (command == 0) /* COLLATE */
- collResult = ucol_strcollIter(pData->coll, &iterA, &iterB, &status);
- else /* COLLATE_NO_CASE */
- collResult = ucol_strcollIter(pData->collNoCase, &iterA, &iterB, &status);
-
- if (collResult < 0)
- response = 0; /*lt*/
- else if (collResult > 0)
- response = 2; /*gt*/
- else
- response = 1; /*eq*/
-
- return return_control_result(&response, sizeof(response), rbuf, rlen);
- }
-
- default:
- return -1;
- }
-}
-
-ErlDrvEntry couch_driver_entry = {
- NULL, /* F_PTR init, N/A */
- couch_drv_start, /* L_PTR start, called when port is opened */
- couch_drv_stop, /* F_PTR stop, called when port is closed */
- NULL, /* F_PTR output, called when erlang has sent */
- NULL, /* F_PTR ready_input, called when input descriptor ready */
- NULL, /* F_PTR ready_output, called when output descriptor ready */
- "couch_icu_driver", /* char *driver_name, the argument to open_port */
- NULL, /* F_PTR finish, called when unloaded */
- NULL, /* Not used */
- couch_drv_control, /* F_PTR control, port_command callback */
- NULL, /* F_PTR timeout, reserved */
- NULL, /* F_PTR outputv, reserved */
- NULL, /* F_PTR ready_async */
- NULL, /* F_PTR flush */
- NULL, /* F_PTR call */
- NULL, /* F_PTR event */
- ERL_DRV_EXTENDED_MARKER,
- ERL_DRV_EXTENDED_MAJOR_VERSION,
- ERL_DRV_EXTENDED_MINOR_VERSION,
- ERL_DRV_FLAG_USE_PORT_LOCKING,
- NULL, /* Reserved -- Used by emulator internally */
- NULL, /* F_PTR process_exit */
-};
-
-DRIVER_INIT(couch_icu_driver) /* must match name in driver_entry */
-{
- return &couch_driver_entry;
-}
diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script
index d1b4dbc..5f61b00 100644
--- a/src/couch/rebar.config.script
+++ b/src/couch/rebar.config.script
@@ -172,8 +172,6 @@ CouchJSEnv = case SMVsn of
]
end.
-IcuPath = "priv/couch_icu_driver.so".
-IcuSrc = ["priv/icu_driver/*.c"].
IcuEnv = [{"DRV_CFLAGS", "$DRV_CFLAGS -DPIC -O2 -fno-common"},
{"DRV_LDFLAGS", "$DRV_LDFLAGS -lm -licuuc -licudata -licui18n -lpthread"}].
IcuDarwinEnv = [{"CFLAGS", "-DXP_UNIX -I/usr/local/opt/icu4c/include -I/opt/homebrew/opt/icu4c/include"},
@@ -189,11 +187,6 @@ CompareSrc = ["priv/couch_ejson_compare/*.c"].
BaseSpecs = [
%% couchjs
{".*", CouchJSPath, CouchJSSrc, [{env, CouchJSEnv}]},
- % ICU
- {"darwin", IcuPath, IcuSrc, [{env, IcuEnv ++ IcuDarwinEnv}]},
- {"linux", IcuPath, IcuSrc, [{env, IcuEnv}]},
- {"bsd", IcuPath, IcuSrc, [{env, IcuEnv ++ IcuBsdEnv}]},
- {"win32", IcuPath, IcuSrc, [{env, IcuWinEnv}]},
% ejson_compare
{"darwin", ComparePath, CompareSrc, [{env, IcuEnv ++ IcuDarwinEnv}]},
{"linux", ComparePath, CompareSrc, [{env, IcuEnv}]},
diff --git a/src/couch/src/couch_drv.erl b/src/couch/src/couch_drv.erl
deleted file mode 100644
index f2ff2ac..0000000
--- a/src/couch/src/couch_drv.erl
+++ /dev/null
@@ -1,63 +0,0 @@
-% Licensed under the Apache License, Version 2.0 (the "License"); you may not
-% use this file except in compliance with the License. You may obtain a copy of
-% the License at
-%
-% http://www.apache.org/licenses/LICENSE-2.0
-%
-% Unless required by applicable law or agreed to in writing, software
-% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-% License for the specific language governing permissions and limitations under
-% the License.
-
--module(couch_drv).
--behaviour(gen_server).
--vsn(1).
--export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
- code_change/3]).
-
--export([start_link/0]).
-
--include_lib("couch/include/couch_db.hrl").
-
-start_link() ->
- gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
-
-init([]) ->
- LibDir = util_driver_dir(),
- case erl_ddll:load(LibDir, "couch_icu_driver") of
- ok ->
- {ok, nil};
- {error, already_loaded} ->
- couch_log:info("~p reloading couch_icu_driver", [?MODULE]),
- ok = erl_ddll:reload(LibDir, "couch_icu_driver"),
- {ok, nil};
- {error, Error} ->
- {stop, erl_ddll:format_error(Error)}
- end.
-
-handle_call(_Request, _From, State) ->
- {reply, ok, State}.
-
-handle_cast(_Request, State) ->
- {noreply, State}.
-
-handle_info(_Info, State) ->
- {noreply, State}.
-
-terminate(_Reason, _State) ->
- ok.
-
-code_change(_OldVsn, State, _Extra) ->
-
- {ok, State}.
-
-
-% private API
-util_driver_dir() ->
- case config:get("couchdb", "util_driver_dir", undefined) of
- undefined ->
- couch_util:priv_dir();
- LibDir0 ->
- LibDir0
- end.
diff --git a/src/couch/src/couch_ejson_compare.erl b/src/couch/src/couch_ejson_compare.erl
index ca36c86..21ab07b 100644
--- a/src/couch/src/couch_ejson_compare.erl
+++ b/src/couch/src/couch_ejson_compare.erl
@@ -30,8 +30,8 @@ less(A, B) ->
try
less_nif(A, B)
catch
- error:badarg ->
- % Maybe the EJSON structure is too deep, fallback to Erlang land.
+ error:max_depth_error ->
+ % The EJSON structure is too deep, fallback to Erlang land.
less_erl(A, B)
end.
@@ -48,7 +48,7 @@ less_json(A,B) ->
less_nif(A, B) ->
- less_erl(A, B).
+ erlang:nif_error(less_nif_load_error, [A, B]).
less_erl(A,A) -> 0;
@@ -61,7 +61,7 @@ less_erl(A,B) when is_number(A), is_number(B) -> A - B;
less_erl(A,_) when is_number(A) -> -1;
less_erl(_,B) when is_number(B) -> 1;
-less_erl(A,B) when is_binary(A), is_binary(B) -> couch_util:collate(A,B);
+less_erl(A,B) when is_binary(A), is_binary(B) -> less_nif(A,B);
less_erl(A,_) when is_binary(A) -> -1;
less_erl(_,B) when is_binary(B) -> 1;
@@ -84,7 +84,7 @@ less_props([], [_|_]) ->
less_props(_, []) ->
1;
less_props([{AKey, AValue}|RestA], [{BKey, BValue}|RestB]) ->
- case couch_util:collate(AKey, BKey) of
+ case less_nif(AKey, BKey) of
0 ->
case less_erl(AValue, BValue) of
0 ->
diff --git a/src/couch/src/couch_primary_sup.erl b/src/couch/src/couch_primary_sup.erl
index 48c6d42..73c3de7 100644
--- a/src/couch/src/couch_primary_sup.erl
+++ b/src/couch/src/couch_primary_sup.erl
@@ -19,12 +19,6 @@ start_link() ->
init([]) ->
Children = [
- {collation_driver,
- {couch_drv, start_link, []},
- permanent,
- infinity,
- supervisor,
- [couch_drv]},
{couch_task_status,
{couch_task_status, start_link, []},
permanent,
diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl
index d1f4940..c7dc894 100644
--- a/src/couch/src/couch_util.erl
+++ b/src/couch/src/couch_util.erl
@@ -14,7 +14,7 @@
-export([priv_dir/0, normpath/1, fold_files/5]).
-export([should_flush/0, should_flush/1, to_existing_atom/1]).
--export([rand32/0, implode/2, collate/2, collate/3]).
+-export([rand32/0, implode/2]).
-export([abs_pathname/1,abs_pathname/2, trim/1, drop_dot_couch_ext/1]).
-export([encodeBase64Url/1, decodeBase64Url/1]).
-export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
@@ -387,33 +387,6 @@ implode([H|T], Sep, Acc) ->
implode(T, Sep, [Sep,H|Acc]).
-drv_port() ->
- case get(couch_drv_port) of
- undefined ->
- Port = open_port({spawn, "couch_icu_driver"}, []),
- put(couch_drv_port, Port),
- Port;
- Port ->
- Port
- end.
-
-collate(A, B) ->
- collate(A, B, []).
-
-collate(A, B, Options) when is_binary(A), is_binary(B) ->
- Operation =
- case lists:member(nocase, Options) of
- true -> 1; % Case insensitive
- false -> 0 % Case sensitive
- end,
- SizeA = byte_size(A),
- SizeB = byte_size(B),
- Bin = <<SizeA:32/native, A/binary, SizeB:32/native, B/binary>>,
- [Result] = erlang:port_control(drv_port(), Operation, Bin),
- % Result is 0 for lt, 1 for eq and 2 for gt. Subtract 1 to return the
- % expected typical -1, 0, 1
- Result - 1.
-
should_flush() ->
should_flush(?FLUSH_MAX_MEM).
diff --git a/src/couch/test/eunit/couch_util_tests.erl b/src/couch/test/eunit/couch_util_tests.erl
index 012c961..f6d7d95 100644
--- a/src/couch/test/eunit/couch_util_tests.erl
+++ b/src/couch/test/eunit/couch_util_tests.erl
@@ -25,31 +25,14 @@ setup() ->
%%
Ctx = test_util:start_couch(),
%% config:start_link(?CONFIG_CHAIN),
- %% {ok, _} = couch_drv:start_link(),
Ctx.
teardown(Ctx) ->
ok = test_util:stop_couch(Ctx),
%% config:stop(),
- %% erl_ddll:unload_driver(couch_icu_driver),
ok.
-collation_test_() ->
- {
- "Collation tests",
- [
- {
- setup,
- fun setup/0, fun teardown/1,
- [
- should_collate_ascii(),
- should_collate_non_ascii()
- ]
- }
- ]
- }.
-
validate_callback_exists_test_() ->
{
"validate_callback_exists tests",
@@ -59,12 +42,6 @@ validate_callback_exists_test_() ->
]
}.
-should_collate_ascii() ->
- ?_assertEqual(1, couch_util:collate(<<"foo">>, <<"bar">>)).
-
-should_collate_non_ascii() ->
- ?_assertEqual(-1, couch_util:collate(<<"A">>, <<"aa">>)).
-
to_existed_atom_test() ->
?assert(couch_util:to_existing_atom(true)),
?assertMatch(foo, couch_util:to_existing_atom(<<"foo">>)),
diff --git a/src/couch_mrview/test/eunit/couch_mrview_collation_tests.erl b/src/couch_mrview/test/eunit/couch_mrview_collation_tests.erl
index 5c8cb54..0d2afbe 100644
--- a/src/couch_mrview/test/eunit/couch_mrview_collation_tests.erl
+++ b/src/couch_mrview/test/eunit/couch_mrview_collation_tests.erl
@@ -52,7 +52,11 @@
{[{<<"b">>, 1}]},
{[{<<"b">>, 2}]},
{[{<<"b">>, 2}, {<<"a">>, 1}]},
- {[{<<"b">>, 2}, {<<"c">>, 2}]}
+ {[{<<"b">>, 2}, {<<"c">>, 2}]},
+
+ % Values with depth > 10 trigger the erlang collation fallback in couch_ejson_compare
+ {[{<<"x">>, [[[[[[[[[[[<<"y">>]]]]]]]]]]]}]}
+
]).