You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2021/10/15 02:29:55 UTC

[couchdb] branch remove-couch-icu-driver updated (71ca501 -> 6eaf985)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch remove-couch-icu-driver
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


    omit 71ca501  Remove couch_icu_driver module
     add 029612b  Avoid badmatch for fabric:cleanup_index_files
     new 6eaf985  Remove couch_icu_driver module

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (71ca501)
            \
             N -- N -- N   refs/heads/remove-couch-icu-driver (6eaf985)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/fabric/src/fabric.erl | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

[couchdb] 01/01: Remove couch_icu_driver module

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch remove-couch-icu-driver
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 6eaf9850772f93c4a33818e050975ee68894662d
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Sat Sep 11 02:04:23 2021 -0400

    Remove couch_icu_driver module
    
    couch_icu_driver is only used for binary strings comparison in
    couch_ejson_compare when expression depth becomes greater than 10.
    
    The logic for string comparison is identical to what couch_ejson_compare uses,
    so opt to just use couch_ejson_compare instead of keeping a whole other binary
    collation driver around.
    
    To avoid a possible infinite loop if couch_ejson_compare nif fails to
    load, throw a nif loading error as is common for nif modules.
    
    To avoid another case of a possible infinite retry from of badarg
    generated by max depth, and/or an actual bad ejson term, use a
    specific max depth error so we don't have to guess when we catch it
    and retry term traversal in erlang.
    
    There was another uncodumented case when badarg was thrown besides max
    depth or an invalid arg. It was when a prop value was compared with
    any other supported type. In erlang it would be handled in these
    clauses:
    
    ```
       less_erl({A},{B}) when is_list(A), is_list(B) -> less_props(A,B);
       less_erl({A},_) when is_list(A)               -> -1;
       less_erl(_,{B}) when is_list(B)               -> 1.
    ```
    
    However, in C we threw a badarg for the last two clauses and relied on
    erlang to do all the work. This case was a potential performance issue
    as well since that is a common comparison for mango where we may
    compare keys against the max json object value (<<255,255,255,255>>).
    
    Add a few property tests in order to validate collation behavior. The two main
    ones are:
    
      1) Given an expected sort order of some test values, assert that both the
         erlang and nif collators would correctly order any of those test values.
    
      2) In general, the nif collator would sort any json the same way as the
         erlang one.
---
 src/couch/include/couch_eunit_proper.hrl           |   8 +-
 .../priv/couch_ejson_compare/couch_ejson_compare.c | 103 ++++++--
 src/couch/priv/icu_driver/couch_icu_driver.c       | 184 -------------
 src/couch/rebar.config.script                      |   7 -
 src/couch/src/couch_drv.erl                        |  63 -----
 src/couch/src/couch_ejson_compare.erl              |  18 +-
 src/couch/src/couch_primary_sup.erl                |   6 -
 src/couch/src/couch_util.erl                       |  29 +--
 src/couch/test/eunit/couch_ejson_compare_tests.erl | 286 +++++++++++++++++++++
 src/couch/test/eunit/couch_util_tests.erl          |  23 --
 .../test/eunit/couch_mrview_collation_tests.erl    |   6 +-
 11 files changed, 390 insertions(+), 343 deletions(-)

diff --git a/src/couch/include/couch_eunit_proper.hrl b/src/couch/include/couch_eunit_proper.hrl
index 31ae40e..dcf0770 100644
--- a/src/couch/include/couch_eunit_proper.hrl
+++ b/src/couch/include/couch_eunit_proper.hrl
@@ -13,8 +13,7 @@
 -include_lib("proper/include/proper.hrl").
 -include_lib("eunit/include/eunit.hrl").
 
-
--define(EUNIT_QUICKCHECK(QuickcheckTimeout),
+-define(EUNIT_QUICKCHECK(QuickcheckTimeout, NumTests),
     [
         {
             atom_to_list(F),
@@ -22,8 +21,13 @@
                 ?_assert(proper:quickcheck(?MODULE:F(), [
                     {to_file, user},
                     {start_size, 2},
+                    {numtests, NumTests},
                     long_result
                  ]))}
             }
         || {F, 0} <- ?MODULE:module_info(exports), F > 'prop_', F < 'prop`'
     ]).
+
+-define(EUNIT_QUICKCHECK(QuickcheckTimeout),
+        ?EUNIT_QUICKCHECK(QuickcheckTimeout, 100)
+    ).
diff --git a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
index 49d6cd8..f453a29 100644
--- a/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
+++ b/src/couch/priv/couch_ejson_compare/couch_ejson_compare.c
@@ -21,6 +21,10 @@
 
 #define MAX_DEPTH 10
 
+#define NO_ERROR 0
+#define BAD_ARG_ERROR 1
+#define MAX_DEPTH_ERROR 2
+
 #if (ERL_NIF_MAJOR_VERSION > 2) || \
     (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION >= 3)
 /* OTP R15B or higher */
@@ -41,6 +45,7 @@
 static ERL_NIF_TERM ATOM_TRUE;
 static ERL_NIF_TERM ATOM_FALSE;
 static ERL_NIF_TERM ATOM_NULL;
+static ERL_NIF_TERM ATOM_MAX_DEPTH_ERROR;
 
 typedef struct {
     ErlNifEnv* env;
@@ -107,7 +112,7 @@ less_json_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
     int result;
 
     ctx.env = env;
-    ctx.error = 0;
+    ctx.error = NO_ERROR;
     ctx.coll = get_collator();
 
     result = less_json(1, &ctx, argv[0], argv[1]);
@@ -122,7 +127,42 @@ less_json_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
      *    and do the comparison in Erlang land. In practice, views keys are
      *    EJSON structures with very little nesting.
      */
-    return ctx.error ? enif_make_badarg(env) : enif_make_int(env, result);
+    if (ctx.error == NO_ERROR) {
+        return enif_make_int(env, result);
+    } else if (ctx.error == MAX_DEPTH_ERROR) {
+        return enif_raise_exception(env, ATOM_MAX_DEPTH_ERROR);
+    } else {
+        return enif_make_badarg(env);
+    }
+}
+
+
+ERL_NIF_TERM
+compare_strings_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+    ctx_t ctx;
+    int result;
+    ErlNifBinary binA, binB;
+
+    if (!enif_inspect_binary(env, argv[0], &binA)) {
+        return enif_make_badarg(env);
+    }
+
+    if (!enif_inspect_binary(env, argv[1], &binB)) {
+        return enif_make_badarg(env);
+    }
+
+    ctx.env = env;
+    ctx.error = NO_ERROR;
+    ctx.coll = get_collator();
+
+    result = compare_strings(&ctx, binA, binB);
+
+    if (ctx.error == NO_ERROR){
+        return enif_make_int(env, result);
+    } else {
+        return enif_make_badarg(env);
+    }
 }
 
 
@@ -134,6 +174,7 @@ less_json(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
     int aIsNumber, bIsNumber;
     int aIsList, bIsList;
     int aArity, bArity;
+    int aIsProps, bIsProps;
     const ERL_NIF_TERM *aProps, *bProps;
 
     /*
@@ -143,7 +184,7 @@ less_json(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
      * via an exception and do the EJSON comparison in Erlang land.
      */
     if (depth > MAX_DEPTH) {
-        ctx->error = 1;
+        ctx->error = MAX_DEPTH_ERROR;
         return 0;
     }
 
@@ -155,12 +196,12 @@ less_json(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
             int aSortOrd, bSortOrd;
 
             if ((aSortOrd = atom_sort_order(ctx->env, a)) == -1) {
-                ctx->error = 1;
+                ctx->error = BAD_ARG_ERROR;
                 return 0;
             }
 
             if ((bSortOrd = atom_sort_order(ctx->env, b)) == -1) {
-                ctx->error = 1;
+                ctx->error = BAD_ARG_ERROR;
                 return 0;
             }
 
@@ -224,25 +265,37 @@ less_json(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
         return 1;
     }
 
-    if (!enif_get_tuple(ctx->env, a, &aArity, &aProps)) {
-        ctx->error = 1;
-        return 0;
+
+    aIsProps = 0;
+    if (enif_get_tuple(ctx->env, a, &aArity, &aProps)) {
+        if (aArity == 1 && enif_is_list(ctx->env, aProps[0])) {
+            aIsProps = 1;
+        }
     }
-    if ((aArity != 1) || !enif_is_list(ctx->env, aProps[0])) {
-        ctx->error = 1;
-        return 0;
+
+    bIsProps = 0;
+    if (enif_get_tuple(ctx->env, b, &bArity, &bProps)) {
+        if (bArity == 1 && enif_is_list(ctx->env, bProps[0])) {
+            bIsProps = 1;
+        }
     }
 
-    if (!enif_get_tuple(ctx->env, b, &bArity, &bProps)) {
-        ctx->error = 1;
-        return 0;
+    if (aIsProps) {
+        if (bIsProps) {
+            return compare_props(depth, ctx, aProps[0], bProps[0]);
+        }
+        return -1;
     }
-    if ((bArity != 1) || !enif_is_list(ctx->env, bProps[0])) {
-        ctx->error = 1;
-        return 0;
+
+    if (bIsProps) {
+        return 1;
     }
 
-    return compare_props(depth, ctx, aProps[0], bProps[0]);
+    /*
+     * Both arguments are unsupported data types. Return a badarg error
+     */
+    ctx->error = BAD_ARG_ERROR;
+    return 0;
 }
 
 
@@ -325,20 +378,20 @@ compare_props(int depth, ctx_t* ctx, ERL_NIF_TERM a, ERL_NIF_TERM b)
         }
 
         if (!enif_get_tuple(ctx->env, headA, &aArity, &aKV)) {
-            ctx->error = 1;
+            ctx->error = BAD_ARG_ERROR;
             return 0;
         }
         if ((aArity != 2) || !enif_inspect_binary(ctx->env, aKV[0], &keyA)) {
-            ctx->error = 1;
+            ctx->error = BAD_ARG_ERROR;
             return 0;
         }
 
         if (!enif_get_tuple(ctx->env, headB, &bArity, &bKV)) {
-            ctx->error = 1;
+            ctx->error = BAD_ARG_ERROR;
             return 0;
         }
         if ((bArity != 2) || !enif_inspect_binary(ctx->env, bKV[0], &keyB)) {
-            ctx->error = 1;
+            ctx->error = BAD_ARG_ERROR;
             return 0;
         }
 
@@ -409,7 +462,7 @@ compare_strings(ctx_t* ctx, ErlNifBinary a, ErlNifBinary b)
     result = ucol_strcollIter(ctx->coll, &iterA, &iterB, &status);
 
     if (U_FAILURE(status)) {
-        ctx->error = 1;
+        ctx->error = BAD_ARG_ERROR;
         return 0;
     }
 
@@ -449,6 +502,7 @@ on_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info)
     ATOM_TRUE = enif_make_atom(env, "true");
     ATOM_FALSE = enif_make_atom(env, "false");
     ATOM_NULL = enif_make_atom(env, "null");
+    ATOM_MAX_DEPTH_ERROR = enif_make_atom(env, "max_depth_error");
 
     return 0;
 }
@@ -476,7 +530,8 @@ on_unload(ErlNifEnv* env, void* priv_data)
 
 
 static ErlNifFunc nif_functions[] = {
-    {"less_nif", 2, less_json_nif}
+    {"less_nif", 2, less_json_nif},
+    {"compare_strings_nif", 2, compare_strings_nif}
 };
 
 
diff --git a/src/couch/priv/icu_driver/couch_icu_driver.c b/src/couch/priv/icu_driver/couch_icu_driver.c
deleted file mode 100644
index 4d9bb98..0000000
--- a/src/couch/priv/icu_driver/couch_icu_driver.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use
-this file except in compliance with the License. You may obtain a copy of the
-License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed
-under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-*/
-
-/* This file is the C port driver for Erlang. It provides a low overhead
- * means of calling into C code, however coding errors in this module can
- * crash the entire Erlang server.
- */
-
-#ifdef DARWIN
-#define U_HIDE_DRAFT_API 1
-#define U_DISABLE_RENAMING 1
-#endif
-
-#include "erl_driver.h"
-#include "unicode/ucol.h"
-#include "unicode/ucasemap.h"
-#ifndef WIN32
-#include <string.h> /* for memcpy */
-#endif
-
-
-typedef struct {
-    ErlDrvPort port;
-    UCollator* collNoCase;
-    UCollator* coll;
-} couch_drv_data;
-
-static void couch_drv_stop(ErlDrvData data)
-{
-    couch_drv_data* pData = (couch_drv_data*)data;
-    if (pData->coll) {
-        ucol_close(pData->coll);
-    }
-    if (pData->collNoCase) {
-        ucol_close(pData->collNoCase);
-    }
-    driver_free((void*)pData);
-}
-
-static ErlDrvData couch_drv_start(ErlDrvPort port, char *buff)
-{
-    UErrorCode status = U_ZERO_ERROR;
-    couch_drv_data* pData = (couch_drv_data*)driver_alloc(sizeof(couch_drv_data));
-
-    if (pData == NULL)
-        return ERL_DRV_ERROR_GENERAL;
-
-    pData->port = port;
-
-    pData->coll = ucol_open("", &status);
-    if (U_FAILURE(status)) {
-        couch_drv_stop((ErlDrvData)pData);
-        return ERL_DRV_ERROR_GENERAL;
-    }
-
-    pData->collNoCase = ucol_open("", &status);
-    if (U_FAILURE(status)) {
-        couch_drv_stop((ErlDrvData)pData);
-        return ERL_DRV_ERROR_GENERAL;
-    }
-
-    ucol_setAttribute(pData->collNoCase, UCOL_STRENGTH, UCOL_PRIMARY, &status);
-    if (U_FAILURE(status)) {
-        couch_drv_stop((ErlDrvData)pData);
-        return ERL_DRV_ERROR_GENERAL;
-    }
-
-    return (ErlDrvData)pData;
-}
-
-ErlDrvSSizeT
-return_control_result(void* pLocalResult, int localLen,
-            char **ppRetBuf, ErlDrvSizeT returnLen)
-{
-    if (*ppRetBuf == NULL || localLen > returnLen) {
-        *ppRetBuf = (char*)driver_alloc_binary(localLen);
-        if(*ppRetBuf == NULL) {
-            return -1;
-        }
-    }
-    memcpy(*ppRetBuf, pLocalResult, localLen);
-    return localLen;
-}
-
-static ErlDrvSSizeT
-couch_drv_control(ErlDrvData drv_data, unsigned int command,
-        char *pBuf, ErlDrvSizeT bufLen,
-        char **rbuf, ErlDrvSizeT rlen)
-{
-
-    couch_drv_data* pData = (couch_drv_data*)drv_data;
-    switch(command) {
-    case 0: /* COLLATE */
-    case 1: /* COLLATE_NO_CASE: */
-        {
-        UErrorCode status = U_ZERO_ERROR;
-        int collResult;
-        char response;
-        UCharIterator iterA;
-        UCharIterator iterB;
-        int32_t length;
-
-        /* 2 strings are in the buffer, consecutively
-         * The strings begin first with a 32 bit integer byte length, then the actual
-         * string bytes follow.
-         */
-
-        /* first 32bits are the length */
-        memcpy(&length, pBuf, sizeof(length));
-        pBuf += sizeof(length);
-
-        /* point the iterator at it. */
-        uiter_setUTF8(&iterA, pBuf, length);
-
-        pBuf += length; /* now on to string b */
-
-        /* first 32bits are the length */
-        memcpy(&length, pBuf, sizeof(length));
-        pBuf += sizeof(length);
-
-        /* point the iterator at it. */
-        uiter_setUTF8(&iterB, pBuf, length);
-
-        if (command == 0) /* COLLATE */
-          collResult = ucol_strcollIter(pData->coll, &iterA, &iterB, &status);
-        else              /* COLLATE_NO_CASE */
-          collResult = ucol_strcollIter(pData->collNoCase, &iterA, &iterB, &status);
-
-        if (collResult < 0)
-          response = 0; /*lt*/
-        else if (collResult > 0)
-          response = 2; /*gt*/
-        else
-          response = 1; /*eq*/
-
-        return return_control_result(&response, sizeof(response), rbuf, rlen);
-        }
-
-    default:
-        return -1;
-    }
-}
-
-ErlDrvEntry couch_driver_entry = {
-        NULL,               /* F_PTR init, N/A */
-        couch_drv_start,    /* L_PTR start, called when port is opened */
-        couch_drv_stop,     /* F_PTR stop, called when port is closed */
-        NULL,               /* F_PTR output, called when erlang has sent */
-        NULL,               /* F_PTR ready_input, called when input descriptor ready */
-        NULL,               /* F_PTR ready_output, called when output descriptor ready */
-        "couch_icu_driver", /* char *driver_name, the argument to open_port */
-        NULL,               /* F_PTR finish, called when unloaded */
-        NULL,               /* Not used */
-        couch_drv_control,  /* F_PTR control, port_command callback */
-        NULL,               /* F_PTR timeout, reserved */
-        NULL,               /* F_PTR outputv, reserved */
-        NULL,               /* F_PTR ready_async */
-        NULL,               /* F_PTR flush */
-        NULL,               /* F_PTR call */
-        NULL,               /* F_PTR event */
-        ERL_DRV_EXTENDED_MARKER,
-        ERL_DRV_EXTENDED_MAJOR_VERSION,
-        ERL_DRV_EXTENDED_MINOR_VERSION,
-        ERL_DRV_FLAG_USE_PORT_LOCKING,
-        NULL,               /* Reserved -- Used by emulator internally */
-        NULL,               /* F_PTR process_exit */
-};
-
-DRIVER_INIT(couch_icu_driver) /* must match name in driver_entry */
-{
-        return &couch_driver_entry;
-}
diff --git a/src/couch/rebar.config.script b/src/couch/rebar.config.script
index 4a44baf..86b6a58 100644
--- a/src/couch/rebar.config.script
+++ b/src/couch/rebar.config.script
@@ -172,8 +172,6 @@ CouchJSEnv = case SMVsn of
         ]
 end.
 
-IcuPath = "priv/couch_icu_driver.so".
-IcuSrc = ["priv/icu_driver/*.c"].
 IcuEnv = [{"DRV_CFLAGS",  "$DRV_CFLAGS -DPIC -O2 -fno-common"},
           {"DRV_LDFLAGS", "$DRV_LDFLAGS -lm -licuuc -licudata -licui18n -lpthread"}].
 IcuDarwinEnv = [{"CFLAGS", "-DXP_UNIX -I/usr/local/opt/icu4c/include -I/opt/homebrew/opt/icu4c/include"},
@@ -189,11 +187,6 @@ CompareSrc = ["priv/couch_ejson_compare/*.c"].
 BaseSpecs = [
         %% couchjs
         {".*", CouchJSPath, CouchJSSrc, [{env, CouchJSEnv}]},
-        % ICU
-        {"darwin", IcuPath, IcuSrc, [{env, IcuEnv ++ IcuDarwinEnv}]},
-        {"linux",  IcuPath, IcuSrc, [{env, IcuEnv}]},
-        {"bsd",   IcuPath, IcuSrc, [{env, IcuEnv ++ IcuBsdEnv}]},
-        {"win32",  IcuPath, IcuSrc, [{env, IcuWinEnv}]},
         % ejson_compare
         {"darwin", ComparePath, CompareSrc, [{env, IcuEnv ++ IcuDarwinEnv}]},
         {"linux",  ComparePath, CompareSrc, [{env, IcuEnv}]},
diff --git a/src/couch/src/couch_drv.erl b/src/couch/src/couch_drv.erl
deleted file mode 100644
index f2ff2ac..0000000
--- a/src/couch/src/couch_drv.erl
+++ /dev/null
@@ -1,63 +0,0 @@
-% Licensed under the Apache License, Version 2.0 (the "License"); you may not
-% use this file except in compliance with the License. You may obtain a copy of
-% the License at
-%
-%   http://www.apache.org/licenses/LICENSE-2.0
-%
-% Unless required by applicable law or agreed to in writing, software
-% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-% License for the specific language governing permissions and limitations under
-% the License.
-
--module(couch_drv).
--behaviour(gen_server).
--vsn(1).
--export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2,
-    code_change/3]).
-
--export([start_link/0]).
-
--include_lib("couch/include/couch_db.hrl").
-
-start_link() ->
-    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
-
-init([]) ->
-    LibDir = util_driver_dir(),
-    case erl_ddll:load(LibDir, "couch_icu_driver") of
-    ok ->
-        {ok, nil};
-    {error, already_loaded} ->
-        couch_log:info("~p reloading couch_icu_driver", [?MODULE]),
-        ok = erl_ddll:reload(LibDir, "couch_icu_driver"),
-        {ok, nil};
-    {error, Error} ->
-        {stop, erl_ddll:format_error(Error)}
-    end.
-
-handle_call(_Request, _From, State) ->
-    {reply, ok, State}.
-
-handle_cast(_Request, State) ->
-    {noreply, State}.
-
-handle_info(_Info, State) ->
-    {noreply, State}.
-
-terminate(_Reason, _State) ->
-    ok.
-
-code_change(_OldVsn, State, _Extra) ->
-
-    {ok, State}.
-
-
-% private API
-util_driver_dir() ->
-    case config:get("couchdb", "util_driver_dir", undefined) of
-    undefined ->
-        couch_util:priv_dir();
-    LibDir0 ->
-        LibDir0
-    end.
diff --git a/src/couch/src/couch_ejson_compare.erl b/src/couch/src/couch_ejson_compare.erl
index ca36c86..8681296 100644
--- a/src/couch/src/couch_ejson_compare.erl
+++ b/src/couch/src/couch_ejson_compare.erl
@@ -14,6 +14,10 @@
 
 -export([less/2, less_json_ids/2, less_json/2]).
 
+% For testing
+-export([less_nif/2, less_erl/2, compare_strings_nif/2]).
+
+
 -on_load(init/0).
 
 
@@ -30,8 +34,8 @@ less(A, B) ->
     try
         less_nif(A, B)
     catch
-    error:badarg ->
-        % Maybe the EJSON structure is too deep, fallback to Erlang land.
+    error:max_depth_error ->
+        % The EJSON structure is too deep, fallback to Erlang land.
         less_erl(A, B)
     end.
 
@@ -48,7 +52,11 @@ less_json(A,B) ->
 
 
 less_nif(A, B) ->
-    less_erl(A, B).
+    erlang:nif_error(less_nif_load_error, [A, B]).
+
+
+compare_strings_nif(A, B) ->
+    erlang:nif_error(compare_string_nif, [A, B]).
 
 
 less_erl(A,A)                                 -> 0;
@@ -61,7 +69,7 @@ less_erl(A,B) when is_number(A), is_number(B) -> A - B;
 less_erl(A,_) when is_number(A)               -> -1;
 less_erl(_,B) when is_number(B)               -> 1;
 
-less_erl(A,B) when is_binary(A), is_binary(B) -> couch_util:collate(A,B);
+less_erl(A,B) when is_binary(A), is_binary(B) -> compare_strings_nif(A,B);
 less_erl(A,_) when is_binary(A)               -> -1;
 less_erl(_,B) when is_binary(B)               -> 1;
 
@@ -84,7 +92,7 @@ less_props([], [_|_]) ->
 less_props(_, []) ->
     1;
 less_props([{AKey, AValue}|RestA], [{BKey, BValue}|RestB]) ->
-    case couch_util:collate(AKey, BKey) of
+    case compare_strings_nif(AKey, BKey) of
     0 ->
         case less_erl(AValue, BValue) of
         0 ->
diff --git a/src/couch/src/couch_primary_sup.erl b/src/couch/src/couch_primary_sup.erl
index 48c6d42..73c3de7 100644
--- a/src/couch/src/couch_primary_sup.erl
+++ b/src/couch/src/couch_primary_sup.erl
@@ -19,12 +19,6 @@ start_link() ->
 
 init([]) ->
     Children = [
-        {collation_driver,
-            {couch_drv, start_link, []},
-            permanent,
-            infinity,
-            supervisor,
-            [couch_drv]},
         {couch_task_status,
             {couch_task_status, start_link, []},
             permanent,
diff --git a/src/couch/src/couch_util.erl b/src/couch/src/couch_util.erl
index d1f4940..c7dc894 100644
--- a/src/couch/src/couch_util.erl
+++ b/src/couch/src/couch_util.erl
@@ -14,7 +14,7 @@
 
 -export([priv_dir/0, normpath/1, fold_files/5]).
 -export([should_flush/0, should_flush/1, to_existing_atom/1]).
--export([rand32/0, implode/2, collate/2, collate/3]).
+-export([rand32/0, implode/2]).
 -export([abs_pathname/1,abs_pathname/2, trim/1, drop_dot_couch_ext/1]).
 -export([encodeBase64Url/1, decodeBase64Url/1]).
 -export([validate_utf8/1, to_hex/1, parse_term/1, dict_find/3]).
@@ -387,33 +387,6 @@ implode([H|T], Sep, Acc) ->
     implode(T, Sep, [Sep,H|Acc]).
 
 
-drv_port() ->
-    case get(couch_drv_port) of
-    undefined ->
-        Port = open_port({spawn, "couch_icu_driver"}, []),
-        put(couch_drv_port, Port),
-        Port;
-    Port ->
-        Port
-    end.
-
-collate(A, B) ->
-    collate(A, B, []).
-
-collate(A, B, Options) when is_binary(A), is_binary(B) ->
-    Operation =
-    case lists:member(nocase, Options) of
-        true -> 1; % Case insensitive
-        false -> 0 % Case sensitive
-    end,
-    SizeA = byte_size(A),
-    SizeB = byte_size(B),
-    Bin = <<SizeA:32/native, A/binary, SizeB:32/native, B/binary>>,
-    [Result] = erlang:port_control(drv_port(), Operation, Bin),
-    % Result is 0 for lt, 1 for eq and 2 for gt. Subtract 1 to return the
-    % expected typical -1, 0, 1
-    Result - 1.
-
 should_flush() ->
     should_flush(?FLUSH_MAX_MEM).
 
diff --git a/src/couch/test/eunit/couch_ejson_compare_tests.erl b/src/couch/test/eunit/couch_ejson_compare_tests.erl
new file mode 100644
index 0000000..790f6e5
--- /dev/null
+++ b/src/couch/test/eunit/couch_ejson_compare_tests.erl
@@ -0,0 +1,286 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_ejson_compare_tests).
+
+
+-define(MAX_UNICODE_STRING, <<255,255,255,255>>).
+
+% See mango_idx_view.hrl
+-define(MAX_JSON_OBJ, {?MAX_UNICODE_STRING}).
+
+-define(TEST_VALUES, [
+    null,
+    false,
+    true,
+    -2,
+    -0.1,
+    0,
+    0.1,
+    1,
+    2,
+    3.0,
+    4,
+    <<"a">>,
+    <<"A">>,
+    <<"aa">>,
+    <<"b">>,
+    <<"B">>,
+    <<"ba">>,
+    <<"bb">>,
+    % Highest sorting unicode value. Special case in the nif
+    ?MAX_UNICODE_STRING,
+    [<<"a">>],
+    [<<"b">>],
+    [<<"b">>, <<"c">>],
+    [<<"b">>, <<"d">>],
+    [<<"b">>, <<"d">>, <<"e">>],
+    {[{<<"a">>, 1}]},
+    {[{<<"a">>, 2}]},
+    {[{<<"b">>, 1}]},
+    {[{<<"b">>, 2}]},
+    {[{<<"b">>, 2}, {<<"a">>, 1}]},
+    {[{<<"b">>, 2}, {<<"c">>, 2}]}
+]).
+
+
+% Propery tests
+
+-ifdef(WITH_PROPER).
+
+-include_lib("couch/include/couch_eunit_proper.hrl").
+
+
+property_test_() ->
+    ?EUNIT_QUICKCHECK(60, 400).
+
+
+% Properties
+
+% The main, nif-based comparison, sorts the test values correctly
+prop_nif_sorts_correctly() ->
+    Positions = get_positions(?TEST_VALUES),
+    ?FORALL(A, oneof(?TEST_VALUES),
+        ?FORALL(B, oneof(?TEST_VALUES), begin
+            expected_less(A, B, Positions) =:= less_nif(A, B)
+        end)
+    ).
+
+
+% The erlang fallback comparison sorts the test values correctly
+prop_erlang_sorts_correctly() ->
+    Positions = get_positions(?TEST_VALUES),
+    ?FORALL(A, oneof(?TEST_VALUES),
+        ?FORALL(B, oneof(?TEST_VALUES), begin
+            expected_less(A, B, Positions) =:= less_erl(A, B)
+        end)
+    ).
+
+
+% Zero width unicode chars are ignored
+prop_equivalent_unicode_values() ->
+    ?FORALL({Prefix, Suffix}, {zero_width_list(), zero_width_list()}, begin
+        Binary = unicode:characters_to_binary(Prefix ++ [$a] ++ Suffix),
+        less(<<"a">>, Binary) =:= 0
+    end).
+
+
+% Every test value sorts less than the special ?MAX_JSON_OBJ
+prop_test_values_are_less_than_max_json() ->
+    ?FORALL(V, oneof(?TEST_VALUES), begin
+        less(V, ?MAX_JSON_OBJ) =:= -1
+    end).
+
+
+% Any json value sorts less than the special ?MAX_JSON_OBJ
+prop_any_json_is_less_than_max_json() ->
+    ?FORALL(V, json(), begin
+        less(V, ?MAX_JSON_OBJ) =:= -1
+    end).
+
+
+% In general, for any json, the nif collator matches the erlang collator
+prop_nif_matches_erlang() ->
+    ?FORALL(A, json(),
+        ?FORALL(B, json(), begin
+            less_nif(A, B) =:= less_erl(A, B)
+        end)
+    ).
+
+
+% Generators
+
+json() ->
+    ?SIZED(Size, json(Size)).
+
+
+json(0) ->
+    oneof([
+        null,
+        true,
+        false,
+        json_number(),
+        json_string(),
+        [],
+        {[]}
+    ]);
+
+json(Size) ->
+    frequency([
+        {1, null},
+        {1, true},
+        {1, false},
+        {2, json_number()},
+        {3, json_string()},
+        {4, []},
+        {4, {[]}},
+        {5, ?LAZY(json_array(Size))},
+        {5, ?LAZY(json_object(Size))}
+    ]).
+
+
+json_number() ->
+    oneof([largeint(), int(), real()]).
+
+
+json_string() ->
+    utf8().
+
+
+json_array(0) ->
+    [];
+
+json_array(Size) ->
+    vector(Size div 2, json(Size div 2)).
+
+
+json_object(0) ->
+    {[]};
+
+json_object(Size) ->
+    {vector(Size div 2, {json_string(), json(Size div 2)})}.
+
+
+zero_width_list() ->
+    ?SIZED(Size, vector(Size, zero_width_chars())).
+
+
+zero_width_chars() ->
+    oneof([16#200B, 16#200C, 16#200D]).
+
+
+-endif.
+
+
+% Regular EUnit tests
+
+max_depth_error_list_test() ->
+    % NIF can handle terms with depth <= 9
+    Nested9 = nest_list(<<"val">>, 9),
+    ?assertEqual(0, less_nif(Nested9, Nested9)),
+
+    % At depth >= 10 it will throw a max_depth_error
+    Nested10 = nest_list(<<"val">>, 10),
+    ?assertError(max_depth_error, less_nif(Nested10, Nested10)),
+
+    % Then it should transparently jump to erlang land
+    ?assertEqual(0, less(Nested10, Nested10)).
+
+
+max_depth_error_obj_test() ->
+    % NIF can handle terms with depth <= 9
+    Nested9 = nest_obj(<<"k">>, <<"v">>, 9),
+    ?assertEqual(0, less_nif(Nested9, Nested9)),
+
+    % At depth >= 10 it will throw a max_depth_error
+    Nested10 = nest_obj(<<"k">>, <<"v">>, 10),
+    ?assertError(max_depth_error, less_nif(Nested10, Nested10)),
+
+    % Then it should transparently jump to erlang land
+    ?assertEqual(0, less(Nested10, Nested10)).
+
+
+compare_strings_nif_test() ->
+    ?assertEqual(-1, compare_strings(<<"a">>, <<"b">>)),
+    ?assertEqual(0, compare_strings(<<"a">>, <<"a">>)),
+    ?assertEqual(1, compare_strings(<<"b">>, <<"a">>)),
+
+    LargeBin1 = << <<"x">> || _ <- lists:seq(1, 1000000)>>,
+    LargeBin2 = <<LargeBin1/binary, "x">>,
+    ?assertEqual(-1, compare_strings(LargeBin1, LargeBin2)),
+    ?assertEqual(1, compare_strings(LargeBin2, LargeBin1)),
+    ?assertEqual(0, compare_strings(LargeBin1, LargeBin1)),
+
+    ?assertError(badarg, compare_strings(42, <<"a">>)),
+    ?assertError(badarg, compare_strings(<<"a">>, 42)),
+    ?assertError(badarg, compare_strings(42, 42)).
+
+
+% Helper functions
+
+less(A, B) ->
+    cmp_norm(couch_ejson_compare:less(A, B)).
+
+
+less_nif(A, B) ->
+    cmp_norm(couch_ejson_compare:less_nif(A, B)).
+
+
+less_erl(A, B) ->
+    cmp_norm(couch_ejson_compare:less_erl(A, B)).
+
+
+compare_strings(A, B) ->
+    couch_ejson_compare:compare_strings_nif(A, B).
+
+
+nest_list(Val, 0) ->
+    Val;
+
+nest_list(Val, Depth) when is_integer(Depth), Depth > 0 ->
+    [nest_list(Val, Depth - 1)].
+
+
+nest_obj(K, V, 1) ->
+    {[{K, V}]};
+
+nest_obj(K, V, Depth) when is_integer(Depth), Depth > 1 ->
+    {[{K, nest_obj(K, V, Depth - 1)}]}.
+
+
+% Build a map of #{Val => PositionIndex} for the test values so that when any
+% two are compared we can verify their position in the test list matches the
+% compared result
+get_positions(TestValues) ->
+    lists:foldl(fun(Val, Acc) ->
+        Acc#{Val => map_size(Acc)}
+    end, #{}, TestValues).
+
+
+% When two values are compared, check the test values positions index to ensure
+% the order in the test value list matches the comparison result
+expected_less(A, B, Positions) ->
+    #{A := PosA, B := PosB} = Positions,
+    if
+        PosA =:= PosB -> 0;
+        PosA < PosB -> -1;
+        PosA > PosB -> 1
+    end.
+
+% Since collation functions can return magnitudes > 1, for example when
+% comparing atoms A - B, we need to normalize the result to -1, 0, and 1.
+cmp_norm(Cmp) when is_number(Cmp) ->
+    if
+        Cmp == 0 -> 0;
+        Cmp < 0 -> -1;
+        Cmp > 0 -> 1
+    end.
diff --git a/src/couch/test/eunit/couch_util_tests.erl b/src/couch/test/eunit/couch_util_tests.erl
index 012c961..f6d7d95 100644
--- a/src/couch/test/eunit/couch_util_tests.erl
+++ b/src/couch/test/eunit/couch_util_tests.erl
@@ -25,31 +25,14 @@ setup() ->
     %%
     Ctx = test_util:start_couch(),
     %% config:start_link(?CONFIG_CHAIN),
-    %% {ok, _} = couch_drv:start_link(),
     Ctx.
 
 teardown(Ctx) ->
     ok = test_util:stop_couch(Ctx),
     %% config:stop(),
-    %% erl_ddll:unload_driver(couch_icu_driver),
     ok.
 
 
-collation_test_() ->
-    {
-        "Collation tests",
-        [
-            {
-                setup,
-                fun setup/0, fun teardown/1,
-                [
-                    should_collate_ascii(),
-                    should_collate_non_ascii()
-                ]
-            }
-        ]
-    }.
-
 validate_callback_exists_test_() ->
     {
         "validate_callback_exists tests",
@@ -59,12 +42,6 @@ validate_callback_exists_test_() ->
         ]
     }.
 
-should_collate_ascii() ->
-    ?_assertEqual(1, couch_util:collate(<<"foo">>, <<"bar">>)).
-
-should_collate_non_ascii() ->
-    ?_assertEqual(-1, couch_util:collate(<<"A">>, <<"aa">>)).
-
 to_existed_atom_test() ->
     ?assert(couch_util:to_existing_atom(true)),
     ?assertMatch(foo, couch_util:to_existing_atom(<<"foo">>)),
diff --git a/src/couch_mrview/test/eunit/couch_mrview_collation_tests.erl b/src/couch_mrview/test/eunit/couch_mrview_collation_tests.erl
index 5c8cb54..0d2afbe 100644
--- a/src/couch_mrview/test/eunit/couch_mrview_collation_tests.erl
+++ b/src/couch_mrview/test/eunit/couch_mrview_collation_tests.erl
@@ -52,7 +52,11 @@
     {[{<<"b">>, 1}]},
     {[{<<"b">>, 2}]},
     {[{<<"b">>, 2}, {<<"a">>, 1}]},
-    {[{<<"b">>, 2}, {<<"c">>, 2}]}
+    {[{<<"b">>, 2}, {<<"c">>, 2}]},
+
+    % Values with depth > 10 trigger the erlang collation fallback in couch_ejson_compare
+    {[{<<"x">>, [[[[[[[[[[[<<"y">>]]]]]]]]]]]}]}
+
 ]).