You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2020/04/16 20:20:11 UTC

[couchdb] branch fix-couchjs-utf8-conversions-take2 created (now 2c1bcbf)

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a change to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git.


      at 2c1bcbf  Add tests for couchjs UTF-8 support

This branch includes the following new commits:

     new e85c5d5  Fix UTF-8 transcoding functions
     new 170b6bb  Encode JavaScript strings as UTF-8 for printing
     new 44216d3  Check that only strings are passed to print
     new 356b137  Use builtin UTF-8 conversions in http.cpp
     new 0b6a9cc  Report error messages at global scope
     new 19e30b7  Remove custom UTF-8 conversion functions
     new 2c1bcbf  Add tests for couchjs UTF-8 support

The 7 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[couchdb] 01/07: Fix UTF-8 transcoding functions

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit e85c5d5d1c4c847afcde5baa8ba5f4983ab5c134
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Tue Apr 14 13:22:28 2020 -0500

    Fix UTF-8 transcoding functions
    
    This fixes our string transcoding between UTF-8 and UTF-16. Once
    everything is updated to use these new adapters we can remove the
    bespoke UTF-8 transcoding implementation in `utf8.{h,c}`.
---
 src/couch/priv/couch_js/60/util.cpp | 48 ++++++++++++++++++++++++++++++-------
 src/couch/priv/couch_js/60/util.h   |  1 +
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/src/couch/priv/couch_js/60/util.cpp b/src/couch/priv/couch_js/60/util.cpp
index ad68f26..3945114 100644
--- a/src/couch/priv/couch_js/60/util.cpp
+++ b/src/couch/priv/couch_js/60/util.cpp
@@ -15,7 +15,9 @@
 
 #include <jsapi.h>
 #include <js/Initialization.h>
+#include <js/CharacterEncoding.h>
 #include <js/Conversions.h>
+#include <mozilla/Unused.h>
 
 #include "help.h"
 #include "util.h"
@@ -30,23 +32,53 @@ js_to_string(JSContext* cx, JS::HandleValue val)
     JS::UniqueChars chars(JS_EncodeStringToUTF8(cx, sval));
     if(!chars) {
         JS_ClearPendingException(cx);
-        fprintf(stderr, "Error converting value to string.\n");
-        exit(3);
+        return std::string();
     }
 
     return chars.get();
 }
 
+bool
+js_to_string(JSContext* cx, JS::HandleValue val, std::string& str)
+{
+    if(!val.isString()) {
+        return false;
+    }
+
+    if(JS_GetStringLength(val.toString()) == 0) {
+        str = "";
+        return true;
+    }
+
+    std::string conv = js_to_string(cx, val);
+    if(!conv.size()) {
+        return false;
+    }
+
+    str = conv;
+    return true;
+}
+
 JSString*
-string_to_js(JSContext* cx, const std::string& s)
+string_to_js(JSContext* cx, const std::string& raw)
 {
-    JSString* ret = JS_NewStringCopyN(cx, s.c_str(), s.size());
-    if(ret != nullptr) {
-        return ret;
+    JS::UTF8Chars utf8(raw.c_str(), raw.size());
+    JS::UniqueTwoByteChars utf16;
+    size_t len;
+
+    utf16.reset(JS::UTF8CharsToNewTwoByteCharsZ(cx, utf8, &len).get());
+    if(!utf16) {
+        return nullptr;
+    }
+
+    JSString* ret = JS_NewUCString(cx, utf16.get(), len);
+
+    if(ret) {
+        // JS_NewUCString took ownership on success
+        mozilla::Unused << utf16.release();
     }
 
-    fprintf(stderr, "Unable to allocate string object.\n");
-    exit(3);
+    return ret;
 }
 
 size_t
diff --git a/src/couch/priv/couch_js/60/util.h b/src/couch/priv/couch_js/60/util.h
index 0c9f0f8..07efc47 100644
--- a/src/couch/priv/couch_js/60/util.h
+++ b/src/couch/priv/couch_js/60/util.h
@@ -26,6 +26,7 @@ typedef struct {
 } couch_args;
 
 std::string js_to_string(JSContext* cx, JS::HandleValue val);
+bool js_to_string(JSContext* cx, JS::HandleValue val, std::string& str);
 JSString* string_to_js(JSContext* cx, const std::string& s);
 
 couch_args* couch_parse_args(int argc, const char* argv[]);


[couchdb] 05/07: Report error messages at global scope

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 0b6a9cc4f7b71899927765e21d986dd4d3457d7a
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Thu Apr 16 14:11:30 2020 -0500

    Report error messages at global scope
    
    Previously we weren't reporting any uncaught exceptions or compilation
    errors. This changes that to print any compilation errors or any
    uncaught exceptions with stack traces.
    
    The previous implementation of `couch_error` was attempting to call
    `String.replace` on the `stack` member string of the thrown exception.
    This likely never worked, and while attempting to fix it I was unable to
    properly invoke the `String.replace` function. This changes the
    implementation to use the builtin stack formatting method instead.
---
 src/couch/priv/couch_js/60/main.cpp |  20 ++++++-
 src/couch/priv/couch_js/60/util.cpp | 107 +++++++++++++++++++++---------------
 2 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/src/couch/priv/couch_js/60/main.cpp b/src/couch/priv/couch_js/60/main.cpp
index 11f8152..f0a4e31 100644
--- a/src/couch/priv/couch_js/60/main.cpp
+++ b/src/couch/priv/couch_js/60/main.cpp
@@ -21,6 +21,8 @@
 #include <unistd.h>
 #endif
 
+#include <sstream>
+
 #include <jsapi.h>
 #include <js/Initialization.h>
 #include <js/Conversions.h>
@@ -489,7 +491,14 @@ main(int argc, const char* argv[])
         JS::RootedScript script(cx);
 
         if(!JS_CompileScript(cx, scriptsrc, slen, options, &script)) {
-            fprintf(stderr, "Failed to compile script.\n");
+            JS::RootedValue exc(cx);
+            if(!JS_GetPendingException(cx, &exc)) {
+                fprintf(stderr, "Failed to compile script.\n");
+            } else {
+                JS::RootedObject exc_obj(cx, &exc.toObject());
+                JSErrorReport* report = JS_ErrorFromException(cx, exc_obj);
+                couch_error(cx, report);
+            }
             return 1;
         }
 
@@ -497,7 +506,14 @@ main(int argc, const char* argv[])
 
         JS::RootedValue result(cx);
         if(JS_ExecuteScript(cx, script, &result) != true) {
-            fprintf(stderr, "Failed to execute script.\n");
+            JS::RootedValue exc(cx);
+            if(!JS_GetPendingException(cx, &exc)) {
+                fprintf(stderr, "Failed to execute script.\n");
+            } else {
+                JS::RootedObject exc_obj(cx, &exc.toObject());
+                JSErrorReport* report = JS_ErrorFromException(cx, exc_obj);
+                couch_error(cx, report);
+            }
             return 1;
         }
 
diff --git a/src/couch/priv/couch_js/60/util.cpp b/src/couch/priv/couch_js/60/util.cpp
index 9ea9af8..cafb01b 100644
--- a/src/couch/priv/couch_js/60/util.cpp
+++ b/src/couch/priv/couch_js/60/util.cpp
@@ -13,6 +13,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <sstream>
+
 #include <jsapi.h>
 #include <js/Initialization.h>
 #include <js/CharacterEncoding.h>
@@ -274,51 +276,70 @@ couch_print(JSContext* cx, JS::HandleValue obj, bool use_stderr)
 void
 couch_error(JSContext* cx, JSErrorReport* report)
 {
-    JS::RootedValue v(cx), stack(cx), replace(cx);
-    char* bytes;
-    JSObject* regexp;
-
-    if(!report || !JSREPORT_IS_WARNING(report->flags))
-    {
-        fprintf(stderr, "%s\n", report->message().c_str());
-
-        // Print a stack trace, if available.
-        if (JSREPORT_IS_EXCEPTION(report->flags) &&
-            JS_GetPendingException(cx, &v))
-        {
-            // Clear the exception before an JS method calls or the result is
-            // infinite, recursive error report generation.
-            JS_ClearPendingException(cx);
-
-            // Use JS regexp to indent the stack trace.
-            // If the regexp can't be created, don't JS_ReportErrorUTF8 since it is
-            // probably not productive to wind up here again.
-            JS::RootedObject vobj(cx, v.toObjectOrNull());
-
-            if(JS_GetProperty(cx, vobj, "stack", &stack) &&
-               (regexp = JS_NewRegExpObject(
-                   cx, "^(?=.)", 6, JSREG_GLOB | JSREG_MULTILINE)))
-            {
-                // Set up the arguments to ``String.replace()``
-                JS::AutoValueVector re_args(cx);
-                JS::RootedValue arg0(cx, JS::ObjectValue(*regexp));
-                auto arg1 = JS::StringValue(string_to_js(cx, "\t"));
-
-                if (re_args.append(arg0) && re_args.append(arg1)) {
-                    // Perform the replacement
-                    JS::RootedObject sobj(cx, stack.toObjectOrNull());
-                    if(JS_GetProperty(cx, sobj, "replace", &replace) &&
-                       JS_CallFunctionValue(cx, sobj, replace, re_args, &v))
-                    {
-                        // Print the result
-                        bytes = enc_string(cx, v, NULL);
-                        fprintf(stderr, "Stacktrace:\n%s", bytes);
-                        JS_free(cx, bytes);
-                    }
-                }
-            }
+    if(!report) {
+        return;
+    }
+
+    std::ostringstream msg;
+
+    if(JSREPORT_IS_WARNING(report->flags)) {
+        if(JSREPORT_IS_STRICT(report->flags)) {
+            msg << "strict warning";
+        } else {
+            msg << "warning";
+        }
+    } else {
+        msg << "error";
+    }
+
+    msg << ": " << report->message().c_str();
+
+    mozilla::Maybe<JSAutoCompartment> ac;
+    JS::RootedValue exc(cx);
+    JS::RootedObject exc_obj(cx);
+    JS::RootedObject stack_obj(cx);
+    JS::RootedString stack_str(cx);
+    JS::RootedValue stack_val(cx);
+
+    if(!JS_GetPendingException(cx, &exc)) {
+        goto done;
+    }
+
+    // Clear the exception before any JS method calls or the result is
+    // infinite, recursive error report generation.
+    JS_ClearPendingException(cx);
+
+    exc_obj.set(exc.toObjectOrNull());
+    stack_obj.set(JS::ExceptionStackOrNull(exc_obj));
+
+    if(!stack_obj) {
+        // Compilation errors don't have a stack
+
+        msg << " at ";
+
+        if(report->filename) {
+            msg << report->filename;
+        } else {
+            msg << "<unknown>";
         }
+
+        if(report->lineno) {
+            msg << ':' << report->lineno << ':' << report->column;
+        }
+
+        goto done;
+    }
+
+    if(!JS::BuildStackString(cx, stack_obj, &stack_str, 2)) {
+        goto done;
     }
+
+    stack_val.set(JS::StringValue(stack_str));
+    msg << std::endl << std::endl << js_to_string(cx, stack_val).c_str();
+
+done:
+    msg << std::endl;
+    fprintf(stderr, "%s", msg.str().c_str());
 }
 
 


[couchdb] 06/07: Remove custom UTF-8 conversion functions

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 19e30b79e072acc6da5adafbef69d37fd904215a
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Apr 15 15:34:33 2020 -0500

    Remove custom UTF-8 conversion functions
    
    We're now using 100% built-in functionality of SpiderMonkey to handle
    all UTF-8 conversions.
---
 src/couch/priv/couch_js/60/main.cpp |   1 -
 src/couch/priv/couch_js/60/utf8.cpp | 301 ------------------------------------
 src/couch/priv/couch_js/60/utf8.h   |  19 ---
 src/couch/priv/couch_js/60/util.cpp |   1 -
 4 files changed, 322 deletions(-)

diff --git a/src/couch/priv/couch_js/60/main.cpp b/src/couch/priv/couch_js/60/main.cpp
index f0a4e31..1172ce8 100644
--- a/src/couch/priv/couch_js/60/main.cpp
+++ b/src/couch/priv/couch_js/60/main.cpp
@@ -30,7 +30,6 @@
 
 #include "config.h"
 #include "http.h"
-#include "utf8.h"
 #include "util.h"
 
 static bool enableSharedMemory = true;
diff --git a/src/couch/priv/couch_js/60/utf8.cpp b/src/couch/priv/couch_js/60/utf8.cpp
deleted file mode 100644
index 38dfa62..0000000
--- a/src/couch/priv/couch_js/60/utf8.cpp
+++ /dev/null
@@ -1,301 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License"); you may not
-// use this file except in compliance with the License. You may obtain a copy of
-// the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-// License for the specific language governing permissions and limitations under
-// the License.
-
-#include <jsapi.h>
-#include <js/Initialization.h>
-#include <js/Conversions.h>
-#include <js/Wrapper.h>
-#include "config.h"
-#include "util.h"
-
-static int
-enc_char(uint8_t *utf8Buffer, uint32_t ucs4Char)
-{
-    int utf8Length = 1;
-
-    if (ucs4Char < 0x80)
-    {
-        *utf8Buffer = (uint8_t)ucs4Char;
-    }
-    else
-    {
-        int i;
-        uint32_t a = ucs4Char >> 11;
-        utf8Length = 2;
-        while(a)
-        {
-            a >>= 5;
-            utf8Length++;
-        }
-        i = utf8Length;
-        while(--i)
-        {
-            utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80);
-            ucs4Char >>= 6;
-        }
-        *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
-    }
-
-    return utf8Length;
-}
-
-static bool
-enc_charbuf(const char16_t* src, size_t srclen, char* dst, size_t* dstlenp)
-{
-    size_t i;
-    size_t utf8Len;
-    size_t dstlen = *dstlenp;
-    size_t origDstlen = dstlen;
-    char16_t c;
-    char16_t c2;
-    uint32_t v;
-    uint8_t utf8buf[6];
-
-    if(!dst)
-    {
-        dstlen = origDstlen = (size_t) -1;
-    }
-
-    while(srclen)
-    {
-        c = *src++;
-        srclen--;
-
-        if(c <= 0xD7FF || c >= 0xE000)
-        {
-            v = (uint32_t) c;
-        }
-        else if(c >= 0xD800 && c <= 0xDBFF)
-        {
-            if(srclen < 1) goto buffer_too_small;
-            c2 = *src++;
-            srclen--;
-            if(c2 >= 0xDC00 && c2 <= 0xDFFF)
-            {
-                v = (uint32_t) (((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000);
-            }
-            else
-            {
-                // Invalid second half of surrogate pair
-                v = (uint32_t) 0xFFFD;
-                // Undo our character advancement
-                src--;
-                srclen++;
-            }
-        }
-        else
-        {
-            // Invalid first half surrogate pair
-            v = (uint32_t) 0xFFFD;
-        }
-
-        if(v < 0x0080)
-        {
-            /* no encoding necessary - performance hack */
-            if(!dstlen) goto buffer_too_small;
-            if(dst) *dst++ = (char) v;
-            utf8Len = 1;
-        }
-        else
-        {
-            utf8Len = enc_char(utf8buf, v);
-            if(utf8Len > dstlen) goto buffer_too_small;
-            if(dst)
-            {
-                for (i = 0; i < utf8Len; i++)
-                {
-                    *dst++ = (char) utf8buf[i];
-                }
-            }
-        }
-        dstlen -= utf8Len;
-    }
-    
-    *dstlenp = (origDstlen - dstlen);
-    return true;
-
-buffer_too_small:
-    *dstlenp = (origDstlen - dstlen);
-    return false;
-}
-
-char*
-enc_string(JSContext* cx, JS::Value arg, size_t* buflen)
-{
-    JSString* str = NULL;
-    const char16_t* src = NULL;
-    char* bytes = NULL;
-    size_t srclen = 0;
-    size_t byteslen = 0;
-    js::AutoStableStringChars rawChars(cx);
-    
-    str = arg.toString();
-    if(!str) goto error;
-
-    if (!rawChars.initTwoByte(cx, str))
-        return NULL;
-
-    src = rawChars.twoByteRange().begin().get();
-    srclen = JS_GetStringLength(str);
-
-    if(!enc_charbuf(src, srclen, NULL, &byteslen)) goto error;
-    
-    bytes = (char *)JS_malloc(cx, (byteslen) + 1);
-    bytes[byteslen] = 0;
-    
-    if(!enc_charbuf(src, srclen, bytes, &byteslen)) goto error;
-
-    if(buflen) *buflen = byteslen;
-    goto success;
-
-error:
-    if(bytes != NULL) JS_free(cx, bytes);
-    bytes = NULL;
-
-success:
-    return bytes;
-}
-
-static uint32_t
-dec_char(const uint8_t *utf8Buffer, int utf8Length)
-{
-    uint32_t ucs4Char;
-    uint32_t minucs4Char;
-
-    /* from Unicode 3.1, non-shortest form is illegal */
-    static const uint32_t minucs4Table[] = {
-        0x00000080, 0x00000800, 0x0001000, 0x0020000, 0x0400000
-    };
-
-    if (utf8Length == 1)
-    {
-        ucs4Char = *utf8Buffer;
-    }
-    else
-    {
-        ucs4Char = *utf8Buffer++ & ((1<<(7-utf8Length))-1);
-        minucs4Char = minucs4Table[utf8Length-2];
-        while(--utf8Length)
-        {
-            ucs4Char = ucs4Char<<6 | (*utf8Buffer++ & 0x3F);
-        }
-        if(ucs4Char < minucs4Char || ucs4Char == 0xFFFE || ucs4Char == 0xFFFF)
-        {
-            ucs4Char = 0xFFFD;
-        }
-    }
-
-    return ucs4Char;
-}
-
-static bool
-dec_charbuf(const char *src, size_t srclen, char16_t *dst, size_t *dstlenp)
-{
-    uint32_t v;
-    size_t offset = 0;
-    size_t j;
-    size_t n;
-    size_t dstlen = *dstlenp;
-    size_t origDstlen = dstlen;
-
-    if(!dst) dstlen = origDstlen = (size_t) -1;
-
-    while(srclen)
-    {
-        v = (uint8_t) *src;
-        n = 1;
-        
-        if(v & 0x80)
-        {
-            while(v & (0x80 >> n))
-            {
-                n++;
-            }
-            
-            if(n > srclen) goto buffer_too_small;
-            if(n == 1 || n > 6) goto bad_character;
-            
-            for(j = 1; j < n; j++)
-            {
-                if((src[j] & 0xC0) != 0x80) goto bad_character;
-            }
-
-            v = dec_char((const uint8_t *) src, n);
-            if(v >= 0x10000)
-            {
-                v -= 0x10000;
-                
-                if(v > 0xFFFFF || dstlen < 2)
-                {
-                    *dstlenp = (origDstlen - dstlen);
-                    return false;
-                }
-                
-                if(dstlen < 2) goto buffer_too_small;
-
-                if(dst)
-                {
-                    *dst++ = (char16_t)((v >> 10) + 0xD800);
-                    v = (char16_t)((v & 0x3FF) + 0xDC00);
-                }
-                dstlen--;
-            }
-        }
-
-        if(!dstlen) goto buffer_too_small;
-        if(dst) *dst++ = (char16_t) v;
-
-        dstlen--;
-        offset += n;
-        src += n;
-        srclen -= n;
-    }
-
-    *dstlenp = (origDstlen - dstlen);
-    return true;
-
-bad_character:
-    *dstlenp = (origDstlen - dstlen);
-    return false;
-
-buffer_too_small:
-    *dstlenp = (origDstlen - dstlen);
-    return false;
-}
-
-JSString*
-dec_string(JSContext* cx, const char* bytes, size_t byteslen)
-{
-    JSString* str = NULL;
-    char16_t* chars = NULL;
-    size_t charslen;
-    
-    if(!dec_charbuf(bytes, byteslen, NULL, &charslen)) goto error;
-
-    chars = (char16_t *)JS_malloc(cx, (charslen + 1) * sizeof(char16_t));
-    if(!chars) return NULL;
-    chars[charslen] = 0;
-
-    if(!dec_charbuf(bytes, byteslen, chars, &charslen)) goto error;
-
-    str = JS_NewUCString(cx, chars, charslen - 1);
-    if(!str) goto error;
-
-    goto success;
-
-error:
-    if(chars != NULL) JS_free(cx, chars);
-    str = NULL;
-
-success:
-    return str;
-}
diff --git a/src/couch/priv/couch_js/60/utf8.h b/src/couch/priv/couch_js/60/utf8.h
deleted file mode 100644
index c8b1f4d..0000000
--- a/src/couch/priv/couch_js/60/utf8.h
+++ /dev/null
@@ -1,19 +0,0 @@
-// Licensed under the Apache License, Version 2.0 (the "License"); you may not
-// use this file except in compliance with the License. You may obtain a copy of
-// the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-// License for the specific language governing permissions and limitations under
-// the License.
-
-#ifndef COUCH_JS_UTF_8_H
-#define COUCH_JS_UTF_8_H
-
-char* enc_string(JSContext* cx, JS::Value arg, size_t* buflen);
-JSString* dec_string(JSContext* cx, const char* buf, size_t buflen);
-
-#endif
diff --git a/src/couch/priv/couch_js/60/util.cpp b/src/couch/priv/couch_js/60/util.cpp
index cafb01b..59525f2 100644
--- a/src/couch/priv/couch_js/60/util.cpp
+++ b/src/couch/priv/couch_js/60/util.cpp
@@ -23,7 +23,6 @@
 
 #include "help.h"
 #include "util.h"
-#include "utf8.h"
 
 std::string
 js_to_string(JSContext* cx, JS::HandleValue val)


[couchdb] 07/07: Add tests for couchjs UTF-8 support

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2c1bcbf8a33e81be61ce2a158eb779d81b01d08a
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Tue Apr 14 15:05:58 2020 -0500

    Add tests for couchjs UTF-8 support
---
 src/couch/test/eunit/couch_js_tests.erl | 99 +++++++++++++++++++++++++++++----
 1 file changed, 87 insertions(+), 12 deletions(-)

diff --git a/src/couch/test/eunit/couch_js_tests.erl b/src/couch/test/eunit/couch_js_tests.erl
index cd6452c..2a2f3bb 100644
--- a/src/couch/test/eunit/couch_js_tests.erl
+++ b/src/couch/test/eunit/couch_js_tests.erl
@@ -14,17 +14,6 @@
 -include_lib("eunit/include/eunit.hrl").
 
 
--define(FUNC, <<
-  "var state = [];\n"
-  "function(doc) {\n"
-  "  var val = \"0123456789ABCDEF\";\n"
-  "  for(var i = 0; i < 165535; i++) {\n"
-  "    state.push([val, val]);\n"
-  "  }\n"
-  "}\n"
->>).
-
-
 couch_js_test_() ->
     {
         "Test couchjs",
@@ -33,15 +22,101 @@ couch_js_test_() ->
             fun test_util:start_couch/0,
             fun test_util:stop_couch/1,
             [
+                fun should_create_sandbox/0,
+                fun should_roundtrip_utf8/0,
+                fun should_roundtrip_modified_utf8/0,
+                fun should_replace_broken_utf16/0,
                 {timeout, 60000, fun should_exit_on_oom/0}
             ]
         }
     }.
 
 
+should_create_sandbox() ->
+    % Try and detect whether we can see out of the
+    % sandbox or not.
+    Src = <<
+      "function(doc) {\n"
+      "  try {\n"
+      "    emit(false, typeof(Couch.compile_function));\n"
+      "  } catch (e) {\n"
+      "    emit(true, e.message);\n"
+      "  }\n"
+      "}\n"
+    >>,
+    Proc = couch_query_servers:get_os_process(<<"javascript">>),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
+    Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, <<"{}">>]),
+    ?assertEqual([[[true, <<"Couch is not defined">>]]], Result).
+
+
+should_roundtrip_utf8() ->
+    % Try round tripping UTF-8 both directions through
+    % couchjs. These tests use hex encoded values of
+    % Ä (C384) and Ü (C39C) so as to avoid odd editor/Erlang encoding
+    % strangeness.
+    Src = <<
+      "function(doc) {\n"
+      "  emit(doc.value, \"", 16#C3, 16#9C, "\");\n"
+      "}\n"
+    >>,
+    Proc = couch_query_servers:get_os_process(<<"javascript">>),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
+    Doc = {[
+        {<<"value">>, <<16#C3, 16#84>>}
+    ]},
+    Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]),
+    ?assertEqual([[[<<16#C3, 16#84>>, <<16#C3, 16#9C>>]]], Result).
+
+
+should_roundtrip_modified_utf8() ->
+    % Mimicking the test case from the mailing list
+    Src = <<
+      "function(doc) {\n"
+      "  emit(doc.value.toLowerCase(), \"", 16#C3, 16#9C, "\");\n"
+      "}\n"
+    >>,
+    Proc = couch_query_servers:get_os_process(<<"javascript">>),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
+    Doc = {[
+        {<<"value">>, <<16#C3, 16#84>>}
+    ]},
+    Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]),
+    ?assertEqual([[[<<16#C3, 16#A4>>, <<16#C3, 16#9C>>]]], Result).
+
+
+should_replace_broken_utf16() ->
+    % This test reverses the surrogate pair of
+    % the Boom emoji U+1F4A5
+    Src = <<
+      "function(doc) {\n"
+      "  emit(doc.value.split(\"\").reverse().join(\"\"), 1);\n"
+      "}\n"
+    >>,
+    Proc = couch_query_servers:get_os_process(<<"javascript">>),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
+    Doc = {[
+        {<<"value">>, list_to_binary(xmerl_ucs:to_utf8([16#1F4A5]))}
+    ]},
+    Result = couch_query_servers:proc_prompt(Proc, [<<"map_doc">>, Doc]),
+    % Each unpaired UTF-16 surrogate gets replaced with the 16#FFFD
+    % replacement marker
+    Markers = list_to_binary(xmerl_ucs:to_utf8([16#FFFD, 16#FFFD])),
+    ?assertEqual([[[Markers, 1]]], Result).
+
+
 should_exit_on_oom() ->
+    Src = <<
+      "var state = [];\n"
+      "function(doc) {\n"
+      "  var val = \"0123456789ABCDEF\";\n"
+      "  for(var i = 0; i < 165535; i++) {\n"
+      "    state.push([val, val]);\n"
+      "  }\n"
+      "}\n"
+    >>,
     Proc = couch_query_servers:get_os_process(<<"javascript">>),
-    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, ?FUNC]),
+    true = couch_query_servers:proc_prompt(Proc, [<<"add_fun">>, Src]),
     trigger_oom(Proc).
 
 trigger_oom(Proc) ->


[couchdb] 03/07: Check that only strings are passed to print

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 44216d3a1c29dec5473defcbe3c68a7378a72895
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Apr 15 15:12:59 2020 -0500

    Check that only strings are passed to print
    
    This function won't do conversion of arbitrary JavaScript values to
    strings. Callers should ensure they only pass JavaScript strings
    instead.
---
 src/couch/priv/couch_js/60/main.cpp | 14 +++++++++++++-
 src/couch/priv/couch_js/60/util.cpp | 17 ++++++-----------
 src/couch/priv/couch_js/60/util.h   |  2 +-
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/couch/priv/couch_js/60/main.cpp b/src/couch/priv/couch_js/60/main.cpp
index db2157d..11f8152 100644
--- a/src/couch/priv/couch_js/60/main.cpp
+++ b/src/couch/priv/couch_js/60/main.cpp
@@ -283,7 +283,19 @@ static bool
 print(JSContext* cx, unsigned int argc, JS::Value* vp)
 {
     JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
-    couch_print(cx, argc, args);
+
+    bool use_stderr = false;
+    if(argc > 1 && args[1].isTrue()) {
+        use_stderr = true;
+    }
+
+    if(!args[0].isString()) {
+        JS_ReportErrorUTF8(cx, "Unable to print non-string value.");
+        return false;
+    }
+
+    couch_print(cx, args[0], use_stderr);
+
     args.rval().setUndefined();
     return true;
 }
diff --git a/src/couch/priv/couch_js/60/util.cpp b/src/couch/priv/couch_js/60/util.cpp
index 2cf02fd..9ea9af8 100644
--- a/src/couch/priv/couch_js/60/util.cpp
+++ b/src/couch/priv/couch_js/60/util.cpp
@@ -257,21 +257,16 @@ couch_readline(JSContext* cx, FILE* fp)
 
 
 void
-couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv)
+couch_print(JSContext* cx, JS::HandleValue obj, bool use_stderr)
 {
-    uint8_t* bytes = nullptr;
-    FILE *stream = stdout;
+    FILE* stream = stdout;
 
-    if (argc) {
-        if (argc > 1 && argv[1].isTrue()) {
-          stream = stderr;
-        }
-        std::string val = js_to_string(cx, argv.get(0));
-        fprintf(stream, "%s", val.c_str());
-        JS_free(cx, bytes);
+    if(use_stderr) {
+        stream = stderr;
     }
 
-    fputc('\n', stream);
+    std::string val = js_to_string(cx, obj);
+    fprintf(stream, "%s\n", val.c_str());
     fflush(stream);
 }
 
diff --git a/src/couch/priv/couch_js/60/util.h b/src/couch/priv/couch_js/60/util.h
index 07efc47..4c27f0f 100644
--- a/src/couch/priv/couch_js/60/util.h
+++ b/src/couch/priv/couch_js/60/util.h
@@ -33,7 +33,7 @@ couch_args* couch_parse_args(int argc, const char* argv[]);
 int couch_fgets(char* buf, int size, FILE* fp);
 JSString* couch_readline(JSContext* cx, FILE* fp);
 size_t couch_readfile(const char* file, char** outbuf_p);
-void couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv);
+void couch_print(JSContext* cx, JS::HandleValue str, bool use_stderr);
 void couch_error(JSContext* cx, JSErrorReport* report);
 void couch_oom(JSContext* cx, void* data);
 bool couch_load_funcs(JSContext* cx, JS::HandleObject obj, JSFunctionSpec* funcs);


[couchdb] 02/07: Encode JavaScript strings as UTF-8 for printing

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 170b6bb630e326272a5c73a43fcecc8d3d52d2c7
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Tue Apr 14 13:23:48 2020 -0500

    Encode JavaScript strings as UTF-8 for printing
---
 src/couch/priv/couch_js/60/util.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/couch/priv/couch_js/60/util.cpp b/src/couch/priv/couch_js/60/util.cpp
index 3945114..2cf02fd 100644
--- a/src/couch/priv/couch_js/60/util.cpp
+++ b/src/couch/priv/couch_js/60/util.cpp
@@ -266,9 +266,8 @@ couch_print(JSContext* cx, unsigned int argc, JS::CallArgs argv)
         if (argc > 1 && argv[1].isTrue()) {
           stream = stderr;
         }
-        JSString* str = JS::ToString(cx, argv.get(0));
-        bytes = reinterpret_cast<uint8_t*>(JS_EncodeString(cx, str));
-        fprintf(stream, "%s", bytes);
+        std::string val = js_to_string(cx, argv.get(0));
+        fprintf(stream, "%s", val.c_str());
         JS_free(cx, bytes);
     }
 


[couchdb] 04/07: Use builtin UTF-8 conversions in http.cpp

Posted by da...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch fix-couchjs-utf8-conversions-take2
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 356b137f40f009e764094be44aeed546fc1d1bfe
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Thu Apr 16 15:16:45 2020 -0500

    Use builtin UTF-8 conversions in http.cpp
    
    This switches the logic around to use the new adapters for the builtin
    UTF-8 transcoding.
---
 src/couch/priv/couch_js/60/http.cpp | 159 +++++++++++++-----------------------
 1 file changed, 56 insertions(+), 103 deletions(-)

diff --git a/src/couch/priv/couch_js/60/http.cpp b/src/couch/priv/couch_js/60/http.cpp
index 9ab47b2..993f808 100644
--- a/src/couch/priv/couch_js/60/http.cpp
+++ b/src/couch/priv/couch_js/60/http.cpp
@@ -18,7 +18,6 @@
 #include <jsapi.h>
 #include <js/Initialization.h>
 #include "config.h"
-#include "utf8.h"
 #include "util.h"
 
 // Soft dependency on cURL bindings because they're
@@ -109,7 +108,7 @@ typedef struct curl_slist CurlHeaders;
 
 typedef struct {
     int             method;
-    char*           url;
+    std::string     url;
     CurlHeaders*    req_headers;
     int16_t          last_status;
 } HTTPData;
@@ -127,12 +126,7 @@ const char* METHODS[] = {"GET", "HEAD", "POST", "PUT", "DELETE", "COPY", "OPTION
 #define OPTIONS 6
 
 
-static bool
-go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t blen);
-
-
-static JSString*
-str_from_binary(JSContext* cx, char* data, size_t length);
+static bool go(JSContext* cx, JSObject* obj, HTTPData* http, std::string& body);
 
 
 bool
@@ -149,7 +143,6 @@ http_ctor(JSContext* cx, JSObject* req)
     }
 
     http->method = -1;
-    http->url = NULL;
     http->req_headers = NULL;
     http->last_status = -1;
 
@@ -171,7 +164,6 @@ http_dtor(JSFreeOp* fop, JSObject* obj)
 {
     HTTPData* http = (HTTPData*) JS_GetPrivate(obj);
     if(http) {
-        if(http->url) free(http->url);
         if(http->req_headers) curl_slist_free_all(http->req_headers);
         free(http);
     }
@@ -182,56 +174,48 @@ bool
 http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value snc)
 {
     HTTPData* http = (HTTPData*) JS_GetPrivate(req);
-    char* method = NULL;
     int methid;
-    bool ret = false;
 
     if(!http) {
         JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance.");
-        goto done;
+        return false;
     }
 
-    if(mth.isUndefined()) {
-        JS_ReportErrorUTF8(cx, "You must specify a method.");
-        goto done;
+    if(!mth.isString()) {
+        JS_ReportErrorUTF8(cx, "Method must be a string.");
+        return false;
     }
 
-    method = enc_string(cx, mth, NULL);
-    if(!method) {
+    std::string method;
+    if(!js_to_string(cx, JS::RootedValue(cx, mth), method)) {
         JS_ReportErrorUTF8(cx, "Failed to encode method.");
-        goto done;
+        return false;
     }
 
     for(methid = 0; METHODS[methid] != NULL; methid++) {
-        if(strcasecmp(METHODS[methid], method) == 0) break;
+        if(strcasecmp(METHODS[methid], method.c_str()) == 0) break;
     }
 
     if(methid > OPTIONS) {
         JS_ReportErrorUTF8(cx, "Invalid method specified.");
-        goto done;
+        return false;
     }
 
     http->method = methid;
 
-    if(url.isUndefined()) {
-        JS_ReportErrorUTF8(cx, "You must specify a URL.");
-        goto done;
-    }
-
-    if(http->url != NULL) {
-        free(http->url);
-        http->url = NULL;
+    if(!url.isString()) {
+        JS_ReportErrorUTF8(cx, "URL must be a string");
+        return false;
     }
 
-    http->url = enc_string(cx, url, NULL);
-    if(http->url == NULL) {
+    if(!js_to_string(cx, JS::RootedValue(cx, url), http->url)) {
         JS_ReportErrorUTF8(cx, "Failed to encode URL.");
-        goto done;
+        return false;
     }
 
     if(snc.isBoolean() && snc.isTrue()) {
         JS_ReportErrorUTF8(cx, "Synchronous flag must be false.");
-        goto done;
+        return false;
     }
 
     if(http->req_headers) {
@@ -242,11 +226,7 @@ http_open(JSContext* cx, JSObject* req, JS::Value mth, JS::Value url, JS::Value
     // Disable Expect: 100-continue
     http->req_headers = curl_slist_append(http->req_headers, "Expect:");
 
-    ret = true;
-
-done:
-    if(method) free(method);
-    return ret;
+    return true;
 }
 
 
@@ -254,88 +234,60 @@ bool
 http_set_hdr(JSContext* cx, JSObject* req, JS::Value name, JS::Value val)
 {
     HTTPData* http = (HTTPData*) JS_GetPrivate(req);
-    char* keystr = NULL;
-    char* valstr = NULL;
-    char* hdrbuf = NULL;
-    size_t hdrlen = -1;
-    bool ret = false;
 
     if(!http) {
         JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance.");
-        goto done;
+        return false;
     }
 
-    if(name.isUndefined())
+    if(!name.isString())
     {
-        JS_ReportErrorUTF8(cx, "You must speciy a header name.");
-        goto done;
+        JS_ReportErrorUTF8(cx, "Header names must be strings.");
+        return false;
     }
 
-    keystr = enc_string(cx, name, NULL);
-    if(!keystr)
+    std::string keystr;
+    if(!js_to_string(cx, JS::RootedValue(cx, name), keystr))
     {
         JS_ReportErrorUTF8(cx, "Failed to encode header name.");
-        goto done;
+        return false;
     }
 
-    if(val.isUndefined())
+    if(!val.isString())
     {
-        JS_ReportErrorUTF8(cx, "You must specify a header value.");
-        goto done;
+        JS_ReportErrorUTF8(cx, "Header values must be strings.");
+        return false;
     }
 
-    valstr = enc_string(cx, val, NULL);
-    if(!valstr)
-    {
+    std::string valstr;
+    if(!js_to_string(cx, JS::RootedValue(cx, val), valstr)) {
         JS_ReportErrorUTF8(cx, "Failed to encode header value.");
-        goto done;
-    }
-
-    hdrlen = strlen(keystr) + strlen(valstr) + 3;
-    hdrbuf = (char*) malloc(hdrlen * sizeof(char));
-    if(!hdrbuf) {
-        JS_ReportErrorUTF8(cx, "Failed to allocate header buffer.");
-        goto done;
+        return false;
     }
 
-    snprintf(hdrbuf, hdrlen, "%s: %s", keystr, valstr);
-    http->req_headers = curl_slist_append(http->req_headers, hdrbuf);
+    std::string header = keystr + ": " + valstr;
+    http->req_headers = curl_slist_append(http->req_headers, header.c_str());
 
-    ret = true;
-
-done:
-    if(keystr) free(keystr);
-    if(valstr) free(valstr);
-    if(hdrbuf) free(hdrbuf);
-    return ret;
+    return true;
 }
 
 bool
 http_send(JSContext* cx, JSObject* req, JS::Value body)
 {
     HTTPData* http = (HTTPData*) JS_GetPrivate(req);
-    char* bodystr = NULL;
-    size_t bodylen = 0;
-    bool ret = false;
 
     if(!http) {
         JS_ReportErrorUTF8(cx, "Invalid CouchHTTP instance.");
-        goto done;
+        return false;
     }
 
-    if(!body.isUndefined()) {
-        bodystr = enc_string(cx, body, &bodylen);
-        if(!bodystr) {
-            JS_ReportErrorUTF8(cx, "Failed to encode body.");
-            goto done;
-        }
+    std::string bodystr;
+    if(!js_to_string(cx, JS::RootedValue(cx, body), bodystr)) {
+        JS_ReportErrorUTF8(cx, "Failed to encode body.");
+        return false;
     }
 
-    ret = go(cx, req, http, bodystr, bodylen);
-
-done:
-    if(bodystr) free(bodystr);
-    return ret;
+    return go(cx, req, http, bodystr);
 }
 
 int
@@ -395,7 +347,7 @@ typedef struct {
     HTTPData*   http;
     JSContext*  cx;
     JSObject*   resp_headers;
-    char*       sendbuf;
+    const char* sendbuf;
     size_t      sendlen;
     size_t      sent;
     int         sent_once;
@@ -417,10 +369,9 @@ static size_t recv_body(void *ptr, size_t size, size_t nmem, void *data);
 static size_t recv_header(void *ptr, size_t size, size_t nmem, void *data);
 
 static bool
-go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen)
+go(JSContext* cx, JSObject* obj, HTTPData* http, std::string& body)
 {
     CurlState state;
-    char* referer;
     JSString* jsbody;
     bool ret = false;
     JS::Value tmp;
@@ -431,8 +382,8 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen)
     state.cx = cx;
     state.http = http;
 
-    state.sendbuf = body;
-    state.sendlen = bodylen;
+    state.sendbuf = body.c_str();
+    state.sendlen = body.size();
     state.sent = 0;
     state.sent_once = 0;
 
@@ -463,13 +414,13 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen)
 
     tmp = JS_GetReservedSlot(obj, 0);
 
-    if(!(referer = enc_string(cx, tmp, NULL))) {
+    std::string referer;
+    if(!js_to_string(cx, JS::RootedValue(cx, tmp), referer)) {
         JS_ReportErrorUTF8(cx, "Failed to encode referer.");
         if(state.recvbuf) JS_free(cx, state.recvbuf);
-          return ret;
+        return ret;
     }
-    curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer);
-    free(referer);
+    curl_easy_setopt(HTTP_HANDLE, CURLOPT_REFERER, referer.c_str());
 
     if(http->method < 0 || http->method > OPTIONS) {
         JS_ReportErrorUTF8(cx, "INTERNAL: Unknown method.");
@@ -490,15 +441,15 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen)
         curl_easy_setopt(HTTP_HANDLE, CURLOPT_FOLLOWLOCATION, 0);
     }
 
-    if(body && bodylen) {
-        curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, bodylen);
+    if(body.size() > 0) {
+        curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, body.size());
     } else {
         curl_easy_setopt(HTTP_HANDLE, CURLOPT_INFILESIZE, 0);
     }
 
     // curl_easy_setopt(HTTP_HANDLE, CURLOPT_VERBOSE, 1);
 
-    curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url);
+    curl_easy_setopt(HTTP_HANDLE, CURLOPT_URL, http->url.c_str());
     curl_easy_setopt(HTTP_HANDLE, CURLOPT_HTTPHEADER, http->req_headers);
     curl_easy_setopt(HTTP_HANDLE, CURLOPT_READDATA, &state);
     curl_easy_setopt(HTTP_HANDLE, CURLOPT_SEEKDATA, &state);
@@ -532,12 +483,13 @@ go(JSContext* cx, JSObject* obj, HTTPData* http, char* body, size_t bodylen)
 
     if(state.recvbuf) {
         state.recvbuf[state.read] = '\0';
-        jsbody = dec_string(cx, state.recvbuf, state.read+1);
+        std::string bodystr(state.recvbuf, state.read + 1);
+        jsbody = string_to_js(cx, bodystr);
         if(!jsbody) {
             // If we can't decode the body as UTF-8 we forcefully
             // convert it to a string by just forcing each byte
             // to a char16_t.
-            jsbody = str_from_binary(cx, state.recvbuf, state.read);
+            jsbody = JS_NewStringCopyN(cx, state.recvbuf, state.read);
             if(!jsbody) {
                 if(!JS_IsExceptionPending(cx)) {
                     JS_ReportErrorUTF8(cx, "INTERNAL: Failed to decode body.");
@@ -638,7 +590,8 @@ recv_header(void *ptr, size_t size, size_t nmem, void *data)
     }
 
     // Append the new header to our array.
-    hdr = dec_string(state->cx, header, length);
+    std::string hdrstr(header, length);
+    hdr = string_to_js(state->cx, hdrstr);
     if(!hdr) {
         return CURLE_WRITE_ERROR;
     }