You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2010/11/21 04:26:48 UTC

[lucy-commits] svn commit: r1037377 - in /incubator/lucy/trunk: clownfish/lib/Clownfish/Binding/Core/Aliases.pm core/Lucy/Object/CharBuf.c core/Lucy/Object/CharBuf.cfh perl/lib/Lucy.pm perl/xs/XSBind.c

Author: marvin
Date: Sun Nov 21 03:26:47 2010
New Revision: 1037377

URL: http://svn.apache.org/viewvc?rev=1037377&view=rev
Log:
Fix two bugs which had mostly canceled each other out.  JSON::XS was being fed
a scalar without the UTF8 flag set and thus was producing data structures with
non-UTF8 scalars.  S_perl_hash_to_cfish_hash (XSBind.c) was using UTF-8 byte
validity instead of the UTF8 flag when turning scalars into CharBufs.

Modified:
    incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm
    incubator/lucy/trunk/core/Lucy/Object/CharBuf.c
    incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh
    incubator/lucy/trunk/perl/lib/Lucy.pm
    incubator/lucy/trunk/perl/xs/XSBind.c

Modified: incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm (original)
+++ incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm Sun Nov 21 03:26:47 2010
@@ -51,6 +51,8 @@ our %aliases = (
     CFISH_VIEWCHARBUF              => 'LUCY_VIEWCHARBUF',
     cfish_ZCB_size                 => 'lucy_ZCB_size',
     cfish_ZCB_wrap_str             => 'lucy_ZCB_wrap_str',
+    Cfish_ZCB_Assign_Str           => 'Lucy_ZCB_Assign_Str',
+    Cfish_ZCB_Assign_Trusted_Str   => 'Lucy_ZCB_Assign_Trusted_Str',
     Cfish_CB_Get_Ptr8              => 'Lucy_CB_Get_Ptr8',
     Cfish_CB_Get_Size              => 'Lucy_CB_Get_Size',
 
@@ -70,6 +72,7 @@ our %aliases = (
     Cfish_Hash_Next      => 'Lucy_Hash_Next',
     Cfish_Hash_Fetch_Str => 'Lucy_Hash_Fetch_Str',
     Cfish_Hash_Store_Str => 'Lucy_Hash_Store_Str',
+    Cfish_Hash_Store     => 'Lucy_Hash_Store',
 
     cfish_VArray      => 'lucy_VArray',
     CFISH_VARRAY      => 'LUCY_VARRAY',

Modified: incubator/lucy/trunk/core/Lucy/Object/CharBuf.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Object/CharBuf.c?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Object/CharBuf.c (original)
+++ incubator/lucy/trunk/core/Lucy/Object/CharBuf.c Sun Nov 21 03:26:47 2010
@@ -864,6 +864,13 @@ ViewCB_assign_str(ViewCharBuf *self, con
     self->size = size;
 }
 
+void
+ViewCB_assign_trusted_str(ViewCharBuf *self, const char *utf8, size_t size)
+{
+    self->ptr  = (char*)utf8;
+    self->size = size;
+}
+
 uint32_t
 ViewCB_trim_top(ViewCharBuf *self)
 {

Modified: incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh Sun Nov 21 03:26:47 2010
@@ -326,6 +326,9 @@ class Lucy::Object::ViewCharBuf cnick Vi
     void
     Assign_Str(ViewCharBuf *self, const char *utf8, size_t size);
 
+    void
+    Assign_Trusted_Str(ViewCharBuf *self, const char *utf8, size_t size);
+
     uint32_t
     Trim_Top(ViewCharBuf *self);
 

Modified: incubator/lucy/trunk/perl/lib/Lucy.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy.pm?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy.pm Sun Nov 21 03:26:47 2010
@@ -530,19 +530,26 @@ sub error {$Lucy::Object::Err::error}
     package Lucy::Util::Json;
     use Scalar::Util qw( blessed );
     use Lucy qw( to_clownfish );
-
+    use Lucy::Util::StringHelper qw( utf8_valid utf8_flag_on );
     use JSON::XS qw();
 
     my $json_encoder = JSON::XS->new->pretty(1)->canonical(1);
 
     sub slurp_json {
         my ( undef, %args ) = @_;
+        my $result;
         my $instream = $args{folder}->open_in( $args{path} )
             or return;
         my $len = $instream->length;
         my $json;
         $instream->read( $json, $len );
-        my $result = eval { to_clownfish( $json_encoder->decode($json) ) };
+        if ( utf8_valid($json) ) {
+            utf8_flag_on($json);
+            $result = eval { to_clownfish( $json_encoder->decode($json) ) };
+        }
+        else {
+            $@ = "Invalid UTF-8";
+        }
         if ( $@ or !$result ) {
             Lucy::Object::Err->set_error(
                 Lucy::Object::Err->new( $@ || "Failed to decode JSON" )

Modified: incubator/lucy/trunk/perl/xs/XSBind.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/xs/XSBind.c?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/xs/XSBind.c (original)
+++ incubator/lucy/trunk/perl/xs/XSBind.c Sun Nov 21 03:26:47 2010
@@ -231,33 +231,46 @@ XSBind_cb_to_sv(const cfish_CharBuf *cb)
 static cfish_Hash*
 S_perl_hash_to_cfish_hash(HV *phash)
 {
-    uint32_t    num_keys = hv_iterinit(phash);
-    cfish_Hash *retval   = cfish_Hash_new(num_keys);
+    uint32_t             num_keys = hv_iterinit(phash);
+    cfish_Hash          *retval   = cfish_Hash_new(num_keys);
+    cfish_ZombieCharBuf *key      = CFISH_ZCB_WRAP_STR("", 0);
 
     while (num_keys--) {
-        char *key;
-        STRLEN key_len;
-        HE *entry = hv_iternext(phash);
-        STRLEN he_key_len = HeKLEN(entry);
-        SV *value_sv = HeVAL(entry);
+        HE        *entry    = hv_iternext(phash);
+        STRLEN     key_len  = HeKLEN(entry);
+        SV        *value_sv = HeVAL(entry);
+        cfish_Obj *value    = XSBind_perl_to_cfish(value_sv); // Recurse.
 
         // Force key to UTF-8 if necessary.
-        if (he_key_len == (STRLEN)HEf_SVKEY) {
-            SV *key_sv = HeKEY_sv(entry);
-            key = SvPVutf8(key_sv, key_len);
+        if (key_len == (STRLEN)HEf_SVKEY) {
+            // Key is stored as an SV.  Use its UTF-8 flag?  Not sure about
+            // this.
+            SV   *key_sv  = HeKEY_sv(entry);
+            char *key_str = SvPVutf8(key_sv, key_len);
+            Cfish_ZCB_Assign_Trusted_Str(key, key_str, key_len);
+            Cfish_Hash_Store(retval, (cfish_Obj*)key, value);
+        }
+        else if (HeUTF8(entry)) {
+            Cfish_ZCB_Assign_Trusted_Str(key, HeKEY(entry), key_len);
+            Cfish_Hash_Store(retval, (cfish_Obj*)key, value);
         }
         else {
-            key = HeKEY(entry);
-            key_len = he_key_len;
-            if (!lucy_StrHelp_utf8_valid(key, key_len)) {
+            char *key_str = HeKEY(entry);
+            chy_bool_t pure_ascii = true;
+            for (STRLEN i = 0; i < key_len; i++) {
+                if ((key_str[i] & 0x80) == 0x80) { pure_ascii = false; }
+            }
+            if (pure_ascii) {
+                Cfish_ZCB_Assign_Trusted_Str(key, key_str, key_len);
+                Cfish_Hash_Store(retval, (cfish_Obj*)key, value);
+            }
+            else {
                 SV *key_sv = HeSVKEY_force(entry);
-                key = SvPVutf8(key_sv, key_len);
+                key_str = SvPVutf8(key_sv, key_len);
+                Cfish_ZCB_Assign_Trusted_Str(key, key_str, key_len);
+                Cfish_Hash_Store(retval, (cfish_Obj*)key, value);
             }
         }
-
-        // Recurse for each value. 
-        Cfish_Hash_Store_Str(retval, key, key_len, 
-            XSBind_perl_to_cfish(value_sv));
     }
 
     return retval;