You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2010/11/21 04:26:48 UTC
[lucy-commits] svn commit: r1037377 - in /incubator/lucy/trunk:
clownfish/lib/Clownfish/Binding/Core/Aliases.pm core/Lucy/Object/CharBuf.c
core/Lucy/Object/CharBuf.cfh perl/lib/Lucy.pm perl/xs/XSBind.c
Author: marvin
Date: Sun Nov 21 03:26:47 2010
New Revision: 1037377
URL: http://svn.apache.org/viewvc?rev=1037377&view=rev
Log:
Fix two bugs which had mostly canceled each other out. JSON::XS was being fed
a scalar without the UTF8 flag set and thus was producing data structures with
non-UTF8 scalars. S_perl_hash_to_cfish_hash (in XSBind.c) was using UTF-8 byte
validity instead of the UTF8 flag when turning hash keys into CharBufs.
Modified:
incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm
incubator/lucy/trunk/core/Lucy/Object/CharBuf.c
incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh
incubator/lucy/trunk/perl/lib/Lucy.pm
incubator/lucy/trunk/perl/xs/XSBind.c
Modified: incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm (original)
+++ incubator/lucy/trunk/clownfish/lib/Clownfish/Binding/Core/Aliases.pm Sun Nov 21 03:26:47 2010
@@ -51,6 +51,8 @@ our %aliases = (
CFISH_VIEWCHARBUF => 'LUCY_VIEWCHARBUF',
cfish_ZCB_size => 'lucy_ZCB_size',
cfish_ZCB_wrap_str => 'lucy_ZCB_wrap_str',
+ Cfish_ZCB_Assign_Str => 'Lucy_ZCB_Assign_Str',
+ Cfish_ZCB_Assign_Trusted_Str => 'Lucy_ZCB_Assign_Trusted_Str',
Cfish_CB_Get_Ptr8 => 'Lucy_CB_Get_Ptr8',
Cfish_CB_Get_Size => 'Lucy_CB_Get_Size',
@@ -70,6 +72,7 @@ our %aliases = (
Cfish_Hash_Next => 'Lucy_Hash_Next',
Cfish_Hash_Fetch_Str => 'Lucy_Hash_Fetch_Str',
Cfish_Hash_Store_Str => 'Lucy_Hash_Store_Str',
+ Cfish_Hash_Store => 'Lucy_Hash_Store',
cfish_VArray => 'lucy_VArray',
CFISH_VARRAY => 'LUCY_VARRAY',
Modified: incubator/lucy/trunk/core/Lucy/Object/CharBuf.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Object/CharBuf.c?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Object/CharBuf.c (original)
+++ incubator/lucy/trunk/core/Lucy/Object/CharBuf.c Sun Nov 21 03:26:47 2010
@@ -864,6 +864,13 @@ ViewCB_assign_str(ViewCharBuf *self, con
self->size = size;
}
+void
+ViewCB_assign_trusted_str(ViewCharBuf *self, const char *utf8, size_t size)
+{
+ self->ptr = (char*)utf8;
+ self->size = size;
+}
+
uint32_t
ViewCB_trim_top(ViewCharBuf *self)
{
Modified: incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Object/CharBuf.cfh Sun Nov 21 03:26:47 2010
@@ -326,6 +326,9 @@ class Lucy::Object::ViewCharBuf cnick Vi
void
Assign_Str(ViewCharBuf *self, const char *utf8, size_t size);
+ void
+ Assign_Trusted_Str(ViewCharBuf *self, const char *utf8, size_t size);
+
uint32_t
Trim_Top(ViewCharBuf *self);
Modified: incubator/lucy/trunk/perl/lib/Lucy.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy.pm?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy.pm Sun Nov 21 03:26:47 2010
@@ -530,19 +530,26 @@ sub error {$Lucy::Object::Err::error}
package Lucy::Util::Json;
use Scalar::Util qw( blessed );
use Lucy qw( to_clownfish );
-
+ use Lucy::Util::StringHelper qw( utf8_valid utf8_flag_on );
use JSON::XS qw();
my $json_encoder = JSON::XS->new->pretty(1)->canonical(1);
sub slurp_json {
my ( undef, %args ) = @_;
+ my $result;
my $instream = $args{folder}->open_in( $args{path} )
or return;
my $len = $instream->length;
my $json;
$instream->read( $json, $len );
- my $result = eval { to_clownfish( $json_encoder->decode($json) ) };
+ if ( utf8_valid($json) ) {
+ utf8_flag_on($json);
+ $result = eval { to_clownfish( $json_encoder->decode($json) ) };
+ }
+ else {
+ $@ = "Invalid UTF-8";
+ }
if ( $@ or !$result ) {
Lucy::Object::Err->set_error(
Lucy::Object::Err->new( $@ || "Failed to decode JSON" )
Modified: incubator/lucy/trunk/perl/xs/XSBind.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/xs/XSBind.c?rev=1037377&r1=1037376&r2=1037377&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/xs/XSBind.c (original)
+++ incubator/lucy/trunk/perl/xs/XSBind.c Sun Nov 21 03:26:47 2010
@@ -231,33 +231,46 @@ XSBind_cb_to_sv(const cfish_CharBuf *cb)
static cfish_Hash*
S_perl_hash_to_cfish_hash(HV *phash)
{
- uint32_t num_keys = hv_iterinit(phash);
- cfish_Hash *retval = cfish_Hash_new(num_keys);
+ uint32_t num_keys = hv_iterinit(phash);
+ cfish_Hash *retval = cfish_Hash_new(num_keys);
+ cfish_ZombieCharBuf *key = CFISH_ZCB_WRAP_STR("", 0);
while (num_keys--) {
- char *key;
- STRLEN key_len;
- HE *entry = hv_iternext(phash);
- STRLEN he_key_len = HeKLEN(entry);
- SV *value_sv = HeVAL(entry);
+ HE *entry = hv_iternext(phash);
+ STRLEN key_len = HeKLEN(entry);
+ SV *value_sv = HeVAL(entry);
+ cfish_Obj *value = XSBind_perl_to_cfish(value_sv); // Recurse.
// Force key to UTF-8 if necessary.
- if (he_key_len == (STRLEN)HEf_SVKEY) {
- SV *key_sv = HeKEY_sv(entry);
- key = SvPVutf8(key_sv, key_len);
+ if (key_len == (STRLEN)HEf_SVKEY) {
+ // Key is stored as an SV. Use its UTF-8 flag? Not sure about
+ // this.
+ SV *key_sv = HeKEY_sv(entry);
+ char *key_str = SvPVutf8(key_sv, key_len);
+ Cfish_ZCB_Assign_Trusted_Str(key, key_str, key_len);
+ Cfish_Hash_Store(retval, (cfish_Obj*)key, value);
+ }
+ else if (HeUTF8(entry)) {
+ Cfish_ZCB_Assign_Trusted_Str(key, HeKEY(entry), key_len);
+ Cfish_Hash_Store(retval, (cfish_Obj*)key, value);
}
else {
- key = HeKEY(entry);
- key_len = he_key_len;
- if (!lucy_StrHelp_utf8_valid(key, key_len)) {
+ char *key_str = HeKEY(entry);
+ chy_bool_t pure_ascii = true;
+ for (STRLEN i = 0; i < key_len; i++) {
+ if ((key_str[i] & 0x80) == 0x80) { pure_ascii = false; }
+ }
+ if (pure_ascii) {
+ Cfish_ZCB_Assign_Trusted_Str(key, key_str, key_len);
+ Cfish_Hash_Store(retval, (cfish_Obj*)key, value);
+ }
+ else {
SV *key_sv = HeSVKEY_force(entry);
- key = SvPVutf8(key_sv, key_len);
+ key_str = SvPVutf8(key_sv, key_len);
+ Cfish_ZCB_Assign_Trusted_Str(key, key_str, key_len);
+ Cfish_Hash_Store(retval, (cfish_Obj*)key, value);
}
}
-
- // Recurse for each value.
- Cfish_Hash_Store_Str(retval, key, key_len,
- XSBind_perl_to_cfish(value_sv));
}
return retval;