You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@avro.apache.org by "Keh-Li Sheng (JIRA)" <ji...@apache.org> on 2012/05/06 08:30:11 UTC

[jira] [Created] (AVRO-1080) JsonIO.cc should allow \u escape sequence in string

Keh-Li Sheng created AVRO-1080:
----------------------------------

             Summary: JsonIO.cc should allow \u escape sequence in string
                 Key: AVRO-1080
                 URL: https://issues.apache.org/jira/browse/AVRO-1080
             Project: Avro
          Issue Type: Bug
          Components: c++
    Affects Versions: 1.6.3
         Environment: C++
            Reporter: Keh-Li Sheng
         Attachments: AVRO-1080.patch

If an Avro string contains a Unicode escape sequence that begins with "\u" instead of "\U", the parser throws an exception. The problematic code is at JsonIO.cc line 269.

{code}
// Reads characters from in_ into sv, decoding backslash escapes along the
// way, and returns tkString as soon as an unescaped '"' is read.
// NOTE(review): presumably the caller has already consumed the opening
// quote of the string literal -- confirm against the call site.
// Any escape character that is not handled below is passed to unexpected().
JsonParser::Token JsonParser::tryString()
{
    // Start from an empty buffer.
    sv.clear();
    for ( ; ;) {
        char ch = in_.read();
        if (ch == '"') {
            // Closing quote: the decoded text is now in sv.
            return tkString;
        } else if (ch == '\\') {
            // Escape sequence: the next character selects its meaning.
            ch = in_.read();
            switch (ch) {
            // These three escapes stand for the character itself.
            case '"':
            case '\\':
            case '/':
                sv.push_back(ch);
                continue;
            case 'b':
                sv.push_back('\b');
                continue;
            case 'f':
                sv.push_back('\f');
                continue;
            case 'n':
                sv.push_back('\n');
                continue;
            case 'r':
                sv.push_back('\r');
                continue;
            case 't':
                sv.push_back('\t');
                continue;
            // Four-hex-digit escape. Only an uppercase 'U' is matched here; a
            // lowercase 'u' reaches the default case and is passed to
            // unexpected(). That is the defect this issue reports.
            case 'U':
                {
                    unsigned int n = 0;
                    char e[4];
                    // Fetch the four characters that follow the escape letter.
                    in_.readBytes(reinterpret_cast<uint8_t*>(e), 4);
                    // Accumulate the hex digits, most significant first.
                    for (int i = 0; i < 4; i++) {
                        n *= 16;
                        char c = e[i];
                        // NOTE(review): isdigit takes an int; a negative plain
                        // char is outside its defined domain -- consider
                        // casting to unsigned char.
                        if (isdigit(c)) {
                            n += c - '0';
                        } else if (c >= 'a' && c <= 'f') {
                            n += c - 'a' + 10;
                        } else if (c >= 'A' && c <= 'F') {
                            n += c - 'A' + 10;
                        } else {
                            // NOTE(review): presumably does not return (the
                            // report says the parser throws) -- confirm.
                            unexpected(c);
                        }
                    }
                    // NOTE(review): n can be as large as 0xFFFF; whether it is
                    // narrowed on insertion depends on sv's element type,
                    // which is not shown here -- confirm.
                    sv.push_back(n);
                }
                // Leaves the switch only; the enclosing loop then moves on to
                // the next character.
                break;
            default:
                unexpected(ch);
            }
        } else {
            // Ordinary character: copy it through unchanged.
            sv.push_back(ch);
        }
    }
}
{code}


--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

[jira] [Commented] (AVRO-1080) JsonIO.cc should allow \u escape sequence in string

Posted by "Thiruvalluvan M. G. (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/AVRO-1080?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13269171#comment-13269171 ] 

Thiruvalluvan M. G. commented on AVRO-1080:
-------------------------------------------

+1

Looks good to me. Thanks.
                
> JsonIO.cc should allow \u escape sequence in string
> ---------------------------------------------------
>
>                 Key: AVRO-1080
>                 URL: https://issues.apache.org/jira/browse/AVRO-1080
>             Project: Avro
>          Issue Type: Bug
>          Components: c++
>    Affects Versions: 1.6.3
>         Environment: C++
>            Reporter: Keh-Li Sheng
>         Attachments: AVRO-1080.patch
>
>
> If an avro string contains a unicode escape sequence that begins with "\u" instead of "\U" an exception is thrown by the parser. The problematic code is at JsonIO.cc line 269.
> {code}
> JsonParser::Token JsonParser::tryString()
> {
>     sv.clear();
>     for ( ; ;) {
>         char ch = in_.read();
>         if (ch == '"') {
>             return tkString;
>         } else if (ch == '\\') {
>             ch = in_.read();
>             switch (ch) {
>             case '"':
>             case '\\':
>             case '/':
>                 sv.push_back(ch);
>                 continue;
>             case 'b':
>                 sv.push_back('\b');
>                 continue;
>             case 'f':
>                 sv.push_back('\f');
>                 continue;
>             case 'n':
>                 sv.push_back('\n');
>                 continue;
>             case 'r':
>                 sv.push_back('\r');
>                 continue;
>             case 't':
>                 sv.push_back('\t');
>                 continue;
>             case 'U':
>                 {
>                     unsigned int n = 0;
>                     char e[4];
>                     in_.readBytes(reinterpret_cast<uint8_t*>(e), 4);
>                     for (int i = 0; i < 4; i++) {
>                         n *= 16;
>                         char c = e[i];
>                         if (isdigit(c)) {
>                             n += c - '0';
>                         } else if (c >= 'a' && c <= 'f') {
>                             n += c - 'a' + 10;
>                         } else if (c >= 'A' && c <= 'F') {
>                             n += c - 'A' + 10;
>                         } else {
>                             unexpected(c);
>                         }
>                     }
>                     sv.push_back(n);
>                 }
>                 break;
>             default:
>                 unexpected(ch);
>             }
>         } else {
>             sv.push_back(ch);
>         }
>     }
> }
> {code}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

[jira] [Updated] (AVRO-1080) JsonIO.cc should allow \u escape sequence in string

Posted by "Thiruvalluvan M. G. (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/AVRO-1080?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Thiruvalluvan M. G. updated AVRO-1080:
--------------------------------------

    Resolution: Fixed
        Status: Resolved  (was: Patch Available)

Committed revision 1335985.

Thank you Keh-Li Sheng.

                
> JsonIO.cc should allow \u escape sequence in string
> ---------------------------------------------------
>
>                 Key: AVRO-1080
>                 URL: https://issues.apache.org/jira/browse/AVRO-1080
>             Project: Avro
>          Issue Type: Bug
>          Components: c++
>    Affects Versions: 1.6.3
>         Environment: C++
>            Reporter: Keh-Li Sheng
>         Attachments: AVRO-1080.patch
>
>
> If an avro string contains a unicode escape sequence that begins with "\u" instead of "\U" an exception is thrown by the parser. The problematic code is at JsonIO.cc line 269.
> {code}
> JsonParser::Token JsonParser::tryString()
> {
>     sv.clear();
>     for ( ; ;) {
>         char ch = in_.read();
>         if (ch == '"') {
>             return tkString;
>         } else if (ch == '\\') {
>             ch = in_.read();
>             switch (ch) {
>             case '"':
>             case '\\':
>             case '/':
>                 sv.push_back(ch);
>                 continue;
>             case 'b':
>                 sv.push_back('\b');
>                 continue;
>             case 'f':
>                 sv.push_back('\f');
>                 continue;
>             case 'n':
>                 sv.push_back('\n');
>                 continue;
>             case 'r':
>                 sv.push_back('\r');
>                 continue;
>             case 't':
>                 sv.push_back('\t');
>                 continue;
>             case 'U':
>                 {
>                     unsigned int n = 0;
>                     char e[4];
>                     in_.readBytes(reinterpret_cast<uint8_t*>(e), 4);
>                     for (int i = 0; i < 4; i++) {
>                         n *= 16;
>                         char c = e[i];
>                         if (isdigit(c)) {
>                             n += c - '0';
>                         } else if (c >= 'a' && c <= 'f') {
>                             n += c - 'a' + 10;
>                         } else if (c >= 'A' && c <= 'F') {
>                             n += c - 'A' + 10;
>                         } else {
>                             unexpected(c);
>                         }
>                     }
>                     sv.push_back(n);
>                 }
>                 break;
>             default:
>                 unexpected(ch);
>             }
>         } else {
>             sv.push_back(ch);
>         }
>     }
> }
> {code}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

[jira] [Updated] (AVRO-1080) JsonIO.cc should allow \u escape sequence in string

Posted by "Keh-Li Sheng (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/AVRO-1080?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Keh-Li Sheng updated AVRO-1080:
-------------------------------

    Status: Patch Available  (was: Open)

Please include this in the next release
                
> JsonIO.cc should allow \u escape sequence in string
> ---------------------------------------------------
>
>                 Key: AVRO-1080
>                 URL: https://issues.apache.org/jira/browse/AVRO-1080
>             Project: Avro
>          Issue Type: Bug
>          Components: c++
>    Affects Versions: 1.6.3
>         Environment: C++
>            Reporter: Keh-Li Sheng
>         Attachments: AVRO-1080.patch
>
>
> If an avro string contains a unicode escape sequence that begins with "\u" instead of "\U" an exception is thrown by the parser. The problematic code is at JsonIO.cc line 269.
> {code}
> JsonParser::Token JsonParser::tryString()
> {
>     sv.clear();
>     for ( ; ;) {
>         char ch = in_.read();
>         if (ch == '"') {
>             return tkString;
>         } else if (ch == '\\') {
>             ch = in_.read();
>             switch (ch) {
>             case '"':
>             case '\\':
>             case '/':
>                 sv.push_back(ch);
>                 continue;
>             case 'b':
>                 sv.push_back('\b');
>                 continue;
>             case 'f':
>                 sv.push_back('\f');
>                 continue;
>             case 'n':
>                 sv.push_back('\n');
>                 continue;
>             case 'r':
>                 sv.push_back('\r');
>                 continue;
>             case 't':
>                 sv.push_back('\t');
>                 continue;
>             case 'U':
>                 {
>                     unsigned int n = 0;
>                     char e[4];
>                     in_.readBytes(reinterpret_cast<uint8_t*>(e), 4);
>                     for (int i = 0; i < 4; i++) {
>                         n *= 16;
>                         char c = e[i];
>                         if (isdigit(c)) {
>                             n += c - '0';
>                         } else if (c >= 'a' && c <= 'f') {
>                             n += c - 'a' + 10;
>                         } else if (c >= 'A' && c <= 'F') {
>                             n += c - 'A' + 10;
>                         } else {
>                             unexpected(c);
>                         }
>                     }
>                     sv.push_back(n);
>                 }
>                 break;
>             default:
>                 unexpected(ch);
>             }
>         } else {
>             sv.push_back(ch);
>         }
>     }
> }
> {code}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

[jira] [Updated] (AVRO-1080) JsonIO.cc should allow \u escape sequence in string

Posted by "Keh-Li Sheng (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/AVRO-1080?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Keh-Li Sheng updated AVRO-1080:
-------------------------------

    Attachment: AVRO-1080.patch

Contains a trivial patch that resolves the issue.
                
> JsonIO.cc should allow \u escape sequence in string
> ---------------------------------------------------
>
>                 Key: AVRO-1080
>                 URL: https://issues.apache.org/jira/browse/AVRO-1080
>             Project: Avro
>          Issue Type: Bug
>          Components: c++
>    Affects Versions: 1.6.3
>         Environment: C++
>            Reporter: Keh-Li Sheng
>         Attachments: AVRO-1080.patch
>
>
> If an avro string contains a unicode escape sequence that begins with "\u" instead of "\U" an exception is thrown by the parser. The problematic code is at JsonIO.cc line 269.
> {code}
> JsonParser::Token JsonParser::tryString()
> {
>     sv.clear();
>     for ( ; ;) {
>         char ch = in_.read();
>         if (ch == '"') {
>             return tkString;
>         } else if (ch == '\\') {
>             ch = in_.read();
>             switch (ch) {
>             case '"':
>             case '\\':
>             case '/':
>                 sv.push_back(ch);
>                 continue;
>             case 'b':
>                 sv.push_back('\b');
>                 continue;
>             case 'f':
>                 sv.push_back('\f');
>                 continue;
>             case 'n':
>                 sv.push_back('\n');
>                 continue;
>             case 'r':
>                 sv.push_back('\r');
>                 continue;
>             case 't':
>                 sv.push_back('\t');
>                 continue;
>             case 'U':
>                 {
>                     unsigned int n = 0;
>                     char e[4];
>                     in_.readBytes(reinterpret_cast<uint8_t*>(e), 4);
>                     for (int i = 0; i < 4; i++) {
>                         n *= 16;
>                         char c = e[i];
>                         if (isdigit(c)) {
>                             n += c - '0';
>                         } else if (c >= 'a' && c <= 'f') {
>                             n += c - 'a' + 10;
>                         } else if (c >= 'A' && c <= 'F') {
>                             n += c - 'A' + 10;
>                         } else {
>                             unexpected(c);
>                         }
>                     }
>                     sv.push_back(n);
>                 }
>                 break;
>             default:
>                 unexpected(ch);
>             }
>         } else {
>             sv.push_back(ch);
>         }
>     }
> }
> {code}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira