You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@httpd.apache.org by Eric Covener <co...@gmail.com> on 2016/07/30 01:37:00 UTC

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

from rev below:
http://people.apache.org/~covener/test_char.h



On Fri, Jul 29, 2016 at 6:00 PM,  <wr...@apache.org> wrote:
> Author: wrowe
> Date: Fri Jul 29 22:00:52 2016
> New Revision: 1754579
>
> URL: http://svn.apache.org/viewvc?rev=1754579&view=rev
> Log:
> Replacement solution to identify VCHAR/ASCII symbols, even in EBCDIC.
>
> Looking for someone with an EBCDIC environment to post the output of
> the test_char.h generated file for verification.
>
>
> Modified:
>     httpd/httpd/trunk/server/gen_test_char.c
>
> Modified: httpd/httpd/trunk/server/gen_test_char.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/gen_test_char.c?rev=1754579&r1=1754578&r2=1754579&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/server/gen_test_char.c (original)
> +++ httpd/httpd/trunk/server/gen_test_char.c Fri Jul 29 22:00:52 2016
> @@ -20,6 +20,7 @@
>  #define apr_isalpha(c) (isalpha(((unsigned char)(c))))
>  #define apr_iscntrl(c) (iscntrl(((unsigned char)(c))))
>  #define apr_isprint(c) (isprint(((unsigned char)(c))))
> +#define apr_isascii(c) (isascii(((unsigned char)(c))))
>  #include <ctype.h>
>  #define APR_HAVE_STDIO_H 1
>  #define APR_HAVE_STRING_H 1
> @@ -31,6 +32,48 @@
>
>  #endif
>
> +#if APR_CHARSET_EBCDIC
> +/* See util.c for complete explanation of this table */
> +static const short ucharmap[] = {
> +    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F,
> +    0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
> +    0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87,
> +    0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
> +    0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B,
> +    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
> +    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
> +    0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
> +    0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5,
> +    0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
> +    0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF,
> +    0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
> +    0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
> +    0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
> +    0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
> +    0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
> +    0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
> +    0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
> +    0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
> +    0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
> +    0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
> +    0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
> +    0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
> +    0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
> +    0x7B, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
> +    0x68, 0x69, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
> +    0x7D, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
> +    0x71, 0x72, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
> +    0x5C, 0xF7, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
> +    0x79, 0x7A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
> +    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
> +    0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
> +};
> +#define test_isascii_equiv(c) ((ucharmap[(unsigned char)c] & ~0x7f) == 0)
> +#else
> +#define test_isascii_equiv(c) apr_isascii(c)
> +#endif
> +
> +
>  #if defined(WIN32) || defined(OS2)
>  #define NEED_ENHANCED_ESCAPES
>  #endif
> @@ -120,19 +163,20 @@ int main(int argc, char *argv[])
>
>          /* Stop for any non-'token' character, including ctrls, obs-text,
>           * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616)
> -         * XXX: We need to build a specific table for EBCDIC values with
> -         * ASCII equivilants here
> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our EBCDIC table
> +         * are captured by apr_iscntrl()
>           */
> -        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}", c)) {
> +        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}", c)
> +               || !test_isascii_equiv(c)) {
>              flags |= T_HTTP_TOKEN_STOP;
>          }
>
>          /* Catch CTRLs other than VCHAR, HT and SP, and obs-text (RFC7230 3.2)
>           * This includes only the C0 plane, not C1 (which is obs-text itself.)
> -         * XXX: Need to constrain iscntrl to C0 equivilants in ASCII,
> -         * even on EBCDIC architecture
> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our EBCDIC table
> +         * are captured by apr_iscntrl()
>           */
> -        if (!c || (apr_iscntrl(c) && c != '\t')) {
> +        if (!c || (apr_iscntrl(c) && c != '\t' && test_isascii_equiv(c))) {
>              flags |= T_HTTP_CTRLS;
>          }
>
>
>



-- 
Eric Covener
covener@gmail.com

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Thu, Aug 4, 2016 at 2:54 PM, Eric Covener <co...@gmail.com> wrote:

> On Thu, Aug 4, 2016 at 3:33 PM, William A Rowe Jr <wr...@rowe-clan.net>
> wrote:
> > It seems correcting the table is the correct way to go, by direct
> > observation
>
> #error if it's not the EBCDIC platform we made the observation on?   I
> don't know how much of a technicality another EBCDIC platform that
> comes anywhere near building moderm releases is.
>

I agree, and the problem is that the platform we are building on isn't
necessarily the target, which would apply across different EBCDIC
architectures.

You can get into trouble with this with your LANG setting even under
nearly-ASCII architectures, for example.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
On Thu, Aug 4, 2016 at 3:33 PM, William A Rowe Jr <wr...@rowe-clan.net> wrote:
> It seems correcting the table is the correct way to go, by direct
> observation

#error if it's not the EBCDIC platform we made the observation on?   I
don't know how much of a technicality another EBCDIC platform that
comes anywhere near building modern releases is.

-- 
Eric Covener
covener@gmail.com

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Thu, Aug 4, 2016 at 4:29 PM, Yann Ylavic <yl...@gmail.com> wrote:

> On Thu, Aug 4, 2016 at 11:10 PM, William A Rowe Jr <wr...@rowe-clan.net>
> wrote:
> > On Thu, Aug 4, 2016 at 3:52 PM, Yann Ylavic <yl...@gmail.com>
> wrote:
> >>
> >> On Thu, Aug 4, 2016 at 9:33 PM, William A Rowe Jr <wr...@rowe-clan.net>
> >> wrote:
> >> >
> >> > It seems correcting the table is the correct way to go, by direct
> >> > observation, and placing great faith that other than 0x15/0x37,
> >> > the discrepancies between ASCII <> EBCDIC C0 mappings do
> >> > not vary widely between EBCDIC mapping choices.
> >>
> >> Maybe to be sure we could compare the current 'ucharmap' with some
> >> result of apr_xlate_conv_byte() for each byte?
> >
> >
> > Perhaps a VALIDATE_TABLE define for the builder, especially when
> > --with-maintainer-mode is given?
>
> Maybe a small temporary main() run once would be enough, it's not as
> if it could change anytime soon, no?
>

Then we may as well build the table at runtime as a register_hooks
callback... which is a possibility.

The downside is that the table would be more difficult to validate, not
less.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Yann Ylavic <yl...@gmail.com>.
On Thu, Aug 4, 2016 at 11:10 PM, William A Rowe Jr <wr...@rowe-clan.net> wrote:
> On Thu, Aug 4, 2016 at 3:52 PM, Yann Ylavic <yl...@gmail.com> wrote:
>>
>> On Thu, Aug 4, 2016 at 9:33 PM, William A Rowe Jr <wr...@rowe-clan.net>
>> wrote:
>> >
>> > It seems correcting the table is the correct way to go, by direct
>> > observation, and placing great faith that other than 0x15/0x37,
>> > the discrepancies between ASCII <> EBCDIC C0 mappings do
>> > not vary widely between EBCDIC mapping choices.
>>
>> Maybe to be sure we could compare the current 'ucharmap' with some
>> result of apr_xlate_conv_byte() for each byte?
>
>
> Perhaps a VALIDATE_TABLE define for the builder, especially when
> --with-maintainer-mode is given?

Maybe a small temporary main() run once would be enough, it's not as
if it could change anytime soon, no?

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Thu, Aug 4, 2016 at 3:52 PM, Yann Ylavic <yl...@gmail.com> wrote:

> On Thu, Aug 4, 2016 at 9:33 PM, William A Rowe Jr <wr...@rowe-clan.net>
> wrote:
> >
> > It seems correcting the table is the correct way to go, by direct
> > observation, and placing great faith that other than 0x15/0x37,
> > the discrepancies between ASCII <> EBCDIC C0 mappings do
> > not vary widely between EBCDIC mapping choices.
>
> Maybe to be sure we could compare the current 'ucharmap' with some
> result of apr_xlate_conv_byte() for each byte?
>

Perhaps a VALIDATE_TABLE define for the builder, especially when
--with-maintainer-mode is given?

Without some specific define, invoking actual apr functions, again,
violates the ability to cross-compile.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Yann Ylavic <yl...@gmail.com>.
On Thu, Aug 4, 2016 at 9:33 PM, William A Rowe Jr <wr...@rowe-clan.net> wrote:
>
> It seems correcting the table is the correct way to go, by direct
> observation, and placing great faith that other than 0x15/0x37,
> the discrepancies between ASCII <> EBCDIC C0 mappings do
> not vary widely between EBCDIC mapping choices.

Maybe to be sure we could compare the current 'ucharmap' with some
result of apr_xlate_conv_byte() for each byte?

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Mon, Aug 8, 2016 at 11:24 AM, Eric Covener <co...@gmail.com> wrote:

> On Mon, Aug 8, 2016 at 12:03 PM, William A Rowe Jr <wr...@rowe-clan.net>
> wrote:
> > Easier is to do a compile time comparison of '\n' to 0x15 vs 0x25. But I
> > need to know the mystery of 0x25's value through iconv on your
> architecture.
> > Please research, if they simply trade places we are fine. If they both
> map
> > to 0x0A in ASCII we simply treat them as equal in our comparison fn. And
> the
> > resulting table will be correct irrespective of what iconv munging has
> been
> > performe
>
> On z/OS the 15 and 25 are inverted:
>
> $ printf "\r\n\x25" |od -t x1
> 0000000000    0D  15  25
> 0000000003
> $ printf "\r\n\x25" |iconv -f IBM1047 -t ISO8859-1|od -t x1
> 0000000000    0D  0A  85
>
> (same result for 037 codepage, confirmed character constants compiled the
> same)
>

Thanks, that's trivial to account for in the fixed table, '\n' == 0x15 (vs
0x25)

I think we could accommodate 037 vs. 1047 by simply comparing these values;

 [  ]  ^
BA BB B0
AD BD 5F

Beyond this, things start to get weird, I've attached the list.

Particularly, 937 and 1399 are problematic because we can't test
'\x0f' == 0x0f, and there is no standard C escape sequence for 0x0e/0x0f
to use as a compile time trigger. Interestingly, those are the only C0
codes that ever seem to differ between EBCDIC code pages.

For the most part, however, we only care that 1:1 our Alpha upper-lower
matches
for apr_cstr_casecmp. For these ap_* util.c functions, we also care that
all C0
chars fall in the same C0 set of mappings, and that all other values which
map
to ASCII translate to some ASCII visible character value.

Beginning to think that a simple run-time program to regenerate this table
when
the user chooses and is running on an unusual code page could be valuable,
using iconv directly and not through apr to preserve cross-compilation, and
verifying in our ap_init_ebcdic that we are running with the correct code
page.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
On Mon, Aug 8, 2016 at 12:03 PM, William A Rowe Jr <wr...@rowe-clan.net> wrote:
> Easier is to do a compile time comparison of '\n' to 0x15 vs 0x25. But I
> need to know the mystery of 0x25's value through iconv on your architecture.
> Please research, if they simply trade places we are fine. If they both map
> to 0x0A in ASCII we simply treat them as equal in our comparison fn. And the
> resulting table will be correct irrespective of what iconv munging has been
> performe

On z/OS the 15 and 25 are inverted:

$ printf "\r\n\x25" |od -t x1
0000000000    0D  15  25
0000000003
$ printf "\r\n\x25" |iconv -f IBM1047 -t ISO8859-1|od -t x1
0000000000    0D  0A  85

(same result for 037 codepage, confirmed character constants compiled the same)

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Aug 5, 2016 4:37 PM, "Eric Covener" <co...@gmail.com> wrote:
>
> On Fri, Aug 5, 2016 at 4:58 PM, William A Rowe Jr <wr...@rowe-clan.net>
wrote:
> > On Thu, Aug 4, 2016 at 2:33 PM, William A Rowe Jr <wr...@rowe-clan.net>
> > wrote:
> >>
> >> On Thu, Aug 4, 2016 at 2:01 PM, Eric Covener <co...@gmail.com> wrote:
> >>>
> >>> On Mon, Aug 1, 2016 at 3:22 PM, William A Rowe Jr <wrowe@rowe-clan.net
>
> >>> wrote:
> >>> > We have a few choices, but the bottom line is that we treat /r/n
> >>> > as 0x0a 0x15 on ebcdic, and perhaps fix our iconv mapping.
> >>> >
> >>> > Choice 1; map both 0x15 and 0x37 to ASCII 0x0d, which grows the
> >>> > number of ascii equivalents by one. Both would be treated at CTRL.
> >>> >
> >>> > Choice 2; invert our current mapping, ASCII NL to EBCDIC LF and
> >>> > visa-versa. That would leave 0x37 'unguarded' and allowed as opaque
> >>> > text chars.
> >>> >
> >>> > Choice 3; treat the entire C1 codeplane on EBCDIC as CTRLs, and
> >>> > ignore some 32 'opaque bytes' as unsupportable.
> >>>
> >>> How about #2 with the below -- but using apr_xlate so it is the same
as
> >>> runtime:

> I still think It has to match the apr_xlate result, which could be
> either flavor depending on whether iconv (0x15 <-> 0x0A and 0x25 <->
> 0x85) or apr-iconv (opposite) is used on the EBCDIC platform I can
> test on.

According to our current implementation, it actually won't be hard to fix
this at compile time, for cross compilation provided that both origin and
target are 'some EBCDIC' arch. It should not be impossible to take that one
step further and cross compile on an ASCII origin/build but that's a
project for another month, if ever.

First off, I am working to review all EBCDIC code page exceptions for the
some 3 dozen installed here. They all follow the classic \x0D\x25 mapping.
But I'll try to determine what the typical exceptions are within C0 and
ASCII equiv.

> I haven't heard a peep about any other EBCDIC platforms in many years.
> We can #error in this EBCDIC block if __MVS__ is not defined.

Easier is to do a compile time comparison of '\n' to 0x15 vs 0x25. But I
need to know the mystery of 0x25's value through iconv on your
architecture. Please research, if they simply trade places we are fine. If
they both map to 0x0A in ASCII we simply treat them as equal in our
comparison fn. And the resulting table will be correct irrespective of what
iconv munging has been performed.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
On Fri, Aug 5, 2016 at 4:58 PM, William A Rowe Jr <wr...@rowe-clan.net> wrote:
> On Thu, Aug 4, 2016 at 2:33 PM, William A Rowe Jr <wr...@rowe-clan.net>
> wrote:
>>
>> On Thu, Aug 4, 2016 at 2:01 PM, Eric Covener <co...@gmail.com> wrote:
>>>
>>> On Mon, Aug 1, 2016 at 3:22 PM, William A Rowe Jr <wr...@rowe-clan.net>
>>> wrote:
>>> > We have a few choices, but the bottom line is that we treat /r/n
>>> > as 0x0a 0x15 on ebcdic, and perhaps fix our iconv mapping.
>>> >
>>> > Choice 1; map both 0x15 and 0x37 to ASCII 0x0d, which grows the
>>> > number of ascii equivalents by one. Both would be treated at CTRL.
>>> >
>>> > Choice 2; invert our current mapping, ASCII NL to EBCDIC LF and
>>> > visa-versa. That would leave 0x37 'unguarded' and allowed as opaque
>>> > text chars.
>>> >
>>> > Choice 3; treat the entire C1 codeplane on EBCDIC as CTRLs, and
>>> > ignore some 32 'opaque bytes' as unsupportable.
>>>
>>> How about #2 with the below -- but using apr_xlate so it is the same as
>>> runtime:
>>>
>>> http://people.apache.org/~covener/patches/ebcdic-gen_test_char.diff
>>
>>
>> Runtime is the issue, this module is set up for cross compilation,
>> including
>> between native ASCII architectures. The patch breaks all
>> cross-compilation,
>> AIUI, not exclusively EBCDIC target builds.
>>
>> I suspect that the generator is probably wrong for cross compilation on
>> an ASCII origin/build box targeting an EBCDIC OS in the first place.
>>
>> It seems correcting the table is the correct way to go, by direct
>> observation, and placing great faith that other than 0x15/0x37,
>> the discrepancies between ASCII <> EBCDIC C0 mappings do
>> not vary widely between EBCDIC mapping choices. Whether we
>> fix cross compilation of an ASCII build to an EBCDIC target is
>> a different question.
>>
>> An alternative is to directly speak iconv, /shrug.
>>
>>> (I am not happy about changing the non-ebcdic build here, but it
>>> should act just like a static support program)
>>
>>
>> Static support programs don't have to be invoked on the build environment,
>> today.
>>
>> But I'm still uneasy about leaving [correction] 0x25 / 37 unguarded.
>
>
> So I dug deeper, found a few more references;
>
> https://www.ibm.com/developerworks/community/forums/html/topic?id=77777777-0000-0000-0000-000014347663
>
> https://en.wikipedia.org/wiki/Newline
>
> In EBCDIC -> ASCII I believe the safest course is to treat the dominant
> compiler's representation of '\n' as ASCII NL 0x0A, no questions asked,
> #error if not true.  I'd be entirely OK with either refusing the other or
> treating
> as 0x85 the other value, between NL of 0x15 and LEN of 0x25. This would
> be consistent with compiling almost any third party c sources as an httpd
> loadable module.

I still think It has to match the apr_xlate result, which could be
either flavor depending on whether iconv (0x15 <-> 0x0A and 0x25 <->
0x85) or apr-iconv (opposite) is used on the EBCDIC platform I can
test on.

If apr-iconv is meant for systems w/o iconv, and  there's only 1
supportable EBCDIC platform and its iconv has this quirk, we should
use it instead of the real 037/1047 table because it will be applied
to the protocol data this way.

I haven't heard a peep about any other EBCDIC platforms in many years.
We can #error in this EBCDIC block if __MVS__ is not defined.

-- 
Eric Covener
covener@gmail.com

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Thu, Aug 4, 2016 at 2:33 PM, William A Rowe Jr <wr...@rowe-clan.net>
wrote:

> On Thu, Aug 4, 2016 at 2:01 PM, Eric Covener <co...@gmail.com> wrote:
>
>> On Mon, Aug 1, 2016 at 3:22 PM, William A Rowe Jr <wr...@rowe-clan.net>
>> wrote:
>> > We have a few choices, but the bottom line is that we treat /r/n
>> > as 0x0a 0x15 on ebcdic, and perhaps fix our iconv mapping.
>> >
>> > Choice 1; map both 0x15 and 0x37 to ASCII 0x0d, which grows the
>> > number of ascii equivalents by one. Both would be treated at CTRL.
>> >
>> > Choice 2; invert our current mapping, ASCII NL to EBCDIC LF and
>> > visa-versa. That would leave 0x37 'unguarded' and allowed as opaque
>> > text chars.
>> >
>> > Choice 3; treat the entire C1 codeplane on EBCDIC as CTRLs, and
>> > ignore some 32 'opaque bytes' as unsupportable.
>>
>> How about #2 with the below -- but using apr_xlate so it is the same as
>> runtime:
>>
>> http://people.apache.org/~covener/patches/ebcdic-gen_test_char.diff
>>
>
> Runtime is the issue, this module is set up for cross compilation,
> including
> between native ASCII architectures. The patch breaks all
> cross-compilation,
> AIUI, not exclusively EBCDIC target builds.
>
> I suspect that the generator is probably wrong for cross compilation on
> an ASCII origin/build box targeting an EBCDIC OS in the first place.
>
> It seems correcting the table is the correct way to go, by direct
> observation, and placing great faith that other than 0x15/0x37,
> the discrepancies between ASCII <> EBCDIC C0 mappings do
> not vary widely between EBCDIC mapping choices. Whether we
> fix cross compilation of an ASCII build to an EBCDIC target is
> a different question.
>
> An alternative is to directly speak iconv, /shrug.
>
> (I am not happy about changing the non-ebcdic build here, but it
>> should act just like a static support program)
>>
>
> Static support programs don't have to be invoked on the build environment,
> today.
>
> But I'm still uneasy about leaving [correction] 0x25 / 37 unguarded.
>

So I dug deeper, found a few more references;

https://www.ibm.com/developerworks/community/forums/html/topic?id=77777777-0000-0000-0000-000014347663

https://en.wikipedia.org/wiki/Newline

In EBCDIC -> ASCII I believe the safest course is to treat the dominant
compiler's representation of '\n' as ASCII NL 0x0A, no questions asked,
#error if not true.  I'd be entirely OK with either refusing the other
value or treating it as 0x85, between NL of 0x15 and LF of 0x25. This
would be consistent with compiling almost any third party C sources as
an httpd loadable module.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Thu, Aug 4, 2016 at 2:01 PM, Eric Covener <co...@gmail.com> wrote:

> On Mon, Aug 1, 2016 at 3:22 PM, William A Rowe Jr <wr...@rowe-clan.net>
> wrote:
> > We have a few choices, but the bottom line is that we treat /r/n
> > as 0x0a 0x15 on ebcdic, and perhaps fix our iconv mapping.
> >
> > Choice 1; map both 0x15 and 0x37 to ASCII 0x0d, which grows the
> > number of ascii equivalents by one. Both would be treated at CTRL.
> >
> > Choice 2; invert our current mapping, ASCII NL to EBCDIC LF and
> > visa-versa. That would leave 0x37 'unguarded' and allowed as opaque
> > text chars.
> >
> > Choice 3; treat the entire C1 codeplane on EBCDIC as CTRLs, and
> > ignore some 32 'opaque bytes' as unsupportable.
>
> How about #2 with the below -- but using apr_xlate so it is the same as
> runtime:
>
> http://people.apache.org/~covener/patches/ebcdic-gen_test_char.diff
>

Runtime is the issue, this module is set up for cross compilation, including
between native ASCII architectures. The patch breaks all cross-compilation,
AIUI, not exclusively EBCDIC target builds.

I suspect that the generator is probably wrong for cross compilation on
an ASCII origin/build box targeting an EBCDIC OS in the first place.

It seems correcting the table is the correct way to go, by direct
observation, and placing great faith that other than 0x15/0x37,
the discrepancies between ASCII <> EBCDIC C0 mappings do
not vary widely between EBCDIC mapping choices. Whether we
fix cross compilation of an ASCII build to an EBCDIC target is
a different question.

An alternative is to directly speak iconv, /shrug.

> (I am not happy about changing the non-ebcdic build here, but it
> should act just like a static support program)
>

Static support programs don't have to be invoked on the build environment,
today.

But I'm still uneasy about leaving 0x37 unguarded.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
On Mon, Aug 1, 2016 at 3:22 PM, William A Rowe Jr <wr...@rowe-clan.net> wrote:
> We have a few choices, but the bottom line is that we treat /r/n
> as 0x0a 0x15 on ebcdic, and perhaps fix our iconv mapping.
>
> Choice 1; map both 0x15 and 0x37 to ASCII 0x0d, which grows the
> number of ascii equivalents by one. Both would be treated at CTRL.
>
> Choice 2; invert our current mapping, ASCII NL to EBCDIC LF and
> visa-versa. That would leave 0x37 'unguarded' and allowed as opaque
> text chars.
>
> Choice 3; treat the entire C1 codeplane on EBCDIC as CTRLs, and
> ignore some 32 'opaque bytes' as unsupportable.

How about #2 with the below -- but using apr_xlate so it is the same as runtime:

http://people.apache.org/~covener/patches/ebcdic-gen_test_char.diff

(I am not happy about changing the non-ebcdic build here, but it
should act just like a static support program)

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Mon, Aug 1, 2016 at 2:08 PM, Eric Covener <co...@gmail.com> wrote:

> The mainframe guys say it's an unfortunate but intentional
> working-as-designed fudge of the iconv results to make the preferred
> line separator (0x15)map to/from 0x0A.  Seems like safest would be to
> not use a table for conversion but instead go through the same iconv
> that we're using to convert the data?


We have a few choices, but the bottom line is that we treat \r\n
as 0x0d 0x15 on ebcdic, and perhaps fix our iconv mapping.

Choice 1; map both 0x15 and 0x37 to ASCII 0x0d, which grows the
number of ascii equivalents by one. Both would be treated as CTRL.

Choice 2; invert our current mapping, ASCII NL to EBCDIC LF and
vice versa. That would leave 0x37 'unguarded' and allowed as opaque
text chars.

Choice 3; treat the entire C1 codeplane on EBCDIC as CTRLs, and
ignore some 32 'opaque bytes' as unsupportable.

Thoughts?

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
The mainframe guys say it's an unfortunate but intentional
working-as-designed fudge of the iconv results to make the preferred
line separator (0x15) map to/from 0x0A.  Seems like the safest approach
would be to not use a table for conversion but instead go through the
same iconv that we're using to convert the data?

On Mon, Aug 1, 2016 at 12:45 PM, Eric Covener <co...@gmail.com> wrote:
> On Mon, Aug 1, 2016 at 12:41 PM, Eric Covener <co...@gmail.com> wrote:
>> Looks like iconv on z/os changes behavior when stdout is a terminal,
>> so that taints some of my observations.  Will revisit soon.
>
> This bit is a false alarm.



-- 
Eric Covener
covener@gmail.com

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
On Mon, Aug 1, 2016 at 12:41 PM, Eric Covener <co...@gmail.com> wrote:
> Looks like iconv on z/os changes behavior when stdout is a terminal,
> so that taints some of my observations.  Will revisit soon.

This bit is a false alarm.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
Looks like iconv on z/os changes behavior when stdout is a terminal,
so that taints some of my observations.  Will revisit soon.

On Mon, Aug 1, 2016 at 12:28 PM, Eric Covener <co...@gmail.com> wrote:
> I've sent a note to the z/OS Unix mailing list in hopes someone can
> distill the confusing over how 0x15 and 0x25  EBCDIC seem to be
> interpreted differently.  0x15 is kind of weird but 0x25 is hard to
> justify.
>
> On Mon, Aug 1, 2016 at 12:15 PM, Eric Covener <co...@gmail.com> wrote:
>> On Mon, Aug 1, 2016 at 11:55 AM, Eric Covener <co...@gmail.com> wrote:
>>> I notice ebcdic-us.c in apr-iconv has this mapping instead of 0x85
>>
>> iconv on linux acts like the apr-iconv table, though.
>
>
>
> --
> Eric Covener
> covener@gmail.com



-- 
Eric Covener
covener@gmail.com

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
I've sent a note to the z/OS Unix mailing list in hopes someone can
clear up the confusion over how 0x15 and 0x25 in EBCDIC seem to be
interpreted differently.  0x15 is kind of weird but 0x25 is hard to
justify.

On Mon, Aug 1, 2016 at 12:15 PM, Eric Covener <co...@gmail.com> wrote:
> On Mon, Aug 1, 2016 at 11:55 AM, Eric Covener <co...@gmail.com> wrote:
>> I notice ebcdic-us.c in apr-iconv has this mapping instead of 0x85
>
> iconv on linux acts like the apr-iconv table, though.



-- 
Eric Covener
covener@gmail.com

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
On Mon, Aug 1, 2016 at 11:55 AM, Eric Covener <co...@gmail.com> wrote:
> I notice ebcdic-us.c in apr-iconv has this mapping instead of 0x85

iconv on linux acts like the apr-iconv table, though.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
On Mon, Aug 1, 2016 at 11:28 AM, William A Rowe Jr <wr...@rowe-clan.net> wrote:
> On Sun, Jul 31, 2016 at 11:51 AM, Eric Covener <co...@gmail.com> wrote:
>>
>> On Sun, Jul 31, 2016 at 12:19 PM, William A Rowe Jr <wr...@rowe-clan.net>
>> wrote:
>> > Not a conclusion, but this is obviously a bigger headache...
>> >
>> >
>> > https://www.ibm.com/support/knowledgecenter/SSLTBW_2.1.0/com.ibm.zos.v2r1.ioea600/smbadap021003899.htm
>>
>> I don't think this is an issue for httpd. There are a number of
>> auto-conversion things at different layers, but the low level ones
>> that would affect our calls are opt-in.  That SMB server link for
>> example is more the equivalent of saying what mod_charset_lite does by
>> default rather than what a filesystem or C lib might do by default.
>
>
> The most important question is what is printf("%02x %02x\n", (int)'\r',
> (int)'\n');
> If '\n' is actualiy 0x15 we may as well flip LF and NL for the purposes of
> our
> table.

Output is what you anticipated -- 0d 15

native iconv on the host does xlate back and forth as 0x15<->0x0a

I notice ebcdic-us.c in apr-iconv has this mapping instead of 0x85

Note: AFAIK IBM has always used native iconv on the mainframe for
httpd, so synching up with apr-iconv tables is probably not paramount.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Sun, Jul 31, 2016 at 11:51 AM, Eric Covener <co...@gmail.com> wrote:

> On Sun, Jul 31, 2016 at 12:19 PM, William A Rowe Jr <wr...@rowe-clan.net>
> wrote:
> > Not a conclusion, but this is obviously a bigger headache...
> >
> >
> https://www.ibm.com/support/knowledgecenter/SSLTBW_2.1.0/com.ibm.zos.v2r1.ioea600/smbadap021003899.htm
>
> I don't think this is an issue for httpd. There are a number of
> auto-conversion things at different layers, but the low level ones
> that would affect our calls are opt-in.  That SMB server link for
> example is more the equivalent of saying what mod_charset_lite does by
> default rather than what a filesystem or C lib might do by default.
>

The most important question is: what is the output of
printf("%02x %02x\n", (int)'\r', (int)'\n');
If '\n' is actually 0x15 we may as well flip LF and NL for the purposes of
our table.

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
On Sun, Jul 31, 2016 at 12:19 PM, William A Rowe Jr <wr...@rowe-clan.net> wrote:
> Not a conclusion, but this is obviously a bigger headache...
>
> https://www.ibm.com/support/knowledgecenter/SSLTBW_2.1.0/com.ibm.zos.v2r1.ioea600/smbadap021003899.htm

I don't think this is an issue for httpd. There are a number of
auto-conversion things at different layers, but the low level ones
that would affect our calls are opt-in.  That SMB server link for
example is more the equivalent of saying what mod_charset_lite does by
default rather than what a filesystem or C lib might do by default.


-- 
Eric Covener
covener@gmail.com

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
On Jul 30, 2016 6:25 PM, "William A Rowe Jr" <wr...@rowe-clan.net> wrote:
>
> CR LF are 0D 37 in EBCDIC. Those have protocol specific meanings.
>
> NL in EBCDIC or ASCII has no specific meaning, it is opaque text. It's
> not an HTTP CTRL char.
>
> However, wouldn't we need to escape it in a shell cmd? We might want to
> consider escaping many C1 ctrls in the shell.
>
>
> On Jul 30, 2016 8:25 AM, "Eric Covener" <co...@gmail.com> wrote:
>>
>> What's going on with 0x15 (newline in ebcdic)?  It is a control char
>> that we map to 0x85 but it doesn't look right in the generated table.
>>
>> On Fri, Jul 29, 2016 at 9:37 PM, Eric Covener <co...@gmail.com> wrote:
>> > from rev below:
>> > http://people.apache.org/~covener/test_char.h
>> >
>> >
>> >
>> > On Fri, Jul 29, 2016 at 6:00 PM,  <wr...@apache.org> wrote:
>> >> Author: wrowe
>> >> Date: Fri Jul 29 22:00:52 2016
>> >> New Revision: 1754579
>> >>
>> >> URL: http://svn.apache.org/viewvc?rev=1754579&view=rev
>> >> Log:
>> >> Replacement solution to identify VCHAR/ASCII symbols, even in EBCDIC.
>> >>
>> >> Looking for someone with an EBCDIC environment to post the output of
>> >> the test_char.h generated file for verification.
>> >>
>> >>
>> >> Modified:
>> >>     httpd/httpd/trunk/server/gen_test_char.c
>> >>
>> >> Modified: httpd/httpd/trunk/server/gen_test_char.c
>> >> URL:
http://svn.apache.org/viewvc/httpd/httpd/trunk/server/gen_test_char.c?rev=1754579&r1=1754578&r2=1754579&view=diff
>> >>
==============================================================================
>> >> --- httpd/httpd/trunk/server/gen_test_char.c (original)
>> >> +++ httpd/httpd/trunk/server/gen_test_char.c Fri Jul 29 22:00:52 2016
>> >> @@ -20,6 +20,7 @@
>> >>  #define apr_isalpha(c) (isalpha(((unsigned char)(c))))
>> >>  #define apr_iscntrl(c) (iscntrl(((unsigned char)(c))))
>> >>  #define apr_isprint(c) (isprint(((unsigned char)(c))))
>> >> +#define apr_isascii(c) (isascii(((unsigned char)(c))))
>> >>  #include <ctype.h>
>> >>  #define APR_HAVE_STDIO_H 1
>> >>  #define APR_HAVE_STRING_H 1
>> >> @@ -31,6 +32,48 @@
>> >>
>> >>  #endif
>> >>
>> >> +#if APR_CHARSET_EBCDIC
>> >> +/* See util.c for complete explanation of this table */
>> >> +static const short ucharmap[] = {
>> >> +    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F,
>> >> +    0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
>> >> +    0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87,
>> >> +    0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
>> >> +    0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B,
>> >> +    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
>> >> +    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
>> >> +    0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
>> >> +    0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5,
>> >> +    0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
>> >> +    0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF,
>> >> +    0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
>> >> +    0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
>> >> +    0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
>> >> +    0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
>> >> +    0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
>> >> +    0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
>> >> +    0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
>> >> +    0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
>> >> +    0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
>> >> +    0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
>> >> +    0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
>> >> +    0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
>> >> +    0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
>> >> +    0x7B, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
>> >> +    0x68, 0x69, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
>> >> +    0x7D, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
>> >> +    0x71, 0x72, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
>> >> +    0x5C, 0xF7, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
>> >> +    0x79, 0x7A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
>> >> +    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
>> >> +    0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
>> >> +};
>> >> +#define test_isascii_equiv(c) ((ucharmap[(unsigned char)c] & ~0x7f) == 0)
>> >> +#else
>> >> +#define test_isascii_equiv(c) apr_isascii(c)
>> >> +#endif
>> >> +
>> >> +
>> >>  #if defined(WIN32) || defined(OS2)
>> >>  #define NEED_ENHANCED_ESCAPES
>> >>  #endif
>> >> @@ -120,19 +163,20 @@ int main(int argc, char *argv[])
>> >>
>> >>          /* Stop for any non-'token' character, including ctrls,
obs-text,
>> >>           * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616)
>> >> -         * XXX: We need to build a specific table for EBCDIC values
with
>> >> -         * ASCII equivilants here
>> >> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our
EBCDIC table
>> >> +         * are captured by apr_iscntrl()
>> >>           */
>> >> -        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}",
c)) {
>> >> +        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}",
c)
>> >> +               || !test_isascii_equiv(c)) {
>> >>              flags |= T_HTTP_TOKEN_STOP;
>> >>          }
>> >>
>> >>          /* Catch CTRLs other than VCHAR, HT and SP, and obs-text
(RFC7230 3.2)
>> >>           * This includes only the C0 plane, not C1 (which is
obs-text itself.)
>> >> -         * XXX: Need to constrain iscntrl to C0 equivilants in ASCII,
>> >> -         * even on EBCDIC architecture
>> >> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our
EBCDIC table
>> >> +         * are captured by apr_iscntrl()
>> >>           */
>> >> -        if (!c || (apr_iscntrl(c) && c != '\t')) {
>> >> +        if (!c || (apr_iscntrl(c) && c != '\t' &&
test_isascii_equiv(c))) {
>> >>              flags |= T_HTTP_CTRLS;
>> >>          }
>> >>

Not a conclusion, but this is obviously a bigger headache...

https://www.ibm.com/support/knowledgecenter/SSLTBW_2.1.0/com.ibm.zos.v2r1.ioea600/smbadap021003899.htm

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by William A Rowe Jr <wr...@rowe-clan.net>.
CR LF are 0D 37 in EBCDIC. Those have protocol specific meanings.

NL in EBCDIC or ASCII has no specific meaning, it is opaque text. It's not
an HTTP CTRL char.

However, wouldn't we need to escape it in a shell cmd? We might want to
consider escaping many C1 ctrls in the shell.

On Jul 30, 2016 8:25 AM, "Eric Covener" <co...@gmail.com> wrote:

> What's going on with 0x15 (newline in ebcdic)?  It is a control char
> that we map to 0x85 but it doesn't look right in the generated table.
>
> On Fri, Jul 29, 2016 at 9:37 PM, Eric Covener <co...@gmail.com> wrote:
> > from rev below:
> > http://people.apache.org/~covener/test_char.h
> >
> >
> >
> > On Fri, Jul 29, 2016 at 6:00 PM,  <wr...@apache.org> wrote:
> >> Author: wrowe
> >> Date: Fri Jul 29 22:00:52 2016
> >> New Revision: 1754579
> >>
> >> URL: http://svn.apache.org/viewvc?rev=1754579&view=rev
> >> Log:
> >> Replacement solution to identify VCHAR/ASCII symbols, even in EBCDIC.
> >>
> >> Looking for someone with an EBCDIC environment to post the output of
> >> the test_char.h generated file for verification.
> >>
> >>
> >> Modified:
> >>     httpd/httpd/trunk/server/gen_test_char.c
> >>
> >> Modified: httpd/httpd/trunk/server/gen_test_char.c
> >> URL:
> http://svn.apache.org/viewvc/httpd/httpd/trunk/server/gen_test_char.c?rev=1754579&r1=1754578&r2=1754579&view=diff
> >>
> ==============================================================================
> >> --- httpd/httpd/trunk/server/gen_test_char.c (original)
> >> +++ httpd/httpd/trunk/server/gen_test_char.c Fri Jul 29 22:00:52 2016
> >> @@ -20,6 +20,7 @@
> >>  #define apr_isalpha(c) (isalpha(((unsigned char)(c))))
> >>  #define apr_iscntrl(c) (iscntrl(((unsigned char)(c))))
> >>  #define apr_isprint(c) (isprint(((unsigned char)(c))))
> >> +#define apr_isascii(c) (isascii(((unsigned char)(c))))
> >>  #include <ctype.h>
> >>  #define APR_HAVE_STDIO_H 1
> >>  #define APR_HAVE_STRING_H 1
> >> @@ -31,6 +32,48 @@
> >>
> >>  #endif
> >>
> >> +#if APR_CHARSET_EBCDIC
> >> +/* See util.c for complete explanation of this table */
> >> +static const short ucharmap[] = {
> >> +    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F,
> >> +    0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
> >> +    0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87,
> >> +    0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
> >> +    0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B,
> >> +    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
> >> +    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
> >> +    0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
> >> +    0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5,
> >> +    0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
> >> +    0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF,
> >> +    0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
> >> +    0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
> >> +    0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
> >> +    0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
> >> +    0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
> >> +    0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
> >> +    0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
> >> +    0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
> >> +    0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
> >> +    0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
> >> +    0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
> >> +    0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
> >> +    0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
> >> +    0x7B, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
> >> +    0x68, 0x69, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
> >> +    0x7D, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
> >> +    0x71, 0x72, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
> >> +    0x5C, 0xF7, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
> >> +    0x79, 0x7A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
> >> +    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
> >> +    0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
> >> +};
> >> +#define test_isascii_equiv(c) ((ucharmap[(unsigned char)c] & ~0x7f) == 0)
> >> +#else
> >> +#define test_isascii_equiv(c) apr_isascii(c)
> >> +#endif
> >> +
> >> +
> >>  #if defined(WIN32) || defined(OS2)
> >>  #define NEED_ENHANCED_ESCAPES
> >>  #endif
> >> @@ -120,19 +163,20 @@ int main(int argc, char *argv[])
> >>
> >>          /* Stop for any non-'token' character, including ctrls,
> obs-text,
> >>           * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616)
> >> -         * XXX: We need to build a specific table for EBCDIC values
> with
> >> -         * ASCII equivilants here
> >> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our
> EBCDIC table
> >> +         * are captured by apr_iscntrl()
> >>           */
> >> -        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}",
> c)) {
> >> +        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}", c)
> >> +               || !test_isascii_equiv(c)) {
> >>              flags |= T_HTTP_TOKEN_STOP;
> >>          }
> >>
> >>          /* Catch CTRLs other than VCHAR, HT and SP, and obs-text
> (RFC7230 3.2)
> >>           * This includes only the C0 plane, not C1 (which is obs-text
> itself.)
> >> -         * XXX: Need to constrain iscntrl to C0 equivilants in ASCII,
> >> -         * even on EBCDIC architecture
> >> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our
> EBCDIC table
> >> +         * are captured by apr_iscntrl()
> >>           */
> >> -        if (!c || (apr_iscntrl(c) && c != '\t')) {
> >> +        if (!c || (apr_iscntrl(c) && c != '\t' &&
> test_isascii_equiv(c))) {
> >>              flags |= T_HTTP_CTRLS;
> >>          }
> >>
> >>
> >>
> >
> >
> >
> > --
> > Eric Covener
> > covener@gmail.com
>
>
>
> --
> Eric Covener
> covener@gmail.com
>

Re: svn commit: r1754579 - /httpd/httpd/trunk/server/gen_test_char.c

Posted by Eric Covener <co...@gmail.com>.
What's going on with 0x15 (newline in ebcdic)?  It is a control char
that we map to 0x85 but it doesn't look right in the generated table.

On Fri, Jul 29, 2016 at 9:37 PM, Eric Covener <co...@gmail.com> wrote:
> from rev below:
> http://people.apache.org/~covener/test_char.h
>
>
>
> On Fri, Jul 29, 2016 at 6:00 PM,  <wr...@apache.org> wrote:
>> Author: wrowe
>> Date: Fri Jul 29 22:00:52 2016
>> New Revision: 1754579
>>
>> URL: http://svn.apache.org/viewvc?rev=1754579&view=rev
>> Log:
>> Replacement solution to identify VCHAR/ASCII symbols, even in EBCDIC.
>>
>> Looking for someone with an EBCDIC environment to post the output of
>> the test_char.h generated file for verification.
>>
>>
>> Modified:
>>     httpd/httpd/trunk/server/gen_test_char.c
>>
>> Modified: httpd/httpd/trunk/server/gen_test_char.c
>> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/server/gen_test_char.c?rev=1754579&r1=1754578&r2=1754579&view=diff
>> ==============================================================================
>> --- httpd/httpd/trunk/server/gen_test_char.c (original)
>> +++ httpd/httpd/trunk/server/gen_test_char.c Fri Jul 29 22:00:52 2016
>> @@ -20,6 +20,7 @@
>>  #define apr_isalpha(c) (isalpha(((unsigned char)(c))))
>>  #define apr_iscntrl(c) (iscntrl(((unsigned char)(c))))
>>  #define apr_isprint(c) (isprint(((unsigned char)(c))))
>> +#define apr_isascii(c) (isascii(((unsigned char)(c))))
>>  #include <ctype.h>
>>  #define APR_HAVE_STDIO_H 1
>>  #define APR_HAVE_STRING_H 1
>> @@ -31,6 +32,48 @@
>>
>>  #endif
>>
>> +#if APR_CHARSET_EBCDIC
>> +/* See util.c for complete explanation of this table */
>> +static const short ucharmap[] = {
>> +    0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F,
>> +    0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
>> +    0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87,
>> +    0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
>> +    0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B,
>> +    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
>> +    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
>> +    0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
>> +    0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5,
>> +    0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
>> +    0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF,
>> +    0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
>> +    0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
>> +    0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
>> +    0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
>> +    0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
>> +    0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
>> +    0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
>> +    0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
>> +    0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
>> +    0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
>> +    0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
>> +    0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
>> +    0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
>> +    0x7B, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
>> +    0x68, 0x69, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
>> +    0x7D, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
>> +    0x71, 0x72, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
>> +    0x5C, 0xF7, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
>> +    0x79, 0x7A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
>> +    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
>> +    0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
>> +};
>> +#define test_isascii_equiv(c) ((ucharmap[(unsigned char)c] & ~0x7f) == 0)
>> +#else
>> +#define test_isascii_equiv(c) apr_isascii(c)
>> +#endif
>> +
>> +
>>  #if defined(WIN32) || defined(OS2)
>>  #define NEED_ENHANCED_ESCAPES
>>  #endif
>> @@ -120,19 +163,20 @@ int main(int argc, char *argv[])
>>
>>          /* Stop for any non-'token' character, including ctrls, obs-text,
>>           * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616)
>> -         * XXX: We need to build a specific table for EBCDIC values with
>> -         * ASCII equivilants here
>> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our EBCDIC table
>> +         * are captured by apr_iscntrl()
>>           */
>> -        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}", c)) {
>> +        if (!c || apr_iscntrl(c) || strchr(" \t()<>@,;:\\\"/[]?={}", c)
>> +               || !test_isascii_equiv(c)) {
>>              flags |= T_HTTP_TOKEN_STOP;
>>          }
>>
>>          /* Catch CTRLs other than VCHAR, HT and SP, and obs-text (RFC7230 3.2)
>>           * This includes only the C0 plane, not C1 (which is obs-text itself.)
>> -         * XXX: Need to constrain iscntrl to C0 equivilants in ASCII,
>> -         * even on EBCDIC architecture
>> +         * XXX: We need to verify that ASCII C0 ctrls/DEL in our EBCDIC table
>> +         * are captured by apr_iscntrl()
>>           */
>> -        if (!c || (apr_iscntrl(c) && c != '\t')) {
>> +        if (!c || (apr_iscntrl(c) && c != '\t' && test_isascii_equiv(c))) {
>>              flags |= T_HTTP_CTRLS;
>>          }
>>
>>
>>
>
>
>
> --
> Eric Covener
> covener@gmail.com



-- 
Eric Covener
covener@gmail.com