You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafodion.apache.org by "David Wayne Birdsall (JIRA)" <ji...@apache.org> on 2019/08/07 21:19:00 UTC
[jira] [Commented] (TRAFODION-3322) odb incorrectly generates 0 rows in a certain scenario

    [ https://issues.apache.org/jira/browse/TRAFODION-3322?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16902483#comment-16902483 ] 

David Wayne Birdsall commented on TRAFODION-3322:
-------------------------------------------------

I debugged this. The bug seems to be in the Oload function in module odb.c. The variable "len" is used for multiple purposes, and on one code path it is never assigned, so it is essentially left uninitialized. In the following code:
{quote}{{ /* Reading input file */}}
{{ pstats = 1; /* From now on print stats on exit */}}
{{ Odp = &etab[eid].Orowsetl[m*etab[eid].s];}}
{{ while ( go ) {}}
{{    if ( isgz ) { /* is a gzipped file */}}
{{        if ( gzstream.avail_in ) { /* continue inflating previous gzbuff into buff */}}
{{            gzstream.avail_out = (unsigned int) etab[eid].buffsz ;}}
{{            gzstream.next_out = (unsigned char *)buff ;}}
{{            gzret = inflate (&gzstream, Z_NO_FLUSH) ;}}
{{            switch ( gzret ) {}}
{{            case Z_OK:}}
{{                break ; /* everything is ok - continue */}}
{{            case Z_STREAM_END:}}
{{                inflateReset ( &gzstream ) ;}}
{{                break;}}
{{            default:}}
{{                fprintf(stderr, "odb [Oload(%d)] - Error during deflate: [%d]\n",}}
{{                __LINE__, gzret);}}
{{                goto oload_exit ;}}
{{                break;}}
{{            }}}
{{            len = etab[eid].buffsz - gzstream.avail_out ;}}
{{        } else {}}
{{            if ( fl ) { /* read new data from normal file-system into gzbuff */}}
{{            len = fread ( gzbuff, 1, (size_t)etab[eid].buffsz, fl);}}
{{#ifdef HDFS}}
{{            } else if ( fhl ) { /* read new data from HDFS into gzbuff */}}
{{                len = (size_t)(*hdfsread)(hfs, fhl, (void *)buff, etab[eid].buffsz);}}
{{#endif   }}
{{            }}}
{{            gzstream.avail_in = (unsigned int)len ;}}
{{            gzstream.next_in = (unsigned char *)gzbuff ;}}
{{            if ( len )}}
{{                continue ;}}
{{        }}}
{{    } else if ( fl ) {}}
{{        len = fread ( buff, 1, (size_t)etab[eid].buffsz, fl);}}
{{#ifdef HDFS}}
{{    } else if ( fhl ) {}}
{{        len = (size_t)(*hdfsread)(hfs, fhl, (void *)buff, etab[eid].buffsz);}}
{{#endif}}
{{    } }}{{ }}
{{    if ( len == 0 ) { /* EOF */}}
{{        if ( ( k + 1 ) == mff && !(fg & 0004) ) { /* complete last row & insert */}}
{{            ch = -1; /* insert this block */}}
{{            goto oload_lastrow;}}
{{        } else if ( m ) { /* rows to be inserted */ }}
{{            goto oload_insert;}}
{{        } else { /* exit loop */}}
{{            break;}}
{{         }}}
{{    }}}
{{    nb += len; /* update bytes read from file */}}
{{    p = 0; /* reset buffer index */}}
{{    while (lts && p < len) { /* skip initial lines */}}
{{        if (buff[p++] == lrs) {}}
{{            --lts;}}
{{        }}}
{{    }}}
{{    if ( ccl ) { /* continue cleaning rest of line */}}
{{        while ( p < len && buff[p] != lrs ) /* ... skip the rest of the line */}}
{{            p++;}}
{{        if ( buff[p] == lrs ) { /* if a record separator has been found */}}
{{            ccl = 0; /* switch the continue cleaning flag off */}}
{{            p++; /* skip the record separator */}}
{{        }}}
{{    }}}
{{    for ( ; p < len ; p++ ) {}}
{{        ch = buff[p];}}
{quote}
 

In the loop above, after "while ( go )", we go through a set of if / else if blocks that set len to the number of bytes read (or inflated) from the input file into the buffer. But in the case where there is no input file (src=nofile), none of these assignments is reached, so len is not set and keeps whatever value it had before we got to this block of code. It happens that in our failing test case, when we get to "for ( ; p < len ; p++ )", len has the value 1. This causes the "for" loop to exit prematurely.

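I found that if I added an "else" case setting len = 2, then the code works correctly. (The value p is always reset to 0 within the "for" loop, so after the "p++" at the end of each iteration p is 1 and the test "p < len" always succeeds when len is 2; with len equal to 1 the same test fails, which is why the dummy value must be greater than 1.)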

The patched code is below (added code is in red bolded italics):
{quote}{{/* Reading input file */}}
{{pstats = 1; /* From now on print stats on exit */}}
{{Odp = &etab[eid].Orowsetl[m*etab[eid].s];}}
{{while ( go ) {}}
{{    if ( isgz ) { /* is a gzipped file */}}
{{        if ( gzstream.avail_in ) { /* continue inflating previous gzbuff into buff */}}
{{            gzstream.avail_out = (unsigned int) etab[eid].buffsz ;}}
{{            gzstream.next_out = (unsigned char *)buff ;}}
{{            gzret = inflate (&gzstream, Z_NO_FLUSH) ;}}
{{            switch ( gzret ) {}}
{{            case Z_OK:}}
{{                break ; /* everything is ok - continue */}}
{{            case Z_STREAM_END:}}
{{                inflateReset ( &gzstream ) ;}}
{{                break;}}
{{            default:}}
{{                fprintf(stderr, "odb [Oload(%d)] - Error during deflate: [%d]\n",}}
{{                __LINE__, gzret);}}
{{                goto oload_exit ;}}
{{                break;}}
{{            }}}
{{            len = etab[eid].buffsz - gzstream.avail_out ;}}
{{        } else {}}
{{            if ( fl ) { /* read new data from normal file-system into gzbuff */}}
{{            len = fread ( gzbuff, 1, (size_t)etab[eid].buffsz, fl);}}
{{#ifdef HDFS}}
{{            } else if ( fhl ) { /* read new data from HDFS into gzbuff */}}
{{                len = (size_t)(*hdfsread)(hfs, fhl, (void *)buff, etab[eid].buffsz);}}
{{#endif   }}
{{            }}}
{{            gzstream.avail_in = (unsigned int)len ;}}
{{            gzstream.next_in = (unsigned char *)gzbuff ;}}
{{            if ( len )}}
{{                continue ;}}
{{        }}}
{{    } else if ( fl ) {}}
{{        len = fread ( buff, 1, (size_t)etab[eid].buffsz, fl);}}
{{#ifdef HDFS}}
{{    } else if ( fhl ) {}}
{{        len = (size_t)(*hdfsread)(hfs, fhl, (void *)buff, etab[eid].buffsz);}}
{{#endif}}
{{    } {color:#d04437}_*else*_{color} {color:#d04437}_*{*_{color}}}
{color:#d04437}        _*{{len = 2; // must be "nofile" case; need to use a dummy len value > 1}}*_ {color}
{{{color:#d04437}    _*}*_{color} }}
{{    if ( len == 0 ) { /* EOF */}}
{{        if ( ( k + 1 ) == mff && !(fg & 0004) ) { /* complete last row & insert */}}
{{            ch = -1; /* insert this block */}}
{{            goto oload_lastrow;}}
{{        } else if ( m ) { /* rows to be inserted */ }}
{{            goto oload_insert;}}
{{        } else { /* exit loop */}}
{{            break;}}
{{        }}}
{{    }}}
{{    nb += len; /* update bytes read from file */}}
{{    p = 0; /* reset buffer index */}}
{{    while (lts && p < len) { /* skip initial lines */}}
{{        if (buff[p++] == lrs) {}}
{{            --lts;}}
{{        }}}
{{    }}}
{{    if ( ccl ) { /* continue cleaning rest of line */}}
{{        while ( p < len && buff[p] != lrs ) /* ... skip the rest of the line */}}
{{            p++;}}
{{        if ( buff[p] == lrs ) { /* if a record separator has been found */}}
{{            ccl = 0; /* switch the continue cleaning flag off */}}
{{            p++; /* skip the record separator */}}
{{        }}}
{{    }}}
{{    for ( ; p < len ; p++ ) {}}
{{        ch = buff[p];}}
{quote}

> odb incorrectly generates 0 rows in a certain scenario
> ------------------------------------------------------
>
>                 Key: TRAFODION-3322
>                 URL: https://issues.apache.org/jira/browse/TRAFODION-3322
>             Project: Apache Trafodion
>          Issue Type: Bug
>          Components: db-utility-odb
>    Affects Versions: 2.4
>            Reporter: David Wayne Birdsall
>            Priority: Major
>
> In the following scenario, odb generates 0 rows when it should generate 5 rows.
> DDL:
> create table toyodb2 (a int not null, b int, c varchar(100), d varchar(100), primary key (a));
> Map file (here named, try2OdbCrandFails.map):
> A:SEQ:0
> B:IRAND:3:12
> C:DSRAND:carMakes.txt
> D:CRAND:10
> The file carMakes.txt contains:
> Chevrolet
> Dodge
> Toyota
> Nissan
> Suzuki
> Hyundai
> odb command:
> odb64luo -u db__root -p traf123 -d traf -l src=nofile:tgt=trafodion.sch.toyodb2:max=100:map=try2OdbCrandFails.map:rows=100:truncate
> In this example, 0 rows will be generated, when 5 should have been.
> If you change the map file to the following, it will succeed in generating 5 rows:
> A:SEQ:0
> B:IRAND:3:12
> C:CRAND:10
> D:DSRAND:carMakes.txt
>  
>  



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)