You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Khurram Faraaz (JIRA)" <ji...@apache.org> on 2015/10/05 19:59:26 UTC

[jira] [Updated] (DRILL-3891) ROW_KEY filter IN(integer values) does not get pushed in to Scan

     [ https://issues.apache.org/jira/browse/DRILL-3891?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Khurram Faraaz updated DRILL-3891:
----------------------------------
    Description: 
ROW_KEY filter does not get pushed into Scan when filter involved IN (integer values). Data inserted into HBase table is byte ordered and encoded as Int32.

case 1) NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), 8388607), =(CONVERT_FROM($0, 'INT_OB'), 2147483647), =(CONVERT_FROM($0, 'INT_OB'), 67108863), =(CONVERT_FROM($0, 'INT_OB'), -536870912), =(CONVERT_FROM($0, 'INT_OB'), -2147483648)))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

case 2) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), '8388607'), =(CONVERT_FROM($0, 'INT_OB'), '2147483647'), =(CONVERT_FROM($0, 'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'), '-536870912'), =(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

case 3) NOT IN (cast('8388607' as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as int),cast('-2147483648' as int));

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN (cast('8388607' as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as int),cast('-2147483648' as int));
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), CAST('8388607'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('2147483647'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('67108863'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('-536870912'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('-2147483648'):INTEGER NOT NULL)))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

Data inserted into HBase table

{code}
int[] arr = {Integer.MIN_VALUE,Integer.MIN_VALUE/4,Integer.MIN_VALUE/8,Integer.MIN_VALUE/16,Integer.MIN_VALUE/32,Integer.MIN_VALUE/64,Integer.MIN_VALUE/128,Integer.MAX_VALUE,Integer.MAX_VALUE/4,Integer.MAX_VALUE/8,Integer.MAX_VALUE/16,Integer.MAX_VALUE/32,Integer.MAX_VALUE/64,Integer.MAX_VALUE/128,Integer.MAX_VALUE/256,Integer.MAX_VALUE};

        for (int i = 0; i < arr.length; i++) {
            byte[] bytes = new byte[5];
            org.apache.hadoop.hbase.util.PositionedByteRange br =
                new org.apache.hadoop.hbase.util.SimplePositionedByteRange(bytes, 0, 5);
            org.apache.hadoop.hbase.util.OrderedBytes.encodeInt32(br, arr[i],
                org.apache.hadoop.hbase.util.Order.ASCENDING);

            Put p = new Put(bytes);
            p.add(Bytes.toBytes("colfam1"),Bytes.toBytes("qual1"),String.format("value %d", i).getBytes());
            table.put(p);
        }
{code}

  was:
ROW_KEY filter does not get pushed into Scan when filter involved IN (integer values). Data inserted into HBase table is byte ordered and encoded as Int32.

case 1) NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), 8388607), =(CONVERT_FROM($0, 'INT_OB'), 2147483647), =(CONVERT_FROM($0, 'INT_OB'), 67108863), =(CONVERT_FROM($0, 'INT_OB'), -536870912), =(CONVERT_FROM($0, 'INT_OB'), -2147483648)))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

case 2) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), '8388607'), =(CONVERT_FROM($0, 'INT_OB'), '2147483647'), =(CONVERT_FROM($0, 'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'), '-536870912'), =(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

case 3) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), '8388607'), =(CONVERT_FROM($0, 'INT_OB'), '2147483647'), =(CONVERT_FROM($0, 'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'), '-536870912'), =(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

case 4) NOT IN (cast('8388607' as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as int),cast('-2147483648' as int));

{code}
0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN (cast('8388607' as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as int),cast('-2147483648' as int));
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
00-02        SelectionVectorRemover
00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), CAST('8388607'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('2147483647'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('67108863'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('-536870912'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('-2147483648'):INTEGER NOT NULL)))])
00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
{code}

Data inserted into HBase table

{code}
int[] arr = {Integer.MIN_VALUE,Integer.MIN_VALUE/4,Integer.MIN_VALUE/8,Integer.MIN_VALUE/16,Integer.MIN_VALUE/32,Integer.MIN_VALUE/64,Integer.MIN_VALUE/128,Integer.MAX_VALUE,Integer.MAX_VALUE/4,Integer.MAX_VALUE/8,Integer.MAX_VALUE/16,Integer.MAX_VALUE/32,Integer.MAX_VALUE/64,Integer.MAX_VALUE/128,Integer.MAX_VALUE/256,Integer.MAX_VALUE};

        for (int i = 0; i < arr.length; i++) {
            byte[] bytes = new byte[5];
            org.apache.hadoop.hbase.util.PositionedByteRange br =
                new org.apache.hadoop.hbase.util.SimplePositionedByteRange(bytes, 0, 5);
            org.apache.hadoop.hbase.util.OrderedBytes.encodeInt32(br, arr[i],
                org.apache.hadoop.hbase.util.Order.ASCENDING);

            Put p = new Put(bytes);
            p.add(Bytes.toBytes("colfam1"),Bytes.toBytes("qual1"),String.format("value %d", i).getBytes());
            table.put(p);
        }
{code}


> ROW_KEY filter IN(integer values) does not get pushed in to Scan
> ----------------------------------------------------------------
>
>                 Key: DRILL-3891
>                 URL: https://issues.apache.org/jira/browse/DRILL-3891
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Execution - Flow
>    Affects Versions: 1.2.0
>         Environment: 4 node cluster CentOS
>            Reporter: Khurram Faraaz
>            Assignee: Smidth Panchamia
>
> ROW_KEY filter does not get pushed into Scan when filter involved IN (integer values). Data inserted into HBase table is byte ordered and encoded as Int32.
> case 1) NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);
> {code}
> 0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN (8388607,2147483647,67108863,-536870912,-2147483648);
> +------+------+
> | text | json |
> +------+------+
> | 00-00    Screen
> 00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
> 00-02        SelectionVectorRemover
> 00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), 8388607), =(CONVERT_FROM($0, 'INT_OB'), 2147483647), =(CONVERT_FROM($0, 'INT_OB'), 67108863), =(CONVERT_FROM($0, 'INT_OB'), -536870912), =(CONVERT_FROM($0, 'INT_OB'), -2147483648)))])
> 00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
> {code}
> case 2) NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
> {code}
> 0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN ('8388607','2147483647','67108863','-536870912','-2147483648');
> +------+------+
> | text | json |
> +------+------+
> | 00-00    Screen
> 00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
> 00-02        SelectionVectorRemover
> 00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), '8388607'), =(CONVERT_FROM($0, 'INT_OB'), '2147483647'), =(CONVERT_FROM($0, 'INT_OB'), '67108863'), =(CONVERT_FROM($0, 'INT_OB'), '-536870912'), =(CONVERT_FROM($0, 'INT_OB'), '-2147483648')))])
> 00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
> {code}
> case 3) NOT IN (cast('8388607' as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as int),cast('-2147483648' as int));
> {code}
> 0: jdbc:drill:schema=dfs.tmp> explain plan for select convert_from(row_key,'INT_OB') as rk, convert_from(T.`colfam1`.`qual1`,'UTF8') val from int_Tbl T where convert_from(row_key,'INT_OB') NOT IN (cast('8388607' as int),cast('2147483647' as int),cast('67108863' as int),cast('-536870912'as int),cast('-2147483648' as int));
> +------+------+
> | text | json |
> +------+------+
> | 00-00    Screen
> 00-01      Project(rk=[CONVERT_FROMINT_OB($0)], val=[CONVERT_FROMUTF8(ITEM($1, 'qual1'))])
> 00-02        SelectionVectorRemover
> 00-03          Filter(condition=[NOT(OR(=(CONVERT_FROM($0, 'INT_OB'), CAST('8388607'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('2147483647'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('67108863'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('-536870912'):INTEGER NOT NULL), =(CONVERT_FROM($0, 'INT_OB'), CAST('-2147483648'):INTEGER NOT NULL)))])
> 00-04            Scan(groupscan=[HBaseGroupScan [HBaseScanSpec=HBaseScanSpec [tableName=int_Tbl, startRow=null, stopRow=null, filter=null], columns=[`*`]]])
> {code}
> Data inserted into HBase table
> {code}
> int[] arr = {Integer.MIN_VALUE,Integer.MIN_VALUE/4,Integer.MIN_VALUE/8,Integer.MIN_VALUE/16,Integer.MIN_VALUE/32,Integer.MIN_VALUE/64,Integer.MIN_VALUE/128,Integer.MAX_VALUE,Integer.MAX_VALUE/4,Integer.MAX_VALUE/8,Integer.MAX_VALUE/16,Integer.MAX_VALUE/32,Integer.MAX_VALUE/64,Integer.MAX_VALUE/128,Integer.MAX_VALUE/256,Integer.MAX_VALUE};
>         for (int i = 0; i < arr.length; i++) {
>             byte[] bytes = new byte[5];
>             org.apache.hadoop.hbase.util.PositionedByteRange br =
>                 new org.apache.hadoop.hbase.util.SimplePositionedByteRange(bytes, 0, 5);
>             org.apache.hadoop.hbase.util.OrderedBytes.encodeInt32(br, arr[i],
>                 org.apache.hadoop.hbase.util.Order.ASCENDING);
>             Put p = new Put(bytes);
>             p.add(Bytes.toBytes("colfam1"),Bytes.toBytes("qual1"),String.format("value %d", i).getBytes());
>             table.put(p);
>         }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)