You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@drill.apache.org by "Chun Chang (JIRA)" <ji...@apache.org> on 2014/05/22 01:10:38 UTC
[jira] [Created] (DRILL-805) when we cast a string to date, we add time component and timezone too, which is wrong

Chun Chang created DRILL-805:
--------------------------------

             Summary: when we cast a string to date, we add time component and timezone too, which is wrong
                 Key: DRILL-805
                 URL: https://issues.apache.org/jira/browse/DRILL-805
             Project: Apache Drill
          Issue Type: Bug
          Components: Execution - Operators
            Reporter: Chun Chang
            Assignee: Jinfeng Ni


Tested with git.commit.id.abbrev=2fad21d.

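The c_date column holds date information stored as a string. When we cast it to the DATE type, the result includes a time component as well as a timezone offset, neither of which belongs in a DATE value. The timezone offset also seems to be random: it varies between -08:00 and -07:00 from row to row.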

0: jdbc:drill:schema=dfs> select c_row, cast(c_date as varchar(20)), cast(c_date as date) from data where c_row < 23;
+------------+------------+------------+
|   c_row    |   EXPR$1   |   EXPR$2   |
+------------+------------+------------+
| 1          | 1957-04-09 | 1957-04-09T00:00:00.000-08:00 |
| 2          | 1957-06-13 | 1957-06-13T00:00:00.000-07:00 |
| 3          | 1996-02-28 | 1996-02-28T00:00:00.000-08:00 |
| 4          | 1996-01-29 | 1996-01-29T00:00:00.000-08:00 |
| 5          | 1996-03-01 | 1996-03-01T00:00:00.000-08:00 |
| 6          | 1996-03-02 | 1996-03-02T00:00:00.000-08:00 |
| 7          | 1997-02-28 | 1997-02-28T00:00:00.000-08:00 |
| 8          | 1997-01-29 | 1997-01-29T00:00:00.000-08:00 |
| 9          | 1997-03-01 | 1997-03-01T00:00:00.000-08:00 |
| 10         | 1997-03-02 | 1997-03-02T00:00:00.000-08:00 |
| 11         | 2000-04-01 | 2000-04-01T00:00:00.000-08:00 |
| 12         | 2000-04-02 | 2000-04-02T00:00:00.000-08:00 |
| 13         | 2000-04-03 | 2000-04-03T00:00:00.000-07:00 |
| 14         | 2038-04-08 | 2038-04-08T00:00:00.000-07:00 |
| 15         | 2039-04-09 | 2039-04-09T00:00:00.000-07:00 |
| 16         | 2040-04-10 | 2040-04-10T00:00:00.000-07:00 |
| 17         | 1999-01-08 | 1999-01-08T00:00:00.000-08:00 |
| 18         | 1999-02-08 | 1999-02-08T00:00:00.000-08:00 |
| 19         | 1999-03-08 | 1999-03-08T00:00:00.000-08:00 |
| 20         | 1999-01-18 | 1999-01-18T00:00:00.000-08:00 |
| 21         | 2003-01-02 | 2003-01-02T00:00:00.000-08:00 |
| 22         | 1999-04-08 | 1999-04-08T00:00:00.000-07:00 |
+------------+------------+------------+
22 rows selected (0.1 seconds)

Physical plan for the query above:

0: jdbc:drill:schema=dfs> explain plan for select c_row, cast(c_date as varchar(20)), cast(c_date as date) from data where c_row < 23;
+------------+------------+
|    text    |    json    |
+------------+------------+
| ScreenPrel
  ProjectPrel(c_row=[$1], EXPR$1=[CAST($2):VARCHAR(20) CHARACTER SET "ISO-8859-1" COLLATE "ISO-8859-1$en_US$primary"], EXPR$2=[CAST($2):DATE])
    FilterPrel(condition=[<($1, 23)])
      ScanPrel(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=maprfs:/user/root/mondrian/data]], selectionRoot=/user/root/mondrian/data, columns=[SchemaPath [`c_row`], SchemaPath [`c_date`]]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "parquet-scan",
    "@id" : 1,
    "entries" : [ {
      "path" : "maprfs:/user/root/mondrian/data"
    } ],
    "storage" : {
      "type" : "file",
      "connection" : "maprfs:///",
      "workspaces" : {
        "default" : {
          "location" : "/user/root/mondrian/",
          "writable" : false,
          "storageformat" : null
        },
        "home" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "parquet"
    },
    "columns" : [ "`c_row`", "`c_date`" ],
    "selectionRoot" : "/user/root/mondrian/data"
  }, {
    "pop" : "filter",
    "@id" : 2,
    "child" : 1,
    "expr" : "less_than(`c_row`, 23) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "project",
    "@id" : 3,
    "exprs" : [ {
      "ref" : "`c_row`",
      "expr" : "`c_row`"
    }, {
      "ref" : "`EXPR$1`",
      "expr" : "cast( (`c_date` ) as VARCHAR(20) )"
    }, {
      "ref" : "`EXPR$2`",
      "expr" : "cast( (`c_date` ) as DATE )"
    } ],
    "child" : 2,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "screen",
    "@id" : 4,
    "child" : 3,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  } ]
} |



--
This message was sent by Atlassian JIRA
(v6.2#6252)