You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Ramana Inukonda Nagaraj (JIRA)" <ji...@apache.org> on 2014/06/02 22:37:02 UTC

[jira] [Comment Edited] (DRILL-881) Join between a JSON file and a parquet file fails with unsupported exception

    [ https://issues.apache.org/jira/browse/DRILL-881?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14014364#comment-14014364 ] 

Ramana Inukonda Nagaraj edited comment on DRILL-881 at 6/2/14 8:35 PM:
-----------------------------------------------------------------------

Explain plan:
{code}
 {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "fs-scan",
    "@id" : 131073,
    "files" : [ "maprfs:/drill/testdata/json_storage/crossData.json" ],
    "storage" : {
      "type" : "file",
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        },
        "drillTestDir" : {
          "location" : "/drill/testdata/",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirAmplab" : {
          "location" : "/drill/testdata/amplab",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirP1" : {
          "location" : "/drill/testdata/p1tests",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirExchanges" : {
          "location" : "/drill/testdata/exchanges_test",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "TpcHMulti" : {
          "location" : "/drill/testdata/tpch-multi",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirExplicit" : {
          "location" : "/drill/testdata/explicit_cast",
          "writable" : true,
          "storageformat" : "parquet"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "columns" : [ "`id`", "`firstName`" ],
    "selectionRoot" : "/drill/testdata/json_storage/crossData.json"
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 65540,
    "child" : 131073,
    "expr" : "hash(`id`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "parquet-scan",
    "@id" : 196609,
    "entries" : [ {
      "path" : "maprfs:/drill/testdata/tpch-multi/customer"
    } ],
    "storage" : {
      "type" : "file",
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        },
        "drillTestDir" : {
          "location" : "/drill/testdata/",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirAmplab" : {
          "location" : "/drill/testdata/amplab",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirP1" : {
          "location" : "/drill/testdata/p1tests",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirExchanges" : {
          "location" : "/drill/testdata/exchanges_test",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "TpcHMulti" : {
          "location" : "/drill/testdata/tpch-multi",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirExplicit" : {
          "location" : "/drill/testdata/explicit_cast",
          "writable" : true,
          "storageformat" : "parquet"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "parquet"
    },
    "columns" : [ "`C_CUSTKEY`" ],
    "selectionRoot" : "/drill/testdata/tpch-multi/customer"
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 65541,
    "child" : 196609,
    "expr" : "hash(`C_CUSTKEY`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "project",
    "@id" : 65539,
    "exprs" : [ {
      "ref" : "`*0`",
      "expr" : "`*`"
    }, {
      "ref" : "`C_CUSTKEY`",
      "expr" : "`C_CUSTKEY`"
    } ],
    "child" : 65541,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "hash-join",
    "@id" : 65538,
    "left" : 65540,
    "right" : 65539,
    "conditions" : [ {
      "relationship" : "==",
      "left" : "`id`",
      "right" : "`C_CUSTKEY`"
    } ],
    "joinType" : "INNER",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "project",
    "@id" : 65537,
    "exprs" : [ {
      "ref" : "`firstName`",
      "expr" : "`firstName`"
    } ],
    "child" : 65538,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "union-exchange",
    "@id" : 1,
    "child" : 65537,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  } ]
}
{code}


was (Author: inramana):
Explain plan:

 {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "fs-scan",
    "@id" : 131073,
    "files" : [ "maprfs:/drill/testdata/json_storage/crossData.json" ],
    "storage" : {
      "type" : "file",
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        },
        "drillTestDir" : {
          "location" : "/drill/testdata/",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirAmplab" : {
          "location" : "/drill/testdata/amplab",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirP1" : {
          "location" : "/drill/testdata/p1tests",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirExchanges" : {
          "location" : "/drill/testdata/exchanges_test",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "TpcHMulti" : {
          "location" : "/drill/testdata/tpch-multi",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirExplicit" : {
          "location" : "/drill/testdata/explicit_cast",
          "writable" : true,
          "storageformat" : "parquet"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "json"
    },
    "columns" : [ "`id`", "`firstName`" ],
    "selectionRoot" : "/drill/testdata/json_storage/crossData.json"
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 65540,
    "child" : 131073,
    "expr" : "hash(`id`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "parquet-scan",
    "@id" : 196609,
    "entries" : [ {
      "path" : "maprfs:/drill/testdata/tpch-multi/customer"
    } ],
    "storage" : {
      "type" : "file",
      "connection" : "maprfs:///",
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "storageformat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "storageformat" : "csv"
        },
        "drillTestDir" : {
          "location" : "/drill/testdata/",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirAmplab" : {
          "location" : "/drill/testdata/amplab",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirP1" : {
          "location" : "/drill/testdata/p1tests",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirExchanges" : {
          "location" : "/drill/testdata/exchanges_test",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "TpcHMulti" : {
          "location" : "/drill/testdata/tpch-multi",
          "writable" : true,
          "storageformat" : "parquet"
        },
        "drillTestDirExplicit" : {
          "location" : "/drill/testdata/explicit_cast",
          "writable" : true,
          "storageformat" : "parquet"
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json"
        }
      }
    },
    "format" : {
      "type" : "parquet"
    },
    "columns" : [ "`C_CUSTKEY`" ],
    "selectionRoot" : "/drill/testdata/tpch-multi/customer"
  }, {
    "pop" : "hash-to-random-exchange",
    "@id" : 65541,
    "child" : 196609,
    "expr" : "hash(`C_CUSTKEY`) ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "project",
    "@id" : 65539,
    "exprs" : [ {
      "ref" : "`*0`",
      "expr" : "`*`"
    }, {
      "ref" : "`C_CUSTKEY`",
      "expr" : "`C_CUSTKEY`"
    } ],
    "child" : 65541,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "hash-join",
    "@id" : 65538,
    "left" : 65540,
    "right" : 65539,
    "conditions" : [ {
      "relationship" : "==",
      "left" : "`id`",
      "right" : "`C_CUSTKEY`"
    } ],
    "joinType" : "INNER",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "project",
    "@id" : 65537,
    "exprs" : [ {
      "ref" : "`firstName`",
      "expr" : "`firstName`"
    } ],
    "child" : 65538,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "union-exchange",
    "@id" : 1,
    "child" : 65537,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000
  } ]
}

> Join between a JSON file and a parquet file fails with unsupported exception
> ----------------------------------------------------------------------------
>
>                 Key: DRILL-881
>                 URL: https://issues.apache.org/jira/browse/DRILL-881
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Query Planning & Optimization
>            Reporter: Ramana Inukonda Nagaraj
>            Assignee: Steven Phillips
>
> select c_json.firstName from `json_storage/crossData.json` c_json,`tpch-multi/customer` c where c.C_CUSTKEY=c_json.id;
> The following queries work:
> select c_json.firstName from `json_storage/crossData.json` c_json;
> select c.C_CUSTKEY from `tpch-multi/customer` c;
> So it's not a problem with the data sources.



--
This message was sent by Atlassian JIRA
(v6.2#6252)