You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Chunhui Shi (JIRA)" <ji...@apache.org> on 2017/03/07 23:47:38 UTC

[jira] [Created] (DRILL-5328) Trim down physical plan size - replace StoragePluginConfig with storage name

Chunhui Shi created DRILL-5328:
----------------------------------

             Summary: Trim down physical plan size - replace StoragePluginConfig with storage name
                 Key: DRILL-5328
                 URL: https://issues.apache.org/jira/browse/DRILL-5328
             Project: Apache Drill
          Issue Type: Improvement
            Reporter: Chunhui Shi


Currently, a physical plan carries the full StoragePluginConfig, and the destination uses that config to look up the storage plugin in the StoragePluginRegistry. However, the storage plugin can also be fetched by its name, which is identical on all Drillbits.

In the simple 150-line physical plan shown below, the storage plugin config takes up 60 lines. In a typical large system, the FileSystem StoragePluginConfig could exceed 500 lines. This improvement should therefore reduce the cost of passing large physical plans between nodes.

0: jdbc:drill:zk=10.10.88.126:5181> explain plan for select * from dfs.tmp.employee1 where last_name='Blumberg';
+------+------+
| text | json |
+------+------+
| 00-00    Screen
00-01      Project(*=[$0])
00-02        Project(T1¦¦*=[$0])
00-03          SelectionVectorRemover
00-04            Filter(condition=[=($1, 'Blumberg')])
00-05              Project(T1¦¦*=[$0], last_name=[$1])
00-06                Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=/tmp/employee1/0_0_0.parquet]], selectionRoot=/tmp/employee1, numFiles=1, usedMetadataFile=true, cacheFileRoot=/tmp/employee1, columns=[`*`]]])
 | {
  "head" : {
    "version" : 1,
    "generator" : {
      "type" : "ExplainHandler",
      "info" : ""
    },
    "type" : "APACHE_DRILL_PHYSICAL",
    "options" : [ ],
    "queue" : 0,
    "resultMode" : "EXEC"
  },
  "graph" : [ {
    "pop" : "parquet-scan",
    "@id" : 6,
    "userName" : "root",
    "entries" : [ {
      "path" : "/tmp/employee1/0_0_0.parquet"
    } ],
    "storage" : {
      "type" : "file",
      "enabled" : true,
      "connection" : "maprfs:///",
      "config" : null,
      "workspaces" : {
        "root" : {
          "location" : "/",
          "writable" : false,
          "defaultInputFormat" : null
        },
        "tmp" : {
          "location" : "/tmp",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "shi" : {
          "location" : "/user/shi",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "dir700" : {
          "location" : "/user/shi/dir700",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "dir775" : {
          "location" : "/user/shi/dir775",
          "writable" : true,
          "defaultInputFormat" : null
        },
        "xyz" : {
          "location" : "/user/xyz",
          "writable" : true,
          "defaultInputFormat" : null
        }
      },
      "formats" : {
        "psv" : {
          "type" : "text",
          "extensions" : [ "tbl" ],
          "delimiter" : "|"
        },
        "csv" : {
          "type" : "text",
          "extensions" : [ "csv" ],
          "delimiter" : ","
        },
        "tsv" : {
          "type" : "text",
          "extensions" : [ "tsv" ],
          "delimiter" : "\t"
        },
        "parquet" : {
          "type" : "parquet"
        },
        "json" : {
          "type" : "json",
          "extensions" : [ "json" ]
        },
        "maprdb" : {
          "type" : "maprdb"
        }
      }
    },
    "format" : {
      "type" : "parquet"
    },
    "columns" : [ "`*`" ],
    "selectionRoot" : "/tmp/employee1",
    "filter" : "true",
    "fileSet" : [ "/tmp/employee1/0_0_0.parquet" ],
    "files" : [ "/tmp/employee1/0_0_0.parquet" ],
    "cost" : 1155.0
  }, {
    "pop" : "project",
    "@id" : 5,
    "exprs" : [ {
      "ref" : "`T1¦¦*`",
      "expr" : "`*`"
    }, {
      "ref" : "`last_name`",
      "expr" : "`last_name`"
    } ],
    "child" : 6,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 1155.0
  }, {
    "pop" : "filter",
    "@id" : 4,
    "child" : 5,
    "expr" : "equal(`last_name`, 'Blumberg') ",
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  }, {
    "pop" : "selection-vector-remover",
    "@id" : 3,
    "child" : 4,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  }, {
    "pop" : "project",
    "@id" : 2,
    "exprs" : [ {
      "ref" : "`T1¦¦*`",
      "expr" : "`T1¦¦*`"
    } ],
    "child" : 3,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  }, {
    "pop" : "project",
    "@id" : 1,
    "exprs" : [ {
      "ref" : "`*`",
      "expr" : "`T1¦¦*`"
    } ],
    "child" : 2,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  }, {
    "pop" : "screen",
    "@id" : 0,
    "child" : 1,
    "initialAllocation" : 1000000,
    "maxAllocation" : 10000000000,
    "cost" : 173.25
  } ]
} |




--
This message was sent by Atlassian JIRA
(v6.3.15#6346)