You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by br...@apache.org on 2015/06/24 02:16:25 UTC

[5/6] drill git commit: add zk connect example plus edits

add zk connect example plus edits

formatting

cannot hv 2 pg same name

typo

formatting

conflicting titles


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/06cc2524
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/06cc2524
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/06cc2524

Branch: refs/heads/gh-pages
Commit: 06cc2524dce2b5c8444b9f286998644d1e2b0c73
Parents: 7de2ea7
Author: Kristine Hahn <kh...@maprtech.com>
Authored: Tue Jun 23 13:47:07 2015 -0700
Committer: Kristine Hahn <kh...@maprtech.com>
Committed: Tue Jun 23 16:52:25 2015 -0700

----------------------------------------------------------------------
 _data/docs.json                                 | 54 +++-----------
 _docs/archived-pages/030-partition-pruning.md   | 75 --------------------
 .../connect-a-data-source/100-mapr-db-format.md |  2 +-
 .../performance-tuning/020-partition-pruning.md |  4 +-
 4 files changed, 13 insertions(+), 122 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/06cc2524/_data/docs.json
----------------------------------------------------------------------
diff --git a/_data/docs.json b/_data/docs.json
index ddd3a4e..6f6aca1 100644
--- a/_data/docs.json
+++ b/_data/docs.json
@@ -511,31 +511,14 @@
                         }
                     ], 
                     "children": [], 
-                    "next_title": "Partition Pruning", 
-                    "next_url": "/docs/partition-pruning/", 
+                    "next_title": "Progress Reports", 
+                    "next_url": "/docs/progress-reports/", 
                     "parent": "Archived Pages", 
                     "previous_title": "How to Run the Drill Demo", 
                     "previous_url": "/docs/how-to-run-the-drill-demo/", 
                     "relative_path": "_docs/archived-pages/020-what-is-apache-drill.md", 
                     "title": "What is Apache Drill", 
                     "url": "/docs/what-is-apache-drill/"
-                }, 
-                {
-                    "breadcrumbs": [
-                        {
-                            "title": "Archived Pages", 
-                            "url": "/docs/archived-pages/"
-                        }
-                    ], 
-                    "children": [], 
-                    "next_title": "Progress Reports", 
-                    "next_url": "/docs/progress-reports/", 
-                    "parent": "Archived Pages", 
-                    "previous_title": "What is Apache Drill", 
-                    "previous_url": "/docs/what-is-apache-drill/", 
-                    "relative_path": "_docs/archived-pages/030-partition-pruning.md", 
-                    "title": "Partition Pruning", 
-                    "url": "/docs/partition-pruning/"
                 }
             ], 
             "next_title": "How to Run the Drill Demo", 
@@ -6168,8 +6151,8 @@
             "next_title": "2014 Q1 Drill Report", 
             "next_url": "/docs/2014-q1-drill-report/", 
             "parent": "", 
-            "previous_title": "Partition Pruning", 
-            "previous_url": "/docs/partition-pruning/", 
+            "previous_title": "What is Apache Drill", 
+            "previous_url": "/docs/what-is-apache-drill/", 
             "relative_path": "_docs/160-progress-reports.md", 
             "title": "Progress Reports", 
             "url": "/docs/progress-reports/"
@@ -10933,8 +10916,8 @@
                 }
             ], 
             "children": [], 
-            "next_title": "Partition Pruning", 
-            "next_url": "/docs/partition-pruning/", 
+            "next_title": "Progress Reports", 
+            "next_url": "/docs/progress-reports/", 
             "parent": "Archived Pages", 
             "previous_title": "How to Run the Drill Demo", 
             "previous_url": "/docs/how-to-run-the-drill-demo/", 
@@ -15316,31 +15299,14 @@
                         }
                     ], 
                     "children": [], 
-                    "next_title": "Partition Pruning", 
-                    "next_url": "/docs/partition-pruning/", 
+                    "next_title": "Progress Reports", 
+                    "next_url": "/docs/progress-reports/", 
                     "parent": "Archived Pages", 
                     "previous_title": "How to Run the Drill Demo", 
                     "previous_url": "/docs/how-to-run-the-drill-demo/", 
                     "relative_path": "_docs/archived-pages/020-what-is-apache-drill.md", 
                     "title": "What is Apache Drill", 
                     "url": "/docs/what-is-apache-drill/"
-                }, 
-                {
-                    "breadcrumbs": [
-                        {
-                            "title": "Archived Pages", 
-                            "url": "/docs/archived-pages/"
-                        }
-                    ], 
-                    "children": [], 
-                    "next_title": "Progress Reports", 
-                    "next_url": "/docs/progress-reports/", 
-                    "parent": "Archived Pages", 
-                    "previous_title": "What is Apache Drill", 
-                    "previous_url": "/docs/what-is-apache-drill/", 
-                    "relative_path": "_docs/archived-pages/030-partition-pruning.md", 
-                    "title": "Partition Pruning", 
-                    "url": "/docs/partition-pruning/"
                 }
             ], 
             "next_title": "How to Run the Drill Demo", 
@@ -15376,8 +15342,8 @@
             "next_title": "2014 Q1 Drill Report", 
             "next_url": "/docs/2014-q1-drill-report/", 
             "parent": "", 
-            "previous_title": "Partition Pruning", 
-            "previous_url": "/docs/partition-pruning/", 
+            "previous_title": "What is Apache Drill", 
+            "previous_url": "/docs/what-is-apache-drill/", 
             "relative_path": "_docs/160-progress-reports.md", 
             "title": "Progress Reports", 
             "url": "/docs/progress-reports/"

http://git-wip-us.apache.org/repos/asf/drill/blob/06cc2524/_docs/archived-pages/030-partition-pruning.md
----------------------------------------------------------------------
diff --git a/_docs/archived-pages/030-partition-pruning.md b/_docs/archived-pages/030-partition-pruning.md
deleted file mode 100644
index 3dc79ff..0000000
--- a/_docs/archived-pages/030-partition-pruning.md
+++ /dev/null
@@ -1,75 +0,0 @@
----
-title: "Partition Pruning"
-parent: "Archived Pages"
----
-Partition pruning is a performance optimization that limits the number of
-files and partitions that Drill reads when querying file systems and Hive
-tables. Drill only reads a subset of the files that reside in a file system or
-a subset of the partitions in a Hive table when a query matches certain filter
-criteria.
-
-For Drill to apply partition pruning to Hive tables, you must have created the
-tables in Hive using the `PARTITION BY` clause:
-
-`CREATE TABLE <table_name> (<column_name>) PARTITION BY (<column_name>);`
-
-When you create Hive tables using the `PARTITION BY` clause, each partition of
-data is automatically split out into different directories as data is written
-to disk. For more information about Hive partitioning, refer to the [Apache
-Hive wiki](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL/#LanguageManualDDL-PartitionedTables).
-
-Typically, table data in a file system is organized by directories and
-subdirectories. Queries on table data may contain `WHERE` clause filters on
-specific directories.
-
-Drill’s query planner evaluates the filters as part of a Filter operator. If
-no partition filters are present, the underlying Scan operator reads all files
-in all directories and then sends the data to operators downstream, such as
-Filter.
-
-When partition filters are present, the query planner determines if it can
-push the filters down to the Scan such that the Scan only reads the
-directories that match the partition filters, thus reducing disk I/O.
-
-## Partition Pruning Example
-
-The /`Users/max/data/logs` directory in a file system contains subdirectories
-that span a few years.
-
-The following image shows the hierarchical structure of the `…/logs` directory
-and (sub) directories:
-
-![drill query flow]({{ site.baseurl }}/docs/img/54.png)
-
-The following query requests log file data for 2013 from the `…/logs`
-directory in the file system:
-
-    SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 and dir0 = 2013 limit 2;
-
-If you run the `EXPLAIN PLAN` command for the query, you can see that the`
-…/logs` directory is filtered by the scan operator.
-
-    EXPLAIN PLAN FOR SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 and dir0 = 2013 limit 2;
-
-The following image shows a portion of the physical plan when partition
-pruning is applied:
-
-![drill query flow]({{ site.baseurl }}/docs/img/21.png)
-
-## Filter Examples
-
-The following queries include examples of the types of filters eligible for
-partition pruning optimization:
-
-**Example 1: Partition filters ANDed together**
-
-    SELECT * FROM dfs.`/Users/max/data/logs` WHERE dir0 = '2014' AND dir1 = '1'
-
-**Example 2: Partition filter ANDed with regular column filter**
-
-    SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 AND dir0 = 2013 limit 2;
-
-**Example 3: Combination of AND, OR involving partition filters**
-
-    SELECT * FROM dfs.`/Users/max/data/logs` WHERE (dir0 = '2013' AND dir1 = '1') OR (dir0 = '2014' AND dir1 = '2')
-

http://git-wip-us.apache.org/repos/asf/drill/blob/06cc2524/_docs/connect-a-data-source/100-mapr-db-format.md
----------------------------------------------------------------------
diff --git a/_docs/connect-a-data-source/100-mapr-db-format.md b/_docs/connect-a-data-source/100-mapr-db-format.md
index 25098dd..74f8387 100755
--- a/_docs/connect-a-data-source/100-mapr-db-format.md
+++ b/_docs/connect-a-data-source/100-mapr-db-format.md
@@ -2,7 +2,7 @@
 title: "MapR-DB Format"
 parent: "Connect a Data Source"
 ---
-The MapR-DB format is not included in apache drill release. Drill includes a `maprdb` format for MapR-DB that is defined within the
+The MapR-DB format is not included in the Apache Drill release. Drill includes a `maprdb` format for MapR-DB that is defined within the
 default `dfs` storage plugin instance when you install Drill from the `mapr-drill` package on a MapR node. The `maprdb` format improves the
 estimated number of rows that Drill uses to plan a query. It also enables you
 to query tables like you would query files in a file system because MapR-DB

http://git-wip-us.apache.org/repos/asf/drill/blob/06cc2524/_docs/performance-tuning/020-partition-pruning.md
----------------------------------------------------------------------
diff --git a/_docs/performance-tuning/020-partition-pruning.md b/_docs/performance-tuning/020-partition-pruning.md
index 49be254..7c3f272 100755
--- a/_docs/performance-tuning/020-partition-pruning.md
+++ b/_docs/performance-tuning/020-partition-pruning.md
@@ -7,12 +7,12 @@ Partition pruning is a performance optimization that limits the number of files
  
 The query planner in Drill performs partition pruning by evaluating the filters. If no partition filters are present, the underlying Scan operator reads all files in all directories and then sends the data to operators, such as Filter, downstream. When partition filters are present, the query planner pushes the filters down to the Scan if possible. The Scan reads only the directories that match the partition filters, thus reducing disk I/O.
 
-## How to Use Partition Pruning
+## How to Partition Data
 
 You can partition data manually or automatically to take advantage of partition pruning in Drill. In Drill 1.0 and earlier, you need to organize your data in such a way to take advantage of partition pruning. In Drill 1.1.0 and later, if the data source is Parquet, you can partition data automatically using CTAS--no data organization tasks required. 
 
 ## Automatic Partitioning
-Automatic partitioning in Drill 1.1.0 and later occurs when you write Parquet date using the [[PARTITION BY]({{site.baseurl}}/docs/partition-by-clause/) clause in the CTAS statemebnt.
+Automatic partitioning in Drill 1.1.0 and later occurs when you write Parquet data using the [PARTITION BY]({{site.baseurl}}/docs/partition-by-clause/) clause in the CTAS statement.
 
 Automatic partitioning creates separate files, but not separate directories, for different partitions. Each file contains exactly one partition value, but there could be multiple files for the same partition value.