You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ar...@apache.org on 2018/05/01 22:36:01 UTC

[1/2] impala git commit: [DOCS] Minor copy edits and typo fixes

Repository: impala
Updated Branches:
  refs/heads/master 52a2d90f9 -> 202807e2f


[DOCS] Minor copy edits and typo fixes

Change-Id: I353a7917eb770aa40696476a587d7600289c336c
Cherry-picks: not for 2.x.
Reviewed-on: http://gerrit.cloudera.org:8080/10276
Reviewed-by: Alex Rodoni <ar...@cloudera.com>
Tested-by: Alex Rodoni <ar...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/80d07aff
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/80d07aff
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/80d07aff

Branch: refs/heads/master
Commit: 80d07afff3ab03da77071198eb8c5181c01a215c
Parents: 52a2d90
Author: Alex Rodoni <ar...@cloudera.com>
Authored: Tue May 1 15:07:30 2018 -0700
Committer: Alex Rodoni <ar...@cloudera.com>
Committed: Tue May 1 22:10:42 2018 +0000

----------------------------------------------------------------------
 docs/topics/impala_decimal.xml | 166 +++++++++++++++++-------------------
 1 file changed, 78 insertions(+), 88 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/80d07aff/docs/topics/impala_decimal.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_decimal.xml b/docs/topics/impala_decimal.xml
index f1a7f7a..268a789 100644
--- a/docs/topics/impala_decimal.xml
+++ b/docs/topics/impala_decimal.xml
@@ -208,20 +208,20 @@ under the License.
     </p>
 
     <p>
-      For all arithmetic options, the resulting precision is at most 38.
+      For all arithmetic operations, the resulting precision is at most 38.
     </p>
 
     <p>
-      If the precision of the result would be greater than 38, Impala truncates the result from
-      the back, but keeps at least 6 fractional digits in scale and rounds.
+      If the resulting precision would be greater than 38, Impala truncates the result from the
+      back, but keeps at least 6 fractional digits in scale and rounds.
     </p>
 
     <p>
       For example, <codeph>DECIMAL(38, 20) * DECIMAL(38, 20)</codeph> returns
       <codeph>DECIMAL(38, 6)</codeph>. According to the table below, the resulting precision and
       scale would be <codeph>(77, 40)</codeph>, but they are higher than the maximum precision
-      and scale. So, Impala sets the precision to the maximum allowed 38, and truncates the
-      scale to 6.
+      and scale for <codeph>DECIMAL</codeph>. So, Impala sets the precision to the maximum
+      allowed 38, and truncates the scale to 6.
     </p>
 
     <p>
@@ -401,8 +401,7 @@ INSERT INTO flt SELECT CAST(1e37 AS DECIMAL(38, 0));
 SELECT CAST(c AS DECIMAL(38, 0)) FROM flt;
 
 Result: 9999999933815812510711506376257961984</codeblock>
-          <p
-            dir="ltr">
+          <p dir="ltr">
             The result has a loss of information due to implicit casting. This is why we
             discourage using the <codeph>DOUBLE</codeph> and <codeph>FLOAT</codeph> types in
             general.
@@ -426,7 +425,7 @@ Result: 9999999933815812510711506376257961984</codeblock>
 
         <li>
           Integer values can be implicitly converted to <codeph>DECIMAL</codeph> when there is
-          enough room in the <codeph>DECIMAL</codeph> to guarantee that all digits will fit. The
+          enough room in the <codeph>DECIMAL</codeph> to guarantee that all digits fit. The
           integer types require the following numbers of digits to the left of the decimal point
           when converted to <codeph>DECIMAL</codeph>:
           <ul>
@@ -594,11 +593,8 @@ INSERT INTO decimals_11_8 VALUES (CAST(1 AS TINYINT));</codeblock>
       <ul>
         <li dir="ltr">
           <p dir="ltr">
-            If scale in <codeph>STRING</codeph> > scale in <codeph>DECIMAL</codeph>:
-          </p>
-
-          <p dir="ltr">
-            The fractional digits are rounded to the <codeph>DECIMAL</codeph> scale.
+            If scale in <codeph>STRING</codeph> > scale in <codeph>DECIMAL</codeph>, the
+            fractional digits are rounded to the <codeph>DECIMAL</codeph> scale.
           </p>
 
           <p dir="ltr">
@@ -724,17 +720,12 @@ INSERT INTO decimals_11_8 VALUES (CAST(1 AS TINYINT));</codeblock>
 
     <p conref="../shared/impala_common.xml#common/file_format_blurb"/>
 
-    <p>
+    <p dir="ltr">
+      The <codeph>DECIMAL</codeph> data type can be stored in any of the file formats supported
+      by Impala.
       <ul>
         <li dir="ltr">
           <p dir="ltr">
-            The <codeph>DECIMAL</codeph> data type can be stored in any of the file formats
-            supported by Impala.
-          </p>
-        </li>
-
-        <li dir="ltr">
-          <p dir="ltr">
             Impala can query Avro, RCFile, or SequenceFile tables that contain
             <codeph>DECIMAL</codeph> columns, created by other Hadoop components.
           </p>
@@ -834,10 +825,10 @@ INSERT INTO decimals_11_8 VALUES (CAST(1 AS TINYINT));</codeblock>
                 Although an <codeph>ALTER TABLE ... REPLACE COLUMNS</codeph> statement that
                 changes the precision or scale of a <codeph>DECIMAL</codeph> column succeeds,
                 any subsequent attempt to query the changed column results in a fatal error.
-                (The other columns can still be queried successfully.) This is because the
-                metadata about the columns is stored in the data files themselves, and
-                <codeph>ALTER TABLE</codeph> does not actually make any updates to the data
-                files.
+                This is because the metadata about the columns is stored in the data files
+                themselves, and <codeph>ALTER TABLE</codeph> does not actually make any updates
+                to the data files. The other unaltered columns can still be queried
+                successfully.
               </p>
             </li>
 
@@ -870,102 +861,101 @@ INSERT INTO decimals_11_8 VALUES (CAST(1 AS TINYINT));</codeblock>
       STATS</codeph> statement.
     </p>
 
-    <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+    <p>
+      <b>Compatibility with older version of DECIMAL:</b>
+    </p>
 
     <p>
-      <ul>
-        <li dir="ltr">
-          <p dir="ltr">
-            This version of <codeph>DECIMAL</codeph> type is the default in
-            <keyword keyref="impala30_full"/> and higher. The key differences between this
-            version of <codeph>DECIMAL</codeph> and the previous <codeph>DECIMAL</codeph> V1 in
-            Impala 2.x include the following.
-          </p>
-          <simpletable frame="all" relcolwidth="1* 1* 1*"
-            id="simpletable_bdr_rzc_qdb">
+      This version of <codeph>DECIMAL</codeph> type is the default in
+      <keyword
+        keyref="impala30_full"/> and higher. The key differences between this
+      version of <codeph>DECIMAL</codeph> and the previous <codeph>DECIMAL</codeph> V1 in Impala
+      2.x include the following.
+    </p>
+
+    <p>
+      <simpletable frame="all" relcolwidth="1* 1* 1*"
+        id="simpletable_bwl_khm_rdb">
 
-            <sthead>
+        <sthead>
 
-              <stentry/>
+          <stentry/>
 
-              <stentry>DECIMAL in <keyword keyref="impala30_full"/> or
-                higher</stentry>
+          <stentry>DECIMAL in <keyword keyref="impala30_full"/> or
+            higher</stentry>
 
-              <stentry>DECIMAL in <keyword keyref="impala212_full"/> or
-                  lower
+          <stentry>DECIMAL in <keyword keyref="impala212_full"/> or lower
+          </stentry>
 
-              </stentry>
+        </sthead>
 
-            </sthead>
+        <strow>
 
-            <strow>
+          <stentry>Overall behavior</stentry>
 
-              <stentry>Overall behavior</stentry>
+          <stentry>Returns either the result or an error.</stentry>
 
-              <stentry>Returns either the result or an error.</stentry>
+          <stentry>Returns either the result or <codeph>NULL</codeph> with a
+            warning.</stentry>
 
-              <stentry>Returns either the result or <codeph>NULL</codeph> with a
-                warning.</stentry>
+        </strow>
 
-            </strow>
+        <strow>
 
-            <strow>
+          <stentry>Overflow behavior</stentry>
 
-              <stentry>Overflow behavior</stentry>
+          <stentry>Aborts with an error.</stentry>
 
-              <stentry>Aborts with an error.</stentry>
+          <stentry>Issues a warning and returns <codeph>NULL</codeph>.</stentry>
 
-              <stentry>Issues a warning and returns
-                <codeph>NULL</codeph>.</stentry>
+        </strow>
 
-            </strow>
+        <strow>
 
-            <strow>
+          <stentry>Truncation / rounding behavior in arithmetic</stentry>
 
-              <stentry>Truncation / rounding behavior in arithmetic</stentry>
+          <stentry>Truncates and rounds digits from the back.</stentry>
 
-              <stentry>Truncates and rounds digits from the back.</stentry>
+          <stentry>Truncates digits from the front.</stentry>
 
-              <stentry>Truncates digits from the front.</stentry>
+        </strow>
 
-            </strow>
+        <strow>
 
-            <strow>
+          <stentry>String cast</stentry>
 
-              <stentry>String cast</stentry>
+          <stentry>Truncates from the back and rounds.</stentry>
 
-              <stentry>Truncates from the back and rounds.</stentry>
+          <stentry>Truncates from the back.</stentry>
 
-              <stentry>Truncates from the back.</stentry>
+        </strow>
 
-            </strow>
+      </simpletable>
+    </p>
 
-          </simpletable>
-          <p>
-            If you need to continue using the first version of the <codeph>DECIMAL</codeph> type
-            for the backward compatibility of your queries, set the <codeph>DECIMAL_V2</codeph>
-            query option to <codeph>FALSE</codeph>:
+    <p>
+      If you need to continue using the first version of the <codeph>DECIMAL</codeph> type for
+      the backward compatibility of your queries, set the <codeph>DECIMAL_V2</codeph> query
+      option to <codeph>FALSE</codeph>:
 <codeblock>SET DECIMAL_V2=FALSE;</codeblock>
-          </p>
-        </li>
+    </p>
 
-        <li dir="ltr">
-          <p dir="ltr">
-            Use the <codeph>DECIMAL</codeph> data type in Impala for applications where you used
-            the <codeph>NUMBER</codeph> data type in Oracle.
-          </p>
+    <p>
+      <b>Compatibility with other databases:</b>
+    </p>
 
-          <p dir="ltr">
-            The Impala <codeph>DECIMAL</codeph> type does not support the Oracle idioms of
-            <codeph>*</codeph> for scale.
-          </p>
+    <p dir="ltr">
+      Use the <codeph>DECIMAL</codeph> data type in Impala for applications where you used the
+      <codeph>NUMBER</codeph> data type in Oracle.
+    </p>
 
-          <p dir="ltr">
-            The Impala <codeph>DECIMAL</codeph> type does not support negative values for
-            precision.
-          </p>
-        </li>
-      </ul>
+    <p dir="ltr">
+      The Impala <codeph>DECIMAL</codeph> type does not support the Oracle idioms of
+      <codeph>*</codeph> for scale.
+    </p>
+
+    <p dir="ltr">
+      The Impala <codeph>DECIMAL</codeph> type does not support negative values for precision.
     </p>
 
   </conbody>


[2/2] impala git commit: Speed up Python dependencies.

Posted by ar...@apache.org.
Speed up Python dependencies.

This parallelizes downloading some Python libraries, which speeds up
$IMPALA_HOME/infra/python/deps/download_requirements.  I've seen its run
time drop from 7-15 seconds before this change to 2-5 seconds after.

I also checked that we always have at least Python 2.6 when
building Impala, so I was able to remove the try/except
handling in bootstrap_toolchain.

Change-Id: I7cbf622adb7d037f1a53c519402dcd8ae3c0fe30
Reviewed-on: http://gerrit.cloudera.org:8080/10234
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/202807e2
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/202807e2
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/202807e2

Branch: refs/heads/master
Commit: 202807e2ff93b6771f26f47b65c3fb9e0cedac38
Parents: 80d07af
Author: Philip Zeyliger <ph...@cloudera.com>
Authored: Mon Apr 23 11:16:42 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Tue May 1 22:12:39 2018 +0000

----------------------------------------------------------------------
 bin/bootstrap_toolchain.py        | 18 +++++----------
 infra/python/deps/pip_download.py | 42 +++++++++++++++++-----------------
 2 files changed, 27 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/202807e2/bin/bootstrap_toolchain.py
----------------------------------------------------------------------
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index f54bf04..6070350 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -35,6 +35,7 @@
 #
 #     python bootstrap_toolchain.py
 import logging
+import multiprocessing.pool
 import os
 import random
 import re
@@ -350,19 +351,12 @@ extern "C" void %s() {
 
 def execute_many(f, args):
   """
-  Executes f(a) for a in args. If possible, uses a threadpool
-  to execute in parallel. The pool uses the number of CPUs
-  in the system as the default size.
+  Executes f(a) for a in args using a threadpool to execute in parallel.
+  The pool uses the smaller of 4 and the number of CPUs in the system
+  as the pool size.
   """
-  pool = None
-  try:
-    import multiprocessing.pool
-    pool = multiprocessing.pool.ThreadPool(processes=min(multiprocessing.cpu_count(), 4))
-    return pool.map(f, args, 1)
-  except ImportError:
-    # multiprocessing was introduced in Python 2.6.
-    # For older Pythons (CentOS 5), degrade to single-threaded execution:
-    return [ f(a) for a in args ]
+  pool = multiprocessing.pool.ThreadPool(processes=min(multiprocessing.cpu_count(), 4))
+  return pool.map(f, args, 1)
 
 def download_cdh_components(toolchain_root, cdh_components):
   """Downloads and unpacks the CDH components into $CDH_COMPONENTS_HOME if not found."""

http://git-wip-us.apache.org/repos/asf/impala/blob/202807e2/infra/python/deps/pip_download.py
----------------------------------------------------------------------
diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py
index 0cce9e9..3e593c4 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -22,6 +22,7 @@
 # This script requires Python 2.6+.
 
 import hashlib
+import multiprocessing.pool
 import os
 import os.path
 import re
@@ -130,30 +131,29 @@ def main():
     download_package(pkg_name, pkg_version)
     return
 
+  pool = multiprocessing.pool.ThreadPool(processes=min(multiprocessing.cpu_count(), 4))
+  results = []
+
   for requirements_file in REQUIREMENTS_FILES:
     # If the package name and version are not specified in the command line arguments,
     # download the packages that in requirements.txt.
-    f = open(requirements_file, 'r')
-    try:
-      # requirements.txt follows the standard pip grammar.
-      for line in f:
-        # A hash symbol ("#") represents a comment that should be ignored.
-        hash_index = line.find('#')
-        if hash_index != -1:
-          line = line[:hash_index]
-        # A semi colon (";") specifies some additional condition for when the package
-        # should be installed (for example a specific OS). We can ignore this and download
-        # the package anyways because the installation script(bootstrap_virtualenv.py) can
-        # take it into account.
-        semi_colon_index = line.find(';')
-        if semi_colon_index != -1:
-          line = line[:semi_colon_index]
-        l = line.strip()
-        if len(l) > 0:
-          pkg_name, pkg_version = l.split('==')
-          download_package(pkg_name.strip(), pkg_version.strip())
-    finally:
-      f.close()
+    # requirements.txt follows the standard pip grammar.
+    for line in open(requirements_file):
+      # A hash symbol ("#") represents a comment that should be ignored.
+      line = line.split("#")[0]
+      # A semi colon (";") specifies some additional condition for when the package
+      # should be installed (for example a specific OS). We can ignore this and download
+      # the package anyways because the installation script(bootstrap_virtualenv.py) can
+      # take it into account.
+      l = line.split(";")[0].strip()
+      if not l:
+        continue
+      pkg_name, pkg_version = l.split('==')
+      results.append(pool.apply_async(
+        download_package, args=[pkg_name.strip(), pkg_version.strip()]))
+
+    for x in results:
+      x.get()
 
 if __name__ == '__main__':
   main()