You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@impala.apache.org by jr...@apache.org on 2017/07/07 19:04:32 UTC

[1/2] incubator-impala git commit: IMPALA-5618: buffered-tuple-stream-v2 fixes

Repository: incubator-impala
Updated Branches:
  refs/heads/master bc1feb34d -> 07d3cea1f


IMPALA-5618: buffered-tuple-stream-v2 fixes

This fixes two issues:
* AddRowCustom() caused a performance regression when the callback
  function was heap-allocated. This is solved by splitting the API into two
  separate calls. This imposes an additional burden on the caller
  but it is easier to reason about its performance.
* Allow re-reading streams with 'delete_on_read_' set so long as no rows
  were read from the stream. This is necessary for some spilling ExecNodes
  that prepare the stream for reading in order to acquire the buffer,
  but then need to spill the stream to free memory before they actually
  are able to read the stream.

Change-Id: Ibab0d774f66be632f17376a56abf302821cca047
Reviewed-on: http://gerrit.cloudera.org:8080/7358
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/081ecf01
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/081ecf01
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/081ecf01

Branch: refs/heads/master
Commit: 081ecf01526449c2360d2d702afc1488b57e07fb
Parents: bc1feb3
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Jul 5 17:55:58 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Jul 7 08:15:59 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/buffered-tuple-stream-v2-test.cc | 18 +++--
 be/src/runtime/buffered-tuple-stream-v2.cc      | 51 +++++++-------
 be/src/runtime/buffered-tuple-stream-v2.h       | 72 +++++++++++---------
 .../runtime/buffered-tuple-stream-v2.inline.h   | 12 ++--
 4 files changed, 81 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/081ecf01/be/src/runtime/buffered-tuple-stream-v2-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream-v2-test.cc b/be/src/runtime/buffered-tuple-stream-v2-test.cc
index 277a564..7e4cef8 100644
--- a/be/src/runtime/buffered-tuple-stream-v2-test.cc
+++ b/be/src/runtime/buffered-tuple-stream-v2-test.cc
@@ -794,12 +794,11 @@ TEST_F(SimpleTupleStreamTest, StringsOutsideStream) {
     for (int j = 0; j < batch->num_rows(); ++j) {
       int fixed_size = tuple_desc.byte_size();
       // Copy fixed portion in, but leave it pointing to row batch's varlen data.
-      ASSERT_TRUE(stream.AddRowCustom(fixed_size,
-          [batch, fixed_size, j](uint8_t* tuple_data) {
-            memcpy(tuple_data, batch->GetRow(j)->GetTuple(0), fixed_size);
-          },
-          &status));
+      uint8_t* tuple_data = stream.AddRowCustomBegin(fixed_size, &status);
+      ASSERT_TRUE(tuple_data != nullptr);
       ASSERT_TRUE(status.ok());
+      memcpy(tuple_data, batch->GetRow(j)->GetTuple(0), fixed_size);
+      stream.AddRowCustomEnd(fixed_size);
     }
     rows_added += batch->num_rows();
   }
@@ -1125,12 +1124,11 @@ TEST_F(MultiTupleStreamTest, MultiTupleAddRowCustom) {
         fixed_size += tuple_desc->byte_size();
         varlen_size += row->GetTuple(k)->VarlenByteSize(*tuple_desc);
       }
-      ASSERT_TRUE(stream.AddRowCustom(fixed_size + varlen_size,
-          [this, row, fixed_size, varlen_size](uint8_t* data) {
-            WriteStringRow(string_desc_, row, fixed_size, varlen_size, data);
-          },
-          &status));
+      uint8_t* data = stream.AddRowCustomBegin(fixed_size + varlen_size, &status);
+      ASSERT_TRUE(data != nullptr);
       ASSERT_TRUE(status.ok());
+      WriteStringRow(string_desc_, row, fixed_size, varlen_size, data);
+      stream.AddRowCustomEnd(fixed_size + varlen_size);
     }
     rows_added += batch->num_rows();
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/081ecf01/be/src/runtime/buffered-tuple-stream-v2.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream-v2.cc b/be/src/runtime/buffered-tuple-stream-v2.cc
index 82da2bc..90d9c12 100644
--- a/be/src/runtime/buffered-tuple-stream-v2.cc
+++ b/be/src/runtime/buffered-tuple-stream-v2.cc
@@ -540,6 +540,9 @@ void BufferedTupleStreamV2::InvalidateReadIterator() {
   if (read_page_reservation_.GetReservation() > 0) {
     buffer_pool_client_->RestoreReservation(&read_page_reservation_, default_page_len_);
   }
+  // It is safe to re-read a delete-on-read stream if no rows were read and no pages
+  // were therefore deleted.
+  if (rows_returned_ == 0) delete_on_read_ = false;
 }
 
 Status BufferedTupleStreamV2::PrepareForRead(bool delete_on_read, bool* got_reservation) {
@@ -863,39 +866,41 @@ int64_t BufferedTupleStreamV2::ComputeRowSize(TupleRow* row) const noexcept {
 }
 
 bool BufferedTupleStreamV2::AddRowSlow(TupleRow* row, Status* status) noexcept {
-  // Use AddRowCustomSlow() to do the work of advancing the page.
+  // Use AddRowCustom*() to do the work of advancing the page.
   int64_t row_size = ComputeRowSize(row);
-  return AddRowCustomSlow(row_size,
-      [this, row, row_size](uint8_t* data) {
-        bool success = DeepCopy(row, &data, data + row_size);
-        DCHECK(success);
-        DCHECK_EQ(data, write_ptr_);
-      },
-      status);
+  uint8_t* data = AddRowCustomBeginSlow(row_size, status);
+  if (data == nullptr) return false;
+  bool success = DeepCopy(row, &data, data + row_size);
+  DCHECK(success);
+  DCHECK_EQ(data, write_ptr_);
+  AddRowCustomEnd(row_size);
+  return true;
 }
 
-bool BufferedTupleStreamV2::AddRowCustomSlow(
-    int64_t size, const WriteRowFn& write_fn, Status* status) noexcept {
+uint8_t* BufferedTupleStreamV2::AddRowCustomBeginSlow(
+    int64_t size, Status* status) noexcept {
   bool got_reservation;
   *status = AdvanceWritePage(size, &got_reservation);
-  if (!status->ok() || !got_reservation) return false;
+  if (!status->ok() || !got_reservation) return nullptr;
 
   // We have a large-enough page so now success is guaranteed.
-  bool result = AddRowCustom(size, write_fn, status);
-  DCHECK(result);
-  if (size > default_page_len_) {
-    // Immediately unpin the large write page so that we're not using up extra reservation
-    // and so we don't append another row to the page.
-    ResetWritePage();
-    // Save some of the reservation we freed up so we can create the next write page when
-    // needed.
-    if (NeedWriteReservation()) {
-      buffer_pool_client_->SaveReservation(&write_page_reservation_, default_page_len_);
-    }
+  uint8_t* result = AddRowCustomBegin(size, status);
+  DCHECK(result != nullptr);
+  return result;
+}
+
+void BufferedTupleStreamV2::AddLargeRowCustomEnd(int64_t size) noexcept {
+  DCHECK_GT(size, default_page_len_);
+  // Immediately unpin the large write page so that we're not using up extra reservation
+  // and so we don't append another row to the page.
+  ResetWritePage();
+  // Save some of the reservation we freed up so we can create the next write page when
+  // needed.
+  if (NeedWriteReservation()) {
+    buffer_pool_client_->SaveReservation(&write_page_reservation_, default_page_len_);
   }
   // The stream should be in a consistent state once the row is added.
   CHECK_CONSISTENCY();
-  return true;
 }
 
 bool BufferedTupleStreamV2::AddRow(TupleRow* row, Status* status) noexcept {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/081ecf01/be/src/runtime/buffered-tuple-stream-v2.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream-v2.h b/be/src/runtime/buffered-tuple-stream-v2.h
index c06dc6c..1f21235 100644
--- a/be/src/runtime/buffered-tuple-stream-v2.h
+++ b/be/src/runtime/buffered-tuple-stream-v2.h
@@ -52,15 +52,15 @@ class TupleRow;
 /// PrepareForReadWrite() is called to initialize both read and write iterators to enable
 /// interleaved reads and writes.
 ///
-/// To use write-only mode, PrepareForWrite() is called once and AddRow()/AddRowCustom()
+/// To use write-only mode, PrepareForWrite() is called once and AddRow()/AddRowCustom*()
 /// are called repeatedly to initialize then advance a write iterator through the stream.
 /// Once the stream is fully written, it can be read back by calling PrepareForRead()
 /// then GetNext() repeatedly to advance a read iterator through the stream, or by
 /// calling GetRows() to get all of the rows at once.
 ///
 /// To use read/write mode, PrepareForReadWrite() is called once to initialize the read
-/// and write iterators. AddRow()/AddRowCustom() then advance a write iterator through the
-/// stream, and GetNext() advances a trailing read iterator through the stream.
+/// and write iterators. AddRow()/AddRowCustom*() then advance a write iterator through
+/// the stream, and GetNext() advances a trailing read iterator through the stream.
 ///
 /// Buffer management:
 /// The tuple stream is backed by a sequence of BufferPool Pages. The tuple stream uses
@@ -87,7 +87,7 @@ class TupleRow;
 /// To read or write a row larger than the default page size to/from an unpinned stream,
 /// the client must have max_page_len - default_page_len unused reservation. Writing a
 /// large row to an unpinned stream only uses the reservation for the duration of the
-/// AddRow()/AddRowCustom() call. Reading a large row from an unpinned stream uses the
+/// AddRow()/AddRowCustom*() call. Reading a large row from an unpinned stream uses the
 /// reservation until the next call to GetNext(). E.g. to partition a single unpinned
 /// stream into n unpinned streams, the reservation needed is (n - 1) *
 /// default_page_len + 2 * max_page_len: one large read buffer and one large write
@@ -187,15 +187,16 @@ class TupleRow;
 /// the stream may be freed on the next call to GetNext().
 /// TODO: IMPALA-4179, instead of needs_deep_copy, attach the pages' buffers to the batch.
 ///
-/// Manual construction of rows with AddRowCustom():
-/// The BufferedTupleStream supports allocation of uninitialized rows with AddRowCustom().
-/// AddRowCustom() is called instead of AddRow() if the client wants to manually construct
-/// a row. The caller of AddRowCustom() is responsible for providing a callback function
-/// that writes the row with exactly the layout described above.
+/// Manual construction of rows with AddRowCustomBegin()/AddRowCustomEnd():
+/// The BufferedTupleStream supports allocation of uninitialized rows with
+/// AddRowCustom*(). AddRowCustomBegin() is called instead of AddRow() if the client wants
+/// to manually construct a row. The caller of AddRowCustomBegin() is responsible for
+/// writing the row with exactly the layout described above then calling
+/// AddRowCustomEnd() when done.
 ///
 /// If a caller constructs a tuple in this way, the caller can set the pointers and they
 /// will not be modified until the stream is read via GetNext() or GetRows().
-/// TODO: IMPALA-5007: try to remove AddRowCustom() by unifying with AddRow().
+/// TODO: IMPALA-5007: try to remove AddRowCustom*() by unifying with AddRow().
 ///
 /// TODO: we need to be able to do read ahead for pages. We need some way to indicate a
 /// page will need to be pinned soon.
@@ -223,7 +224,8 @@ class BufferedTupleStreamV2 {
 
   /// Prepares the stream for writing by saving enough reservation for a default-size
   /// write page. Tries to increase reservation if there is not enough unused reservation
-  /// for a page. Called after Init() and before the first AddRow() or AddRowCustom() call.
+  /// for a page. Called after Init() and before the first AddRow() or
+  /// AddRowCustomBegin() call.
   /// 'got_reservation': set to true if there was enough reservation to initialize the
   ///     first write page and false if there was not enough reservation and no other
   ///     error was encountered. Undefined if an error status is returned.
@@ -231,8 +233,8 @@ class BufferedTupleStreamV2 {
 
   /// Prepares the stream for interleaved reads and writes by saving enough reservation
   /// for default-sized read and write pages. Called after Init() and before the first
-  /// AddRow() or AddRowCustom() call.
-  /// delete_on_read: Pages are deleted after they are read.
+  /// AddRow() or AddRowCustomBegin() call.
+  /// 'delete_on_read': Pages are deleted after they are read.
   /// 'got_reservation': set to true if there was enough reservation to initialize the
   ///     read and write pages and false if there was not enough reservation and no other
   ///     error was encountered. Undefined if an error status is returned.
@@ -240,11 +242,11 @@ class BufferedTupleStreamV2 {
       bool delete_on_read, bool* got_reservation) WARN_UNUSED_RESULT;
 
   /// Prepares the stream for reading, invalidating the write iterator (if there is one).
-  /// Therefore must be called after the last AddRow() or AddRowCustom() and before
+  /// Therefore must be called after the last AddRow() or AddRowCustomEnd() and before
   /// GetNext(). PrepareForRead() can be called multiple times to do multiple read passes
-  /// over the stream, unless PrepareForRead() or PrepareForReadWrite() was previously
-  /// called with delete_on_read = true.
-  /// delete_on_read: Pages are deleted after they are read.
+  /// over the stream, unless rows were read from the stream after PrepareForRead() or
+  /// PrepareForReadWrite() was called with delete_on_read = true.
+  /// 'delete_on_read': Pages are deleted after they are read.
   /// 'got_reservation': set to true if there was enough reservation to initialize the
   ///     first read page and false if there was not enough reservation and no other
   ///     error was encountered. Undefined if an error status is returned.
@@ -271,21 +273,19 @@ class BufferedTupleStreamV2 {
   /// returns an error, it should not be called again.
   bool AddRow(TupleRow* row, Status* status) noexcept WARN_UNUSED_RESULT;
 
-  /// A function that writes a row to 'data' with the format described in the class
-  /// comment.
+  /// Allocates space to store a row of 'size' bytes (including fixed and variable length
+  /// data). If successful, returns a pointer to the allocated row. The caller then must
+  /// write valid data to the row and call AddRowCustomEnd().
   ///
-  /// Use boost::function instead of std::function because it is better at avoiding heap
-  /// allocations when capturing a small number of variables. In GCC 4.9.2/Boost 1.57,
-  /// boost::function can store up to 3 64-bit pointers without making a heap allocation,
-  /// but std::function always makes a heap allocation.
-  typedef boost::function<void(uint8_t* data)> WriteRowFn;
+  /// If unsuccessful, returns nullptr. The failure modes are the same as described in the
+  /// AddRow() comment.
+  ALWAYS_INLINE uint8_t* AddRowCustomBegin(int64_t size, Status* status);
 
-  /// Allocates space to store a row of 'size' bytes (including fixed and variable length
-  /// data). If successful, calls 'write_fn' with a pointer to the start of the allocated
-  /// space and returns true. Otherwise returns false. The failure modes are the same as
-  /// described in the AddRow() comment.
-  ALWAYS_INLINE bool AddRowCustom(
-      int64_t size, const WriteRowFn& write_fn, Status* status);
+  /// Called after AddRowCustomBegin() when done writing the row. Should only be called
+  /// if AddRowCustomBegin() succeeded. See the AddRowCustomBegin() comment for
+  /// explanation.
+  /// 'size': the size passed into AddRowCustomBegin().
+  void AddRowCustomEnd(int64_t size);
 
   /// Unflattens 'flat_row' into a regular TupleRow 'row'. Only valid to call if the
   /// stream is pinned. The row must have been allocated with the stream's row desc.
@@ -476,7 +476,7 @@ class BufferedTupleStreamV2 {
 
   /// The current page for writing. NULL if there is no write iterator or no current
   /// write page. Always pinned. Size is 'default_page_len_', except temporarily while
-  /// appending a larger row in AddRowCustomSlow().
+  /// appending a larger row between AddRowCustomBegin() and AddRowCustomEnd().
   Page* write_page_;
 
   /// Saved reservation for write iterator. 'default_page_len_' reservation is saved if
@@ -508,7 +508,8 @@ class BufferedTupleStreamV2 {
   /// Whether any tuple in the rows is nullable.
   const bool has_nullable_tuple_;
 
-  /// If true, pages are deleted after they are read.
+  /// If true, pages are deleted after they are read during this read pass. Once rows
+  /// have been read from a stream with 'delete_on_read_' true, this is always true.
   bool delete_on_read_;
 
   bool closed_; // Used for debugging.
@@ -532,9 +533,12 @@ class BufferedTupleStreamV2 {
   /// the current page.
   bool AddRowSlow(TupleRow* row, Status* status) noexcept;
 
-  /// The slow path for AddRowCustom() that is called if there is not sufficient space in
+  /// The slow path for AddRowCustomBegin() that is called if there is not sufficient space in
   /// the current page.
-  bool AddRowCustomSlow(int64_t size, const WriteRowFn& write_fn, Status* status) noexcept;
+  uint8_t* AddRowCustomBeginSlow(int64_t size, Status* status) noexcept;
+
+  /// The slow path for AddRowCustomEnd() that is called for large pages.
+  void AddLargeRowCustomEnd(int64_t size) noexcept;
 
   /// Copies 'row' into the buffer starting at *data and ending at the byte before
   /// 'data_end'. On success, returns true and updates *data to point after the last

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/081ecf01/be/src/runtime/buffered-tuple-stream-v2.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/buffered-tuple-stream-v2.inline.h b/be/src/runtime/buffered-tuple-stream-v2.inline.h
index a3b219c..7022249 100644
--- a/be/src/runtime/buffered-tuple-stream-v2.inline.h
+++ b/be/src/runtime/buffered-tuple-stream-v2.inline.h
@@ -31,12 +31,11 @@ inline int BufferedTupleStreamV2::NullIndicatorBytesPerRow() const {
   return BitUtil::RoundUpNumBytes(fixed_tuple_sizes_.size());
 }
 
-inline bool BufferedTupleStreamV2::AddRowCustom(
-    int64_t size, const WriteRowFn& write_fn, Status* status) {
+inline uint8_t* BufferedTupleStreamV2::AddRowCustomBegin(int64_t size, Status* status) {
   DCHECK(!closed_);
   DCHECK(has_write_iterator());
   if (UNLIKELY(write_page_ == nullptr || write_ptr_ + size > write_end_ptr_)) {
-    return AddRowCustomSlow(size, write_fn, status);
+    return AddRowCustomBeginSlow(size, status);
   }
   DCHECK(write_page_ != nullptr);
   DCHECK(write_page_->is_pinned());
@@ -46,8 +45,11 @@ inline bool BufferedTupleStreamV2::AddRowCustom(
 
   uint8_t* data = write_ptr_;
   write_ptr_ += size;
-  write_fn(data);
-  return true;
+  return data;
+}
+
+inline void BufferedTupleStreamV2::AddRowCustomEnd(int64_t size) {
+  if (UNLIKELY(size > default_page_len_)) AddLargeRowCustomEnd(size);
 }
 }

[2/2] incubator-impala git commit: [DOCS] Clean up trailing spaces

Posted by jr...@apache.org.

[DOCS] Clean up trailing spaces

The pre-commit hook that used to detect
and fix trailing spaces in doc XML files
seems to have bitrotted and some trailing
spaces made it into source files during the
initial upstream cleanup.

Change-Id: Ieeb6a7d557c37be981add8353cbd1756f2e1e423
Reviewed-on: http://gerrit.cloudera.org:8080/7373
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/07d3cea1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/07d3cea1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/07d3cea1

Branch: refs/heads/master
Commit: 07d3cea1f096239beebed3295608743fe60e12ec
Parents: 081ecf0
Author: John Russell <jr...@cloudera.com>
Authored: Thu Jul 6 23:27:40 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Jul 7 17:10:32 2017 +0000

----------------------------------------------------------------------
 docs/topics/impala_alter_table.xml        |  4 ++--
 docs/topics/impala_auditing.xml           | 16 ++++++++--------
 docs/topics/impala_compute_stats.xml      |  2 +-
 docs/topics/impala_datetime_functions.xml |  8 ++++----
 docs/topics/impala_describe.xml           |  2 +-
 docs/topics/impala_runtime_filtering.xml  |  4 ++--
 docs/topics/impala_show.xml               |  2 +-
 docs/topics/impala_troubleshooting.xml    |  2 +-
 8 files changed, 20 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07d3cea1/docs/topics/impala_alter_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_alter_table.xml b/docs/topics/impala_alter_table.xml
index e12e1b8..6e3a815 100644
--- a/docs/topics/impala_alter_table.xml
+++ b/docs/topics/impala_alter_table.xml
@@ -467,7 +467,7 @@ yes,no</codeblock>
       See <xref href="impala_perf_stats.xml#perf_table_stats_manual"/> for an example of using table properties to
       fine-tune the performance-related table statistics.
     </p>
-      
+
     <p>
       <b>To manually set or update table or column statistics:</b>
     </p>
@@ -669,7 +669,7 @@ select * from p2;
 
 alter table p2 drop column x;
 select * from p2;
-WARNINGS: 
+WARNINGS:
 File '<varname>hdfs_filename</varname>' has an incompatible Parquet schema for column 'add_columns.p2.s3'.
 Column type: STRING, Parquet schema:
 optional int32 x [i:1 d:1 r:0]

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07d3cea1/docs/topics/impala_auditing.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_auditing.xml b/docs/topics/impala_auditing.xml
index 10d9268..6297cca 100644
--- a/docs/topics/impala_auditing.xml
+++ b/docs/topics/impala_auditing.xml
@@ -69,10 +69,10 @@ under the License.
         event log files.
       </li>
 
-      <li> 
+      <li>
         Use a cluster manager with governance capabilities to filter, visualize,
         and produce reports based on the audit logs collected
-        from all the hosts in the cluster. 
+        from all the hosts in the cluster.
       </li>
     </ul>
 
@@ -102,18 +102,18 @@ under the License.
         <codeph>fsync()</codeph> system call) to avoid loss of audit data in case of a crash.
       </p>
 
-      <p> 
+      <p>
         The runtime overhead of auditing applies to whichever host serves as the coordinator
         for the query, that is, the host you connect to when you issue the query. This might
         be the same host for all queries, or different applications or users might connect to
-        and issue queries through different hosts. 
+        and issue queries through different hosts.
       </p>
 
-      <p> 
+      <p>
         To avoid excessive I/O overhead on busy coordinator hosts, Impala syncs the audit log
         data (using the <codeph>fsync()</codeph> system call) periodically rather than after
         every query. Currently, the <codeph>fsync()</codeph> calls are issued at a fixed
-        interval, every 5 seconds. 
+        interval, every 5 seconds.
       </p>
 
       <p>
@@ -138,12 +138,12 @@ under the License.
 
     <conbody>
 
-      <p> 
+      <p>
         The audit log files represent the query information in JSON format, one query per line.
         Typically, rather than looking at the log files themselves, you should use cluster-management
         software to consolidate the log data from all Impala hosts and filter and visualize the results
         in useful ways. (If you do examine the raw log data, you might run the files through
-        a JSON pretty-printer first.) 
+        a JSON pretty-printer first.)
      </p>
 
       <p>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07d3cea1/docs/topics/impala_compute_stats.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_compute_stats.xml b/docs/topics/impala_compute_stats.xml
index 026bc0b..b7489c5 100644
--- a/docs/topics/impala_compute_stats.xml
+++ b/docs/topics/impala_compute_stats.xml
@@ -528,7 +528,7 @@ show table stats item_partitioned;
       Kudu tables. Therefore, you do not need to re-run the operation when
       you see -1 in the <codeph># Rows</codeph> column of the output from
       <codeph>SHOW TABLE STATS</codeph>. That column always shows -1 for
-      all Kudu tables. 
+      all Kudu tables.
     </p>
 
     <p conref="../shared/impala_common.xml#common/related_info"/>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07d3cea1/docs/topics/impala_datetime_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_datetime_functions.xml b/docs/topics/impala_datetime_functions.xml
index e4afb4a..27c0260 100644
--- a/docs/topics/impala_datetime_functions.xml
+++ b/docs/topics/impala_datetime_functions.xml
@@ -193,7 +193,7 @@ select now(), current_timestamp();
 | 2016-05-19 16:10:14.237849000 | 2016-05-19 16:10:14.237849000 |
 +-------------------------------+-------------------------------+
 
-select current_timestamp() as right_now,            
+select current_timestamp() as right_now,
   current_timestamp() + interval 3 hours as in_three_hours;
 +-------------------------------+-------------------------------+
 | right_now                     | in_three_hours                |
@@ -412,7 +412,7 @@ select date_sub(cast('2016-05-31' as timestamp), interval 1 months) as 'april_31
           </p>
           <p conref="../shared/impala_common.xml#common/example_blurb"/>
           <p>
-            The following example shows how comparing a <q>late</q> value with 
+            The following example shows how comparing a <q>late</q> value with
             an <q>earlier</q> value produces a positive number. In this case,
             the result is (365 * 5) + 1, because one of the intervening years is
             a leap year.
@@ -760,8 +760,8 @@ select now() as right_now,
 +-------------------------------+-----------+------------+
 
 select now() as right_now,
-  extract(day from now()) as this_day,  
-  extract(hour from now()) as this_hour;  
+  extract(day from now()) as this_day,
+  extract(hour from now()) as this_hour;
 +-------------------------------+----------+-----------+
 | right_now                     | this_day | this_hour |
 +-------------------------------+----------+-----------+

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07d3cea1/docs/topics/impala_describe.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_describe.xml b/docs/topics/impala_describe.xml
index 67af443..17d8875 100644
--- a/docs/topics/impala_describe.xml
+++ b/docs/topics/impala_describe.xml
@@ -737,7 +737,7 @@ Returned 27 row(s) in 0.17s</codeblock>
     </ul>
 
     <p rev="kudu">
-      The following example shows <codeph>DESCRIBE</codeph> output for a simple Kudu table, with 
+      The following example shows <codeph>DESCRIBE</codeph> output for a simple Kudu table, with
       a single-column primary key and all column attributes left with their default values:
     </p>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07d3cea1/docs/topics/impala_runtime_filtering.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_runtime_filtering.xml b/docs/topics/impala_runtime_filtering.xml
index 418044f..3323afb 100644
--- a/docs/topics/impala_runtime_filtering.xml
+++ b/docs/topics/impala_runtime_filtering.xml
@@ -296,12 +296,12 @@ under the License.
             </li>
             <li>
               <p rev="2.6.0 IMPALA-3480">
-                <xref href="impala_runtime_filter_max_size.xml#runtime_filter_max_size"/> 
+                <xref href="impala_runtime_filter_max_size.xml#runtime_filter_max_size"/>
               </p>
             </li>
             <li>
               <p rev="2.6.0 IMPALA-3480">
-                <xref href="impala_runtime_filter_min_size.xml#runtime_filter_min_size"/> 
+                <xref href="impala_runtime_filter_min_size.xml#runtime_filter_min_size"/>
               </p>
             </li>
             <li>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07d3cea1/docs/topics/impala_show.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_show.xml b/docs/topics/impala_show.xml
index 3f99981..bd58d61 100644
--- a/docs/topics/impala_show.xml
+++ b/docs/topics/impala_show.xml
@@ -943,7 +943,7 @@ show table stats kudu_table;
         Kudu tables. Therefore, you do not need to re-run <codeph>COMPUTE STATS</codeph>
         when you see -1 in the <codeph># Rows</codeph> column of the output from
         <codeph>SHOW TABLE STATS</codeph>. That column always shows -1 for
-        all Kudu tables. 
+        all Kudu tables.
       </p>
 
       <p conref="../shared/impala_common.xml#common/example_blurb"/>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/07d3cea1/docs/topics/impala_troubleshooting.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_troubleshooting.xml b/docs/topics/impala_troubleshooting.xml
index bc7d301..f5dbdd2 100644
--- a/docs/topics/impala_troubleshooting.xml
+++ b/docs/topics/impala_troubleshooting.xml
@@ -112,7 +112,7 @@ under the License.
 $ sudo sysctl -w vm.drop_caches=3 vm.drop_caches=0
 vm.drop_caches = 3
 vm.drop_caches = 0
-$ sudo dd if=/dev/sda bs=1M of=/dev/null count=1k 
+$ sudo dd if=/dev/sda bs=1M of=/dev/null count=1k
 1024+0 records in
 1024+0 records out
 1073741824 bytes (1.1 GB) copied, 5.60373 s, 192 MB/s