You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/12/13 06:52:03 UTC

(impala) branch master updated (69aad8b5d -> b03e8ef95)

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 69aad8b5d IMPALA-12589: Ignore FbFileDesc.absolutePath() if it is null/empty
     new 0c9bfcaa4 IMPALA-12614: Use atomics for 64-bit host metrics
     new b03e8ef95 Revert "IMPALA-9923: Load ORC serially to hack around flakiness"

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/runtime/query-state.cc         |  8 ++++----
 be/src/util/system-state-info-test.cc | 10 +++++-----
 be/src/util/system-state-info.cc      |  8 ++++----
 be/src/util/system-state-info.h       |  9 +++++----
 bin/load-data.py                      | 11 -----------
 5 files changed, 18 insertions(+), 28 deletions(-)


(impala) 01/02: IMPALA-12614: Use atomics for 64-bit host metrics

Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0c9bfcaa4cc1c74a406265e1d9a83721af19bd57
Author: Kurt Deschler <kd...@cloudera.com>
AuthorDate: Tue Dec 12 11:52:10 2023 -0500

    IMPALA-12614: Use atomics for 64-bit host metrics
    
    This patch changes 64-bit host metrics to use atomics to avoid potential
    partial load/store data races. There are no functional changes. This
    issue was exposed by TSAN tests when IMPALA-12385 enabled periodic
    metrics by default.
    
    Testing: TSAN tests cover this case.
    Change-Id: I88a15d3ccfeab29506427b3bfcaeebf3a2831176
    Reviewed-on: http://gerrit.cloudera.org:8080/20776
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/query-state.cc         |  8 ++++----
 be/src/util/system-state-info-test.cc | 10 +++++-----
 be/src/util/system-state-info.cc      |  8 ++++----
 be/src/util/system-state-info.h       |  9 +++++----
 4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/be/src/runtime/query-state.cc b/be/src/runtime/query-state.cc
index 5738448e8..dafc90b3d 100644
--- a/be/src/runtime/query-state.cc
+++ b/be/src/runtime/query-state.cc
@@ -233,20 +233,20 @@ Status QueryState::Init(const ExecQueryFInstancesRequestPB* exec_rpc_params,
     // Add network usage
     host_profile_->AddSamplingTimeSeriesCounter(
         "HostNetworkRx", TUnit::BYTES_PER_SECOND, [system_state_info] () {
-        return system_state_info->GetNetworkUsage().rx_rate;
+        return system_state_info->GetNetworkUsage().rx_rate.Load();
         });
     host_profile_->AddSamplingTimeSeriesCounter(
         "HostNetworkTx", TUnit::BYTES_PER_SECOND, [system_state_info] () {
-        return system_state_info->GetNetworkUsage().tx_rate;
+        return system_state_info->GetNetworkUsage().tx_rate.Load();
         });
     // Add disk stats
     host_profile_->AddSamplingTimeSeriesCounter(
         "HostDiskReadThroughput", TUnit::BYTES_PER_SECOND, [system_state_info] () {
-        return system_state_info->GetDiskStats().read_rate;
+        return system_state_info->GetDiskStats().read_rate.Load();
         });
     host_profile_->AddSamplingTimeSeriesCounter(
         "HostDiskWriteThroughput", TUnit::BYTES_PER_SECOND, [system_state_info] () {
-        return system_state_info->GetDiskStats().write_rate;
+        return system_state_info->GetDiskStats().write_rate.Load();
         });
   }
 
diff --git a/be/src/util/system-state-info-test.cc b/be/src/util/system-state-info-test.cc
index 50b0c9715..78735831f 100644
--- a/be/src/util/system-state-info-test.cc
+++ b/be/src/util/system-state-info-test.cc
@@ -35,7 +35,7 @@ TEST_F(SystemStateInfoTest, FirstCallReturnsZero) {
   EXPECT_EQ(0, r.user + r.system + r.iowait);
 
   const SystemStateInfo::NetworkUsage& n = info.GetNetworkUsage();
-  EXPECT_EQ(0, n.rx_rate + n.tx_rate);
+  EXPECT_EQ(0, n.rx_rate.Load() + n.tx_rate.Load());
 }
 
 // Smoke test to make sure that we read non-zero values from /proc/stat.
@@ -194,8 +194,8 @@ TEST_F(SystemStateInfoTest, ComputeNetworkUsage) {
   int period_ms = 500;
   info.ComputeNetworkUsage(period_ms);
   const SystemStateInfo::NetworkUsage& n = info.GetNetworkUsage();
-  EXPECT_EQ(n.rx_rate, 8000);
-  EXPECT_EQ(n.tx_rate, 12000);
+  EXPECT_EQ(n.rx_rate.Load(), 8000);
+  EXPECT_EQ(n.tx_rate.Load(), 12000);
 }
 
 // Tests the computation logic for disk statistics.
@@ -216,8 +216,8 @@ TEST_F(SystemStateInfoTest, ComputeDiskStats) {
   int period_ms = 500;
   info.ComputeDiskStats(period_ms);
   const SystemStateInfo::DiskStats& ds = info.GetDiskStats();
-  EXPECT_EQ(ds.read_rate, 2 * 1000 * SystemStateInfo::BYTES_PER_SECTOR);
-  EXPECT_EQ(ds.write_rate, 2 * 2000 * SystemStateInfo::BYTES_PER_SECTOR);
+  EXPECT_EQ(ds.read_rate.Load(), 2 * 1000 * SystemStateInfo::BYTES_PER_SECTOR);
+  EXPECT_EQ(ds.write_rate.Load(), 2 * 2000 * SystemStateInfo::BYTES_PER_SECTOR);
 }
 
 } // namespace impala
diff --git a/be/src/util/system-state-info.cc b/be/src/util/system-state-info.cc
index b0344cc97..931717d28 100644
--- a/be/src/util/system-state-info.cc
+++ b/be/src/util/system-state-info.cc
@@ -208,10 +208,10 @@ void SystemStateInfo::ComputeNetworkUsage(int64_t period_ms) {
   const NetworkValues& cur = network_values_[net_val_idx_];
   const NetworkValues& old = network_values_[1 - net_val_idx_];
   int64_t rx_bytes = cur[NET_RX_BYTES] - old[NET_RX_BYTES];
-  network_usage_.rx_rate = rx_bytes * 1000L / period_ms;
+  network_usage_.rx_rate.Store(rx_bytes * 1000L / period_ms);
 
   int64_t tx_bytes = cur[NET_TX_BYTES] - old[NET_TX_BYTES];
-  network_usage_.tx_rate = tx_bytes * 1000L / period_ms;
+  network_usage_.tx_rate.Store(tx_bytes * 1000L / period_ms);
 }
 
 void SystemStateInfo::ReadCurrentProcDiskStats() {
@@ -263,10 +263,10 @@ void SystemStateInfo::ComputeDiskStats(int64_t period_ms) {
   const DiskValues& cur = disk_values_[disk_val_idx_];
   const DiskValues& old = disk_values_[1 - disk_val_idx_];
   int64_t read_sectors = cur[DISK_SECTORS_READ] - old[DISK_SECTORS_READ];
-  disk_stats_.read_rate = read_sectors * BYTES_PER_SECTOR * 1000 / period_ms;
+  disk_stats_.read_rate.Store(read_sectors * BYTES_PER_SECTOR * 1000 / period_ms);
 
   int64_t write_sectors = cur[DISK_SECTORS_WRITTEN] - old[DISK_SECTORS_WRITTEN];
-  disk_stats_.write_rate = write_sectors * BYTES_PER_SECTOR * 1000 / period_ms;
+  disk_stats_.write_rate.Store(write_sectors * BYTES_PER_SECTOR * 1000 / period_ms);
 }
 
 } // namespace impala
diff --git a/be/src/util/system-state-info.h b/be/src/util/system-state-info.h
index a4c9b882a..fbdddef1c 100644
--- a/be/src/util/system-state-info.h
+++ b/be/src/util/system-state-info.h
@@ -23,6 +23,7 @@
 
 #include <gtest/gtest_prod.h> // for FRIEND_TEST
 
+#include "common/atomic.h"
 #include "common/names.h"
 #include "common/logging.h"
 
@@ -57,8 +58,8 @@ class SystemStateInfo {
 
   /// Network usage rates in bytes per second.
   struct NetworkUsage {
-    int64_t rx_rate;
-    int64_t tx_rate;
+    AtomicInt64 rx_rate;
+    AtomicInt64 tx_rate;
   };
 
   /// Returns a struct containing the network usage for the interval between the last two
@@ -67,8 +68,8 @@ class SystemStateInfo {
 
   /// Disk statistics rates in bytes per second
   struct DiskStats {
-    int64_t read_rate;
-    int64_t write_rate;
+    AtomicInt64 read_rate;
+    AtomicInt64 write_rate;
   };
 
   /// Returns a struct containing the disk throughput for the interval between the last


(impala) 02/02: Revert "IMPALA-9923: Load ORC serially to hack around flakiness"

Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b03e8ef95c856f499d17ea7815831e30e2e9f467
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Wed Nov 29 18:58:37 2023 -0800

    Revert "IMPALA-9923: Load ORC serially to hack around flakiness"
    
    This reverts commit dc2fdabbd1f2c930348671e17f885c5c54b628e4.
    
    Newer hive version and other fixes has allow ORC loading to happen in
    parallel.
    
    Change-Id: I67f4051dd07273f2b51843cb5c1ec2cf185c5924
    Reviewed-on: http://gerrit.cloudera.org:8080/20755
    Reviewed-by: Riza Suminto <ri...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 bin/load-data.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/bin/load-data.py b/bin/load-data.py
index a4cfd5a97..090524cf5 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -396,7 +396,6 @@ def main():
 
     impala_create_files = []
     hive_load_text_files = []
-    hive_load_orc_files = []
     hive_load_nontext_files = []
     hbase_create_files = []
     hbase_postload_files = []
@@ -408,8 +407,6 @@ def main():
       elif hive_load_match in filename:
         if 'text-none-none' in filename:
           hive_load_text_files.append(filename)
-        elif 'orc-def-block' in filename:
-          hive_load_orc_files.append(filename)
         else:
           hive_load_nontext_files.append(filename)
       elif hbase_create_match in filename:
@@ -432,7 +429,6 @@ def main():
 
     log_file_list("Impala Create Files:", impala_create_files)
     log_file_list("Hive Load Text Files:", hive_load_text_files)
-    log_file_list("Hive Load Orc Files:", hive_load_orc_files)
     log_file_list("Hive Load Non-Text Files:", hive_load_nontext_files)
     log_file_list("HBase Create Files:", hbase_create_files)
     log_file_list("HBase Post-Load Files:", hbase_postload_files)
@@ -457,13 +453,6 @@ def main():
     # need to be loaded first
     assert(len(hive_load_text_files) <= 1)
     hive_exec_query_files_parallel(thread_pool, hive_load_text_files)
-    # IMPALA-9923: Run ORC serially separately from other non-text formats. This hacks
-    # around flakiness seen when loading this in parallel. This should be removed as
-    # soon as possible.
-    assert(len(hive_load_orc_files) <= 1)
-    hive_exec_query_files_parallel(thread_pool, hive_load_orc_files)
-
-    # Load all non-text formats (goes parallel)
     hive_exec_query_files_parallel(thread_pool, hive_load_nontext_files)
 
     assert(len(hbase_postload_files) <= 1)