You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2024/01/04 19:39:58 UTC

(impala) 03/03: IMPALA-12668: Enable clang-tidy checks for implicit fallthrough

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit dd8ddf77c39ac34457497e160aefe707b7324331
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Fri Dec 15 16:34:47 2023 -0800

    IMPALA-12668: Enable clang-tidy checks for implicit fallthrough
    
    In switch/case statements, one case can fallthrough to the
    next case. Sometimes this is intentional, but it is also a
    common source of bugs (i.e. a missing break/return statement).
    Clang-Tidy's clang-diagnostic-implicit-fallthrough flags
    locations where a case falls through to the next case without
    an explicit fallthrough declaration.
    
    This change enables clang-diagnostic-implicit-fallthrough and
    fixes failing locations. Since Impala uses C++17, this uses
    C++17's [[fallthrough]] to indicate an explicit fallthrough.
    This also adjusts clang-tidy's output to suggest [[fallthrough]]
    as the preferred way to indicate fallthrough.
    
    Testing:
     - Ran core job
     - Ran clang tidy
    
    Change-Id: I6d65c92b442fa0317c3af228997571e124a54092
    Reviewed-on: http://gerrit.cloudera.org:8080/20847
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Zihao Ye <ey...@163.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
---
 .clang-tidy                                      |  1 -
 be/src/benchmarks/atoi-benchmark.cc              | 19 ++++++--
 be/src/codegen/codegen-anyval-read-write-info.cc |  1 +
 be/src/codegen/codegen-anyval.cc                 |  1 +
 be/src/exec/orc/hdfs-orc-scanner.cc              |  1 +
 be/src/exec/parquet/parquet-column-stats.cc      |  1 +
 be/src/exprs/agg-fn-evaluator.cc                 |  4 +-
 be/src/exprs/anyval-util.h                       |  3 +-
 be/src/runtime/datetime-parser-common.cc         |  1 +
 be/src/runtime/descriptors.cc                    |  2 +
 be/src/runtime/io/disk-file.cc                   |  2 +
 be/src/runtime/raw-value.cc                      |  1 +
 be/src/runtime/raw-value.inline.h                |  3 +-
 be/src/runtime/runtime-filter-ir.cc              |  2 +
 be/src/runtime/types.cc                          |  1 +
 be/src/service/query-result-set.cc               |  1 +
 be/src/util/bit-packing.inline.h                 | 19 +++++---
 be/src/util/hash-util.h                          | 56 +++++++++++++++++-------
 be/src/util/jwt-util.cc                          |  3 +-
 be/src/util/redactor.cc                          |  1 +
 be/src/util/string-parser.h                      |  7 ++-
 bin/run_clang_tidy.sh                            |  7 ++-
 22 files changed, 105 insertions(+), 32 deletions(-)

diff --git a/.clang-tidy b/.clang-tidy
index f54f14933..c4499f953 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -48,7 +48,6 @@ Checks: "-*,clang*,\
 -clang-diagnostic-gnu-anonymous-struct,\
 -clang-diagnostic-gnu-zero-variadic-macro-arguments,\
 -clang-diagnostic-header-hygiene,\
--clang-diagnostic-implicit-fallthrough,\
 -clang-diagnostic-missing-prototypes,\
 -clang-diagnostic-missing-variable-declarations,\
 -clang-diagnostic-nested-anon-types,\
diff --git a/be/src/benchmarks/atoi-benchmark.cc b/be/src/benchmarks/atoi-benchmark.cc
index b47f257d8..0fcdd068e 100644
--- a/be/src/benchmarks/atoi-benchmark.cc
+++ b/be/src/benchmarks/atoi-benchmark.cc
@@ -91,7 +91,9 @@ inline int32_t AtoiUnsafe(const char* s, int len) {
   bool negative = false;
   int i = 0;
   switch (*s) {
-    case '-': negative = true;
+    case '-':
+      negative = true;
+      [[fallthrough]];
     case '+': ++i;
   }
 
@@ -107,25 +109,34 @@ inline int32_t AtoiUnrolled(const char* s, int len) {
     int32_t val = 0;
     bool negative = false;
     switch (*s) {
-      case '-': negative = true;
+      case '-':
+        negative = true;
+        [[fallthrough]];
       case '+': --len; ++s;
     }
 
     switch(len) {
       case 8:
         val += (DIGIT(s[len - 8])) * 10000;
+        [[fallthrough]];
       case 7:
         val += (DIGIT(s[len - 7])) * 10000;
+        [[fallthrough]];
       case 6:
         val += (DIGIT(s[len - 6])) * 10000;
+        [[fallthrough]];
       case 5:
         val += (DIGIT(s[len - 5])) * 10000;
+        [[fallthrough]];
       case 4:
         val += (DIGIT(s[len - 4])) * 1000;
+        [[fallthrough]];
       case 3:
         val += (DIGIT(s[len - 3])) * 100;
+        [[fallthrough]];
       case 2:
         val += (DIGIT(s[len - 2])) * 10;
+        [[fallthrough]];
       case 1:
         val += (DIGIT(s[len - 1]));
     }
@@ -140,7 +151,9 @@ inline int32_t AtoiCased(const char* s, int len) {
     int32_t val = 0;
     bool negative = false;
     switch (*s) {
-      case '-': negative = true;
+      case '-':
+        negative = true;
+        [[fallthrough]];
       case '+': --len; ++s;
     }
 
diff --git a/be/src/codegen/codegen-anyval-read-write-info.cc b/be/src/codegen/codegen-anyval-read-write-info.cc
index 263e3f049..d68c300ef 100644
--- a/be/src/codegen/codegen-anyval-read-write-info.cc
+++ b/be/src/codegen/codegen-anyval-read-write-info.cc
@@ -134,6 +134,7 @@ void CodegenAnyValReadWriteInfo::CodegenConvertToCanonicalForm() {
       llvm::Value* new_val = CodegenAnyVal::ConvertToCanonicalForm(codegen_, builder_,
           type_, GetSimpleVal());
       SetSimpleVal(new_val);
+      break;
     }
     default:
       ;
diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc
index 773d3ccf0..b98f9e31c 100644
--- a/be/src/codegen/codegen-anyval.cc
+++ b/be/src/codegen/codegen-anyval.cc
@@ -598,6 +598,7 @@ void CodegenAnyVal::ConvertToCanonicalForm() {
       llvm::Value* new_val = ConvertToCanonicalForm(codegen_,
           builder_, type_, GetVal());
       SetVal(new_val);
+      break;
     }
     default:
       ;
diff --git a/be/src/exec/orc/hdfs-orc-scanner.cc b/be/src/exec/orc/hdfs-orc-scanner.cc
index 6858be523..6c8cf7822 100644
--- a/be/src/exec/orc/hdfs-orc-scanner.cc
+++ b/be/src/exec/orc/hdfs-orc-scanner.cc
@@ -689,6 +689,7 @@ void HdfsOrcScanner::SetSyntheticAcidFieldForOriginalFile(const SlotDescriptor*
       break;
     case ACID_FIELD_ROWID_INDEX:
       file_position_ = slot_desc;
+      break;
     default:
       break;
   }
diff --git a/be/src/exec/parquet/parquet-column-stats.cc b/be/src/exec/parquet/parquet-column-stats.cc
index 028601810..63e7ab116 100644
--- a/be/src/exec/parquet/parquet-column-stats.cc
+++ b/be/src/exec/parquet/parquet-column-stats.cc
@@ -174,6 +174,7 @@ bool ColumnStatsReader::ReadFromString(StatsField stats_field,
               static_cast<Decimal16Value*>(slot));
         }
       DCHECK(false) << "Unknown decimal byte size: " << col_type_.GetByteSize();
+      break;
     case TYPE_DATE:
       return ColumnStats<DateValue>::DecodePlainValue(encoded_value, slot, element_.type);
     default:
diff --git a/be/src/exprs/agg-fn-evaluator.cc b/be/src/exprs/agg-fn-evaluator.cc
index 7eafa1ae1..bf74ddab2 100644
--- a/be/src/exprs/agg-fn-evaluator.cc
+++ b/be/src/exprs/agg-fn-evaluator.cc
@@ -260,7 +260,9 @@ void AggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_slo
 #endif
           return;
         default:
-          break;
+          DCHECK(false) << "Unknown decimal byte size: "
+                        << dst_slot_desc.type().GetByteSize();
+          return;
       }
     case TYPE_DATE:
       *reinterpret_cast<DateValue*>(slot) =
diff --git a/be/src/exprs/anyval-util.h b/be/src/exprs/anyval-util.h
index c930012c0..f14f83526 100644
--- a/be/src/exprs/anyval-util.h
+++ b/be/src/exprs/anyval-util.h
@@ -330,7 +330,8 @@ class AnyValUtil {
 #endif
             return;
           default:
-            break;
+            DCHECK(false) << "Unknown decimal byte size: " << type.GetByteSize();
+            return;
         }
       case TYPE_DATE:
         *reinterpret_cast<DateVal*>(dst) =
diff --git a/be/src/runtime/datetime-parser-common.cc b/be/src/runtime/datetime-parser-common.cc
index efb026dac..1c81b6516 100644
--- a/be/src/runtime/datetime-parser-common.cc
+++ b/be/src/runtime/datetime-parser-common.cc
@@ -130,6 +130,7 @@ void ReportBadFormat(FunctionContext* context, FormatTokenizationResult error_ty
         break;
       case MISPLACED_FX_MODIFIER_ERROR:
         ss << "PARSE ERROR: FX modifier should be at the beginning of the format string.";
+        break;
       case CONFLICTING_DAY_OF_WEEK_TOKENS_ERROR:
         ss << "PARSE ERROR: Multiple day of week tokens provided.";
         break;
diff --git a/be/src/runtime/descriptors.cc b/be/src/runtime/descriptors.cc
index 28e10c842..c3567fdba 100644
--- a/be/src/runtime/descriptors.cc
+++ b/be/src/runtime/descriptors.cc
@@ -1011,6 +1011,7 @@ CodegenAnyValReadWriteInfo CodegenAnyValToReadWriteInfo(CodegenAnyVal& any_val,
     case TYPE_STRUCT:
       DCHECK(false) << "Invalid type for this function. "
                     << "Call 'StoreStructToNativePtr()' instead.";
+      break;
     default:
       DCHECK(false) << "NYI: " << rwi.type().DebugString();
       break;
@@ -1074,6 +1075,7 @@ void SlotDescriptor::CodegenStoreNonNullAnyVal(
     case TYPE_STRUCT:
       DCHECK(false) << "Invalid type for this function. "
                     << "Call 'StoreStructToNativePtr()' instead.";
+      break;
     default:
       DCHECK(false) << "NYI: " << type.DebugString();
       break;
diff --git a/be/src/runtime/io/disk-file.cc b/be/src/runtime/io/disk-file.cc
index 767047567..85a378653 100644
--- a/be/src/runtime/io/disk-file.cc
+++ b/be/src/runtime/io/disk-file.cc
@@ -110,8 +110,10 @@ void MemBlock::Delete(bool* reserved, bool* allocated) {
       free(data_);
       data_ = nullptr;
       *allocated = true;
+      [[fallthrough]];
     case MemBlockStatus::RESERVED:
       *reserved = true;
+      [[fallthrough]];
     default:
       SetStatusLocked(lock, MemBlockStatus::DISABLED);
       DCHECK(data_ == nullptr);
diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc
index a6d9fbe2f..3e1721695 100644
--- a/be/src/runtime/raw-value.cc
+++ b/be/src/runtime/raw-value.cc
@@ -206,6 +206,7 @@ void RawValue::WriteNonNullPrimitive(const void* value, void* dst, const ColumnT
     case TYPE_STRUCT: {
       // Structs should be handled by a different Write() function within this class.
       DCHECK(false);
+      break;
     }
     default:
       DCHECK(false) << "RawValue::WriteNonNullPrimitive(): bad type: "
diff --git a/be/src/runtime/raw-value.inline.h b/be/src/runtime/raw-value.inline.h
index 4f135856b..390a0ccfa 100644
--- a/be/src/runtime/raw-value.inline.h
+++ b/be/src/runtime/raw-value.inline.h
@@ -147,7 +147,8 @@ inline bool RawValue::Eq(const void* v1, const void* v2, const ColumnType& type)
           return reinterpret_cast<const Decimal16Value*>(v1)->value()
               == reinterpret_cast<const Decimal16Value*>(v2)->value();
         default:
-          break;
+          DCHECK(false) << "Unknown decimal byte size: " << type.GetByteSize();
+          return 0;
       }
     default:
       DCHECK(false) << type;
diff --git a/be/src/runtime/runtime-filter-ir.cc b/be/src/runtime/runtime-filter-ir.cc
index 5c4f8113a..b515a3fd9 100644
--- a/be/src/runtime/runtime-filter-ir.cc
+++ b/be/src/runtime/runtime-filter-ir.cc
@@ -37,12 +37,14 @@ bool IR_ALWAYS_INLINE RuntimeFilter::Eval(
           return filter->EvalOverlap(col_type, val, val);
         }
       }
+      break;
     }
     case TRuntimeFilterType::IN_LIST: {
       InListFilter* filter = get_in_list_filter();
       if (LIKELY(filter && !filter->AlwaysTrue())) {
         return filter->Find(val, col_type);
       }
+      break;
     }
   }
   return true;
diff --git a/be/src/runtime/types.cc b/be/src/runtime/types.cc
index cf7ba89b4..dd1beeeb1 100644
--- a/be/src/runtime/types.cc
+++ b/be/src/runtime/types.cc
@@ -146,6 +146,7 @@ TPrimitiveType::type ToThrift(PrimitiveType ptype) {
     case TYPE_ARRAY:
     case TYPE_MAP:
       DCHECK(false) << "NYI: " << ptype;
+      [[fallthrough]];
     default: return TPrimitiveType::INVALID_TYPE;
   }
 }
diff --git a/be/src/service/query-result-set.cc b/be/src/service/query-result-set.cc
index f32f05fa1..97d97d97e 100644
--- a/be/src/service/query-result-set.cc
+++ b/be/src/service/query-result-set.cc
@@ -472,6 +472,7 @@ int HS2ColumnarResultSet::AddRows(
         to->binaryVal.values.insert(to->binaryVal.values.end(),
             from->binaryVal.values.begin() + start_idx,
             from->binaryVal.values.begin() + start_idx + rows_added);
+        break;
       default:
         DCHECK(false) << "Unsupported type: "
                       << TypeToString(ThriftToType(
diff --git a/be/src/util/bit-packing.inline.h b/be/src/util/bit-packing.inline.h
index 7863db0fb..6e859afae 100644
--- a/be/src/util/bit-packing.inline.h
+++ b/be/src/util/bit-packing.inline.h
@@ -411,8 +411,10 @@ const uint8_t* BitPacking::UnpackUpTo31Values(const uint8_t* __restrict__ in,
 
 #pragma push_macro("UNPACK_VALUES_CASE")
 #define UNPACK_VALUES_CASE(ignore1, i, ignore2) \
-  case 31 - i: out[30 - i] = \
-      static_cast<OutType>(UnpackValue<BIT_WIDTH, 30 - i, false>(in_buffer));
+  case 31 - i: \
+    out[30 - i] = \
+        static_cast<OutType>(UnpackValue<BIT_WIDTH, 30 - i, false>(in_buffer)); \
+    [[fallthrough]];
 
   // Use switch with fall-through cases to minimise branching.
   switch (num_values) {
@@ -451,11 +453,14 @@ const uint8_t* BitPacking::UnpackAndDecodeUpTo31Values(const uint8_t* __restrict
 
 #pragma push_macro("DECODE_VALUES_CASE")
 #define DECODE_VALUES_CASE(ignore1, i, ignore2) \
-  case 31 - i: { \
-    uint32_t idx = UnpackValue<BIT_WIDTH, 30 - i, false>(in_buffer); \
-    uint8_t* out_pos = reinterpret_cast<uint8_t*>(out) + (30 - i) * stride; \
-    DecodeValue(dict, dict_len, idx, reinterpret_cast<OutType*>(out_pos), decode_error); \
-  }
+  case 31 - i: \
+    { \
+      uint32_t idx = UnpackValue<BIT_WIDTH, 30 - i, false>(in_buffer); \
+      uint8_t* out_pos = reinterpret_cast<uint8_t*>(out) + (30 - i) * stride; \
+      DecodeValue(dict, dict_len, idx, reinterpret_cast<OutType*>(out_pos), \
+          decode_error); \
+    } \
+    [[fallthrough]];
 
   // Use switch with fall-through cases to minimise branching.
   switch (num_values) {
diff --git a/be/src/util/hash-util.h b/be/src/util/hash-util.h
index bb60997ae..d98dd47b3 100644
--- a/be/src/util/hash-util.h
+++ b/be/src/util/hash-util.h
@@ -140,14 +140,27 @@ class HashUtil {
 
     const uint8_t* data2 = reinterpret_cast<const uint8_t*>(data);
     switch (len & 7) {
-      case 7: h ^= uint64_t(data2[6]) << 48;
-      case 6: h ^= uint64_t(data2[5]) << 40;
-      case 5: h ^= uint64_t(data2[4]) << 32;
-      case 4: h ^= uint64_t(data2[3]) << 24;
-      case 3: h ^= uint64_t(data2[2]) << 16;
-      case 2: h ^= uint64_t(data2[1]) << 8;
-      case 1: h ^= uint64_t(data2[0]);
-              h *= MURMUR_PRIME;
+      case 7:
+        h ^= uint64_t(data2[6]) << 48;
+        [[fallthrough]];
+      case 6:
+        h ^= uint64_t(data2[5]) << 40;
+        [[fallthrough]];
+      case 5:
+        h ^= uint64_t(data2[4]) << 32;
+        [[fallthrough]];
+      case 4:
+        h ^= uint64_t(data2[3]) << 24;
+        [[fallthrough]];
+      case 3:
+        h ^= uint64_t(data2[2]) << 16;
+        [[fallthrough]];
+      case 2:
+        h ^= uint64_t(data2[1]) << 8;
+        [[fallthrough]];
+      case 1:
+        h ^= uint64_t(data2[0]);
+        h *= MURMUR_PRIME;
     }
 
     h ^= h >> MURMUR_R;
@@ -260,13 +273,26 @@ class HashUtil {
     v = 0;
 
     switch (len & 7) {
-      case 7: v ^= static_cast<uint64_t>(pos2[6]) << 48;
-      case 6: v ^= static_cast<uint64_t>(pos2[5]) << 40;
-      case 5: v ^= static_cast<uint64_t>(pos2[4]) << 32;
-      case 4: v ^= static_cast<uint64_t>(pos2[3]) << 24;
-      case 3: v ^= static_cast<uint64_t>(pos2[2]) << 16;
-      case 2: v ^= static_cast<uint64_t>(pos2[1]) << 8;
-      case 1: v ^= static_cast<uint64_t>(pos2[0]);
+      case 7:
+        v ^= static_cast<uint64_t>(pos2[6]) << 48;
+        [[fallthrough]];
+      case 6:
+        v ^= static_cast<uint64_t>(pos2[5]) << 40;
+        [[fallthrough]];
+      case 5:
+        v ^= static_cast<uint64_t>(pos2[4]) << 32;
+        [[fallthrough]];
+      case 4:
+        v ^= static_cast<uint64_t>(pos2[3]) << 24;
+        [[fallthrough]];
+      case 3:
+        v ^= static_cast<uint64_t>(pos2[2]) << 16;
+        [[fallthrough]];
+      case 2:
+        v ^= static_cast<uint64_t>(pos2[1]) << 8;
+        [[fallthrough]];
+      case 1:
+        v ^= static_cast<uint64_t>(pos2[0]);
         h ^= FastHashMix(v);
         h *= m;
     }
diff --git a/be/src/util/jwt-util.cc b/be/src/util/jwt-util.cc
index 63e031c4b..97d0df312 100644
--- a/be/src/util/jwt-util.cc
+++ b/be/src/util/jwt-util.cc
@@ -98,6 +98,7 @@ class JWKSetParser {
       case rapidjson::kNumberType:
         if (value.IsInt()) return "Integer";
         if (value.IsDouble()) return "Float";
+        [[fallthrough]];
       default:
         DCHECK(false);
         return "Unknown";
@@ -908,4 +909,4 @@ Status JWTHelper::GetCustomClaimUsername(const JWTDecodedToken* decoded_token,
   return status;
 }
 
-} // namespace impala
\ No newline at end of file
+} // namespace impala
diff --git a/be/src/util/redactor.cc b/be/src/util/redactor.cc
index fbdda6657..cbdff4400 100644
--- a/be/src/util/redactor.cc
+++ b/be/src/util/redactor.cc
@@ -63,6 +63,7 @@ string NameOfTypeOfJsonValue(const Value& value) {
     case rapidjson::kNumberType:
       if (value.IsInt()) return "Integer";
       if (value.IsDouble()) return "Float";
+      [[fallthrough]];
     default:
       DCHECK(false);
       return "Unknown";
diff --git a/be/src/util/string-parser.h b/be/src/util/string-parser.h
index c20432030..149c5d5eb 100644
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -154,6 +154,7 @@ class StringParser {
       switch (*s) {
         case '-':
           is_negative = true;
+          [[fallthrough]];
         case '+':
           ++s;
           --len;
@@ -310,6 +311,7 @@ class StringParser {
       case '-':
         negative = true;
         max_val = static_cast<UnsignedT>(std::numeric_limits<T>::max()) + 1;
+        [[fallthrough]];
       case '+': ++i;
     }
 
@@ -366,6 +368,7 @@ class StringParser {
       case '-':
         negative = true;
         max_val = static_cast<UnsignedT>(std::numeric_limits<T>::max()) + 1;
+        [[fallthrough]];
       case '+': i = 1;
     }
 
@@ -472,7 +475,9 @@ class StringParser {
     bool negative = false;
     int i = 0;
     switch (*s) {
-      case '-': negative = true;  // Fallthrough is intended.
+      case '-':
+        negative = true;
+        [[fallthrough]];
       case '+': i = 1;
     }
 
diff --git a/bin/run_clang_tidy.sh b/bin/run_clang_tidy.sh
index 4319873f5..816f201dd 100755
--- a/bin/run_clang_tidy.sh
+++ b/bin/run_clang_tidy.sh
@@ -62,10 +62,15 @@ TMP_STDERR=$(mktemp)
 STRCAT_MESSAGE="Impala-specific note: This can also be fixed using the StrCat() function \
 from be/src/gutil/strings strcat.h)"
 CLANG_STRING_CONCAT="performance-inefficient-string-concatenation"
+FALLTHROUGH_MESSAGE="Impala-specific note: Impala is a C++ 17 codebase, so the preferred \
+way to indicate intended fallthrough is C++ 17's [[fallthrough]]"
+CLANG_FALLTHROUGH="clang-diagnostic-implicit-fallthrough"
 trap "rm $TMP_STDERR" EXIT
 if ! run-clang-tidy.py -quiet -header-filter "${PIPE_DIRS%?}" \
                        -j"${CORES}" ${DIRS} 2> ${TMP_STDERR} | \
-  sed "/${CLANG_STRING_CONCAT}/ s#\$# ${STRCAT_MESSAGE}#";
+   sed "/${CLANG_STRING_CONCAT}/ s#\$# \n${STRCAT_MESSAGE}#" | \
+   sed "/${CLANG_FALLTHROUGH}/ s#\$# \n${FALLTHROUGH_MESSAGE}#" | \
+   sed 's#FALLTHROUGH_INTENDED#[[fallthrough]]#';
 then
   echo "run-clang-tidy.py hit an error, dumping stderr output"
   cat ${TMP_STDERR} >&2