You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by "imply-cheddar (via GitHub)" <gi...@apache.org> on 2023/05/17 01:25:27 UTC

[GitHub] [druid] imply-cheddar commented on a diff in pull request #14296: more resilient segment metadata, dont parallel merge internal segment metadata queries

imply-cheddar commented on code in PR #14296:
URL: https://github.com/apache/druid/pull/14296#discussion_r1195829980


##########
processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java:
##########
@@ -112,38 +112,47 @@ public Map<String, ColumnAnalysis> analyze(Segment segment)
         capabilities = storageAdapter.getColumnCapabilities(columnName);
       }
 
-      final ColumnAnalysis analysis;
-
-      switch (capabilities.getType()) {
-        case LONG:
-          final int bytesPerRow =
-              ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES;
-
-          analysis = analyzeNumericColumn(capabilities, numRows, bytesPerRow);
-          break;
-        case FLOAT:
-          analysis = analyzeNumericColumn(capabilities, numRows, NUM_BYTES_IN_TEXT_FLOAT);
-          break;
-        case DOUBLE:
-          analysis = analyzeNumericColumn(capabilities, numRows, Double.BYTES);
-          break;
-        case STRING:
-          if (index != null) {
-            analysis = analyzeStringColumn(capabilities, index.getColumnHolder(columnName));
-          } else {
-            analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
-          }
-          break;
-        case ARRAY:
-          analysis = analyzeArrayColumn(capabilities);
-          break;
-        case COMPLEX:
-          final ColumnHolder columnHolder = index != null ? index.getColumnHolder(columnName) : null;
-          analysis = analyzeComplexColumn(capabilities, numRows, columnHolder);
-          break;
-        default:
-          log.warn("Unknown column type[%s].", capabilities.asTypeString());
-          analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", capabilities.asTypeString()));
+      ColumnAnalysis analysis;
+
+      try {
+        switch (capabilities.getType()) {
+          case LONG:
+            final int bytesPerRow =
+                ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES;
+
+            analysis = analyzeNumericColumn(capabilities, numRows, bytesPerRow);
+            break;
+          case FLOAT:
+            analysis = analyzeNumericColumn(capabilities, numRows, NUM_BYTES_IN_TEXT_FLOAT);
+            break;
+          case DOUBLE:
+            analysis = analyzeNumericColumn(capabilities, numRows, Double.BYTES);
+            break;
+          case STRING:
+            if (index != null) {
+              analysis = analyzeStringColumn(capabilities, index.getColumnHolder(columnName));
+            } else {
+              analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
+            }
+            break;
+          case ARRAY:
+            analysis = analyzeArrayColumn(capabilities);
+            break;
+          case COMPLEX:
+            final ColumnHolder columnHolder = index != null ? index.getColumnHolder(columnName) : null;
+            analysis = analyzeComplexColumn(capabilities, numRows, columnHolder);
+            break;
+          default:
+            log.warn("Unknown column type[%s].", capabilities.asTypeString());
+            analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", capabilities.asTypeString()));
+        }
+      }
+      catch (Throwable throwable) {
+        // eat the exception and add error analysis; this is preferable to exploding, since exploding results in
+        // the broker downstream SQL metadata cache being left in a state where it is unable to completely finish.
+        // The SQL schema relies on this stuff functioning, and so will continuously retry when it faces a failure
+        log.warn(throwable, "Error analyzing column");

Review Comment:
   Might be nice to know the column name and its capabilities and stuff in the log too?



##########
processing/src/main/java/org/apache/druid/query/metadata/SegmentAnalyzer.java:
##########
@@ -112,38 +112,47 @@ public Map<String, ColumnAnalysis> analyze(Segment segment)
         capabilities = storageAdapter.getColumnCapabilities(columnName);
       }
 
-      final ColumnAnalysis analysis;
-
-      switch (capabilities.getType()) {
-        case LONG:
-          final int bytesPerRow =
-              ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES;
-
-          analysis = analyzeNumericColumn(capabilities, numRows, bytesPerRow);
-          break;
-        case FLOAT:
-          analysis = analyzeNumericColumn(capabilities, numRows, NUM_BYTES_IN_TEXT_FLOAT);
-          break;
-        case DOUBLE:
-          analysis = analyzeNumericColumn(capabilities, numRows, Double.BYTES);
-          break;
-        case STRING:
-          if (index != null) {
-            analysis = analyzeStringColumn(capabilities, index.getColumnHolder(columnName));
-          } else {
-            analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
-          }
-          break;
-        case ARRAY:
-          analysis = analyzeArrayColumn(capabilities);
-          break;
-        case COMPLEX:
-          final ColumnHolder columnHolder = index != null ? index.getColumnHolder(columnName) : null;
-          analysis = analyzeComplexColumn(capabilities, numRows, columnHolder);
-          break;
-        default:
-          log.warn("Unknown column type[%s].", capabilities.asTypeString());
-          analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", capabilities.asTypeString()));
+      ColumnAnalysis analysis;
+
+      try {
+        switch (capabilities.getType()) {
+          case LONG:
+            final int bytesPerRow =
+                ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES;
+
+            analysis = analyzeNumericColumn(capabilities, numRows, bytesPerRow);
+            break;
+          case FLOAT:
+            analysis = analyzeNumericColumn(capabilities, numRows, NUM_BYTES_IN_TEXT_FLOAT);
+            break;
+          case DOUBLE:
+            analysis = analyzeNumericColumn(capabilities, numRows, Double.BYTES);
+            break;
+          case STRING:
+            if (index != null) {
+              analysis = analyzeStringColumn(capabilities, index.getColumnHolder(columnName));
+            } else {
+              analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
+            }
+            break;
+          case ARRAY:
+            analysis = analyzeArrayColumn(capabilities);
+            break;
+          case COMPLEX:
+            final ColumnHolder columnHolder = index != null ? index.getColumnHolder(columnName) : null;
+            analysis = analyzeComplexColumn(capabilities, numRows, columnHolder);
+            break;
+          default:
+            log.warn("Unknown column type[%s].", capabilities.asTypeString());
+            analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", capabilities.asTypeString()));
+        }
+      }
+      catch (Throwable throwable) {

Review Comment:
   Does it really need to be Throwable?  This catches OOME and stuff like that.  I hope that anything that needs catching here is a `RuntimeException` instead.



##########
sql/src/main/java/org/apache/druid/sql/calcite/schema/SegmentMetadataCache.java:
##########
@@ -920,7 +921,12 @@ protected Sequence<SegmentAnalysis> runSegmentMetadataQuery(
         querySegmentSpec,
         new AllColumnIncluderator(),
         false,
-        brokerInternalQueryConfig.getContext(),
+        // we are not merging segment metadata query result, why use parallel merge pool

Review Comment:
   nit: "disable the parallel merge because we don't care about the merge and don't want to consume its resources"



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org