You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/05/27 18:22:50 UTC

[GitHub] [incubator-pinot] mcvsubbu commented on a change in pull request #6977: Enhance debug endpoint for table debugging.

mcvsubbu commented on a change in pull request #6977:
URL: https://github.com/apache/incubator-pinot/pull/6977#discussion_r640807315



##########
File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/debug/TableDebugInfo.java
##########
@@ -102,21 +113,71 @@ public int getNumBrokers() {
     return _brokerDebugInfos;
   }
 
+  @JsonPropertyOrder({"segmentName", "serverState"})
   public static class SegmentDebugInfo {
     private final String _segmentName;
-    private final List<IsEvState> _states;
 
-    public SegmentDebugInfo(String name, List<IsEvState> states) {
-      _segmentName = name;
-      _states = states;
+    private final Map<String, SegmentState> _serverStateMap;
+
+    @JsonCreator
+    public SegmentDebugInfo(@JsonProperty("segmentName") String segmentName,
+        @JsonProperty("serverState") Map<String, SegmentState> segmentServerState) {
+      _segmentName = segmentName;
+      _serverStateMap = segmentServerState;
     }
 
     public String getSegmentName() {
       return _segmentName;
     }
 
-    public List<IsEvState> getSegmentStateInServer() {
-      return _states;
+    public Map<String, SegmentState> getServerState() {
+      return _serverStateMap;
+    }
+  }
+
+  /**
+   * This class represents the state of segment on the server:
+   *
+   * <ul>
+   *   <li>Ideal State vs External view.</li>
+   *   <li>Segment related errors and consumer information.</li>
+   *   <li>Segment size.</li>
+   * </ul>
+   */
+  public static class SegmentState {
+    private final String _isState;

Review comment:
       nit: maybe `_intendedState` and `_actualState`? `_isState` is kind of misleading to be a boolean :)

##########
File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java
##########
@@ -156,38 +177,94 @@ private TableDebugInfo debugTable(PinotHelixResourceManager pinotHelixResourceMa
    *
    * @param pinotHelixResourceManager Helix Resource Manager
    * @param tableNameWithType Name of table with type
+   * @param verbose If true, returns all segment info, else, only segments with errors
    * @return Debug information for segments
    */
   private List<TableDebugInfo.SegmentDebugInfo> debugSegments(PinotHelixResourceManager pinotHelixResourceManager,
-      String tableNameWithType) {
+      String tableNameWithType, boolean verbose) {
     ExternalView externalView = pinotHelixResourceManager.getTableExternalView(tableNameWithType);
     IdealState idealState = pinotHelixResourceManager.getTableIdealState(tableNameWithType);
 
-    List<TableDebugInfo.SegmentDebugInfo> segmentDebugInfos = new ArrayList<>();
+    List<TableDebugInfo.SegmentDebugInfo> result = new ArrayList<>();
     if (idealState == null) {
-      return segmentDebugInfos;
+      return result;
+    }
+
+    int serverRequestTimeoutMs = _controllerConf.getServerAdminRequestTimeoutSeconds() * 1000;
+    final Map<String, List<String>> serverToSegments =
+        _pinotHelixResourceManager.getServerToSegmentsMap(tableNameWithType);
+    Map<String, Map<String, SegmentServerDebugInfo>> segmentsDebugInfoFromServers;
+
+    BiMap<String, String> serverToEndpoints;
+    try {
+      serverToEndpoints = _pinotHelixResourceManager.getDataInstanceAdminEndpoints(serverToSegments.keySet());
+    } catch (InvalidConfigException e) {
+      throw new WebApplicationException(
+          "Caught exception when getting segment debug info for table: " + tableNameWithType);
     }
 
+    segmentsDebugInfoFromServers =
+        getSegmentsDebugInfoFromServers(tableNameWithType, serverToEndpoints, serverRequestTimeoutMs);
+
     for (Map.Entry<String, Map<String, String>> segmentMapEntry : idealState.getRecord().getMapFields().entrySet()) {
-      String segment = segmentMapEntry.getKey();
-      Map<String, String> instanceStateMap = segmentMapEntry.getValue();
+      String segmentName = segmentMapEntry.getKey();
+      Map<String, String> segmentIsMap = segmentMapEntry.getValue();
+
+      Map<String, TableDebugInfo.SegmentServerState> segmentServerState = new HashMap<>();
+      for (Map.Entry<String, Map<String, SegmentServerDebugInfo>> segmentEntry : segmentsDebugInfoFromServers
+          .entrySet()) {
+        String instanceName = segmentEntry.getKey();
 
-      List<TableDebugInfo.IsEvState> isEvStates = new ArrayList<>();
-      for (Map.Entry<String, String> entry : instanceStateMap.entrySet()) {
-        String serverName = entry.getKey();
-        String isState = entry.getValue();
+        String isState = segmentIsMap.get(instanceName);
+        String evState = (externalView != null) ? externalView.getStateMap(segmentName).get(instanceName) : "null";
+        Map<String, SegmentServerDebugInfo> segmentServerDebugInfoMap = segmentEntry.getValue();
 
-        String evState = (externalView != null) ? externalView.getStateMap(segment).get(serverName) : "null";
-        if (!isState.equals(evState)) {
-          isEvStates.add(new TableDebugInfo.IsEvState(serverName, isState, evState));
+        if (verbose || segmentHasErrors(segmentServerDebugInfoMap, isState, evState)) {
+          SegmentServerDebugInfo segmentServerDebugInfo = segmentServerDebugInfoMap.get(segmentName);
+
+          segmentServerState.put(instanceName,
+              new TableDebugInfo.SegmentServerState(isState, evState, segmentServerDebugInfo.getSegmentSize(),
+                  segmentServerDebugInfo.getConsumerInfo(), segmentServerDebugInfo.getErrorInfo()));
         }
       }
 
-      if (isEvStates.size() > 0) {
-        segmentDebugInfos.add(new TableDebugInfo.SegmentDebugInfo(segment, isEvStates));
+      if (!segmentServerState.isEmpty()) {
+        result.add(new TableDebugInfo.SegmentDebugInfo(segmentName, segmentServerState));
       }
     }
-    return segmentDebugInfos;
+
+    return result;
+  }
+
+  /**
+   * Helper method to check if a segment has any errors/issues.
+   *
+   * @param segmentServerDebugInfoMap Map from instanceName to segmentDebugInfo
+   * @param isState ideal state of segment
+   * @param evState external view of segment
+   * @return True if there's any error/issue for the segment, false otherwise.
+   */
+  private boolean segmentHasErrors(Map<String, SegmentServerDebugInfo> segmentServerDebugInfoMap, String isState,
+      String evState) {
+    if (!isState.equals(evState)) {
+      return true;

Review comment:
       The only way I know is the timestamp difference between IS and EV znodes.
   
   Another thing we need to decide is if the state of `OFFLINE` in both IS and EV, is that counted as an ERROR ? My opinion has always been that it should not, so I am fine with line 249. But I have heard other voices that say that this is an error condition.

##########
File path: pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/SegmentServerDebugInfo.java
##########
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.restlet.resources;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+
+/**
+ * This class represents the server-side debug information for a segment.
+ */
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class SegmentServerDebugInfo {
+  private final String _segmentName;
+
+  private final SegmentConsumerInfo _consumerInfo;
+  private final SegmentErrorInfo _errorInfo;
+  private final String _segmentSize; // Segment Size in Human readable format

Review comment:
       Any reason this is a string? Server APIs are internal to pinot, and we can represent a long, right? (in that case, please add a unit to the name as in `_segmentSizeMB`)

##########
File path: pinot-server/src/main/java/org/apache/pinot/server/starter/helix/HelixInstanceDataManagerConfig.java
##########
@@ -103,7 +103,11 @@
   //
   private static final String MAX_PARALLEL_REFRESH_THREADS = "max.parallel.refresh.threads";
 
+  // Size of cache that holds errors.
+  private static final String ERROR_CACHE_SIZE = "error.cache.size";
+
   private final static String[] REQUIRED_KEYS = {INSTANCE_ID, INSTANCE_DATA_DIR, READ_MODE};
+  private static final long DEFAULT_ERROR_CACHE_SIZE = 100L;

Review comment:
       Can you make the default as 0 to preserve existing behavior? 

##########
File path: pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/SegmentServerDebugInfo.java
##########
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.restlet.resources;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+
+/**
+ * This class represents the server-side debug information for a segment.

Review comment:
       You may also want to add a comment that this class should not be used on the controller side user-facing interface. I am not sure if there is a way to annotate that. For now, a comment should suffice.

##########
File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java
##########
@@ -253,4 +330,46 @@ private TableDebugInfo debugTable(PinotHelixResourceManager pinotHelixResourceMa
     }
     return serverDebugInfos;
   }
+
+  /**
+   * This method makes a MultiGet call to all servers to get the segments debug info.
+   * @return Map of server to list of segment debug info's on the server.
+   */
+  private Map<String, Map<String, SegmentServerDebugInfo>> getSegmentsDebugInfoFromServers(String tableNameWithType,
+      BiMap<String, String> serverToEndpoints, int timeoutMs) {
+    LOGGER.info("Reading segments debug info from servers: {} for table: {}", serverToEndpoints.keySet(),
+        tableNameWithType);
+
+    List<String> serverUrls = new ArrayList<>(serverToEndpoints.size());
+    BiMap<String, String> endpointsToServers = serverToEndpoints.inverse();
+    for (String endpoint : endpointsToServers.keySet()) {
+      String segmentDebugInfoURI = String.format("%s/debug/tables/%s", endpoint, tableNameWithType);
+      serverUrls.add(segmentDebugInfoURI);
+    }
+
+    CompletionServiceHelper completionServiceHelper =
+        new CompletionServiceHelper(_executor, _connectionManager, endpointsToServers);
+    CompletionServiceHelper.CompletionServiceResponse serviceResponse =
+        completionServiceHelper.doMultiGetRequest(serverUrls, tableNameWithType, timeoutMs);
+
+    Map<String, Map<String, SegmentServerDebugInfo>> serverToSegmentDebugInfoList = new HashMap<>();
+    int failedParses = 0;
+    for (Map.Entry<String, String> streamResponse : serviceResponse._httpResponses.entrySet()) {
+      try {
+        List<SegmentServerDebugInfo> segmentDebugInfos =
+            JsonUtils.stringToObject(streamResponse.getValue(), new TypeReference<List<SegmentServerDebugInfo>>() {
+            });
+        Map<String, SegmentServerDebugInfo> segmentsMap = segmentDebugInfos.stream()
+            .collect(Collectors.toMap(SegmentServerDebugInfo::getSegmentName, Function.identity()));
+        serverToSegmentDebugInfoList.put(streamResponse.getKey(), segmentsMap);
+      } catch (IOException e) {
+        failedParses++;
+        LOGGER.error("Unable to parse server {} response due to an error: ", streamResponse.getKey(), e);
+      }
+    }
+    if (failedParses != 0) {

Review comment:
       Return this as metadata so the user knows? The idea is that the administrator does not need to look at logs, right?

##########
File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java
##########
@@ -63,6 +79,8 @@
 @Api(tags = Constants.CLUSTER_TAG)
 @Path("/")
 public class TableDebugResource {
+  private static final Logger LOGGER = LoggerFactory.getLogger(TableDebugResource.class);

Review comment:
       Perhaps add a comment at the header of this class that we don't intend to keep this backward compatible. Best is to annotate it somehow, but I am not sure how. We don't want people to use it in scripts, right? Alternatively, add a `/v1`/ in the uri path

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
##########
@@ -59,9 +65,14 @@
   protected HelixManager _helixManager;
   protected String _authToken;
 
+  // Fixed size LRU cache with InstanceName - SegmentName pair as key, and segment related
+  // errors as the value.
+  protected LoadingCache<Pair<String, String>, SegmentErrorInfo> _errorCache;

Review comment:
       Why do we need instance name in every key here? BaseTableDataManager lives on an instance, so this part of the key  will be sam for every segment. 

##########
File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/debug/TableDebugInfo.java
##########
@@ -102,21 +113,71 @@ public int getNumBrokers() {
     return _brokerDebugInfos;
   }
 
+  @JsonPropertyOrder({"segmentName", "serverState"})
   public static class SegmentDebugInfo {
     private final String _segmentName;
-    private final List<IsEvState> _states;
 
-    public SegmentDebugInfo(String name, List<IsEvState> states) {
-      _segmentName = name;
-      _states = states;
+    private final Map<String, SegmentState> _serverStateMap;
+
+    @JsonCreator
+    public SegmentDebugInfo(@JsonProperty("segmentName") String segmentName,
+        @JsonProperty("serverState") Map<String, SegmentState> segmentServerState) {
+      _segmentName = segmentName;
+      _serverStateMap = segmentServerState;
     }
 
     public String getSegmentName() {
       return _segmentName;
     }
 
-    public List<IsEvState> getSegmentStateInServer() {
-      return _states;
+    public Map<String, SegmentState> getServerState() {
+      return _serverStateMap;
+    }
+  }
+
+  /**
+   * This class represents the state of segment on the server:
+   *
+   * <ul>
+   *   <li>Ideal State vs External view.</li>
+   *   <li>Segment related errors and consumer information.</li>
+   *   <li>Segment size.</li>
+   * </ul>
+   */
+  public static class SegmentState {
+    private final String _isState;
+    private final String _evState;
+    private final String _segmentSize;
+    private final SegmentConsumerInfo _consumerInfo;
+    private final SegmentErrorInfo _errorInfo;

Review comment:
       So, SegmentErrorInfo is propagated as it is to external world? 

##########
File path: pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/TableDebugResource.java
##########
@@ -82,9 +100,10 @@
   @Produces(MediaType.APPLICATION_JSON)
   @ApiOperation(value = "Get debug information for table.", notes = "Debug information for table.")
   @ApiResponses(value = {@ApiResponse(code = 200, message = "Success"), @ApiResponse(code = 500, message = "Internal server error")})
-  public String getClusterInfo(
+  public String getTableDebugInfo(
       @ApiParam(value = "Name of the table", required = true) @PathParam("tableName") String tableName,
-      @ApiParam(value = "OFFLINE|REALTIME") @QueryParam("type") String tableTypeStr)
+      @ApiParam(value = "OFFLINE|REALTIME") @QueryParam("type") String tableTypeStr,
+      @ApiParam(value = "Get detailed information") @DefaultValue("false") @QueryParam("verbose") boolean verbose)

Review comment:
       I suggest make this an integer of verbosity level, since we may have multiple levels? Level 0 can show only abnormal stuff. Otherwise just display "All good" or something like that. Level 1 may include error msgs, and Level 2 stack traces, and so on. Gives us room to play with.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org