You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2022/06/20 19:10:04 UTC

[GitHub] [pinot] npawar commented on a diff in pull request #8914: add api to check segment storage tier

npawar commented on code in PR #8914:
URL: https://github.com/apache/pinot/pull/8914#discussion_r901929458


##########
pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentRestletResource.java:
##########
@@ -717,6 +720,53 @@ public String getServerMetadata(
     return segmentsMetadata;
   }
 
+  @GET
+  @Path("segments/{tableName}/tier")
+  @Produces(MediaType.APPLICATION_JSON)
+  @ApiOperation(value = "Get storage tier for all segments in the given table", notes = "Get storage tier for all "
+      + "segments in the given table")
+  public TableTierReader.TableTierDetails getTableTier(
+      @ApiParam(value = "Name of the table", required = true) @PathParam("tableName") String tableName,
+      @ApiParam(value = "OFFLINE|REALTIME") @QueryParam("type") String tableTypeStr) {
+    LOGGER.info("Received a request to get storage tier for all segments for table {}", tableName);
+    return getTableTierInternal(tableName, null, tableTypeStr);
+  }
+
+  @GET
+  @Path("segments/{tableName}/{segmentName}/tier")
+  @Produces(MediaType.APPLICATION_JSON)
+  @ApiOperation(value = "Get storage tiers for the given segment", notes = "Get storage tiers for the given segment")
+  public TableTierReader.TableTierDetails getSegmentTier(
+      @ApiParam(value = "Name of the table", required = true) @PathParam("tableName") String tableName,
+      @ApiParam(value = "Name of the segment", required = true) @PathParam("segmentName") @Encoded String segmentName,
+      @ApiParam(value = "OFFLINE|REALTIME") @QueryParam("type") String tableTypeStr) {
+    segmentName = URIUtils.decode(segmentName);
+    LOGGER.info("Received a request to get storage tier for segment {} in table {}", segmentName, tableName);
+    return getTableTierInternal(tableName, segmentName, tableTypeStr);
+  }
+
+  private TableTierReader.TableTierDetails getTableTierInternal(String tableName, @Nullable String segmentName,
+      String tableTypeStr) {

Review Comment:
   Should `tableTypeStr` also be annotated as `@Nullable`?



##########
pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableTierInfo.java:
##########
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.common.restlet.resources;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.util.Map;
+
+
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class TableTierInfo {
+  private final String _tableName;
+  private final Map<String, String> _segmentTiers;

Review Comment:
   Please add a description for each of these fields using `@JsonPropertyDescription`.



##########
pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotSegmentRestletResource.java:
##########
@@ -717,6 +720,53 @@ public String getServerMetadata(
     return segmentsMetadata;
   }
 
+  @GET
+  @Path("segments/{tableName}/tier")
+  @Produces(MediaType.APPLICATION_JSON)
+  @ApiOperation(value = "Get storage tier for all segments in the given table", notes = "Get storage tier for all "
+      + "segments in the given table")
+  public TableTierReader.TableTierDetails getTableTier(
+      @ApiParam(value = "Name of the table", required = true) @PathParam("tableName") String tableName,
+      @ApiParam(value = "OFFLINE|REALTIME") @QueryParam("type") String tableTypeStr) {
+    LOGGER.info("Received a request to get storage tier for all segments for table {}", tableName);
+    return getTableTierInternal(tableName, null, tableTypeStr);
+  }
+
+  @GET
+  @Path("segments/{tableName}/{segmentName}/tier")
+  @Produces(MediaType.APPLICATION_JSON)
+  @ApiOperation(value = "Get storage tiers for the given segment", notes = "Get storage tiers for the given segment")
+  public TableTierReader.TableTierDetails getSegmentTier(
+      @ApiParam(value = "Name of the table", required = true) @PathParam("tableName") String tableName,
+      @ApiParam(value = "Name of the segment", required = true) @PathParam("segmentName") @Encoded String segmentName,
+      @ApiParam(value = "OFFLINE|REALTIME") @QueryParam("type") String tableTypeStr) {
+    segmentName = URIUtils.decode(segmentName);
+    LOGGER.info("Received a request to get storage tier for segment {} in table {}", segmentName, tableName);
+    return getTableTierInternal(tableName, segmentName, tableTypeStr);
+  }
+
+  private TableTierReader.TableTierDetails getTableTierInternal(String tableName, @Nullable String segmentName,
+      String tableTypeStr) {
+    TableType tableType = Constants.validateTableType(tableTypeStr);
+    String tableNameWithType =

Review Comment:
   If the user doesn't provide `tableTypeStr`, we will only return results for the OFFLINE table, even if it is a hybrid table. How about just making `tableTypeStr` a required parameter in the APIs?



##########
pinot-controller/src/main/java/org/apache/pinot/controller/util/TableTierReader.java:
##########
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.controller.util;
+
+import com.google.common.collect.BiMap;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Executor;
+import javax.annotation.Nullable;
+import org.apache.commons.httpclient.HttpConnectionManager;
+import org.apache.pinot.common.exception.InvalidConfigException;
+import org.apache.pinot.common.restlet.resources.TableTierInfo;
+import org.apache.pinot.controller.helix.core.PinotHelixResourceManager;
+
+
+/**
+ * Reads segment storage tiers from servers for the given table.
+ */
+public class TableTierReader {
+  private final Executor _executor;
+  private final HttpConnectionManager _connectionManager;
+  private final PinotHelixResourceManager _helixResourceManager;
+
+  public TableTierReader(Executor executor, HttpConnectionManager connectionManager,
+      PinotHelixResourceManager helixResourceManager) {
+    _executor = executor;
+    _connectionManager = connectionManager;
+    _helixResourceManager = helixResourceManager;
+  }
+
+  /**
+   * Get the segment storage tiers for the given table. The servers or segments not responding the request are
+   * recorded in the result to be checked by caller.
+   *
+   * @param tableNameWithType table name with type
+   * @param timeoutMs timeout for reading segment tiers from servers
+   * @return details of segment storage tiers for the given table
+   */
+  public TableTierDetails getTableTierDetails(String tableNameWithType, @Nullable String segmentName, int timeoutMs)
+      throws InvalidConfigException {
+    Map<String, List<String>> serverToSegmentsMap = new HashMap<>();
+    if (segmentName == null) {
+      serverToSegmentsMap.putAll(_helixResourceManager.getServerToSegmentsMap(tableNameWithType));
+    } else {
+      List<String> segmentInList = Collections.singletonList(segmentName);
+      for (String server : _helixResourceManager.getServers(tableNameWithType, segmentName)) {
+        serverToSegmentsMap.put(server, segmentInList);
+      }
+    }
+    BiMap<String, String> endpoints = _helixResourceManager.getDataInstanceAdminEndpoints(serverToSegmentsMap.keySet());
+    ServerTableTierReader serverTableTierReader = new ServerTableTierReader(_executor, _connectionManager);
+    Map<String, TableTierInfo> serverToTableTierInfoMap =
+        serverTableTierReader.getTableTierInfoFromServers(endpoints, tableNameWithType, timeoutMs);
+
+    TableTierDetails tableTierDetails = new TableTierDetails(tableNameWithType);
+    for (Map.Entry<String, List<String>> entry : serverToSegmentsMap.entrySet()) {
+      String server = entry.getKey();
+      TableTierInfo tableTierInfo = serverToTableTierInfoMap.get(server);
+      if (tableTierInfo == null) {
+        tableTierDetails._missingServers.add(server);
+        continue;
+      }
+      Map<String, String> segmentTiers = tableTierInfo.getSegmentTiers();
+      for (String expectedSegment : entry.getValue()) {
+        if (!segmentTiers.containsKey(expectedSegment)) {
+          tableTierDetails._missingSegments.computeIfAbsent(server, (k) -> new HashSet<>()).add(expectedSegment);
+        } else {
+          tableTierDetails._segmentTiers.computeIfAbsent(expectedSegment, (k) -> new HashMap<>())
+              .put(server, segmentTiers.get(expectedSegment));
+        }
+      }
+    }
+    return tableTierDetails;
+  }
+
+  // This class aggregates the TableTierInfo returned from multi servers.
+  public static class TableTierDetails {
+    private final String _tableName;
+    private final Set<String> _missingServers = new HashSet<>();

Review Comment:
   Splitting these details (missing servers, missing segments, happy-path responses) is nice when you want to look at just one of them. But I'm wondering whether this creates too many places to look at to get the complete picture.
   What do you think about having all the info in `_segmentTiers`? `missingSegments` and `missingServers` can still be there in addition. What I mean is: for a missing segment, put it in the `segmentTiers` map as segment -> (server -> null/empty). Also add an entry for any segment that was supposed to be found on a missing server.
   If there's some gotcha in doing the above (e.g. the local tier is also represented as null), then maybe just make the `missingSegments` map segment -> server, so that it is easier to read the two together?



##########
pinot-server/src/main/java/org/apache/pinot/server/api/resources/TableTierResource.java:
##########
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.server.api.resources;
+
+import io.swagger.annotations.Api;
+import io.swagger.annotations.ApiKeyAuthDefinition;
+import io.swagger.annotations.ApiOperation;
+import io.swagger.annotations.ApiParam;
+import io.swagger.annotations.ApiResponse;
+import io.swagger.annotations.ApiResponses;
+import io.swagger.annotations.Authorization;
+import io.swagger.annotations.SecurityDefinition;
+import io.swagger.annotations.SwaggerDefinition;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import javax.inject.Inject;
+import javax.ws.rs.DefaultValue;
+import javax.ws.rs.GET;
+import javax.ws.rs.Path;
+import javax.ws.rs.PathParam;
+import javax.ws.rs.Produces;
+import javax.ws.rs.QueryParam;
+import javax.ws.rs.WebApplicationException;
+import javax.ws.rs.core.HttpHeaders;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+import org.apache.pinot.common.restlet.resources.ResourceUtils;
+import org.apache.pinot.common.restlet.resources.TableTierInfo;
+import org.apache.pinot.core.data.manager.InstanceDataManager;
+import org.apache.pinot.core.data.manager.offline.ImmutableSegmentDataManager;
+import org.apache.pinot.segment.local.data.manager.SegmentDataManager;
+import org.apache.pinot.segment.local.data.manager.TableDataManager;
+import org.apache.pinot.segment.spi.ImmutableSegment;
+import org.apache.pinot.server.starter.ServerInstance;
+
+import static org.apache.pinot.spi.utils.CommonConstants.SWAGGER_AUTHORIZATION_KEY;
+
+
+/**
+ * A server-side API to get the storage tiers of immutable segments of the given table from the server being requested.
+ */
+@Api(tags = "Table", authorizations = {@Authorization(value = SWAGGER_AUTHORIZATION_KEY)})
+@SwaggerDefinition(securityDefinition = @SecurityDefinition(apiKeyAuthDefinitions = @ApiKeyAuthDefinition(name =
+    HttpHeaders.AUTHORIZATION, in = ApiKeyAuthDefinition.ApiKeyLocation.HEADER, key = SWAGGER_AUTHORIZATION_KEY)))
+@Path("/")
+public class TableTierResource {
+
+  @Inject
+  private ServerInstance _serverInstance;
+
+  @GET
+  @Produces(MediaType.APPLICATION_JSON)
+  @Path("/tables/{tableName}/tier")

Review Comment:
   Rename the path parameter from `tableName` to `tableNameWithType` (s/tableName/tableNameWithType)?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org