You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by GitBox <gi...@apache.org> on 2019/07/24 23:12:29 UTC
[GitHub] [incubator-druid] jihoonson commented on a change in pull request #8141: Use PartitionsSpec for all task types

jihoonson commented on a change in pull request #8141: Use PartitionsSpec for all task types
URL: https://github.com/apache/incubator-druid/pull/8141#discussion_r307057919
 
 

 ##########
 File path: indexing-hadoop/src/main/java/org/apache/druid/indexer/partitions/PartitionsSpec.java
 ##########
 @@ -19,40 +19,55 @@
 
 package org.apache.druid.indexer.partitions;
 
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.annotation.JsonSubTypes;
 import com.fasterxml.jackson.annotation.JsonTypeInfo;
-import org.apache.druid.indexer.HadoopDruidIndexerConfig;
-import org.apache.druid.indexer.Jobby;
 
-import java.util.List;
+import javax.annotation.Nullable;
 
-@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = HashedPartitionsSpec.class)
+/**
+ * PartitionsSpec describes the secondary partitioning method for data ingestion.
+ */
+@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type", defaultImpl = HadoopHashedPartitionsSpec.class)
 @JsonSubTypes(value = {
-    @JsonSubTypes.Type(name = "dimension", value = SingleDimensionPartitionsSpec.class),
-    @JsonSubTypes.Type(name = "hashed", value = HashedPartitionsSpec.class)
+    @JsonSubTypes.Type(name = "dimension", value = HadoopSingleDimensionPartitionsSpec.class), // backward compatibility
+    @JsonSubTypes.Type(name = "hashed", value = HadoopHashedPartitionsSpec.class), // backward compatibility
+    @JsonSubTypes.Type(name = "hadoop_single_dim_partitions", value = HadoopSingleDimensionPartitionsSpec.class),
+    @JsonSubTypes.Type(name = "hadoop_hashed_partitions", value = HadoopHashedPartitionsSpec.class),
+    @JsonSubTypes.Type(name = "single_dim_partitions", value = SingleDimensionPartitionsSpec.class),
+    @JsonSubTypes.Type(name = "hashed_partitions", value = HashedPartitionsSpec.class),
+    @JsonSubTypes.Type(name = "dynamic_partitions", value = DynamicPartitionsSpec.class)
 })
 public interface PartitionsSpec
 {
-  @JsonIgnore
-  Jobby getPartitionJob(HadoopDruidIndexerConfig config);
-
-  @JsonProperty
-  long getTargetPartitionSize();
-
-  @JsonProperty
-  long getMaxPartitionSize();
+  int DEFAULT_MAX_ROWS_PER_SEGMENT = 5_000_000;
 
-  @JsonProperty
-  boolean isAssumeGrouped();
+  /**
+   * Returns the max number of rows per segment.
+   * Implementations can have different default values, which could even be null.
+   * Callers should use the right value depending on the context if this returns null.
+   */
+  @Nullable
+  Integer getMaxRowsPerSegment();
 
-  @JsonIgnore
-  boolean isDeterminingPartitions();
+  /**
+   * Returns true if this partitionsSpec needs to determine the number of partitions to start data ingestion.
 
 Review comment:
   Thanks, fixed.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org