Posted to commits@pinot.apache.org by ne...@apache.org on 2021/02/02 02:42:50 UTC

[incubator-pinot] branch sharded_consumer_type_support_with_kinesis updated (f9f2419 -> db2136a)

This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a change to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git.


 discard f9f2419  Avoid writing 'stream' and also 'stream.kinesis.topic.name'
 discard 5ff70d2  Add unit tests in Kinesis consumer (#6410)
    omit 467965b  Add tests for end-of-life cases
    omit f00c8dc  End-of-shard as end criteria AND consume shards in order
    omit efbeea2  LIcense headers
    omit bf81aac  Cleanup, javadocs, comments
    omit f39dbb7  Remove new partition groups creation in commit
    omit 666c220  Dont create new CONSUMING segment if shard has reached end of life
    omit 9c31f41  Use shardId's last digits as partitionGroupId
    omit 05e126b  Remove unused classes and changes
    omit c35d4a6  Consumer tweaks to get it working
    omit f006615  Fix offsets in StreamMetadataProvider impl
    omit 781e167  Add support for stream partition offsets (#6402)
    omit a341b28  Implementation fixes
    omit 6240808  fixing compilation
    omit eac6c90  Return message batch instead of list in the fetch result
    omit abe6647  Add isEndOfPartition check in checkpoints
    omit bd7f673  Handle timeout exception in consumer and make shard iterator type configurable
    omit 0af84b1  Add test code for kinesis
    omit 97d3ffc  Change shard metadata logic
    omit 6fcdf3e  Refactor: get shard iterator methods
    omit 173b3c4  Handle closed connections
    omit bdbab1e  Refactor code
    omit 2c1bb76  Handle exceptions
    omit b0f82c4  Add license header
    omit 4eb2aa6  fetch records with timeout
    omit 2399cbb  Add Kinesis config wrapper
    omit f3db54c  Add license headers
    omit 9bdc27f  Reformat code
    omit d8b18cd  Move shardId out of checkpoint to partition group metadata
    omit f9c317d  Fix consumer code
    omit 86fd014  Refactor kinesis shard metadata interface and add shardId to the metadata
    omit 855e39d  Refactor PartitionGroupMetadataMap interface
    omit e7a5314  Add kinesis code to handle offsets
    omit 5b44917  Add PartitionGroupMetdataMap interface
    omit e094e49  Add initial implementation of Kinesis consumer
    omit 87d0c26  Add interfaces for V2 consumers
    omit 6f4336e  default methods to avoid interface changes
    omit 9aa7e58  Server side changes and some fixes
    omit 0fa424e  An attempt at server-side changes
    omit eb0e4c7  Checnges in test to make it complie
    omit 1cb09cf  More controller side changes
    omit 1f81c1e  Separate PartitionGroupInfo and PartitionGroupMetadata
    omit b139460  Controller side code
    omit 3ef1870  Rename partitionId to partitionGroupId
    omit 652a3e6  StreamPartitionOffset to implement Checkpoint
    omit 54ce986  Controller side changes pseudo code
     add 1bb6c14  Add FST index which works on top of REGEXP_LIKE operator. (#6120)
     add fed6750  Adding a version option to pinot admin to show all the component versions (#6380)
     add e268a79  Adding S3PinotFS as one of the default PinotFS for Quickstart (#6379)
     add ea0bfa0  Add json index support (#6216)
     add 4ea345d  Adding more rolling file log4j2 configs for docker images (#6390)
     add b936f90  Update pinot realtime to match helm chart. (#6392)
     add 6b43aef  cleanup tar.gz segment files on job exit (#6385)
     add 13bd443  Fix remaining links to outdated helm chart repos. (#6394)
     add 93a4515  Adding Pinot minion segment generation and push task. (#6340)
     add 19101e9  Make required interfaces or classes serializable for spark (#6384)
     add 37f2e28  Segment reset API (#6336)
     add e87e141  [TE] limit reflection scanning range (#6405)
     add 02ab193  remove accidental junit imports (#6404)
     add 8fcb17d  Compatibility test for segment operations upload and delete (#6382)
     add 8def748  Make minion tasks pluggable via reflection (#6395)
     add f09de82  Fix the raw single column distinct executors (#6411)
     add 7e0398b  data gen - use unique output file names in csv mode (#6407)
     add e11bcce  Real-time json index (#6408)
     add d04785c  Introduce 'LOOKUP' Transform Function (#6383)
     add 3d4c98d  [TE]frontend - Add support for Group Constituents and Entity Metric components (#6421)
     add e018695  Fix escape character in transform function literals (#6416)
     add a5c4ed2  Update groovy (#6425)
     add 3e4c325  [INFRA] Cancel the previous builds (#6429)
     add 3d24302  refactor AnomaliesResponses to avoid duplicate code (#6406)
     add 19e3d51  [TE] endpoint - harleyjj/rca - add forecast, upper, lower, and yoXy to aggregate/chunk enpoint (#6432)
     add 33830cb  [TE] suppress the anomaly if current value is NaN (#6428)
     add 68fbb9c  [TE] frontend - harleyjj/rca - display metric funnel in metrics table when forecast selected (#6333)
     add 99fe289  [TE] fix dimensional summary in emails (#6443)
     add 72a7849  Support confluent schema registry (#6434)
     add 832ece7  Bug Fix column metadata, read from the correct property config for hasFSTIndex (#6441)
     add 8d3d4d4  Merge H3-index branch to master (#6409)
     add e8c4636  Fixing the issue that raw table name extracts from segment metadata might contain _OFFLINE suffix (#6445)
     add 950295a  Fix gcs listFiles (#6426)
     add d83e371  Adding ImportData sub command in pinot admin (#6396)
     add 28882ba  Adding pinot minion component into helm (#6430)
     add 8085fb7  setting default Data Type while setting type in Add Schema dialog (#6452)
     add ec29b82  additional scalar functions for array type (#6446)
     add 5de1d4b  Fixing pinot helm 0.2.3 package (#6459)
     add b592c8c  Adding CRON scheduler for Pinot tasks (#6451)
     add 60c802c  Fixing pinot hadoop fs isDirectory method (#6461)
     add 174a77b  [TE]frontend - Add support for feedback propagation for Entity Monitoring (#6447)
     add 079f27a  Cleanup dictionary and forward index loading in SegmentPreProcessor (#6455)
     add a36d1c0  [TE] Feedback propagation (#6463)
     add a56f330  Broker time segment pruner(#6189): (#6462)
     add 0417e20  additional string scalar functions (#6458)
     add 79c9460  Dimension table storage quota config and validation (#6465)
     add 2e7cdcd  Always store raw table name in segment metadata (#6457)
     add 4a89534  Replace BrokerRequestOptimizer with QueryOptimizer to also optimize the PinotQuery (#6423)
     add 0e8c0e2  [TE] clean up template as to not repeat code and reduce conditionals (#6448)
     add f17be35  Adding cluster config to config number of concurrent tasks per instance for minion task: SegmentGenerationAndPushTaskGenerator (#6468)
     add 8c8897e  Simplify SegmentGenerationAndPushTask handling getting schema and table config (#6469)
     add f7af798  Handle scheduler calls with proper response when it's disabled. (#6474)
     add 7649f9f  Remove the usage of deprecated range delimiter (#6475)
     add 04e2bcc  Update superset docker image repo (#6477)
     add dde3c18  TLS-support for client-pinot and pinot-internode connections (#6418)
     add 830eb94  [TE]frontend - Activate the composite anomalies route (#6472)
     add ec15f41  Bump up quartz version to 2.3.2 to prevent XXE (#6484)
     add 63d4266  Create CONTRIBUTING.md (#6481)
     add e209230  Create CODE_OF_CONDUCT.md (#6482)
     add e5bf05b  Fix the overflow issue when loading the large dictionary into the buffer (#6476)
     add f292730  Fix missing copyrights (#6489)
     add 98803dd  Adding pinot minion metrics to jmx prometheus reporter (#6488)
     add bacaed2  Guard against multiple consuming segments for same partition (#6483)
     add fde307b  Adding scalar function JsonPathArray to extract arrays from json (#6490)
     add d7f4fec  Support chained transform functions (#6495)
     add 28bfad9  Fixing -segmentNameGeneratorType help message (#6498)
     add cf35e6e  Fixing pinot controller metrics prefix (#6499)
     add 56df592  Fixing groovy parser with back slash (#6501)
     add b8f70e7  Support generating derived column during segment load (#6494)
     add 6c30c76  [TE] migrate PQL queries to standard SQL (#6486)
     add 1ae53fe  Adding cron scheduler metrics reporting (#6502)
     add 0f398a7  Fixing quickstart launcher from IDE (#6508)
     add 209f57c  [TE]frontend - Add filtering support for Entity Monitoring tables (#6514)
     new 6502a7d  Controller side changes pseudo code
     new 7504c31  StreamPartitionOffset to implement Checkpoint
     new 3892fc4  Rename partitionId to partitionGroupId
     new 96621f4  Controller side code
     new a7fba5a  Separate PartitionGroupInfo and PartitionGroupMetadata
     new 75547ec  More controller side changes
     new 8afc48f  Checnges in test to make it complie
     new 31c64a0  An attempt at server-side changes
     new ae863a1  Server side changes and some fixes
     new 396dae0  default methods to avoid interface changes
     new 3f14cf0  Add interfaces for V2 consumers
     new 72a77c6  Add initial implementation of Kinesis consumer
     new 2546098  Add PartitionGroupMetdataMap interface
     new 0491903  Add kinesis code to handle offsets
     new 5ed893e  Refactor PartitionGroupMetadataMap interface
     new 92ddaab  Refactor kinesis shard metadata interface and add shardId to the metadata
     new b0d8c1b  Fix consumer code
     new 7a4fccc  Move shardId out of checkpoint to partition group metadata
     new 6c8af2b  Reformat code
     new b05ad08  Add license headers
     new 96c32c4  Add Kinesis config wrapper
     new 262d326  fetch records with timeout
     new 29068ca  Add license header
     new 760ba06  Handle exceptions
     new 71ac64d  Refactor code
     new 0b8bb67  Handle closed connections
     new be19cf6  Refactor: get shard iterator methods
     new 26085a8  Change shard metadata logic
     new 1288012  Add test code for kinesis
     new cf23ee3  Handle timeout exception in consumer and make shard iterator type configurable
     new eb428cd  Add isEndOfPartition check in checkpoints
     new c5c42d4  Return message batch instead of list in the fetch result
     new 6cb0ebb  fixing compilation
     new d079c81  Implementation fixes
     new a3deab3  Add support for stream partition offsets (#6402)
     new 9c33895  Fix offsets in StreamMetadataProvider impl
     new 2116206  Consumer tweaks to get it working
     new 570a95a  Remove unused classes and changes
     new d7d0408  Use shardId's last digits as partitionGroupId
     new 214c007  Dont create new CONSUMING segment if shard has reached end of life
     new 40b6dbe  Remove new partition groups creation in commit
     new ce1a646  Cleanup, javadocs, comments
     new 7d1f7a1  LIcense headers
     new 751e212  End-of-shard as end criteria AND consume shards in order
     new dcb2ee1  Add tests for end-of-life cases
     new ab9655b  Add unit tests in Kinesis consumer (#6410)
     new db2136a  Avoid writing 'stream' and also 'stream.kinesis.topic.name'

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (f9f2419)
            \
             N -- N -- N   refs/heads/sharded_consumer_type_support_with_kinesis (db2136a)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 47 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
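In practical terms, the omit/discard distinction determines recoverability. Because the "omit"ted commits above are still reachable through other references, they can be pinned to a fresh branch before anything is pruned. A minimal JGit sketch (the local clone path and branch name are hypothetical; the SHA is taken from the "omit" list above):

    import java.io.File;
    import org.eclipse.jgit.api.Git;

    public class PinOmittedCommit {
      public static void main(String[] args) throws Exception {
        // Open a local clone of incubator-pinot (path is hypothetical).
        try (Git git = Git.open(new File("/path/to/incubator-pinot"))) {
          // Point a new branch at an "omit"ted commit so it stays reachable
          // even after the force-pushed branch has moved on.
          git.branchCreate()
              .setName("keep-eol-tests")   // hypothetical branch name
              .setStartPoint("467965b")    // "Add tests for end-of-life cases"
              .call();
        }
      }
    }

No such safety net exists for the "discard"ed commits: with no remaining references, they vanish once the server garbage-collects.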


Summary of changes:
 .../workflows/pinot_tests-workflow-run.yml         |    31 +-
 .github/workflows/scripts/.pinot_quickstart.sh     |    27 +
 .gitignore                                         |     1 -
 CODE_OF_CONDUCT.md                                 |    96 +
 CONTRIBUTING.md                                    |   234 +
 docker/images/pinot-superset/Dockerfile            |     2 +-
 .../images/pinot/etc/conf/pinot-broker-log4j2.xml  |    62 +
 .../pinot/etc/conf/pinot-controller-log4j2.xml     |    61 +
 .../images/pinot/etc/conf/pinot-server-log4j2.xml  |    61 +
 .../etc/jmx_prometheus_javaagent/configs/pinot.yml |    52 +-
 kubernetes/helm/index.yaml                         |    50 +-
 kubernetes/helm/pinot-0.2.3.tgz                    |   Bin 0 -> 24664 bytes
 kubernetes/helm/pinot/Chart.yaml                   |     4 +-
 kubernetes/helm/pinot/README.md                    |    25 +-
 .../helm/pinot/pinot-realtime-quickstart.yml       |   684 +-
 kubernetes/helm/pinot/requirements.lock            |     2 +-
 kubernetes/helm/pinot/requirements.yaml            |     2 +-
 kubernetes/helm/pinot/templates/_helpers.tpl       |    23 +
 .../minion/configmap.yaml}                         |    16 +-
 .../minion/service-headless.yaml}                  |    33 +-
 .../{Chart.yaml => templates/minion/service.yaml}  |    33 +-
 .../helm/pinot/templates/minion/statefulset.yml    |   108 +
 kubernetes/helm/pinot/values.yaml                  |    50 +
 kubernetes/helm/thirdeye/Chart.lock                |     6 +-
 kubernetes/helm/thirdeye/Chart.yaml                |     3 +-
 .../broker/broker/BrokerAdminApiApplication.java   |    23 +-
 .../broker/broker/helix/HelixBrokerStarter.java    |    36 +-
 .../requesthandler/BaseBrokerRequestHandler.java   |   122 +-
 .../SingleConnectionBrokerRequestHandler.java      |     5 +-
 .../routing/segmentpruner/TimeSegmentPruner.java   |    52 +-
 .../broker/pruner/SegmentZKMetadataPrunerTest.java |     2 +-
 .../request/PqlAndCalciteSqlCompatibilityTest.java |   161 +-
 .../requesthandler/BrokerRequestOptionsTest.java   |    44 +-
 .../LiteralOnlyBrokerRequestTest.java              |     4 +-
 .../requesthandler/RangeMergeOptimizerTest.java    |    20 +-
 .../routing/segmentpruner/SegmentPrunerTest.java   |    68 +-
 pinot-common/pom.xml                               |     4 +
 .../antlr4/org/apache/pinot/pql/parsers/PQL2.g4    |     4 +
 .../common/function/TransformFunctionType.java     |     1 +
 .../common/function/scalar/ArrayFunctions.java     |    59 +
 .../common/function/scalar/JsonFunctions.java      |    55 +
 .../common/function/scalar/StringFunctions.java    |   127 +-
 .../common/metadata/segment/SegmentZKMetadata.java |    21 +-
 .../pinot/common/metrics/ControllerGauge.java      |     5 +-
 .../pinot/common/metrics/ControllerMeter.java      |     3 +-
 .../pinot/common/metrics/ControllerTimer.java      |     2 +-
 .../apache/pinot/common/minion/MinionClient.java   |     2 +-
 .../pinot/common/request/AggregationInfo.java      |    52 +-
 .../apache/pinot/common/request/BrokerRequest.java |   353 +-
 .../apache/pinot/common/request/DataSource.java    |    11 +-
 .../apache/pinot/common/request/Expression.java    |    35 +-
 .../pinot/common/request/ExpressionType.java       |     4 +-
 .../pinot/common/request/FilterOperator.java       |    11 +-
 .../apache/pinot/common/request/FilterQuery.java   |    46 +-
 .../pinot/common/request/FilterQueryMap.java       |    11 +-
 .../org/apache/pinot/common/request/Function.java  |    21 +-
 .../org/apache/pinot/common/request/GroupBy.java   |    85 +-
 .../pinot/common/request/HavingFilterQuery.java    |    46 +-
 .../pinot/common/request/HavingFilterQueryMap.java |    11 +-
 .../apache/pinot/common/request/Identifier.java    |    16 +-
 .../pinot/common/request/InstanceRequest.java      |    76 +-
 .../org/apache/pinot/common/request/Literal.java   |     4 +-
 .../apache/pinot/common/request/PinotQuery.java    |    61 +-
 .../apache/pinot/common/request/QuerySource.java   |    11 +-
 .../org/apache/pinot/common/request/QueryType.java |    31 +-
 .../org/apache/pinot/common/request/Selection.java |    94 +-
 .../apache/pinot/common/request/SelectionSort.java |    16 +-
 .../request/transform/TransformExpressionTree.java |     3 +-
 .../pinot/common/response/ProcessingException.java |   283 +-
 .../apache/pinot/common/utils/CommonConstants.java |    27 +-
 .../common/utils/FileUploadDownloadClient.java     |    40 +-
 .../apache/pinot/parsers/utils/ParserUtils.java    |    37 +-
 .../parsers/PinotQuery2BrokerRequestConverter.java |     3 +-
 .../org/apache/pinot/pql/parsers/Pql2Compiler.java |    27 +-
 .../parsers/pql2/ast/BetweenPredicateAstNode.java  |     7 +-
 .../pql2/ast/ComparisonPredicateAstNode.java       |    17 +-
 .../pinot/pql/parsers/pql2/ast/FilterKind.java     |     3 +-
 .../apache/pinot/sql/parsers/CalciteSqlParser.java |    21 +-
 .../pinot/common/function/JsonFunctionsTest.java   |    86 +
 .../transform/TransformExpressionTreeTest.java     |    11 +-
 .../apache/pinot/pql/parsers/Pql2CompilerTest.java |    10 +-
 pinot-common/src/thrift/request.thrift             |     3 +-
 .../spark/connector/PinotServerDataFetcher.scala   |     2 +
 pinot-controller/pom.xml                           |     4 +
 .../apache/pinot/controller/ControllerConf.java    |    47 +-
 .../apache/pinot/controller/ControllerStarter.java |    29 +-
 .../api/ControllerAdminApiApplication.java         |    65 +-
 .../api/resources/PinotQueryResource.java          |    39 +-
 .../api/resources/PinotSegmentRestletResource.java |    65 +-
 .../api/resources/PinotTableRestletResource.java   |    57 +
 .../api/resources/PinotTaskRestletResource.java    |   120 +
 .../api/resources/ServerTableSizeReader.java       |     2 +-
 .../helix/ControllerRequestURLBuilder.java         |    20 +-
 .../helix/core/PinotHelixResourceManager.java      |   178 +-
 .../helix/core/minion/ClusterInfoAccessor.java     |    16 +
 .../helix/core/minion/CronJobScheduleJob.java      |    63 +
 .../helix/core/minion/PinotTaskManager.java        |   268 +-
 .../core/minion/TableTaskSchedulerUpdater.java     |    57 +
 .../generator/ConvertToRawIndexTaskGenerator.java  |     7 +-
 .../core/minion/generator/PinotTaskGenerator.java  |    14 +-
 .../RealtimeToOfflineSegmentsTaskGenerator.java    |    10 +-
 .../SegmentGenerationAndPushTaskGenerator.java     |   371 +
 .../minion/generator/TaskGeneratorRegistry.java    |    58 +-
 .../core/periodictask/ControllerPeriodicTask.java  |     4 +
 .../realtime/PinotLLCRealtimeSegmentManager.java   |    19 +
 .../core/realtime/PinotRealtimeSegmentManager.java |    10 +-
 .../controller/util/CompletionServiceHelper.java   |     7 +-
 .../util/ConsumingSegmentInfoReader.java           |     2 +-
 .../pinot/controller/util/FileIngestionUtils.java  |     2 +-
 .../pinot/controller/util/ListenerConfigUtil.java  |    93 -
 .../util/ServerSegmentMetadataReader.java          |     2 +-
 .../Homepage/Operations/SchemaComponent.tsx        |    42 +-
 .../pinot/controller/ControllerTestUtils.java      |    15 +-
 .../api/ConsumingSegmentInfoReaderTest.java        |     2 +-
 .../controller/api/PinotSegmentsMetadataTest.java  |     2 +-
 .../api/PinotTableRestletResourceTest.java         |    93 +
 .../controller/api/SegmentCompletionUtilsTest.java |     2 +-
 .../controller/api/ServerTableSizeReaderTest.java  |     2 +-
 .../pinot/controller/api/TableSizeReaderTest.java  |     2 +-
 .../helix/core/PinotHelixResourceManagerTest.java  |     4 +-
 .../helix/core/minion/PinotTaskManagerTest.java    |   145 +
 ...RealtimeToOfflineSegmentsTaskGeneratorTest.java |    43 +-
 .../SegmentGenerationAndPushTaskGeneratorTest.java |    80 +
 .../core/util/SegmentDeletionManagerTest.java      |     3 +-
 .../controller/util/ListenerConfigUtilTest.java    |    44 +-
 pinot-core/pom.xml                                 |    17 +-
 .../org/apache/pinot/core/common/DataSource.java   |    21 +
 .../apache/pinot/core/common/MinionConstants.java  |     8 +
 .../core/data/function/FunctionEvaluator.java      |     5 +
 .../data/function/GroovyFunctionEvaluator.java     |     5 +-
 .../data/function/InbuiltFunctionEvaluator.java    |    33 +-
 .../data/function/TimeSpecFunctionEvaluator.java   |    13 +-
 .../manager/offline/DimensionTableDataManager.java |    11 +
 .../realtime/HLRealtimeSegmentDataManager.java     |     4 +-
 .../realtime/LLRealtimeSegmentDataManager.java     |    16 +-
 .../core/data/partition/PartitionFunction.java     |     5 +-
 .../data/readers/PinotSegmentColumnReader.java     |    13 +
 .../recordtransformer/ExpressionTransformer.java   |    40 +-
 .../data/recordtransformer/RecordTransformer.java  |     3 +-
 .../transform/function/ScalarFunctions.java        |    27 +
 .../transform/function/StDistanceFunction.java     |    40 +-
 .../generator/SegmentGeneratorConfig.java          |    91 +-
 .../core/indexsegment/mutable/MutableSegment.java  |     6 +-
 .../indexsegment/mutable/MutableSegmentImpl.java   |    75 +-
 .../io/util/VarLengthBytesValueReaderWriter.java   |   241 -
 .../pinot/core/io/util/VarLengthValueReader.java   |   121 +
 .../pinot/core/io/util/VarLengthValueWriter.java   |   133 +
 .../operator/filter/BitmapBasedFilterOperator.java |     6 +-
 .../core/operator/filter/FilterOperatorUtils.java  |    18 +-
 .../operator/filter/H3IndexFilterOperator.java     |   246 +
 .../operator/filter/JsonMatchFilterOperator.java   |    59 +
 .../FSTBasedRegexpPredicateEvaluatorFactory.java   |   156 +
 .../function/LookupTransformFunction.java          |   332 +
 .../function/TransformFunctionFactory.java         |     1 +
 .../org/apache/pinot/core/plan/FilterPlanNode.java |    78 +-
 .../BaseRawFloatSingleColumnDistinctExecutor.java  |     2 +-
 .../BaseRawIntSingleColumnDistinctExecutor.java    |     2 +-
 .../BaseRawLongSingleColumnDistinctExecutor.java   |     2 +-
 .../BaseRawStringSingleColumnDistinctExecutor.java |     2 +-
 .../optimizer/filter/MergeEqInFilterOptimizer.java |    16 +-
 .../filter/MergeRangeFilterOptimizer.java          |     5 +-
 .../{Predicate.java => JsonMatchPredicate.java}    |    57 +-
 .../query/request/context/predicate/Predicate.java |     2 +-
 .../request/context/predicate/RangePredicate.java  |    18 +-
 .../BrokerRequestToQueryContextConverter.java      |   268 +-
 .../context/utils/QueryContextConverterUtils.java  |     4 +
 .../converter/RealtimeSegmentConverter.java        |     7 +-
 .../RealtimeSegmentSegmentCreationDataSource.java  |     3 +-
 .../core/realtime/impl/RealtimeSegmentConfig.java  |    46 +-
 .../realtime/impl/geospatial/MutableH3Index.java   |    78 +
 .../RealtimeLuceneTextIndexReader.java             |     6 +
 .../core/realtime/impl/json/MutableJsonIndex.java  |   269 +
 .../core/requesthandler/RangeMergeOptimizer.java   |     3 +-
 .../segment/creator/ColumnIndexCreationInfo.java   |     3 +-
 .../core/segment/creator/ColumnStatistics.java     |     3 +-
 ...DataSource.java => GeoSpatialIndexCreator.java} |    21 +-
 ...eationDataSource.java => JsonIndexCreator.java} |    21 +-
 .../segment/creator/SegmentCreationDataSource.java |     3 +-
 .../pinot/core/segment/creator/SegmentCreator.java |     6 +-
 .../creator/SegmentIndexCreationDriver.java        |     3 +-
 .../segment/creator/SegmentIndexCreationInfo.java  |     5 +-
 .../creator/SegmentPreIndexStatsContainer.java     |     5 +-
 .../pinot/core/segment/creator/TextIndexType.java  |     3 +-
 .../creator/impl/SegmentColumnarIndexCreator.java  |   145 +-
 .../creator/impl/SegmentDictionaryCreator.java     |    41 +-
 .../core/segment/creator/impl/V1Constants.java     |     5 +
 .../impl/inv/BitmapInvertedIndexWriter.java        |    90 +
 .../inv/OffHeapBitmapInvertedIndexCreator.java     |    43 +-
 .../impl/inv/OnHeapBitmapInvertedIndexCreator.java |    45 +-
 .../impl/inv/geospatial/BaseH3IndexCreator.java    |   168 +
 .../creator/impl/inv/geospatial/H3IndexConfig.java |    50 +
 .../impl/inv/geospatial/H3IndexResolution.java     |    71 +
 .../impl/inv/geospatial/OffHeapH3IndexCreator.java |   218 +
 .../impl/inv/geospatial/OnHeapH3IndexCreator.java  |    49 +
 .../impl/inv/json/BaseJsonIndexCreator.java        |   173 +
 .../impl/inv/json/OffHeapJsonIndexCreator.java     |   282 +
 .../impl/inv/json/OnHeapJsonIndexCreator.java      |    60 +
 .../impl/inv/text/LuceneFSTIndexCreator.java       |   103 +
 .../segment/index/column/ColumnIndexContainer.java |    23 +-
 .../index/column/PhysicalColumnIndexContainer.java |    77 +-
 .../converter/SegmentV1V2ToV3FormatConverter.java  |    53 +-
 .../segment/index/datasource/BaseDataSource.java   |    27 +
 .../index/datasource/ImmutableDataSource.java      |     5 +-
 .../index/datasource/MutableDataSource.java        |    15 +-
 .../segment/index/loader/IndexLoadingConfig.java   |    87 +-
 .../core/segment/index/loader/LoaderUtils.java     |    47 +
 .../segment/index/loader/SegmentPreProcessor.java  |    31 +-
 .../defaultcolumn/BaseDefaultColumnHandler.java    |   303 +-
 .../loader/defaultcolumn/DefaultColumnHandler.java |     5 +-
 .../defaultcolumn/DefaultColumnHandlerFactory.java |     9 +-
 .../defaultcolumn/V1DefaultColumnHandler.java      |    16 +-
 .../defaultcolumn/V3DefaultColumnHandler.java      |    34 +-
 .../index/loader/invertedindex/H3IndexHandler.java |   161 +
 .../loader/invertedindex/InvertedIndexHandler.java |    19 +-
 .../loader/invertedindex/JsonIndexHandler.java     |   158 +
 .../invertedindex/LuceneFSTIndexHandler.java       |   146 +
 .../loader/invertedindex/RangeIndexHandler.java    |    30 +-
 .../loader/invertedindex/TextIndexHandler.java     |    47 +-
 .../segment/index/metadata/ColumnMetadata.java     |    20 +-
 .../segment/index/metadata/SegmentMetadata.java    |     4 +
 .../index/metadata/SegmentMetadataImpl.java        |    49 +-
 .../index/readers/BaseImmutableDictionary.java     |    15 +-
 .../index/readers/BitmapInvertedIndexReader.java   |    87 +-
 .../{TextIndexReader.java => H3IndexReader.java}   |    19 +-
 .../{TextIndexReader.java => JsonIndexReader.java} |    10 +-
 .../index/readers/LuceneFSTIndexReader.java        |    82 +
 .../segment/index/readers/TextIndexReader.java     |     8 +
 .../readers/geospatial/ImmutableH3IndexReader.java |    84 +
 .../readers/json/ImmutableJsonIndexReader.java     |   243 +
 .../index/readers/text/LuceneTextIndexReader.java  |     6 +
 .../pinot/core/segment/memory/PinotByteBuffer.java |    15 +-
 .../core/segment/name/SegmentNameGenerator.java    |     3 +-
 .../pinot/core/segment/store/ColumnIndexType.java  |     5 +-
 .../core/segment/store/FilePerIndexDirectory.java  |    13 +-
 .../pinot/core/segment/store/SegmentDirectory.java |    25 +-
 .../core/segment/store/SegmentDirectoryPaths.java  |     9 +
 .../segment/store/SegmentLocalFSDirectory.java     |    10 -
 .../segment/store/SingleFileIndexDirectory.java    |     9 +-
 .../virtualcolumn/VirtualColumnIndexContainer.java |    17 +
 .../core/startree/v2/store/StarTreeDataSource.java |     6 +-
 .../pinot/core/transport}/ListenerConfig.java      |    36 +-
 .../apache/pinot/core/transport/QueryRouter.java   |    34 +-
 .../apache/pinot/core/transport/QueryServer.java   |    51 +
 .../pinot/core/transport/ServerChannels.java       |    44 +
 .../pinot/core/transport/ServerInstance.java       |    21 +
 .../core/transport/ServerRoutingInstance.java      |    30 +-
 .../org/apache/pinot/core/transport/TlsConfig.java |    77 +
 .../H3Utils.java}                                  |    21 +-
 .../apache/pinot/core/util/ListenerConfigUtil.java |   207 +
 .../apache/pinot/core/util/TableConfigUtils.java   |    24 +-
 .../java/org/apache/pinot/core/util/TlsUtils.java  |   269 +
 .../org/apache/pinot/core/util/fst/FSTBuilder.java |    65 +
 .../pinot/core/util/fst/PinotBufferIndexInput.java |    89 +
 .../apache/pinot/core/util/fst/RegexpMatcher.java  |   164 +
 .../core/data/function/InbuiltFunctionsTest.java   |    71 +
 .../offline/DimensionTableDataManagerTest.java     |     5 +
 .../ExpressionTransformerTest.java                 |    29 +
 .../transform/StDistanceFunctionTest.java          |     8 +-
 .../MutableSegmentImplAggregateMetricsTest.java    |    16 +-
 .../util/VarLengthBytesValueReaderWriterTest.java  |   172 -
 .../io/util/VarLengthValueReaderWriterTest.java    |   117 +
 .../NoDictionaryRangePredicateEvaluatorTest.java   |   104 +-
 .../function/BaseTransformFunctionTest.java        |    21 +
 .../function/LookupTransformFunctionTest.java      |   386 +
 .../ScalarTransformFunctionWrapperTest.java        |   297 +-
 .../pinot/core/plan/CombinePlanNodeTest.java       |     2 +-
 .../core/query/optimizer/QueryOptimizerTest.java   |    32 +-
 .../query/pruner/ColumnValueSegmentPrunerTest.java |     4 +-
 .../inv/geospatial/H3IndexResolutionTest.java}     |    20 +-
 .../core/segment/index/ColumnMetadataTest.java     |     1 +
 .../pinot/core/segment/index/H3IndexTest.java      |   110 +
 .../pinot/core/segment/index/JsonIndexTest.java    |   216 +
 .../index/creator/LuceneFSTIndexCreatorTest.java   |    74 +
 .../core/segment/index/loader/LoaderTest.java      |   139 +
 .../index/loader/SegmentPreProcessorTest.java      |   115 +-
 .../BaseDefaultColumnHandlerTest.java              |    26 +-
 .../segment/store/SegmentLocalFSDirectoryTest.java |     2 +-
 .../pinot/core/util/TableConfigUtilsTest.java      |    57 +-
 .../org/apache/pinot/queries/BaseQueriesTest.java  |     2 +-
 .../apache/pinot/queries/DistinctQueriesTest.java  |    44 +-
 .../queries/FSTBasedRegexpLikeQueriesTest.java     |   477 +
 .../apache/pinot/queries/H3IndexQueriesTest.java   |   233 +
 .../pinot/queries/TextSearchQueriesTest.java       |     3 +-
 .../java/org/apache/pinot/util/FSTBuilderTest.java |   107 +
 .../src/test/resources/data/newColumnsSchema4.json |    84 +
 .../pinot/compat/tests/ClusterDescriptor.java      |    10 +-
 .../org/apache/pinot/compat/tests/SegmentOp.java   |   256 +-
 .../org/apache/pinot/compat/tests/TableOp.java     |    26 +-
 .../tests/AdminConsoleIntegrationTest.java         |    13 +
 .../tests/BaseClusterIntegrationTestSet.java       |     5 +
 .../tests/ClusterIntegrationTestUtils.java         |    55 +-
 .../pinot/integration/tests/ClusterTest.java       |    66 +-
 .../tests/JsonPathClusterIntegrationTest.java      |    80 +-
 .../tests/OfflineClusterIntegrationTest.java       |    55 +-
 .../tests/SimpleMinionClusterIntegrationTest.java  |    71 +-
 ...onulls_default_column_test_extra_columns.schema |     8 +
 .../compat-tests/configs/feature-test-1.json       |    28 +-
 .../src/test/resources/compat-tests/sample.yaml    |    16 +-
 pinot-minion/pom.xml                               |     5 +
 .../org/apache/pinot/minion/MinionStarter.java     |    35 +-
 .../DefaultMinionEventObserver.java                |     2 +-
 .../DefaultMinionEventObserverFactory.java         |    18 +-
 .../minion/event/EventObserverFactoryRegistry.java |    85 +
 .../{events => event}/MinionEventObserver.java     |     2 +-
 .../MinionEventObserverFactory.java                |    19 +-
 .../events/EventObserverFactoryRegistry.java       |    50 -
 .../ConvertToRawIndexTaskExecutorFactory.java      |    14 +
 .../executor/MergeRollupTaskExecutorFactory.java   |    15 +
 .../minion/executor/PinotTaskExecutorFactory.java  |    14 +-
 .../minion/executor/PurgeTaskExecutorFactory.java  |    14 +
 ...altimeToOfflineSegmentsTaskExecutorFactory.java |    18 +-
 .../executor/SegmentGenerationAndPushResult.java   |    91 +
 .../SegmentGenerationAndPushTaskExecutor.java      |   314 +
 ...gmentGenerationAndPushTaskExecutorFactory.java} |    18 +-
 .../executor/TaskExecutorFactoryRegistry.java      |    61 +-
 .../minion/taskfactory/TaskFactoryRegistry.java    |    22 +-
 .../batch/common/SegmentGenerationTaskRunner.java  |    14 +-
 .../ingestion/batch/common/SegmentPushUtils.java   |    44 +-
 .../batch/hadoop/HadoopSegmentCreationMapper.java  |     3 +-
 .../spark/SparkSegmentGenerationJobRunner.java     |     3 +-
 .../standalone/SegmentGenerationJobRunner.java     |     2 +-
 .../apache/pinot/hadoop/io/PinotOutputFormat.java  |     3 +-
 .../v0_deprecated/pinot-spark/pom.xml              |   282 +-
 .../src/test/java/SegmentCreationSparkTest.java    |   141 +
 .../src/test/resources}/test_sample_data.csv       |     0
 .../apache/pinot/plugin/filesystem/GcsPinotFS.java |    23 +-
 .../pinot/plugin/filesystem/HadoopPinotFS.java     |    13 +-
 ...aConfluentSchemaRegistryAvroMessageDecoder.java |     2 +-
 pinot-server/pom.xml                               |     4 -
 .../org/apache/pinot/server/conf/ServerConf.java   |    20 +-
 .../pinot/server/starter/ServerInstance.java       |    48 +-
 .../server/starter/helix/AdminApiApplication.java  |    25 +-
 .../server/starter/helix/HelixServerStarter.java   |    40 +-
 .../apache/pinot/server/api/AccessControlTest.java |    11 +-
 .../apache/pinot/server/api/BaseResourceTest.java  |    12 +-
 .../pinot/server/api/TablesResourceTest.java       |    14 +-
 .../annotations/minion/EventObserverFactory.java   |    20 +-
 .../annotations/minion/TaskExecutorFactory.java    |    20 +-
 .../spi/annotations/minion/TaskGenerator.java      |    20 +-
 .../apache/pinot/spi/config/BaseJsonConfig.java    |     3 +-
 .../apache/pinot/spi/config/table/FieldConfig.java |     2 +-
 .../pinot/spi/config/table/IndexingConfig.java     |    10 +
 .../java/org/apache/pinot/spi/data/FieldSpec.java  |     3 +-
 .../pinot/spi/data/IngestionSchemaValidator.java   |     4 +-
 .../java/org/apache/pinot/spi/data/Schema.java     |     5 +-
 .../apache/pinot/spi/data/TimeGranularitySpec.java |     5 +-
 .../apache/pinot/spi/data/readers/GenericRow.java  |     3 +-
 .../pinot/spi/data/readers/RecordExtractor.java    |     3 +-
 .../pinot/spi/data/readers/RecordReader.java       |     3 +-
 .../spi/ingestion/batch/BatchConfigProperties.java |     5 +-
 .../batch/spec/SegmentGenerationJobSpec.java       |    26 +
 .../batch/spec/SegmentGenerationTaskSpec.java      |     7 +-
 .../pinot/spi/ingestion/batch/spec/TlsSpec.java    |    45 +-
 .../org/apache/pinot/spi/plugin/PluginManager.java |    44 +
 .../pinot/spi/utils/GroovyTemplateUtils.java       |     4 +-
 .../pinot/spi/utils/IngestionConfigUtils.java      |     8 +-
 .../java/org/apache/pinot/spi/utils/JsonUtils.java |   191 +-
 .../org/apache/pinot/spi/utils/JsonUtilsTest.java  |   145 +
 .../resources/ingestion_job_spec_template.yaml     |     7 +-
 pinot-tools/pom.xml                                |    49 +
 .../pinot/tools/BatchQuickstartWithMinion.java     |    18 +-
 .../org/apache/pinot/tools/BootstrapTableTool.java |   168 +-
 .../org/apache/pinot/tools/GenericQuickstart.java  |   149 +
 .../org/apache/pinot/tools/JoinQuickStart.java     |     7 +
 .../apache/pinot/tools/JsonIndexQuickStart.java    |    89 +
 .../java/org/apache/pinot/tools/Quickstart.java    |    17 +-
 .../pinot/tools/RealtimeJsonIndexQuickStart.java   |   111 +
 .../pinot/tools/admin/PinotAdministrator.java      |    20 +-
 .../tools/admin/command/AddSchemaCommand.java      |    18 +-
 .../pinot/tools/admin/command/AddTableCommand.java |    17 +-
 .../tools/admin/command/AddTenantCommand.java      |    13 +-
 .../tools/admin/command/BootstrapTableCommand.java |     6 +-
 .../tools/admin/command/ChangeTableState.java      |    18 +-
 .../tools/admin/command/ImportDataCommand.java     |   390 +
 .../command/LaunchDataIngestionJobCommand.java     |     9 +
 .../admin/command/OperateClusterConfigCommand.java |    15 +-
 .../tools/admin/command/PostQueryCommand.java      |    15 +-
 .../tools/admin/command/QuickStartCommand.java     |     7 +
 .../tools/admin/command/QuickstartRunner.java      |    48 +-
 .../tools/admin/command/StartMinionCommand.java    |    28 +-
 .../tools/admin/command/UploadSegmentCommand.java  |    15 +-
 .../pinot/tools/data/generator/DataGenerator.java  |     2 +-
 .../tools/scan/query/RangePredicateFilter.java     |    36 +-
 .../converter/DictionaryToRawIndexConverter.java   |     4 +-
 .../pinot/tools/streams/MeetupRsvpJsonStream.java  |    63 +
 .../pinot/tools/streams/MeetupRsvpStream.java      |   149 +-
 .../main/resources/conf/pinot-minion-log4j2.xml    |    48 +
 .../main/resources/conf/pinot-service-log4j2.xml   |     8 +
 .../githubEvents_offline_table_config.json         |    27 +
 .../batch/githubEvents/githubEvents_schema.json    |    40 +
 .../batch/githubEvents/ingestionJobSpec.yaml       |   139 +
 .../githubEvents/rawdata/githubEvents_data.json    | 10000 +++++++++++++++++++
 .../batch/githubEvents/sparkIngestionJobSpec.yaml  |   147 +
 .../batch/starbucksStores/ingestionJobSpec.yaml    |   139 +
 .../batch/starbucksStores/rawdata/data.csv         |  6444 ++++++++++++
 .../starbucksStores_offline_table_config.json      |    34 +
 .../starbucksStores/starbucksStores_schema.json    |    29 +
 .../airlineStats_offline_table_config.json         |    44 +
 .../batch}/airlineStats/airlineStats_schema.json   |     0
 .../baseballStats_offline_table_config.json        |    43 +
 .../batch/baseballStats/baseballStats_schema.json  |     0
 .../baseballStats/rawdata/baseballStats_data.csv   |     0
 ... => json_meetupRsvp_realtime_table_config.json} |    34 +-
 .../stream/meetupRsvp/json_meetupRsvp_schema.json  |    51 +
 .../stream/meetupRsvp/meetupRsvp_schema.json       |     2 +-
 .../upsert_meetupRsvp_realtime_table_config.json   |    10 +
 .../meetupRsvp/upsert_meetupRsvp_schema.json       |     2 +-
 pom.xml                                            |    16 +-
 thirdeye/pom.xml                                   |     2 +-
 .../dashboard/ThirdEyeDashboardApplication.java    |     8 +-
 .../dashboard/resources/AnomalyResource.java       |    44 +-
 .../resources/v2/RootCauseMetricResource.java      |    61 +-
 .../dashboard/resources/v2/RootCauseResource.java  |    11 +-
 .../bao/TestMergedAnomalyResultManager.java        |    19 +
 .../thirdeye/datasource/pinot/PqlUtilsTest.java    |    26 +-
 .../app/mocks/compositeAnomalies.js                |   188 +-
 .../app/pods/components/alert-details/template.hbs |   387 +-
 .../pods/components/anomaly-summary/component.js   |    14 +-
 .../components/composite-anomalies/component.js    |    58 +-
 .../composite-anomalies/data-table/template.hbs    |    24 +
 .../entity-metrics-anomalies/component.js          |    56 +
 .../entity-metrics-anomalies/template.hbs          |     1 +
 .../group-constituents-anomalies/component.js      |    56 +
 .../group-constituents-anomalies/template.hbs      |     1 +
 .../parent-anomalies/component.js                  |    96 +-
 .../parent-anomalies/template.hbs                  |    21 +-
 .../components/composite-anomalies/template.hbs    |    11 +-
 .../pods/components/rootcause-anomaly/component.js |   207 +-
 .../pods/components/rootcause-metrics/component.js |   220 +-
 .../anomalies-list/template.hbs                    |     6 +-
 .../criticality/template.hbs                       |     3 +
 .../current-predicted/template.hbs                 |     6 +
 .../dimensions/template.hbs                        |     3 +
 .../group-name/component.js                        |    10 +
 .../group-name/template.hbs                        |     3 +
 .../composite-anomalies-table/metric/template.hbs  |     3 +
 .../resolution/component.js                        |    60 +-
 .../resolution/template.hbs                        |    65 +-
 .../start-duration/component.js                    |    10 +
 .../start-duration/template.hbs                    |    10 +-
 .../explore/composite-anomalies/controller.js      |    10 +
 .../explore/composite-anomalies/template.hbs       |     9 +-
 .../app/pods/manage/explore/route.js               |    56 +-
 .../pods/partials/rootcause/metrics/template.hbs   |     1 +
 .../app/pods/rootcause/controller.js               |   687 +-
 .../app/pods/services/api/anomalies/service.js     |    76 +-
 .../services/rootcause-aggregates-cache/service.js |    74 +-
 .../app/shared/metricFunnelColumns.js              |    56 +
 thirdeye/thirdeye-frontend/app/styles/app.scss     |     2 +-
 .../app/styles/components/breadcrumb-list.scss     |     2 +
 .../app/styles/components/te-anomaly-table.scss    |     1 +
 .../pods/custom/composite-anomalies-table.scss     |    53 +
 .../styles/pods/custom/parent-anomalies-table.scss |    21 -
 .../app/styles/shared/_styles.scss                 |     6 +-
 .../app/utils/anomalies-tree-parser.js             |   104 +-
 thirdeye/thirdeye-frontend/app/utils/anomaly.js    |   131 +-
 .../app/utils/composite-anomalies.js               |   141 +
 thirdeye/thirdeye-frontend/app/utils/constants.js  |     4 +-
 thirdeye/thirdeye-frontend/app/utils/utils.js      |   122 +-
 .../composite-anomalies/component-test.js          |     2 +-
 .../entity-metrics-anomalies/component-test.js     |    74 +
 .../group-constituents-anomalies/component-test.js |    83 +
 .../parent-anomalies/component-test.js             |    16 +-
 .../tests/unit/utils/anomalies-tree-parser-test.js |   251 +-
 .../tests/unit/utils/utils-test.js                 |    44 +
 .../anomaly/ThirdEyeAnomalyApplication.java        |     7 +
 .../thirdeye/common/ThirdEyeConfiguration.java     |    13 +
 .../resources/v2/BaselineParsingUtils.java         |    15 +-
 .../bao/jdbc/MergedAnomalyResultManagerImpl.java   |    25 +-
 .../datasource/pinot/PinotDataSourceTimeQuery.java |    12 +-
 .../datasource/pinot/PinotThirdEyeDataSource.java  |    12 +-
 .../pinot/{PqlUtils.java => SqlUtils.java}         |    63 +-
 .../pinot/resources/PinotDataSourceResource.java   |     2 +-
 .../detection/DetectionPipelineTaskRunner.java     |     4 +-
 .../registry/DetectionAlertRegistry.java           |    28 +-
 .../annotation/registry/DetectionRegistry.java     |    24 +-
 .../rootcause/timeseries/BaselineAggregate.java    |    21 +
 .../datalayer/bao/MergedAnomalyResultManager.java  |     2 +
 .../installation/cloud/on-premise.md               |     4 +-
 479 files changed, 36865 insertions(+), 5131 deletions(-)
 copy docker/images/pinot-superset/Dockerfile => .github/workflows/pinot_tests-workflow-run.yml (60%)
 create mode 100644 CODE_OF_CONDUCT.md
 create mode 100644 CONTRIBUTING.md
 create mode 100644 docker/images/pinot/etc/conf/pinot-broker-log4j2.xml
 create mode 100644 docker/images/pinot/etc/conf/pinot-controller-log4j2.xml
 create mode 100644 docker/images/pinot/etc/conf/pinot-server-log4j2.xml
 create mode 100644 kubernetes/helm/pinot-0.2.3.tgz
 copy kubernetes/helm/pinot/{requirements.yaml => templates/minion/configmap.yaml} (72%)
 copy kubernetes/helm/pinot/{Chart.yaml => templates/minion/service-headless.yaml} (59%)
 copy kubernetes/helm/pinot/{Chart.yaml => templates/minion/service.yaml} (59%)
 create mode 100644 kubernetes/helm/pinot/templates/minion/statefulset.yml
 create mode 100644 pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java
 create mode 100644 pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TableTaskSchedulerUpdater.java
 create mode 100644 pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/generator/SegmentGenerationAndPushTaskGenerator.java
 delete mode 100644 pinot-controller/src/main/java/org/apache/pinot/controller/util/ListenerConfigUtil.java
 create mode 100644 pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManagerTest.java
 create mode 100644 pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/generator/SegmentGenerationAndPushTaskGeneratorTest.java
 delete mode 100644 pinot-core/src/main/java/org/apache/pinot/core/io/util/VarLengthBytesValueReaderWriter.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/io/util/VarLengthValueReader.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/io/util/VarLengthValueWriter.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/operator/filter/H3IndexFilterOperator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/operator/filter/JsonMatchFilterOperator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/LookupTransformFunction.java
 copy pinot-core/src/main/java/org/apache/pinot/core/query/request/context/predicate/{Predicate.java => JsonMatchPredicate.java} (50%)
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/geospatial/MutableH3Index.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/json/MutableJsonIndex.java
 copy pinot-core/src/main/java/org/apache/pinot/core/segment/creator/{SegmentCreationDataSource.java => GeoSpatialIndexCreator.java} (68%)
 copy pinot-core/src/main/java/org/apache/pinot/core/segment/creator/{SegmentCreationDataSource.java => JsonIndexCreator.java} (69%)
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/BitmapInvertedIndexWriter.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/geospatial/BaseH3IndexCreator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/geospatial/H3IndexConfig.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/geospatial/H3IndexResolution.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/geospatial/OffHeapH3IndexCreator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/geospatial/OnHeapH3IndexCreator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/json/BaseJsonIndexCreator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/json/OffHeapJsonIndexCreator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/json/OnHeapJsonIndexCreator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/H3IndexHandler.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/JsonIndexHandler.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/index/loader/invertedindex/LuceneFSTIndexHandler.java
 copy pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/{TextIndexReader.java => H3IndexReader.java} (63%)
 copy pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/{TextIndexReader.java => JsonIndexReader.java} (77%)
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/LuceneFSTIndexReader.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/geospatial/ImmutableH3IndexReader.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/segment/index/readers/json/ImmutableJsonIndexReader.java
 rename {pinot-controller/src/main/java/org/apache/pinot/controller/api/listeners => pinot-core/src/main/java/org/apache/pinot/core/transport}/ListenerConfig.java (67%)
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/transport/TlsConfig.java
 copy pinot-core/src/main/java/org/apache/pinot/core/{segment/creator/SegmentIndexCreationInfo.java => util/H3Utils.java} (70%)
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/util/ListenerConfigUtil.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/util/TlsUtils.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/util/fst/FSTBuilder.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/util/fst/PinotBufferIndexInput.java
 create mode 100644 pinot-core/src/main/java/org/apache/pinot/core/util/fst/RegexpMatcher.java
 delete mode 100644 pinot-core/src/test/java/org/apache/pinot/core/io/util/VarLengthBytesValueReaderWriterTest.java
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/core/io/util/VarLengthValueReaderWriterTest.java
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/LookupTransformFunctionTest.java
 copy pinot-core/src/{main/java/org/apache/pinot/core/segment/creator/SegmentIndexCreationInfo.java => test/java/org/apache/pinot/core/segment/creator/impl/inv/geospatial/H3IndexResolutionTest.java} (60%)
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/core/segment/index/H3IndexTest.java
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/core/segment/index/JsonIndexTest.java
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/core/segment/index/creator/LuceneFSTIndexCreatorTest.java
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/queries/FSTBasedRegexpLikeQueriesTest.java
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/queries/H3IndexQueriesTest.java
 create mode 100644 pinot-core/src/test/java/org/apache/pinot/util/FSTBuilderTest.java
 create mode 100644 pinot-core/src/test/resources/data/newColumnsSchema4.json
 rename pinot-minion/src/main/java/org/apache/pinot/minion/{events => event}/DefaultMinionEventObserver.java (97%)
 rename pinot-minion/src/main/java/org/apache/pinot/minion/{events => event}/DefaultMinionEventObserverFactory.java (75%)
 create mode 100644 pinot-minion/src/main/java/org/apache/pinot/minion/event/EventObserverFactoryRegistry.java
 rename pinot-minion/src/main/java/org/apache/pinot/minion/{events => event}/MinionEventObserver.java (97%)
 rename pinot-minion/src/main/java/org/apache/pinot/minion/{events => event}/MinionEventObserverFactory.java (72%)
 delete mode 100644 pinot-minion/src/main/java/org/apache/pinot/minion/events/EventObserverFactoryRegistry.java
 create mode 100644 pinot-minion/src/main/java/org/apache/pinot/minion/executor/SegmentGenerationAndPushResult.java
 create mode 100644 pinot-minion/src/main/java/org/apache/pinot/minion/executor/SegmentGenerationAndPushTaskExecutor.java
 copy pinot-minion/src/main/java/org/apache/pinot/minion/executor/{ConvertToRawIndexTaskExecutorFactory.java => SegmentGenerationAndPushTaskExecutorFactory.java} (64%)
 create mode 100644 pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/test/java/SegmentCreationSparkTest.java
 copy {pinot-core/src/test/resources/data => pinot-plugins/pinot-batch-ingestion/v0_deprecated/pinot-spark/src/test/resources}/test_sample_data.csv (100%)
 copy pinot-core/src/main/java/org/apache/pinot/core/segment/creator/SegmentCreationDataSource.java => pinot-spi/src/main/java/org/apache/pinot/spi/annotations/minion/EventObserverFactory.java (57%)
 copy pinot-core/src/main/java/org/apache/pinot/core/segment/creator/SegmentCreationDataSource.java => pinot-spi/src/main/java/org/apache/pinot/spi/annotations/minion/TaskExecutorFactory.java (57%)
 copy pinot-core/src/main/java/org/apache/pinot/core/segment/creator/SegmentCreationDataSource.java => pinot-spi/src/main/java/org/apache/pinot/spi/annotations/minion/TaskGenerator.java (58%)
 rename pinot-controller/src/main/java/org/apache/pinot/controller/api/listeners/TlsConfiguration.java => pinot-spi/src/main/java/org/apache/pinot/spi/ingestion/batch/spec/TlsSpec.java (64%)
 create mode 100644 pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
 copy pinot-core/src/main/java/org/apache/pinot/core/segment/creator/SegmentIndexCreationInfo.java => pinot-tools/src/main/java/org/apache/pinot/tools/BatchQuickstartWithMinion.java (68%)
 create mode 100644 pinot-tools/src/main/java/org/apache/pinot/tools/GenericQuickstart.java
 create mode 100644 pinot-tools/src/main/java/org/apache/pinot/tools/JsonIndexQuickStart.java
 create mode 100644 pinot-tools/src/main/java/org/apache/pinot/tools/RealtimeJsonIndexQuickStart.java
 create mode 100644 pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/ImportDataCommand.java
 create mode 100644 pinot-tools/src/main/java/org/apache/pinot/tools/streams/MeetupRsvpJsonStream.java
 create mode 100644 pinot-tools/src/main/resources/conf/pinot-minion-log4j2.xml
 create mode 100644 pinot-tools/src/main/resources/examples/batch/githubEvents/githubEvents_offline_table_config.json
 create mode 100644 pinot-tools/src/main/resources/examples/batch/githubEvents/githubEvents_schema.json
 create mode 100644 pinot-tools/src/main/resources/examples/batch/githubEvents/ingestionJobSpec.yaml
 create mode 100644 pinot-tools/src/main/resources/examples/batch/githubEvents/rawdata/githubEvents_data.json
 create mode 100644 pinot-tools/src/main/resources/examples/batch/githubEvents/sparkIngestionJobSpec.yaml
 create mode 100644 pinot-tools/src/main/resources/examples/batch/starbucksStores/ingestionJobSpec.yaml
 create mode 100644 pinot-tools/src/main/resources/examples/batch/starbucksStores/rawdata/data.csv
 create mode 100644 pinot-tools/src/main/resources/examples/batch/starbucksStores/starbucksStores_offline_table_config.json
 create mode 100644 pinot-tools/src/main/resources/examples/batch/starbucksStores/starbucksStores_schema.json
 create mode 100644 pinot-tools/src/main/resources/examples/minions/batch/airlineStats/airlineStats_offline_table_config.json
 copy pinot-tools/src/main/resources/examples/{stream => minions/batch}/airlineStats/airlineStats_schema.json (100%)
 create mode 100644 pinot-tools/src/main/resources/examples/minions/batch/baseballStats/baseballStats_offline_table_config.json
 copy pinot-tools/src/main/resources/examples/{ => minions}/batch/baseballStats/baseballStats_schema.json (100%)
 copy pinot-tools/src/main/resources/examples/{ => minions}/batch/baseballStats/rawdata/baseballStats_data.csv (100%)
 copy pinot-tools/src/main/resources/examples/stream/meetupRsvp/{upsert_meetupRsvp_realtime_table_config.json => json_meetupRsvp_realtime_table_config.json} (65%)
 create mode 100644 pinot-tools/src/main/resources/examples/stream/meetupRsvp/json_meetupRsvp_schema.json
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/components/composite-anomalies/data-table/template.hbs
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/components/composite-anomalies/entity-metrics-anomalies/component.js
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/components/composite-anomalies/entity-metrics-anomalies/template.hbs
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/components/composite-anomalies/group-constituents-anomalies/component.js
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/components/composite-anomalies/group-constituents-anomalies/template.hbs
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/custom/composite-anomalies-table/criticality/template.hbs
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/custom/composite-anomalies-table/current-predicted/template.hbs
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/custom/composite-anomalies-table/dimensions/template.hbs
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/custom/composite-anomalies-table/group-name/component.js
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/custom/composite-anomalies-table/group-name/template.hbs
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/custom/composite-anomalies-table/metric/template.hbs
 create mode 100644 thirdeye/thirdeye-frontend/app/pods/custom/composite-anomalies-table/start-duration/component.js
 create mode 100644 thirdeye/thirdeye-frontend/app/shared/metricFunnelColumns.js
 create mode 100644 thirdeye/thirdeye-frontend/app/styles/pods/custom/composite-anomalies-table.scss
 delete mode 100644 thirdeye/thirdeye-frontend/app/styles/pods/custom/parent-anomalies-table.scss
 create mode 100644 thirdeye/thirdeye-frontend/app/utils/composite-anomalies.js
 create mode 100644 thirdeye/thirdeye-frontend/tests/integration/pods/components/composite-anomalies/entity-metrics-anomalies/component-test.js
 create mode 100644 thirdeye/thirdeye-frontend/tests/integration/pods/components/composite-anomalies/group-constituents-anomalies/component-test.js
 create mode 100644 thirdeye/thirdeye-frontend/tests/unit/utils/utils-test.js
 rename thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datasource/pinot/{PqlUtils.java => SqlUtils.java} (92%)




[incubator-pinot] 45/47: Add tests for end-of-life cases

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit dcb2ee1354ff86417d28276ef03fbd0eaa758236
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Wed Jan 20 17:17:18 2021 -0800

    Add tests for end-of-life cases
---
 .../realtime/PinotLLCRealtimeSegmentManager.java   |  26 ++--
 .../PinotLLCRealtimeSegmentManagerTest.java        | 162 +++++++++++++++++----
 .../realtime/LLRealtimeSegmentDataManagerTest.java |  10 +-
 3 files changed, 155 insertions(+), 43 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 72caaf4..b137b5d 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -515,21 +515,20 @@ public class PinotLLCRealtimeSegmentManager {
     // If there were no splits/merges we would receive A,B
     List<PartitionGroupInfo> newPartitionGroupInfoList =
         getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
+    Set<Integer> newPartitionGroupSet =
+        newPartitionGroupInfoList.stream().map(PartitionGroupInfo::getPartitionGroupId).collect(Collectors.toSet());
     int numPartitions = newPartitionGroupInfoList.size();
 
     // Only if committingSegment's partitionGroup is present in the newPartitionGroupInfoList, we create new segment metadata
     String newConsumingSegmentName = null;
     String rawTableName = TableNameBuilder.extractRawTableName(realtimeTableName);
     long newSegmentCreationTimeMs = getCurrentTimeMs();
-    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupInfoList) {
-      if (partitionGroupInfo.getPartitionGroupId() == committingSegmentPartitionGroupId) {
-        LLCSegmentName newLLCSegment = new LLCSegmentName(rawTableName, committingSegmentPartitionGroupId,
-            committingLLCSegment.getSequenceNumber() + 1, newSegmentCreationTimeMs);
-        createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegment, newSegmentCreationTimeMs,
-            committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
-        newConsumingSegmentName = newLLCSegment.getSegmentName();
-        break;
-      }
+    if (newPartitionGroupSet.contains(committingSegmentPartitionGroupId)) {
+      LLCSegmentName newLLCSegment = new LLCSegmentName(rawTableName, committingSegmentPartitionGroupId,
+          committingLLCSegment.getSequenceNumber() + 1, newSegmentCreationTimeMs);
+      createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegment, newSegmentCreationTimeMs,
+          committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
+      newConsumingSegmentName = newLLCSegment.getSegmentName();
     }
 
     // TODO: create new partition groups also here
@@ -943,7 +942,10 @@ public class PinotLLCRealtimeSegmentManager {
    * a) metadata status is IN_PROGRESS, segment state is CONSUMING - happy path
    * b) metadata status is IN_PROGRESS, segment state is OFFLINE - create new metadata and new CONSUMING segment
    * c) metadata status is DONE, segment state is OFFLINE - create new metadata and new CONSUMING segment
-   * d) metadata status is DONE, segment state is CONSUMING - create new metadata and new CONSUMING segment
+   * d) metadata status is DONE, segment state is CONSUMING -
+   * If the shard has not reached end of life: create new metadata and a new CONSUMING segment. Update the current segment to ONLINE in the ideal state.
+   * If the shard has reached end of life: do not create new metadata or a new CONSUMING segment. Simply update the current segment to ONLINE in the ideal state.
+   *
    * 2) Segment is absent from ideal state - add new segment to ideal state
    *
    * Also checks if it is too soon to correct (could be in the process of committing segment)
@@ -985,8 +987,8 @@ public class PinotLLCRealtimeSegmentManager {
     // Possible things to repair:
     // 1. The latest metadata is in DONE state, but the idealstate says segment is CONSUMING:
     //    a. Create metadata for next segment and find hosts to assign it to.
-    //    b. update current segment in idealstate to ONLINE
-    //    c. add new segment in idealstate to CONSUMING on the hosts.
+    //    b. update current segment in idealstate to ONLINE (only if partition is present in newPartitionGroupInfo)
+    //    c. add new segment in idealstate to CONSUMING on the hosts (only if partition is present in newPartitionGroupInfo)
     // 2. The latest metadata is IN_PROGRESS, but segment is not there in idealstate.
     //    a. change prev segment to ONLINE in idealstate
     //    b. add latest segment to CONSUMING in idealstate.
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
index ecbf2ef..e8309d3 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.controller.helix.core.realtime;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -58,7 +59,6 @@ import org.apache.pinot.spi.config.table.assignment.InstancePartitionsType;
 import org.apache.pinot.spi.env.PinotConfiguration;
 import org.apache.pinot.spi.filesystem.PinotFSFactory;
 import org.apache.pinot.spi.stream.LongMsgOffset;
-import org.apache.pinot.spi.stream.OffsetCriteria;
 import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
@@ -249,6 +249,49 @@ public class PinotLLCRealtimeSegmentManagerTest {
     } catch (IllegalStateException e) {
       // Expected
     }
+
+    // committing segment's partitionGroupId no longer in the newPartitionGroupInfoList
+    List<PartitionGroupInfo> partitionGroupInfoListWithout0 =
+        segmentManager.getPartitionGroupInfoList(segmentManager._streamConfig, Collections.emptyList());
+    partitionGroupInfoListWithout0.remove(0);
+    segmentManager._partitionGroupInfoList = partitionGroupInfoListWithout0;
+
+    // Commit a segment for partition 0 - no new entries are created for a partition which reached end of life
+    committingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 2, CURRENT_TIME_MS).getSegmentName();
+    String committingSegmentStartOffset = segmentManager._segmentZKMetadataMap.get(committingSegment).getStartOffset();
+    String committingSegmentEndOffset =
+        new LongMsgOffset(Long.parseLong(committingSegmentStartOffset) + NUM_DOCS).toString();
+    committingSegmentDescriptor = new CommittingSegmentDescriptor(committingSegment, committingSegmentEndOffset, 0L);
+    committingSegmentDescriptor.setSegmentMetadata(mockSegmentMetadata());
+    int instanceStateMapSize = instanceStatesMap.size();
+    int metadataMapSize = segmentManager._segmentZKMetadataMap.size();
+    segmentManager.commitSegmentMetadata(REALTIME_TABLE_NAME, committingSegmentDescriptor);
+    // No changes in the number of ideal state or zk entries
+    assertEquals(instanceStatesMap.size(), instanceStateMapSize);
+    assertEquals(segmentManager._segmentZKMetadataMap.size(), metadataMapSize);
+
+    // Verify instance states for committed segment and new consuming segment
+    committedSegmentInstanceStateMap = instanceStatesMap.get(committingSegment);
+    assertNotNull(committedSegmentInstanceStateMap);
+    assertEquals(new HashSet<>(committedSegmentInstanceStateMap.values()),
+        Collections.singleton(SegmentStateModel.ONLINE));
+
+    consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 3, CURRENT_TIME_MS).getSegmentName();
+    consumingSegmentInstanceStateMap = instanceStatesMap.get(consumingSegment);
+    assertNull(consumingSegmentInstanceStateMap);
+
+    // Verify segment ZK metadata for committed segment and new consuming segment
+    committedSegmentZKMetadata = segmentManager._segmentZKMetadataMap.get(committingSegment);
+    assertEquals(committedSegmentZKMetadata.getStatus(), Status.DONE);
+    assertEquals(committedSegmentZKMetadata.getStartOffset(), committingSegmentStartOffset);
+    assertEquals(committedSegmentZKMetadata.getEndOffset(), committingSegmentEndOffset);
+    assertEquals(committedSegmentZKMetadata.getCreationTime(), CURRENT_TIME_MS);
+    assertEquals(committedSegmentZKMetadata.getCrc(), Long.parseLong(CRC));
+    assertEquals(committedSegmentZKMetadata.getIndexVersion(), SEGMENT_VERSION);
+    assertEquals(committedSegmentZKMetadata.getTotalDocs(), NUM_DOCS);
+
+    consumingSegmentZKMetadata = segmentManager._segmentZKMetadataMap.get(consumingSegment);
+    assertNull(consumingSegmentZKMetadata);
   }
 
   /**
@@ -411,6 +454,20 @@ public class PinotLLCRealtimeSegmentManagerTest {
    *
    * 4. MaxSegmentCompletionTime: Segment completion has 5 minutes to retry and complete between steps 1 and 3.
    * Correction: Do not correct the segments before the allowed time for segment completion
+   *
+   *
+   * End-of-shard case:
+   * Additionally, shards of some streams may be detected as having reached end-of-life when committing.
+   * In such cases, step 2 is skipped, and step 3 is done partially (change the committing segment's state to ONLINE,
+   * but don't create a new segment with state CONSUMING)
+   *
+   * Scenarios:
+   * 1. Step 3 failed - we will find segment ZK metadata DONE, but ideal state CONSUMING
+   * Correction: Since the shard has ended, do not create new segment ZK metadata or a new entry in the ideal state.
+   * Simply update the CONSUMING segment in the ideal state to ONLINE
+   *
+   * 2. A shard which has reached EOL is detected - we will find segment ZK metadata DONE and ideal state ONLINE
+   * Correction: No repair needed. Acceptable case.
    */
   @Test
   public void testRepairs() {
@@ -422,12 +479,12 @@ public class PinotLLCRealtimeSegmentManagerTest {
     // Remove the CONSUMING segment from the ideal state for partition 0 (step 3 failed)
     String consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 0, CURRENT_TIME_MS).getSegmentName();
     removeNewConsumingSegment(instanceStatesMap, consumingSegment, null);
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     // Remove the CONSUMING segment from the ideal state and segment ZK metadata map for partition 0 (step 2 failed)
     removeNewConsumingSegment(instanceStatesMap, consumingSegment, null);
     assertNotNull(segmentManager._segmentZKMetadataMap.remove(consumingSegment));
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     // 2 partitions commit segment
     for (int partitionId = 0; partitionId < 2; partitionId++) {
@@ -442,12 +499,12 @@ public class PinotLLCRealtimeSegmentManagerTest {
     consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 1, CURRENT_TIME_MS).getSegmentName();
     String latestCommittedSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 0, CURRENT_TIME_MS).getSegmentName();
     removeNewConsumingSegment(instanceStatesMap, consumingSegment, latestCommittedSegment);
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     // Remove the CONSUMING segment from the ideal state and segment ZK metadata map for partition 0 (step 2 failed)
     removeNewConsumingSegment(instanceStatesMap, consumingSegment, latestCommittedSegment);
     assertNotNull(segmentManager._segmentZKMetadataMap.remove(consumingSegment));
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     /*
       Test all replicas of the new segment are OFFLINE
@@ -461,12 +518,12 @@ public class PinotLLCRealtimeSegmentManagerTest {
     // Turn all the replicas for the CONSUMING segment to OFFLINE for partition 0
     consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 0, CURRENT_TIME_MS).getSegmentName();
     turnNewConsumingSegmentOffline(instanceStatesMap, consumingSegment);
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     // Turn all the replicas for the CONSUMING segment to OFFLINE for partition 0 again
     consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 1, CURRENT_TIME_MS).getSegmentName();
     turnNewConsumingSegmentOffline(instanceStatesMap, consumingSegment);
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     // 2 partitions commit segment
     for (int partitionId = 0; partitionId < 2; partitionId++) {
@@ -484,22 +541,51 @@ public class PinotLLCRealtimeSegmentManagerTest {
     consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 3, CURRENT_TIME_MS).getSegmentName();
     latestCommittedSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 2, CURRENT_TIME_MS).getSegmentName();
     removeNewConsumingSegment(instanceStatesMap, consumingSegment, latestCommittedSegment);
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     // Remove the CONSUMING segment from the ideal state and segment ZK metadata map for partition 0 (step 2 failed)
     removeNewConsumingSegment(instanceStatesMap, consumingSegment, latestCommittedSegment);
     assertNotNull(segmentManager._segmentZKMetadataMap.remove(consumingSegment));
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     // Turn all the replicas for the CONSUMING segment to OFFLINE for partition 0
     consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 3, CURRENT_TIME_MS).getSegmentName();
     turnNewConsumingSegmentOffline(instanceStatesMap, consumingSegment);
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
 
     // Turn all the replicas for the CONSUMING segment to OFFLINE for partition 0 again
     consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 0, 4, CURRENT_TIME_MS).getSegmentName();
     turnNewConsumingSegmentOffline(instanceStatesMap, consumingSegment);
-    testRepairs(segmentManager);
+    testRepairs(segmentManager, Collections.emptyList());
+
+    /*
+     * End of shard cases
+     */
+    // Partition group 1 reached end of shard.
+    List<PartitionGroupInfo> partitionGroupInfoListWithout1 =
+        segmentManager.getPartitionGroupInfoList(segmentManager._streamConfig, Collections.emptyList());
+    partitionGroupInfoListWithout1.remove(1);
+    segmentManager._partitionGroupInfoList = partitionGroupInfoListWithout1;
+    // No-op: nothing to repair yet
+    testRepairs(segmentManager, Collections.emptyList());
+
+    // 1 commits segment - should not create new metadata or CONSUMING segment
+    String segmentName = new LLCSegmentName(RAW_TABLE_NAME, 1, 1, CURRENT_TIME_MS).getSegmentName();
+    String startOffset = segmentManager._segmentZKMetadataMap.get(segmentName).getStartOffset();
+    CommittingSegmentDescriptor committingSegmentDescriptor = new CommittingSegmentDescriptor(segmentName,
+        new LongMsgOffset(Long.parseLong(startOffset) + NUM_DOCS).toString(), 0L);
+    committingSegmentDescriptor.setSegmentMetadata(mockSegmentMetadata());
+    segmentManager.commitSegmentMetadata(REALTIME_TABLE_NAME, committingSegmentDescriptor);
+    // ONLINE in IS and metadata DONE, but end of shard (not present in partition group list), so don't repair
+    testRepairs(segmentManager, Lists.newArrayList(1));
+
+    // Make the last ONLINE segment of the shard CONSUMING (failure between steps 1 and 3)
+    segmentManager._partitionGroupInfoList = partitionGroupInfoListWithout1;
+    consumingSegment = new LLCSegmentName(RAW_TABLE_NAME, 1, 1, CURRENT_TIME_MS).getSegmentName();
+    turnNewConsumingSegmentConsuming(instanceStatesMap, consumingSegment);
+
+    // Moves the segment to ONLINE in the ideal state, but creates no new entries, because the shard has ended.
+    testRepairs(segmentManager, Lists.newArrayList(1));
   }
 
   /**
@@ -539,7 +625,19 @@ public class PinotLLCRealtimeSegmentManagerTest {
     }
   }
 
-  private void testRepairs(FakePinotLLCRealtimeSegmentManager segmentManager) {
+  /**
+   * Turns all instances for the segment to CONSUMING in the ideal state.
+   */
+  private void turnNewConsumingSegmentConsuming(Map<String, Map<String, String>> instanceStatesMap,
+      String consumingSegment) {
+    Map<String, String> consumingSegmentInstanceStateMap = instanceStatesMap.get(consumingSegment);
+    assertNotNull(consumingSegmentInstanceStateMap);
+    for (Map.Entry<String, String> entry : consumingSegmentInstanceStateMap.entrySet()) {
+      entry.setValue(SegmentStateModel.CONSUMING);
+    }
+  }
+
+  private void testRepairs(FakePinotLLCRealtimeSegmentManager segmentManager, List<Integer> shardsEnded) {
     Map<String, Map<String, String>> oldInstanceStatesMap =
         cloneInstanceStatesMap(segmentManager._idealState.getRecord().getMapFields());
     segmentManager._exceededMaxSegmentCompletionTime = false;
@@ -547,7 +645,7 @@ public class PinotLLCRealtimeSegmentManagerTest {
     verifyNoChangeToOldEntries(segmentManager, oldInstanceStatesMap);
     segmentManager._exceededMaxSegmentCompletionTime = true;
     segmentManager.ensureAllPartitionsConsuming();
-    verifyRepairs(segmentManager);
+    verifyRepairs(segmentManager, shardsEnded);
   }
 
   /**
@@ -564,7 +662,7 @@ public class PinotLLCRealtimeSegmentManagerTest {
     }
   }
 
-  private void verifyRepairs(FakePinotLLCRealtimeSegmentManager segmentManager) {
+  private void verifyRepairs(FakePinotLLCRealtimeSegmentManager segmentManager, List<Integer> shardsEnded) {
     Map<String, Map<String, String>> instanceStatesMap = segmentManager._idealState.getRecord().getMapFields();
 
     // Segments are the same for ideal state and ZK metadata
@@ -597,16 +695,18 @@ public class PinotLLCRealtimeSegmentManagerTest {
       int numSegments = segments.size();
 
       String latestSegment = segments.get(numSegments - 1);
-
-      // Latest segment should have CONSUMING instance but no ONLINE instance in ideal state
       Map<String, String> instanceStateMap = instanceStatesMap.get(latestSegment);
-      assertTrue(instanceStateMap.containsValue(SegmentStateModel.CONSUMING));
-      assertFalse(instanceStateMap.containsValue(SegmentStateModel.ONLINE));
-
-      // Latest segment ZK metadata should be IN_PROGRESS
-      assertEquals(segmentManager._segmentZKMetadataMap.get(latestSegment).getStatus(), Status.IN_PROGRESS);
+      if (!shardsEnded.contains(partitionId)) {
+        // Latest segment should have CONSUMING instance but no ONLINE instance in ideal state
+        assertTrue(instanceStateMap.containsValue(SegmentStateModel.CONSUMING));
+        assertFalse(instanceStateMap.containsValue(SegmentStateModel.ONLINE));
+
+        // Latest segment ZK metadata should be IN_PROGRESS
+        assertEquals(segmentManager._segmentZKMetadataMap.get(latestSegment).getStatus(), Status.IN_PROGRESS);
+        numSegments--;
+      }
 
-      for (int i = 0; i < numSegments - 1; i++) {
+      for (int i = 0; i < numSegments; i++) {
         String segmentName = segments.get(i);
 
         // Committed segment should have all instances in ONLINE state
@@ -620,8 +720,13 @@ public class PinotLLCRealtimeSegmentManagerTest {
         // Verify segment start/end offset
         assertEquals(segmentZKMetadata.getStartOffset(),
             new LongMsgOffset(PARTITION_OFFSET.getOffset() + i * (long) NUM_DOCS).toString());
-        assertEquals(segmentZKMetadata.getEndOffset(),
-            segmentManager._segmentZKMetadataMap.get(segments.get(i + 1)).getStartOffset());
+        if (shardsEnded.contains(partitionId) && ((i + 1) == numSegments)) {
+          assertEquals(Long.parseLong(segmentZKMetadata.getEndOffset()),
+              Long.parseLong(segmentZKMetadata.getStartOffset()) + NUM_DOCS);
+        } else {
+          assertEquals(segmentZKMetadata.getEndOffset(),
+              segmentManager._segmentZKMetadataMap.get(segments.get(i + 1)).getStartOffset());
+        }
       }
     }
   }
@@ -818,6 +923,7 @@ public class PinotLLCRealtimeSegmentManagerTest {
     Map<String, Integer> _segmentZKMetadataVersionMap = new HashMap<>();
     IdealState _idealState;
     int _numPartitions;
+    List<PartitionGroupInfo> _partitionGroupInfoList = null;
     boolean _exceededMaxSegmentCompletionTime = false;
 
     FakePinotLLCRealtimeSegmentManager() {
@@ -919,9 +1025,11 @@ public class PinotLLCRealtimeSegmentManagerTest {
     @Override
     List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
         List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
-      return IntStream.range(0, _numPartitions).mapToObj(i -> new PartitionGroupInfo(i,
-          PARTITION_OFFSET))
-          .collect(Collectors.toList());
+      if (_partitionGroupInfoList != null) {
+        return _partitionGroupInfoList;
+      } else {
+        return IntStream.range(0, _numPartitions).mapToObj(i -> new PartitionGroupInfo(i, PARTITION_OFFSET))
+            .collect(Collectors.toList());
+      }
     }
 
     @Override
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
index d7aec8d..ae8b138 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
@@ -35,6 +35,7 @@ import org.apache.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata;
 import org.apache.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
 import org.apache.pinot.common.metrics.ServerMetrics;
 import org.apache.pinot.common.protocols.SegmentCompletionProtocol;
+import org.apache.pinot.common.utils.CommonConstants.Segment.Realtime.Status;
 import org.apache.pinot.common.utils.LLCSegmentName;
 import org.apache.pinot.core.data.manager.config.InstanceDataManagerConfig;
 import org.apache.pinot.core.indexsegment.mutable.MutableSegmentImpl;
@@ -147,6 +148,7 @@ public class LLRealtimeSegmentDataManagerTest {
     segmentZKMetadata.setSegmentName(_segmentNameStr);
     segmentZKMetadata.setStartOffset(_startOffset.toString());
     segmentZKMetadata.setCreationTime(System.currentTimeMillis());
+    segmentZKMetadata.setStatus(Status.IN_PROGRESS);
     return segmentZKMetadata;
   }
 
@@ -771,7 +773,7 @@ public class LLRealtimeSegmentDataManagerTest {
     public Field _state;
     public Field _shouldStop;
     public Field _stopReason;
-    private Field _streamMsgOffsetFactory;
+    private final Field _checkpointFactory;
     public LinkedList<LongMsgOffset> _consumeOffsets = new LinkedList<>();
     public LinkedList<SegmentCompletionProtocol.Response> _responses = new LinkedList<>();
     public boolean _commitSegmentCalled = false;
@@ -810,9 +812,9 @@ public class LLRealtimeSegmentDataManagerTest {
       _stopReason = LLRealtimeSegmentDataManager.class.getDeclaredField("_stopReason");
       _stopReason.setAccessible(true);
       _semaphoreMap = semaphoreMap;
-      _streamMsgOffsetFactory = LLRealtimeSegmentDataManager.class.getDeclaredField("_streamPartitionMsgOffsetFactory");
-      _streamMsgOffsetFactory.setAccessible(true);
-      _streamMsgOffsetFactory.set(this, new LongMsgOffsetFactory());
+      _checkpointFactory = LLRealtimeSegmentDataManager.class.getDeclaredField("_checkpointFactory");
+      _checkpointFactory.setAccessible(true);
+      _checkpointFactory.set(this, new LongMsgOffsetFactory());
     }
 
     public String getStopReason() {

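The rule exercised by the test changes above is small enough to state in code. Below is a minimal, self-contained sketch of it, using simplified stand-ins rather than the actual Pinot classes: on commit, a successor CONSUMING segment is created only when the committing partition group still appears in the freshly fetched partition group list; otherwise the shard has reached end of life and the committing segment is only flipped to ONLINE.

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class EndOfShardCommitSketch {

      /** Minimal stand-in for PartitionGroupInfo: only the id matters here. */
      static class PartitionGroupInfo {
        final int _partitionGroupId;
        PartitionGroupInfo(int partitionGroupId) { _partitionGroupId = partitionGroupId; }
      }

      /**
       * Returns the name of the next CONSUMING segment, or null when the committing
       * partition group is absent from the freshly fetched list (shard reached end of life).
       */
      static String onCommit(String rawTableName, int committingPartitionGroupId, int committedSequenceNumber,
          long creationTimeMs, List<PartitionGroupInfo> newPartitionGroupInfoList) {
        Set<Integer> newPartitionGroupIds = new HashSet<>();
        for (PartitionGroupInfo info : newPartitionGroupInfoList) {
          newPartitionGroupIds.add(info._partitionGroupId);
        }
        if (!newPartitionGroupIds.contains(committingPartitionGroupId)) {
          return null; // end of life: committing segment only flips to ONLINE, no successor
        }
        // Mirrors LLCSegmentName's table__partitionGroupId__sequenceNumber__creationTime naming.
        return rawTableName + "__" + committingPartitionGroupId + "__" + (committedSequenceNumber + 1) + "__"
            + creationTimeMs;
      }

      public static void main(String[] args) {
        List<PartitionGroupInfo> live = Arrays.asList(new PartitionGroupInfo(0));
        System.out.println(onCommit("myTable", 0, 2, 1600000000000L, live)); // myTable__0__3__1600000000000
        System.out.println(onCommit("myTable", 1, 1, 1600000000000L, live)); // null -> shard 1 ended
      }
    }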



[incubator-pinot] 22/47: fetch records with timeout

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 262d326be4dc8ee8b1a77bc65668ef3fc12c4aea
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 11:44:38 2020 +0530

    fetch records with timeout
---
 .../plugin/stream/kinesis/KinesisConsumer.java     | 30 ++++++++++++++++++----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 96241d4..910b9ee 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -21,6 +21,12 @@ package org.apache.pinot.plugin.stream.kinesis;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executor;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.ConsumerV2;
@@ -39,6 +45,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
   String _stream;
   Integer _maxRecords;
   String _shardId;
+  ExecutorService _executorService;
 
   public KinesisConsumer(KinesisConfig kinesisConfig, PartitionGroupMetadata partitionGroupMetadata) {
     super(kinesisConfig.getStream(), kinesisConfig.getAwsRegion());
@@ -46,10 +53,27 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     _maxRecords = kinesisConfig.maxRecordsToFetch();
     KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) partitionGroupMetadata;
     _shardId = kinesisShardMetadata.getShardId();
+    _executorService = Executors.newSingleThreadExecutor();
   }
 
   @Override
   public KinesisFetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
+    Future<KinesisFetchResult> kinesisFetchResultFuture = _executorService.submit(new Callable<KinesisFetchResult>() {
+      @Override
+      public KinesisFetchResult call()
+          throws Exception {
+        return getResult(start, end);
+      }
+    });
+
+    try {
+      return kinesisFetchResultFuture.get(timeout, TimeUnit.MILLISECONDS);
+    } catch (Exception e) {
+      return null;
+    }
+  }
+
+  private KinesisFetchResult getResult(Checkpoint start, Checkpoint end) {
     try {
       KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
 
@@ -65,9 +89,8 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
       }
 
       String nextStartSequenceNumber = null;
-      Long startTimestamp = System.currentTimeMillis();
 
-      while (shardIterator != null && !isTimedOut(startTimestamp, timeout)) {
+      while (shardIterator != null) {
         GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(shardIterator).build();
         GetRecordsResponse getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
 
@@ -119,7 +142,4 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     return getShardIteratorResponse.shardIterator();
   }
 
-  private boolean isTimedOut(Long startTimestamp, Long timeout) {
-    return (System.currentTimeMillis() - startTimestamp) >= timeout;
-  }
 }

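The pattern this commit introduces is a standard one: run the potentially unbounded fetch loop on a worker thread and bound the wait with Future.get. A minimal sketch of that pattern, assuming nothing about the Kinesis plugin beyond what the diff shows (class and method names here are illustrative):

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;

    public class TimedFetchSketch {
      private final ExecutorService _executorService = Executors.newSingleThreadExecutor();

      /** Runs a potentially unbounded fetch on a worker thread and bounds the wait. */
      public <T> T fetchWithTimeout(Callable<T> fetch, long timeoutMs) {
        Future<T> future = _executorService.submit(fetch);
        try {
          return future.get(timeoutMs, TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
          // Unlike the patch, interrupt the worker so an abandoned fetch stops polling.
          future.cancel(true);
          return null;
        } catch (Exception e) {
          return null; // the patch also returns null here; production code should log or rethrow
        }
      }
    }

One trade-off visible in the patch: catching every exception and returning null makes a timeout indistinguishable from a real failure, and the worker thread keeps running after the caller gives up. The sketch at least cancels the future so the interrupted fetch can stop early.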



[incubator-pinot] 41/47: Remove new partition groups creation in commit

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 40b6dbe760e7ceb9cde8bad1af7180446b51bb85
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Thu Jan 7 17:42:23 2021 -0800

    Remove new partition groups creation in commit
---
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 57 +++++++++-------------
 .../realtime/LLRealtimeSegmentDataManager.java     |  3 +-
 2 files changed, 23 insertions(+), 37 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 9fa6850..9a0786b 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -469,6 +469,8 @@ public class PinotLLCRealtimeSegmentManager {
   private void commitSegmentMetadataInternal(String realtimeTableName,
       CommittingSegmentDescriptor committingSegmentDescriptor) {
     String committingSegmentName = committingSegmentDescriptor.getSegmentName();
+    LLCSegmentName committingLLCSegment = new LLCSegmentName(committingSegmentName);
+    int committingSegmentPartitionGroupId = committingLLCSegment.getPartitionGroupId();
     LOGGER.info("Committing segment metadata for segment: {}", committingSegmentName);
 
     TableConfig tableConfig = getTableConfig(realtimeTableName);
@@ -495,51 +497,40 @@ public class PinotLLCRealtimeSegmentManager {
 
     // Step-2
 
-    // Say we currently were consuming from 2 shards A, B. Of those, A is the one committing.
+    // Example: Say we are currently consuming from 2 shards, A and B. Of those, A is the one committing.
 
-    // get current partition groups - this gives current state of latest segments for each partition [A - DONE], [B - IN_PROGRESS]
+    // Get current partition groups - this gives current state of latest segments for each partition [A - DONE], [B - IN_PROGRESS]
     List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
     PartitionLevelStreamConfig streamConfig = new PartitionLevelStreamConfig(tableConfig.getTableName(),
         IngestionConfigUtils.getStreamConfigMap(tableConfig));
 
-    // find new partition groups [A],[B],[C],[D] (assume A split into C D)
+    // Find new partition groups [A],[B],[C],[D] (assume A split into C D)
     // If the segment has consumed all of A, we will receive B,C,D
     // If the segment has still not reached the last msg of A, we will receive A,B,C,D
+    // If there were no splits/merges we would receive A,B
     List<PartitionGroupInfo> newPartitionGroupInfoList =
         getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
     int numPartitions = newPartitionGroupInfoList.size();
 
-    // create new segment metadata, only if PartitionGroupInfo was returned for it in the newPartitionGroupInfoList
-    Map<Integer, PartitionGroupMetadata> currentGroupIdToMetadata = currentPartitionGroupMetadataList.stream().collect(
-        Collectors.toMap(PartitionGroupMetadata::getPartitionGroupId, p -> p));
-
-    List<String> newConsumingSegmentNames = new ArrayList<>();
+    // Only if committingSegment's partitionGroup is present in the newPartitionGroupInfoList, we create new segment metadata
+    String newConsumingSegmentName = null;
     String rawTableName = TableNameBuilder.extractRawTableName(realtimeTableName);
     long newSegmentCreationTimeMs = getCurrentTimeMs();
     for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupInfoList) {
-      int newPartitionGroupId = partitionGroupInfo.getPartitionGroupId();
-      PartitionGroupMetadata currentPartitionGroupMetadata = currentGroupIdToMetadata.get(newPartitionGroupId);
-      if (currentPartitionGroupMetadata == null) { // not present in current state. New partition found.
-        // make new segment
-        // fixme: letting validation manager do this would be best, otherwise we risk creating multiple CONSUMING segments
-        String newLLCSegmentName =
-            setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo, newSegmentCreationTimeMs,
-                instancePartitions, numPartitions, numReplicas);
-        newConsumingSegmentNames.add(newLLCSegmentName);
-      } else {
-        LLCSegmentName committingLLCSegment = new LLCSegmentName(committingSegmentName);
-        // Update this only for committing segment. All other partitions should get updated by their own commit call
-        if (newPartitionGroupId == committingLLCSegment.getPartitionGroupId()) {
-          Preconditions.checkState(currentPartitionGroupMetadata.getStatus().equals(Status.DONE.toString()));
-          LLCSegmentName newLLCSegmentName = new LLCSegmentName(rawTableName, newPartitionGroupId,
-              currentPartitionGroupMetadata.getSequenceNumber() + 1, newSegmentCreationTimeMs);
-          createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, newSegmentCreationTimeMs,
-              committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
-          newConsumingSegmentNames.add(newLLCSegmentName.getSegmentName());
-        }
+      if (partitionGroupInfo.getPartitionGroupId() == committingSegmentPartitionGroupId) {
+        LLCSegmentName newLLCSegment = new LLCSegmentName(rawTableName, committingSegmentPartitionGroupId,
+            committingLLCSegment.getSequenceNumber() + 1, newSegmentCreationTimeMs);
+        createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegment, newSegmentCreationTimeMs,
+            committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
+        newConsumingSegmentName = newLLCSegment.getSegmentName();
+        break;
       }
     }
 
+    // TODO: create new partition groups also here
+    //  Cannot do it at the moment, because of the timestamp suffix on the segment name.
+    //  Different committing segments could create a CONSUMING segment for same new partitionGroup, with different name
+
     // Step-3
     SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
     Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap =
@@ -554,7 +545,7 @@ public class PinotLLCRealtimeSegmentManager {
     Lock lock = _idealStateUpdateLocks[lockIndex];
     try {
       lock.lock();
-      updateIdealStateOnSegmentCompletion(realtimeTableName, committingSegmentName, newConsumingSegmentNames,
+      updateIdealStateOnSegmentCompletion(realtimeTableName, committingSegmentName, newConsumingSegmentName,
           segmentAssignment, instancePartitionsMap);
     } finally {
       lock.unlock();
@@ -846,7 +837,7 @@ public class PinotLLCRealtimeSegmentManager {
    */
   @VisibleForTesting
   void updateIdealStateOnSegmentCompletion(String realtimeTableName, String committingSegmentName,
-      List<String> newSegmentNames, SegmentAssignment segmentAssignment,
+      String newSegmentName, SegmentAssignment segmentAssignment,
       Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap) {
     HelixHelper.updateIdealState(_helixManager, realtimeTableName, idealState -> {
       assert idealState != null;
@@ -863,11 +854,7 @@ public class PinotLLCRealtimeSegmentManager {
             "Exceeded max segment completion time for segment " + committingSegmentName);
       }
       updateInstanceStatesForNewConsumingSegment(idealState.getRecord().getMapFields(), committingSegmentName,
-          null, segmentAssignment, instancePartitionsMap);
-      for (String newSegmentName : newSegmentNames) {
-        updateInstanceStatesForNewConsumingSegment(idealState.getRecord().getMapFields(), null,
-            newSegmentName, segmentAssignment, instancePartitionsMap);
-      }
+          newSegmentName, segmentAssignment, instancePartitionsMap);
       return idealState;
     }, RetryPolicies.exponentialBackoffRetryPolicy(10, 1000L, 1.2f));
   }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index c889193..bc49830 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -308,11 +308,10 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
           _stopReason = SegmentCompletionProtocol.REASON_ROW_LIMIT;
           return true;
         } else if (_endOfPartitionGroup) {
+          // FIXME: handle numDocsIndexed == 0 case
           segmentLogger.info("Stopping consumption due to end of partitionGroup reached: numRowsIndexed={}, numRowsConsumed={}, segmentMaxRowCount={}",
               _numRowsIndexed, _numRowsConsumed, _segmentMaxRowCount);
           _stopReason = SegmentCompletionProtocol.REASON_END_OF_PARTITION_GROUP;
-          // fixme: what happens if reached endOfPartitionGroup but numDocsIndexed == 0
-          //  If we decide to only setupNewPartitions via ValidationManager, we don't need commit on endOfShard
           return true;
         }
         return false;


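Net effect of the controller-side hunks above: segment completion now flips the committing segment to ONLINE and creates at most one successor CONSUMING segment, deferring the creation of brand-new partition groups to a single coordinator. A minimal sketch of that invariant, using a flat segment-to-state map rather than Helix's per-instance ideal-state maps:

    import java.util.Map;
    import java.util.TreeMap;

    public class SegmentCompletionIdealStateSketch {

      /**
       * The committing segment goes ONLINE; at most one successor goes CONSUMING.
       * newSegmentName is null when the committing shard reached end of life.
       */
      static void updateOnSegmentCompletion(Map<String, String> segmentStates, String committingSegmentName,
          String newSegmentName) {
        segmentStates.put(committingSegmentName, "ONLINE");
        if (newSegmentName != null) {
          segmentStates.put(newSegmentName, "CONSUMING");
        }
        // Brand-new partition groups (e.g. from a shard split) are intentionally NOT created
        // here: per the TODO in the diff, two committing segments could otherwise race to
        // create differently-named CONSUMING segments for the same new group.
      }

      public static void main(String[] args) {
        Map<String, String> states = new TreeMap<>();
        states.put("myTable__0__2__t0", "CONSUMING");
        updateOnSegmentCompletion(states, "myTable__0__2__t0", "myTable__0__3__t1");
        System.out.println(states); // {myTable__0__2__t0=ONLINE, myTable__0__3__t1=CONSUMING}
      }
    }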


[incubator-pinot] 31/47: Add isEndOfPartition check in checkpoints

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit eb428cded05ff23b587fe0ed61b89a3b0ec9dd8e
Author: KKcorps <kh...@gmail.com>
AuthorDate: Thu Dec 24 17:58:40 2020 +0530

    Add isEndOfPartition check in checkpoints
---
 .../pinot/plugin/stream/kinesis/KinesisCheckpoint.java       | 12 +++++++++++-
 .../apache/pinot/plugin/stream/kinesis/KinesisConsumer.java  | 10 +++++++++-
 .../main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java |  1 +
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 027b789..54e26d0 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -22,11 +22,22 @@ import org.apache.pinot.spi.stream.v2.Checkpoint;
 
 public class KinesisCheckpoint implements Checkpoint {
   String _sequenceNumber;
+  Boolean _isEndOfPartition = false;
 
   public KinesisCheckpoint(String sequenceNumber) {
     _sequenceNumber = sequenceNumber;
   }
 
+  public KinesisCheckpoint(String sequenceNumber, Boolean isEndOfPartition) {
+    _sequenceNumber = sequenceNumber;
+    _isEndOfPartition = isEndOfPartition;
+  }
+
+  @Override
+  public boolean isEndOfPartition() {
+    return _isEndOfPartition;
+  }
+
   public String getSequenceNumber() {
     return _sequenceNumber;
   }
@@ -38,7 +49,6 @@ public class KinesisCheckpoint implements Checkpoint {
 
   @Override
   public KinesisCheckpoint deserialize(byte[] blob) {
-    //TODO: Implement SerDe
     return new KinesisCheckpoint(new String(blob));
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index abbc753..336468a 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -95,6 +95,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
       }
 
       String nextStartSequenceNumber = null;
+      boolean isEndOfShard = false;
 
       while (shardIterator != null) {
         GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(shardIterator).build();
@@ -114,14 +115,21 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
           }
         }
 
+        if (getRecordsResponse.hasChildShards()) {
+          // hasChildShards() returns true only once the end of the current shard has been reached.
+          isEndOfShard = true;
+          break;
+        }
+
         shardIterator = getRecordsResponse.nextShardIterator();
+
       }
 
       if (nextStartSequenceNumber == null && recordList.size() > 0) {
         nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
       }
 
-      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber);
+      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber, isEndOfShard);
       KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint, recordList);
 
       return kinesisFetchResult;
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
index 030fe4e..0195684 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.spi.stream.v2;
 
 public interface Checkpoint {
+  boolean isEndOfPartition();
   byte[] serialize();
   Checkpoint deserialize(byte[] blob);
 }

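One thing worth noting in the patch: KinesisCheckpoint.deserialize() rebuilds the checkpoint from the raw sequence number alone, so _isEndOfPartition is lost across a serialize/deserialize round trip. Below is a minimal sketch of an encoding that preserves the flag; the wire format is illustrative, not the plugin's:

    import java.nio.charset.StandardCharsets;

    public class EndOfPartitionCheckpointSketch {
      final String _sequenceNumber;
      final boolean _endOfPartition;

      EndOfPartitionCheckpointSketch(String sequenceNumber, boolean endOfPartition) {
        _sequenceNumber = sequenceNumber;
        _endOfPartition = endOfPartition;
      }

      /** Encodes the flag alongside the sequence number so deserialize() can restore both. */
      byte[] serialize() {
        return ((_endOfPartition ? "1" : "0") + ":" + _sequenceNumber).getBytes(StandardCharsets.UTF_8);
      }

      static EndOfPartitionCheckpointSketch deserialize(byte[] blob) {
        String s = new String(blob, StandardCharsets.UTF_8);
        return new EndOfPartitionCheckpointSketch(s.substring(2), s.charAt(0) == '1');
      }
    }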



[incubator-pinot] 09/47: Server side changes and some fixes

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit ae863a1087a370ed388a37c91cbd46acdece23b5
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Sat Jan 2 17:10:21 2021 -0800

    Server side changes and some fixes
---
 .../realtime/LLRealtimeSegmentDataManager.java     | 45 ++++++++++------------
 .../realtime/SegmentBuildTimeLeaseExtender.java    |  3 +-
 .../realtime/LLRealtimeSegmentDataManagerTest.java |  7 ++--
 .../impl/fakestream/FakeStreamConsumerFactory.java |  2 +-
 ...lakyConsumerRealtimeClusterIntegrationTest.java |  2 +-
 .../stream/kafka09/KafkaConsumerFactory.java       |  2 +-
 .../stream/kafka20/KafkaConsumerFactory.java       |  4 +-
 .../kafka20/KafkaPartitionLevelConsumer.java       |  1 +
 .../kafka20/KafkaStreamMetadataProvider.java       | 19 ++++++++-
 ...y.java => PartitionGroupCheckpointFactory.java} |  6 +--
 .../pinot/spi/stream/PartitionGroupConsumer.java   |  4 +-
 .../pinot/spi/stream/PartitionLevelConsumer.java   |  8 +++-
 .../pinot/spi/stream/StreamConsumerFactory.java    |  2 +-
 .../stream/StreamPartitionMsgOffsetFactory.java    |  6 ++-
 14 files changed, 70 insertions(+), 41 deletions(-)
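Before the per-file diffs, a sketch of what this refactor means for consuming code: server-side classes that previously held a StreamPartitionMsgOffset now program against the Checkpoint interface, so shard-based streams such as Kinesis can plug in non-numeric checkpoints. The types below are simplified stand-ins for the pinot-spi interfaces, not the actual ones:

    import java.nio.charset.StandardCharsets;

    // Simplified stand-in for the pinot-spi Checkpoint interface.
    interface Checkpoint {
      byte[] serialize();
    }

    // A Kafka-style numeric offset is just one Checkpoint implementation...
    class LongMsgOffsetSketch implements Checkpoint {
      final long _offset;
      LongMsgOffsetSketch(long offset) { _offset = offset; }
      public byte[] serialize() { return Long.toString(_offset).getBytes(StandardCharsets.UTF_8); }
    }

    // ...while a Kinesis-style checkpoint can carry an opaque sequence number.
    class SequenceNumberCheckpointSketch implements Checkpoint {
      final String _sequenceNumber;
      SequenceNumberCheckpointSketch(String sequenceNumber) { _sequenceNumber = sequenceNumber; }
      public byte[] serialize() { return _sequenceNumber.getBytes(StandardCharsets.UTF_8); }
    }

    // The consumer tracks progress through the interface, as the diff below does
    // with _currentOffset, _startOffset and _finalOffset.
    class ConsumerProgressSketch {
      private Checkpoint _currentOffset;
      void advance(Checkpoint next) { _currentOffset = next; }
    }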

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index 054676e..80aa9d8 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -34,7 +34,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import javax.annotation.Nullable;
@@ -71,8 +70,9 @@ import org.apache.pinot.spi.config.table.SegmentPartitionConfig;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
-import org.apache.pinot.spi.stream.FetchResult;
+import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.MessageBatch;
+import org.apache.pinot.spi.stream.PartitionGroupCheckpointFactory;
 import org.apache.pinot.spi.stream.PartitionGroupConsumer;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
@@ -83,8 +83,6 @@ import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
 import org.apache.pinot.spi.stream.StreamDecoderProvider;
 import org.apache.pinot.spi.stream.StreamMessageDecoder;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
-import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
-import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory;
 import org.apache.pinot.spi.stream.TransientConsumerException;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
 import org.joda.time.DateTime;
@@ -152,13 +150,13 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   public class SegmentBuildDescriptor {
     final File _segmentTarFile;
     final Map<String, File> _metadataFileMap;
-    final StreamPartitionMsgOffset _offset;
+    final Checkpoint _offset;
     final long _waitTimeMillis;
     final long _buildTimeMillis;
     final long _segmentSizeBytes;
 
     public SegmentBuildDescriptor(@Nullable File segmentTarFile, @Nullable Map<String, File> metadataFileMap,
-        StreamPartitionMsgOffset offset, long buildTimeMillis, long waitTimeMillis, long segmentSizeBytes) {
+        Checkpoint offset, long buildTimeMillis, long waitTimeMillis, long segmentSizeBytes) {
       _segmentTarFile = segmentTarFile;
       _metadataFileMap = metadataFileMap;
       _offset = _streamPartitionMsgOffsetFactory.create(offset);
@@ -167,7 +165,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       _segmentSizeBytes = segmentSizeBytes;
     }
 
-    public StreamPartitionMsgOffset getOffset() {
+    public Checkpoint getOffset() {
       return _offset;
     }
 
@@ -225,7 +223,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   private final String _metricKeyName;
   private final ServerMetrics _serverMetrics;
   private final MutableSegmentImpl _realtimeSegment;
-  private StreamPartitionMsgOffset _currentOffset;
+  private Checkpoint _currentOffset;
   private volatile State _state;
   private volatile int _numRowsConsumed = 0;
   private volatile int _numRowsIndexed = 0; // Can be different from _numRowsConsumed when metrics update is enabled.
@@ -236,12 +234,12 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   private final SegmentVersion _segmentVersion;
   private final SegmentBuildTimeLeaseExtender _leaseExtender;
   private SegmentBuildDescriptor _segmentBuildDescriptor;
-  private final StreamConsumerFactory _streamConsumerFactory;
-  private final StreamPartitionMsgOffsetFactory _streamPartitionMsgOffsetFactory;
+  private StreamConsumerFactory _streamConsumerFactory;
+  private PartitionGroupCheckpointFactory _streamPartitionMsgOffsetFactory;
 
   // Segment end criteria
   private volatile long _consumeEndTime = 0;
-  private StreamPartitionMsgOffset _finalOffset; // Used when we want to catch up to this one
+  private Checkpoint _finalOffset; // Used when we want to catch up to this one
   private volatile boolean _shouldStop = false;
 
   // It takes 30s to locate controller leader, and more if there are multiple controller failures.
@@ -272,7 +270,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   private final String _instanceId;
   private final ServerSegmentCompletionProtocolHandler _protocolHandler;
   private final long _consumeStartTime;
-  private final StreamPartitionMsgOffset _startOffset;
+  private final Checkpoint _startOffset;
   private final PartitionLevelStreamConfig _partitionLevelStreamConfig;
 
   private long _lastLogTime = 0;
@@ -371,7 +369,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     final long idlePipeSleepTimeMillis = 100;
     final long maxIdleCountBeforeStatUpdate = (3 * 60 * 1000) / (idlePipeSleepTimeMillis + _partitionLevelStreamConfig
         .getFetchTimeoutMillis());  // 3 minute count
-    StreamPartitionMsgOffset lastUpdatedOffset = _streamPartitionMsgOffsetFactory
+    Checkpoint lastUpdatedOffset = _streamPartitionMsgOffsetFactory
         .create(_currentOffset);  // so that we always update the metric when we enter this method.
     long consecutiveIdleCount = 0;
     // At this point, we know that we can potentially move the offset, so the old saved segment file is not valid
@@ -384,9 +382,8 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       // Update _currentOffset upon return from this method
       MessageBatch messageBatch;
       try {
-        FetchResult fetchResult = _partitionGroupConsumer
+        messageBatch = _partitionGroupConsumer
             .fetchMessages(_currentOffset, null, _partitionLevelStreamConfig.getFetchTimeoutMillis());
-        messageBatch = fetchResult.getMessages();
         consecutiveErrorCount = 0;
       } catch (TransientConsumerException e) {
         handleTransientStreamErrors(e);
@@ -560,7 +557,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
           _state = State.HOLDING;
           SegmentCompletionProtocol.Response response = postSegmentConsumedMsg();
           SegmentCompletionProtocol.ControllerResponseStatus status = response.getStatus();
-          StreamPartitionMsgOffset rspOffset = extractOffset(response);
+          Checkpoint rspOffset = extractOffset(response);
           boolean success;
           switch (status) {
             case NOT_LEADER:
@@ -666,7 +663,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   }
 
   @VisibleForTesting
-  protected StreamPartitionMsgOffset extractOffset(SegmentCompletionProtocol.Response response) {
+  protected Checkpoint extractOffset(SegmentCompletionProtocol.Response response) {
     if (response.getStreamPartitionMsgOffset() != null) {
       return _streamPartitionMsgOffsetFactory.create(response.getStreamPartitionMsgOffset());
     } else {
@@ -722,7 +719,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   }
 
   @VisibleForTesting
-  protected StreamPartitionMsgOffset getCurrentOffset() {
+  protected Checkpoint getCurrentOffset() {
     return _currentOffset;
   }
 
@@ -891,14 +888,14 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   }
 
   private void closeKafkaConsumers() {
-    closePartitionLevelConsumer();
+    closePartitionGroupConsumer();
     closeStreamMetadataProvider();
     if (_acquiredConsumerSemaphore.compareAndSet(true, false)) {
       _partitionGroupConsumerSemaphore.release();
     }
   }
 
-  private void closePartitionLevelConsumer() {
+  private void closePartitionGroupConsumer() {
     try {
       _partitionGroupConsumer.close();
     } catch (Exception e) {
@@ -966,7 +963,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       // Remove the segment file before we do anything else.
       removeSegmentFile();
       _leaseExtender.removeSegment(_segmentNameStr);
-      final StreamPartitionMsgOffset endOffset = _streamPartitionMsgOffsetFactory.create(llcMetadata.getEndOffset());
+      final Checkpoint endOffset = _streamPartitionMsgOffsetFactory.create(llcMetadata.getEndOffset());
       segmentLogger
           .info("State: {}, transitioning from CONSUMING to ONLINE (startOffset: {}, endOffset: {})", _state.toString(),
               _startOffset, endOffset);
@@ -1043,7 +1040,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     return System.currentTimeMillis();
   }
 
-  private boolean catchupToFinalOffset(StreamPartitionMsgOffset endOffset, long timeoutMs) {
+  private boolean catchupToFinalOffset(Checkpoint endOffset, long timeoutMs) {
     _finalOffset = endOffset;
     _consumeEndTime = now() + timeoutMs;
     _state = State.CONSUMING_TO_ONLINE;
@@ -1316,10 +1313,10 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
    */
   private void makeStreamConsumer(String reason) {
     if (_partitionGroupConsumer != null) {
-      closePartitionLevelConsumer();
+      closePartitionGroupConsumer();
     }
     segmentLogger.info("Creating new stream consumer, reason: {}", reason);
-    _partitionGroupConsumer = _streamConsumerFactory.createPartitionGroupConsumer(_partitionGroupMetadata);
+    _partitionGroupConsumer = _streamConsumerFactory.createPartitionGroupConsumer(_clientId, _partitionGroupMetadata);
   }
 
   /**
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/SegmentBuildTimeLeaseExtender.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/SegmentBuildTimeLeaseExtender.java
index 69d7e80..b1a1342 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/SegmentBuildTimeLeaseExtender.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/SegmentBuildTimeLeaseExtender.java
@@ -29,6 +29,7 @@ import java.util.concurrent.TimeUnit;
 import org.apache.pinot.common.metrics.ServerMetrics;
 import org.apache.pinot.common.protocols.SegmentCompletionProtocol;
 import org.apache.pinot.server.realtime.ServerSegmentCompletionProtocolHandler;
+import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -91,7 +92,7 @@ public class SegmentBuildTimeLeaseExtender {
    * @param initialBuildTimeMs is the initial time budget that SegmentCompletionManager has allocated.
    * @param offset The offset at which this segment is being built.
    */
-  public void addSegment(String segmentId, long initialBuildTimeMs, StreamPartitionMsgOffset offset) {
+  public void addSegment(String segmentId, long initialBuildTimeMs, Checkpoint offset) {
     final long initialDelayMs = initialBuildTimeMs * 9 / 10;
     final SegmentCompletionProtocol.Request.Params reqParams = new SegmentCompletionProtocol.Request.Params();
     reqParams.withStreamPartitionMsgOffset(offset.toString()).withSegmentName(segmentId).withExtraTimeSec(EXTRA_TIME_SECONDS)
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
index d09bdeb..d7aec8d 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
@@ -46,6 +46,7 @@ import org.apache.pinot.core.upsert.PartitionUpsertMetadataManager;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.env.PinotConfiguration;
+import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.LongMsgOffsetFactory;
 import org.apache.pinot.spi.stream.PermanentConsumerException;
@@ -193,7 +194,7 @@ public class LLRealtimeSegmentDataManagerTest {
               + "  \"status\" : \"CATCH_UP\""
               + "}";
       SegmentCompletionProtocol.Response response = SegmentCompletionProtocol.Response.fromJsonString(responseStr);
-      StreamPartitionMsgOffset extractedOffset = segmentDataManager.extractOffset(response);
+      Checkpoint extractedOffset = segmentDataManager.extractOffset(response);
       Assert.assertEquals(extractedOffset.compareTo(new LongMsgOffset(offset)), 0);
     }
     {
@@ -207,7 +208,7 @@ public class LLRealtimeSegmentDataManagerTest {
               + "  \"status\" : \"CATCH_UP\""
               + "}";
       SegmentCompletionProtocol.Response response = SegmentCompletionProtocol.Response.fromJsonString(responseStr);
-      StreamPartitionMsgOffset extractedOffset = segmentDataManager.extractOffset(response);
+      Checkpoint extractedOffset = segmentDataManager.extractOffset(response);
       Assert.assertEquals(extractedOffset.compareTo(new LongMsgOffset(offset)), 0);
     }
     {
@@ -221,7 +222,7 @@ public class LLRealtimeSegmentDataManagerTest {
               + "  \"status\" : \"CATCH_UP\""
               + "}";
       SegmentCompletionProtocol.Response response = SegmentCompletionProtocol.Response.fromJsonString(responseStr);
-      StreamPartitionMsgOffset extractedOffset = segmentDataManager.extractOffset(response);
+      Checkpoint extractedOffset = segmentDataManager.extractOffset(response);
       Assert.assertEquals(extractedOffset.compareTo(new LongMsgOffset(offset)), 0);
     }
     segmentDataManager.destroy();
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
index 6121eef..fbeb808 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
@@ -71,7 +71,7 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
 
 
   @Override
-  public PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata) {
+  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
     return null;
   }
 
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
index d917d73..c7523e3 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
@@ -121,7 +121,7 @@ public class FlakyConsumerRealtimeClusterIntegrationTest extends RealtimeCluster
     }
 
     @Override
-    public PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata) {
+    public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
       return null;
     }
   }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
index 82c282c..fe5a461 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
@@ -54,7 +54,7 @@ public class KafkaConsumerFactory extends StreamConsumerFactory {
   }
 
   @Override
-  public PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata) {
+  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
     return null;
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
index c73aacb..b6746ff 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
@@ -51,7 +51,7 @@ public class KafkaConsumerFactory extends StreamConsumerFactory {
   }
 
   @Override
-  public PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata) {
-    return null;
+  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
+    return new KafkaPartitionLevelConsumer(clientId, _streamConfig, metadata.getPartitionGroupId());
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionLevelConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionLevelConsumer.java
index f9b4365..25b1742 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionLevelConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionLevelConsumer.java
@@ -26,6 +26,7 @@ import java.util.concurrent.TimeoutException;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.clients.consumer.ConsumerRecords;
 import org.apache.kafka.common.utils.Bytes;
+import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
index ef22b6a..1d3162a 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
+import org.apache.kafka.common.TopicPartition;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
 import org.apache.pinot.spi.stream.PartitionGroupInfo;
@@ -76,7 +77,7 @@ public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHa
     // use offset criteria from stream config
     for (int i = currentPartitionGroupsMetadata.size(); i < partitionCount; i++) {
       StreamPartitionMsgOffset streamPartitionMsgOffset =
-          fetchStreamPartitionOffset(_streamConfig.getOffsetCriteria(), 5000);
+          fetchStreamPartitionOffsetInternal(i, _streamConfig.getOffsetCriteria(), 5000);
       newPartitionGroupInfoList.add(new PartitionGroupInfo(i, streamPartitionMsgOffset.toString()));
     }
     return newPartitionGroupInfoList;
@@ -104,6 +105,22 @@ public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHa
     return new LongMsgOffset(offset);
   }
 
+  private StreamPartitionMsgOffset fetchStreamPartitionOffsetInternal(int partitionId, @Nonnull OffsetCriteria offsetCriteria, long timeoutMillis) {
+    Preconditions.checkNotNull(offsetCriteria);
+    TopicPartition topicPartition = new TopicPartition(_topic, partitionId);
+    long offset = -1;
+    if (offsetCriteria.isLargest()) {
+      offset = _consumer.endOffsets(Collections.singletonList(topicPartition), Duration.ofMillis(timeoutMillis))
+          .get(topicPartition);
+    } else if (offsetCriteria.isSmallest()) {
+      offset = _consumer.beginningOffsets(Collections.singletonList(topicPartition), Duration.ofMillis(timeoutMillis))
+          .get(topicPartition);
+    } else {
+      throw new IllegalArgumentException("Unknown initial offset value " + offsetCriteria.toString());
+    }
+    return new LongMsgOffset(offset);
+  }
+
   @Override
   public void close()
       throws IOException {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffsetFactory.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupCheckpointFactory.java
similarity index 89%
copy from pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffsetFactory.java
copy to pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupCheckpointFactory.java
index d61d32d..14d2f39 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffsetFactory.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupCheckpointFactory.java
@@ -25,7 +25,7 @@ import org.apache.pinot.spi.annotations.InterfaceStability;
  * An interface to be implemented by streams that are consumed using Pinot LLC consumption.
  */
 @InterfaceStability.Evolving
-public interface StreamPartitionMsgOffsetFactory {
+public interface PartitionGroupCheckpointFactory {
   /**
    * Initialization, called once when the factory is created.
    * @param streamConfig
@@ -37,7 +37,7 @@ public interface StreamPartitionMsgOffsetFactory {
    * @param offsetStr
   * @return Checkpoint
    */
-  StreamPartitionMsgOffset create(String offsetStr);
+  Checkpoint create(String offsetStr);
 
   /**
    * Construct an offset from another one provided, of the same type.
@@ -45,5 +45,5 @@ public interface StreamPartitionMsgOffsetFactory {
    * @param other
    * @return
    */
-  StreamPartitionMsgOffset create(StreamPartitionMsgOffset other);
+  Checkpoint create(Checkpoint other);
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
index bbbdaad..b421268 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
@@ -19,8 +19,10 @@
 package org.apache.pinot.spi.stream;
 
 import java.io.Closeable;
+import java.util.concurrent.TimeoutException;
 
 
 public interface PartitionGroupConsumer extends Closeable {
-  FetchResult fetchMessages(Checkpoint start, Checkpoint end, long timeout);
+  MessageBatch fetchMessages(Checkpoint start, Checkpoint end, int timeout)
+      throws TimeoutException;
 }
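
A minimal caller-side sketch of the revised contract (partitionGroupConsumer, startCheckpoint and the timeout value are assumptions for illustration), mirroring how the realtime segment data manager uses it later in this patch:

    MessageBatch messageBatch;
    try {
      // end == null means "no end criteria": consume until the timeout elapses
      messageBatch = partitionGroupConsumer.fetchMessages(startCheckpoint, null, 5000);
    } catch (TimeoutException e) {
      // transient stream error: the caller may retry or recreate the consumer
    }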
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionLevelConsumer.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionLevelConsumer.java
index 3a0a1d2..3bedc8a 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionLevelConsumer.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionLevelConsumer.java
@@ -28,7 +28,7 @@ import org.apache.pinot.spi.annotations.InterfaceStability;
  */
 @InterfaceAudience.Public
 @InterfaceStability.Stable
-public interface PartitionLevelConsumer extends Closeable {
+public interface PartitionLevelConsumer extends Closeable, PartitionGroupConsumer {
 
   /**
    * Is here for backward compatibility for a short time.
@@ -62,4 +62,10 @@ public interface PartitionLevelConsumer extends Closeable {
     long endOffsetLong = endOffset == null ? Long.MAX_VALUE : ((LongMsgOffset)endOffset).getOffset();
     return fetchMessages(startOffsetLong, endOffsetLong, timeoutMillis);
   }
+
+  default MessageBatch fetchMessages(Checkpoint startCheckpoint, Checkpoint endCheckpoint, int timeoutMillis)
+      throws java.util.concurrent.TimeoutException {
+    // TODO Issue 5359 remove this default implementation once all kafka consumers have migrated to use this API
+    return fetchMessages((StreamPartitionMsgOffset) startCheckpoint, (StreamPartitionMsgOffset) endCheckpoint, timeoutMillis);
+  }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
index 9caf61b..db48a83 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
@@ -75,5 +75,5 @@ public abstract class StreamConsumerFactory {
   }
 
   // creates a consumer which consumes from a partition group
-  public abstract PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata);
+  public abstract PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata);
 }
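
A hedged sketch of obtaining a consumer under the new signature (streamConfig, clientId and partitionGroupMetadata are assumed to be in scope):

    StreamConsumerFactory factory = StreamConsumerFactoryProvider.create(streamConfig);
    PartitionGroupConsumer consumer =
        factory.createPartitionGroupConsumer(clientId, partitionGroupMetadata);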
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffsetFactory.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffsetFactory.java
index d61d32d..2e3386c 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffsetFactory.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffsetFactory.java
@@ -25,7 +25,7 @@ import org.apache.pinot.spi.annotations.InterfaceStability;
  * An interface to be implemented by streams that are consumed using Pinot LLC consumption.
  */
 @InterfaceStability.Evolving
-public interface StreamPartitionMsgOffsetFactory {
+public interface StreamPartitionMsgOffsetFactory extends PartitionGroupCheckpointFactory {
   /**
    * Initialization, called once when the factory is created.
    * @param streamConfig
@@ -46,4 +46,8 @@ public interface StreamPartitionMsgOffsetFactory {
    * @return
    */
   StreamPartitionMsgOffset create(StreamPartitionMsgOffset other);
+
+  default Checkpoint create(Checkpoint other) {
+    return create((StreamPartitionMsgOffset) other);
+  }
 }
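
To show how the default bridge lets an existing offset factory satisfy the new PartitionGroupCheckpointFactory interface unchanged, here is a hypothetical long-offset factory (init's exact signature is assumed from the javadoc; the class itself is not part of this change):

    public class ExampleLongMsgOffsetFactory implements StreamPartitionMsgOffsetFactory {
      @Override
      public void init(StreamConfig streamConfig) {
        // nothing to initialize for a plain long offset
      }

      @Override
      public StreamPartitionMsgOffset create(String offsetStr) {
        return new LongMsgOffset(Long.parseLong(offsetStr));
      }

      @Override
      public StreamPartitionMsgOffset create(StreamPartitionMsgOffset other) {
        return new LongMsgOffset(((LongMsgOffset) other).getOffset());
      }

      // create(Checkpoint) is inherited from the default method: it downcasts
      // to StreamPartitionMsgOffset and dispatches to create(...) above
    }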




[incubator-pinot] 44/47: End-of-shard as end criteria AND consume shards in order

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 751e21205fa53b8c6db01c8ba26aa8b3d5ace424
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Fri Jan 15 17:38:28 2021 -0800

    End-of-shard as end criteria AND consume shards in order
---
 .../segment/LLCRealtimeSegmentZKMetadata.java      |   5 -
 .../protocols/SegmentCompletionProtocol.java       |   2 +
 .../realtime/PinotLLCRealtimeSegmentManager.java   |  31 +++---
 .../RealtimeSegmentValidationManager.java          |   2 +-
 .../PinotLLCRealtimeSegmentManagerTest.java        |   2 +-
 .../realtime/LLRealtimeSegmentDataManager.java     |  35 ++++---
 .../kinesis/KinesisStreamMetadataProvider.java     | 106 ++++++++++++---------
 .../pinot/spi/stream/PartitionGroupInfo.java       |   6 +-
 .../pinot/spi/stream/PartitionGroupMetadata.java   |  16 ++--
 .../pinot/spi/stream/StreamMetadataProvider.java   |   2 +-
 10 files changed, 119 insertions(+), 88 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/LLCRealtimeSegmentZKMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/LLCRealtimeSegmentZKMetadata.java
index b8b8d95..7cb19a7 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/LLCRealtimeSegmentZKMetadata.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/LLCRealtimeSegmentZKMetadata.java
@@ -87,11 +87,6 @@ public class LLCRealtimeSegmentZKMetadata extends RealtimeSegmentZKMetadata {
   public ZNRecord toZNRecord() {
     ZNRecord znRecord = super.toZNRecord();
     znRecord.setSimpleField(START_OFFSET, _startOffset);
-    if (_endOffset == null) {
-      // TODO Issue 5359 Keep this until all components have upgraded to a version that can handle _offset being null
-      // For backward compatibility until all components have been upgraded to deal with null value for _endOffset
-      _endOffset = Long.toString(Long.MAX_VALUE);
-    }
     znRecord.setSimpleField(END_OFFSET, _endOffset);
     znRecord.setIntField(NUM_REPLICAS, _numReplicas);
     znRecord.setSimpleField(DOWNLOAD_URL, _downloadUrl);
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java b/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
index dd1330d..6dcbda2 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
@@ -138,6 +138,8 @@ public class SegmentCompletionProtocol {
 
   public static final String REASON_ROW_LIMIT = "rowLimit";  // Stop reason sent by server as max num rows reached
   public static final String REASON_TIME_LIMIT = "timeLimit";  // Stop reason sent by server as max time reached
+  public static final String REASON_END_OF_PARTITION_GROUP = "endOfPartitionGroup";
+      // Stop reason sent by server as end of partitionGroup reached
 
   // Canned responses
   public static final Response RESP_NOT_LEADER =
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 27d487b..72caaf4 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -78,6 +78,7 @@ import org.apache.pinot.spi.filesystem.PinotFS;
 import org.apache.pinot.spi.filesystem.PinotFSFactory;
 import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupCheckpointFactory;
 import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
@@ -166,7 +167,8 @@ public class PinotLLCRealtimeSegmentManager {
    * Using the ideal state and segment metadata, return a list of {@link PartitionGroupMetadata}
    * for latest segment of each partition group
    */
-  public List<PartitionGroupMetadata> getCurrentPartitionGroupMetadataList(IdealState idealState) {
+  public List<PartitionGroupMetadata> getCurrentPartitionGroupMetadataList(IdealState idealState,
+      StreamConfig streamConfig) {
     List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>();
 
     // From all segment names in the ideal state, find unique partition group ids and their latest segment
@@ -185,6 +187,8 @@ public class PinotLLCRealtimeSegmentManager {
     }
 
     // Create a PartitionGroupMetadata for each latest segment
+    PartitionGroupCheckpointFactory checkpointFactory =
+        StreamConsumerFactoryProvider.create(streamConfig).createStreamMsgOffsetFactory();
     for (Map.Entry<Integer, LLCSegmentName> entry : partitionGroupIdToLatestSegment.entrySet()) {
       int partitionGroupId = entry.getKey();
       LLCSegmentName llcSegmentName = entry.getValue();
@@ -195,7 +199,9 @@ public class PinotLLCRealtimeSegmentManager {
           (LLCRealtimeSegmentZKMetadata) realtimeSegmentZKMetadata;
       PartitionGroupMetadata partitionGroupMetadata =
           new PartitionGroupMetadata(partitionGroupId, llcSegmentName.getSequenceNumber(),
-              llRealtimeSegmentZKMetadata.getStartOffset(), llRealtimeSegmentZKMetadata.getEndOffset(),
+              checkpointFactory.create(llRealtimeSegmentZKMetadata.getStartOffset()),
+              llRealtimeSegmentZKMetadata.getEndOffset() == null ? null
+                  : checkpointFactory.create(llRealtimeSegmentZKMetadata.getEndOffset()),
               llRealtimeSegmentZKMetadata.getStatus().toString());
       partitionGroupMetadataList.add(partitionGroupMetadata);
     }
@@ -498,9 +504,10 @@ public class PinotLLCRealtimeSegmentManager {
     // Example: Say we currently were consuming from 2 shards A, B. Of those, A is the one committing.
 
     // Get current partition groups - this gives current state of latest segments for each partition [A - DONE], [B - IN_PROGRESS]
-    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
     PartitionLevelStreamConfig streamConfig = new PartitionLevelStreamConfig(tableConfig.getTableName(),
         IngestionConfigUtils.getStreamConfigMap(tableConfig));
+    List<PartitionGroupMetadata> currentPartitionGroupMetadataList =
+        getCurrentPartitionGroupMetadataList(idealState, streamConfig);
 
     // Find new partition groups [A],[B],[C],[D] (assume A split into C D)
     // If segment has consumed all of A, we will receive B,C,D
@@ -610,9 +617,7 @@ public class PinotLLCRealtimeSegmentManager {
       int numPartitions, int numReplicas) {
     String realtimeTableName = tableConfig.getTableName();
     String segmentName = newLLCSegmentName.getSegmentName();
-    StreamPartitionMsgOffsetFactory offsetFactory =
-        StreamConsumerFactoryProvider.create(streamConfig).createStreamMsgOffsetFactory();
-    StreamPartitionMsgOffset startOffset = offsetFactory.create(committingSegmentDescriptor.getNextOffset());
+    String startOffset = committingSegmentDescriptor.getNextOffset();
     LOGGER
         .info("Creating segment ZK metadata for new CONSUMING segment: {} with start offset: {} and creation time: {}",
             segmentName, startOffset, creationTimeMs);
@@ -621,7 +626,7 @@ public class PinotLLCRealtimeSegmentManager {
     newSegmentZKMetadata.setTableName(realtimeTableName);
     newSegmentZKMetadata.setSegmentName(segmentName);
     newSegmentZKMetadata.setCreationTime(creationTimeMs);
-    newSegmentZKMetadata.setStartOffset(startOffset.toString());
+    newSegmentZKMetadata.setStartOffset(startOffset);
     // Leave maxOffset as null.
     newSegmentZKMetadata.setNumReplicas(numReplicas);
     newSegmentZKMetadata.setStatus(Status.IN_PROGRESS);
@@ -808,7 +813,7 @@ public class PinotLLCRealtimeSegmentManager {
       assert idealState != null;
       if (idealState.isEnabled()) {
         List<PartitionGroupMetadata> currentPartitionGroupMetadataList =
-            getCurrentPartitionGroupMetadataList(idealState);
+            getCurrentPartitionGroupMetadataList(idealState, streamConfig);
         List<PartitionGroupInfo> newPartitionGroupInfoList =
             getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
         return ensureAllPartitionsConsuming(tableConfig, streamConfig, idealState, newPartitionGroupInfoList);
@@ -1121,7 +1126,7 @@ public class PinotLLCRealtimeSegmentManager {
     return idealState;
   }
 
-  private StreamPartitionMsgOffset getPartitionGroupStartCheckpoint(StreamConfig streamConfig, int partitionGroupId) {
+  private Checkpoint getPartitionGroupStartCheckpoint(StreamConfig streamConfig, int partitionGroupId) {
     Map<String, String> streamConfigMapWithSmallestOffsetCriteria = new HashMap<>(streamConfig.getStreamConfigsMap());
     streamConfigMapWithSmallestOffsetCriteria.put(StreamConfigProperties
             .constructStreamProperty(streamConfig.getType(), StreamConfigProperties.STREAM_CONSUMER_OFFSET_CRITERIA),
@@ -1130,12 +1135,10 @@ public class PinotLLCRealtimeSegmentManager {
         new StreamConfig(streamConfig.getTableNameWithType(), streamConfigMapWithSmallestOffsetCriteria);
     List<PartitionGroupInfo> smallestOffsetCriteriaPartitionGroupInfo =
         getPartitionGroupInfoList(smallestOffsetCriteriaStreamConfig, Collections.emptyList());
-    StreamPartitionMsgOffset partitionStartOffset = null;
+    Checkpoint partitionStartOffset = null;
     for (PartitionGroupInfo info : smallestOffsetCriteriaPartitionGroupInfo) {
       if (info.getPartitionGroupId() == partitionGroupId) {
-        StreamPartitionMsgOffsetFactory factory =
-            StreamConsumerFactoryProvider.create(streamConfig).createStreamMsgOffsetFactory();
-        partitionStartOffset = factory.create(info.getStartCheckpoint());
+        partitionStartOffset = info.getStartCheckpoint();
         break;
       }
     }
@@ -1155,7 +1158,7 @@ public class PinotLLCRealtimeSegmentManager {
       long creationTimeMs, InstancePartitions instancePartitions, int numPartitionGroups, int numReplicas) {
     String realtimeTableName = tableConfig.getTableName();
     int partitionGroupId = partitionGroupInfo.getPartitionGroupId();
-    String startCheckpoint = partitionGroupInfo.getStartCheckpoint();
+    String startCheckpoint = partitionGroupInfo.getStartCheckpoint().toString();
     LOGGER.info("Setting up new partition group: {} for table: {}", partitionGroupId, realtimeTableName);
 
     String rawTableName = TableNameBuilder.extractRawTableName(realtimeTableName);
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/validation/RealtimeSegmentValidationManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/validation/RealtimeSegmentValidationManager.java
index 96604dd..d611433 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/validation/RealtimeSegmentValidationManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/validation/RealtimeSegmentValidationManager.java
@@ -58,7 +58,7 @@ public class RealtimeSegmentValidationManager extends ControllerPeriodicTask<Rea
       LeadControllerManager leadControllerManager, PinotLLCRealtimeSegmentManager llcRealtimeSegmentManager,
       ValidationMetrics validationMetrics, ControllerMetrics controllerMetrics) {
     super("RealtimeSegmentValidationManager", config.getRealtimeSegmentValidationFrequencyInSeconds(),
-        6000, pinotHelixResourceManager,
+        config.getRealtimeSegmentValidationManagerInitialDelaySeconds(), pinotHelixResourceManager,
         leadControllerManager, controllerMetrics);
     _llcRealtimeSegmentManager = llcRealtimeSegmentManager;
     _validationMetrics = validationMetrics;
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
index c19a845..ecbf2ef 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
@@ -920,7 +920,7 @@ public class PinotLLCRealtimeSegmentManagerTest {
     List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
         List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
       return IntStream.range(0, _numPartitions).mapToObj(i -> new PartitionGroupInfo(i,
-          PARTITION_OFFSET.toString()))
+          PARTITION_OFFSET))
           .collect(Collectors.toList());
     }
 
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index e6e1402..1083757 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -160,7 +160,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
         Checkpoint offset, long buildTimeMillis, long waitTimeMillis, long segmentSizeBytes) {
       _segmentTarFile = segmentTarFile;
       _metadataFileMap = metadataFileMap;
-      _offset = _streamPartitionMsgOffsetFactory.create(offset);
+      _offset = _checkpointFactory.create(offset);
       _buildTimeMillis = buildTimeMillis;
       _waitTimeMillis = waitTimeMillis;
       _segmentSizeBytes = segmentSizeBytes;
@@ -235,11 +235,12 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   private final SegmentVersion _segmentVersion;
   private final SegmentBuildTimeLeaseExtender _leaseExtender;
   private SegmentBuildDescriptor _segmentBuildDescriptor;
-  private StreamConsumerFactory _streamConsumerFactory;
-  private PartitionGroupCheckpointFactory _streamPartitionMsgOffsetFactory;
+  private final StreamConsumerFactory _streamConsumerFactory;
+  private final PartitionGroupCheckpointFactory _checkpointFactory;
 
   // Segment end criteria
   private volatile long _consumeEndTime = 0;
+  private boolean _endOfPartitionGroup = false;
   private Checkpoint _finalOffset; // Used when we want to catch up to this one
   private volatile boolean _shouldStop = false;
 
@@ -307,6 +308,14 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
               _numRowsIndexed, _numRowsConsumed, _segmentMaxRowCount);
           _stopReason = SegmentCompletionProtocol.REASON_ROW_LIMIT;
           return true;
+        } else if (_endOfPartitionGroup) {
+          segmentLogger.info(
+              "Stopping consumption due to end of partitionGroup reached nRows={} numRowsIndexed={}, numRowsConsumed={}",
+              _numRowsIndexed, _numRowsConsumed, _segmentMaxRowCount);
+          _stopReason = SegmentCompletionProtocol.REASON_END_OF_PARTITION_GROUP;
+          // fixme: Handle creating a segment with 0 rows.
+          //  Happens if endOfPartitionGroup reached but no rows were consumed
+          return true;
         }
         return false;
 
@@ -370,7 +379,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     final long idlePipeSleepTimeMillis = 100;
     final long maxIdleCountBeforeStatUpdate = (3 * 60 * 1000) / (idlePipeSleepTimeMillis + _partitionLevelStreamConfig
         .getFetchTimeoutMillis());  // 3 minute count
-    Checkpoint lastUpdatedOffset = _streamPartitionMsgOffsetFactory
+    Checkpoint lastUpdatedOffset = _checkpointFactory
         .create(_currentOffset);  // so that we always update the metric when we enter this method.
     long consecutiveIdleCount = 0;
     // At this point, we know that we can potentially move the offset, so the old saved segment file is not valid
@@ -385,6 +394,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       try {
         messageBatch = _partitionGroupConsumer
             .fetchMessages(_currentOffset, null, _partitionLevelStreamConfig.getFetchTimeoutMillis());
+        _endOfPartitionGroup = messageBatch.isEndOfPartitionGroup();
         consecutiveErrorCount = 0;
       } catch (TimeoutException e) {
         handleTransientStreamErrors(e);
@@ -411,7 +421,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
 //        _serverMetrics.setValueOfTableGauge(_metricKeyName, ServerGauge.HIGHEST_KAFKA_OFFSET_CONSUMED, _currentOffset.getOffset());
 //        _serverMetrics.setValueOfTableGauge(_metricKeyName, ServerGauge.HIGHEST_STREAM_OFFSET_CONSUMED, _currentOffset.getOffset());
         _serverMetrics.setValueOfTableGauge(_metricKeyName, ServerGauge.LLC_PARTITION_CONSUMING, 1);
-        lastUpdatedOffset = _streamPartitionMsgOffsetFactory.create(_currentOffset);
+        lastUpdatedOffset = _checkpointFactory.create(_currentOffset);
       } else {
         // We did not consume any rows. Update the partition-consuming metric only if we have been idling for a long time.
         // Create a new stream consumer wrapper, in case we are stuck on something.
@@ -669,10 +679,10 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   @VisibleForTesting
   protected Checkpoint extractOffset(SegmentCompletionProtocol.Response response) {
     if (response.getStreamPartitionMsgOffset() != null) {
-      return _streamPartitionMsgOffsetFactory.create(response.getStreamPartitionMsgOffset());
+      return _checkpointFactory.create(response.getStreamPartitionMsgOffset());
     } else {
       // TODO Issue 5359 Remove this once the protocol is upgraded on server and controller
-      return _streamPartitionMsgOffsetFactory.create(Long.toString(response.getOffset()));
+      return _checkpointFactory.create(Long.toString(response.getOffset()));
     }
   }
 
@@ -967,7 +977,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       // Remove the segment file before we do anything else.
       removeSegmentFile();
       _leaseExtender.removeSegment(_segmentNameStr);
-      final Checkpoint endOffset = _streamPartitionMsgOffsetFactory.create(llcMetadata.getEndOffset());
+      final Checkpoint endOffset = _checkpointFactory.create(llcMetadata.getEndOffset());
       segmentLogger
           .info("State: {}, transitioning from CONSUMING to ONLINE (startOffset: {}, endOffset: {})", _state.toString(),
               _startOffset, endOffset);
@@ -1127,14 +1137,15 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     _partitionLevelStreamConfig =
         new PartitionLevelStreamConfig(_tableNameWithType, IngestionConfigUtils.getStreamConfigMap(_tableConfig));
     _streamConsumerFactory = StreamConsumerFactoryProvider.create(_partitionLevelStreamConfig);
-    _streamPartitionMsgOffsetFactory =
+    _checkpointFactory =
         StreamConsumerFactoryProvider.create(_partitionLevelStreamConfig).createStreamMsgOffsetFactory();
     _streamTopic = _partitionLevelStreamConfig.getTopicName();
     _segmentNameStr = _segmentZKMetadata.getSegmentName();
     _llcSegmentName = llcSegmentName;
     _partitionGroupId = _llcSegmentName.getPartitionGroupId();
     _partitionGroupMetadata = new PartitionGroupMetadata(_partitionGroupId, _llcSegmentName.getSequenceNumber(),
-        _segmentZKMetadata.getStartOffset(), _segmentZKMetadata.getEndOffset(),
+        _checkpointFactory.create(_segmentZKMetadata.getStartOffset()),
+        _segmentZKMetadata.getEndOffset() == null ? null : _checkpointFactory.create(_segmentZKMetadata.getEndOffset()),
         _segmentZKMetadata.getStatus().toString());
     _partitionGroupConsumerSemaphore = partitionGroupConsumerSemaphore;
     _acquiredConsumerSemaphore = new AtomicBoolean(false);
@@ -1279,8 +1290,8 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     }
 
     _realtimeSegment = new MutableSegmentImpl(realtimeSegmentConfigBuilder.build(), serverMetrics);
-    _startOffset = _streamPartitionMsgOffsetFactory.create(_segmentZKMetadata.getStartOffset());
-    _currentOffset = _streamPartitionMsgOffsetFactory.create(_startOffset);
+    _startOffset = _checkpointFactory.create(_segmentZKMetadata.getStartOffset());
+    _currentOffset = _checkpointFactory.create(_startOffset);
     _resourceTmpDir = new File(resourceDataDir, "_tmp");
     if (!_resourceTmpDir.exists()) {
       _resourceTmpDir.mkdirs();
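
The end-of-partition-group handling above assumes MessageBatch exposes an isEndOfPartitionGroup() flag; a hedged sketch of that SPI addition (the real interface has more methods; the default keeps unbounded streams such as Kafka unaffected):

    public interface MessageBatch<T> {
      int getMessageCount();

      default boolean isEndOfPartitionGroup() {
        // finite sources (e.g. an ended Kinesis shard) override this to signal EOL
        return false;
      }
    }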
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
index b22bbe4..42150a3 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
@@ -21,12 +21,15 @@ package org.apache.pinot.plugin.stream.kinesis;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.TimeoutException;
 import java.util.stream.Collectors;
 import javax.annotation.Nonnull;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.OffsetCriteria;
 import org.apache.pinot.spi.stream.PartitionGroupConsumer;
@@ -69,7 +72,7 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
   /**
    * This call returns all active shards, taking into account the consumption status for those shards.
    * PartitionGroupInfo is returned for a shard if:
-   * 1. It is a branch new shard i.e. no partitionGroupMetadata was found for it in the current list
+   * 1. It is a brand new shard AND its parent has been consumed completely
   * 2. It is still being actively consumed from, i.e. the consuming partition has not reached the end of the shard
    */
   @Override
@@ -77,54 +80,57 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
       List<PartitionGroupMetadata> currentPartitionGroupsMetadata, int timeoutMillis)
       throws IOException, TimeoutException {
 
-    Map<Integer, PartitionGroupMetadata> currentPartitionGroupMap = currentPartitionGroupsMetadata.stream()
-        .collect(Collectors.toMap(PartitionGroupMetadata::getPartitionGroupId, p -> p));
-
     List<PartitionGroupInfo> newPartitionGroupInfos = new ArrayList<>();
-    List<Shard> shards = _kinesisConnectionHandler.getShards();
-    for (Shard shard : shards) {
-      KinesisCheckpoint newStartCheckpoint;
-
-      String shardId = shard.shardId();
-      int partitionGroupId = getPartitionGroupIdFromShardId(shardId);
-      PartitionGroupMetadata currentPartitionGroupMetadata = currentPartitionGroupMap.get(partitionGroupId);
-
-      if (currentPartitionGroupMetadata != null) { // existing shard
-        KinesisCheckpoint currentEndCheckpoint = null;
-        try {
-          currentEndCheckpoint = new KinesisCheckpoint(currentPartitionGroupMetadata.getEndCheckpoint());
-        } catch (Exception e) {
-          // ignore. No end checkpoint yet for IN_PROGRESS segment
-        }
-        if (currentEndCheckpoint != null) { // end checkpoint available i.e. committing/committed segment
-          String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
-          if (endingSequenceNumber != null) { // shard has ended
-            // check if segment has consumed all the messages already
-            PartitionGroupConsumer partitionGroupConsumer =
-                _kinesisStreamConsumerFactory.createPartitionGroupConsumer(_clientId, currentPartitionGroupMetadata);
-
-            MessageBatch messageBatch;
-            try {
-              messageBatch = partitionGroupConsumer.fetchMessages(currentEndCheckpoint, null, _fetchTimeoutMs);
-            } finally {
-              partitionGroupConsumer.close();
-            }
-            if (messageBatch.isEndOfPartitionGroup()) {
-              // shard has ended. Skip it from results
-              continue;
-            }
+
+    Map<String, Shard> shardIdToShardMap =
+        _kinesisConnectionHandler.getShards().stream().collect(Collectors.toMap(Shard::shardId, s -> s));
+    Set<String> shardsInCurrent = new HashSet<>();
+    Set<String> shardsEnded = new HashSet<>();
+
+    // TODO: Once we start supporting multiple shards in a PartitionGroup,
+    //  we need to iterate over all shards to check if any of them have reached end
+
+    // Process existing shards. Add them to new list if still consuming from them
+    for (PartitionGroupMetadata currentPartitionGroupMetadata : currentPartitionGroupsMetadata) {
+      KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) currentPartitionGroupMetadata.getStartCheckpoint();
+      String shardId = kinesisStartCheckpoint.getShardToStartSequenceMap().keySet().iterator().next();
+      Shard shard = shardIdToShardMap.get(shardId);
+      shardsInCurrent.add(shardId);
+
+      Checkpoint newStartCheckpoint;
+      Checkpoint currentEndCheckpoint = currentPartitionGroupMetadata.getEndCheckpoint();
+      if (currentEndCheckpoint != null) { // Segment DONE (committing/committed)
+        String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
+        if (endingSequenceNumber != null) { // Shard has ended, check if we're also done consuming it
+          if (consumedEndOfShard(currentEndCheckpoint, currentPartitionGroupMetadata)) {
+            shardsEnded.add(shardId);
+            continue; // Shard ended and we're done consuming it. Skip
           }
-          newStartCheckpoint = currentEndCheckpoint;
-        } else {
-          newStartCheckpoint = new KinesisCheckpoint(currentPartitionGroupMetadata.getStartCheckpoint());
         }
-      } else { // new shard
+        newStartCheckpoint = currentEndCheckpoint;
+      } else { // Segment IN_PROGRESS
+        newStartCheckpoint = currentPartitionGroupMetadata.getStartCheckpoint();
+      }
+      newPartitionGroupInfos.add(new PartitionGroupInfo(currentPartitionGroupMetadata.getPartitionGroupId(), newStartCheckpoint));
+    }
+
+    // Add new shards. A shard qualifies if its parent is null (new table, very first shards) OR its parent has reached end-of-life and has been completely consumed.
+    for (Map.Entry<String, Shard> entry : shardIdToShardMap.entrySet()) {
+      String newShardId = entry.getKey();
+      if (shardsInCurrent.contains(newShardId)) {
+        continue;
+      }
+      Checkpoint newStartCheckpoint;
+      Shard newShard = entry.getValue();
+      String parentShardId = newShard.parentShardId();
+
+      if (parentShardId == null || shardsEnded.contains(parentShardId)) {
         Map<String, String> shardToSequenceNumberMap = new HashMap<>();
-        shardToSequenceNumberMap.put(shardId, shard.sequenceNumberRange().startingSequenceNumber());
+        shardToSequenceNumberMap.put(newShardId, newShard.sequenceNumberRange().startingSequenceNumber());
         newStartCheckpoint = new KinesisCheckpoint(shardToSequenceNumberMap);
+        int partitionGroupId = getPartitionGroupIdFromShardId(newShardId);
+        newPartitionGroupInfos.add(new PartitionGroupInfo(partitionGroupId, newStartCheckpoint));
       }
-
-      newPartitionGroupInfos.add(new PartitionGroupInfo(partitionGroupId, newStartCheckpoint.serialize()));
     }
     return newPartitionGroupInfos;
   }
@@ -138,6 +144,20 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
     return shardIdNum.isEmpty() ? 0 : Integer.parseInt(shardIdNum);
   }
 
+  private boolean consumedEndOfShard(Checkpoint startCheckpoint, PartitionGroupMetadata partitionGroupMetadata)
+      throws IOException, TimeoutException {
+    PartitionGroupConsumer partitionGroupConsumer =
+        _kinesisStreamConsumerFactory.createPartitionGroupConsumer(_clientId, partitionGroupMetadata);
+
+    MessageBatch messageBatch;
+    try {
+      messageBatch = partitionGroupConsumer.fetchMessages(startCheckpoint, null, _fetchTimeoutMs);
+    } finally {
+      partitionGroupConsumer.close();
+    }
+    return messageBatch.isEndOfPartitionGroup();
+  }
+
   @Override
   public void close() {
 
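
The shard-selection rules in the javadoc above can be condensed into a small predicate; this is a hypothetical paraphrase for illustration, not part of the patch:

    import java.util.Set;

    final class ShardSelectionRules {
      // A shard yields a PartitionGroupInfo if it is still being consumed (rule 2),
      // or if it is brand new and its parent shard has been fully consumed (rule 1)
      static boolean shouldEmitPartitionGroup(boolean inCurrentMetadataList,
          boolean shardFullyConsumed, String parentShardId, Set<String> endedShardIds) {
        if (inCurrentMetadataList) {
          return !shardFullyConsumed;
        }
        return parentShardId == null || endedShardIds.contains(parentShardId);
      }
    }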
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java
index 758953d..b06e878 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java
@@ -27,9 +27,9 @@ package org.apache.pinot.spi.stream;
 public class PartitionGroupInfo {
 
   private final int _partitionGroupId;
-  private final String _startCheckpoint;
+  private final Checkpoint _startCheckpoint;
 
-  public PartitionGroupInfo(int partitionGroupId, String startCheckpoint) {
+  public PartitionGroupInfo(int partitionGroupId, Checkpoint startCheckpoint) {
     _partitionGroupId = partitionGroupId;
     _startCheckpoint = startCheckpoint;
   }
@@ -38,7 +38,7 @@ public class PartitionGroupInfo {
     return _partitionGroupId;
   }
 
-  public String getStartCheckpoint() {
+  public Checkpoint getStartCheckpoint() {
     return _startCheckpoint;
   }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
index a99a82b..1ac12fb 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
@@ -22,12 +22,12 @@ public class PartitionGroupMetadata {
 
   private final int _partitionGroupId;
   private int _sequenceNumber;
-  private String _startCheckpoint;
-  private String _endCheckpoint;
+  private Checkpoint _startCheckpoint;
+  private Checkpoint _endCheckpoint;
   private String _status;
 
-  public PartitionGroupMetadata(int partitionGroupId, int sequenceNumber, String startCheckpoint,
-      String endCheckpoint, String status) {
+  public PartitionGroupMetadata(int partitionGroupId, int sequenceNumber, Checkpoint startCheckpoint,
+      Checkpoint endCheckpoint, String status) {
     _partitionGroupId = partitionGroupId;
     _sequenceNumber = sequenceNumber;
     _startCheckpoint = startCheckpoint;
@@ -47,19 +47,19 @@ public class PartitionGroupMetadata {
     _sequenceNumber = sequenceNumber;
   }
 
-  public String getStartCheckpoint() {
+  public Checkpoint getStartCheckpoint() {
     return _startCheckpoint;
   }
 
-  public void setStartCheckpoint(String startCheckpoint) {
+  public void setStartCheckpoint(Checkpoint startCheckpoint) {
     _startCheckpoint = startCheckpoint;
   }
 
-  public String getEndCheckpoint() {
+  public Checkpoint getEndCheckpoint() {
     return _endCheckpoint;
   }
 
-  public void setEndCheckpoint(String endCheckpoint) {
+  public void setEndCheckpoint(Checkpoint endCheckpoint) {
     _endCheckpoint = endCheckpoint;
   }
 
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
index cecc708..4b2751c 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
@@ -82,7 +82,7 @@ public interface StreamMetadataProvider extends Closeable {
           streamConsumerFactory.createPartitionMetadataProvider(clientId, i);
       StreamPartitionMsgOffset streamPartitionMsgOffset =
           partitionMetadataProvider.fetchStreamPartitionOffset(streamConfig.getOffsetCriteria(), timeoutMillis);
-      newPartitionGroupInfoList.add(new PartitionGroupInfo(i, streamPartitionMsgOffset.toString()));
+      newPartitionGroupInfoList.add(new PartitionGroupInfo(i, streamPartitionMsgOffset));
     }
     return newPartitionGroupInfoList;
   }




[incubator-pinot] 01/47: Controller side changes pseudo code

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 6502a7d29214cc1ad3bdf9feba169192d1c50ff7
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Wed Dec 23 17:08:08 2020 -0800

    Controller side changes pseudo code
---
 .../helix/core/PinotHelixResourceManager.java      | 29 ++++++++
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 79 ++++++++++++++++++++++
 .../org/apache/pinot/spi/stream/Checkpoint.java    | 24 +++++++
 .../org/apache/pinot/spi/stream/FetchResult.java   | 27 ++++++++
 .../pinot/spi/stream/PartitionGroupConsumer.java   | 23 +++++++
 .../pinot/spi/stream/PartitionGroupMetadata.java   | 41 +++++++++++
 .../spi/stream/PartitionGroupMetadataList.java     | 30 ++++++++
 .../org/apache/pinot/spi/stream/StreamConfig.java  |  6 +-
 .../pinot/spi/stream/StreamConsumerFactory.java    |  9 ++-
 9 files changed, 266 insertions(+), 2 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index ebf07d8..fa117fa 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -88,6 +88,7 @@ import org.apache.pinot.common.metadata.instance.InstanceZKMetadata;
 import org.apache.pinot.common.metadata.segment.OfflineSegmentZKMetadata;
 import org.apache.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
 import org.apache.pinot.common.utils.CommonConstants;
+import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
 import org.apache.pinot.common.utils.CommonConstants.Helix;
 import org.apache.pinot.common.utils.CommonConstants.Helix.StateModel.BrokerResourceStateModel;
 import org.apache.pinot.common.utils.CommonConstants.Helix.StateModel.SegmentStateModel;
@@ -125,7 +126,10 @@ import org.apache.pinot.spi.config.table.TenantConfig;
 import org.apache.pinot.spi.config.table.assignment.InstancePartitionsType;
 import org.apache.pinot.spi.config.tenant.Tenant;
 import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
+import org.apache.pinot.spi.stream.StreamConsumerFactory;
+import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.apache.pinot.spi.utils.retry.RetryPolicies;
@@ -1350,6 +1354,10 @@ public class PinotHelixResourceManager {
         IngestionConfigUtils.getStreamConfigMap(realtimeTableConfig));
     IdealState idealState = getTableIdealState(realtimeTableName);
 
+    if (streamConfig.isShardedConsumerType()) {
+      setupShardedRealtimeTable(streamConfig, idealState, realtimeTableConfig.getValidationConfig().getReplicasPerPartitionNumber());
+    }
+
     if (streamConfig.hasHighLevelConsumerType()) {
       if (idealState == null) {
         LOGGER.info("Initializing IdealState for HLC table: {}", realtimeTableName);
@@ -1382,6 +1390,27 @@ public class PinotHelixResourceManager {
     }
   }
 
+  /**
+   * Sets up the realtime table ideal state
+   * @param streamConfig
+   */
+  private void setupShardedRealtimeTable(StreamConfig streamConfig, IdealState idealState, int numReplicas) {
+    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
+
+    // get current partition groups and their metadata - this will be empty when creating the table
+    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = _pinotLLCRealtimeSegmentManager.getCurrentPartitionGroupMetadataList(idealState);
+
+    // get new partition groups and their metadata,
+    // Assume table has 3 shards. Say we get [0], [1], [2] groups (for now assume that each group contains only 1 shard)
+    List<PartitionGroupMetadata> newPartitionGroupMetadataList =
+        streamConsumerFactory.getPartitionGroupMetadataList(currentPartitionGroupMetadataList);
+
+    // setup segment zk metadata and ideal state for all the new found partition groups
+    _pinotLLCRealtimeSegmentManager.setupNewPartitionGroups(newPartitionGroupMetadataList, numReplicas);
+  }
+
+
+
   private void ensurePropertyStoreEntryExistsForHighLevelConsumer(String realtimeTableName) {
     String propertyStorePath = ZKMetadataProvider.constructPropertyStorePathForResource(realtimeTableName);
     if (!_propertyStore.exists(propertyStorePath, AccessOption.PERSISTENT)) {
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 42d50d0..8a29489 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -45,6 +45,7 @@ import org.apache.pinot.common.metadata.ZKMetadataProvider;
 import org.apache.pinot.common.metadata.segment.ColumnPartitionMetadata;
 import org.apache.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata;
 import org.apache.pinot.common.metadata.segment.SegmentPartitionMetadata;
+import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
 import org.apache.pinot.common.metrics.ControllerMeter;
 import org.apache.pinot.common.metrics.ControllerMetrics;
 import org.apache.pinot.common.protocols.SegmentCompletionProtocol;
@@ -75,10 +76,12 @@ import org.apache.pinot.spi.config.table.assignment.InstancePartitionsType;
 import org.apache.pinot.spi.filesystem.PinotFS;
 import org.apache.pinot.spi.filesystem.PinotFSFactory;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
 import org.apache.pinot.spi.stream.PartitionOffsetFetcher;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConfigProperties;
+import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory;
@@ -157,6 +160,82 @@ public class PinotLLCRealtimeSegmentManager {
     _flushThresholdUpdateManager = new FlushThresholdUpdateManager();
   }
 
+  /**
+   * The committing segment will call this.
+   * 
+   * For example, say we have 3 shards, grouped into PartitionGroups as [0], [1], [2]
+   * Now segment of PG (partition group) 0 is committing. First, we'll update the metadata to DONE, and ideal state to ONLINE
+   * Then, the currentPartitionGroupMetadata list will contain - [1], [2]
+   * The newPartitionGroupMetadata list will contain - [0], [1], [2]
+   * We then get the set of PGs for which new segments need to be made - [0]
+   */
+  public void commitPartitionGroup(String realtimeTableName, CommittingSegmentDescriptor committingSegmentDescriptor) {
+    TableConfig realtimeTableConfig = getTableConfig(realtimeTableName);
+    StreamConfig streamConfig = new StreamConfig(realtimeTableName, IngestionConfigUtils.getStreamConfigMap(realtimeTableConfig));
+    int numReplicas = realtimeTableConfig.getValidationConfig().getReplicasPerPartitionNumber();
+    IdealState idealState = getIdealState(realtimeTableName);
+
+    // update status in segment metadata to DONE
+    // ..
+
+    // update Ideal State for this segment to ONLINE
+    // ..
+
+    // fetch current partition groups (which are actively CONSUMING - from example above, [1], [2])
+    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
+
+    // get new partition groups (honor any groupings which are already consuming - [0], [1], [2])
+    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
+    List<PartitionGroupMetadata> newPartitionGroupMetadataList =
+        streamConsumerFactory.getPartitionGroupMetadataList(currentPartitionGroupMetadataList);
+
+    // from the above list, remove the partition groups which are already CONSUMING
+    // i.e. newPartitionGroups - currentPartitionGroups. Therefore, ([0], [1], [2]) - ([1], [2]) = ([0])
+    // ..
+
+    // set up segment metadata and ideal state for the newly found partition groups
+    setupNewPartitionGroups(newPartitionGroupMetadataList, numReplicas);
+  }
+
+  public void setupIdealStateForConsuming(List<SegmentZKMetadata> segmentZKMetadata, int numReplicas) {
+    // add all segments from the list to ideal state, with state CONSUMING
+  }
+
+  public void persistSegmentMetadata(List<SegmentZKMetadata> segmentMetadata) {
+    // persist new segment metadata from list to zk
+  }
+
+  /**
+   * Using the list of partition group metadata, create a list of equivalent segment zk metadata
+   */
+  public List<SegmentZKMetadata> constructSegmentMetadata(List<PartitionGroupMetadata> partitionGroupMetadataList) {
+    List<SegmentZKMetadata> segmentZKMetadata = new ArrayList<>();
+    // for each partition group construct a segment zk metadata object
+    return segmentZKMetadata;
+  }
+
+  /**
+   * Using the ideal state, return a list of the current partition groups
+   */
+  public List<PartitionGroupMetadata> getCurrentPartitionGroupMetadataList(IdealState idealState) {
+    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>();
+    // from all segment names in the ideal state, find unique groups
+
+    // create a PartitionGroupMetadata, one for each group
+    return partitionGroupMetadataList;
+  }
+
+  public void setupNewPartitionGroups(List<PartitionGroupMetadata> newPartitionGroupMetadataList, int numReplicas) {
+    // construct segment zk metadata for the new partition groups
+    List<SegmentZKMetadata> segmentMetadata = constructSegmentMetadata(newPartitionGroupMetadataList);
+
+    // create these new segments metadata
+    persistSegmentMetadata(segmentMetadata);
+
+    // setup ideal state for the new segments
+    setupIdealStateForConsuming(segmentMetadata, numReplicas);
+  }
+
   public boolean getIsSplitCommitEnabled() {
     return _controllerConf.getAcceptSplitCommit();
   }
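
The '..' placeholders in commitPartitionGroup above leave the set-difference step unspecified. A minimal sketch of that step, written against the PartitionGroupMetadata interface introduced later in this diff (the helper class and method names are hypothetical):

    import java.util.List;
    import java.util.Set;
    import java.util.stream.Collectors;
    import org.apache.pinot.spi.stream.PartitionGroupMetadata;

    class PartitionGroupDiff {
      // newPartitionGroups - currentPartitionGroups, keyed by group id: groups
      // present in the latest view but not yet CONSUMING need new segments
      static List<PartitionGroupMetadata> findNewGroups(List<PartitionGroupMetadata> currentGroups,
          List<PartitionGroupMetadata> latestGroups) {
        Set<Integer> consumingGroupIds = currentGroups.stream()
            .map(PartitionGroupMetadata::getGroupId)
            .collect(Collectors.toSet());
        return latestGroups.stream()
            .filter(metadata -> !consumingGroupIds.contains(metadata.getGroupId()))
            .collect(Collectors.toList());
      }
    }
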
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java
new file mode 100644
index 0000000..627c964
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.stream;
+
+public interface Checkpoint {
+  byte[] serialize();
+  Checkpoint deserialize(byte[] blob);
+}
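
For a stream with Kafka-style numeric offsets, one possible implementation of this interface is sketched below; LongCheckpoint is hypothetical (not part of this diff) and the UTF-8 string encoding is an assumption:

    import java.nio.charset.StandardCharsets;
    import org.apache.pinot.spi.stream.Checkpoint;

    public class LongCheckpoint implements Checkpoint {
      private final long _offset;

      public LongCheckpoint(long offset) {
        _offset = offset;
      }

      @Override
      public byte[] serialize() {
        // encode the offset as a UTF-8 string (an illustrative choice)
        return Long.toString(_offset).getBytes(StandardCharsets.UTF_8);
      }

      @Override
      public Checkpoint deserialize(byte[] blob) {
        return new LongCheckpoint(Long.parseLong(new String(blob, StandardCharsets.UTF_8)));
      }
    }
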
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java
new file mode 100644
index 0000000..b0ed6e5
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.stream;
+
+import java.util.List;
+
+
+public interface FetchResult<T> {
+  Checkpoint getLastCheckpoint();
+  List<T> getMessages();
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
new file mode 100644
index 0000000..2f138c2
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
@@ -0,0 +1,23 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.stream;
+
+public interface PartitionGroupConsumer {
+  FetchResult fetch(Checkpoint start, Checkpoint end, long timeout);
+}
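
The intended call pattern, sketched with hypothetical names against this interface and the createConsumer factory method added later in this diff; the null end checkpoint (no upper bound) and 10-second timeout are illustrative:

    import org.apache.pinot.spi.stream.Checkpoint;
    import org.apache.pinot.spi.stream.FetchResult;
    import org.apache.pinot.spi.stream.PartitionGroupConsumer;
    import org.apache.pinot.spi.stream.PartitionGroupMetadata;
    import org.apache.pinot.spi.stream.StreamConsumerFactory;

    class FetchExample {
      static Checkpoint fetchOnce(StreamConsumerFactory factory, PartitionGroupMetadata metadata,
          Checkpoint start) {
        PartitionGroupConsumer consumer = factory.createConsumer(metadata);
        // null end checkpoint means no upper bound; timeout is in milliseconds
        FetchResult fetchResult = consumer.fetch(start, null, 10_000L);
        return fetchResult.getLastCheckpoint(); // resume point for the next fetch
      }
    }
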
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
new file mode 100644
index 0000000..779c167
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.stream;
+
+import java.util.List;
+
+
+public interface PartitionGroupMetadata {
+
+  int getGroupId();
+
+  List<String> getPartitions();
+
+  Checkpoint getStartCheckpoint(); // similar to getStartOffset
+
+  Checkpoint getEndCheckpoint(); // similar to getEndOffset
+
+  void setStartCheckpoint(Checkpoint startCheckpoint);
+
+  void setEndCheckpoint(Checkpoint endCheckpoint);
+
+  byte[] serialize();
+
+  PartitionGroupMetadata deserialize(byte[] blob);
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataList.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataList.java
new file mode 100644
index 0000000..1568d63
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataList.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.stream;
+
+import java.util.List;
+
+
+public interface PartitionGroupMetadataList {
+
+  List<PartitionGroupMetadata> getMetadataList();
+
+  PartitionGroupMetadata getPartitionGroupMetadata(int index);
+
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
index d343203..a3e359e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
@@ -41,7 +41,7 @@ public class StreamConfig {
    * The type of the stream consumer either HIGHLEVEL or LOWLEVEL. For backward compatibility, adding SIMPLE which is equivalent to LOWLEVEL
    */
   public enum ConsumerType {
-    HIGHLEVEL, LOWLEVEL
+    HIGHLEVEL, LOWLEVEL, SHARDED
   }
 
   public static final int DEFAULT_FLUSH_THRESHOLD_ROWS = 5_000_000;
@@ -273,6 +273,10 @@ public class StreamConfig {
     return _consumerTypes.contains(ConsumerType.LOWLEVEL);
   }
 
+  public boolean isShardedConsumerType() {
+    return _consumerTypes.size() == 1 && _consumerTypes.get(0).equals(ConsumerType.SHARDED);
+  }
+
   public String getConsumerFactoryClassName() {
     return _consumerFactoryClassName;
   }
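
A hedged sketch of how a table might opt into the new type via its streamConfigs map; the property keys assume Pinot's usual "streamType" and "stream.<type>.<property>" conventions, and the other mandatory stream properties are omitted for brevity:

    import java.util.HashMap;
    import java.util.Map;

    class ShardedConsumerTypeExample {
      static Map<String, String> shardedStreamConfigs() {
        Map<String, String> streamConfigs = new HashMap<>();
        streamConfigs.put("streamType", "kinesis");
        // parsed by StreamConfig into ConsumerType.SHARDED
        streamConfigs.put("stream.kinesis.consumer.type", "sharded");
        return streamConfigs;
      }
    }
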
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
index 27205c9..4db0fb1 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
@@ -18,8 +18,8 @@
  */
 package org.apache.pinot.spi.stream;
 
+import java.util.List;
 import java.util.Set;
-import org.apache.pinot.spi.data.Schema;
 
 
 /**
@@ -73,4 +73,11 @@ public abstract class StreamConsumerFactory {
   public StreamPartitionMsgOffsetFactory createStreamMsgOffsetFactory() {
     return new LongMsgOffsetFactory();
   }
+
+  // takes the current state of partition groups (groupings of shards and the state of their consumption) and computes the new state
+  public abstract List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata);
+
+  // creates a consumer which consumes from a partition group
+  public abstract PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata);
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 08/47: An attempt at server-side changes

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 31c64a0cc138146dc59c1ce665f3ca72fd7b52f9
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Thu Dec 31 17:19:24 2020 -0800

    An attempt at server-side changes
---
 .../realtime/LLRealtimeSegmentDataManager.java     | 22 +++++++++++++---------
 .../org/apache/pinot/spi/stream/FetchResult.java   |  5 +----
 .../pinot/spi/stream/PartitionGroupConsumer.java   |  2 +-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index 0938251..054676e 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -71,8 +71,10 @@ import org.apache.pinot.spi.config.table.SegmentPartitionConfig;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.stream.FetchResult;
 import org.apache.pinot.spi.stream.MessageBatch;
-import org.apache.pinot.spi.stream.PartitionLevelConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
 import org.apache.pinot.spi.stream.PermanentConsumerException;
 import org.apache.pinot.spi.stream.RowMetadata;
@@ -249,10 +251,11 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   private Thread _consumerThread;
   private final String _streamTopic;
   private final int _partitionGroupId;
+  private final PartitionGroupMetadata _partitionGroupMetadata;
   final String _clientId;
   private final LLCSegmentName _llcSegmentName;
   private final RecordTransformer _recordTransformer;
-  private PartitionLevelConsumer _partitionLevelConsumer = null;
+  private PartitionGroupConsumer _partitionGroupConsumer = null;
   private StreamMetadataProvider _streamMetadataProvider = null;
   private final File _resourceTmpDir;
   private final String _tableNameWithType;
@@ -381,12 +384,10 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       // Update _currentOffset upon return from this method
       MessageBatch messageBatch;
       try {
-        messageBatch = _partitionLevelConsumer
+        FetchResult fetchResult = _partitionGroupConsumer
             .fetchMessages(_currentOffset, null, _partitionLevelStreamConfig.getFetchTimeoutMillis());
+        messageBatch = fetchResult.getMessages();
         consecutiveErrorCount = 0;
-      } catch (TimeoutException e) {
-        handleTransientStreamErrors(e);
-        continue;
       } catch (TransientConsumerException e) {
         handleTransientStreamErrors(e);
         continue;
@@ -899,7 +900,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
 
   private void closePartitionLevelConsumer() {
     try {
-      _partitionLevelConsumer.close();
+      _partitionGroupConsumer.close();
     } catch (Exception e) {
       segmentLogger.warn("Could not close stream consumer", e);
     }
@@ -1131,6 +1132,9 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     _segmentNameStr = _segmentZKMetadata.getSegmentName();
     _llcSegmentName = llcSegmentName;
     _partitionGroupId = _llcSegmentName.getPartitionGroupId();
+    _partitionGroupMetadata = new PartitionGroupMetadata(_partitionGroupId, _llcSegmentName.getSequenceNumber(),
+        _segmentZKMetadata.getStartOffset(), _segmentZKMetadata.getEndOffset(),
+        _segmentZKMetadata.getStatus().toString());
     _partitionGroupConsumerSemaphore = partitionGroupConsumerSemaphore;
     _acquiredConsumerSemaphore = new AtomicBoolean(false);
     _metricKeyName = _tableNameWithType + "-" + _streamTopic + "-" + _partitionGroupId;
@@ -1311,11 +1315,11 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
    * @param reason
    */
   private void makeStreamConsumer(String reason) {
-    if (_partitionLevelConsumer != null) {
+    if (_partitionGroupConsumer != null) {
       closePartitionLevelConsumer();
     }
     segmentLogger.info("Creating new stream consumer, reason: {}", reason);
-    _partitionLevelConsumer = _streamConsumerFactory.createPartitionLevelConsumer(_clientId, _partitionGroupId);
+    _partitionGroupConsumer = _streamConsumerFactory.createPartitionGroupConsumer(_partitionGroupMetadata);
   }
 
   /**
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java
index b0ed6e5..7e8a911 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java
@@ -18,10 +18,7 @@
  */
 package org.apache.pinot.spi.stream;
 
-import java.util.List;
-
-
 public interface FetchResult<T> {
   Checkpoint getLastCheckpoint();
-  List<T> getMessages();
+  MessageBatch<T> getMessages();
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
index e096e67..bbbdaad 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
@@ -22,5 +22,5 @@ import java.io.Closeable;
 
 
 public interface PartitionGroupConsumer extends Closeable {
-  FetchResult fetch(Checkpoint start, Checkpoint end, long timeout);
+  FetchResult fetchMessages(Checkpoint start, Checkpoint end, long timeout);
 }
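
A sketch of the consuming loop after this change; the method is illustrative, not the actual LLRealtimeSegmentDataManager code, and error handling is elided:

    import org.apache.pinot.spi.stream.Checkpoint;
    import org.apache.pinot.spi.stream.FetchResult;
    import org.apache.pinot.spi.stream.MessageBatch;
    import org.apache.pinot.spi.stream.PartitionGroupConsumer;

    class ConsumeLoopExample {
      static Checkpoint consumeBatch(PartitionGroupConsumer consumer, Checkpoint current,
          long fetchTimeoutMillis) {
        FetchResult fetchResult = consumer.fetchMessages(current, null, fetchTimeoutMillis);
        MessageBatch messageBatch = fetchResult.getMessages();
        for (int i = 0; i < messageBatch.getMessageCount(); i++) {
          // decode and index messageBatch.getMessageAtIndex(i) into the consuming segment
        }
        return fetchResult.getLastCheckpoint(); // becomes the next fetch's start
      }
    }
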


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 11/47: Add interfaces for V2 consumers

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 3f14cf086d2117af2d24d73714e79e08d64c461d
Author: KKcorps <kh...@gmail.com>
AuthorDate: Thu Dec 10 19:08:15 2020 +0530

    Add interfaces for V2 consumers
---
 .../org/apache/pinot/spi/stream/v2/Checkpoint.java    |  6 ++++++
 .../org/apache/pinot/spi/stream/v2/ConsumerV2.java    |  6 ++++++
 .../org/apache/pinot/spi/stream/v2/FetchResult.java   |  7 +++++++
 .../pinot/spi/stream/v2/PartitionGroupMetadata.java   | 16 ++++++++++++++++
 .../pinot/spi/stream/v2/SegmentNameGenerator.java     |  7 +++++++
 .../pinot/spi/stream/v2/StreamConsumerFactoryV2.java  | 19 +++++++++++++++++++
 6 files changed, 61 insertions(+)

diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
new file mode 100644
index 0000000..0856454
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
@@ -0,0 +1,6 @@
+package org.apache.pinot.spi.stream.v2;
+
+public interface Checkpoint {
+  byte[] serialize();
+  Checkpoint deserialize(byte[] blob);
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java
new file mode 100644
index 0000000..afc8d38
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java
@@ -0,0 +1,6 @@
+package org.apache.pinot.spi.stream.v2;
+
+public interface ConsumerV2 {
+  FetchResult fetch(Checkpoint start, Checkpoint end, long timeout);
+}
+
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
new file mode 100644
index 0000000..b490835
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
@@ -0,0 +1,7 @@
+package org.apache.pinot.spi.stream.v2;
+
+public interface FetchResult {
+  Checkpoint getLastCheckpoint();
+  byte[] getMessages();
+}
+
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java
new file mode 100644
index 0000000..27c5ce7
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java
@@ -0,0 +1,16 @@
+package org.apache.pinot.spi.stream.v2;
+
+public interface PartitionGroupMetadata {
+  Checkpoint getStartCheckpoint(); // similar to getStartOffset
+
+  Checkpoint getEndCheckpoint(); // similar to getEndOffset
+
+  void setStartCheckpoint(Checkpoint startCheckpoint);
+
+  void setEndCheckpoint(Checkpoint endCheckpoint);
+
+  byte[] serialize();
+
+  PartitionGroupMetadata deserialize(byte[] blob);
+}
+
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java
new file mode 100644
index 0000000..689c686
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java
@@ -0,0 +1,7 @@
+package org.apache.pinot.spi.stream.v2;
+
+public interface SegmentNameGenerator {
+  // generates a unique name for a partition group based on the metadata
+  String generateSegmentName(PartitionGroupMetadata metadata);
+
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
new file mode 100644
index 0000000..bd3017d
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
@@ -0,0 +1,19 @@
+package org.apache.pinot.spi.stream.v2;
+
+import java.util.Map;
+import org.apache.pinot.spi.stream.StreamConfig;
+
+
+public interface StreamConsumerFactoryV2 {
+  void init(StreamConfig streamConfig);
+
+  // takes the current state of partition groups (groupings of shards and the state of their consumption) and computes the new state
+  Map<Long, PartitionGroupMetadata> getPartitionGroupsMetadata(Map<Long, PartitionGroupMetadata> currentPartitionGroupsMetadata);
+
+  // creates a name generator which generates segment name for a partition group
+  SegmentNameGenerator getSegmentNameGenerator();
+
+  // creates a consumer which consumes from a partition group
+  ConsumerV2 createConsumer(PartitionGroupMetadata metadata);
+
+}
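
A sketch of how these V2 pieces compose end to end; the empty initial state and 10-second timeout are illustrative assumptions:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.pinot.spi.stream.StreamConfig;
    import org.apache.pinot.spi.stream.v2.ConsumerV2;
    import org.apache.pinot.spi.stream.v2.FetchResult;
    import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
    import org.apache.pinot.spi.stream.v2.StreamConsumerFactoryV2;

    class V2ConsumerExample {
      static void consumeAllGroupsOnce(StreamConsumerFactoryV2 factory, StreamConfig streamConfig) {
        factory.init(streamConfig);
        // pass an empty current state to discover all partition groups
        Map<Long, PartitionGroupMetadata> groups = factory.getPartitionGroupsMetadata(new HashMap<>());
        for (PartitionGroupMetadata metadata : groups.values()) {
          ConsumerV2 consumer = factory.createConsumer(metadata);
          FetchResult result = consumer.fetch(metadata.getStartCheckpoint(),
              metadata.getEndCheckpoint(), 10_000L);
          // advance the group's start checkpoint to where this fetch left off
          metadata.setStartCheckpoint(result.getLastCheckpoint());
        }
      }
    }
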


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 15/47: Refactor PartitionGroupMetadataMap interface

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 5ed893eef9f92df42e8997e0213027c05ca91e10
Author: KKcorps <kh...@gmail.com>
AuthorDate: Fri Dec 11 23:56:08 2020 +0530

    Refactor PartitionGroupMetadataMap interface
---
 .../org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
index 3c344bc..702f08a 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
@@ -1,4 +1,12 @@
 package org.apache.pinot.spi.stream.v2;
 
+import java.util.List;
+
+
 public interface PartitionGroupMetadataMap {
+
+  List<PartitionGroupMetadata> getMetadataList();
+
+  PartitionGroupMetadata getPartitionGroupMetadata(int index);
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 27/47: Refactor: get shard iterator methods

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit be19cf6866235d7bf4ce0a74424ae2378f40c8bc
Author: KKcorps <kh...@gmail.com>
AuthorDate: Mon Dec 21 14:25:25 2020 +0530

    Refactor: get shard iterator methods
---
 .../plugin/stream/kinesis/KinesisConsumer.java     | 25 ++++++++++++----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index fd48a92..3263f87 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -163,21 +163,24 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
   }
 
   private String getShardIterator(KinesisCheckpoint kinesisStartCheckpoint) {
-    GetShardIteratorResponse getShardIteratorResponse;
-
     if (kinesisStartCheckpoint.getSequenceNumber() != null) {
-      String kinesisStartSequenceNumber = kinesisStartCheckpoint.getSequenceNumber();
-      getShardIteratorResponse = _kinesisClient.getShardIterator(
-          GetShardIteratorRequest.builder().streamName(_stream).shardId(_shardId)
-              .shardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
-              .startingSequenceNumber(kinesisStartSequenceNumber).build());
+      return getShardIterator(ShardIteratorType.AT_SEQUENCE_NUMBER, kinesisStartCheckpoint.getSequenceNumber());
     } else {
-      getShardIteratorResponse = _kinesisClient.getShardIterator(
-          GetShardIteratorRequest.builder().shardId(_shardId).streamName(_stream)
-              .shardIteratorType(ShardIteratorType.LATEST).build());
+      return getShardIterator(ShardIteratorType.LATEST, null);
     }
+  }
 
-    return getShardIteratorResponse.shardIterator();
+  public String getShardIterator(ShardIteratorType shardIteratorType, String sequenceNumber) {
+    if (sequenceNumber == null) {
+      return _kinesisClient.getShardIterator(
+          GetShardIteratorRequest.builder().shardId(_shardId).streamName(_stream)
+              .shardIteratorType(shardIteratorType).build()).shardIterator();
+    } else {
+      return _kinesisClient.getShardIterator(
+          GetShardIteratorRequest.builder().streamName(_stream).shardId(_shardId)
+              .shardIteratorType(shardIteratorType)
+              .startingSequenceNumber(sequenceNumber).build()).shardIterator();
+    }
   }
 
   @Override
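
Hypothetical call sites for the refactored helper; the consumer variable and sequence number are placeholders, not values from this diff:

    String fromLatest = consumer.getShardIterator(ShardIteratorType.LATEST, null);
    String fromSequence = consumer.getShardIterator(ShardIteratorType.AT_SEQUENCE_NUMBER, "<sequence-number>");
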


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 29/47: Add test code for kinesis

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 12880126a7a0c5a8b2e1ac3656e64fce7ebe47d5
Author: KKcorps <kh...@gmail.com>
AuthorDate: Tue Dec 22 22:05:02 2020 +0530

    Add test code for kinesis
---
 .../pinot/plugin/stream/kinesis/KinesisConfig.java | 17 +++++--
 .../kinesis/KinesisPartitionGroupMetadataMap.java  | 16 +++----
 .../plugin/stream/kinesis/KinesisConsumerTest.java | 54 ++++++++++++++++++++++
 3 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index d2e8715..a81d11f 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -18,11 +18,14 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
+import java.util.Map;
 import org.apache.pinot.spi.stream.StreamConfig;
 
 
 public class KinesisConfig {
-  private final StreamConfig _streamConfig;
+  private final Map<String, String> _props;
+
+  public static final String STREAM = "stream";
   private static final String AWS_REGION = "aws-region";
   private static final String MAX_RECORDS_TO_FETCH = "max-records-to-fetch";
 
@@ -30,18 +33,22 @@ public class KinesisConfig {
   private static final String DEFAULT_MAX_RECORDS = "20";
 
   public KinesisConfig(StreamConfig streamConfig) {
-    _streamConfig = streamConfig;
+    _props = streamConfig.getStreamConfigsMap();
+  }
+
+  public KinesisConfig(Map<String, String> props) {
+    _props = props;
   }
 
   public String getStream(){
-    return _streamConfig.getTopicName();
+    return _props.get(STREAM);
   }
 
   public String getAwsRegion(){
-    return _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, DEFAULT_AWS_REGION);
+    return _props.getOrDefault(AWS_REGION, DEFAULT_AWS_REGION);
   }
 
   public Integer maxRecordsToFetch(){
-    return Integer.parseInt(_streamConfig.getStreamConfigsMap().getOrDefault(MAX_RECORDS_TO_FETCH, DEFAULT_MAX_RECORDS));
+    return Integer.parseInt(_props.getOrDefault(MAX_RECORDS_TO_FETCH, DEFAULT_MAX_RECORDS));
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
index d77579e..626c8ea 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -35,28 +35,28 @@ public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler i
   private final List<PartitionGroupMetadata> _stringPartitionGroupMetadataIndex = new ArrayList<>();
 
   public KinesisPartitionGroupMetadataMap(String stream, String awsRegion,
-      PartitionGroupMetadataMap partitionGroupMetadataMap) {
+      PartitionGroupMetadataMap currentPartitionGroupMetadataMap) {
     //TODO: Handle child shards. Do not consume data from child shard unless parent is finished.
     //Return metadata only for shards in current metadata
     super(stream, awsRegion);
     KinesisPartitionGroupMetadataMap currentPartitionMeta =
-        (KinesisPartitionGroupMetadataMap) partitionGroupMetadataMap;
+        (KinesisPartitionGroupMetadataMap) currentPartitionGroupMetadataMap;
     List<PartitionGroupMetadata> currentMetaList = currentPartitionMeta.getMetadataList();
 
     List<Shard> shardList = getShards();
 
-    Map<String, PartitionGroupMetadata> metadataMap = new HashMap<>();
+    Map<String, PartitionGroupMetadata> currentMetadataMap = new HashMap<>();
     for (PartitionGroupMetadata partitionGroupMetadata : currentMetaList) {
       KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) partitionGroupMetadata;
-      metadataMap.put(kinesisShardMetadata.getShardId(), kinesisShardMetadata);
+      currentMetadataMap.put(kinesisShardMetadata.getShardId(), kinesisShardMetadata);
     }
 
     for (Shard shard : shardList) {
-      if (metadataMap.containsKey(shard.shardId())) {
+      if (currentMetadataMap.containsKey(shard.shardId())) {
         //Return existing shard metadata
-        _stringPartitionGroupMetadataIndex.add(metadataMap.get(shard.shardId()));
-      } else if (metadataMap.containsKey(shard.parentShardId())) {
-        KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) metadataMap.get(shard.parentShardId());
+        _stringPartitionGroupMetadataIndex.add(currentMetadataMap.get(shard.shardId()));
+      } else if (currentMetadataMap.containsKey(shard.parentShardId())) {
+        KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) currentMetadataMap.get(shard.parentShardId());
         if (isProcessingFinished(kinesisShardMetadata)) {
           //Add child shards for processing since parent has finished
           appendShardMetadata(stream, awsRegion, shard);
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
new file mode 100644
index 0000000..f8a0551
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
@@ -0,0 +1,54 @@
+package org.apache.pinot.plugin.stream.kinesis; /**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import software.amazon.awssdk.services.kinesis.model.Record;
+import software.amazon.awssdk.services.kinesis.model.Shard;
+
+
+public class KinesisConsumerTest {
+  public static void main(String[] args) {
+    Map<String, String> props = new HashMap<>();
+    props.put("stream", "kinesis-test");
+    props.put("aws-region", "us-west-2");
+    props.put("maxRecords", "10");
+
+    KinesisConfig kinesisConfig = new KinesisConfig(props);
+
+    KinesisConnectionHandler kinesisConnectionHandler = new KinesisConnectionHandler("kinesis-test", "us-west-2");
+
+    List<Shard> shardList = kinesisConnectionHandler.getShards();
+
+    for(Shard shard : shardList) {
+      KinesisConsumer kinesisConsumer = new KinesisConsumer(kinesisConfig, new KinesisShardMetadata(shard.shardId(), "kinesis-test", "us-west-2"));
+
+      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shard.sequenceNumberRange().startingSequenceNumber());
+      KinesisFetchResult fetchResult = kinesisConsumer.fetch(kinesisCheckpoint, null, 6 * 10 * 1000L);
+
+      List<Record> list = fetchResult.getMessages();
+
+      System.out.println("SHARD: " + shard.shardId());
+      for (Record record : list) {
+        System.out.println("SEQ-NO: " + record.sequenceNumber() + ", DATA: " + record.data().asUtf8String());
+      }
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 43/47: License headers

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 7d1f7a17dbd1fa0076829e66739e19b7490210bc
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Mon Jan 11 17:21:04 2021 -0800

    License headers
---
 .../plugin/stream/kinesis/KinesisMsgOffsetFactory.java | 18 ++++++++++++++++++
 .../stream/kinesis/KinesisStreamMetadataProvider.java  | 18 ++++++++++++++++++
 .../plugin/stream/kinesis/KinesisConsumerTest.java     | 10 ++++++----
 .../org/apache/pinot/spi/stream/LongMsgOffset.java     |  9 ++++-----
 4 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java
index 8f6b932..5cd57c9 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.io.IOException;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
index 1083969..b22bbe4 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.io.IOException;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
index 57baae9..f9ed779 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
@@ -1,4 +1,4 @@
-package org.apache.pinot.plugin.stream.kinesis; /**
+/**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -16,6 +16,7 @@ package org.apache.pinot.plugin.stream.kinesis; /**
  * specific language governing permissions and limitations
  * under the License.
  */
+package org.apache.pinot.plugin.stream.kinesis;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -44,8 +45,7 @@ public class KinesisConsumerTest {
     for (Shard shard : shardList) {
       System.out.println("SHARD: " + shard.shardId());
 
-      KinesisConsumer kinesisConsumer =
-          new KinesisConsumer(kinesisConfig);
+      KinesisConsumer kinesisConsumer = new KinesisConsumer(kinesisConfig);
       System.out.println(
           "Kinesis Checkpoint Range: < " + shard.sequenceNumberRange().startingSequenceNumber() + ", " + shard
               .sequenceNumberRange().endingSequenceNumber() + " >");
@@ -57,7 +57,9 @@ public class KinesisConsumerTest {
 
       System.out.println("Found " + n + " messages ");
       for (int i = 0; i < n; i++) {
-        System.out.println("SEQ-NO: " + kinesisRecordsBatch.getMessageOffsetAtIndex(i) + ", DATA: " + kinesisRecordsBatch.getMessageAtIndex(i));
+        System.out.println(
+            "SEQ-NO: " + kinesisRecordsBatch.getMessageOffsetAtIndex(i) + ", DATA: " + kinesisRecordsBatch
+                .getMessageAtIndex(i));
       }
       kinesisConsumer.close();
     }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/LongMsgOffset.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/LongMsgOffset.java
index e8fa275..a5fa722 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/LongMsgOffset.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/LongMsgOffset.java
@@ -1,8 +1,3 @@
-package org.apache.pinot.spi.stream;
-
-import com.google.common.annotations.VisibleForTesting;
-
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -21,6 +16,10 @@ import com.google.common.annotations.VisibleForTesting;
  * specific language governing permissions and limitations
  * under the License.
  */
+package org.apache.pinot.spi.stream;
+
+import com.google.common.annotations.VisibleForTesting;
+
 public class LongMsgOffset implements StreamPartitionMsgOffset {
   private final long _offset;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 47/47: Avoid writing 'stream' and also 'stream.kinesis.topic.name'

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit db2136ae4a7f20c3ed9a6c282c4d6325207fe877
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Mon Feb 1 18:35:56 2021 -0800

    Avoid writing 'stream' and also 'stream.kinesis.topic.name'
---
 .../java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java  | 6 ++++--
 .../pinot/plugin/stream/kinesis/KinesisConsumerIntegrationTest.java | 4 +++-
 .../test/java/org/apache/pinot/plugin/stream/kinesis/TestUtils.java | 3 +--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index 6e46498..0e8cc8a 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -20,6 +20,7 @@ package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.Map;
 import org.apache.pinot.spi.stream.StreamConfig;
+import org.apache.pinot.spi.stream.StreamConfigProperties;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
@@ -27,7 +28,7 @@ import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
  * Kinesis stream specific config
  */
 public class KinesisConfig {
-  public static final String STREAM = "stream";
+  public static final String STREAM_TYPE = "kinesis";
   public static final String SHARD_ITERATOR_TYPE = "shard-iterator-type";
   public static final String AWS_REGION = "aws-region";
   public static final String MAX_RECORDS_TO_FETCH = "max-records-to-fetch";
@@ -45,7 +46,8 @@ public class KinesisConfig {
   }
 
   public String getStream() {
-    return _props.get(STREAM);
+    return _props
+        .get(StreamConfigProperties.constructStreamProperty(STREAM_TYPE, StreamConfigProperties.STREAM_TOPIC_NAME));
   }
 
   public String getAwsRegion() {
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerIntegrationTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerIntegrationTest.java
index 1e832fa..324d559 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerIntegrationTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerIntegrationTest.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import org.apache.pinot.spi.stream.StreamConfigProperties;
 import software.amazon.awssdk.services.kinesis.model.Shard;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
@@ -34,7 +35,8 @@ public class KinesisConsumerIntegrationTest {
   public static void main(String[] args)
       throws IOException {
     Map<String, String> props = new HashMap<>();
-    props.put(KinesisConfig.STREAM, STREAM_NAME);
+    props.put(StreamConfigProperties
+        .constructStreamProperty(KinesisConfig.STREAM_TYPE, StreamConfigProperties.STREAM_TOPIC_NAME), STREAM_NAME);
     props.put(KinesisConfig.AWS_REGION, AWS_REGION);
     props.put(KinesisConfig.MAX_RECORDS_TO_FETCH, "10");
     props.put(KinesisConfig.SHARD_ITERATOR_TYPE, ShardIteratorType.AT_SEQUENCE_NUMBER.toString());
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/TestUtils.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/TestUtils.java
index 28d02de..f58cf24 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/TestUtils.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/TestUtils.java
@@ -31,7 +31,6 @@ public class TestUtils {
 
   public static StreamConfig getStreamConfig() {
     Map<String, String> props = new HashMap<>();
-    props.put(KinesisConfig.STREAM, STREAM_NAME);
     props.put(KinesisConfig.AWS_REGION, AWS_REGION);
     props.put(KinesisConfig.MAX_RECORDS_TO_FETCH, "10");
     props.put(KinesisConfig.SHARD_ITERATOR_TYPE, ShardIteratorType.AT_SEQUENCE_NUMBER.toString());
@@ -46,7 +45,7 @@ public class TestUtils {
 
   public static KinesisConfig getKinesisConfig() {
     Map<String, String> props = new HashMap<>();
-    props.put(KinesisConfig.STREAM, STREAM_NAME);
+    props.put("stream.kinesis.topic.name", STREAM_NAME);
     props.put(KinesisConfig.AWS_REGION, AWS_REGION);
     props.put(KinesisConfig.MAX_RECORDS_TO_FETCH, "10");
     props.put(KinesisConfig.SHARD_ITERATOR_TYPE, ShardIteratorType.AT_SEQUENCE_NUMBER.toString());
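
The key this change resolves to, assuming StreamConfigProperties keeps the "stream.<type>.<property>" convention used elsewhere in Pinot (the literal value matches the one used in the TestUtils change above):

    String key = StreamConfigProperties.constructStreamProperty(KinesisConfig.STREAM_TYPE,
        StreamConfigProperties.STREAM_TOPIC_NAME);
    // key.equals("stream.kinesis.topic.name") == true
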


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 21/47: Add Kinesis config wrapper

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 96c32c461b006a15cdf34170a40f076e09be73cc
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 11:35:18 2020 +0530

    Add Kinesis config wrapper
---
 .../pinot/plugin/stream/kinesis/KinesisConfig.java | 29 ++++++++
 .../plugin/stream/kinesis/KinesisConsumer.java     | 78 ++++++++++++----------
 .../stream/kinesis/KinesisConsumerFactory.java     | 10 ++-
 3 files changed, 74 insertions(+), 43 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
new file mode 100644
index 0000000..01d666a
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -0,0 +1,29 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import org.apache.pinot.spi.stream.StreamConfig;
+
+
+public class KinesisConfig {
+  private final StreamConfig _streamConfig;
+  private static final String AWS_REGION = "aws-region";
+  private static final String MAX_RECORDS_TO_FETCH = "max-records-to-fetch";
+
+  private static final String DEFAULT_AWS_REGION = "us-central-1";
+  private static final String DEFAULT_MAX_RECORDS = "20";
+
+  public KinesisConfig(StreamConfig streamConfig) {
+    _streamConfig = streamConfig;
+  }
+
+  public String getStream(){
+    return _streamConfig.getTopicName();
+  }
+
+  public String getAwsRegion(){
+    return _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, DEFAULT_AWS_REGION);
+  }
+
+  public Integer maxRecordsToFetch(){
+    return Integer.parseInt(_streamConfig.getStreamConfigsMap().getOrDefault(MAX_RECORDS_TO_FETCH, DEFAULT_MAX_RECORDS));
+  }
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 7670f06..96241d4 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -30,71 +30,75 @@ import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
+import software.amazon.awssdk.services.kinesis.model.KinesisException;
 import software.amazon.awssdk.services.kinesis.model.Record;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
-
+// TODO: Handle exceptions and timeout
 public class KinesisConsumer extends KinesisConnectionHandler implements ConsumerV2 {
   String _stream;
   Integer _maxRecords;
   String _shardId;
 
-  public KinesisConsumer(String stream, StreamConfig streamConfig, PartitionGroupMetadata partitionGroupMetadata) {
-    super(stream, streamConfig.getStreamConfigsMap().getOrDefault("aws-region", "global"));
-    _stream = stream;
-    _maxRecords = Integer.parseInt(streamConfig.getStreamConfigsMap().getOrDefault("maxRecords", "20"));
+  public KinesisConsumer(KinesisConfig kinesisConfig, PartitionGroupMetadata partitionGroupMetadata) {
+    super(kinesisConfig.getStream(), kinesisConfig.getAwsRegion());
+    _stream = kinesisConfig.getStream();
+    _maxRecords = kinesisConfig.maxRecordsToFetch();
     KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) partitionGroupMetadata;
     _shardId = kinesisShardMetadata.getShardId();
   }
 
   @Override
   public KinesisFetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
-    KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
+    try {
+      KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
 
-    String shardIterator = getShardIterator(kinesisStartCheckpoint);
+      String shardIterator = getShardIterator(kinesisStartCheckpoint);
 
-    List<Record> recordList = new ArrayList<>();
+      List<Record> recordList = new ArrayList<>();
 
-    String kinesisEndSequenceNumber = null;
+      String kinesisEndSequenceNumber = null;
 
-    if (end != null) {
-      KinesisCheckpoint kinesisEndCheckpoint = (KinesisCheckpoint) end;
-      kinesisEndSequenceNumber = kinesisEndCheckpoint.getSequenceNumber();
-    }
+      if (end != null) {
+        KinesisCheckpoint kinesisEndCheckpoint = (KinesisCheckpoint) end;
+        kinesisEndSequenceNumber = kinesisEndCheckpoint.getSequenceNumber();
+      }
 
-    String nextStartSequenceNumber = null;
-    Long startTimestamp = System.currentTimeMillis();
+      String nextStartSequenceNumber = null;
+      Long startTimestamp = System.currentTimeMillis();
 
-    while (shardIterator != null && !isTimedOut(startTimestamp, timeout)) {
-      GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(shardIterator).build();
-      GetRecordsResponse getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
+      while (shardIterator != null && !isTimedOut(startTimestamp, timeout)) {
+        GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(shardIterator).build();
+        GetRecordsResponse getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
 
-      if (getRecordsResponse.records().size() > 0) {
-        recordList.addAll(getRecordsResponse.records());
-        nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
+        if (getRecordsResponse.records().size() > 0) {
+          recordList.addAll(getRecordsResponse.records());
+          nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
 
-        if (kinesisEndSequenceNumber != null
-            && kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0) {
-          nextStartSequenceNumber = kinesisEndSequenceNumber;
-          break;
-        }
+          if (kinesisEndSequenceNumber != null && kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0) {
+            nextStartSequenceNumber = kinesisEndSequenceNumber;
+            break;
+          }
 
-        if (recordList.size() >= _maxRecords) {
-          break;
+          if (recordList.size() >= _maxRecords) {
+            break;
+          }
         }
-      }
 
-      shardIterator = getRecordsResponse.nextShardIterator();
-    }
+        shardIterator = getRecordsResponse.nextShardIterator();
+      }
 
-    if (nextStartSequenceNumber == null && recordList.size() > 0) {
-      nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
-    }
+      if (nextStartSequenceNumber == null && recordList.size() > 0) {
+        nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
+      }
 
-    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber);
-    KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint, recordList);
+      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber);
+      KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint, recordList);
 
-    return kinesisFetchResult;
+      return kinesisFetchResult;
+    } catch (KinesisException e) {
+      // Swallowed for now: a null fetch result signals the caller that this read failed.
+      return null;
+    }
   }
 
   private String getShardIterator(KinesisCheckpoint kinesisStartCheckpoint) {
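
A minimal sketch of how a caller might drive this checkpoint-based fetch loop (KinesisConsumer, KinesisCheckpoint and KinesisFetchResult come from the diff above; the accessor names and retry cadence are assumptions, not the actual API):

    KinesisCheckpoint checkpoint = new KinesisCheckpoint(startSequenceNumber);
    while (keepConsuming) {
      // end == null means "no upper bound"; timeout bounds the polling loop inside fetch()
      KinesisFetchResult result = consumer.fetch(checkpoint, null, 10_000L);
      if (result == null) {
        continue;  // fetch() returns null when a KinesisException was swallowed; retry
      }
      for (Record record : result.getRecords()) {  // assumed accessor
        process(record);
      }
      checkpoint = result.getCheckpoint();  // assumed accessor; resume from the last sequence number
    }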
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index 931fa07..da39aab 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -28,19 +28,17 @@ import org.apache.pinot.spi.stream.v2.StreamConsumerFactoryV2;
 
 
 public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
-  private StreamConfig _streamConfig;
-  private final String AWS_REGION = "aws-region";
+  private KinesisConfig _kinesisConfig;
 
   @Override
   public void init(StreamConfig streamConfig) {
-    _streamConfig = streamConfig;
+    _kinesisConfig = new KinesisConfig(streamConfig);
   }
 
   @Override
   public PartitionGroupMetadataMap getPartitionGroupsMetadata(
       PartitionGroupMetadataMap currentPartitionGroupsMetadata) {
-    return new KinesisPartitionGroupMetadataMap(_streamConfig.getTopicName(),
-        _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, "global"));
+    return new KinesisPartitionGroupMetadataMap(_kinesisConfig.getStream(), _kinesisConfig.getAwsRegion());
   }
 
   @Override
@@ -50,6 +48,6 @@ public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
 
   @Override
   public ConsumerV2 createConsumer(PartitionGroupMetadata metadata) {
-    return new KinesisConsumer(_streamConfig.getTopicName(), _streamConfig, metadata);
+    return new KinesisConsumer(_kinesisConfig, metadata);
   }
 }
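
The KinesisConfig wrapper used above is not part of this diff; a minimal sketch consistent with how the factory calls it (the "aws-region" key and "global" default mirror the code being replaced, everything else is an assumption):

    public class KinesisConfig {
      private final StreamConfig _streamConfig;

      public KinesisConfig(StreamConfig streamConfig) {
        _streamConfig = streamConfig;
      }

      // Kinesis stream name, mapped from the generic stream "topic"
      public String getStream() {
        return _streamConfig.getTopicName();
      }

      public String getAwsRegion() {
        return _streamConfig.getStreamConfigsMap().getOrDefault("aws-region", "global");
      }
    }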


---------------------------------------------------------------------


[incubator-pinot] 03/47: Rename partitionId to partitionGroupId

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 3892fc417c2f7d07e15b78eae1e1b3dd09e60090
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Wed Dec 30 13:46:22 2020 -0800

    Rename partitionId to partitionGroupId
---
 .../segmentselector/RealtimeSegmentSelector.java   |  2 +-
 .../apache/pinot/common/utils/LLCSegmentName.java  | 24 +++++-----
 .../org/apache/pinot/common/utils/SegmentName.java |  2 +-
 .../pinot/common/utils/SegmentNameBuilderTest.java |  6 +--
 .../helix/core/PinotHelixResourceManager.java      |  5 ++-
 .../helix/core/PinotTableIdealStateBuilder.java    | 15 ++++---
 .../segment/RealtimeSegmentAssignment.java         |  6 +--
 .../RealtimeToOfflineSegmentsTaskGenerator.java    |  4 +-
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 51 ++++++++++++----------
 .../SegmentSizeBasedFlushThresholdUpdater.java     |  2 +-
 .../PinotLLCRealtimeSegmentManagerTest.java        | 16 ++++---
 .../realtime/LLRealtimeSegmentDataManager.java     | 34 ++++++++-------
 .../manager/realtime/RealtimeTableDataManager.java | 13 +++---
 .../realtime/LLRealtimeSegmentDataManagerTest.java | 10 ++---
 .../fakestream/FakePartitionGroupMetadata.java     | 48 ++++++++++++++++++++
 .../impl/fakestream/FakeStreamConsumerFactory.java | 10 +----
 .../fakestream/FakeStreamMetadataProvider.java     | 15 ++++++-
 ...lakyConsumerRealtimeClusterIntegrationTest.java |  9 +---
 ...PartitionLLCRealtimeClusterIntegrationTest.java |  6 +--
 .../stream/kafka09/KafkaConsumerFactory.java       |  9 +---
 .../kafka09/KafkaPartitionGroupMetadata.java       | 48 ++++++++++++++++++++
 .../kafka09/KafkaStreamMetadataProvider.java       | 26 +++++++++++
 .../kafka09/KafkaPartitionLevelConsumerTest.java   |  2 +-
 .../stream/kafka20/KafkaConsumerFactory.java       |  9 +---
 .../kafka20/KafkaPartitionGroupMetadata.java       | 48 ++++++++++++++++++++
 .../kafka20/KafkaStreamMetadataProvider.java       | 21 +++++++++
 ...her.java => PartitionGroupMetadataFetcher.java} | 18 +++++---
 .../pinot/spi/stream/PartitionOffsetFetcher.java   | 15 ++++---
 .../pinot/spi/stream/StreamConsumerFactory.java    |  8 +---
 .../pinot/spi/stream/StreamMetadataProvider.java   |  9 +++-
 30 files changed, 347 insertions(+), 144 deletions(-)

diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentselector/RealtimeSegmentSelector.java b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentselector/RealtimeSegmentSelector.java
index f462326..2d778c6 100644
--- a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentselector/RealtimeSegmentSelector.java
+++ b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/segmentselector/RealtimeSegmentSelector.java
@@ -95,7 +95,7 @@ public class RealtimeSegmentSelector implements SegmentSelector {
         if (instanceStateMap.containsValue(SegmentStateModel.CONSUMING)) {
           // Keep the first CONSUMING segment for each partition
           LLCSegmentName llcSegmentName = new LLCSegmentName(segment);
-          partitionIdToFirstConsumingLLCSegmentMap.compute(llcSegmentName.getPartitionId(), (k, consumingSegment) -> {
+          partitionIdToFirstConsumingLLCSegmentMap.compute(llcSegmentName.getPartitionGroupId(), (k, consumingSegment) -> {
             if (consumingSegment == null) {
               return llcSegmentName;
             } else {
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/LLCSegmentName.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/LLCSegmentName.java
index adc24ad..a66bb3c 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/LLCSegmentName.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/LLCSegmentName.java
@@ -26,7 +26,7 @@ import org.joda.time.DateTimeZone;
 public class LLCSegmentName extends SegmentName implements Comparable {
   private final static String DATE_FORMAT = "yyyyMMdd'T'HHmm'Z'";
   private final String _tableName;
-  private final int _partitionId;
+  private final int _partitionGroupId;
   private final int _sequenceNumber;
   private final String _creationTime;
   private final String _segmentName;
@@ -39,22 +39,22 @@ public class LLCSegmentName extends SegmentName implements Comparable {
     _segmentName = segmentName;
     String[] parts = StringUtils.splitByWholeSeparator(segmentName, SEPARATOR);
     _tableName = parts[0];
-    _partitionId = Integer.parseInt(parts[1]);
+    _partitionGroupId = Integer.parseInt(parts[1]);
     _sequenceNumber = Integer.parseInt(parts[2]);
     _creationTime = parts[3];
   }
 
-  public LLCSegmentName(String tableName, int partitionId, int sequenceNumber, long msSinceEpoch) {
+  public LLCSegmentName(String tableName, int partitionGroupId, int sequenceNumber, long msSinceEpoch) {
     if (!isValidComponentName(tableName)) {
       throw new RuntimeException("Invalid table name " + tableName);
     }
     _tableName = tableName;
-    _partitionId = partitionId;
+    _partitionGroupId = partitionGroupId;
     _sequenceNumber = sequenceNumber;
     // ISO8601 date: 20160120T1234Z
     DateTime dateTime = new DateTime(msSinceEpoch, DateTimeZone.UTC);
     _creationTime = dateTime.toString(DATE_FORMAT);
-    _segmentName = tableName + SEPARATOR + partitionId + SEPARATOR + sequenceNumber + SEPARATOR + _creationTime;
+    _segmentName = tableName + SEPARATOR + partitionGroupId + SEPARATOR + sequenceNumber + SEPARATOR + _creationTime;
   }
 
   /**
@@ -75,13 +75,13 @@ public class LLCSegmentName extends SegmentName implements Comparable {
   }
 
   @Override
-  public int getPartitionId() {
-    return _partitionId;
+  public int getPartitionGroupId() {
+    return _partitionGroupId;
   }
 
   @Override
   public String getPartitionRange() {
-    return Integer.toString(getPartitionId());
+    return Integer.toString(getPartitionGroupId());
   }
 
   @Override
@@ -110,9 +110,9 @@ public class LLCSegmentName extends SegmentName implements Comparable {
       throw new RuntimeException(
           "Cannot compare segment names " + this.getSegmentName() + " and " + other.getSegmentName());
     }
-    if (this.getPartitionId() > other.getPartitionId()) {
+    if (this.getPartitionGroupId() > other.getPartitionGroupId()) {
       return 1;
-    } else if (this.getPartitionId() < other.getPartitionId()) {
+    } else if (this.getPartitionGroupId() < other.getPartitionGroupId()) {
       return -1;
     } else {
       if (this.getSequenceNumber() > other.getSequenceNumber()) {
@@ -141,7 +141,7 @@ public class LLCSegmentName extends SegmentName implements Comparable {
 
     LLCSegmentName segName = (LLCSegmentName) o;
 
-    if (_partitionId != segName._partitionId) {
+    if (_partitionGroupId != segName._partitionGroupId) {
       return false;
     }
     if (_sequenceNumber != segName._sequenceNumber) {
@@ -159,7 +159,7 @@ public class LLCSegmentName extends SegmentName implements Comparable {
   @Override
   public int hashCode() {
     int result = _tableName != null ? _tableName.hashCode() : 0;
-    result = 31 * result + _partitionId;
+    result = 31 * result + _partitionGroupId;
     result = 31 * result + _sequenceNumber;
     result = 31 * result + (_creationTime != null ? _creationTime.hashCode() : 0);
     result = 31 * result + (_segmentName != null ? _segmentName.hashCode() : 0);
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/SegmentName.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/SegmentName.java
index 6763f6d..b0c00ae 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/SegmentName.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/SegmentName.java
@@ -63,7 +63,7 @@ public abstract class SegmentName {
     throw new RuntimeException("No groupId in " + getSegmentName());
   }
 
-  public int getPartitionId() {
+  public int getPartitionGroupId() {
     throw new RuntimeException("No partitionId in " + getSegmentName());
   }
 
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/utils/SegmentNameBuilderTest.java b/pinot-common/src/test/java/org/apache/pinot/common/utils/SegmentNameBuilderTest.java
index f632f51..de606cc 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/utils/SegmentNameBuilderTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/utils/SegmentNameBuilderTest.java
@@ -58,7 +58,7 @@ public class SegmentNameBuilderTest {
     // Check partition range
     assertEquals(longNameSegment.getPartitionRange(), "0");
     assertEquals(shortNameSegment.getPartitionRange(), "ALL");
-    assertEquals(llcSegment.getPartitionId(), 0);
+    assertEquals(llcSegment.getPartitionGroupId(), 0);
 
     // Check groupId
     assertEquals(longNameSegment.getGroupId(), "myTable_REALTIME_1234567_0");
@@ -127,14 +127,14 @@ public class SegmentNameBuilderTest {
 
     LLCSegmentName segName1 = new LLCSegmentName(tableName, partitionId, sequenceNumber, msSinceEpoch);
     Assert.assertEquals(segName1.getSegmentName(), segmentName);
-    Assert.assertEquals(segName1.getPartitionId(), partitionId);
+    Assert.assertEquals(segName1.getPartitionGroupId(), partitionId);
     Assert.assertEquals(segName1.getCreationTime(), creationTime);
     Assert.assertEquals(segName1.getSequenceNumber(), sequenceNumber);
     Assert.assertEquals(segName1.getTableName(), tableName);
 
     LLCSegmentName segName2 = new LLCSegmentName(segmentName);
     Assert.assertEquals(segName2.getSegmentName(), segmentName);
-    Assert.assertEquals(segName2.getPartitionId(), partitionId);
+    Assert.assertEquals(segName2.getPartitionGroupId(), partitionId);
     Assert.assertEquals(segName2.getCreationTime(), creationTime);
     Assert.assertEquals(segName2.getSequenceNumber(), sequenceNumber);
     Assert.assertEquals(segName2.getTableName(), tableName);
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index fa117fa..a04e0bc 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -130,6 +130,7 @@ import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
+import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.apache.pinot.spi.utils.retry.RetryPolicies;
@@ -1396,6 +1397,8 @@ public class PinotHelixResourceManager {
    */
   private void setupShardedRealtimeTable(StreamConfig streamConfig, IdealState idealState, int numReplicas) {
     StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
+    StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
+        .createStreamMetadataProvider(streamConfig.getTopicName() + "_" + System.currentTimeMillis());
 
     // get current partition groups and their metadata - this will be empty when creating the table
     List<PartitionGroupMetadata> currentPartitionGroupMetadataList = _pinotLLCRealtimeSegmentManager.getCurrentPartitionGroupMetadataList(idealState);
@@ -1403,7 +1406,7 @@ public class PinotHelixResourceManager {
     // get new partition groups and their metadata,
     // Assume table has 3 shards. Say we get [0], [1], [2] groups (for now assume that each group contains only 1 shard)
     List<PartitionGroupMetadata> newPartitionGroupMetadataList =
-        streamConsumerFactory.getPartitionGroupMetadataList(currentPartitionGroupMetadataList);
+        streamMetadataProvider.getPartitionGroupMetadataList(currentPartitionGroupMetadataList, 5000);
 
     // setup segment zk metadata and ideal state for all the new found partition groups
     _pinotLLCRealtimeSegmentManager.setupNewPartitionGroups(newPartitionGroupMetadataList, numReplicas);
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
index a564542..1e95966 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
@@ -32,7 +32,8 @@ import org.apache.pinot.common.utils.config.TagNameUtils;
 import org.apache.pinot.common.utils.helix.HelixHelper;
 import org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager;
 import org.apache.pinot.spi.config.table.TableConfig;
-import org.apache.pinot.spi.stream.PartitionCountFetcher;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
+import org.apache.pinot.spi.stream.PartitionGroupMetadataFetcher;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
 import org.apache.pinot.spi.utils.retry.RetryPolicies;
@@ -115,13 +116,15 @@ public class PinotTableIdealStateBuilder {
     pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
   }
 
-  public static int getPartitionCount(StreamConfig streamConfig) {
-    PartitionCountFetcher partitionCountFetcher = new PartitionCountFetcher(streamConfig);
+  public static List<PartitionGroupMetadata> getPartitionGroupMetadataList(StreamConfig streamConfig,
+      List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
+    PartitionGroupMetadataFetcher partitionGroupMetadataFetcher =
+        new PartitionGroupMetadataFetcher(streamConfig, currentPartitionGroupMetadataList);
     try {
-      RetryPolicies.noDelayRetryPolicy(3).attempt(partitionCountFetcher);
-      return partitionCountFetcher.getPartitionCount();
+      RetryPolicies.noDelayRetryPolicy(3).attempt(partitionGroupMetadataFetcher);
+      return partitionGroupMetadataFetcher.getPartitionGroupMetadataList();
     } catch (Exception e) {
-      Exception fetcherException = partitionCountFetcher.getException();
+      Exception fetcherException = partitionGroupMetadataFetcher.getException();
       LOGGER.error("Could not get partition count for {}", streamConfig.getTopicName(), fetcherException);
       throw new RuntimeException(fetcherException);
     }
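
The PartitionGroupMetadataFetcher handed to the retry policy above is not shown in this diff; given how it is used (attempt(...), then getPartitionGroupMetadataList()/getException()), it presumably follows the same Callable<Boolean> contract as the PartitionCountFetcher it replaces. A sketch under that assumption:

    import java.util.List;
    import java.util.concurrent.Callable;
    import org.apache.pinot.spi.stream.PartitionGroupMetadata;
    import org.apache.pinot.spi.stream.StreamConfig;
    import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
    import org.apache.pinot.spi.stream.StreamMetadataProvider;

    public class PartitionGroupMetadataFetcher implements Callable<Boolean> {
      private final StreamConfig _streamConfig;
      private final List<PartitionGroupMetadata> _currentPartitionGroupMetadataList;
      private List<PartitionGroupMetadata> _partitionGroupMetadataList;
      private Exception _exception;

      public PartitionGroupMetadataFetcher(StreamConfig streamConfig,
          List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
        _streamConfig = streamConfig;
        _currentPartitionGroupMetadataList = currentPartitionGroupMetadataList;
      }

      public List<PartitionGroupMetadata> getPartitionGroupMetadataList() {
        return _partitionGroupMetadataList;
      }

      public Exception getException() {
        return _exception;
      }

      @Override
      public Boolean call() {
        try {
          StreamMetadataProvider provider = StreamConsumerFactoryProvider.create(_streamConfig)
              .createStreamMetadataProvider(_streamConfig.getTopicName());
          _partitionGroupMetadataList =
              provider.getPartitionGroupMetadataList(_currentPartitionGroupMetadataList, 5000);
          return Boolean.TRUE;
        } catch (Exception e) {
          _exception = e;
          return Boolean.FALSE;  // signal the retry policy to attempt again
        }
      }
    }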
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/assignment/segment/RealtimeSegmentAssignment.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/assignment/segment/RealtimeSegmentAssignment.java
index a069734..e27958f 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/assignment/segment/RealtimeSegmentAssignment.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/assignment/segment/RealtimeSegmentAssignment.java
@@ -136,7 +136,7 @@ public class RealtimeSegmentAssignment implements SegmentAssignment {
    * Helper method to assign instances for CONSUMING segment based on the segment partition id and instance partitions.
    */
   private List<String> assignConsumingSegment(String segmentName, InstancePartitions instancePartitions) {
-    int partitionId = new LLCSegmentName(segmentName).getPartitionId();
+    int partitionId = new LLCSegmentName(segmentName).getPartitionGroupId();
 
     int numReplicaGroups = instancePartitions.getNumReplicaGroups();
     if (numReplicaGroups == 1) {
@@ -325,7 +325,7 @@ public class RealtimeSegmentAssignment implements SegmentAssignment {
 
         Map<Integer, List<String>> partitionIdToSegmentsMap = new HashMap<>();
         for (String segmentName : currentAssignment.keySet()) {
-          int partitionId = new LLCSegmentName(segmentName).getPartitionId();
+          int partitionId = new LLCSegmentName(segmentName).getPartitionGroupId();
           partitionIdToSegmentsMap.computeIfAbsent(partitionId, k -> new ArrayList<>()).add(segmentName);
         }
 
@@ -360,7 +360,7 @@ public class RealtimeSegmentAssignment implements SegmentAssignment {
       // Replica-group based assignment
 
       // Uniformly spray the segment partitions over the instance partitions
-      int segmentPartitionId = new LLCSegmentName(segmentName).getPartitionId();
+      int segmentPartitionId = new LLCSegmentName(segmentName).getPartitionGroupId();
       int numPartitions = instancePartitions.getNumPartitions();
       int partitionId = segmentPartitionId % numPartitions;
       return SegmentAssignmentUtils.assignSegmentWithReplicaGroup(currentAssignment, instancePartitions, partitionId);
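
(As a concrete example of the modulo spray above: with numPartitions = 2 in the instance partitions, segments from partition groups 0, 2, 4, ... map to instance-partition 0 and groups 1, 3, 5, ... to instance-partition 1.)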
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/generator/RealtimeToOfflineSegmentsTaskGenerator.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/generator/RealtimeToOfflineSegmentsTaskGenerator.java
index a278396..8208d8e 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/generator/RealtimeToOfflineSegmentsTaskGenerator.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/generator/RealtimeToOfflineSegmentsTaskGenerator.java
@@ -252,11 +252,11 @@ public class RealtimeToOfflineSegmentsTaskGenerator implements PinotTaskGenerato
     Map<Integer, LLCSegmentName> latestLLCSegmentNameMap = new HashMap<>();
     for (LLCRealtimeSegmentZKMetadata metadata : realtimeSegmentsMetadataList) {
       LLCSegmentName llcSegmentName = new LLCSegmentName(metadata.getSegmentName());
-      allPartitions.add(llcSegmentName.getPartitionId());
+      allPartitions.add(llcSegmentName.getPartitionGroupId());
 
       if (metadata.getStatus().equals(Segment.Realtime.Status.DONE)) {
         completedSegmentsMetadataList.add(metadata);
-        latestLLCSegmentNameMap.compute(llcSegmentName.getPartitionId(), (partitionId, latestLLCSegmentName) -> {
+        latestLLCSegmentNameMap.compute(llcSegmentName.getPartitionGroupId(), (partitionId, latestLLCSegmentName) -> {
           if (latestLLCSegmentName == null) {
             return llcSegmentName;
           } else {
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 8a29489..189be8b 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -83,6 +83,7 @@ import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConfigProperties;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
+import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
@@ -186,8 +187,10 @@ public class PinotLLCRealtimeSegmentManager {
 
     // get new partition groups (honor any groupings which are already consuming - [0], [1], [2])
     StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
+    StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
+        .createStreamMetadataProvider(streamConfig.getTopicName() + " " + System.currentTimeMillis());
     List<PartitionGroupMetadata> newPartitionGroupMetadataList =
-        streamConsumerFactory.getPartitionGroupMetadataList(currentPartitionGroupMetadataList);
+        streamMetadataProvider.getPartitionGroupMetadataList(currentPartitionGroupMetadataList, 1000);
 
     // from the above list, remove the partition groups which are already CONSUMING
     // i.e. newPartitionGroups - currentPartitionGroups. Therefore, ([0], [1], [2]) - ([1], [2]) = ([0])
@@ -292,7 +295,8 @@ public class PinotLLCRealtimeSegmentManager {
     PartitionLevelStreamConfig streamConfig =
         new PartitionLevelStreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
     InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
-    int numPartitions = getNumPartitions(streamConfig);
+    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
+    int numPartitionGroups = getPartitionGroupMetadataList(streamConfig, currentPartitionGroupMetadataList).size();
     int numReplicas = getNumReplicas(tableConfig, instancePartitions);
 
     SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
@@ -301,9 +305,9 @@ public class PinotLLCRealtimeSegmentManager {
 
     long currentTimeMs = getCurrentTimeMs();
     Map<String, Map<String, String>> instanceStatesMap = idealState.getRecord().getMapFields();
-    for (int partitionId = 0; partitionId < numPartitions; partitionId++) {
+    for (int partitionGroupId = 0; partitionGroupId < numPartitionGroups; partitionGroupId++) {
       String segmentName =
-          setupNewPartition(tableConfig, streamConfig, partitionId, currentTimeMs, instancePartitions, numPartitions,
+          setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupId, currentTimeMs, instancePartitions, numPartitionGroups,
               numReplicas);
       updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, segmentName, segmentAssignment,
           instancePartitionsMap);
@@ -635,7 +639,7 @@ public class PinotLLCRealtimeSegmentManager {
 
     // Add the partition metadata if available
     SegmentPartitionMetadata partitionMetadata =
-        getPartitionMetadataFromTableConfig(tableConfig, newLLCSegmentName.getPartitionId());
+        getPartitionMetadataFromTableConfig(tableConfig, newLLCSegmentName.getPartitionGroupId());
     if (partitionMetadata != null) {
       newSegmentZKMetadata.setPartitionMetadata(partitionMetadata);
     }
@@ -705,22 +709,23 @@ public class PinotLLCRealtimeSegmentManager {
   }
 
   @VisibleForTesting
-  int getNumPartitions(StreamConfig streamConfig) {
-    return PinotTableIdealStateBuilder.getPartitionCount(streamConfig);
+  List<PartitionGroupMetadata> getPartitionGroupMetadataList(StreamConfig streamConfig,
+      List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
+    return PinotTableIdealStateBuilder.getPartitionGroupMetadataList(streamConfig, currentPartitionGroupMetadataList);
   }
 
   @VisibleForTesting
   StreamPartitionMsgOffset getPartitionOffset(StreamConfig streamConfig, OffsetCriteria offsetCriteria,
-      int partitionId) {
+      int partitionGroupId) {
     PartitionOffsetFetcher partitionOffsetFetcher =
-        new PartitionOffsetFetcher(offsetCriteria, partitionId, streamConfig);
+        new PartitionOffsetFetcher(offsetCriteria, partitionGroupId, streamConfig);
     try {
       RetryPolicies.fixedDelayRetryPolicy(3, 1000L).attempt(partitionOffsetFetcher);
       return partitionOffsetFetcher.getOffset();
     } catch (Exception e) {
       throw new IllegalStateException(String
           .format("Failed to fetch the offset for topic: %s, partition: %s with criteria: %s",
-              streamConfig.getTopicName(), partitionId, offsetCriteria), e);
+              streamConfig.getTopicName(), partitionGroupId, offsetCriteria), e);
     }
   }
 
@@ -768,7 +773,7 @@ public class PinotLLCRealtimeSegmentManager {
     Map<Integer, LLCSegmentName> latestLLCSegmentNameMap = new HashMap<>();
     for (String segmentName : segments) {
       LLCSegmentName llcSegmentName = new LLCSegmentName(segmentName);
-      latestLLCSegmentNameMap.compute(llcSegmentName.getPartitionId(), (partitionId, latestLLCSegmentName) -> {
+      latestLLCSegmentNameMap.compute(llcSegmentName.getPartitionGroupId(), (partitionId, latestLLCSegmentName) -> {
         if (latestLLCSegmentName == null) {
           return llcSegmentName;
         } else {
@@ -821,10 +826,12 @@ public class PinotLLCRealtimeSegmentManager {
     Preconditions.checkState(!_isStopping, "Segment manager is stopping");
 
     String realtimeTableName = tableConfig.getTableName();
-    int numPartitions = getNumPartitions(streamConfig);
     HelixHelper.updateIdealState(_helixManager, realtimeTableName, idealState -> {
       assert idealState != null;
       if (idealState.isEnabled()) {
+        List<PartitionGroupMetadata> currentPartitionGroupMetadataList =
+            getCurrentPartitionGroupMetadataList(idealState);
+        int numPartitions = getPartitionGroupMetadataList(streamConfig, currentPartitionGroupMetadataList).size();
         return ensureAllPartitionsConsuming(tableConfig, streamConfig, idealState, numPartitions);
       } else {
         LOGGER.info("Skipping LLC segments validation for disabled table: {}", realtimeTableName);
@@ -1085,7 +1092,7 @@ public class PinotLLCRealtimeSegmentManager {
           String previousConsumingSegment = null;
           for (Map.Entry<String, Map<String, String>> segmentEntry : instanceStatesMap.entrySet()) {
             LLCSegmentName llcSegmentName = new LLCSegmentName(segmentEntry.getKey());
-            if (llcSegmentName.getPartitionId() == partitionId && segmentEntry.getValue()
+            if (llcSegmentName.getPartitionGroupId() == partitionId && segmentEntry.getValue()
                 .containsValue(SegmentStateModel.CONSUMING)) {
               previousConsumingSegment = llcSegmentName.getSegmentName();
               break;
@@ -1110,7 +1117,7 @@ public class PinotLLCRealtimeSegmentManager {
     for (int partitionId = 0; partitionId < numPartitions; partitionId++) {
       if (!latestSegmentZKMetadataMap.containsKey(partitionId)) {
         String newSegmentName =
-            setupNewPartition(tableConfig, streamConfig, partitionId, currentTimeMs, instancePartitions, numPartitions,
+            setupNewPartitionGroup(tableConfig, streamConfig, partitionId, currentTimeMs, instancePartitions, numPartitions,
                 numReplicas);
         updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, newSegmentName, segmentAssignment,
             instancePartitionsMap);
@@ -1121,7 +1128,7 @@ public class PinotLLCRealtimeSegmentManager {
   }
 
   private LLCSegmentName getNextLLCSegmentName(LLCSegmentName lastLLCSegmentName, long creationTimeMs) {
-    return new LLCSegmentName(lastLLCSegmentName.getTableName(), lastLLCSegmentName.getPartitionId(),
+    return new LLCSegmentName(lastLLCSegmentName.getTableName(), lastLLCSegmentName.getPartitionGroupId(),
         lastLLCSegmentName.getSequenceNumber() + 1, creationTimeMs);
   }
 
@@ -1129,21 +1136,21 @@ public class PinotLLCRealtimeSegmentManager {
    * Sets up a new partition.
    * <p>Persists the ZK metadata for the first CONSUMING segment, and returns the segment name.
    */
-  private String setupNewPartition(TableConfig tableConfig, PartitionLevelStreamConfig streamConfig, int partitionId,
-      long creationTimeMs, InstancePartitions instancePartitions, int numPartitions, int numReplicas) {
+  private String setupNewPartitionGroup(TableConfig tableConfig, PartitionLevelStreamConfig streamConfig, int partitionGroupId,
+      long creationTimeMs, InstancePartitions instancePartitions, int numPartitionGroups, int numReplicas) {
     String realtimeTableName = tableConfig.getTableName();
-    LOGGER.info("Setting up new partition: {} for table: {}", partitionId, realtimeTableName);
+    LOGGER.info("Setting up new partition group: {} for table: {}", partitionGroupId, realtimeTableName);
 
     String rawTableName = TableNameBuilder.extractRawTableName(realtimeTableName);
     LLCSegmentName newLLCSegmentName =
-        new LLCSegmentName(rawTableName, partitionId, STARTING_SEQUENCE_NUMBER, creationTimeMs);
+        new LLCSegmentName(rawTableName, partitionGroupId, STARTING_SEQUENCE_NUMBER, creationTimeMs);
     String newSegmentName = newLLCSegmentName.getSegmentName();
     StreamPartitionMsgOffset startOffset =
-        getPartitionOffset(streamConfig, streamConfig.getOffsetCriteria(), partitionId);
+        getPartitionOffset(streamConfig, streamConfig.getOffsetCriteria(), partitionGroupId);
     CommittingSegmentDescriptor committingSegmentDescriptor =
         new CommittingSegmentDescriptor(null, startOffset.toString(), 0);
     createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, creationTimeMs,
-        committingSegmentDescriptor, null, instancePartitions, numPartitions, numReplicas);
+        committingSegmentDescriptor, null, instancePartitions, numPartitionGroups, numReplicas);
 
     return newSegmentName;
   }
@@ -1157,7 +1164,7 @@ public class PinotLLCRealtimeSegmentManager {
     int numPartitions = 0;
     for (String segmentName : idealState.getRecord().getMapFields().keySet()) {
       if (LLCSegmentName.isLowLevelConsumerSegmentName(segmentName)) {
-        numPartitions = Math.max(numPartitions, new LLCSegmentName(segmentName).getPartitionId() + 1);
+        numPartitions = Math.max(numPartitions, new LLCSegmentName(segmentName).getPartitionGroupId() + 1);
       }
     }
     return numPartitions;
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/segment/SegmentSizeBasedFlushThresholdUpdater.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/segment/SegmentSizeBasedFlushThresholdUpdater.java
index 2e73806..56ae29e 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/segment/SegmentSizeBasedFlushThresholdUpdater.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/segment/SegmentSizeBasedFlushThresholdUpdater.java
@@ -102,7 +102,7 @@ public class SegmentSizeBasedFlushThresholdUpdater implements FlushThresholdUpda
     // less same characteristics at any one point in time).
     // However, when we start a new table or change controller mastership, we can have any partition completing first.
     // It is best to learn the ratio as quickly as we can, so we allow any partition to supply the value.
-    if (new LLCSegmentName(newSegmentName).getPartitionId() == 0 || _latestSegmentRowsToSizeRatio == 0) {
+    if (new LLCSegmentName(newSegmentName).getPartitionGroupId() == 0 || _latestSegmentRowsToSizeRatio == 0) {
       if (_latestSegmentRowsToSizeRatio > 0) {
         _latestSegmentRowsToSizeRatio =
             CURRENT_SEGMENT_RATIO_WEIGHT * currentRatio + PREVIOUS_SEGMENT_RATIO_WEIGHT * _latestSegmentRowsToSizeRatio;
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
index 4888f17..743e719 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
@@ -30,6 +30,8 @@ import java.util.Map;
 import java.util.Random;
 import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 import javax.annotation.Nullable;
 import org.apache.commons.io.FileUtils;
 import org.apache.helix.model.IdealState;
@@ -48,6 +50,7 @@ import org.apache.pinot.controller.helix.core.assignment.segment.SegmentAssignme
 import org.apache.pinot.controller.helix.core.realtime.segment.CommittingSegmentDescriptor;
 import org.apache.pinot.controller.util.SegmentCompletionUtils;
 import org.apache.pinot.core.indexsegment.generator.SegmentVersion;
+import org.apache.pinot.core.realtime.impl.fakestream.FakePartitionGroupMetadata;
 import org.apache.pinot.core.realtime.impl.fakestream.FakeStreamConfigUtils;
 import org.apache.pinot.core.segment.index.metadata.SegmentMetadataImpl;
 import org.apache.pinot.spi.config.table.TableConfig;
@@ -57,6 +60,7 @@ import org.apache.pinot.spi.env.PinotConfiguration;
 import org.apache.pinot.spi.filesystem.PinotFSFactory;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
@@ -333,7 +337,7 @@ public class PinotLLCRealtimeSegmentManagerTest {
       assertTrue(oldSegmentZKMetadataMap.containsKey(segmentName));
       assertTrue(segmentZKMetadataMap.containsKey(segmentName));
       assertEquals(segmentZKMetadataMap.get(segmentName), oldSegmentZKMetadataMap.get(segmentName));
-      oldNumPartitions = Math.max(oldNumPartitions, new LLCSegmentName(segmentName).getPartitionId() + 1);
+      oldNumPartitions = Math.max(oldNumPartitions, new LLCSegmentName(segmentName).getPartitionGroupId() + 1);
     }
 
     // Check that for new partitions, each partition should have exactly 1 new segment in CONSUMING state, and metadata
@@ -341,7 +345,7 @@ public class PinotLLCRealtimeSegmentManagerTest {
     Map<Integer, List<String>> partitionIdToSegmentsMap = new HashMap<>();
     for (Map.Entry<String, Map<String, String>> entry : instanceStatesMap.entrySet()) {
       String segmentName = entry.getKey();
-      int partitionId = new LLCSegmentName(segmentName).getPartitionId();
+      int partitionId = new LLCSegmentName(segmentName).getPartitionGroupId();
       partitionIdToSegmentsMap.computeIfAbsent(partitionId, k -> new ArrayList<>()).add(segmentName);
     }
     for (int partitionId = oldNumPartitions; partitionId < segmentManager._numPartitions; partitionId++) {
@@ -579,7 +583,7 @@ public class PinotLLCRealtimeSegmentManagerTest {
       if (instanceStateMap.containsValue(SegmentStateModel.ONLINE) || instanceStateMap.containsValue(
           SegmentStateModel.CONSUMING)) {
         LLCSegmentName llcSegmentName = new LLCSegmentName(segmentName);
-        int partitionsId = llcSegmentName.getPartitionId();
+        int partitionsId = llcSegmentName.getPartitionGroupId();
         Map<Integer, String> sequenceNumberToSegmentMap = partitionIdToSegmentsMap.get(partitionsId);
         int sequenceNumber = llcSegmentName.getSequenceNumber();
         assertFalse(sequenceNumberToSegmentMap.containsKey(sequenceNumber));
@@ -910,12 +914,12 @@ public class PinotLLCRealtimeSegmentManagerTest {
     }
 
     @Override
-    int getNumPartitions(StreamConfig streamConfig) {
-      return _numPartitions;
+    List<PartitionGroupMetadata> getPartitionGroupMetadataList(StreamConfig streamConfig, List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
+      return IntStream.range(0, _numPartitions).mapToObj(FakePartitionGroupMetadata::new).collect(Collectors.toList());
     }
 
     @Override
-    LongMsgOffset getPartitionOffset(StreamConfig streamConfig, OffsetCriteria offsetCriteria, int partitionId) {
+    LongMsgOffset getPartitionOffset(StreamConfig streamConfig, OffsetCriteria offsetCriteria, int partitionGroupId) {
       // The criteria for this test should always be SMALLEST (for default streaming config and new added partitions)
       assertTrue(offsetCriteria.isSmallest());
       return PARTITION_OFFSET;
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index 84d4592..13a9ab2 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -27,6 +27,7 @@ import java.io.IOException;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -214,7 +215,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   // Semaphore for each partitionId only, which is to prevent two different Kafka consumers
   // from consuming with the same partitionId in parallel in the same host.
   // See the comments in {@link RealtimeTableDataManager}.
-  private final Semaphore _partitionConsumerSemaphore;
+  private final Semaphore _partitionGroupConsumerSemaphore;
   // A boolean flag to check whether the current thread has acquired the semaphore.
   // This boolean is needed because the semaphore is shared by threads; every thread holding this semaphore can
   // modify the permit. This boolean make sure the semaphore gets released only once when the partition stops consuming.
@@ -247,7 +248,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
 
   private Thread _consumerThread;
   private final String _streamTopic;
-  private final int _streamPartitionId;
+  private final int _partitionGroupId;
   final String _clientId;
   private final LLCSegmentName _llcSegmentName;
   private final RecordTransformer _recordTransformer;
@@ -705,7 +706,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   @Override
   public Map<String, String> getPartitionToCurrentOffset() {
     Map<String, String> partitionToCurrentOffset = new HashMap<>();
-    partitionToCurrentOffset.put(String.valueOf(_streamPartitionId), _currentOffset.toString());
+    partitionToCurrentOffset.put(String.valueOf(_partitionGroupId), _currentOffset.toString());
     return partitionToCurrentOffset;
   }
 
@@ -730,8 +731,8 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   }
 
   @VisibleForTesting
-  protected Semaphore getPartitionConsumerSemaphore() {
-    return _partitionConsumerSemaphore;
+  protected Semaphore getPartitionGroupConsumerSemaphore() {
+    return _partitionGroupConsumerSemaphore;
   }
 
   @VisibleForTesting
@@ -892,7 +893,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     closePartitionLevelConsumer();
     closeStreamMetadataProvider();
     if (_acquiredConsumerSemaphore.compareAndSet(true, false)) {
-      _partitionConsumerSemaphore.release();
+      _partitionGroupConsumerSemaphore.release();
     }
   }
 
@@ -1102,7 +1103,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   // If the transition is OFFLINE to ONLINE, the caller should have downloaded the segment and we don't reach here.
   public LLRealtimeSegmentDataManager(RealtimeSegmentZKMetadata segmentZKMetadata, TableConfig tableConfig,
       RealtimeTableDataManager realtimeTableDataManager, String resourceDataDir, IndexLoadingConfig indexLoadingConfig,
-      Schema schema, LLCSegmentName llcSegmentName, Semaphore partitionConsumerSemaphore, ServerMetrics serverMetrics,
+      Schema schema, LLCSegmentName llcSegmentName, Semaphore partitionGroupConsumerSemaphore, ServerMetrics serverMetrics,
       @Nullable PartitionUpsertMetadataManager partitionUpsertMetadataManager) {
     _segBuildSemaphore = realtimeTableDataManager.getSegmentBuildSemaphore();
     _segmentZKMetadata = (LLCRealtimeSegmentZKMetadata) segmentZKMetadata;
@@ -1129,10 +1130,10 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     _streamTopic = _partitionLevelStreamConfig.getTopicName();
     _segmentNameStr = _segmentZKMetadata.getSegmentName();
     _llcSegmentName = llcSegmentName;
-    _streamPartitionId = _llcSegmentName.getPartitionId();
-    _partitionConsumerSemaphore = partitionConsumerSemaphore;
+    _partitionGroupId = _llcSegmentName.getPartitionGroupId();
+    _partitionGroupConsumerSemaphore = partitionGroupConsumerSemaphore;
     _acquiredConsumerSemaphore = new AtomicBoolean(false);
-    _metricKeyName = _tableNameWithType + "-" + _streamTopic + "-" + _streamPartitionId;
+    _metricKeyName = _tableNameWithType + "-" + _streamTopic + "-" + _partitionGroupId;
     segmentLogger = LoggerFactory.getLogger(LLRealtimeSegmentDataManager.class.getName() + "_" + _segmentNameStr);
     _tableStreamName = _tableNameWithType + "_" + _streamTopic;
     _memoryManager = getMemoryManager(realtimeTableDataManager.getConsumerDir(), _segmentNameStr,
@@ -1210,14 +1211,14 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
     // Create message decoder
     Set<String> fieldsToRead = IngestionUtils.getFieldsForRecordExtractor(_tableConfig.getIngestionConfig(), _schema);
     _messageDecoder = StreamDecoderProvider.create(_partitionLevelStreamConfig, fieldsToRead);
-    _clientId = _streamTopic + "-" + _streamPartitionId;
+    _clientId = _streamTopic + "-" + _partitionGroupId;
 
     // Create record transformer
     _recordTransformer = CompositeTransformer.getDefaultTransformer(tableConfig, schema);
 
     // Acquire semaphore to create Kafka consumers
     try {
-      _partitionConsumerSemaphore.acquire();
+      _partitionGroupConsumerSemaphore.acquire();
       _acquiredConsumerSemaphore.set(true);
     } catch (InterruptedException e) {
       String errorMsg = "InterruptedException when acquiring the partitionConsumerSemaphore";
@@ -1243,7 +1244,8 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
         //       long as the partition function is not changed.
         int numPartitions = columnPartitionConfig.getNumPartitions();
         try {
-          int numStreamPartitions = _streamMetadataProvider.fetchPartitionCount(/*maxWaitTimeMs=*/5000L);
+          // fixme: get this from ideal state
+          int numStreamPartitions = _streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 5000).size();
           if (numStreamPartitions != numPartitions) {
             segmentLogger.warn(
                 "Number of stream partitions: {} does not match number of partitions in the partition config: {}, using number of stream partitions",
@@ -1261,7 +1263,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
         realtimeSegmentConfigBuilder.setPartitionColumn(partitionColumn);
         realtimeSegmentConfigBuilder
             .setPartitionFunction(PartitionFunctionFactory.getPartitionFunction(partitionFunctionName, numPartitions));
-        realtimeSegmentConfigBuilder.setPartitionId(_streamPartitionId);
+        realtimeSegmentConfigBuilder.setPartitionId(_partitionGroupId);
       } else {
         segmentLogger.warn("Cannot partition on multiple columns: {}", columnPartitionMap.keySet());
       }
@@ -1313,7 +1315,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       closePartitionLevelConsumer();
     }
     segmentLogger.info("Creating new stream consumer, reason: {}", reason);
-    _partitionLevelConsumer = _streamConsumerFactory.createPartitionLevelConsumer(_clientId, _streamPartitionId);
+    _partitionLevelConsumer = _streamConsumerFactory.createPartitionLevelConsumer(_clientId, _partitionGroupId);
   }
 
   /**
@@ -1325,7 +1327,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       closeStreamMetadataProvider();
     }
     segmentLogger.info("Creating new stream metadata provider, reason: {}", reason);
-    _streamMetadataProvider = _streamConsumerFactory.createPartitionMetadataProvider(_clientId, _streamPartitionId);
+    _streamMetadataProvider = _streamConsumerFactory.createPartitionMetadataProvider(_clientId, _partitionGroupId);
   }
 
   // This should be done during commit? We may not always commit when we build a segment....
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
index 33283b9..9850048 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
@@ -36,6 +36,7 @@ import java.util.concurrent.TimeUnit;
 import javax.annotation.concurrent.ThreadSafe;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.io.FileUtils;
+import org.apache.helix.model.IdealState;
 import org.apache.pinot.common.Utils;
 import org.apache.pinot.common.metadata.ZKMetadataProvider;
 import org.apache.pinot.common.metadata.instance.InstanceZKMetadata;
@@ -89,7 +90,7 @@ public class RealtimeTableDataManager extends BaseTableDataManager {
   // In some streams, it's possible that having multiple consumers (with the same consumer name on the same host) consuming from the same stream partition can lead to bugs.
   // The semaphores will stay in the hash map even if the consuming partitions move to a different host.
   // We expect that there will be a small number of semaphores, but that may be ok.
-  private final Map<Integer, Semaphore> _partitionIdToSemaphoreMap = new ConcurrentHashMap<>();
+  private final Map<Integer, Semaphore> _partitionGroupIdToSemaphoreMap = new ConcurrentHashMap<>();
 
   // The old name of the stats file used to be stats.ser which we changed when we moved all packages
   // from com.linkedin to org.apache because of not being able to deserialize the old files using the newer classes
@@ -274,7 +275,7 @@ public class RealtimeTableDataManager extends BaseTableDataManager {
       llcSegmentName = new LLCSegmentName(segmentName);
       if (_tableUpsertMetadataManager != null) {
         partitionUpsertMetadataManager =
-            _tableUpsertMetadataManager.getOrCreatePartitionManager(llcSegmentName.getPartitionId());
+            _tableUpsertMetadataManager.getOrCreatePartitionManager(llcSegmentName.getPartitionGroupId());
       }
     }
 
@@ -307,11 +308,11 @@ public class RealtimeTableDataManager extends BaseTableDataManager {
         }
 
         // Generates only one semaphore for every partitionId
-        int partitionId = llcSegmentName.getPartitionId();
-        _partitionIdToSemaphoreMap.putIfAbsent(partitionId, new Semaphore(1));
+        int partitionGroupId = llcSegmentName.getPartitionGroupId();
+        _partitionGroupIdToSemaphoreMap.putIfAbsent(partitionGroupId, new Semaphore(1));
         manager =
             new LLRealtimeSegmentDataManager(realtimeSegmentZKMetadata, tableConfig, this, _indexDir.getAbsolutePath(),
-                indexLoadingConfig, schema, llcSegmentName, _partitionIdToSemaphoreMap.get(partitionId), _serverMetrics,
+                indexLoadingConfig, schema, llcSegmentName, _partitionGroupIdToSemaphoreMap.get(partitionGroupId), _serverMetrics,
                 partitionUpsertMetadataManager);
       }
       _logger.info("Initialize RealtimeSegmentDataManager - " + segmentName);
@@ -336,7 +337,7 @@ public class RealtimeTableDataManager extends BaseTableDataManager {
     columnToReaderMap.put(_timeColumnName, new PinotSegmentColumnReader(immutableSegment, _timeColumnName));
     int numTotalDocs = immutableSegment.getSegmentMetadata().getTotalDocs();
     String segmentName = immutableSegment.getSegmentName();
-    int partitionId = new LLCSegmentName(immutableSegment.getSegmentName()).getPartitionId();
+    int partitionId = new LLCSegmentName(immutableSegment.getSegmentName()).getPartitionGroupId();
     PartitionUpsertMetadataManager partitionUpsertMetadataManager =
         _tableUpsertMetadataManager.getOrCreatePartitionManager(partitionId);
     int numPrimaryKeyColumns = _primaryKeyColumns.size();
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
index 0017c43..d09bdeb 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManagerTest.java
@@ -719,7 +719,7 @@ public class LLRealtimeSegmentDataManagerTest {
     long timeout = 10_000L;
     FakeLLRealtimeSegmentDataManager firstSegmentDataManager = createFakeSegmentManager();
     Assert.assertTrue(firstSegmentDataManager.getAcquiredConsumerSemaphore().get());
-    Semaphore firstSemaphore = firstSegmentDataManager.getPartitionConsumerSemaphore();
+    Semaphore firstSemaphore = firstSegmentDataManager.getPartitionGroupConsumerSemaphore();
     Assert.assertEquals(firstSemaphore.availablePermits(), 0);
     Assert.assertFalse(firstSemaphore.hasQueuedThreads());
 
@@ -751,18 +751,18 @@ public class LLRealtimeSegmentDataManagerTest {
         "Failed to acquire the semaphore for the second segment manager in " + timeout + "ms");
 
     Assert.assertTrue(secondSegmentDataManager.get().getAcquiredConsumerSemaphore().get());
-    Semaphore secondSemaphore = secondSegmentDataManager.get().getPartitionConsumerSemaphore();
+    Semaphore secondSemaphore = secondSegmentDataManager.get().getPartitionGroupConsumerSemaphore();
     Assert.assertEquals(firstSemaphore, secondSemaphore);
     Assert.assertEquals(secondSemaphore.availablePermits(), 0);
     Assert.assertFalse(secondSemaphore.hasQueuedThreads());
 
     // Call destroy method the 2nd time on the first segment manager, the permits in semaphore won't increase.
     firstSegmentDataManager.destroy();
-    Assert.assertEquals(firstSegmentDataManager.getPartitionConsumerSemaphore().availablePermits(), 0);
+    Assert.assertEquals(firstSegmentDataManager.getPartitionGroupConsumerSemaphore().availablePermits(), 0);
 
     // The permit finally gets released in the Semaphore.
     secondSegmentDataManager.get().destroy();
-    Assert.assertEquals(secondSegmentDataManager.get().getPartitionConsumerSemaphore().availablePermits(), 1);
+    Assert.assertEquals(secondSegmentDataManager.get().getPartitionGroupConsumerSemaphore().availablePermits(), 1);
   }
 
   public static class FakeLLRealtimeSegmentDataManager extends LLRealtimeSegmentDataManager {
@@ -800,7 +800,7 @@ public class LLRealtimeSegmentDataManagerTest {
         throws Exception {
       super(segmentZKMetadata, tableConfig, realtimeTableDataManager, resourceDataDir,
           new IndexLoadingConfig(makeInstanceDataManagerConfig(), tableConfig), schema, llcSegmentName,
-          semaphoreMap.get(llcSegmentName.getPartitionId()), serverMetrics,
+          semaphoreMap.get(llcSegmentName.getPartitionGroupId()), serverMetrics,
           new PartitionUpsertMetadataManager("testTable_REALTIME", 0, serverMetrics));
       _state = LLRealtimeSegmentDataManager.class.getDeclaredField("_state");
       _state.setAccessible(true);
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakePartitionGroupMetadata.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakePartitionGroupMetadata.java
new file mode 100644
index 0000000..78ee12c
--- /dev/null
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakePartitionGroupMetadata.java
@@ -0,0 +1,48 @@
+package org.apache.pinot.core.realtime.impl.fakestream;
+
+import org.apache.pinot.spi.stream.Checkpoint;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
+
+
+public class FakePartitionGroupMetadata implements PartitionGroupMetadata {
+
+  private final int _groupId;
+  public FakePartitionGroupMetadata(int groupId) {
+    _groupId = groupId;
+  }
+
+  @Override
+  public int getGroupId() {
+    return _groupId;
+  }
+
+  @Override
+  public Checkpoint getStartCheckpoint() {
+    return null;
+  }
+
+  @Override
+  public Checkpoint getEndCheckpoint() {
+    return null;
+  }
+
+  @Override
+  public void setStartCheckpoint(Checkpoint startCheckpoint) {
+
+  }
+
+  @Override
+  public void setEndCheckpoint(Checkpoint endCheckpoint) {
+
+  }
+
+  @Override
+  public byte[] serialize() {
+    return new byte[0];
+  }
+
+  @Override
+  public PartitionGroupMetadata deserialize(byte[] blob) {
+    return null;
+  }
+}
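
This fake is what the PinotLLCRealtimeSegmentManagerTest override earlier in this commit plugs in, one instance per partition, via IntStream.range(0, _numPartitions).mapToObj(FakePartitionGroupMetadata::new).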
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
index 9669223..289b226 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.core.realtime.impl.fakestream;
 
-import java.util.List;
 import java.util.Set;
 import org.apache.pinot.core.util.IngestionUtils;
 import org.apache.pinot.spi.config.table.TableConfig;
@@ -69,14 +68,9 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
     return new FakeStreamMetadataProvider(_streamConfig);
   }
 
-  @Override
-  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata) {
-    return null;
-  }
 
   @Override
-  public PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata) {
+  public PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata) {
     return null;
   }
 
@@ -93,7 +87,7 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
 
     // stream metadata provider
     StreamMetadataProvider streamMetadataProvider = streamConsumerFactory.createStreamMetadataProvider(clientId);
-    int partitionCount = streamMetadataProvider.fetchPartitionCount(10_000);
+    int partitionCount = streamMetadataProvider.getPartitionGroupMetadataList(null, 10_000).size();
     System.out.println(partitionCount);
 
     // Partition metadata provider
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
index e0b8ebd..c96d06a 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
@@ -19,9 +19,12 @@
 package org.apache.pinot.core.realtime.impl.fakestream;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -31,7 +34,7 @@ import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
  * StreamMetadataProvider implementation for the fake stream
  */
 public class FakeStreamMetadataProvider implements StreamMetadataProvider {
-  private int _numPartitions;
+  private final int _numPartitions;
 
   public FakeStreamMetadataProvider(StreamConfig streamConfig) {
     _numPartitions = FakeStreamConfigUtils.getNumPartitions(streamConfig);
@@ -42,6 +45,16 @@ public class FakeStreamMetadataProvider implements StreamMetadataProvider {
     return _numPartitions;
   }
 
+  @Override
+  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis) {
+    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>();
+    for (int i = 0; i < _numPartitions; i++) {
+      partitionGroupMetadataList.add(new FakePartitionGroupMetadata(i));
+    }
+    return partitionGroupMetadataList;
+  }
+
   public long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis) throws TimeoutException {
     throw new UnsupportedOperationException("This method is deprecated");
   }
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
index 808a464..d917d73 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
@@ -19,7 +19,6 @@
 package org.apache.pinot.integration.tests;
 
 import java.lang.reflect.Constructor;
-import java.util.List;
 import java.util.Random;
 import java.util.Set;
 import org.apache.pinot.spi.data.readers.GenericRow;
@@ -122,13 +121,7 @@ public class FlakyConsumerRealtimeClusterIntegrationTest extends RealtimeCluster
     }
 
     @Override
-    public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-        List<PartitionGroupMetadata> currentPartitionGroupsMetadata) {
-      return null;
-    }
-
-    @Override
-    public PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata) {
+    public PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata) {
       return null;
     }
   }
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SegmentPartitionLLCRealtimeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SegmentPartitionLLCRealtimeClusterIntegrationTest.java
index cd4f9b3..0196bde 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SegmentPartitionLLCRealtimeClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SegmentPartitionLLCRealtimeClusterIntegrationTest.java
@@ -165,7 +165,7 @@ public class SegmentPartitionLLCRealtimeClusterIntegrationTest extends BaseClust
       assertNotNull(columnPartitionMetadata);
       assertTrue(columnPartitionMetadata.getFunctionName().equalsIgnoreCase("murmur"));
       assertEquals(columnPartitionMetadata.getNumPartitions(), 2);
-      int streamPartitionId = new LLCSegmentName(segmentZKMetadata.getSegmentName()).getPartitionId();
+      int streamPartitionId = new LLCSegmentName(segmentZKMetadata.getSegmentName()).getPartitionGroupId();
       assertEquals(columnPartitionMetadata.getPartitions(), Collections.singleton(streamPartitionId));
       numSegmentsForPartition[streamPartitionId]++;
     }
@@ -236,7 +236,7 @@ public class SegmentPartitionLLCRealtimeClusterIntegrationTest extends BaseClust
       assertNotNull(columnPartitionMetadata);
       assertTrue(columnPartitionMetadata.getFunctionName().equalsIgnoreCase("murmur"));
       assertEquals(columnPartitionMetadata.getNumPartitions(), 2);
-      int streamPartitionId = new LLCSegmentName(segmentZKMetadata.getSegmentName()).getPartitionId();
+      int streamPartitionId = new LLCSegmentName(segmentZKMetadata.getSegmentName()).getPartitionGroupId();
       numSegmentsForPartition[streamPartitionId]++;
 
       if (segmentZKMetadata.getStatus() == Status.IN_PROGRESS) {
@@ -313,7 +313,7 @@ public class SegmentPartitionLLCRealtimeClusterIntegrationTest extends BaseClust
       assertNotNull(columnPartitionMetadata);
       assertTrue(columnPartitionMetadata.getFunctionName().equalsIgnoreCase("murmur"));
       assertEquals(columnPartitionMetadata.getNumPartitions(), 2);
-      int streamPartitionId = new LLCSegmentName(segmentZKMetadata.getSegmentName()).getPartitionId();
+      int streamPartitionId = new LLCSegmentName(segmentZKMetadata.getSegmentName()).getPartitionGroupId();
       numSegmentsForPartition[streamPartitionId]++;
 
       if (segmentZKMetadata.getStatus() == Status.IN_PROGRESS) {
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
index b8ed19d..82c282c 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.plugin.stream.kafka09;
 
-import java.util.List;
 import java.util.Set;
 import org.apache.pinot.spi.stream.PartitionGroupConsumer;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
@@ -55,13 +54,7 @@ public class KafkaConsumerFactory extends StreamConsumerFactory {
   }
 
   @Override
-  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata) {
-    return null;
-  }
-
-  @Override
-  public PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata) {
+  public PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata) {
     return null;
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionGroupMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionGroupMetadata.java
new file mode 100644
index 0000000..1d792ac
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionGroupMetadata.java
@@ -0,0 +1,48 @@
+package org.apache.pinot.plugin.stream.kafka09;
+
+import org.apache.pinot.spi.stream.Checkpoint;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
+
+
+public class KafkaPartitionGroupMetadata implements PartitionGroupMetadata {
+
+  private final int _groupId;
+  public KafkaPartitionGroupMetadata(int partitionId) {
+    _groupId = partitionId;
+  }
+
+  @Override
+  public int getGroupId() {
+    return _groupId;
+  }
+
+  @Override
+  public Checkpoint getStartCheckpoint() {
+    return null;
+  }
+
+  @Override
+  public Checkpoint getEndCheckpoint() {
+    return null;
+  }
+
+  @Override
+  public void setStartCheckpoint(Checkpoint startCheckpoint) {
+
+  }
+
+  @Override
+  public void setEndCheckpoint(Checkpoint endCheckpoint) {
+
+  }
+
+  @Override
+  public byte[] serialize() {
+    return new byte[0];
+  }
+
+  @Override
+  public PartitionGroupMetadata deserialize(byte[] blob) {
+    return null;
+  }
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
index 06ee697..865ae96 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
@@ -22,9 +22,13 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.util.concurrent.Uninterruptibles;
 import java.io.IOException;
+import java.time.Duration;
+import java.util.ArrayList;
 import java.util.Collections;
+import java.util.List;
 import java.util.concurrent.TimeUnit;
 import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 import kafka.api.PartitionOffsetRequestInfo;
 import kafka.common.TopicAndPartition;
 import kafka.javaapi.OffsetRequest;
@@ -36,6 +40,7 @@ import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -84,7 +89,12 @@ public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implemen
    * @return
    */
   @Override
+  @Deprecated
   public synchronized int fetchPartitionCount(long timeoutMillis) {
+    return fetchPartitionCountInternal(timeoutMillis);
+  }
+
+  private int fetchPartitionCountInternal(long timeoutMillis) {
     int unknownTopicReplyCount = 0;
     final int MAX_UNKNOWN_TOPIC_REPLY_COUNT = 10;
     int kafkaErrorCount = 0;
@@ -145,6 +155,22 @@ public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implemen
     throw new TimeoutException();
   }
 
+  /**
+   * Fetches the partition group metadata list.
+   * @param currentPartitionGroupsMetadata the current partition groups. In Kafka, each partition group contains a
+   *                                       single partition, so the current groups are not needed to compute the new ones.
+   */
+  @Override
+  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+      @Nullable List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis) {
+    int partitionCount = fetchPartitionCountInternal(timeoutMillis);
+    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>(partitionCount);
+    for (int i = 0; i < partitionCount; i++) {
+      partitionGroupMetadataList.add(new KafkaPartitionGroupMetadata(i));
+    }
+    return partitionGroupMetadataList;
+  }
+
   public synchronized long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
       throws java.util.concurrent.TimeoutException {
     throw new UnsupportedOperationException("The use of this method s not supported");
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
index beb82e5..fbdfdfb 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
@@ -291,7 +291,7 @@ public class KafkaPartitionLevelConsumerTest {
 
     KafkaStreamMetadataProvider streamMetadataProvider =
         new KafkaStreamMetadataProvider(clientId, streamConfig, mockKafkaSimpleConsumerFactory);
-    Assert.assertEquals(streamMetadataProvider.fetchPartitionCount(10000L), 2);
+    Assert.assertEquals(streamMetadataProvider.getPartitionGroupMetadataList(null, 10000L).size(), 2);
   }
 
   @Test
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
index 806baff..c73aacb 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.plugin.stream.kafka20;
 
-import java.util.List;
 import java.util.Set;
 import org.apache.pinot.spi.stream.PartitionGroupConsumer;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
@@ -52,13 +51,7 @@ public class KafkaConsumerFactory extends StreamConsumerFactory {
   }
 
   @Override
-  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata) {
-    return null;
-  }
-
-  @Override
-  public PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata) {
+  public PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata) {
     return null;
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionGroupMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionGroupMetadata.java
new file mode 100644
index 0000000..31ae75a
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionGroupMetadata.java
@@ -0,0 +1,48 @@
+package org.apache.pinot.plugin.stream.kafka20;
+
+import org.apache.pinot.spi.stream.Checkpoint;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
+
+
+public class KafkaPartitionGroupMetadata implements PartitionGroupMetadata {
+
+  private final int _groupId;
+  public KafkaPartitionGroupMetadata(int partitionId) {
+    _groupId = partitionId;
+  }
+
+  @Override
+  public int getGroupId() {
+    return _groupId;
+  }
+
+  @Override
+  public Checkpoint getStartCheckpoint() {
+    return null;
+  }
+
+  @Override
+  public Checkpoint getEndCheckpoint() {
+    return null;
+  }
+
+  @Override
+  public void setStartCheckpoint(Checkpoint startCheckpoint) {
+
+  }
+
+  @Override
+  public void setEndCheckpoint(Checkpoint endCheckpoint) {
+
+  }
+
+  @Override
+  public byte[] serialize() {
+    return new byte[0];
+  }
+
+  @Override
+  public PartitionGroupMetadata deserialize(byte[] blob) {
+    return null;
+  }
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
index c0e2041..187c61b 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
@@ -21,11 +21,15 @@ package org.apache.pinot.plugin.stream.kafka20;
 import com.google.common.base.Preconditions;
 import java.io.IOException;
 import java.time.Duration;
+import java.util.ArrayList;
 import java.util.Collections;
+import java.util.List;
 import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -42,10 +46,27 @@ public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHa
   }
 
   @Override
+  @Deprecated
   public int fetchPartitionCount(long timeoutMillis) {
     return _consumer.partitionsFor(_topic, Duration.ofMillis(timeoutMillis)).size();
   }
 
+  /**
+   * Fetches the partition group metadata list.
+   * @param currentPartitionGroupsMetadata the current partition groups. In Kafka, each partition group contains a
+   *                                       single partition, so the current groups are not needed to compute the new ones.
+   */
+  @Override
+  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+      @Nullable List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis) {
+    int partitionCount = _consumer.partitionsFor(_topic, Duration.ofMillis(timeoutMillis)).size();
+    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>(partitionCount);
+    for (int i = 0; i < partitionCount; i++) {
+      partitionGroupMetadataList.add(new KafkaPartitionGroupMetadata(i));
+    }
+    return partitionGroupMetadataList;
+  }
+
   public synchronized long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
       throws java.util.concurrent.TimeoutException {
     throw new UnsupportedOperationException("The use of this method is not supported");
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionCountFetcher.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataFetcher.java
similarity index 74%
rename from pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionCountFetcher.java
rename to pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataFetcher.java
index d523235..e1ce1a6 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionCountFetcher.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataFetcher.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.spi.stream;
 
+import java.util.List;
 import java.util.concurrent.Callable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -26,24 +27,27 @@ import org.slf4j.LoggerFactory;
 /**
  * Fetches the partition group metadata list of a stream using the {@link StreamMetadataProvider}
  */
-public class PartitionCountFetcher implements Callable<Boolean> {
+public class PartitionGroupMetadataFetcher implements Callable<Boolean> {
 
-  private static final Logger LOGGER = LoggerFactory.getLogger(PartitionCountFetcher.class);
+  private static final Logger LOGGER = LoggerFactory.getLogger(PartitionGroupMetadataFetcher.class);
 
   private int _partitionCount = -1;
+  private List<PartitionGroupMetadata> _partitionGroupMetadataList;
+  private List<PartitionGroupMetadata> _currentPartitionGroupMetadata;
   private final StreamConfig _streamConfig;
   private StreamConsumerFactory _streamConsumerFactory;
   private Exception _exception;
   private final String _topicName;
 
-  public PartitionCountFetcher(StreamConfig streamConfig) {
+  public PartitionGroupMetadataFetcher(StreamConfig streamConfig, List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
     _streamConfig = streamConfig;
     _streamConsumerFactory = StreamConsumerFactoryProvider.create(_streamConfig);
     _topicName = streamConfig.getTopicName();
+    _currentPartitionGroupMetadata = currentPartitionGroupMetadataList;
   }
 
-  public int getPartitionCount() {
-    return _partitionCount;
+  public List<PartitionGroupMetadata> getPartitionGroupMetadataList() {
+    return _partitionGroupMetadataList;
   }
 
   public Exception getException() {
@@ -59,10 +63,10 @@ public class PartitionCountFetcher implements Callable<Boolean> {
   public Boolean call()
       throws Exception {
 
-    String clientId = PartitionCountFetcher.class.getSimpleName() + "-" + _topicName;
+    String clientId = PartitionGroupMetadataFetcher.class.getSimpleName() + "-" + _topicName;
     try (
         StreamMetadataProvider streamMetadataProvider = _streamConsumerFactory.createStreamMetadataProvider(clientId)) {
-      _partitionCount = streamMetadataProvider.fetchPartitionCount(/*maxWaitTimeMs=*/5000L);
+      _partitionGroupMetadataList = streamMetadataProvider.getPartitionGroupMetadataList(_currentPartitionGroupMetadata, /*maxWaitTimeMs=*/5000L);
       if (_exception != null) {
         // We had at least one failure, but succeeded now. Log an info
         LOGGER.info("Successfully retrieved partition count as {} for topic {}", _partitionCount, _topicName);
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionOffsetFetcher.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionOffsetFetcher.java
index 1d50160..b92f04d 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionOffsetFetcher.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionOffsetFetcher.java
@@ -33,16 +33,16 @@ public class PartitionOffsetFetcher implements Callable<Boolean> {
 
   private final String _topicName;
   private final OffsetCriteria _offsetCriteria;
-  private final int _partitionId;
+  private final int _partitionGroupId;
 
   private Exception _exception = null;
   private StreamPartitionMsgOffset _offset;
   private StreamConsumerFactory _streamConsumerFactory;
   StreamConfig _streamConfig;
 
-  public PartitionOffsetFetcher(final OffsetCriteria offsetCriteria, int partitionId, StreamConfig streamConfig) {
+  public PartitionOffsetFetcher(final OffsetCriteria offsetCriteria, int partitionGroupId, StreamConfig streamConfig) {
     _offsetCriteria = offsetCriteria;
-    _partitionId = partitionId;
+    _partitionGroupId = partitionGroupId;
     _streamConfig = streamConfig;
     _streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
     _topicName = streamConfig.getTopicName();
@@ -64,18 +64,19 @@ public class PartitionOffsetFetcher implements Callable<Boolean> {
   @Override
   public Boolean call()
       throws Exception {
-    String clientId = PartitionOffsetFetcher.class.getSimpleName() + "-" + _topicName + "-" + _partitionId;
+    String clientId = PartitionOffsetFetcher.class.getSimpleName() + "-" + _topicName + "-" + _partitionGroupId;
     try (StreamMetadataProvider streamMetadataProvider = _streamConsumerFactory
-        .createPartitionMetadataProvider(clientId, _partitionId)) {
+        .createPartitionMetadataProvider(clientId, _partitionGroupId)) {
       _offset =
           streamMetadataProvider.fetchStreamPartitionOffset(_offsetCriteria, STREAM_PARTITION_OFFSET_FETCH_TIMEOUT_MILLIS);
       if (_exception != null) {
         LOGGER.info("Successfully retrieved offset({}) for stream topic {} partition {}", _offset, _topicName,
-            _partitionId);
+            _partitionGroupId);
       }
       return Boolean.TRUE;
     } catch (TransientConsumerException e) {
-      LOGGER.warn("Temporary exception when fetching offset for topic {} partition {}:{}", _topicName, _partitionId,
+      LOGGER.warn("Temporary exception when fetching offset for topic {} partition {}:{}", _topicName,
+          _partitionGroupId,
           e.getMessage());
       _exception = e;
       return Boolean.FALSE;
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
index 4db0fb1..9caf61b 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.spi.stream;
 
-import java.util.List;
 import java.util.Set;
 
 
@@ -42,6 +41,7 @@ public abstract class StreamConsumerFactory {
    * @param partition the partition id of the partition for which this consumer is being created
    * @return
    */
+  @Deprecated
   public abstract PartitionLevelConsumer createPartitionLevelConsumer(String clientId, int partition);
 
   /**
@@ -74,10 +74,6 @@ public abstract class StreamConsumerFactory {
     return new LongMsgOffsetFactory();
   }
 
-  // takes the current state of partition groups (groupings of shards, the state of the consumption) and creates the new state
-  public abstract List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata);
-
   // creates a consumer which consumes from a partition group
-  public abstract PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata);
+  public abstract PartitionGroupConsumer createPartitionGroupConsumer(PartitionGroupMetadata metadata);
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
index 557ffc4..5b9104e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.spi.stream;
 
 import java.io.Closeable;
+import java.util.List;
 import javax.annotation.Nonnull;
 import org.apache.pinot.spi.annotations.InterfaceAudience;
 import org.apache.pinot.spi.annotations.InterfaceStability;
@@ -32,11 +33,15 @@ import org.apache.pinot.spi.annotations.InterfaceStability;
 public interface StreamMetadataProvider extends Closeable {
   /**
    * Fetches the number of partitions for a topic given the stream configs
-   * @param timeoutMillis
-   * @return
+   * @deprecated use getPartitionGroupMetadataList instead
    */
+  @Deprecated
   int fetchPartitionCount(long timeoutMillis);
 
+  // Takes the current state of the partition groups (groupings of shards and their consumption state) and computes the new state
+  List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis);
+
   // Issue 5953 Retain this interface for 0.5.0, remove in 0.6.0
   @Deprecated
   long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
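
A minimal usage sketch (not part of the diffs above) of the renamed PartitionGroupMetadataFetcher, driving its Callable contract directly; the retry count and the empty current-metadata list are assumptions, and real callers would likely wrap the Callable in Pinot's retry utilities.

    import java.util.Collections;
    import java.util.List;
    import org.apache.pinot.spi.stream.PartitionGroupMetadata;
    import org.apache.pinot.spi.stream.PartitionGroupMetadataFetcher;
    import org.apache.pinot.spi.stream.StreamConfig;

    public class PartitionGroupMetadataFetcherSketch {
      // streamConfig is assumed to be built from the table's stream configs
      static List<PartitionGroupMetadata> fetchWithRetries(StreamConfig streamConfig) throws Exception {
        PartitionGroupMetadataFetcher fetcher =
            new PartitionGroupMetadataFetcher(streamConfig, Collections.emptyList());
        for (int attempt = 0; attempt < 3; attempt++) {
          // call() returns TRUE on success and FALSE on a transient consumer exception
          if (fetcher.call()) {
            return fetcher.getPartitionGroupMetadataList();
          }
        }
        throw fetcher.getException();
      }
    }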


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 16/47: Refactor kinesis shard metadata interface and add shardId to the metadata

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 92ddaab79edd23e232e2b2fd8fcc187830c66d40
Author: KKcorps <kh...@gmail.com>
AuthorDate: Fri Dec 11 23:57:29 2020 +0530

    Refactor kinesis shard metadata interface and add shardId to the metadata
---
 .../kinesis/KinesisPartitionGroupMetadataMap.java    | 20 +++++++++++++-------
 .../plugin/stream/kinesis/KinesisShardMetadata.java  |  6 ++++++
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
index bc3fef2..87f7235 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -1,8 +1,7 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
-import java.util.HashMap;
+import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadataMap;
 import software.amazon.awssdk.services.kinesis.model.ListShardsRequest;
@@ -11,7 +10,7 @@ import software.amazon.awssdk.services.kinesis.model.Shard;
 
 
 public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler implements PartitionGroupMetadataMap {
-  private Map<String, PartitionGroupMetadata> _stringPartitionGroupMetadataMap = new HashMap<>();
+  private final List<PartitionGroupMetadata> _stringPartitionGroupMetadataIndex = new ArrayList<>();
 
   public KinesisPartitionGroupMetadataMap(String stream, String awsRegion){
     super(awsRegion);
@@ -20,12 +19,19 @@ public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler i
     for(Shard shard : shardList){
       String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
       KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream);
-      shardMetadata.setEndCheckpoint(new KinesisCheckpoint(endingSequenceNumber));
-      _stringPartitionGroupMetadataMap.put(shard.shardId(), shardMetadata);
+      shardMetadata.setStartCheckpoint(new KinesisCheckpoint(endingSequenceNumber));
+      _stringPartitionGroupMetadataIndex.add(shardMetadata);
     }
   }
 
-  public Map<String, PartitionGroupMetadata> getPartitionMetadata(){
-      return _stringPartitionGroupMetadataMap;
+  @Override
+  public List<PartitionGroupMetadata> getMetadataList() {
+    return _stringPartitionGroupMetadataIndex;
   }
+
+  @Override
+  public PartitionGroupMetadata getPartitionGroupMetadata(int index) {
+    return _stringPartitionGroupMetadataIndex.get(index);
+  }
+
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
index d50d821..4a19285 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -8,6 +8,7 @@ import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
 public class KinesisShardMetadata extends KinesisConnectionHandler implements PartitionGroupMetadata {
+  String _shardId;
   Checkpoint _startCheckpoint;
   Checkpoint _endCheckpoint;
 
@@ -16,6 +17,11 @@ public class KinesisShardMetadata extends KinesisConnectionHandler implements Pa
         ShardIteratorType.LATEST).streamName(streamName).build());
     _startCheckpoint = new KinesisCheckpoint(getShardIteratorResponse.shardIterator());
     _endCheckpoint = null;
+    _shardId = shardId;
+  }
+
+  public String getShardId() {
+    return _shardId;
   }
 
   @Override
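
A short usage sketch for the refactored KinesisPartitionGroupMetadataMap (not from this commit); the stream name and region are placeholders, and the constructor issues a ListShards call, so AWS connectivity is required.

    package org.apache.pinot.plugin.stream.kinesis;

    import java.util.List;
    import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;

    public class KinesisMetadataMapSketch {
      public static void main(String[] args) {
        // "my-stream" and "us-east-1" are placeholder values
        KinesisPartitionGroupMetadataMap metadataMap =
            new KinesisPartitionGroupMetadataMap("my-stream", "us-east-1");
        List<PartitionGroupMetadata> metadataList = metadataMap.getMetadataList();
        for (int i = 0; i < metadataList.size(); i++) {
          // after this commit, every entry is a KinesisShardMetadata carrying its shardId
          KinesisShardMetadata shardMetadata = (KinesisShardMetadata) metadataMap.getPartitionGroupMetadata(i);
          System.out.println(shardMetadata.getShardId());
        }
      }
    }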


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 35/47: Add support for stream partition offsets (#6402)

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit a3deab3ea5154f03debf10751e273dc1a82ec919
Author: Kartik Khare <kh...@gmail.com>
AuthorDate: Tue Jan 5 01:19:28 2021 +0530

    Add support for stream partition offsets (#6402)
---
 .../plugin/stream/kinesis/KinesisCheckpoint.java   |  3 +-
 .../plugin/stream/kinesis/KinesisConsumer.java     | 10 +++++--
 .../stream/kinesis/KinesisConsumerFactory.java     |  5 ++++
 .../stream/kinesis/KinesisMsgOffsetFactory.java    | 32 ++++++++++++++++++++++
 .../plugin/stream/kinesis/KinesisRecordsBatch.java | 15 ++++++----
 5 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 1b8f86e..d42f899 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -23,10 +23,11 @@ import com.fasterxml.jackson.core.type.TypeReference;
 import java.io.IOException;
 import java.util.Map;
 import org.apache.pinot.spi.stream.Checkpoint;
+import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.utils.JsonUtils;
 
 
-public class KinesisCheckpoint implements Checkpoint {
+public class KinesisCheckpoint implements StreamPartitionMsgOffset {
   private Map<String, String> _shardToStartSequenceMap;
 
   public KinesisCheckpoint(Map<String, String> shardToStartSequenceMap) {
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 8a24208..8ed3de7 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -80,6 +80,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
         createConnection();
       }
 
+      // TODO: iterate over all the shardIds in the map
       Map.Entry<String, String> next = kinesisStartCheckpoint.getShardToStartSequenceMap().entrySet().iterator().next();
       String shardIterator = getShardIterator(next.getKey(), next.getValue());
 
@@ -125,7 +126,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
         nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
       }
 
-      return new KinesisRecordsBatch(recordList);
+      return new KinesisRecordsBatch(recordList, next.getKey());
     } catch (ProvisionedThroughputExceededException e) {
       LOG.warn("The request rate for the stream is too high", e);
       return handleException(kinesisStartCheckpoint, recordList);
@@ -147,13 +148,16 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
   }
 
   private KinesisRecordsBatch handleException(KinesisCheckpoint start, List<Record> recordList) {
+    String shardId = start.getShardToStartSequenceMap().entrySet().iterator().next().getKey();
+
     if (recordList.size() > 0) {
       String nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
       Map<String, String> newCheckpoint = new HashMap<>(start.getShardToStartSequenceMap());
       newCheckpoint.put(newCheckpoint.keySet().iterator().next(), nextStartSequenceNumber);
-      return new KinesisRecordsBatch(recordList);
+
+      return new KinesisRecordsBatch(recordList, shardId);
     } else {
-      return new KinesisRecordsBatch(recordList);
+      return new KinesisRecordsBatch(recordList, shardId);
 
     }
   }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index aa90812..631f240 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -25,6 +25,7 @@ import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamLevelConsumer;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
+import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory;
 
 
 public class KinesisConsumerFactory extends StreamConsumerFactory {
@@ -55,4 +56,8 @@ public class KinesisConsumerFactory extends StreamConsumerFactory {
     return new KinesisConsumer(new KinesisConfig(_streamConfig));
   }
 
+  @Override
+  public StreamPartitionMsgOffsetFactory createStreamMsgOffsetFactory() {
+    return new KinesisMsgOffsetFactory();
+  }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java
new file mode 100644
index 0000000..f234bae
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java
@@ -0,0 +1,32 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import java.io.IOException;
+import org.apache.pinot.spi.stream.StreamConfig;
+import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
+import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory;
+
+
+public class KinesisMsgOffsetFactory implements StreamPartitionMsgOffsetFactory {
+
+  KinesisConfig _kinesisConfig;
+
+  @Override
+  public void init(StreamConfig streamConfig) {
+    _kinesisConfig = new KinesisConfig(streamConfig);
+  }
+
+  @Override
+  public StreamPartitionMsgOffset create(String offsetStr) {
+    try {
+      return new KinesisCheckpoint(offsetStr);
+    }catch (IOException e){
+      return null;
+    }
+  }
+
+  @Override
+  public StreamPartitionMsgOffset create(StreamPartitionMsgOffset other) {
+    return new KinesisCheckpoint(((KinesisCheckpoint) other).getShardToStartSequenceMap());
+  }
+
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
index 04bf4e6..fb4bfb3 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
@@ -18,7 +18,9 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.RowMetadata;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -27,9 +29,11 @@ import software.amazon.awssdk.services.kinesis.model.Record;
 
 public class KinesisRecordsBatch implements MessageBatch<byte[]> {
   private List<Record> _recordList;
+  private String _shardId;
 
-  public KinesisRecordsBatch(List<Record> recordList) {
+  public KinesisRecordsBatch(List<Record> recordList, String shardId) {
     _recordList = recordList;
+    _shardId = shardId;
   }
 
   @Override
@@ -39,13 +43,12 @@ public class KinesisRecordsBatch implements MessageBatch<byte[]> {
 
   @Override
   public byte[] getMessageAtIndex(int index) {
-    return _recordList.get(index).data().asByteArray();
+    return _recordList.get(index).data().asByteBuffer().array();
   }
 
   @Override
   public int getMessageOffsetAtIndex(int index) {
-    //TODO: Doesn't translate to offset. Needs to be replaced.
-    return _recordList.get(index).hashCode();
+    return _recordList.get(index).data().asByteBuffer().arrayOffset();
   }
 
   @Override
@@ -60,7 +63,9 @@ public class KinesisRecordsBatch implements MessageBatch<byte[]> {
 
   @Override
   public StreamPartitionMsgOffset getNextStreamParitionMsgOffsetAtIndex(int index) {
-    throw new UnsupportedOperationException();
+    Map<String, String> shardToSequenceMap = new HashMap<>();
+    shardToSequenceMap.put(_shardId, _recordList.get(index).sequenceNumber());
+    return new KinesisCheckpoint(shardToSequenceMap);
   }
 
   @Override
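
A brief sketch of the new offset factory's round-trip behavior (not from the commit); the shard id and sequence number are made-up values, and the JSON string assumes KinesisCheckpoint's String constructor parses the shard-to-sequence map via JsonUtils, as the factory code above implies.

    package org.apache.pinot.plugin.stream.kinesis;

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;

    public class KinesisOffsetFactorySketch {
      public static void main(String[] args) {
        // build a checkpoint directly from a shard -> start-sequence-number map
        Map<String, String> shardToStartSequenceMap = new HashMap<>();
        shardToStartSequenceMap.put("shardId-000000000000", "12345"); // placeholder sequence number

        KinesisMsgOffsetFactory factory = new KinesisMsgOffsetFactory();
        // init(streamConfig) is omitted: in this commit, create() does not read the config
        StreamPartitionMsgOffset fromJson = factory.create("{\"shardId-000000000000\":\"12345\"}");
        StreamPartitionMsgOffset copy = factory.create(new KinesisCheckpoint(shardToStartSequenceMap));
      }
    }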


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 07/47: Checnges in test to make it complie

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 8afc48f8752c9044af791279e54568f4a124759e
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Thu Dec 31 15:49:33 2020 -0800

    Checnges in test to make it complie
---
 .../controller/helix/core/PinotHelixResourceManager.java  |  4 ++--
 .../core/realtime/PinotLLCRealtimeSegmentManager.java     |  2 +-
 .../core/realtime/PinotLLCRealtimeSegmentManagerTest.java | 15 +++++++++++----
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index c86f14c..b2949e7 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -1356,7 +1356,7 @@ public class PinotHelixResourceManager {
       idealState = PinotTableIdealStateBuilder
           .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
               _enableBatchMessageMode);
-      _pinotLLCRealtimeSegmentManager.setupNewTable(realtimeTableConfig, idealState);
+      _pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
       LOGGER.info("Successfully setup table for SHARDED consumers for {} ", realtimeTableName);
     } else {
 
@@ -1385,7 +1385,7 @@ public class PinotHelixResourceManager {
           idealState = PinotTableIdealStateBuilder
               .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
                   _enableBatchMessageMode);
-          _pinotLLCRealtimeSegmentManager.setupNewTable(realtimeTableConfig, idealState);
+          _pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
           LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
         } else {
           LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 0654a38..a6ef625 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -207,7 +207,7 @@ public class PinotLLCRealtimeSegmentManager {
   /**
    * Sets up the realtime table ideal state for a table of consumer type SHARDED
    */
-  public void setupNewTable(TableConfig tableConfig, IdealState idealState) {
+  public void setUpNewTable(TableConfig tableConfig, IdealState idealState) {
     Preconditions.checkState(!_isStopping, "Segment manager is stopping");
 
     String realtimeTableName = tableConfig.getTableName();
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
index 42bdedc..75c8057 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
@@ -907,15 +907,22 @@ public class PinotLLCRealtimeSegmentManagerTest {
 
     @Override
     void updateIdealStateOnSegmentCompletion(String realtimeTableName, String committingSegmentName,
-        String newSegmentName, SegmentAssignment segmentAssignment,
+        List<String> newSegmentNames, SegmentAssignment segmentAssignment,
         Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap) {
       updateInstanceStatesForNewConsumingSegment(_idealState.getRecord().getMapFields(), committingSegmentName,
-          newSegmentName, segmentAssignment, instancePartitionsMap);
+          null, segmentAssignment, instancePartitionsMap);
+      for (String segmentName : newSegmentNames) {
+        updateInstanceStatesForNewConsumingSegment(_idealState.getRecord().getMapFields(), null,
+            segmentName, segmentAssignment, instancePartitionsMap);
+      }
     }
 
     @Override
-    List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig, List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
-      return IntStream.range(0, _numPartitions).mapToObj(FakePartitionGroupMetadata::new).collect(Collectors.toList());
+    List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
+        List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
+      return IntStream.range(0, _numPartitions).mapToObj(i -> new PartitionGroupInfo(i,
+          getPartitionOffset(streamConfig, OffsetCriteria.SMALLEST_OFFSET_CRITERIA, i).toString()))
+          .collect(Collectors.toList());
     }
 
     @Override
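
For reference, a tiny sketch of the PartitionGroupInfo contract the fake override above relies on: each entry pairs a partitionGroupId with the serialized start offset for that group's next segment. LongMsgOffset(0) is a placeholder start offset.

    import java.util.List;
    import java.util.stream.Collectors;
    import java.util.stream.IntStream;
    import org.apache.pinot.spi.stream.LongMsgOffset;
    import org.apache.pinot.spi.stream.PartitionGroupInfo;

    public class PartitionGroupInfoSketch {
      public static void main(String[] args) {
        // two fake partition groups, each starting from offset 0
        List<PartitionGroupInfo> partitionGroupInfos = IntStream.range(0, 2)
            .mapToObj(i -> new PartitionGroupInfo(i, new LongMsgOffset(0).toString()))
            .collect(Collectors.toList());
        System.out.println(partitionGroupInfos.size()); // prints 2
      }
    }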


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 39/47: Use shardId's last digits as partitionGroupId

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit d7d04081131363e582340dfd8dc11fc1a92f3e5a
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Wed Jan 6 18:21:20 2021 -0800

    Use shardId's last digits as partitionGroupId
---
 .../helix/core/PinotHelixResourceManager.java      |  4 +-
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 10 +++
 .../kinesis/KinesisStreamMetadataProvider.java     | 79 +++++++++++++++++-----
 .../pinot/spi/stream/StreamMetadataProvider.java   |  3 +-
 4 files changed, 75 insertions(+), 21 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index b50da5f..d1c8755 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -1374,8 +1374,8 @@ public class PinotHelixResourceManager {
       // (unless there are low-level segments already present)
       if (ZKMetadataProvider.getLLCRealtimeSegments(_propertyStore, realtimeTableName).isEmpty()) {
         PinotTableIdealStateBuilder
-            .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
-                _enableBatchMessageMode);
+            .buildLowLevelRealtimeIdealStateFor(_pinotLLCRealtimeSegmentManager, realtimeTableName, realtimeTableConfig,
+                idealState, _enableBatchMessageMode);
         LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
       } else {
         LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 61ef719..bbd1ef3 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -519,6 +519,11 @@ public class PinotLLCRealtimeSegmentManager {
       PartitionGroupMetadata currentPartitionGroupMetadata = currentGroupIdToMetadata.get(newPartitionGroupId);
       if (currentPartitionGroupMetadata == null) { // not present in current state. New partition found.
         // make new segment
+        // FIXME: flushThreshold of segment is actually (configured threshold/numPartitions)
+        //  In Kinesis, with every split/merge, we get new partitions, and an old partition gets deactivated.
+        //  However, the getPartitionGroupInfo call returns ALL shards, regardless of whether they're active or not.
+        //  So our numPartitions will forever keep increasing.
+        // TODO: can the getPartitionGroupInfo return the active partitions only, based on the checkpoints passed in current?
         String newLLCSegmentName =
             setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo, newSegmentCreationTimeMs,
                 instancePartitions, numPartitions, numReplicas);
@@ -534,6 +539,11 @@ public class PinotLLCRealtimeSegmentManager {
           createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, newSegmentCreationTimeMs,
               committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
           newConsumingSegmentNames.add(newLLCSegmentName.getSegmentName());
+
+          // FIXME: a new CONSUMING segment is created even if EOL for this shard has been reached.
+          //  the logic in getPartitionGroupInfo to prevent returning of EOLed shards isn't working
+          //  OPTION: Since consumer knows about it, it can pass param in request/committingSegmentDescriptor "isEndOfShard"
+          //  We can set that in metadata for validation manager to skip these partitions
         }
       }
     }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
index f86d06c..6c55a18 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
@@ -6,7 +6,9 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeoutException;
+import java.util.stream.Collectors;
 import javax.annotation.Nonnull;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.pinot.spi.stream.OffsetCriteria;
 import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
@@ -16,44 +18,85 @@ import software.amazon.awssdk.services.kinesis.model.Shard;
 
 
 public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
-  private final KinesisConfig _kinesisConfig;
-  private KinesisConnectionHandler _kinesisConnectionHandler;
+  private final KinesisConnectionHandler _kinesisConnectionHandler;
 
   public KinesisStreamMetadataProvider(String clientId, KinesisConfig kinesisConfig) {
-    _kinesisConfig = kinesisConfig;
     _kinesisConnectionHandler = new KinesisConnectionHandler(kinesisConfig.getStream(), kinesisConfig.getAwsRegion());
   }
 
   @Override
   public int fetchPartitionCount(long timeoutMillis) {
-    return 0;
+    throw new UnsupportedOperationException();
   }
 
   @Override
-  public long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
-      throws TimeoutException {
-    return 0;
+  public long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis) {
+    throw new UnsupportedOperationException();
   }
 
   @Override
   public List<PartitionGroupInfo> getPartitionGroupInfoList(String clientId, StreamConfig streamConfig,
       List<PartitionGroupMetadata> currentPartitionGroupsMetadata, int timeoutMillis)
-      throws TimeoutException {
-    List<PartitionGroupInfo> partitionGroupInfos = new ArrayList<>();
+      throws IOException {
+
+    Map<Integer, PartitionGroupMetadata> currentPartitionGroupMap =
+        currentPartitionGroupsMetadata.stream().collect(Collectors.toMap(PartitionGroupMetadata::getPartitionGroupId, p -> p));
+
+    List<PartitionGroupInfo> newPartitionGroupInfos = new ArrayList<>();
     List<Shard> shards = _kinesisConnectionHandler.getShards();
-    for (Shard shard : shards) {
-      Map<String, String> shardToSequenceNumMap = new HashMap<>();
-      shardToSequenceNumMap.put(shard.shardId(), shard.sequenceNumberRange().startingSequenceNumber());
-      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardToSequenceNumMap);
-      partitionGroupInfos
-          .add(new PartitionGroupInfo(Math.abs(shard.shardId().hashCode()), kinesisCheckpoint.serialize()));
+    for (Shard shard : shards) { // go over all shards
+      String shardId = shard.shardId();
+      int partitionGroupId = getPartitionGroupIdFromShardId(shardId);
+      PartitionGroupMetadata currentPartitionGroupMetadata = currentPartitionGroupMap.get(partitionGroupId);
+      KinesisCheckpoint newStartCheckpoint;
+      if (currentPartitionGroupMetadata != null) { // existing shard
+        KinesisCheckpoint currentEndCheckpoint = null;
+        try {
+          currentEndCheckpoint = new KinesisCheckpoint(currentPartitionGroupMetadata.getEndCheckpoint());
+        } catch (Exception e) {
+          // ignore. No end checkpoint yet for IN_PROGRESS segment
+        }
+        if (currentEndCheckpoint != null) { // end checkpoint available i.e. committing segment
+          String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
+          if (endingSequenceNumber != null) { // shard has ended
+            // FIXME: this logic is not working
+            //  was expecting sequenceNumOfLastMsgInShard == endSequenceNumOfShard.
+            //  But it is much lesser than the endSeqNumOfShard
+            Map<String, String> shardToSequenceNumberMap = new HashMap<>();
+            shardToSequenceNumberMap.put(shardId, endingSequenceNumber);
+            KinesisCheckpoint shardEndCheckpoint = new KinesisCheckpoint(shardToSequenceNumberMap);
+            if (currentEndCheckpoint.compareTo(shardEndCheckpoint) >= 0) {
+              // shard has ended AND we have reached the end checkpoint.
+              // skip this partition group in the result
+              continue;
+            }
+          }
+          newStartCheckpoint = currentEndCheckpoint;
+        } else {
+          newStartCheckpoint = new KinesisCheckpoint(currentPartitionGroupMetadata.getStartCheckpoint());
+        }
+      } else { // new shard
+        Map<String, String> shardToSequenceNumberMap = new HashMap<>();
+        shardToSequenceNumberMap.put(shardId, shard.sequenceNumberRange().startingSequenceNumber());
+        newStartCheckpoint = new KinesisCheckpoint(shardToSequenceNumberMap);
+      }
+      newPartitionGroupInfos
+          .add(new PartitionGroupInfo(partitionGroupId, newStartCheckpoint.serialize()));
     }
-    return partitionGroupInfos;
+    return newPartitionGroupInfos;
+  }
+
+  /**
+   * Converts a shardId string to a partitionGroupId integer by parsing the digits of the shardId,
+   * e.g. "shardId-000000000001" becomes 1.
+   */
+  private int getPartitionGroupIdFromShardId(String shardId) {
+    String shardIdNum = StringUtils.stripStart(StringUtils.removeStart(shardId, "shardId-"), "0");
+    return shardIdNum.isEmpty() ? 0 : Integer.parseInt(shardIdNum);
   }
 
   @Override
-  public void close()
-      throws IOException {
+  public void close() {
 
   }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
index c64f710..be2e819 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
@@ -19,6 +19,7 @@
 package org.apache.pinot.spi.stream;
 
 import java.io.Closeable;
+import java.io.IOException;
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.List;
@@ -64,7 +65,7 @@ public interface StreamMetadataProvider extends Closeable {
    */
   default List<PartitionGroupInfo> getPartitionGroupInfoList(String clientId, StreamConfig streamConfig,
       List<PartitionGroupMetadata> currentPartitionGroupsMetadata, int timeoutMillis)
-      throws TimeoutException {
+      throws TimeoutException, IOException {
     int partitionCount = fetchPartitionCount(timeoutMillis);
     List<PartitionGroupInfo> newPartitionGroupInfoList = new ArrayList<>(partitionCount);
 


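A quick illustration of the shardId parsing above, since it carries the core assumption of this change. This is a standalone sketch, not part of the patch; the helper simply mirrors getPartitionGroupIdFromShardId, and only commons-lang3 (which the patch itself already uses) is assumed:

import org.apache.commons.lang3.StringUtils;

public class ShardIdParsingSketch {
  // Mirrors getPartitionGroupIdFromShardId: strip the "shardId-" prefix and
  // the leading zeros, then parse whatever digits remain.
  static int toPartitionGroupId(String shardId) {
    String digits = StringUtils.stripStart(StringUtils.removeStart(shardId, "shardId-"), "0");
    return digits.isEmpty() ? 0 : Integer.parseInt(digits);
  }

  public static void main(String[] args) {
    System.out.println(toPartitionGroupId("shardId-000000000000")); // 0
    System.out.println(toPartitionGroupId("shardId-000000000001")); // 1
    System.out.println(toPartitionGroupId("shardId-000000000042")); // 42
  }
}

The scheme assumes shardIds always follow the "shardId-" + zero-padded-digits convention; the fixme in PartitionGroupInfo about making partitionGroupId a string everywhere exists for exactly this reason.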


[incubator-pinot] 24/47: Handle exceptions

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 760ba067ab25a3dfaefc6a0534c53ea0f5d62672
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 23:41:06 2020 +0530

    Handle exceptions
---
 .../plugin/stream/kinesis/KinesisConsumer.java     | 59 +++++++++++++++++-----
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 910b9ee..dfd6cda 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -32,12 +32,18 @@ import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.ConsumerV2;
 import org.apache.pinot.spi.stream.v2.FetchResult;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.kinesis.model.ExpiredIteratorException;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
+import software.amazon.awssdk.services.kinesis.model.InvalidArgumentException;
 import software.amazon.awssdk.services.kinesis.model.KinesisException;
+import software.amazon.awssdk.services.kinesis.model.ProvisionedThroughputExceededException;
 import software.amazon.awssdk.services.kinesis.model.Record;
+import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 //TODO: Handle exceptions and timeout
@@ -46,6 +52,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
   Integer _maxRecords;
   String _shardId;
   ExecutorService _executorService;
+  private final Logger LOG = LoggerFactory.getLogger(KinesisConsumer.class);
 
   public KinesisConsumer(KinesisConfig kinesisConfig, PartitionGroupMetadata partitionGroupMetadata) {
     super(kinesisConfig.getStream(), kinesisConfig.getAwsRegion());
@@ -58,13 +65,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
 
   @Override
   public KinesisFetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
-    Future<KinesisFetchResult> kinesisFetchResultFuture = _executorService.submit(new Callable<KinesisFetchResult>() {
-      @Override
-      public KinesisFetchResult call()
-          throws Exception {
-        return getResult(start, end);
-      }
-    });
+    Future<KinesisFetchResult> kinesisFetchResultFuture = _executorService.submit(() -> getResult(start, end));
 
     try {
       return kinesisFetchResultFuture.get(timeout, TimeUnit.MILLISECONDS);
@@ -74,13 +75,13 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
   }
 
   private KinesisFetchResult getResult(Checkpoint start, Checkpoint end) {
+    List<Record> recordList = new ArrayList<>();
+    KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
+
     try {
-      KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
 
       String shardIterator = getShardIterator(kinesisStartCheckpoint);
 
-      List<Record> recordList = new ArrayList<>();
-
       String kinesisEndSequenceNumber = null;
 
       if (end != null) {
@@ -119,8 +120,42 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
       KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint, recordList);
 
       return kinesisFetchResult;
-    }catch (KinesisException e){
-      return null;
+    } catch (ProvisionedThroughputExceededException e) {
+      LOG.warn(
+          "The request rate for the stream is too high",
+          e);
+      return handleException(kinesisStartCheckpoint, recordList);
+    }
+    catch (ExpiredIteratorException e) {
+      LOG.warn(
+          "ShardIterator expired while trying to fetch records",
+          e);
+      return handleException(kinesisStartCheckpoint, recordList);
+    }
+    catch (ResourceNotFoundException | InvalidArgumentException e) {
+      // AWS resource/argument errors: return whatever has been fetched so far
+      LOG.error("Encountered AWS error while attempting to fetch records", e);
+      return handleException(kinesisStartCheckpoint, recordList);
+    }
+    catch (KinesisException e) {
+      LOG.warn("Encountered unknown unrecoverable AWS exception", e);
+      throw new RuntimeException(e);
+    }
+    catch (Throwable e) {
+      // non-transient errors: propagate to the caller
+      LOG.error("Unknown fetchRecords exception", e);
+      throw new RuntimeException(e);
+    }
+  }
+
+  private KinesisFetchResult handleException(KinesisCheckpoint start, List<Record> recordList) {
+    if (!recordList.isEmpty()) {
+      String nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
+      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber);
+      return new KinesisFetchResult(kinesisCheckpoint, recordList);
+    } else {
+      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(start.getSequenceNumber());
+      return new KinesisFetchResult(kinesisCheckpoint, recordList);
     }
   }
 


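The shape of this change is worth spelling out: the blocking GetRecords loop runs on a single-thread executor, the caller bounds it with Future.get(timeout), and recoverable failures funnel into handleException(...), which re-packages whatever records were read before the failure together with a checkpoint to resume from. A stripped-down sketch of that pattern, with illustrative names (the real code returns a KinesisFetchResult, not a bare list):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class TimedFetchSketch {
  private final ExecutorService _executorService = Executors.newSingleThreadExecutor();

  // Bound a blocking fetch with a timeout; on timeout or failure, fall back
  // to the records buffered so far instead of dropping them.
  public List<String> fetchWithTimeout(long timeoutMs) {
    List<String> buffer = new ArrayList<>();
    Future<List<String>> future = _executorService.submit(() -> doFetch(buffer));
    try {
      return future.get(timeoutMs, TimeUnit.MILLISECONDS);
    } catch (Exception e) {
      return buffer; // the analogue of handleException(start, recordList)
    }
  }

  private List<String> doFetch(List<String> buffer) {
    buffer.add("record-1"); // stand-in for the GetRecords loop
    return buffer;
  }
}

One consequence of this shape, visible in a later commit on this branch: once the buffer is handed to the task, a timed-out fetch returns a list the worker thread may still be appending to, so the buffer sharing needs care.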


[incubator-pinot] 37/47: Consumer tweaks to get it working

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 211620640d70377b4169b3cec47258501410af86
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Mon Jan 4 18:01:52 2021 -0800

    Consumer tweaks to get it working
---
 .../protocols/SegmentCompletionProtocol.java       | 19 ++++++++++++----
 .../plugin/stream/kinesis/KinesisConsumer.java     |  7 ++++--
 .../plugin/stream/kinesis/KinesisRecordsBatch.java | 16 ++++++--------
 .../org/apache/pinot/spi/stream/MessageBatch.java  |  2 ++
 .../pinot/spi/stream/PartitionGroupMetadata.java   | 25 +++++++++++-----------
 5 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java b/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
index 04f300b..dd1330d 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
@@ -24,6 +24,9 @@ import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
 import java.util.concurrent.TimeUnit;
 import org.apache.pinot.spi.utils.JsonUtils;
 
@@ -180,6 +183,15 @@ public class SegmentCompletionProtocol {
     }
 
     public String getUrl(String hostPort, String protocol) {
+      String streamPartitionMsgOffset;
+      try {
+        streamPartitionMsgOffset = _params.getStreamPartitionMsgOffset() == null ? null :
+            URLEncoder.encode(_params.getStreamPartitionMsgOffset(), StandardCharsets.UTF_8.toString());
+      } catch (UnsupportedEncodingException e) {
+        throw new IllegalStateException(
+            "Caught exception when encoding streamPartitionMsgOffset string: " + _params.getStreamPartitionMsgOffset(),
+            e);
+      }
       return protocol + "://" + hostPort + "/" + _msgType + "?" + PARAM_SEGMENT_NAME + "=" + _params.getSegmentName()
           + "&" + PARAM_OFFSET + "=" + _params.getOffset() + "&" + PARAM_INSTANCE_ID + "=" + _params.getInstanceId() + (
           _params.getReason() == null ? "" : ("&" + PARAM_REASON + "=" + _params.getReason())) + (
@@ -190,10 +202,9 @@ public class SegmentCompletionProtocol {
           + (_params.getSegmentSizeBytes() <= 0 ? ""
           : ("&" + PARAM_SEGMENT_SIZE_BYTES + "=" + _params.getSegmentSizeBytes())) + (_params.getNumRows() <= 0 ? ""
           : ("&" + PARAM_ROW_COUNT + "=" + _params.getNumRows())) + (_params.getSegmentLocation() == null ? ""
-          : ("&" + PARAM_SEGMENT_LOCATION + "=" + _params.getSegmentLocation()))
-          + (_params.getStreamPartitionMsgOffset() == null ? ""
-          : ("&" + PARAM_STREAM_PARTITION_MSG_OFFSET + "=" + _params.getStreamPartitionMsgOffset()))
-          ;
+          : ("&" + PARAM_SEGMENT_LOCATION + "=" + _params.getSegmentLocation())) + (
+          streamPartitionMsgOffset == null ? ""
+              : ("&" + PARAM_STREAM_PARTITION_MSG_OFFSET + "=" + streamPartitionMsgOffset));
     }
 
     public static class Params {
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 8ed3de7..a97f3dc 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -59,13 +59,13 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
   }
 
   @Override
-  public KinesisRecordsBatch fetchMessages(Checkpoint start, Checkpoint end, int timeout) {
+  public KinesisRecordsBatch fetchMessages(Checkpoint start, Checkpoint end, int timeoutMs) {
     List<Record> recordList = new ArrayList<>();
     Future<KinesisRecordsBatch> kinesisFetchResultFuture =
         _executorService.submit(() -> getResult(start, end, recordList));
 
     try {
-      return kinesisFetchResultFuture.get(timeout, TimeUnit.MILLISECONDS);
+      return kinesisFetchResultFuture.get(timeoutMs, TimeUnit.MILLISECONDS);
     } catch (Exception e) {
       return handleException((KinesisCheckpoint) start, recordList);
     }
@@ -127,6 +127,9 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
       }
 
       return new KinesisRecordsBatch(recordList, next.getKey());
+    } catch (IllegalStateException e) {
+      LOG.warn("Illegal state exception, connection is broken", e);
+      return handleException(kinesisStartCheckpoint, recordList);
     } catch (ProvisionedThroughputExceededException e) {
       LOG.warn("The request rate for the stream is too high", e);
       return handleException(kinesisStartCheckpoint, recordList);
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
index fb4bfb3..fdc883b 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
@@ -18,9 +18,11 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
+import java.nio.ByteBuffer;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import javax.annotation.Nullable;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.RowMetadata;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -28,8 +30,8 @@ import software.amazon.awssdk.services.kinesis.model.Record;
 
 
 public class KinesisRecordsBatch implements MessageBatch<byte[]> {
-  private List<Record> _recordList;
-  private String _shardId;
+  private final List<Record> _recordList;
+  private final String _shardId;
 
   public KinesisRecordsBatch(List<Record> recordList, String shardId) {
     _recordList = recordList;
@@ -43,12 +45,11 @@ public class KinesisRecordsBatch implements MessageBatch<byte[]> {
 
   @Override
   public byte[] getMessageAtIndex(int index) {
-    return _recordList.get(index).data().asByteBuffer().array();
+    return _recordList.get(index).data().asByteArray();
   }
-
   @Override
   public int getMessageOffsetAtIndex(int index) {
-    return _recordList.get(index).data().asByteBuffer().arrayOffset();
+    return ByteBuffer.wrap(_recordList.get(index).data().asByteArray()).arrayOffset();
   }
 
   @Override
@@ -57,11 +58,6 @@ public class KinesisRecordsBatch implements MessageBatch<byte[]> {
   }
 
   @Override
-  public RowMetadata getMetadataAtIndex(int index) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
   public StreamPartitionMsgOffset getNextStreamParitionMsgOffsetAtIndex(int index) {
     Map<String, String> shardToSequenceMap = new HashMap<>();
     shardToSequenceMap.put(_shardId, _recordList.get(index).sequenceNumber());
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
index 3052b9e..5af72c0 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.spi.stream;
 
+import javax.annotation.Nullable;
 import org.apache.pinot.spi.annotations.InterfaceAudience;
 import org.apache.pinot.spi.annotations.InterfaceStability;
 
@@ -61,6 +62,7 @@ public interface MessageBatch<T> {
    * Returns the metadata associated with the message at a particular index. This typically includes the timestamp
    * when the message was ingested by the upstream stream-provider and other relevant metadata.
    */
+  @Nullable
   default RowMetadata getMetadataAtIndex(int index) {
     return null;
   }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
index 7c4e3ef..aaf20b6 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
@@ -36,18 +36,7 @@ public class PartitionGroupMetadata {
     _sequenceNumber = sequenceNumber;
     _startCheckpoint = startCheckpoint;
     _endCheckpoint = endCheckpoint;
-  }
-
-  public void setSequenceNumber(int sequenceNumber) {
-    _sequenceNumber = sequenceNumber;
-  }
-
-  public void setStartCheckpoint(String startCheckpoint) {
-    _startCheckpoint = startCheckpoint;
-  }
-
-  public void setEndCheckpoint(String endCheckpoint) {
-    _endCheckpoint = endCheckpoint;
+    _status = status;
   }
 
   public int getPartitionGroupId() {
@@ -58,14 +47,26 @@ public class PartitionGroupMetadata {
     return _sequenceNumber;
   }
 
+  public void setSequenceNumber(int sequenceNumber) {
+    _sequenceNumber = sequenceNumber;
+  }
+
   public String getStartCheckpoint() {
     return _startCheckpoint;
   }
 
+  public void setStartCheckpoint(String startCheckpoint) {
+    _startCheckpoint = startCheckpoint;
+  }
+
   public String getEndCheckpoint() {
     return _endCheckpoint;
   }
 
+  public void setEndCheckpoint(String endCheckpoint) {
+    _endCheckpoint = endCheckpoint;
+  }
+
   public String getStatus() {
     return _status;
   }


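The URL-encoding addition in SegmentCompletionProtocol deserves a note: with sharded consumers, the stream partition offset serializes to a small JSON map (shardId to sequence number) rather than a plain long like a Kafka offset, so it contains characters that are illegal in a query string. A self-contained sketch (the offset literal is made up for illustration):

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

public class OffsetEncodingSketch {
  public static void main(String[] args) throws UnsupportedEncodingException {
    // A sharded offset is JSON, not a numeric Kafka-style offset
    String offset = "{\"shardId-000000000001\":\"123456\"}";
    // Same call as in getUrl(): percent-encode before appending as a query param
    String encoded = URLEncoder.encode(offset, StandardCharsets.UTF_8.toString());
    System.out.println(encoded); // %7B%22shardId-000000000001%22%3A%22123456%22%7D
  }
}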


[incubator-pinot] 32/47: Return message batch instead of list in the fetch result

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit c5c42d497320a3e7aedca4a7e1c43808e69222f9
Author: KKcorps <kh...@gmail.com>
AuthorDate: Thu Dec 31 11:24:42 2020 +0530

    Return message batch instead of list in the fetch result
---
 .../plugin/stream/kinesis/KinesisFetchResult.java  |  7 +--
 .../plugin/stream/kinesis/KinesisRecordsBatch.java | 52 ++++++++++++++++++++++
 .../plugin/stream/kinesis/KinesisConsumerTest.java |  7 +--
 .../apache/pinot/spi/stream/v2/FetchResult.java    |  3 +-
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
index aedcd5d..39561f3 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
@@ -20,12 +20,13 @@ package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.ArrayList;
 import java.util.List;
+import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.FetchResult;
 import software.amazon.awssdk.services.kinesis.model.Record;
 
 
-public class KinesisFetchResult implements FetchResult<Record> {
+public class KinesisFetchResult implements FetchResult<byte[]> {
   private final KinesisCheckpoint _kinesisCheckpoint;
   private final List<Record> _recordList;
 
@@ -40,7 +41,7 @@ public class KinesisFetchResult implements FetchResult<Record> {
   }
 
   @Override
-  public List<Record> getMessages() {
-    return _recordList;
+  public KinesisRecordsBatch getMessages() {
+    return new KinesisRecordsBatch(_recordList);
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
new file mode 100644
index 0000000..ed51f8f
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
@@ -0,0 +1,52 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import java.util.List;
+import org.apache.pinot.spi.stream.MessageBatch;
+import org.apache.pinot.spi.stream.RowMetadata;
+import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
+import software.amazon.awssdk.services.kinesis.model.Record;
+
+
+public class KinesisRecordsBatch implements MessageBatch<byte[]> {
+  private List<Record> _recordList;
+
+  public KinesisRecordsBatch(List<Record> recordList) {
+    _recordList = recordList;
+  }
+
+  @Override
+  public int getMessageCount() {
+    return _recordList.size();
+  }
+
+  @Override
+  public byte[] getMessageAtIndex(int index) {
+    return _recordList.get(index).data().asByteArray();
+  }
+
+  @Override
+  public int getMessageOffsetAtIndex(int index) {
+    //TODO: Doesn't translate to offset. Needs to be replaced.
+    return _recordList.get(index).hashCode();
+  }
+
+  @Override
+  public int getMessageLengthAtIndex(int index) {
+    return _recordList.get(index).data().asByteArray().length;
+  }
+
+  @Override
+  public RowMetadata getMetadataAtIndex(int index) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public StreamPartitionMsgOffset getNextStreamParitionMsgOffsetAtIndex(int index) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public long getNextStreamMessageOffsetAtIndex(int index) {
+    throw new UnsupportedOperationException();
+  }
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
index 17691c4..6f660f7 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
@@ -48,10 +48,11 @@ public class KinesisConsumerTest {
       KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shard.sequenceNumberRange().startingSequenceNumber());
       KinesisFetchResult fetchResult = kinesisConsumer.fetch(kinesisCheckpoint, null, 6 * 10 * 1000L);
 
-      List<Record> list = fetchResult.getMessages();
+      KinesisRecordsBatch list = fetchResult.getMessages();
+      int n = list.getMessageCount();
 
-      for (Record record : list) {
-        System.out.println("SEQ-NO: " + record.sequenceNumber() + ", DATA: " + record.data().asUtf8String());
+      for (int i = 0; i < n; i++) {
+        System.out.println("SEQ-NO: " + list.getMessageOffsetAtIndex(i) + ", DATA: " + new String(list.getMessageAtIndex(i)));
       }
     }
   }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
index 9d14473..2188ac9 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
@@ -19,10 +19,11 @@
 package org.apache.pinot.spi.stream.v2;
 
 import java.util.List;
+import org.apache.pinot.spi.stream.MessageBatch;
 
 
 public interface FetchResult<T> {
   Checkpoint getLastCheckpoint();
-  List<T> getMessages();
+  MessageBatch<T> getMessages();
 }
 


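Since FetchResult now returns a MessageBatch rather than a raw List<Record>, callers iterate by index, as the updated test does. A small usage sketch against the MessageBatch interface (any implementation works here, e.g. the new KinesisRecordsBatch; the class name is illustrative):

import org.apache.pinot.spi.stream.MessageBatch;

public class MessageBatchPrinter {
  // Index-based access is the only traversal MessageBatch offers
  static void printAll(MessageBatch<byte[]> batch) {
    int count = batch.getMessageCount();
    for (int i = 0; i < count; i++) {
      byte[] payload = batch.getMessageAtIndex(i);
      System.out.println("message " + i + " (" + batch.getMessageLengthAtIndex(i) + " bytes): " + new String(payload));
    }
  }
}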


[incubator-pinot] 05/47: Separate PartitionGroupInfo and PartitionGroupMetadata

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit a7fba5a7ffc843ea576d23e330cd2fd8441ee5fb
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Thu Dec 31 14:46:46 2020 -0800

    Separate PartitionGroupInfo and PartitionGroupMetadata
---
 .../helix/core/PinotHelixResourceManager.java      |  12 +--
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 108 ++++++++-------------
 .../impl/fakestream/FakeStreamConsumerFactory.java |   3 +-
 .../kafka09/KafkaPartitionLevelConsumerTest.java   |   2 +-
 .../kafka20/KafkaStreamMetadataProvider.java       |  28 ++++--
 .../pinot/spi/stream/PartitionGroupInfo.java       |  43 ++++++++
 .../pinot/spi/stream/StreamMetadataProvider.java   |   6 +-
 7 files changed, 115 insertions(+), 87 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index 1f36e4f..f0d52bc 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -126,12 +126,7 @@ import org.apache.pinot.spi.config.table.TenantConfig;
 import org.apache.pinot.spi.config.table.assignment.InstancePartitionsType;
 import org.apache.pinot.spi.config.tenant.Tenant;
 import org.apache.pinot.spi.data.Schema;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
-import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
 import org.apache.pinot.spi.stream.StreamConfig;
-import org.apache.pinot.spi.stream.StreamConsumerFactory;
-import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
-import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
 import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.apache.pinot.spi.utils.retry.RetryPolicies;
@@ -1361,9 +1356,8 @@ public class PinotHelixResourceManager {
       idealState = PinotTableIdealStateBuilder
           .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
               _enableBatchMessageMode);
-      _pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
-      LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
-      _pinotLLCRealtimeSegmentManager.setupNewShardedTable(rawRealtimeTableConfig, idealState);
+      _pinotLLCRealtimeSegmentManager.setupNewShardedTable(realtimeTableConfig, idealState);
+      LOGGER.info("Successfully setup table for SHARDED consumers for {} ", realtimeTableName);
     } else {
 
       if (streamConfig.hasHighLevelConsumerType()) {
@@ -1391,7 +1385,7 @@ public class PinotHelixResourceManager {
           idealState = PinotTableIdealStateBuilder
               .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
                   _enableBatchMessageMode);
-          _pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
+          _pinotLLCRealtimeSegmentManager.setupNewShardedTable(realtimeTableConfig, idealState);
           LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
         } else {
           LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 9b03fa4..528125b 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -29,6 +29,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
@@ -78,6 +79,7 @@ import org.apache.pinot.spi.config.table.assignment.InstancePartitionsType;
 import org.apache.pinot.spi.filesystem.PinotFS;
 import org.apache.pinot.spi.filesystem.PinotFSFactory;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
 import org.apache.pinot.spi.stream.PartitionOffsetFetcher;
@@ -221,8 +223,14 @@ public class PinotLLCRealtimeSegmentManager {
     StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
     StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
         .createStreamMetadataProvider(streamConfig.getTopicName() + "_" + System.currentTimeMillis());
-    List<PartitionGroupMetadata> newPartitionGroupMetadataList =
-        streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 5000);
+
+    List<PartitionGroupInfo> newPartitionGroupMetadataList;
+    try {
+      newPartitionGroupMetadataList =
+          streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 5000);
+    } catch (TimeoutException e) {
+      throw new IllegalStateException(e);
+    }
     int numPartitionGroups = newPartitionGroupMetadataList.size();
 
     InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
@@ -234,8 +242,8 @@ public class PinotLLCRealtimeSegmentManager {
 
     long currentTimeMs = getCurrentTimeMs();
     Map<String, Map<String, String>> instanceStatesMap = idealState.getRecord().getMapFields();
-    for (PartitionGroupMetadata partitionGroupMetadata : newPartitionGroupMetadataList) {
-      String segmentName = setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupMetadata.getPartitionGroupId(),
+    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupMetadataList) {
+      String segmentName = setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo,
           currentTimeMs, instancePartitions, numPartitionGroups, numReplicas);
       updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, segmentName, segmentAssignment,
           instancePartitionsMap);
@@ -277,50 +285,6 @@ public class PinotLLCRealtimeSegmentManager {
   }
 
   /**
-   * Sets up the initial segments for a new LLC real-time table.
-   * <p>NOTE: the passed in IdealState may contain HLC segments if both HLC and LLC are configured.
-   */
-  public void setUpNewTable(TableConfig tableConfig, IdealState idealState) {
-    Preconditions.checkState(!_isStopping, "Segment manager is stopping");
-
-    String realtimeTableName = tableConfig.getTableName();
-    LOGGER.info("Setting up new LLC table: {}", realtimeTableName);
-
-    // Make sure all the existing segments are HLC segments
-    List<String> currentSegments = getAllSegments(realtimeTableName);
-    for (String segmentName : currentSegments) {
-      // TODO: Should return 4xx HTTP status code. Currently all exceptions are returning 500
-      Preconditions.checkState(SegmentName.isHighLevelConsumerSegmentName(segmentName),
-          "Cannot set up new LLC table: %s with existing non-HLC segment: %s", realtimeTableName, segmentName);
-    }
-
-    _flushThresholdUpdateManager.clearFlushThresholdUpdater(realtimeTableName);
-
-    PartitionLevelStreamConfig streamConfig =
-        new PartitionLevelStreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
-    InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
-    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
-    int numPartitionGroups = getPartitionGroupMetadataList(streamConfig, currentPartitionGroupMetadataList).size();
-    int numReplicas = getNumReplicas(tableConfig, instancePartitions);
-
-    SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
-    Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap =
-        Collections.singletonMap(InstancePartitionsType.CONSUMING, instancePartitions);
-
-    long currentTimeMs = getCurrentTimeMs();
-    Map<String, Map<String, String>> instanceStatesMap = idealState.getRecord().getMapFields();
-    for (int partitionGroupId = 0; partitionGroupId < numPartitionGroups; partitionGroupId++) {
-      String segmentName =
-          setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupId, currentTimeMs, instancePartitions, numPartitionGroups,
-              numReplicas);
-      updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, segmentName, segmentAssignment,
-          instancePartitionsMap);
-    }
-
-    setIdealState(realtimeTableName, idealState);
-  }
-
-  /**
    * Removes all LLC segments from the given IdealState.
    */
   public void removeLLCSegments(IdealState idealState) {
@@ -538,15 +502,23 @@ public class PinotLLCRealtimeSegmentManager {
     // Step-2
 
     // Say we currently were consuming from 3 shards A, B, C. Of those, A is the one committing. Also suppose that new partition D has come up
+
     // get current partition groups - this gives current state of latest segments for each partition [A - DONE], [B - IN_PROGRESS], [C - IN_PROGRESS]
     List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
-    StreamConfig streamConfig = new StreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
+    PartitionLevelStreamConfig streamConfig = new PartitionLevelStreamConfig(tableConfig.getTableName(),
+        IngestionConfigUtils.getStreamConfigMap(tableConfig));
     StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
     StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
         .createStreamMetadataProvider(streamConfig.getTopicName() + " " + System.currentTimeMillis());
+
     // find new partition groups [A],[B],[C],[D]
-    List<PartitionGroupMetadata> newPartitionGroupMetadataList =
-        streamMetadataProvider.getPartitionGroupMetadataList(currentPartitionGroupMetadataList, 1000);
+    List<PartitionGroupInfo> newPartitionGroupMetadataList;
+    try {
+      newPartitionGroupMetadataList =
+          streamMetadataProvider.getPartitionGroupMetadataList(currentPartitionGroupMetadataList, 1000);
+    } catch (TimeoutException e) {
+      throw new IllegalStateException(e);
+    }
 
     // create new segment metadata, only if it is not IN_PROGRESS in the current state
     Map<Integer, PartitionGroupMetadata> currentGroupIdToMetadata = currentPartitionGroupMetadataList.stream().collect(
@@ -555,25 +527,24 @@ public class PinotLLCRealtimeSegmentManager {
     List<String> newConsumingSegmentNames = new ArrayList<>();
     String rawTableName = TableNameBuilder.extractRawTableName(realtimeTableName);
     long newSegmentCreationTimeMs = getCurrentTimeMs();
-    for (PartitionGroupMetadata partitionGroupMetadata : newPartitionGroupMetadataList) {
-      int newPartitionGroupId = partitionGroupMetadata.getPartitionGroupId();
+    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupMetadataList) {
+      int newPartitionGroupId = partitionGroupInfo.getPartitionGroupId();
       PartitionGroupMetadata currentPartitionGroupMetadata = currentGroupIdToMetadata.get(newPartitionGroupId);
-      if (currentPartitionGroupMetadata == null) { // not present in current state
+      if (currentPartitionGroupMetadata == null) { // not present in current state. New partition found.
         // make new segment
-        LLCSegmentName newLLCSegmentName =
-            new LLCSegmentName(rawTableName, newPartitionGroupId, STARTING_SEQUENCE_NUMBER, newSegmentCreationTimeMs);
-        createNewSegmentZKMetadata(tableConfig, new PartitionLevelStreamConfig(tableConfig.getTableName(),
-                IngestionConfigUtils.getStreamConfigMap(tableConfig)), newLLCSegmentName, newSegmentCreationTimeMs,
-            committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
-        newConsumingSegmentNames.add(newLLCSegmentName.getSegmentName());
+        String newLLCSegmentName =
+            setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo, newSegmentCreationTimeMs,
+                instancePartitions, numPartitions, numReplicas);
+        newConsumingSegmentNames.add(newLLCSegmentName);
       } else {
         String currentStatus = currentPartitionGroupMetadata.getStatus();
-        if (!currentStatus.equals(Status.IN_PROGRESS.toString())) { // not IN_PROGRESS anymore in current state
-          // make new segment
+        if (!currentStatus.equals(Status.IN_PROGRESS.toString())) {
+          // not IN_PROGRESS anymore in current state. Should be DONE.
+          // This should ONLY happen for the committing segment's partition; trigger a new consuming segment
+          // todo: skip this if the partition doesn't match the committing segment?
           LLCSegmentName newLLCSegmentName = new LLCSegmentName(rawTableName, newPartitionGroupId,
               currentPartitionGroupMetadata.getSequenceNumber() + 1, newSegmentCreationTimeMs);
-          createNewSegmentZKMetadata(tableConfig, new PartitionLevelStreamConfig(tableConfig.getTableName(),
-                  IngestionConfigUtils.getStreamConfigMap(tableConfig)), newLLCSegmentName, newSegmentCreationTimeMs,
+          createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, newSegmentCreationTimeMs,
               committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
           newConsumingSegmentNames.add(newLLCSegmentName.getSegmentName());
         }
@@ -1181,19 +1152,20 @@ public class PinotLLCRealtimeSegmentManager {
    * Sets up a new partition.
    * <p>Persists the ZK metadata for the first CONSUMING segment, and returns the segment name.
    */
-  private String setupNewPartitionGroup(TableConfig tableConfig, PartitionLevelStreamConfig streamConfig, int partitionGroupId,
+  private String setupNewPartitionGroup(TableConfig tableConfig, PartitionLevelStreamConfig streamConfig, PartitionGroupInfo partitionGroupInfo,
       long creationTimeMs, InstancePartitions instancePartitions, int numPartitionGroups, int numReplicas) {
     String realtimeTableName = tableConfig.getTableName();
+    int partitionGroupId = partitionGroupInfo.getPartitionGroupId();
+    String startCheckpoint = partitionGroupInfo.getStartCheckpoint();
     LOGGER.info("Setting up new partition group: {} for table: {}", partitionGroupId, realtimeTableName);
 
     String rawTableName = TableNameBuilder.extractRawTableName(realtimeTableName);
     LLCSegmentName newLLCSegmentName =
         new LLCSegmentName(rawTableName, partitionGroupId, STARTING_SEQUENCE_NUMBER, creationTimeMs);
     String newSegmentName = newLLCSegmentName.getSegmentName();
-    StreamPartitionMsgOffset startOffset =
-        getPartitionOffset(streamConfig, streamConfig.getOffsetCriteria(), partitionGroupId);
+
     CommittingSegmentDescriptor committingSegmentDescriptor =
-        new CommittingSegmentDescriptor(null, startOffset.toString(), 0);
+        new CommittingSegmentDescriptor(null, startCheckpoint, 0);
     createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, creationTimeMs,
         committingSegmentDescriptor, null, instancePartitions, numPartitionGroups, numReplicas);
 
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
index 289b226..54be1b6 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.core.realtime.impl.fakestream;
 
+import java.util.Collections;
 import java.util.Set;
 import org.apache.pinot.core.util.IngestionUtils;
 import org.apache.pinot.spi.config.table.TableConfig;
@@ -87,7 +88,7 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
 
     // stream metadata provider
     StreamMetadataProvider streamMetadataProvider = streamConsumerFactory.createStreamMetadataProvider(clientId);
-    int partitionCount = streamMetadataProvider.getPartitionGroupMetadataList(null, 10_000).size();
+    int partitionCount = streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 10_000).size();
     System.out.println(partitionCount);
 
     // Partition metadata provider
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
index fbdfdfb..43b72a8 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
@@ -291,7 +291,7 @@ public class KafkaPartitionLevelConsumerTest {
 
     KafkaStreamMetadataProvider streamMetadataProvider =
         new KafkaStreamMetadataProvider(clientId, streamConfig, mockKafkaSimpleConsumerFactory);
-    Assert.assertEquals(streamMetadataProvider.getPartitionGroupMetadataList(null, 10000L), 2);
+    Assert.assertEquals(streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 10000L).size(), 2);
   }
 
   @Test
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
index 187c61b..eb606f2 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
@@ -29,6 +29,7 @@ import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
@@ -37,12 +38,15 @@ import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 
 public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHandler implements StreamMetadataProvider {
 
+  private StreamConfig _streamConfig;
+
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig) {
     this(clientId, streamConfig, Integer.MIN_VALUE);
   }
 
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig, int partition) {
     super(clientId, streamConfig, partition);
+    _streamConfig = streamConfig;
   }
 
   @Override
@@ -57,14 +61,26 @@ public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHa
    *                                       Hence current partition groups are not needed to compute the new partition groups
    */
   @Override
-  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-      @Nullable List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis) {
+  public List<PartitionGroupInfo> getPartitionGroupMetadataList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
+      throws TimeoutException {
     int partitionCount = _consumer.partitionsFor(_topic, Duration.ofMillis(timeoutMillis)).size();
-    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>(partitionCount);
-    for (int i = 0; i < partitionCount; i++) {
-      partitionGroupMetadataList.add(new KafkaPartitionGroupMetadata(i));
+    List<PartitionGroupInfo> newPartitionGroupInfoList = new ArrayList<>(partitionCount);
+
+    // Add a PartitionGroupInfo to the list for each partition already present in the current state;
+    // its end checkpoint becomes the new start checkpoint
+    for (PartitionGroupMetadata currentPartitionGroupMetadata : currentPartitionGroupsMetadata) {
+      newPartitionGroupInfoList.add(new PartitionGroupInfo(currentPartitionGroupMetadata.getPartitionGroupId(),
+          currentPartitionGroupMetadata.getEndCheckpoint()));
+    }
+    // Add a PartitionGroupInfo for each newly added partition,
+    // using the offset criteria from the stream config
+    for (int i = currentPartitionGroupsMetadata.size(); i < partitionCount; i++) {
+      StreamPartitionMsgOffset streamPartitionMsgOffset =
+          fetchStreamPartitionOffset(_streamConfig.getOffsetCriteria(), 5000);
+      newPartitionGroupInfoList.add(new PartitionGroupInfo(i, streamPartitionMsgOffset.toString()));
     }
-    return partitionGroupMetadataList;
+    return newPartitionGroupInfoList;
   }
 
   public synchronized long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java
new file mode 100644
index 0000000..438e148
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.stream;
+
+public class PartitionGroupInfo {
+
+  // fixme: Make partitionGroupId string everywhere (LLCSegmentName, StreamMetadataProvider)
+  private final int _partitionGroupId;
+  private String _startCheckpoint;
+
+  public PartitionGroupInfo(int partitionGroupId, String startCheckpoint) {
+    _partitionGroupId = partitionGroupId;
+    _startCheckpoint = startCheckpoint;
+  }
+
+  public void setStartCheckpoint(String startCheckpoint) {
+    _startCheckpoint = startCheckpoint;
+  }
+
+  public int getPartitionGroupId() {
+    return _partitionGroupId;
+  }
+
+  public String getStartCheckpoint() {
+    return _startCheckpoint;
+  }
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
index 5b9104e..a9cd2d6 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
@@ -20,6 +20,7 @@ package org.apache.pinot.spi.stream;
 
 import java.io.Closeable;
 import java.util.List;
+import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
 import org.apache.pinot.spi.annotations.InterfaceAudience;
 import org.apache.pinot.spi.annotations.InterfaceStability;
@@ -39,8 +40,9 @@ public interface StreamMetadataProvider extends Closeable {
   int fetchPartitionCount(long timeoutMillis);
 
   // takes the current state of partition groups (groupings of shards, the state of the consumption) and creates the new state
-  List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis);
+  List<PartitionGroupInfo> getPartitionGroupMetadataList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
+      throws TimeoutException;
 
   // Issue 5953 Retain this interface for 0.5.0, remove in 0.6.0
   @Deprecated


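The reconciliation rule introduced for Kafka here is simple but easy to miss in the diff: partitions that already have a partition group carry forward and resume from their end checkpoint, while partition ids beyond the current list are new and start from the configured offset criteria. A simplified sketch with stand-in types (the real code uses PartitionGroupMetadata, PartitionGroupInfo and fetchStreamPartitionOffset):

import java.util.ArrayList;
import java.util.List;

public class PartitionGroupReconcileSketch {
  static class GroupMetadata {          // stand-in for PartitionGroupMetadata
    final int _id;
    final String _endCheckpoint;
    GroupMetadata(int id, String endCheckpoint) { _id = id; _endCheckpoint = endCheckpoint; }
  }

  static class GroupInfo {              // stand-in for PartitionGroupInfo
    final int _id;
    final String _startCheckpoint;
    GroupInfo(int id, String startCheckpoint) { _id = id; _startCheckpoint = startCheckpoint; }
  }

  static List<GroupInfo> reconcile(List<GroupMetadata> current, int partitionCount) {
    List<GroupInfo> result = new ArrayList<>(partitionCount);
    // Existing partitions: carry forward, resuming from the end checkpoint
    for (GroupMetadata m : current) {
      result.add(new GroupInfo(m._id, m._endCheckpoint));
    }
    // New partitions: ids start where the current list ends
    for (int i = current.size(); i < partitionCount; i++) {
      result.add(new GroupInfo(i, "smallest" /* illustrative offset criteria */));
    }
    return result;
  }
}

This leans on the assumption that Kafka partition ids are dense integers 0..partitionCount-1, so the new partitions are exactly those with id >= currentPartitionGroupsMetadata.size().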


[incubator-pinot] 30/47: Handle timeout exception in consumer and make shard iterator type configurable

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit cf23ee3a83a0ea22d0dde57e306ccadf30db4d1c
Author: KKcorps <kh...@gmail.com>
AuthorDate: Thu Dec 24 17:48:04 2020 +0530

    Handle timeout exception in consumer and make shard iterator type configurable
---
 .../plugin/stream/kinesis/KinesisCheckpoint.java   |  1 -
 .../pinot/plugin/stream/kinesis/KinesisConfig.java |  8 +++++
 .../stream/kinesis/KinesisConnectionHandler.java   |  1 +
 .../plugin/stream/kinesis/KinesisConsumer.java     | 36 +++++++++-------------
 .../stream/kinesis/KinesisShardMetadata.java       |  2 +-
 .../plugin/stream/kinesis/KinesisConsumerTest.java |  8 +++--
 6 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 8de95e2..027b789 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -20,7 +20,6 @@ package org.apache.pinot.plugin.stream.kinesis;
 
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 
-
 public class KinesisCheckpoint implements Checkpoint {
   String _sequenceNumber;
 
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index a81d11f..82fc438 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -20,6 +20,7 @@ package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.Map;
 import org.apache.pinot.spi.stream.StreamConfig;
+import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
 public class KinesisConfig {
@@ -28,9 +29,11 @@ public class KinesisConfig {
   public static final String STREAM = "stream";
   private static final String AWS_REGION = "aws-region";
   private static final String MAX_RECORDS_TO_FETCH = "max-records-to-fetch";
+  public static final String SHARD_ITERATOR_TYPE = "shard-iterator-type";
 
   private static final String DEFAULT_AWS_REGION = "us-central-1";
   private static final String DEFAULT_MAX_RECORDS = "20";
+  private static final String DEFAULT_SHARD_ITERATOR_TYPE = "LATEST";
 
   public KinesisConfig(StreamConfig streamConfig) {
     _props = streamConfig.getStreamConfigsMap();
@@ -51,4 +54,9 @@ public class KinesisConfig {
   public Integer maxRecordsToFetch(){
     return Integer.parseInt(_props.getOrDefault(MAX_RECORDS_TO_FETCH, DEFAULT_MAX_RECORDS));
   }
+
+  public ShardIteratorType getShardIteratorType() {
+    return ShardIteratorType.fromValue(_props.getOrDefault(SHARD_ITERATOR_TYPE, DEFAULT_SHARD_ITERATOR_TYPE));
+  }
+
 }
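Before the consumer changes below, a quick illustration of the new shard-iterator knob; the keys mirror the constants defined in KinesisConfig above, and the StreamConfig plumbing that normally supplies the map is omitted:

import java.util.HashMap;
import java.util.Map;
import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;

public class KinesisConfigSketch {
  public static void main(String[] args) {
    // Hypothetical stream properties, as they would appear in a table config
    Map<String, String> props = new HashMap<>();
    props.put("stream", "pinot-events");
    props.put("aws-region", "us-west-2");
    props.put("shard-iterator-type", "AT_SEQUENCE_NUMBER");

    // Same parsing as KinesisConfig.getShardIteratorType(); defaults to LATEST
    ShardIteratorType type =
        ShardIteratorType.fromValue(props.getOrDefault("shard-iterator-type", "LATEST"));
    System.out.println(type); // AT_SEQUENCE_NUMBER
  }
}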
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index 3607787..0cf4787 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -71,4 +71,5 @@ public class KinesisConnectionHandler {
       _kinesisClient = null;
     }
   }
+
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 3263f87..abbc753 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -51,6 +51,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
   Integer _maxRecords;
   String _shardId;
   ExecutorService _executorService;
+  ShardIteratorType _shardIteratorType;
   private final Logger LOG = LoggerFactory.getLogger(KinesisConsumer.class);
 
   public KinesisConsumer(KinesisConfig kinesisConfig, PartitionGroupMetadata partitionGroupMetadata) {
@@ -59,22 +60,23 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     _maxRecords = kinesisConfig.maxRecordsToFetch();
     KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) partitionGroupMetadata;
     _shardId = kinesisShardMetadata.getShardId();
+    _shardIteratorType = kinesisConfig.getShardIteratorType();
     _executorService = Executors.newSingleThreadExecutor();
   }
 
   @Override
   public KinesisFetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
-    Future<KinesisFetchResult> kinesisFetchResultFuture = _executorService.submit(() -> getResult(start, end));
+    List<Record> recordList = new ArrayList<>();
+    Future<KinesisFetchResult> kinesisFetchResultFuture = _executorService.submit(() -> getResult(start, end, recordList));
 
     try {
       return kinesisFetchResultFuture.get(timeout, TimeUnit.MILLISECONDS);
     } catch(Exception e){
-      return null;
+      return handleException((KinesisCheckpoint) start, recordList);
     }
   }
 
-  private KinesisFetchResult getResult(Checkpoint start, Checkpoint end) {
-    List<Record> recordList = new ArrayList<>();
+  private KinesisFetchResult getResult(Checkpoint start, Checkpoint end, List<Record> recordList) {
     KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
 
     try {
@@ -83,7 +85,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
         createConnection();
       }
 
-      String shardIterator = getShardIterator(kinesisStartCheckpoint);
+      String shardIterator = getShardIterator(kinesisStartCheckpoint.getSequenceNumber());
 
       String kinesisEndSequenceNumber = null;
 
@@ -162,25 +164,15 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     }
   }
 
-  private String getShardIterator(KinesisCheckpoint kinesisStartCheckpoint) {
-    if (kinesisStartCheckpoint.getSequenceNumber() != null) {
-      return getShardIterator(ShardIteratorType.AT_SEQUENCE_NUMBER, kinesisStartCheckpoint.getSequenceNumber());
-    } else {
-      return getShardIterator(ShardIteratorType.LATEST, null);
-    }
-  }
+  public String getShardIterator(String sequenceNumber) {
 
-  public String getShardIterator(ShardIteratorType shardIteratorType, String sequenceNumber){
-    if(sequenceNumber == null){
-      return _kinesisClient.getShardIterator(
-          GetShardIteratorRequest.builder().shardId(_shardId).streamName(_stream)
-              .shardIteratorType(shardIteratorType).build()).shardIterator();
-    }else{
-      return _kinesisClient.getShardIterator(
-          GetShardIteratorRequest.builder().streamName(_stream).shardId(_shardId)
-              .shardIteratorType(shardIteratorType)
-              .startingSequenceNumber(sequenceNumber).build()).shardIterator();
+    GetShardIteratorRequest.Builder requestBuilder =
+        GetShardIteratorRequest.builder().streamName(_stream).shardId(_shardId).shardIteratorType(_shardIteratorType);
+
+    if (sequenceNumber != null) {
+      requestBuilder = requestBuilder.startingSequenceNumber(sequenceNumber);
     }
+    return _kinesisClient.getShardIterator(requestBuilder.build()).shardIterator();
   }
 
   @Override
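
For context, the consolidated getShardIterator above is the standard AWS SDK v2 builder call; a hedged standalone sketch (the stream name, shard id and sequence number below are illustrative, and kinesisClient is assumed to be a configured software.amazon.awssdk.services.kinesis.KinesisClient):

    GetShardIteratorRequest.Builder requestBuilder = GetShardIteratorRequest.builder()
        .streamName("kinesis-test")
        .shardId("shardId-000000000000")
        .shardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER);
    // startingSequenceNumber only applies to the AT_/AFTER_SEQUENCE_NUMBER iterator types
    requestBuilder.startingSequenceNumber("49590338271490256608559692538361571095921575989136588898");
    String shardIterator = kinesisClient.getShardIterator(requestBuilder.build()).shardIterator();
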
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
index 327e034..1d753c3 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -25,7 +25,7 @@ import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
 import software.amazon.awssdk.services.kinesis.model.SequenceNumberRange;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
-//TODO: Implement shardId as Array
+//TODO: Implement shardId as Array and have unique id
 public class KinesisShardMetadata extends KinesisConnectionHandler implements PartitionGroupMetadata {
   String _shardId;
   KinesisCheckpoint _startCheckpoint;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
index f8a0551..17691c4 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
@@ -20,6 +20,8 @@ package org.apache.pinot.plugin.stream.kinesis; /**
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 import software.amazon.awssdk.services.kinesis.model.Record;
 import software.amazon.awssdk.services.kinesis.model.Shard;
 
@@ -29,7 +31,8 @@ public class KinesisConsumerTest {
     Map<String, String> props = new HashMap<>();
     props.put("stream", "kinesis-test");
     props.put("aws-region", "us-west-2");
-    props.put("maxRecords", "10");
+    props.put("max-records-to-fetch", "2000");
+    props.put("shard-iterator-type", "AT_SEQUENCE_NUMBER");
 
     KinesisConfig kinesisConfig = new KinesisConfig(props);
 
@@ -38,6 +41,8 @@ public class KinesisConsumerTest {
     List<Shard> shardList = kinesisConnectionHandler.getShards();
 
     for(Shard shard : shardList) {
+      System.out.println("SHARD: " + shard.shardId());
+
       KinesisConsumer kinesisConsumer = new KinesisConsumer(kinesisConfig, new KinesisShardMetadata(shard.shardId(), "kinesis-test", "us-west-2"));
 
       KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shard.sequenceNumberRange().startingSequenceNumber());
@@ -45,7 +50,6 @@ public class KinesisConsumerTest {
 
       List<Record> list = fetchResult.getMessages();
 
-      System.out.println("SHARD: " + shard.shardId());
       for (Record record : list) {
         System.out.println("SEQ-NO: " + record.sequenceNumber() + ", DATA: " + record.data().asUtf8String());
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


[incubator-pinot] 42/47: Cleanup, javadocs, comments

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit ce1a6462084dfa05d9b8c2b57a23a9c8274725e4
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Fri Jan 8 18:28:04 2021 -0800

    Cleanup, javadocs, comments
---
 .../protocols/SegmentCompletionProtocol.java       |  1 -
 .../helix/core/PinotTableIdealStateBuilder.java    |  8 +-
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 76 ++++++++++---------
 .../PinotLLCRealtimeSegmentManagerTest.java        | 21 ++----
 .../realtime/LLRealtimeSegmentDataManager.java     | 21 +++---
 .../plugin/stream/kinesis/KinesisCheckpoint.java   | 10 ++-
 .../pinot/plugin/stream/kinesis/KinesisConfig.java |  3 +
 .../stream/kinesis/KinesisConnectionHandler.java   | 17 +++--
 .../plugin/stream/kinesis/KinesisConsumer.java     | 20 +++--
 .../stream/kinesis/KinesisConsumerFactory.java     |  8 +-
 .../stream/kinesis/KinesisMsgOffsetFactory.java    |  4 +
 .../plugin/stream/kinesis/KinesisRecordsBatch.java |  6 +-
 .../kinesis/KinesisStreamMetadataProvider.java     | 27 +++----
 .../org/apache/pinot/spi/stream/Checkpoint.java    |  5 ++
 .../stream/PartitionGroupCheckpointFactory.java    | 12 +--
 .../pinot/spi/stream/PartitionGroupConsumer.java   | 16 +++-
 .../pinot/spi/stream/PartitionGroupInfo.java       | 13 ++--
 .../spi/stream/PartitionGroupInfoFetcher.java      |  2 +-
 .../pinot/spi/stream/PartitionGroupMetadata.java   |  4 -
 .../pinot/spi/stream/PartitionLevelConsumer.java   |  6 +-
 .../pinot/spi/stream/PartitionOffsetFetcher.java   | 88 ----------------------
 .../pinot/spi/stream/StreamConsumerFactory.java    | 10 ++-
 .../pinot/spi/stream/StreamMetadataProvider.java   |  9 +--
 23 files changed, 170 insertions(+), 217 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java b/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
index 74614df..dd1330d 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
@@ -138,7 +138,6 @@ public class SegmentCompletionProtocol {
 
   public static final String REASON_ROW_LIMIT = "rowLimit";  // Stop reason sent by server as max num rows reached
   public static final String REASON_TIME_LIMIT = "timeLimit";  // Stop reason sent by server as max time reached
-  public static final String REASON_END_OF_PARTITION_GROUP = "endOfPartitionGroup";  // Stop reason sent by server as end of partitionGroup reached
 
   // Canned responses
   public static final Response RESP_NOT_LEADER =
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
index 68bcf57..98fbd5d 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
@@ -117,6 +117,12 @@ public class PinotTableIdealStateBuilder {
     pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
   }
 
+  /**
+   * Fetches the list of {@link PartitionGroupInfo} for the stream, with the help of the current partitionGroups metadata
+   * This call will only skip partitions which have reached end of life and whose messages have all been consumed.
+   * The current partition group metadata is used to determine the offsets that have been consumed for a partition.
+   * The current partition group metadata is also used to know about existing partition groupings which should not be disturbed
+   */
   public static List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
       List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
     PartitionGroupInfoFetcher partitionGroupInfoFetcher =
@@ -126,7 +132,7 @@ public class PinotTableIdealStateBuilder {
       return partitionGroupInfoFetcher.getPartitionGroupInfoList();
     } catch (Exception e) {
       Exception fetcherException = partitionGroupInfoFetcher.getException();
-      LOGGER.error("Could not get partition count for {}", streamConfig.getTopicName(), fetcherException);
+      LOGGER.error("Could not get partition group info for {}", streamConfig.getTopicName(), fetcherException);
       throw new RuntimeException(fetcherException);
     }
   }
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 9a0786b..27d487b 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -24,7 +24,6 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -77,11 +76,11 @@ import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.assignment.InstancePartitionsType;
 import org.apache.pinot.spi.filesystem.PinotFS;
 import org.apache.pinot.spi.filesystem.PinotFSFactory;
+import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.OffsetCriteria;
 import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
-import org.apache.pinot.spi.stream.PartitionOffsetFetcher;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConfigProperties;
 import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
@@ -164,17 +163,18 @@ public class PinotLLCRealtimeSegmentManager {
 
 
   /**
-   * Using the ideal state and segment metadata, return a list of the current partition groups
+   * Using the ideal state and segment metadata, return a list of {@link PartitionGroupMetadata}
+   * for the latest segment of each partition group
    */
   public List<PartitionGroupMetadata> getCurrentPartitionGroupMetadataList(IdealState idealState) {
     List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>();
 
-    // from all segment names in the ideal state, find unique groups
-    Map<Integer, LLCSegmentName> groupIdToLatestSegment = new HashMap<>();
+    // From all segment names in the ideal state, find unique partition group ids and their latest segment
+    Map<Integer, LLCSegmentName> partitionGroupIdToLatestSegment = new HashMap<>();
     for (String segment : idealState.getPartitionSet()) {
       LLCSegmentName llcSegmentName = new LLCSegmentName(segment);
       int partitionGroupId = llcSegmentName.getPartitionGroupId();
-      groupIdToLatestSegment.compute(partitionGroupId, (k, latestSegment) -> {
+      partitionGroupIdToLatestSegment.compute(partitionGroupId, (k, latestSegment) -> {
         if (latestSegment == null) {
           return llcSegmentName;
         } else {
@@ -184,8 +184,8 @@ public class PinotLLCRealtimeSegmentManager {
       });
     }
 
-    // create a PartitionGroupMetadata for each latest segment
-    for (Map.Entry<Integer, LLCSegmentName> entry : groupIdToLatestSegment.entrySet()) {
+    // Create a PartitionGroupMetadata for each latest segment
+    for (Map.Entry<Integer, LLCSegmentName> entry : partitionGroupIdToLatestSegment.entrySet()) {
       int partitionGroupId = entry.getKey();
       LLCSegmentName llcSegmentName = entry.getValue();
       RealtimeSegmentZKMetadata realtimeSegmentZKMetadata = ZKMetadataProvider
@@ -258,10 +258,8 @@ public class PinotLLCRealtimeSegmentManager {
     PartitionLevelStreamConfig streamConfig =
         new PartitionLevelStreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
     InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
-    // get new partition groups and their metadata
     List<PartitionGroupInfo> newPartitionGroupInfoList = getPartitionGroupInfoList(streamConfig, Collections.emptyList());
     int numPartitionGroups = newPartitionGroupInfoList.size();
-
     int numReplicas = getNumReplicas(tableConfig, instancePartitions);
 
     SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
@@ -699,27 +697,16 @@ public class PinotLLCRealtimeSegmentManager {
     return commitTimeoutMS;
   }
 
+  /**
+   * Fetches the latest state of the PartitionGroups for the stream
+   * If a partition has reached end of life, and all messages of that partition have been consumed by the segment, it is omitted from the result
+   */
   @VisibleForTesting
   List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
       List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
     return PinotTableIdealStateBuilder.getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
   }
 
-  @VisibleForTesting
-  StreamPartitionMsgOffset getPartitionOffset(StreamConfig streamConfig, OffsetCriteria offsetCriteria,
-      int partitionGroupId) {
-    PartitionOffsetFetcher partitionOffsetFetcher =
-        new PartitionOffsetFetcher(offsetCriteria, partitionGroupId, streamConfig);
-    try {
-      RetryPolicies.fixedDelayRetryPolicy(3, 1000L).attempt(partitionOffsetFetcher);
-      return partitionOffsetFetcher.getOffset();
-    } catch (Exception e) {
-      throw new IllegalStateException(String
-          .format("Failed to fetch the offset for topic: %s, partition: %s with criteria: %s",
-              streamConfig.getTopicName(), partitionGroupId, offsetCriteria), e);
-    }
-  }
-
   /**
    * An instance is reporting that it has stopped consuming a topic due to some error.
    * If the segment is in CONSUMING state, mark the state of the segment to be OFFLINE in idealstate.
@@ -1052,26 +1039,26 @@ public class PinotLLCRealtimeSegmentManager {
 
             // Create a new segment to re-consume from the previous start offset
             LLCSegmentName newLLCSegmentName = getNextLLCSegmentName(latestLLCSegmentName, currentTimeMs);
-            StreamPartitionMsgOffset startOffset = offsetFactory.create(latestSegmentZKMetadata.getStartOffset());
+            Checkpoint startCheckpoint = offsetFactory.create(latestSegmentZKMetadata.getStartOffset());
+            Checkpoint partitionGroupStartCheckpoint = getPartitionGroupStartCheckpoint(streamConfig, partitionGroupId);
+
             // Start offset must be higher than the start offset of the stream
-            StreamPartitionMsgOffset partitionStartOffset =
-                getPartitionOffset(streamConfig, OffsetCriteria.SMALLEST_OFFSET_CRITERIA, partitionGroupId);
-            if (partitionStartOffset.compareTo(startOffset) > 0) {
-              LOGGER.error("Data lost from offset: {} to: {} for partition: {} of table: {}", startOffset,
-                  partitionStartOffset, partitionGroupId, realtimeTableName);
+            if (partitionGroupStartCheckpoint.compareTo(startCheckpoint) > 0) {
+              LOGGER.error("Data lost from offset: {} to: {} for partition: {} of table: {}", startCheckpoint,
+                  partitionGroupStartCheckpoint, partitionGroupId, realtimeTableName);
               _controllerMetrics.addMeteredTableValue(realtimeTableName, ControllerMeter.LLC_STREAM_DATA_LOSS, 1L);
-              startOffset = partitionStartOffset;
+              startCheckpoint = partitionGroupStartCheckpoint;
             }
 
             CommittingSegmentDescriptor committingSegmentDescriptor =
-                new CommittingSegmentDescriptor(latestSegmentName, startOffset.toString(), 0);
+                new CommittingSegmentDescriptor(latestSegmentName, startCheckpoint.toString(), 0);
             createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, currentTimeMs,
                 committingSegmentDescriptor, latestSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
             String newSegmentName = newLLCSegmentName.getSegmentName();
             updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, newSegmentName, segmentAssignment,
                 instancePartitionsMap);
           } else {
-            if (!newPartitionGroupSet.contains(partitionGroupId)) {
+            if (newPartitionGroupSet.contains(partitionGroupId)) {
               // If we get here, that means in IdealState, the latest segment has no CONSUMING replicas, but has replicas
               // not OFFLINE. That is an unexpected state which cannot be fixed by the validation manager currently. In
               // that case, we need to either extend this part to handle the state, or prevent segments from getting into
@@ -1134,6 +1121,27 @@ public class PinotLLCRealtimeSegmentManager {
     return idealState;
   }
 
+  private StreamPartitionMsgOffset getPartitionGroupStartCheckpoint(StreamConfig streamConfig, int partitionGroupId) {
+    Map<String, String> streamConfigMapWithSmallestOffsetCriteria = new HashMap<>(streamConfig.getStreamConfigsMap());
+    streamConfigMapWithSmallestOffsetCriteria.put(StreamConfigProperties
+            .constructStreamProperty(streamConfig.getType(), StreamConfigProperties.STREAM_CONSUMER_OFFSET_CRITERIA),
+        OffsetCriteria.SMALLEST_OFFSET_CRITERIA.getOffsetString());
+    StreamConfig smallestOffsetCriteriaStreamConfig =
+        new StreamConfig(streamConfig.getTableNameWithType(), streamConfigMapWithSmallestOffsetCriteria);
+    List<PartitionGroupInfo> smallestOffsetCriteriaPartitionGroupInfo =
+        getPartitionGroupInfoList(smallestOffsetCriteriaStreamConfig, Collections.emptyList());
+    StreamPartitionMsgOffset partitionStartOffset = null;
+    for (PartitionGroupInfo info : smallestOffsetCriteriaPartitionGroupInfo) {
+      if (info.getPartitionGroupId() == partitionGroupId) {
+        StreamPartitionMsgOffsetFactory factory =
+            StreamConsumerFactoryProvider.create(streamConfig).createStreamMsgOffsetFactory();
+        partitionStartOffset = factory.create(info.getStartCheckpoint());
+        break;
+      }
+    }
+    return partitionStartOffset;
+  }
+
   private LLCSegmentName getNextLLCSegmentName(LLCSegmentName lastLLCSegmentName, long creationTimeMs) {
     return new LLCSegmentName(lastLLCSegmentName.getTableName(), lastLLCSegmentName.getPartitionGroupId(),
         lastLLCSegmentName.getSequenceNumber() + 1, creationTimeMs);
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
index 0f33556..c19a845 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
@@ -908,32 +908,23 @@ public class PinotLLCRealtimeSegmentManagerTest {
 
     @Override
     void updateIdealStateOnSegmentCompletion(String realtimeTableName, String committingSegmentName,
-        List<String> newSegmentNames, SegmentAssignment segmentAssignment,
+        String newSegmentName, SegmentAssignment segmentAssignment,
         Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap) {
-      updateInstanceStatesForNewConsumingSegment(_idealState.getRecord().getMapFields(), committingSegmentName,
-          null, segmentAssignment, instancePartitionsMap);
-      for (String segmentName : newSegmentNames) {
-        updateInstanceStatesForNewConsumingSegment(_idealState.getRecord().getMapFields(), null,
-            segmentName, segmentAssignment, instancePartitionsMap);
-      }
+      updateInstanceStatesForNewConsumingSegment(_idealState.getRecord().getMapFields(), committingSegmentName, null,
+          segmentAssignment, instancePartitionsMap);
+      updateInstanceStatesForNewConsumingSegment(_idealState.getRecord().getMapFields(), null, newSegmentName,
+          segmentAssignment, instancePartitionsMap);
     }
 
     @Override
     List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
         List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
       return IntStream.range(0, _numPartitions).mapToObj(i -> new PartitionGroupInfo(i,
-          getPartitionOffset(streamConfig, OffsetCriteria.SMALLEST_OFFSET_CRITERIA, i).toString()))
+          PARTITION_OFFSET.toString()))
           .collect(Collectors.toList());
     }
 
     @Override
-    LongMsgOffset getPartitionOffset(StreamConfig streamConfig, OffsetCriteria offsetCriteria, int partitionGroupId) {
-      // The criteria for this test should always be SMALLEST (for default streaming config and new added partitions)
-      assertTrue(offsetCriteria.isSmallest());
-      return PARTITION_OFFSET;
-    }
-
-    @Override
     boolean isExceededMaxSegmentCompletionTime(String realtimeTableName, String segmentName, long currentTimeMs) {
       return _exceededMaxSegmentCompletionTime;
     }
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index bc49830..e6e1402 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -34,6 +34,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import javax.annotation.Nullable;
@@ -240,7 +241,6 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   // Segment end criteria
   private volatile long _consumeEndTime = 0;
   private Checkpoint _finalOffset; // Used when we want to catch up to this one
-  private boolean _endOfPartitionGroup = false;
   private volatile boolean _shouldStop = false;
 
   // It takes 30s to locate controller leader, and more if there are multiple controller failures.
@@ -307,12 +307,6 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
               _numRowsIndexed, _numRowsConsumed, _segmentMaxRowCount);
           _stopReason = SegmentCompletionProtocol.REASON_ROW_LIMIT;
           return true;
-        } else if (_endOfPartitionGroup) {
-          // FIXME: handle numDocsIndexed == 0 case
-          segmentLogger.info("Stopping consumption due to end of partitionGroup reached nRows={} numRowsIndexed={}, numRowsConsumed={}",
-              _numRowsIndexed, _numRowsConsumed, _segmentMaxRowCount);
-          _stopReason = SegmentCompletionProtocol.REASON_END_OF_PARTITION_GROUP;
-          return true;
         }
         return false;
 
@@ -391,8 +385,10 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       try {
         messageBatch = _partitionGroupConsumer
             .fetchMessages(_currentOffset, null, _partitionLevelStreamConfig.getFetchTimeoutMillis());
-        _endOfPartitionGroup = messageBatch.isEndOfPartitionGroup();
         consecutiveErrorCount = 0;
+      } catch (TimeoutException e) {
+        handleTransientStreamErrors(e);
+        continue;
       } catch (TransientConsumerException e) {
         handleTransientStreamErrors(e);
         continue;
@@ -1253,7 +1249,12 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
         //       long as the partition function is not changed.
         int numPartitions = columnPartitionConfig.getNumPartitions();
         try {
-          int numStreamPartitions = _streamMetadataProvider.fetchPartitionCount(/*maxWaitTimeMs=*/5000L);
+          // TODO: currentPartitionGroupMetadata should be fetched from idealState + segmentZkMetadata, so that we get back accurate partitionGroups info
+          //  However this is not an issue for Kafka, since partitionGroups never expire and every partitionGroup has a single partition
+          //  Fix this before opening support for partitioning in Kinesis
+          int numStreamPartitions = _streamMetadataProvider
+              .getPartitionGroupInfoList(_clientId, _partitionLevelStreamConfig,
+                  Collections.emptyList(), /*maxWaitTimeMs=*/5000).size();
           if (numStreamPartitions != numPartitions) {
             segmentLogger.warn(
                 "Number of stream partitions: {} does not match number of partitions in the partition config: {}, using number of stream partitions",
@@ -1335,7 +1336,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       closeStreamMetadataProvider();
     }
     segmentLogger.info("Creating new stream metadata provider, reason: {}", reason);
-    _streamMetadataProvider = _streamConsumerFactory.createPartitionMetadataProvider(_clientId, _partitionGroupId);
+    _streamMetadataProvider = _streamConsumerFactory.createStreamMetadataProvider(_clientId);
   }
 
   // This should be done during commit? We may not always commit when we build a segment....
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 517f8c0..e1f8b05 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -22,12 +22,17 @@ import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.type.TypeReference;
 import java.io.IOException;
 import java.util.Map;
+import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.utils.JsonUtils;
 
 
+/**
+ * A {@link Checkpoint} implementation for the Kinesis partition group consumption
+ * A partition group consists of 1 or more shards. The KinesisCheckpoint maintains a map from shardId to its start sequenceNumber
+ */
 public class KinesisCheckpoint implements StreamPartitionMsgOffset {
-  private Map<String, String> _shardToStartSequenceMap;
+  private final Map<String, String> _shardToStartSequenceMap;
 
   public KinesisCheckpoint(Map<String, String> shardToStartSequenceMap) {
     _shardToStartSequenceMap = shardToStartSequenceMap;
@@ -68,6 +73,7 @@ public class KinesisCheckpoint implements StreamPartitionMsgOffset {
 
   @Override
   public int compareTo(Object o) {
-    return this._shardToStartSequenceMap.values().iterator().next().compareTo(((KinesisCheckpoint) o)._shardToStartSequenceMap.values().iterator().next());
+    return this._shardToStartSequenceMap.values().iterator().next()
+        .compareTo(((KinesisCheckpoint) o)._shardToStartSequenceMap.values().iterator().next());
   }
 }
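
A short usage sketch of the checkpoint (shard id and sequence number are illustrative; serialize presumably emits the JSON form of the map, per the JsonUtils import). Note that compareTo only inspects the first entry in iteration order, which is well-defined only while a partition group maps to a single shard:

    Map<String, String> shardToSequence = new HashMap<>();
    shardToSequence.put("shardId-000000000000", "49590338271490256608559692538361571095921575989136588898");
    KinesisCheckpoint checkpoint = new KinesisCheckpoint(shardToSequence);
    String serialized = checkpoint.serialize(); // JSON map of shardId -> sequenceNumber
    KinesisCheckpoint restored = (KinesisCheckpoint) checkpoint.deserialize(serialized);
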
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index 529f34f..fbe369f 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -23,6 +23,9 @@ import org.apache.pinot.spi.stream.StreamConfig;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
+/**
+ * Kinesis stream specific config
+ */
 public class KinesisConfig {
   public static final String STREAM = "stream";
   public static final String SHARD_ITERATOR_TYPE = "shard-iterator-type";
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index 4d968f6..61d065e 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -27,14 +27,13 @@ import software.amazon.awssdk.services.kinesis.model.ListShardsResponse;
 import software.amazon.awssdk.services.kinesis.model.Shard;
 
 
+/**
+ * Manages the Kinesis stream connection, given the stream name and AWS region
+ */
 public class KinesisConnectionHandler {
   KinesisClient _kinesisClient;
-  private String _stream;
-  private String _awsRegion;
-
-  public KinesisConnectionHandler() {
-
-  }
+  private final String _stream;
+  private final String _awsRegion;
 
   public KinesisConnectionHandler(String stream, String awsRegion) {
     _stream = stream;
@@ -42,12 +41,18 @@ public class KinesisConnectionHandler {
     createConnection();
   }
 
+  /**
+   * Lists all shards of the stream
+   */
   public List<Shard> getShards() {
     ListShardsResponse listShardsResponse =
         _kinesisClient.listShards(ListShardsRequest.builder().streamName(_stream).build());
     return listShardsResponse.shards();
   }
 
+  /**
+   * Creates a Kinesis client for the stream
+   */
   public void createConnection() {
     if (_kinesisClient == null) {
       _kinesisClient =
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 5cbd7e6..9c56f95 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -43,6 +43,9 @@ import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
+/**
+ * A {@link PartitionGroupConsumer} implementation for the Kinesis stream
+ */
 public class KinesisConsumer extends KinesisConnectionHandler implements PartitionGroupConsumer {
   private final Logger LOG = LoggerFactory.getLogger(KinesisConsumer.class);
   String _stream;
@@ -58,16 +61,19 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
     _executorService = Executors.newSingleThreadExecutor();
   }
 
+  /**
+   * Fetch records from the Kinesis stream between the start and end KinesisCheckpoint
+   */
   @Override
-  public KinesisRecordsBatch fetchMessages(Checkpoint start, Checkpoint end, int timeoutMs) {
+  public KinesisRecordsBatch fetchMessages(Checkpoint startCheckpoint, Checkpoint endCheckpoint, int timeoutMs) {
     List<Record> recordList = new ArrayList<>();
     Future<KinesisRecordsBatch> kinesisFetchResultFuture =
-        _executorService.submit(() -> getResult(start, end, recordList));
+        _executorService.submit(() -> getResult(startCheckpoint, endCheckpoint, recordList));
 
     try {
       return kinesisFetchResultFuture.get(timeoutMs, TimeUnit.MILLISECONDS);
     } catch (Exception e) {
-      return handleException((KinesisCheckpoint) start, recordList);
+      return handleException((KinesisCheckpoint) startCheckpoint, recordList);
     }
   }
 
@@ -81,6 +87,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
       }
 
       //TODO: iterate upon all the shardIds in the map
+      // Okay for now, since we have assumed that every partition group contains a single shard
       Map.Entry<String, String> next = kinesisStartCheckpoint.getShardToStartSequenceMap().entrySet().iterator().next();
       String shardIterator = getShardIterator(next.getKey(), next.getValue());
 
@@ -156,14 +163,11 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
       String nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
       Map<String, String> newCheckpoint = new HashMap<>(start.getShardToStartSequenceMap());
       newCheckpoint.put(newCheckpoint.keySet().iterator().next(), nextStartSequenceNumber);
-
-      return new KinesisRecordsBatch(recordList, shardId, false);
-    } else {
-      return new KinesisRecordsBatch(recordList, shardId, false);
     }
+    return new KinesisRecordsBatch(recordList, shardId, false);
   }
 
-  public String getShardIterator(String shardId, String sequenceNumber) {
+  private String getShardIterator(String shardId, String sequenceNumber) {
 
     GetShardIteratorRequest.Builder requestBuilder =
         GetShardIteratorRequest.builder().streamName(_stream).shardId(shardId).shardIteratorType(_shardIteratorType);
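
One design choice in fetchMessages above is worth spelling out: the blocking Kinesis read runs on a single-thread executor, and the caller shares the record buffer with the task, so a timeout salvages the records read so far instead of failing outright. A self-contained sketch of that pattern (names are illustrative; a concurrent list is used here because the buffer is touched from two threads, which a plain ArrayList does not guard against):

    // imports assumed: java.util.List, java.util.concurrent.*
    ExecutorService executor = Executors.newSingleThreadExecutor();
    List<String> buffer = new CopyOnWriteArrayList<>(); // shared between caller and task
    Future<List<String>> future = executor.submit(() -> {
      for (int i = 0; i < 1000; i++) {
        buffer.add("record-" + i); // stands in for records fetched from the stream
      }
      return buffer;
    });
    List<String> records;
    try {
      records = future.get(10, TimeUnit.MILLISECONDS);
    } catch (TimeoutException | InterruptedException | ExecutionException e) {
      records = buffer; // return the partial batch rather than null
    }
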
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index fc9c4af..6792fb9 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -28,6 +28,9 @@ import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory;
 
 
+/**
+ * {@link StreamConsumerFactory} implementation for the Kinesis stream
+ */
 public class KinesisConsumerFactory extends StreamConsumerFactory {
 
   @Override
@@ -43,7 +46,7 @@ public class KinesisConsumerFactory extends StreamConsumerFactory {
 
   @Override
   public StreamMetadataProvider createPartitionMetadataProvider(String clientId, int partition) {
-    return null;
+    throw new UnsupportedOperationException();
   }
 
   @Override
@@ -52,7 +55,8 @@ public class KinesisConsumerFactory extends StreamConsumerFactory {
   }
 
   @Override
-  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
+  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId,
+      PartitionGroupMetadata partitionGroupMetadata) {
     return new KinesisConsumer(new KinesisConfig(_streamConfig));
   }
 
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java
index f234bae..8f6b932 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisMsgOffsetFactory.java
@@ -1,11 +1,15 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.io.IOException;
+import org.apache.pinot.spi.stream.PartitionGroupCheckpointFactory;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory;
 
 
+/**
+ * An implementation of the {@link PartitionGroupCheckpointFactory} for Kinesis stream
+ */
 public class KinesisMsgOffsetFactory implements StreamPartitionMsgOffsetFactory {
 
   KinesisConfig _kinesisConfig;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
index b3eb626..83228ec 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
@@ -22,13 +22,14 @@ import java.nio.ByteBuffer;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import javax.annotation.Nullable;
 import org.apache.pinot.spi.stream.MessageBatch;
-import org.apache.pinot.spi.stream.RowMetadata;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import software.amazon.awssdk.services.kinesis.model.Record;
 
 
+/**
+ * A {@link MessageBatch} for collecting records from the Kinesis stream
+ */
 public class KinesisRecordsBatch implements MessageBatch<byte[]> {
   private final List<Record> _recordList;
   private final String _shardId;
@@ -49,6 +50,7 @@ public class KinesisRecordsBatch implements MessageBatch<byte[]> {
   public byte[] getMessageAtIndex(int index) {
     return _recordList.get(index).data().asByteArray();
   }
+
   @Override
   public int getMessageOffsetAtIndex(int index) {
     return ByteBuffer.wrap(_recordList.get(index).data().asByteArray()).arrayOffset();
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
index 8968b56..1083969 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
@@ -1,21 +1,14 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
-import com.google.common.base.Preconditions;
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.stream.Collectors;
 import javax.annotation.Nonnull;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.OffsetCriteria;
 import org.apache.pinot.spi.stream.PartitionGroupConsumer;
@@ -28,6 +21,9 @@ import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import software.amazon.awssdk.services.kinesis.model.Shard;
 
 
+/**
+ * A {@link StreamMetadataProvider} implementation for the Kinesis stream
+ */
 public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
   private final KinesisConnectionHandler _kinesisConnectionHandler;
   private final StreamConsumerFactory _kinesisStreamConsumerFactory;
@@ -52,17 +48,23 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
     throw new UnsupportedOperationException();
   }
 
+  /**
+   * This call returns all active shards, taking into account the consumption status for those shards.
+   * PartitionGroupInfo is returned for a shard if:
+   * 1. It is a brand new shard, i.e. no partitionGroupMetadata was found for it in the current list
+   * 2. It is still being actively consumed from, i.e. the consuming segment has not reached the end of the shard
+   */
   @Override
   public List<PartitionGroupInfo> getPartitionGroupInfoList(String clientId, StreamConfig streamConfig,
       List<PartitionGroupMetadata> currentPartitionGroupsMetadata, int timeoutMillis)
       throws IOException, TimeoutException {
 
-    Map<Integer, PartitionGroupMetadata> currentPartitionGroupMap =
-        currentPartitionGroupsMetadata.stream().collect(Collectors.toMap(PartitionGroupMetadata::getPartitionGroupId, p -> p));
+    Map<Integer, PartitionGroupMetadata> currentPartitionGroupMap = currentPartitionGroupsMetadata.stream()
+        .collect(Collectors.toMap(PartitionGroupMetadata::getPartitionGroupId, p -> p));
 
     List<PartitionGroupInfo> newPartitionGroupInfos = new ArrayList<>();
     List<Shard> shards = _kinesisConnectionHandler.getShards();
-    for (Shard shard : shards) { // go over all shards
+    for (Shard shard : shards) {
       KinesisCheckpoint newStartCheckpoint;
 
       String shardId = shard.shardId();
@@ -76,7 +78,7 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
         } catch (Exception e) {
           // ignore. No end checkpoint yet for IN_PROGRESS segment
         }
-        if (currentEndCheckpoint != null) { // end checkpoint available i.e. committing segment
+        if (currentEndCheckpoint != null) { // end checkpoint available i.e. committing/committed segment
           String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
           if (endingSequenceNumber != null) { // shard has ended
             // check if segment has consumed all the messages already
@@ -104,8 +106,7 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
         newStartCheckpoint = new KinesisCheckpoint(shardToSequenceNumberMap);
       }
 
-      newPartitionGroupInfos
-          .add(new PartitionGroupInfo(partitionGroupId, newStartCheckpoint.serialize()));
+      newPartitionGroupInfos.add(new PartitionGroupInfo(partitionGroupId, newStartCheckpoint.serialize()));
     }
     return newPartitionGroupInfos;
   }
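
The hunk above elides how partitionGroupId is derived from the shard. One plausible mapping, stated here purely as an assumption for illustration, takes the numeric suffix of the Kinesis shard id:

    // Kinesis shard ids have the form "shardId-000000000123"
    String shardId = "shardId-000000000123";
    int partitionGroupId = Integer.parseInt(shardId.substring("shardId-".length())); // 123
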
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java
index bae8832..b7a9dba 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java
@@ -18,7 +18,12 @@
  */
 package org.apache.pinot.spi.stream;
 
+/**
+ * Keeps track of the consumption for a PartitionGroup
+ */
 public interface Checkpoint extends Comparable {
+
   String serialize();
+
   Checkpoint deserialize(String checkpointStr);
 }
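
As a hedged illustration of this contract, a toy implementation over a single long offset (not part of the patch):

    public class LongCheckpoint implements Checkpoint {
      private final long _offset;

      public LongCheckpoint(long offset) {
        _offset = offset;
      }

      @Override
      public String serialize() {
        return Long.toString(_offset);
      }

      @Override
      public Checkpoint deserialize(String checkpointStr) {
        return new LongCheckpoint(Long.parseLong(checkpointStr));
      }

      @Override
      public int compareTo(Object o) {
        return Long.compare(_offset, ((LongCheckpoint) o)._offset);
      }
    }
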
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupCheckpointFactory.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupCheckpointFactory.java
index 14d2f39..4bd7839 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupCheckpointFactory.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupCheckpointFactory.java
@@ -18,32 +18,22 @@
  */
 package org.apache.pinot.spi.stream;
 
-import org.apache.pinot.spi.annotations.InterfaceStability;
-
-
 /**
  * An interface to be implemented by streams that are consumed using Pinot LLC consumption.
  */
-@InterfaceStability.Evolving
 public interface PartitionGroupCheckpointFactory {
   /**
    * Initialization, called once when the factory is created.
-   * @param streamConfig
    */
   void init(StreamConfig streamConfig);
 
   /**
-   * Construct an offset from the string provided.
-   * @param offsetStr
-   * @return StreamPartitionMsgOffset
+   * Construct a checkpoint from the string provided.
    */
   Checkpoint create(String offsetStr);
 
   /**
    * Construct an offset from another one provided, of the same type.
-   *
-   * @param other
-   * @return
    */
   Checkpoint create(Checkpoint other);
 }
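
A matching toy factory for the interface above (again illustrative only; create(Checkpoint) round-trips through serialize, so the LongCheckpoint sketch needs no extra accessor):

    public class LongCheckpointFactory implements PartitionGroupCheckpointFactory {
      @Override
      public void init(StreamConfig streamConfig) {
        // nothing to configure for this toy factory
      }

      @Override
      public Checkpoint create(String offsetStr) {
        return new LongCheckpoint(Long.parseLong(offsetStr));
      }

      @Override
      public Checkpoint create(Checkpoint other) {
        // copy of the same type, per the javadoc above
        return create(other.serialize());
      }
    }
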
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
index b421268..72b59d7 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
@@ -22,7 +22,21 @@ import java.io.Closeable;
 import java.util.concurrent.TimeoutException;
 
 
+/**
+ * Consumer interface for consuming from a partition group of a stream
+ */
 public interface PartitionGroupConsumer extends Closeable {
-  MessageBatch fetchMessages(Checkpoint start, Checkpoint end, int timeout)
+
+  /**
+   * Fetch messages and offsets from the stream partition group
+   *
+   * @param startCheckpoint The offset of the first message desired, inclusive
+   * @param endCheckpoint The offset of the last message desired, exclusive, or null
+   * @param timeoutMs Timeout in milliseconds
+   * @throws java.util.concurrent.TimeoutException If the operation could not be completed within {@code timeoutMs}
+   * milliseconds
+   * @return A MessageBatch containing the messages fetched from the partition group and their offsets
+   */
+  MessageBatch fetchMessages(Checkpoint startCheckpoint, Checkpoint endCheckpoint, int timeoutMs)
       throws TimeoutException;
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java
index 438e148..758953d 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfo.java
@@ -18,21 +18,22 @@
  */
 package org.apache.pinot.spi.stream;
 
+/**
+ * A PartitionGroup is a group of partitions/shards that the same consumer should consume from.
+ * This class is a container for the metadata of a partition group. It consists of
+ * 1. A unique partition group id for this partition group
+ * 2. The start checkpoint to begin consumption for this partition group
+ */
 public class PartitionGroupInfo {
 
-  // fixme: Make partitionGroupId string everywhere (LLCSegmentName, StreamMetadataProvider)
   private final int _partitionGroupId;
-  private String _startCheckpoint;
+  private final String _startCheckpoint;
 
   public PartitionGroupInfo(int partitionGroupId, String startCheckpoint) {
     _partitionGroupId = partitionGroupId;
     _startCheckpoint = startCheckpoint;
   }
 
-  public void setStartCheckpoint(String startCheckpoint) {
-    _startCheckpoint = startCheckpoint;
-  }
-
   public int getPartitionGroupId() {
     return _partitionGroupId;
   }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
index f2d3f17..9c746e8 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
@@ -25,7 +25,7 @@ import org.slf4j.LoggerFactory;
 
 
 /**
- * Fetches the partition count of a stream using the {@link StreamMetadataProvider}
+ * Creates a list of PartitionGroupInfo for all partition groups of the stream using the {@link StreamMetadataProvider}
  */
 public class PartitionGroupInfoFetcher implements Callable<Boolean> {
 
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
index aaf20b6..a99a82b 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
@@ -18,12 +18,8 @@
  */
 package org.apache.pinot.spi.stream;
 
-import java.util.List;
-
-
 public class PartitionGroupMetadata {
 
-  // fixme: Make partitionGroupId string everywhere (LLCSegmentName, StreamMetadataProvider)
   private final int _partitionGroupId;
   private int _sequenceNumber;
   private String _startCheckpoint;
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionLevelConsumer.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionLevelConsumer.java
index 3bedc8a..3f5b230 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionLevelConsumer.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionLevelConsumer.java
@@ -63,9 +63,9 @@ public interface PartitionLevelConsumer extends Closeable, PartitionGroupConsume
     return fetchMessages(startOffsetLong, endOffsetLong, timeoutMillis);
   }
 
-  default MessageBatch fetchMessages(Checkpoint startCheckpoint, Checkpoint endCheckpoint, int timeoutMillis)
+  default MessageBatch fetchMessages(Checkpoint startCheckpoint, Checkpoint endCheckpoint, int timeoutMs)
       throws java.util.concurrent.TimeoutException {
-    // TODO Issue 5359 remove this default implementation once all kafka consumers have migrated to use this API
-    return fetchMessages((StreamPartitionMsgOffset) startCheckpoint, (StreamPartitionMsgOffset) endCheckpoint, timeoutMillis);
+    return fetchMessages((StreamPartitionMsgOffset) startCheckpoint, (StreamPartitionMsgOffset) endCheckpoint,
+        timeoutMs);
   }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionOffsetFetcher.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionOffsetFetcher.java
deleted file mode 100644
index b92f04d..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionOffsetFetcher.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream;
-
-import java.util.concurrent.Callable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/**
- * Fetches the partition offset for a stream given the offset criteria, using the {@link StreamMetadataProvider}
- */
-public class PartitionOffsetFetcher implements Callable<Boolean> {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(PartitionOffsetFetcher.class);
-  private static final int STREAM_PARTITION_OFFSET_FETCH_TIMEOUT_MILLIS = 10000;
-
-  private final String _topicName;
-  private final OffsetCriteria _offsetCriteria;
-  private final int _partitionGroupId;
-
-  private Exception _exception = null;
-  private StreamPartitionMsgOffset _offset;
-  private StreamConsumerFactory _streamConsumerFactory;
-  StreamConfig _streamConfig;
-
-  public PartitionOffsetFetcher(final OffsetCriteria offsetCriteria, int partitionGroupId, StreamConfig streamConfig) {
-    _offsetCriteria = offsetCriteria;
-    _partitionGroupId = partitionGroupId;
-    _streamConfig = streamConfig;
-    _streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
-    _topicName = streamConfig.getTopicName();
-  }
-
-  public StreamPartitionMsgOffset getOffset() {
-    return _offset;
-  }
-
-  public Exception getException() {
-    return _exception;
-  }
-
-  /**
-   * Callable to fetch the offset of the partition given the stream metadata and offset criteria
-   * @return
-   * @throws Exception
-   */
-  @Override
-  public Boolean call()
-      throws Exception {
-    String clientId = PartitionOffsetFetcher.class.getSimpleName() + "-" + _topicName + "-" + _partitionGroupId;
-    try (StreamMetadataProvider streamMetadataProvider = _streamConsumerFactory
-        .createPartitionMetadataProvider(clientId, _partitionGroupId)) {
-      _offset =
-          streamMetadataProvider.fetchStreamPartitionOffset(_offsetCriteria, STREAM_PARTITION_OFFSET_FETCH_TIMEOUT_MILLIS);
-      if (_exception != null) {
-        LOGGER.info("Successfully retrieved offset({}) for stream topic {} partition {}", _offset, _topicName,
-            _partitionGroupId);
-      }
-      return Boolean.TRUE;
-    } catch (TransientConsumerException e) {
-      LOGGER.warn("Temporary exception when fetching offset for topic {} partition {}:{}", _topicName,
-          _partitionGroupId,
-          e.getMessage());
-      _exception = e;
-      return Boolean.FALSE;
-    } catch (Exception e) {
-      _exception = e;
-      throw e;
-    }
-  }
-}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
index f993fed..ac928c5 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
@@ -41,7 +41,6 @@ public abstract class StreamConsumerFactory {
    * @param partition the partition id of the partition for which this consumer is being created
    * @return
    */
-  @Deprecated
   public abstract PartitionLevelConsumer createPartitionLevelConsumer(String clientId, int partition);
 
   /**
@@ -74,8 +73,11 @@ public abstract class StreamConsumerFactory {
     return new LongMsgOffsetFactory();
   }
 
-  // creates a consumer which consumes from a partition group
-  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
-    return createPartitionLevelConsumer(clientId, metadata.getPartitionGroupId());
+  /**
+   * Creates a partition group consumer, which can fetch messages from a partition group
+   */
+  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId,
+      PartitionGroupMetadata partitionGroupMetadata) {
+    return createPartitionLevelConsumer(clientId, partitionGroupMetadata.getPartitionGroupId());
   }
 }
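
For plugin authors, the default above means Kafka-style factories keep working untouched, while shard-aware streams can override it. A minimal sketch of such an override, assuming a hypothetical MyPartitionGroupConsumer; the factory's other abstract methods are elided for brevity:

    public class MyStreamConsumerFactory extends StreamConsumerFactory {
      // createPartitionLevelConsumer, createStreamLevelConsumer and the
      // metadata-provider factory methods are elided for brevity.

      @Override
      public PartitionGroupConsumer createPartitionGroupConsumer(String clientId,
          PartitionGroupMetadata partitionGroupMetadata) {
        // Hand the full partition group metadata to the consumer instead of
        // collapsing it to a single partition id, as the default above does.
        return new MyPartitionGroupConsumer(clientId, _streamConfig, partitionGroupMetadata);
      }
    }
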
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
index be2e819..cecc708 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
@@ -60,7 +60,7 @@ public interface StreamMetadataProvider extends Closeable {
   }
 
   /**
-   * Fetch the partitionGroupMetadata list.
+   * Fetch the list of partition group info for the latest state of the stream
    * @param currentPartitionGroupsMetadata In case of Kafka, each partition group contains a single partition.
    */
   default List<PartitionGroupInfo> getPartitionGroupInfoList(String clientId, StreamConfig streamConfig,
@@ -69,14 +69,13 @@ public interface StreamMetadataProvider extends Closeable {
     int partitionCount = fetchPartitionCount(timeoutMillis);
     List<PartitionGroupInfo> newPartitionGroupInfoList = new ArrayList<>(partitionCount);
 
-    // add a PartitionGroupInfo into the list foreach partition already present in current.
-    // the end checkpoint is set as checkpoint
+    // Add a PartitionGroupInfo for each partition group already being consumed, carrying over its end checkpoint.
     for (PartitionGroupMetadata currentPartitionGroupMetadata : currentPartitionGroupsMetadata) {
       newPartitionGroupInfoList.add(new PartitionGroupInfo(currentPartitionGroupMetadata.getPartitionGroupId(),
           currentPartitionGroupMetadata.getEndCheckpoint()));
     }
-    // add PartitiongroupInfo for new partitions
-    // use offset criteria from stream config
+    // Add PartitionGroupInfo for new partitions
+    // Use offset criteria from stream config
     StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
     for (int i = currentPartitionGroupsMetadata.size(); i < partitionCount; i++) {
       StreamMetadataProvider partitionMetadataProvider =

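To see how the default is meant to be called: the caller passes the partition groups it already tracks and gets back those groups (pinned to their end checkpoints) plus entries for any newly appeared partitions. A hedged usage sketch, assuming streamConfig, clientId and currentPartitionGroupsMetadata are in scope and the enclosing method may throw:

    StreamConsumerFactory factory = StreamConsumerFactoryProvider.create(streamConfig);
    try (StreamMetadataProvider metadataProvider = factory.createStreamMetadataProvider(clientId)) {
      // Known groups keep their end checkpoint; brand-new partitions get an
      // offset resolved from the stream config's offset criteria.
      List<PartitionGroupInfo> partitionGroupInfoList = metadataProvider
          .getPartitionGroupInfoList(clientId, streamConfig, currentPartitionGroupsMetadata, 5000);
    }
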



[incubator-pinot] 46/47: Add unit tests in Kinesis consumer (#6410)

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit ab9655b8d42bd1a41920260ff3080070c6863cca
Author: Kartik Khare <kh...@gmail.com>
AuthorDate: Mon Feb 1 22:44:37 2021 +0530

    Add unit tests in Kinesis consumer (#6410)
    
    * Bug fixes: handle broken connection exceptions
    
    * Add unit tests for partition group consumer
    
    * Add tests for child shards
    
    * Add unit test for kafka consumer
    
    * Refactor: remove unused imports, expand * imports and rename classes
    
    * Fix: enforcer errors
    
    * Remove powermock dependency
    
    * Fix jackson version conflict. Shade jackson dependency
    
    * Remove powermock
---
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml   |  79 ++++++++-
 .../plugin/stream/kinesis/KinesisCheckpoint.java   |   7 +-
 .../pinot/plugin/stream/kinesis/KinesisConfig.java |   4 +
 .../stream/kinesis/KinesisConnectionHandler.java   |   6 +
 .../plugin/stream/kinesis/KinesisConsumer.java     |  11 ++
 .../kinesis/KinesisStreamMetadataProvider.java     |  10 +-
 ...st.java => KinesisConsumerIntegrationTest.java} |   3 +-
 .../plugin/stream/kinesis/KinesisConsumerTest.java | 187 ++++++++++++++++-----
 .../kinesis/KinesisStreamMetadataProviderTest.java | 156 +++++++++++++++++
 .../pinot/plugin/stream/kinesis/TestUtils.java     |  55 ++++++
 pom.xml                                            |   2 +-
 11 files changed, 468 insertions(+), 52 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
index 38d4f73..b636b9f 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
@@ -36,9 +36,25 @@
   <properties>
     <pinot.root>${basedir}/../../..</pinot.root>
     <phase.prop>package</phase.prop>
-    <aws.version>2.15.50</aws.version>
+    <aws.version>2.14.28</aws.version>
+    <jackson.version>2.10.4</jackson.version>
+    <netty.version>4.1.42.Final</netty.version>
+    <easymock.version>4.2</easymock.version>
+    <reactive.version>1.0.2</reactive.version>
   </properties>
 
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>software.amazon.awssdk</groupId>
+        <artifactId>bom</artifactId>
+        <version>${aws.version}</version>
+        <type>pom</type>
+        <scope>import</scope>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
+
   <dependencies>
     <dependency>
       <groupId>software.amazon.awssdk</groupId>
@@ -75,38 +91,87 @@
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-core</artifactId>
-      <version>2.12.0</version>
+      <version>${jackson.version}</version>
     </dependency>
 
     <dependency>
       <groupId>org.reactivestreams</groupId>
       <artifactId>reactive-streams</artifactId>
-      <version>1.0.2</version>
+      <version>${reactive.version}</version>
     </dependency>
 
     <dependency>
       <groupId>io.netty</groupId>
       <artifactId>netty-codec</artifactId>
-      <version>4.1.42.Final</version>
+      <version>${netty.version}</version>
     </dependency>
 
     <dependency>
       <groupId>io.netty</groupId>
       <artifactId>netty-buffer</artifactId>
-      <version>4.1.42.Final</version>
+      <version>${netty.version}</version>
     </dependency>
 
     <dependency>
       <groupId>io.netty</groupId>
       <artifactId>netty-transport</artifactId>
-      <version>4.1.42.Final</version>
+      <version>${netty.version}</version>
     </dependency>
 
     <dependency>
       <groupId>io.netty</groupId>
       <artifactId>netty-common</artifactId>
-      <version>4.1.42.Final</version>
+      <version>${netty.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.easymock</groupId>
+      <artifactId>easymock</artifactId>
+      <version>${easymock.version}</version>
+      <scope>test</scope>
     </dependency>
+
   </dependencies>
 
+  <profiles>
+    <profile>
+      <id>build-shaded-jar</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <build>
+        <plugins>
+          <plugin>
+            <artifactId>maven-shade-plugin</artifactId>
+            <version>3.2.1</version>
+            <executions>
+              <execution>
+                <phase>package</phase>
+                <goals>
+                  <goal>shade</goal>
+                </goals>
+                <configuration>
+                  <relocations>
+                    <relocation>
+                      <pattern>com.fasterxml.jackson</pattern>
+                      <shadedPattern>shaded.kinesis.com.fasterxml.jackson</shadedPattern>
+                    </relocation>
+                    <relocation>
+                      <pattern>software.amazon</pattern>
+                      <shadedPattern>shaded.kinesis.software.amazon</shadedPattern>
+                    </relocation>
+                  </relocations>
+                  <transformers>
+                    <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+                    <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"/>
+                  </transformers>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
+
 </project>
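
For context on the relocations above: the shade plugin rewrites bytecode references to com.fasterxml.jackson and software.amazon inside the plugin jar, so the AWS SDK's newer Jackson can coexist with the Jackson 2.9.8 pinned in the root pom (see the pom.xml change at the end of this commit). A small sketch of the effect, with a hypothetical class name:

    import com.fasterxml.jackson.databind.ObjectMapper;

    public class ShadeCheck {
      public static void main(String[] args) {
        // Built into the shaded plugin jar, this reference is relocated and the
        // program prints "shaded.kinesis.com.fasterxml.jackson.databind.ObjectMapper";
        // built against the plain dependency, it prints the unrelocated name.
        System.out.println(ObjectMapper.class.getName());
      }
    }
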
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index e1f8b05..57904ac 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -20,6 +20,7 @@ package org.apache.pinot.plugin.stream.kinesis;
 
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
 import java.util.Map;
 import org.apache.pinot.spi.stream.Checkpoint;
@@ -33,6 +34,7 @@ import org.apache.pinot.spi.utils.JsonUtils;
  */
 public class KinesisCheckpoint implements StreamPartitionMsgOffset {
   private final Map<String, String> _shardToStartSequenceMap;
+  public static final ObjectMapper objectMapper = new ObjectMapper();
 
   public KinesisCheckpoint(Map<String, String> shardToStartSequenceMap) {
     _shardToStartSequenceMap = shardToStartSequenceMap;
@@ -40,8 +42,7 @@ public class KinesisCheckpoint implements StreamPartitionMsgOffset {
 
   public KinesisCheckpoint(String checkpointStr)
       throws IOException {
-    _shardToStartSequenceMap = JsonUtils.stringToObject(checkpointStr, new TypeReference<Map<String, String>>() {
-    });
+    _shardToStartSequenceMap = objectMapper.readValue(checkpointStr, new TypeReference<Map<String, String>>(){});
   }
 
   public Map<String, String> getShardToStartSequenceMap() {
@@ -51,7 +52,7 @@ public class KinesisCheckpoint implements StreamPartitionMsgOffset {
   @Override
   public String serialize() {
     try {
-      return JsonUtils.objectToString(_shardToStartSequenceMap);
+      return objectMapper.writeValueAsString(_shardToStartSequenceMap);
     } catch (JsonProcessingException e) {
       throw new IllegalStateException();
     }
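
In effect the checkpoint is a shard-id to start-sequence-number map serialized as JSON, now through the plugin's own ObjectMapper (presumably so serialization stays on the plugin's shaded Jackson rather than JsonUtils' platform copy). A round-trip sketch, mirroring the values used in the tests below:

    import java.util.HashMap;
    import java.util.Map;

    public class CheckpointRoundTrip {
      public static void main(String[] args) throws Exception {
        Map<String, String> shardToSequenceMap = new HashMap<>();
        shardToSequenceMap.put("0", "1");  // shard id -> start sequence number

        KinesisCheckpoint checkpoint = new KinesisCheckpoint(shardToSequenceMap);
        String json = checkpoint.serialize();                       // {"0":"1"}
        KinesisCheckpoint restored = new KinesisCheckpoint(json);   // throws IOException on bad input

        // The restored checkpoint carries the same shard -> sequence mapping.
        System.out.println(restored.getShardToStartSequenceMap().equals(shardToSequenceMap));  // true
      }
    }
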
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index fbe369f..6e46498 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -59,4 +59,8 @@ public class KinesisConfig {
   public ShardIteratorType getShardIteratorType() {
     return ShardIteratorType.fromValue(_props.getOrDefault(SHARD_ITERATOR_TYPE, DEFAULT_SHARD_ITERATOR_TYPE));
   }
+
+  public void setMaxRecordsToFetch(int maxRecordsToFetch) {
+    _props.put(MAX_RECORDS_TO_FETCH, String.valueOf(maxRecordsToFetch));
+  }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index 61d065e..0686742 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -41,6 +41,12 @@ public class KinesisConnectionHandler {
     createConnection();
   }
 
+  public KinesisConnectionHandler(String stream, String awsRegion, KinesisClient kinesisClient) {
+    _stream = stream;
+    _awsRegion = awsRegion;
+    _kinesisClient = kinesisClient;
+  }
+
   /**
    * Lists all shards of the stream
    */
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 9c56f95..8ad27b4 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -31,6 +31,7 @@ import org.apache.pinot.spi.stream.PartitionGroupConsumer;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.services.kinesis.KinesisClient;
 import software.amazon.awssdk.services.kinesis.model.ExpiredIteratorException;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
@@ -61,6 +62,15 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
     _executorService = Executors.newSingleThreadExecutor();
   }
 
+  public KinesisConsumer(KinesisConfig kinesisConfig, KinesisClient kinesisClient) {
+    super(kinesisConfig.getStream(), kinesisConfig.getAwsRegion(), kinesisClient);
+    _kinesisClient = kinesisClient;
+    _stream = kinesisConfig.getStream();
+    _maxRecords = kinesisConfig.maxRecordsToFetch();
+    _shardIteratorType = kinesisConfig.getShardIteratorType();
+    _executorService = Executors.newSingleThreadExecutor();
+  }
+
   /**
    * Fetch records from the Kinesis stream between the start and end KinesisCheckpoint
    */
@@ -175,6 +185,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
     if (sequenceNumber != null && _shardIteratorType.toString().contains("SEQUENCE")) {
       requestBuilder = requestBuilder.startingSequenceNumber(sequenceNumber);
     }
+
     return _kinesisClient.getShardIterator(requestBuilder.build()).shardIterator();
   }
 
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
index 42150a3..7af1df1 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
@@ -59,6 +59,14 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
     _fetchTimeoutMs = streamConfig.getFetchTimeoutMillis();
   }
 
+  public KinesisStreamMetadataProvider(String clientId, StreamConfig streamConfig, KinesisConnectionHandler kinesisConnectionHandler, StreamConsumerFactory streamConsumerFactory) {
+    KinesisConfig kinesisConfig = new KinesisConfig(streamConfig);
+    _kinesisConnectionHandler = kinesisConnectionHandler;
+    _kinesisStreamConsumerFactory = streamConsumerFactory;
+    _clientId = clientId;
+    _fetchTimeoutMs = streamConfig.getFetchTimeoutMillis();
+  }
+
   @Override
   public int fetchPartitionCount(long timeoutMillis) {
     throw new UnsupportedOperationException();
@@ -83,7 +91,7 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
     List<PartitionGroupInfo> newPartitionGroupInfos = new ArrayList<>();
 
     Map<String, Shard> shardIdToShardMap =
-        _kinesisConnectionHandler.getShards().stream().collect(Collectors.toMap(Shard::shardId, s -> s));
+        _kinesisConnectionHandler.getShards().stream().collect(Collectors.toMap(Shard::shardId, s -> s, (s1, s2) -> s1));
     Set<String> shardsInCurrent = new HashSet<>();
     Set<String> shardsEnded = new HashSet<>();
 
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerIntegrationTest.java
similarity index 96%
copy from pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
copy to pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerIntegrationTest.java
index f9ed779..1e832fa 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerIntegrationTest.java
@@ -22,12 +22,11 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import software.amazon.awssdk.services.kinesis.model.Shard;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
-public class KinesisConsumerTest {
+public class KinesisConsumerIntegrationTest {
 
   private static final String STREAM_NAME = "kinesis-test";
   private static final String AWS_REGION = "us-west-2";
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
index f9ed779..384c512 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
@@ -18,51 +18,162 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
-import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
-import software.amazon.awssdk.services.kinesis.model.Shard;
-import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
+import org.apache.pinot.spi.stream.StreamConsumerFactory;
+import org.easymock.Capture;
+import org.testng.Assert;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+import software.amazon.awssdk.core.SdkBytes;
+import software.amazon.awssdk.services.kinesis.KinesisClient;
+import software.amazon.awssdk.services.kinesis.model.ChildShard;
+import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
+import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
+import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
+import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
+import software.amazon.awssdk.services.kinesis.model.Record;
+
+import static org.easymock.EasyMock.capture;
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
 
 
 public class KinesisConsumerTest {
+  public static final int TIMEOUT = 1000;
+  public static final int NUM_RECORDS = 10;
+  public static final String DUMMY_RECORD_PREFIX = "DUMMY_RECORD-";
+  public static final String PARTITION_KEY_PREFIX = "PARTITION_KEY-";
+  public static final String PLACEHOLDER = "DUMMY";
+
+  private static KinesisConnectionHandler kinesisConnectionHandler;
+  private static StreamConsumerFactory streamConsumerFactory;
+  private static KinesisClient kinesisClient;
+  private List<Record> recordList;
+
+  @BeforeMethod
+  public void setupTest() {
+    kinesisConnectionHandler = createMock(KinesisConnectionHandler.class);
+    kinesisClient = createMock(KinesisClient.class);
+    streamConsumerFactory = createMock(StreamConsumerFactory.class);
+
+    recordList = new ArrayList<>();
+
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      Record record =
+          Record.builder().data(SdkBytes.fromUtf8String(DUMMY_RECORD_PREFIX + i)).partitionKey(PARTITION_KEY_PREFIX + i)
+              .sequenceNumber(String.valueOf(i + 1)).build();
+      recordList.add(record);
+    }
+  }
+
+  @Test
+  public void testBasicConsumer() {
+    Capture<GetRecordsRequest> getRecordsRequestCapture = Capture.newInstance();
+    Capture<GetShardIteratorRequest> getShardIteratorRequestCapture = Capture.newInstance();
+
+    GetRecordsResponse getRecordsResponse =
+        GetRecordsResponse.builder().nextShardIterator(null).records(recordList).build();
+    GetShardIteratorResponse getShardIteratorResponse =
+        GetShardIteratorResponse.builder().shardIterator(PLACEHOLDER).build();
+
+    expect(kinesisClient.getRecords(capture(getRecordsRequestCapture))).andReturn(getRecordsResponse).anyTimes();
+    expect(kinesisClient.getShardIterator(capture(getShardIteratorRequestCapture))).andReturn(getShardIteratorResponse)
+        .anyTimes();
 
-  private static final String STREAM_NAME = "kinesis-test";
-  private static final String AWS_REGION = "us-west-2";
-
-  public static void main(String[] args)
-      throws IOException {
-    Map<String, String> props = new HashMap<>();
-    props.put(KinesisConfig.STREAM, STREAM_NAME);
-    props.put(KinesisConfig.AWS_REGION, AWS_REGION);
-    props.put(KinesisConfig.MAX_RECORDS_TO_FETCH, "10");
-    props.put(KinesisConfig.SHARD_ITERATOR_TYPE, ShardIteratorType.AT_SEQUENCE_NUMBER.toString());
-    KinesisConfig kinesisConfig = new KinesisConfig(props);
-    KinesisConnectionHandler kinesisConnectionHandler = new KinesisConnectionHandler(STREAM_NAME, AWS_REGION);
-    List<Shard> shardList = kinesisConnectionHandler.getShards();
-    for (Shard shard : shardList) {
-      System.out.println("SHARD: " + shard.shardId());
-
-      KinesisConsumer kinesisConsumer = new KinesisConsumer(kinesisConfig);
-      System.out.println(
-          "Kinesis Checkpoint Range: < " + shard.sequenceNumberRange().startingSequenceNumber() + ", " + shard
-              .sequenceNumberRange().endingSequenceNumber() + " >");
-      Map<String, String> shardIdToSeqNumMap = new HashMap<>();
-      shardIdToSeqNumMap.put(shard.shardId(), shard.sequenceNumberRange().startingSequenceNumber());
-      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardIdToSeqNumMap);
-      KinesisRecordsBatch kinesisRecordsBatch = kinesisConsumer.fetchMessages(kinesisCheckpoint, null, 60 * 1000);
-      int n = kinesisRecordsBatch.getMessageCount();
-
-      System.out.println("Found " + n + " messages ");
-      for (int i = 0; i < n; i++) {
-        System.out.println(
-            "SEQ-NO: " + kinesisRecordsBatch.getMessageOffsetAtIndex(i) + ", DATA: " + kinesisRecordsBatch
-                .getMessageAtIndex(i));
-      }
-      kinesisConsumer.close();
+    replay(kinesisClient);
+
+    KinesisConsumer kinesisConsumer = new KinesisConsumer(TestUtils.getKinesisConfig(), kinesisClient);
+
+    Map<String, String> shardToSequenceMap = new HashMap<>();
+    shardToSequenceMap.put("0", "1");
+    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardToSequenceMap);
+    KinesisRecordsBatch kinesisRecordsBatch = kinesisConsumer.fetchMessages(kinesisCheckpoint, null, TIMEOUT);
+
+    Assert.assertEquals(kinesisRecordsBatch.getMessageCount(), NUM_RECORDS);
+
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      Assert.assertEquals(baToString(kinesisRecordsBatch.getMessageAtIndex(i)), DUMMY_RECORD_PREFIX + i);
     }
-    kinesisConnectionHandler.close();
+
+    Assert.assertFalse(kinesisRecordsBatch.isEndOfPartitionGroup());
+  }
+
+  @Test
+  public void testBasicConsumerWithMaxRecordsLimit() {
+    int maxRecordsLimit = 20;
+    Capture<GetRecordsRequest> getRecordsRequestCapture = Capture.newInstance();
+    Capture<GetShardIteratorRequest> getShardIteratorRequestCapture = Capture.newInstance();
+
+    GetRecordsResponse getRecordsResponse =
+        GetRecordsResponse.builder().nextShardIterator(PLACEHOLDER).records(recordList).build();
+    GetShardIteratorResponse getShardIteratorResponse =
+        GetShardIteratorResponse.builder().shardIterator(PLACEHOLDER).build();
+
+    expect(kinesisClient.getRecords(capture(getRecordsRequestCapture))).andReturn(getRecordsResponse).anyTimes();
+    expect(kinesisClient.getShardIterator(capture(getShardIteratorRequestCapture))).andReturn(getShardIteratorResponse)
+        .anyTimes();
+
+    replay(kinesisClient);
+
+    KinesisConfig kinesisConfig = TestUtils.getKinesisConfig();
+    kinesisConfig.setMaxRecordsToFetch(maxRecordsLimit);
+    KinesisConsumer kinesisConsumer = new KinesisConsumer(kinesisConfig, kinesisClient);
+
+    Map<String, String> shardToSequenceMap = new HashMap<>();
+    shardToSequenceMap.put("0", "1");
+    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardToSequenceMap);
+    KinesisRecordsBatch kinesisRecordsBatch = kinesisConsumer.fetchMessages(kinesisCheckpoint, null, TIMEOUT);
+
+    Assert.assertEquals(kinesisRecordsBatch.getMessageCount(), maxRecordsLimit);
+
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      Assert.assertEquals(baToString(kinesisRecordsBatch.getMessageAtIndex(i)), DUMMY_RECORD_PREFIX + i);
+    }
+  }
+
+  @Test
+  public void testBasicConsumerWithChildShard() {
+    int maxRecordsLimit = 20;
+
+    List<ChildShard> shardList = new ArrayList<>();
+    shardList.add(ChildShard.builder().shardId(PLACEHOLDER).parentShards("0").build());
+
+    Capture<GetRecordsRequest> getRecordsRequestCapture = Capture.newInstance();
+    Capture<GetShardIteratorRequest> getShardIteratorRequestCapture = Capture.newInstance();
+
+    GetRecordsResponse getRecordsResponse =
+        GetRecordsResponse.builder().nextShardIterator(null).records(recordList).childShards(shardList).build();
+    GetShardIteratorResponse getShardIteratorResponse =
+        GetShardIteratorResponse.builder().shardIterator(PLACEHOLDER).build();
+
+    expect(kinesisClient.getRecords(capture(getRecordsRequestCapture))).andReturn(getRecordsResponse).anyTimes();
+    expect(kinesisClient.getShardIterator(capture(getShardIteratorRequestCapture))).andReturn(getShardIteratorResponse)
+        .anyTimes();
+
+    replay(kinesisClient);
+
+    KinesisConfig kinesisConfig = TestUtils.getKinesisConfig();
+    kinesisConfig.setMaxRecordsToFetch(maxRecordsLimit);
+    KinesisConsumer kinesisConsumer = new KinesisConsumer(kinesisConfig, kinesisClient);
+
+    Map<String, String> shardToSequenceMap = new HashMap<>();
+    shardToSequenceMap.put("0", "1");
+    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardToSequenceMap);
+    KinesisRecordsBatch kinesisRecordsBatch = kinesisConsumer.fetchMessages(kinesisCheckpoint, null, TIMEOUT);
+
+    Assert.assertTrue(kinesisRecordsBatch.isEndOfPartitionGroup());
+    Assert.assertEquals(kinesisRecordsBatch.getMessageCount(), NUM_RECORDS);
+
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      Assert.assertEquals(baToString(kinesisRecordsBatch.getMessageAtIndex(i)), DUMMY_RECORD_PREFIX + i);
+    }
+  }
+
+  public String baToString(byte[] bytes) {
+    return SdkBytes.fromByteArray(bytes).asUtf8String();
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProviderTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProviderTest.java
new file mode 100644
index 0000000..4845e57
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProviderTest.java
@@ -0,0 +1,156 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.plugin.stream.kinesis;
+
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.spi.stream.Checkpoint;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupInfo;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
+import org.apache.pinot.spi.stream.StreamConsumerFactory;
+import org.easymock.Capture;
+import org.easymock.CaptureType;
+import org.testng.Assert;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+import software.amazon.awssdk.services.kinesis.model.SequenceNumberRange;
+import software.amazon.awssdk.services.kinesis.model.Shard;
+
+import static org.easymock.EasyMock.capture;
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.createNiceMock;
+import static org.easymock.EasyMock.newCapture;
+import static org.easymock.EasyMock.captureInt;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+
+public class KinesisStreamMetadataProviderTest {
+  private static final String SHARD_ID_0 = "0";
+  private static final String SHARD_ID_1 = "1";
+  public static final String CLIENT_ID = "dummy";
+  public static final int TIMEOUT = 1000;
+
+  private static KinesisConnectionHandler kinesisConnectionHandler;
+  private KinesisStreamMetadataProvider kinesisStreamMetadataProvider;
+  private static StreamConsumerFactory streamConsumerFactory;
+  private static PartitionGroupConsumer partitionGroupConsumer;
+
+  @BeforeMethod
+  public void setupTest() {
+    kinesisConnectionHandler = createMock(KinesisConnectionHandler.class);
+    streamConsumerFactory = createMock(StreamConsumerFactory.class);
+    partitionGroupConsumer = createNiceMock(PartitionGroupConsumer.class);
+    kinesisStreamMetadataProvider =
+        new KinesisStreamMetadataProvider(CLIENT_ID, TestUtils.getStreamConfig(), kinesisConnectionHandler,
+            streamConsumerFactory);
+  }
+
+  @Test
+  public void getPartitionsGroupInfoListTest()
+      throws Exception {
+    Shard shard0 = Shard.builder().shardId(SHARD_ID_0).sequenceNumberRange(SequenceNumberRange.builder().startingSequenceNumber("1").build()).build();
+    Shard shard1 = Shard.builder().shardId(SHARD_ID_1).sequenceNumberRange(SequenceNumberRange.builder().startingSequenceNumber("1").build()).build();
+
+    expect(kinesisConnectionHandler.getShards()).andReturn(ImmutableList.of(shard0, shard1)).anyTimes();
+    replay(kinesisConnectionHandler);
+
+    List<PartitionGroupInfo> result = kinesisStreamMetadataProvider
+        .getPartitionGroupInfoList(CLIENT_ID, TestUtils.getStreamConfig(), new ArrayList<>(), TIMEOUT);
+
+
+    Assert.assertEquals(result.size(), 2);
+    Assert.assertEquals(result.get(0).getPartitionGroupId(), 0);
+    Assert.assertEquals(result.get(1).getPartitionGroupId(), 1);
+  }
+
+  @Test
+  public void getPartitionsGroupInfoEndOfShardTest()
+      throws Exception {
+    List<PartitionGroupMetadata> currentPartitionGroupMeta = new ArrayList<>();
+
+    Map<String, String> shardToSequenceMap = new HashMap<>();
+    shardToSequenceMap.put("0", "1");
+    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardToSequenceMap);
+
+    currentPartitionGroupMeta.add(new PartitionGroupMetadata(0, 1, kinesisCheckpoint, kinesisCheckpoint, "CONSUMING"));
+
+    Capture<Checkpoint> checkpointArgs = newCapture(CaptureType.ALL);
+    Capture<PartitionGroupMetadata> partitionGroupMetadataCapture = newCapture(CaptureType.ALL);
+    Capture<Integer> intArguments = newCapture(CaptureType.ALL);
+    Capture<String> stringCapture = newCapture(CaptureType.ALL);
+
+    Shard shard0 = Shard.builder().shardId(SHARD_ID_0).sequenceNumberRange(SequenceNumberRange.builder().startingSequenceNumber("1").endingSequenceNumber("1").build()).build();
+    Shard shard1 = Shard.builder().shardId(SHARD_ID_1).sequenceNumberRange(SequenceNumberRange.builder().startingSequenceNumber("1").build()).build();
+    expect(kinesisConnectionHandler.getShards()).andReturn(ImmutableList.of(shard0, shard1)).anyTimes();
+    expect(streamConsumerFactory
+        .createPartitionGroupConsumer(capture(stringCapture), capture(partitionGroupMetadataCapture)))
+        .andReturn(partitionGroupConsumer).anyTimes();
+    expect(partitionGroupConsumer
+        .fetchMessages(capture(checkpointArgs), capture(checkpointArgs), captureInt(intArguments)))
+        .andReturn(new KinesisRecordsBatch(new ArrayList<>(), "0", true)).anyTimes();
+
+    replay(kinesisConnectionHandler, streamConsumerFactory, partitionGroupConsumer);
+
+    List<PartitionGroupInfo> result = kinesisStreamMetadataProvider
+        .getPartitionGroupInfoList(CLIENT_ID, TestUtils.getStreamConfig(), currentPartitionGroupMeta, TIMEOUT);
+
+    Assert.assertEquals(result.size(), 1);
+    Assert.assertEquals(result.get(0).getPartitionGroupId(), 1);
+  }
+
+  @Test
+  public void getPartitionsGroupInfoChildShardsTest()
+      throws Exception {
+    List<PartitionGroupMetadata> currentPartitionGroupMeta = new ArrayList<>();
+
+    Map<String, String> shardToSequenceMap = new HashMap<>();
+    shardToSequenceMap.put("1", "1");
+    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardToSequenceMap);
+
+    currentPartitionGroupMeta.add(new PartitionGroupMetadata(0, 1, kinesisCheckpoint, kinesisCheckpoint, "CONSUMING"));
+
+    Capture<Checkpoint> checkpointArgs = newCapture(CaptureType.ALL);
+    Capture<PartitionGroupMetadata> partitionGroupMetadataCapture = newCapture(CaptureType.ALL);
+    Capture<Integer> intArguments = newCapture(CaptureType.ALL);
+    Capture<String> stringCapture = newCapture(CaptureType.ALL);
+
+    Shard shard0 = Shard.builder().shardId(SHARD_ID_0).parentShardId(SHARD_ID_1).sequenceNumberRange(SequenceNumberRange.builder().startingSequenceNumber("1").build()).build();
+    Shard shard1 = Shard.builder().shardId(SHARD_ID_1).sequenceNumberRange(SequenceNumberRange.builder().startingSequenceNumber("1").endingSequenceNumber("1").build()).build();
+
+    expect(kinesisConnectionHandler.getShards()).andReturn(ImmutableList.of(shard0, shard1)).anyTimes();
+    expect(streamConsumerFactory
+        .createPartitionGroupConsumer(capture(stringCapture), capture(partitionGroupMetadataCapture)))
+        .andReturn(partitionGroupConsumer).anyTimes();
+    expect(partitionGroupConsumer
+        .fetchMessages(capture(checkpointArgs), capture(checkpointArgs), captureInt(intArguments)))
+        .andReturn(new KinesisRecordsBatch(new ArrayList<>(), "0", true)).anyTimes();
+
+    replay(kinesisConnectionHandler, streamConsumerFactory, partitionGroupConsumer);
+
+    List<PartitionGroupInfo> result = kinesisStreamMetadataProvider
+        .getPartitionGroupInfoList(CLIENT_ID, TestUtils.getStreamConfig(), currentPartitionGroupMeta, TIMEOUT);
+
+    Assert.assertEquals(result.size(), 1);
+    Assert.assertEquals(result.get(0).getPartitionGroupId(), 0);
+  }
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/TestUtils.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/TestUtils.java
new file mode 100644
index 0000000..28d02de
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/TestUtils.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.plugin.stream.kinesis;
+
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.pinot.spi.stream.StreamConfig;
+import org.apache.pinot.spi.stream.StreamConfigProperties;
+import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
+
+
+public class TestUtils {
+  private static final String STREAM_NAME = "kinesis-test";
+  private static final String AWS_REGION = "us-west-2";
+
+  public static StreamConfig getStreamConfig() {
+    Map<String, String> props = new HashMap<>();
+    props.put(KinesisConfig.STREAM, STREAM_NAME);
+    props.put(KinesisConfig.AWS_REGION, AWS_REGION);
+    props.put(KinesisConfig.MAX_RECORDS_TO_FETCH, "10");
+    props.put(KinesisConfig.SHARD_ITERATOR_TYPE, ShardIteratorType.AT_SEQUENCE_NUMBER.toString());
+    props.put(StreamConfigProperties.STREAM_TYPE, "kinesis");
+    props.put("stream.kinesis.consumer.type", "lowLevel");
+    props.put("stream.kinesis.topic.name", STREAM_NAME);
+    props.put("stream.kinesis.decoder.class.name", "ABCD");
+    props.put("stream.kinesis.consumer.factory.class.name",
+        "org.apache.pinot.plugin.stream.kinesis.KinesisConsumerFactory");
+    return new StreamConfig("", props);
+  }
+
+  public static KinesisConfig getKinesisConfig() {
+    Map<String, String> props = new HashMap<>();
+    props.put(KinesisConfig.STREAM, STREAM_NAME);
+    props.put(KinesisConfig.AWS_REGION, AWS_REGION);
+    props.put(KinesisConfig.MAX_RECORDS_TO_FETCH, "10");
+    props.put(KinesisConfig.SHARD_ITERATOR_TYPE, ShardIteratorType.AT_SEQUENCE_NUMBER.toString());
+    return new KinesisConfig(props);
+  }
+}
diff --git a/pom.xml b/pom.xml
index 881526b..5676edc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -117,7 +117,7 @@
     <parquet.version>1.8.0</parquet.version>
     <helix.version>0.9.8</helix.version>
     <zkclient.version>0.7</zkclient.version>
-    <jackson.version>2.12.0</jackson.version>
+    <jackson.version>2.9.8</jackson.version>
     <async-http-client.version>1.9.21</async-http-client.version>
     <jersey.version>2.28</jersey.version>
     <grizzly.version>2.4.4</grizzly.version>




[incubator-pinot] 25/47: Refactor code

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 71ac64d04defd07f4a26d53f1d3cbb48d3a7a781
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 23:49:27 2020 +0530

    Refactor code
---
 .../pinot/plugin/stream/kinesis/KinesisConnectionHandler.java       | 6 ++++++
 .../org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java     | 1 -
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index c41598e..ba94b0a 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -59,4 +59,10 @@ public class KinesisConnectionHandler {
         _kinesisClient.listShards(ListShardsRequest.builder().streamName(_stream).build());
     return listShardsResponse.shards();
   }
+
+  public void close() {
+    if (_kinesisClient != null) {
+      _kinesisClient.close();
+    }
+  }
 }
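
Since KinesisConnectionHandler is not declared Closeable here, callers release the underlying KinesisClient explicitly. A minimal sketch, reusing the stream name and region from the test code:

    KinesisConnectionHandler connectionHandler = new KinesisConnectionHandler("kinesis-test", "us-west-2");
    try {
      // List the stream's shards, then release the AWS client.
      connectionHandler.getShards().forEach(shard -> System.out.println(shard.shardId()));
    } finally {
      connectionHandler.close();
    }
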
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index dfd6cda..24810ba 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -46,7 +46,6 @@ import software.amazon.awssdk.services.kinesis.model.Record;
 import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
-//TODO: Handle exceptions and timeout
 public class KinesisConsumer extends KinesisConnectionHandler implements ConsumerV2 {
   String _stream;
   Integer _maxRecords;




[incubator-pinot] 10/47: default methods to avoid interface changes

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 396dae01f0804bc916bfba1bf2d0c52a374b767e
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Sat Jan 2 18:16:09 2021 -0800

    default methods to avoid interface changes
---
 .../realtime/LLRealtimeSegmentDataManager.java     |  3 +-
 .../impl/fakestream/FakeStreamConsumerFactory.java |  8 +---
 .../fakestream/FakeStreamMetadataProvider.java     | 11 -----
 ...lakyConsumerRealtimeClusterIntegrationTest.java |  5 ---
 .../stream/kafka09/KafkaConsumerFactory.java       |  7 ---
 .../kafka09/KafkaStreamMetadataProvider.java       | 46 +------------------
 .../kafka09/KafkaPartitionLevelConsumerTest.java   |  2 +-
 .../stream/kafka20/KafkaConsumerFactory.java       |  7 ---
 .../kafka20/KafkaStreamMetadataProvider.java       | 52 ----------------------
 .../spi/stream/PartitionGroupInfoFetcher.java      |  4 +-
 .../pinot/spi/stream/StreamConsumerFactory.java    |  4 +-
 .../pinot/spi/stream/StreamMetadataProvider.java   | 36 ++++++++++++---
 12 files changed, 43 insertions(+), 142 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index 80aa9d8..758c656 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -1246,7 +1246,8 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
         int numPartitions = columnPartitionConfig.getNumPartitions();
         try {
           // fixme: get this from ideal state
-          int numStreamPartitions = _streamMetadataProvider.getPartitionGroupInfoList(Collections.emptyList(), 5000).size();
+          int numStreamPartitions = _streamMetadataProvider
+              .getPartitionGroupInfoList(_clientId, _partitionLevelStreamConfig, Collections.emptyList(), 5000).size();
           if (numStreamPartitions != numPartitions) {
             segmentLogger.warn(
                 "Number of stream partitions: {} does not match number of partitions in the partition config: {}, using number of stream partitions",
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
index fbeb808..b0dc7eb 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
@@ -69,12 +69,6 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
     return new FakeStreamMetadataProvider(_streamConfig);
   }
 
-
-  @Override
-  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
-    return null;
-  }
-
   public static void main(String[] args)
       throws Exception {
     String clientId = "client_id_localhost_tester";
@@ -88,7 +82,7 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
 
     // stream metadata provider
     StreamMetadataProvider streamMetadataProvider = streamConsumerFactory.createStreamMetadataProvider(clientId);
-    int partitionCount = streamMetadataProvider.getPartitionGroupInfoList(Collections.emptyList(), 10_000).size();
+    int partitionCount = streamMetadataProvider.getPartitionGroupInfoList(clientId, streamConfig, Collections.emptyList(), 10_000).size();
     System.out.println(partitionCount);
 
     // Partition metadata provider
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
index 0de0ce2..61aa01f 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
@@ -48,17 +48,6 @@ public class FakeStreamMetadataProvider implements StreamMetadataProvider {
     return _numPartitions;
   }
 
-  @Override
-  public List<PartitionGroupInfo> getPartitionGroupInfoList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
-      throws TimeoutException {
-    List<PartitionGroupInfo> partitionGroupMetadataList = new ArrayList<>();
-    for (int i = 0; i < _numPartitions; i++) {
-      partitionGroupMetadataList.add(new PartitionGroupInfo(i, fetchStreamPartitionOffset(_streamConfig.getOffsetCriteria(), 5000).toString()));
-    }
-    return partitionGroupMetadataList;
-  }
-
   public long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis) throws TimeoutException {
     throw new UnsupportedOperationException("This method is deprecated");
   }
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
index c7523e3..4503de0 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
@@ -119,10 +119,5 @@ public class FlakyConsumerRealtimeClusterIntegrationTest extends RealtimeCluster
     public StreamMetadataProvider createStreamMetadataProvider(String clientId) {
       throw new UnsupportedOperationException();
     }
-
-    @Override
-    public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
-      return null;
-    }
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
index fe5a461..615e354 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
@@ -19,8 +19,6 @@
 package org.apache.pinot.plugin.stream.kafka09;
 
 import java.util.Set;
-import org.apache.pinot.spi.stream.PartitionGroupConsumer;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamLevelConsumer;
@@ -52,9 +50,4 @@ public class KafkaConsumerFactory extends StreamConsumerFactory {
   public StreamMetadataProvider createStreamMetadataProvider(String clientId) {
     return new KafkaStreamMetadataProvider(clientId, _streamConfig);
   }
-
-  @Override
-  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
-    return null;
-  }
 }
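
The pattern running through this commit is the standard way to evolve an SPI without breaking implementations: the new partition-group methods land as defaults (or base-class methods) that adapt the old partition-level API, so stub overrides like the one deleted above can simply go away. A generic illustration with hypothetical names:

    interface Batch {}

    class PartitionGroup {
      private final int _id;
      PartitionGroup(int id) { _id = id; }
      int getId() { return _id; }
    }

    interface Fetcher {
      Batch fetch(int partitionId);   // original partition-level method

      // New method whose default adapts the old one, so existing
      // implementations compile and behave unchanged.
      default Batch fetch(PartitionGroup group) {
        return fetch(group.getId());
      }
    }
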
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
index 2d0ad31..06ee697 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
@@ -22,13 +22,9 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import com.google.common.util.concurrent.Uninterruptibles;
 import java.io.IOException;
-import java.time.Duration;
-import java.util.ArrayList;
 import java.util.Collections;
-import java.util.List;
 import java.util.concurrent.TimeUnit;
 import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
 import kafka.api.PartitionOffsetRequestInfo;
 import kafka.common.TopicAndPartition;
 import kafka.javaapi.OffsetRequest;
@@ -40,8 +36,6 @@ import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
-import org.apache.pinot.spi.stream.PartitionGroupInfo;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -55,14 +49,13 @@ import org.slf4j.LoggerFactory;
 public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implements StreamMetadataProvider {
   private static final Logger LOGGER = LoggerFactory.getLogger(KafkaStreamMetadataProvider.class);
 
-  private StreamConfig _streamConfig;
-
   /**
    * Create a partition specific metadata provider
+   * @param streamConfig the stream config for the topic
+   * @param partition the partition for which to create the metadata provider
    */
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig, int partition) {
     super(clientId, streamConfig, partition, new KafkaSimpleConsumerFactoryImpl());
-    _streamConfig = streamConfig;
   }
 
   /**
@@ -71,21 +64,18 @@ public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implemen
    */
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig) {
     super(clientId, streamConfig, new KafkaSimpleConsumerFactoryImpl());
-    _streamConfig = streamConfig;
   }
 
   @VisibleForTesting
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig, int partition,
       KafkaSimpleConsumerFactory kafkaSimpleConsumerFactory) {
     super(clientId, streamConfig, partition, kafkaSimpleConsumerFactory);
-    _streamConfig = streamConfig;
   }
 
   @VisibleForTesting
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig,
       KafkaSimpleConsumerFactory kafkaSimpleConsumerFactory) {
     super(clientId, streamConfig, kafkaSimpleConsumerFactory);
-    _streamConfig = streamConfig;
   }
 
   /**
@@ -94,12 +84,7 @@ public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implemen
    * @return
    */
   @Override
-  @Deprecated
   public synchronized int fetchPartitionCount(long timeoutMillis) {
-    return fetchPartitionCountInternal(timeoutMillis);
-  }
-
-  private int fetchPartitionCountInternal(long timeoutMillis) {
     int unknownTopicReplyCount = 0;
     final int MAX_UNKNOWN_TOPIC_REPLY_COUNT = 10;
     int kafkaErrorCount = 0;
@@ -160,33 +145,6 @@ public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implemen
     throw new TimeoutException();
   }
 
-  /**
-   * Fetch the partitionGroupMetadata list.
-   * @param currentPartitionGroupsMetadata In case of Kafka, each partition group contains a single partition.
-   */
-  @Override
-  public List<PartitionGroupInfo> getPartitionGroupInfoList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
-      throws java.util.concurrent.TimeoutException {
-    int partitionCount = fetchPartitionCountInternal(timeoutMillis);
-    List<PartitionGroupInfo> newPartitionGroupInfoList = new ArrayList<>(partitionCount);
-
-    // add a PartitionGroupInfo into the list foreach partition already present in current.
-    // the end checkpoint is set as checkpoint
-    for (PartitionGroupMetadata currentPartitionGroupMetadata : currentPartitionGroupsMetadata) {
-      newPartitionGroupInfoList.add(new PartitionGroupInfo(currentPartitionGroupMetadata.getPartitionGroupId(),
-          currentPartitionGroupMetadata.getEndCheckpoint()));
-    }
-    // add PartitiongroupInfo for new partitions
-    // use offset criteria from stream config
-    for (int i = currentPartitionGroupsMetadata.size(); i < partitionCount; i++) {
-      StreamPartitionMsgOffset streamPartitionMsgOffset =
-          fetchStreamPartitionOffset(_streamConfig.getOffsetCriteria(), 5000);
-      newPartitionGroupInfoList.add(new PartitionGroupInfo(i, streamPartitionMsgOffset.toString()));
-    }
-    return newPartitionGroupInfoList;
-  }
-
   public synchronized long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
       throws java.util.concurrent.TimeoutException {
     throw new UnsupportedOperationException("The use of this method s not supported");
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
index 9d3091e..90dc5ad 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
@@ -291,7 +291,7 @@ public class KafkaPartitionLevelConsumerTest {
 
     KafkaStreamMetadataProvider streamMetadataProvider =
         new KafkaStreamMetadataProvider(clientId, streamConfig, mockKafkaSimpleConsumerFactory);
-    Assert.assertEquals(streamMetadataProvider.getPartitionGroupInfoList(Collections.emptyList(), 10000L), 2);
+    Assert.assertEquals(streamMetadataProvider.getPartitionGroupInfoList(clientId, streamConfig, Collections.emptyList(), 10000).size(), 2);
   }
 
   @Test
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
index b6746ff..e0d1015 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
@@ -19,8 +19,6 @@
 package org.apache.pinot.plugin.stream.kafka20;
 
 import java.util.Set;
-import org.apache.pinot.spi.stream.PartitionGroupConsumer;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamLevelConsumer;
@@ -49,9 +47,4 @@ public class KafkaConsumerFactory extends StreamConsumerFactory {
   public StreamMetadataProvider createStreamMetadataProvider(String clientId) {
     return new KafkaStreamMetadataProvider(clientId, _streamConfig);
   }
-
-  @Override
-  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
-    return new KafkaPartitionLevelConsumer(clientId, _streamConfig, metadata.getPartitionGroupId());
-  }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
index 1d3162a..38c49f5 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
@@ -21,17 +21,11 @@ package org.apache.pinot.plugin.stream.kafka20;
 import com.google.common.base.Preconditions;
 import java.io.IOException;
 import java.time.Duration;
-import java.util.ArrayList;
 import java.util.Collections;
-import java.util.List;
 import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-import org.apache.kafka.common.TopicPartition;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
-import org.apache.pinot.spi.stream.PartitionGroupInfo;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -39,15 +33,12 @@ import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 
 public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHandler implements StreamMetadataProvider {
 
-  private StreamConfig _streamConfig;
-
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig) {
     this(clientId, streamConfig, Integer.MIN_VALUE);
   }
 
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig, int partition) {
     super(clientId, streamConfig, partition);
-    _streamConfig = streamConfig;
   }
 
   @Override
@@ -56,33 +47,6 @@ public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHa
     return _consumer.partitionsFor(_topic, Duration.ofMillis(timeoutMillis)).size();
   }
 
-  /**
-   * Fetch the partitionGroupMetadata list.
-   * @param currentPartitionGroupsMetadata In case of Kafka, each partition group contains a single partition.
-   */
-  @Override
-  public List<PartitionGroupInfo> getPartitionGroupInfoList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
-      throws TimeoutException {
-    int partitionCount = _consumer.partitionsFor(_topic, Duration.ofMillis(timeoutMillis)).size();
-    List<PartitionGroupInfo> newPartitionGroupInfoList = new ArrayList<>(partitionCount);
-
-    // add a PartitionGroupInfo into the list foreach partition already present in current.
-    // the end checkpoint is set as checkpoint
-    for (PartitionGroupMetadata currentPartitionGroupMetadata : currentPartitionGroupsMetadata) {
-      newPartitionGroupInfoList.add(new PartitionGroupInfo(currentPartitionGroupMetadata.getPartitionGroupId(),
-          currentPartitionGroupMetadata.getEndCheckpoint()));
-    }
-    // add PartitiongroupInfo for new partitions
-    // use offset criteria from stream config
-    for (int i = currentPartitionGroupsMetadata.size(); i < partitionCount; i++) {
-      StreamPartitionMsgOffset streamPartitionMsgOffset =
-          fetchStreamPartitionOffsetInternal(i, _streamConfig.getOffsetCriteria(), 5000);
-      newPartitionGroupInfoList.add(new PartitionGroupInfo(i, streamPartitionMsgOffset.toString()));
-    }
-    return newPartitionGroupInfoList;
-  }
-
   public synchronized long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
       throws java.util.concurrent.TimeoutException {
     throw new UnsupportedOperationException("The use of this method is not supported");
@@ -105,22 +69,6 @@ public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHa
     return new LongMsgOffset(offset);
   }
 
-  private StreamPartitionMsgOffset fetchStreamPartitionOffsetInternal(int partitionId, @Nonnull OffsetCriteria offsetCriteria, long timeoutMillis) {
-    Preconditions.checkNotNull(offsetCriteria);
-    TopicPartition topicPartition = new TopicPartition(_topic, partitionId);
-    long offset = -1;
-    if (offsetCriteria.isLargest()) {
-      offset =  _consumer.endOffsets(Collections.singletonList(topicPartition), Duration.ofMillis(timeoutMillis))
-          .get(topicPartition);
-    } else if (offsetCriteria.isSmallest()) {
-      offset =  _consumer.beginningOffsets(Collections.singletonList(topicPartition), Duration.ofMillis(timeoutMillis))
-          .get(topicPartition);
-    } else {
-      throw new IllegalArgumentException("Unknown initial offset value " + offsetCriteria.toString());
-    }
-    return new LongMsgOffset(offset);
-  }
-
   @Override
   public void close()
       throws IOException {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
index d13be10..f2d3f17 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
@@ -32,6 +32,7 @@ public class PartitionGroupInfoFetcher implements Callable<Boolean> {
   private static final Logger LOGGER = LoggerFactory.getLogger(PartitionGroupInfoFetcher.class);
 
   private List<PartitionGroupInfo> _partitionGroupInfoList;
+  private final StreamConfig _streamConfig;
   private final List<PartitionGroupMetadata> _currentPartitionGroupMetadata;
   private final StreamConsumerFactory _streamConsumerFactory;
   private Exception _exception;
@@ -40,6 +41,7 @@ public class PartitionGroupInfoFetcher implements Callable<Boolean> {
   public PartitionGroupInfoFetcher(StreamConfig streamConfig, List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
     _streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
     _topicName = streamConfig.getTopicName();
+    _streamConfig = streamConfig;
     _currentPartitionGroupMetadata = currentPartitionGroupMetadataList;
   }
 
@@ -61,7 +63,7 @@ public class PartitionGroupInfoFetcher implements Callable<Boolean> {
     String clientId = PartitionGroupInfoFetcher.class.getSimpleName() + "-" + _topicName;
     try (
         StreamMetadataProvider streamMetadataProvider = _streamConsumerFactory.createStreamMetadataProvider(clientId)) {
-      _partitionGroupInfoList = streamMetadataProvider.getPartitionGroupInfoList(_currentPartitionGroupMetadata, /*maxWaitTimeMs=*/5000L);
+      _partitionGroupInfoList = streamMetadataProvider.getPartitionGroupInfoList(clientId, _streamConfig, _currentPartitionGroupMetadata, /*maxWaitTimeMs=*/5000);
       if (_exception != null) {
         // We had at least one failure, but succeeded now. Log an info
         LOGGER.info("Successfully retrieved partition group info for topic {}", _topicName);
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
index db48a83..f993fed 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConsumerFactory.java
@@ -75,5 +75,7 @@ public abstract class StreamConsumerFactory {
   }
 
   // creates a consumer which consumes from a partition group
-  public abstract PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata);
+  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
+    return createPartitionLevelConsumer(clientId, metadata.getPartitionGroupId());
+  }
 }
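The net effect of this change: plugins for purely partition-based streams (like Kafka) inherit a working createPartitionGroupConsumer for free, while sharded streams (like Kinesis) override it. A minimal sketch with simplified, hypothetical stand-ins for the SPI types (the delegation implies PartitionLevelConsumer is a subtype of PartitionGroupConsumer in this branch):

// Simplified, hypothetical SPI stand-ins to illustrate the default delegation.
interface PartitionGroupConsumer {}
interface PartitionLevelConsumer extends PartitionGroupConsumer {}

class GroupMetadata {
  private final int _partitionGroupId;
  GroupMetadata(int id) { _partitionGroupId = id; }
  int getPartitionGroupId() { return _partitionGroupId; }
}

abstract class ConsumerFactory {
  abstract PartitionLevelConsumer createPartitionLevelConsumer(String clientId, int partition);

  // Default: a partition group maps to exactly one partition, so the
  // partition-level consumer is reused as the group consumer.
  PartitionGroupConsumer createPartitionGroupConsumer(String clientId, GroupMetadata metadata) {
    return createPartitionLevelConsumer(clientId, metadata.getPartitionGroupId());
  }
}

// A Kafka-style plugin only implements the partition-level method;
// a Kinesis-style plugin would override createPartitionGroupConsumer instead.
class KafkaLikeFactory extends ConsumerFactory {
  @Override
  PartitionLevelConsumer createPartitionLevelConsumer(String clientId, int partition) {
    return new PartitionLevelConsumer() {};
  }
}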
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
index f595ea3..572cd02 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
@@ -19,6 +19,8 @@
 package org.apache.pinot.spi.stream;
 
 import java.io.Closeable;
+import java.time.Duration;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
@@ -39,11 +41,6 @@ public interface StreamMetadataProvider extends Closeable {
   @Deprecated
   int fetchPartitionCount(long timeoutMillis);
 
-  // takes the current state of partition groups (groupings of shards, the state of the consumption) and creates the new state
-  List<PartitionGroupInfo> getPartitionGroupInfoList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
-      throws TimeoutException;
-
   // Issue 5953 Retain this interface for 0.5.0, remove in 0.6.0
   @Deprecated
   long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
@@ -60,4 +57,33 @@ public interface StreamMetadataProvider extends Closeable {
     long offset = fetchPartitionOffset(offsetCriteria, timeoutMillis);
     return new LongMsgOffset(offset);
   }
+
+  /**
+   * Fetches the list of {@link PartitionGroupInfo} for the stream, based on the current partition group metadata.
+   * @param currentPartitionGroupsMetadata the current state of the partition groups. In case of Kafka, each partition group contains a single partition.
+   */
+  default List<PartitionGroupInfo> getPartitionGroupInfoList(String clientId, StreamConfig streamConfig,
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, int timeoutMillis)
+      throws TimeoutException {
+    int partitionCount = fetchPartitionCount(timeoutMillis);
+    List<PartitionGroupInfo> newPartitionGroupInfoList = new ArrayList<>(partitionCount);
+
+    // add a PartitionGroupInfo to the list for each partition already present in the current metadata.
+    // the end checkpoint of the current metadata is used as the new start checkpoint
+    for (PartitionGroupMetadata currentPartitionGroupMetadata : currentPartitionGroupsMetadata) {
+      newPartitionGroupInfoList.add(new PartitionGroupInfo(currentPartitionGroupMetadata.getPartitionGroupId(),
+          currentPartitionGroupMetadata.getEndCheckpoint()));
+    }
+    // add a PartitionGroupInfo for each newly added partition,
+    // using the offset criteria from the stream config
+    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
+    for (int i = currentPartitionGroupsMetadata.size(); i < partitionCount; i++) {
+      StreamMetadataProvider partitionMetadataProvider =
+          streamConsumerFactory.createPartitionMetadataProvider(clientId, i);
+      StreamPartitionMsgOffset streamPartitionMsgOffset =
+          partitionMetadataProvider.fetchStreamPartitionOffset(streamConfig.getOffsetCriteria(), timeoutMillis);
+      newPartitionGroupInfoList.add(new PartitionGroupInfo(i, streamPartitionMsgOffset.toString()));
+    }
+    return newPartitionGroupInfoList;
+  }
 }




[incubator-pinot] 40/47: Dont create new CONSUMING segment if shard has reached end of life

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 214c007c2915c8aa149e1e06689e66abaa85b083
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Thu Jan 7 16:07:16 2021 -0800

    Dont create new CONSUMING segment if shard has reached end of life
---
 .../protocols/SegmentCompletionProtocol.java       |   1 +
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 101 ++++++++++-----------
 .../RealtimeSegmentValidationManager.java          |   2 +-
 .../PinotLLCRealtimeSegmentManagerTest.java        |   3 +-
 .../realtime/LLRealtimeSegmentDataManager.java     |  13 ++-
 .../plugin/stream/kinesis/KinesisConsumer.java     |   8 +-
 .../stream/kinesis/KinesisConsumerFactory.java     |   2 +-
 .../plugin/stream/kinesis/KinesisRecordsBatch.java |   9 +-
 .../kinesis/KinesisStreamMetadataProvider.java     |  48 +++++++---
 .../org/apache/pinot/spi/stream/MessageBatch.java  |   7 ++
 10 files changed, 115 insertions(+), 79 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java b/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
index dd1330d..74614df 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/protocols/SegmentCompletionProtocol.java
@@ -138,6 +138,7 @@ public class SegmentCompletionProtocol {
 
   public static final String REASON_ROW_LIMIT = "rowLimit";  // Stop reason sent by server as max num rows reached
   public static final String REASON_TIME_LIMIT = "timeLimit";  // Stop reason sent by server as max time reached
+  public static final String REASON_END_OF_PARTITION_GROUP = "endOfPartitionGroup";  // Stop reason sent by server as end of partitionGroup reached
 
   // Canned responses
   public static final Response RESP_NOT_LEADER =
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index bbd1ef3..9fa6850 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -477,7 +477,6 @@ public class PinotLLCRealtimeSegmentManager {
     Preconditions
         .checkState(idealState.getInstanceStateMap(committingSegmentName).containsValue(SegmentStateModel.CONSUMING),
             "Failed to find instance in CONSUMING state in IdealState for segment: %s", committingSegmentName);
-    int numPartitions = getNumPartitionsFromIdealState(idealState);
     int numReplicas = getNumReplicas(tableConfig, instancePartitions);
 
     /*
@@ -496,18 +495,21 @@ public class PinotLLCRealtimeSegmentManager {
 
     // Step-2
 
-    // Say we currently were consuming from 3 shards A, B, C. Of those, A is the one committing. Also suppose that new partition D has come up
+    // Say we were currently consuming from 2 shards A and B. Of those, A is the one committing.
 
-    // get current partition groups - this gives current state of latest segments for each partition [A - DONE], [B - IN_PROGRESS], [C - IN_PROGRESS]
+    // get current partition groups - this gives current state of latest segments for each partition [A - DONE], [B - IN_PROGRESS]
     List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
     PartitionLevelStreamConfig streamConfig = new PartitionLevelStreamConfig(tableConfig.getTableName(),
         IngestionConfigUtils.getStreamConfigMap(tableConfig));
 
-    // find new partition groups [A],[B],[C],[D]
+    // find new partition groups [A],[B],[C],[D] (assume A split into C and D)
+    // If the segment has consumed all of A, we will receive B,C,D
+    // If the segment has not yet reached the last msg of A, we will receive A,B,C,D
     List<PartitionGroupInfo> newPartitionGroupInfoList =
         getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
+    int numPartitions = newPartitionGroupInfoList.size();
 
-    // create new segment metadata, only if it is not IN_PROGRESS in the current state
+    // create new segment metadata, only if PartitionGroupInfo was returned for it in the newPartitionGroupInfoList
     Map<Integer, PartitionGroupMetadata> currentGroupIdToMetadata = currentPartitionGroupMetadataList.stream().collect(
         Collectors.toMap(PartitionGroupMetadata::getPartitionGroupId, p -> p));
 
@@ -519,36 +521,25 @@ public class PinotLLCRealtimeSegmentManager {
       PartitionGroupMetadata currentPartitionGroupMetadata = currentGroupIdToMetadata.get(newPartitionGroupId);
       if (currentPartitionGroupMetadata == null) { // not present in current state. New partition found.
         // make new segment
-        // FIXME: flushThreshold of segment is actually (configured threshold/numPartitions)
-        //  In Kinesis, with every split/merge, we get new partitions, and an old partition gets deactivated.
-        //  However, the getPartitionGroupInfo call returns ALL shards, regardless of whether they're active or not.
-        //  So our numPartitions will forever keep increasing.
-        // TODO: can the getPartitionGroupInfo return the active partitions only, based on the checkpoints passed in current?
+        // fixme: letting validation manager do this would be best, otherwise we risk creating multiple CONSUMING segments
         String newLLCSegmentName =
             setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo, newSegmentCreationTimeMs,
                 instancePartitions, numPartitions, numReplicas);
         newConsumingSegmentNames.add(newLLCSegmentName);
       } else {
-        String currentStatus = currentPartitionGroupMetadata.getStatus();
-        if (!currentStatus.equals(Status.IN_PROGRESS.toString())) {
-          // not IN_PROGRESS anymore in current state. Should be DONE.
-          // This should ONLY happen for the committing segment's partition. Need to trigger new consuming segment
-          // todo: skip this if the partition doesn't match with the committing segment?
+        LLCSegmentName committingLLCSegment = new LLCSegmentName(committingSegmentName);
+        // Update this only for committing segment. All other partitions should get updated by their own commit call
+        if (newPartitionGroupId == committingLLCSegment.getPartitionGroupId()) {
+          Preconditions.checkState(currentPartitionGroupMetadata.getStatus().equals(Status.DONE.toString()));
           LLCSegmentName newLLCSegmentName = new LLCSegmentName(rawTableName, newPartitionGroupId,
               currentPartitionGroupMetadata.getSequenceNumber() + 1, newSegmentCreationTimeMs);
           createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, newSegmentCreationTimeMs,
               committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
           newConsumingSegmentNames.add(newLLCSegmentName.getSegmentName());
-
-          // FIXME: a new CONSUMING segment is created even if EOL for this shard has been reached.
-          //  the logic in getPartitionGroupInfo to prevent returning of EOLed shards isn't working
-          //  OPTION: Since consumer knows about it, it can pass param in request/committingSegmentDescriptor "isEndOfShard"
-          //  We can set that in metadata for validation manager to skip these partitions
         }
       }
     }
 
-
     // Step-3
     SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
     Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap =
@@ -840,8 +831,9 @@ public class PinotLLCRealtimeSegmentManager {
       if (idealState.isEnabled()) {
         List<PartitionGroupMetadata> currentPartitionGroupMetadataList =
             getCurrentPartitionGroupMetadataList(idealState);
-        int numPartitions = getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList).size();
-        return ensureAllPartitionsConsuming(tableConfig, streamConfig, idealState, numPartitions);
+        List<PartitionGroupInfo> newPartitionGroupInfoList =
+            getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
+        return ensureAllPartitionsConsuming(tableConfig, streamConfig, idealState, newPartitionGroupInfoList);
       } else {
         LOGGER.info("Skipping LLC segments validation for disabled table: {}", realtimeTableName);
         return idealState;
@@ -988,11 +980,14 @@ public class PinotLLCRealtimeSegmentManager {
    */
   @VisibleForTesting
   IdealState ensureAllPartitionsConsuming(TableConfig tableConfig, PartitionLevelStreamConfig streamConfig,
-      IdealState idealState, int numPartitions) {
+      IdealState idealState, List<PartitionGroupInfo> newPartitionGroupInfoList) {
     String realtimeTableName = tableConfig.getTableName();
 
     InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
     int numReplicas = getNumReplicas(tableConfig, instancePartitions);
+    int numPartitions = newPartitionGroupInfoList.size();
+    Set<Integer> newPartitionGroupSet =
+        newPartitionGroupInfoList.stream().map(PartitionGroupInfo::getPartitionGroupId).collect(Collectors.toSet());
 
     SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
     Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap =
@@ -1029,7 +1024,7 @@ public class PinotLLCRealtimeSegmentManager {
       Map<String, String> instanceStateMap = instanceStatesMap.get(latestSegmentName);
       if (instanceStateMap != null) {
         // Latest segment of metadata is in idealstate.
-        if (instanceStateMap.values().contains(SegmentStateModel.CONSUMING)) {
+        if (instanceStateMap.containsValue(SegmentStateModel.CONSUMING)) {
           if (latestSegmentZKMetadata.getStatus() == Status.DONE) {
 
             // step-1 of commmitSegmentMetadata is done (i.e. marking old segment as DONE)
@@ -1040,15 +1035,23 @@ public class PinotLLCRealtimeSegmentManager {
             }
             LOGGER.info("Repairing segment: {} which is DONE in segment ZK metadata, but is CONSUMING in IdealState",
                 latestSegmentName);
-
-            LLCSegmentName newLLCSegmentName = getNextLLCSegmentName(latestLLCSegmentName, currentTimeMs);
-            String newSegmentName = newLLCSegmentName.getSegmentName();
-            CommittingSegmentDescriptor committingSegmentDescriptor = new CommittingSegmentDescriptor(latestSegmentName,
-                (offsetFactory.create(latestSegmentZKMetadata.getEndOffset()).toString()), 0);
-            createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, currentTimeMs,
-                committingSegmentDescriptor, latestSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
-            updateInstanceStatesForNewConsumingSegment(instanceStatesMap, latestSegmentName, newSegmentName,
-                segmentAssignment, instancePartitionsMap);
+            if (newPartitionGroupSet.contains(partitionGroupId)) {
+              LLCSegmentName newLLCSegmentName = getNextLLCSegmentName(latestLLCSegmentName, currentTimeMs);
+              String newSegmentName = newLLCSegmentName.getSegmentName();
+              CommittingSegmentDescriptor committingSegmentDescriptor = new CommittingSegmentDescriptor(latestSegmentName,
+                  (offsetFactory.create(latestSegmentZKMetadata.getEndOffset()).toString()), 0);
+              createNewSegmentZKMetadata(tableConfig, streamConfig, newLLCSegmentName, currentTimeMs,
+                  committingSegmentDescriptor, latestSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
+              updateInstanceStatesForNewConsumingSegment(instanceStatesMap, latestSegmentName, newSegmentName,
+                  segmentAssignment, instancePartitionsMap);
+            } else { // partition group reached end of life
+              LOGGER.info(
+                  "PartitionGroup: {} has reached end of life. Updating ideal state for segment: {}. "
+                      + "Skipping creation of new ZK metadata and new segment in ideal state",
+                  partitionGroupId, latestSegmentName);
+              updateInstanceStatesForNewConsumingSegment(instanceStatesMap, latestSegmentName, null, segmentAssignment,
+                  instancePartitionsMap);
+            }
           }
           // else, the metadata should be IN_PROGRESS, which is the right state for a consuming segment.
         } else { // no replica in CONSUMING state
@@ -1081,11 +1084,14 @@ public class PinotLLCRealtimeSegmentManager {
             updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, newSegmentName, segmentAssignment,
                 instancePartitionsMap);
           } else {
-            // If we get here, that means in IdealState, the latest segment has no CONSUMING replicas, but has replicas
-            // not OFFLINE. That is an unexpected state which cannot be fixed by the validation manager currently. In
-            // that case, we need to either extend this part to handle the state, or prevent segments from getting into
-            // such state.
-            LOGGER.error("Got unexpected instance state map: {} for segment: {}", instanceStateMap, latestSegmentName);
+            if (!newPartitionGroupSet.contains(partitionGroupId)) {
+              // If we get here, that means in IdealState, the latest segment has no CONSUMING replicas, but has replicas
+              // not OFFLINE. That is an unexpected state which cannot be fixed by the validation manager currently. In
+              // that case, we need to either extend this part to handle the state, or prevent segments from getting into
+              // such state.
+              LOGGER.error("Got unexpected instance state map: {} for segment: {}", instanceStateMap, latestSegmentName);
+            }
+            // else, the partition group has reached end of life. This is an acceptable state
           }
         }
       } else {
@@ -1127,10 +1133,7 @@ public class PinotLLCRealtimeSegmentManager {
     }
 
     // Set up new partitions if not exist
-    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
-    List<PartitionGroupInfo> partitionGroupInfoList =
-        getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
-    for (PartitionGroupInfo partitionGroupInfo : partitionGroupInfoList) {
+    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupInfoList) {
       int partitionGroupId = partitionGroupInfo.getPartitionGroupId();
       if (!latestSegmentZKMetadataMap.containsKey(partitionGroupId)) {
         String newSegmentName =
@@ -1178,18 +1181,6 @@ public class PinotLLCRealtimeSegmentManager {
     return System.currentTimeMillis();
   }
 
-  // fixme: investigate if this should only return active partitions (i.e. skip a shard if it has reached eol)
-  //  or return all unique partitions found in ideal state right from the birth of the table
-  private int getNumPartitionsFromIdealState(IdealState idealState) {
-    Set<String> uniquePartitions = new HashSet<>();
-    for (String segmentName : idealState.getRecord().getMapFields().keySet()) {
-      if (LLCSegmentName.isLowLevelConsumerSegmentName(segmentName)) {
-        uniquePartitions.add(String.valueOf(new LLCSegmentName(segmentName).getPartitionGroupId()));
-      }
-    }
-    return uniquePartitions.size();
-  }
-
   private int getNumReplicas(TableConfig tableConfig, InstancePartitions instancePartitions) {
     if (instancePartitions.getNumReplicaGroups() == 1) {
       // Non-replica-group based
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/validation/RealtimeSegmentValidationManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/validation/RealtimeSegmentValidationManager.java
index d611433..96604dd 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/validation/RealtimeSegmentValidationManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/validation/RealtimeSegmentValidationManager.java
@@ -58,7 +58,7 @@ public class RealtimeSegmentValidationManager extends ControllerPeriodicTask<Rea
       LeadControllerManager leadControllerManager, PinotLLCRealtimeSegmentManager llcRealtimeSegmentManager,
       ValidationMetrics validationMetrics, ControllerMetrics controllerMetrics) {
     super("RealtimeSegmentValidationManager", config.getRealtimeSegmentValidationFrequencyInSeconds(),
-        config.getRealtimeSegmentValidationManagerInitialDelaySeconds(), pinotHelixResourceManager,
+        6000, pinotHelixResourceManager,
         leadControllerManager, controllerMetrics);
     _llcRealtimeSegmentManager = llcRealtimeSegmentManager;
     _validationMetrics = validationMetrics;
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
index 75c8057..0f33556 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
@@ -850,7 +850,8 @@ public class PinotLLCRealtimeSegmentManagerTest {
     }
 
     public void ensureAllPartitionsConsuming() {
-      ensureAllPartitionsConsuming(_tableConfig, _streamConfig, _idealState, _numPartitions);
+      ensureAllPartitionsConsuming(_tableConfig, _streamConfig, _idealState,
+          getPartitionGroupInfoList(_streamConfig, Collections.emptyList()));
     }
 
     @Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index 758c656..c889193 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -240,6 +240,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
   // Segment end criteria
   private volatile long _consumeEndTime = 0;
   private Checkpoint _finalOffset; // Used when we want to catch up to this one
+  private boolean _endOfPartitionGroup = false;
   private volatile boolean _shouldStop = false;
 
   // It takes 30s to locate controller leader, and more if there are multiple controller failures.
@@ -306,6 +307,13 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
               _numRowsIndexed, _numRowsConsumed, _segmentMaxRowCount);
           _stopReason = SegmentCompletionProtocol.REASON_ROW_LIMIT;
           return true;
+        } else if (_endOfPartitionGroup) {
+          segmentLogger.info("Stopping consumption due to end of partitionGroup reached nRows={} numRowsIndexed={}, numRowsConsumed={}",
+              _numRowsIndexed, _numRowsConsumed, _segmentMaxRowCount);
+          _stopReason = SegmentCompletionProtocol.REASON_END_OF_PARTITION_GROUP;
+          // fixme: what happens if we reach endOfPartitionGroup but numDocsIndexed == 0?
+          //  If we decide to only setupNewPartitions via ValidationManager, we don't need commit on endOfShard
+          return true;
         }
         return false;
 
@@ -384,6 +392,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
       try {
         messageBatch = _partitionGroupConsumer
             .fetchMessages(_currentOffset, null, _partitionLevelStreamConfig.getFetchTimeoutMillis());
+        _endOfPartitionGroup = messageBatch.isEndOfPartitionGroup();
         consecutiveErrorCount = 0;
       } catch (TransientConsumerException e) {
         handleTransientStreamErrors(e);
@@ -1245,9 +1254,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
         //       long as the partition function is not changed.
         int numPartitions = columnPartitionConfig.getNumPartitions();
         try {
-          // fixme: get this from ideal state
-          int numStreamPartitions = _streamMetadataProvider
-              .getPartitionGroupInfoList(_clientId, _partitionLevelStreamConfig, Collections.emptyList(), 5000).size();
+          int numStreamPartitions = _streamMetadataProvider.fetchPartitionCount(/*maxWaitTimeMs=*/5000L);
           if (numStreamPartitions != numPartitions) {
             segmentLogger.warn(
                 "Number of stream partitions: {} does not match number of partitions in the partition config: {}, using number of stream partitions",
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 70d2c8a..5cbd7e6 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -125,8 +125,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
       if (nextStartSequenceNumber == null && recordList.size() > 0) {
         nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
       }
-
-      return new KinesisRecordsBatch(recordList, next.getKey());
+      return new KinesisRecordsBatch(recordList, next.getKey(), isEndOfShard);
     } catch (IllegalStateException e) {
       LOG.warn("Illegal state exception, connection is broken", e);
       return handleException(kinesisStartCheckpoint, recordList);
@@ -158,10 +157,9 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
       Map<String, String> newCheckpoint = new HashMap<>(start.getShardToStartSequenceMap());
       newCheckpoint.put(newCheckpoint.keySet().iterator().next(), nextStartSequenceNumber);
 
-      return new KinesisRecordsBatch(recordList, shardId);
+      return new KinesisRecordsBatch(recordList, shardId, false);
     } else {
-      return new KinesisRecordsBatch(recordList, shardId);
-
+      return new KinesisRecordsBatch(recordList, shardId, false);
     }
   }
 
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index 631f240..fc9c4af 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -48,7 +48,7 @@ public class KinesisConsumerFactory extends StreamConsumerFactory {
 
   @Override
   public StreamMetadataProvider createStreamMetadataProvider(String clientId) {
-    return new KinesisStreamMetadataProvider(clientId, new KinesisConfig(_streamConfig));
+    return new KinesisStreamMetadataProvider(clientId, _streamConfig);
   }
 
   @Override
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
index fdc883b..b3eb626 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
@@ -32,10 +32,12 @@ import software.amazon.awssdk.services.kinesis.model.Record;
 public class KinesisRecordsBatch implements MessageBatch<byte[]> {
   private final List<Record> _recordList;
   private final String _shardId;
+  private final boolean _endOfShard;
 
-  public KinesisRecordsBatch(List<Record> recordList, String shardId) {
+  public KinesisRecordsBatch(List<Record> recordList, String shardId, boolean endOfShard) {
     _recordList = recordList;
     _shardId = shardId;
+    _endOfShard = endOfShard;
   }
 
   @Override
@@ -68,4 +70,9 @@ public class KinesisRecordsBatch implements MessageBatch<byte[]> {
   public long getNextStreamMessageOffsetAtIndex(int index) {
     throw new UnsupportedOperationException();
   }
+
+  @Override
+  public boolean isEndOfPartitionGroup() {
+    return _endOfShard;
+  }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
index 6c55a18..8968b56 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
@@ -1,27 +1,45 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
+import com.google.common.base.Preconditions;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.stream.Collectors;
 import javax.annotation.Nonnull;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
 import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
+import org.apache.pinot.spi.stream.StreamConsumerFactory;
+import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import software.amazon.awssdk.services.kinesis.model.Shard;
 
 
 public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
   private final KinesisConnectionHandler _kinesisConnectionHandler;
+  private final StreamConsumerFactory _kinesisStreamConsumerFactory;
+  private final String _clientId;
+  private final int _fetchTimeoutMs;
 
-  public KinesisStreamMetadataProvider(String clientId, KinesisConfig kinesisConfig) {
+  public KinesisStreamMetadataProvider(String clientId, StreamConfig streamConfig) {
+    KinesisConfig kinesisConfig = new KinesisConfig(streamConfig);
     _kinesisConnectionHandler = new KinesisConnectionHandler(kinesisConfig.getStream(), kinesisConfig.getAwsRegion());
+    _kinesisStreamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
+    _clientId = clientId;
+    _fetchTimeoutMs = streamConfig.getFetchTimeoutMillis();
   }
 
   @Override
@@ -37,7 +55,7 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
   @Override
   public List<PartitionGroupInfo> getPartitionGroupInfoList(String clientId, StreamConfig streamConfig,
       List<PartitionGroupMetadata> currentPartitionGroupsMetadata, int timeoutMillis)
-      throws IOException {
+      throws IOException, TimeoutException {
 
     Map<Integer, PartitionGroupMetadata> currentPartitionGroupMap =
         currentPartitionGroupsMetadata.stream().collect(Collectors.toMap(PartitionGroupMetadata::getPartitionGroupId, p -> p));
@@ -45,10 +63,12 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
     List<PartitionGroupInfo> newPartitionGroupInfos = new ArrayList<>();
     List<Shard> shards = _kinesisConnectionHandler.getShards();
     for (Shard shard : shards) { // go over all shards
+      KinesisCheckpoint newStartCheckpoint;
+
       String shardId = shard.shardId();
       int partitionGroupId = getPartitionGroupIdFromShardId(shardId);
       PartitionGroupMetadata currentPartitionGroupMetadata = currentPartitionGroupMap.get(partitionGroupId);
-      KinesisCheckpoint newStartCheckpoint;
+
       if (currentPartitionGroupMetadata != null) { // existing shard
         KinesisCheckpoint currentEndCheckpoint = null;
         try {
@@ -59,15 +79,18 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
         if (currentEndCheckpoint != null) { // end checkpoint available i.e. committing segment
           String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
           if (endingSequenceNumber != null) { // shard has ended
-            // FIXME: this logic is not working
-            //  was expecting sequenceNumOfLastMsgInShard == endSequenceNumOfShard.
-            //  But it is much lesser than the endSeqNumOfShard
-            Map<String, String> shardToSequenceNumberMap = new HashMap<>();
-            shardToSequenceNumberMap.put(shardId, endingSequenceNumber);
-            KinesisCheckpoint shardEndCheckpoint = new KinesisCheckpoint(shardToSequenceNumberMap);
-            if (currentEndCheckpoint.compareTo(shardEndCheckpoint) >= 0) {
-              // shard has ended AND we have reached the end checkpoint.
-              // skip this partition group in the result
+            // check if segment has consumed all the messages already
+            PartitionGroupConsumer partitionGroupConsumer =
+                _kinesisStreamConsumerFactory.createPartitionGroupConsumer(_clientId, currentPartitionGroupMetadata);
+
+            MessageBatch messageBatch;
+            try {
+              messageBatch = partitionGroupConsumer.fetchMessages(currentEndCheckpoint, null, _fetchTimeoutMs);
+            } finally {
+              partitionGroupConsumer.close();
+            }
+            if (messageBatch.isEndOfPartitionGroup()) {
+              // shard has ended. Skip it from results
               continue;
             }
           }
@@ -80,6 +103,7 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
         shardToSequenceNumberMap.put(shardId, shard.sequenceNumberRange().startingSequenceNumber());
         newStartCheckpoint = new KinesisCheckpoint(shardToSequenceNumberMap);
       }
+
       newPartitionGroupInfos
           .add(new PartitionGroupInfo(partitionGroupId, newStartCheckpoint.serialize()));
     }
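getPartitionGroupIdFromShardId is not shown in this hunk; per this branch's earlier commit message ("Use shardId's last digits as partitionGroupId"), it derives a numeric id from Kinesis shard names such as shardId-000000000002. A hedged sketch of such a mapping, assumed rather than copied from the branch:

public class ShardIdMappingSketch {
  // Assumption: Kinesis shard ids have the form "shardId-<zero-padded digits>",
  // and the trailing digits are used as the partition group id.
  static int getPartitionGroupIdFromShardId(String shardId) {
    return Integer.parseInt(shardId.substring(shardId.lastIndexOf('-') + 1));
  }

  public static void main(String[] args) {
    System.out.println(getPartitionGroupIdFromShardId("shardId-000000000002"));  // prints 2
  }
}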
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
index 3052b9e..02c721f 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
@@ -81,4 +81,11 @@ public interface MessageBatch<T> {
   default StreamPartitionMsgOffset getNextStreamParitionMsgOffsetAtIndex(int index) {
     return new LongMsgOffset(getNextStreamMessageOffsetAtIndex(index));
   }
+
+  /**
+   * Returns true if the consumer detects that no more records can ever be read from this partition group
+   */
+  default boolean isEndOfPartitionGroup() {
+    return false;
+  }
 }
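Because isEndOfPartitionGroup is a default method, every pre-existing MessageBatch implementation keeps compiling and simply reports "not ended"; only end-aware batches like KinesisRecordsBatch above need an override. A tiny sketch with a simplified, hypothetical interface:

// Simplified, hypothetical MessageBatch to show the backward-compatible default.
interface SimpleMessageBatch<T> {
  int getMessageCount();
  default boolean isEndOfPartitionGroup() { return false; }  // legacy batches never signal end-of-life
}

// Predates the flag, compiles unchanged, always returns false.
class LegacyKafkaBatch implements SimpleMessageBatch<byte[]> {
  public int getMessageCount() { return 0; }
}

// End-aware batch (Kinesis-style) carries the flag from the fetch.
class EndAwareBatch implements SimpleMessageBatch<byte[]> {
  private final boolean _endOfShard;
  EndAwareBatch(boolean endOfShard) { _endOfShard = endOfShard; }
  public int getMessageCount() { return 0; }
  @Override public boolean isEndOfPartitionGroup() { return _endOfShard; }
}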




[incubator-pinot] 06/47: More controller side changes

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 75547ec213f4466f0d2f7b9b45e1234641343ac2
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Thu Dec 31 15:39:44 2020 -0800

    More controller side changes
---
 .../helix/core/PinotHelixResourceManager.java      |  4 +-
 .../helix/core/PinotTableIdealStateBuilder.java    | 16 +++---
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 57 ++++++++--------------
 .../PinotLLCRealtimeSegmentManagerTest.java        |  4 +-
 .../realtime/LLRealtimeSegmentDataManager.java     |  2 +-
 .../impl/fakestream/FakeStreamConsumerFactory.java |  2 +-
 .../fakestream/FakeStreamMetadataProvider.java     | 12 +++--
 .../kafka09/KafkaStreamMetadataProvider.java       | 36 ++++++++++----
 .../kafka09/KafkaPartitionLevelConsumerTest.java   |  4 +-
 .../kafka20/KafkaStreamMetadataProvider.java       |  3 +-
 ...Fetcher.java => PartitionGroupInfoFetcher.java} | 31 +++++-------
 .../pinot/spi/stream/StreamMetadataProvider.java   |  2 +-
 12 files changed, 86 insertions(+), 87 deletions(-)

diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index f0d52bc..c86f14c 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -1356,7 +1356,7 @@ public class PinotHelixResourceManager {
       idealState = PinotTableIdealStateBuilder
           .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
               _enableBatchMessageMode);
-      _pinotLLCRealtimeSegmentManager.setupNewShardedTable(realtimeTableConfig, idealState);
+      _pinotLLCRealtimeSegmentManager.setupNewTable(realtimeTableConfig, idealState);
       LOGGER.info("Successfully setup table for SHARDED consumers for {} ", realtimeTableName);
     } else {
 
@@ -1385,7 +1385,7 @@ public class PinotHelixResourceManager {
           idealState = PinotTableIdealStateBuilder
               .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
                   _enableBatchMessageMode);
-          _pinotLLCRealtimeSegmentManager.setupNewShardedTable(realtimeTableConfig, idealState);
+          _pinotLLCRealtimeSegmentManager.setupNewTable(realtimeTableConfig, idealState);
           LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
         } else {
           LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
index a7b3c9e..8b200bb 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
@@ -30,10 +30,10 @@ import org.apache.pinot.common.metadata.instance.InstanceZKMetadata;
 import org.apache.pinot.common.utils.StringUtil;
 import org.apache.pinot.common.utils.config.TagNameUtils;
 import org.apache.pinot.common.utils.helix.HelixHelper;
-import org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager;
 import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
-import org.apache.pinot.spi.stream.PartitionGroupMetadataFetcher;
+import org.apache.pinot.spi.stream.PartitionGroupInfoFetcher;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
 import org.apache.pinot.spi.utils.retry.RetryPolicies;
@@ -115,15 +115,15 @@ public class PinotTableIdealStateBuilder {
     return idealState;
   }
 
-  public static List<PartitionGroupMetadata> getPartitionGroupMetadataList(StreamConfig streamConfig,
+  public static List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
       List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
-    PartitionGroupMetadataFetcher partitionGroupMetadataFetcher =
-        new PartitionGroupMetadataFetcher(streamConfig, currentPartitionGroupMetadataList);
+    PartitionGroupInfoFetcher partitionGroupInfoFetcher =
+        new PartitionGroupInfoFetcher(streamConfig, currentPartitionGroupMetadataList);
     try {
-      RetryPolicies.noDelayRetryPolicy(3).attempt(partitionGroupMetadataFetcher);
-      return partitionGroupMetadataFetcher.getPartitionGroupMetadataList();
+      RetryPolicies.noDelayRetryPolicy(3).attempt(partitionGroupInfoFetcher);
+      return partitionGroupInfoFetcher.getPartitionGroupInfoList();
     } catch (Exception e) {
-      Exception fetcherException = partitionGroupMetadataFetcher.getException();
+      Exception fetcherException = partitionGroupInfoFetcher.getException();
       LOGGER.error("Could not get partition count for {}", streamConfig.getTopicName(), fetcherException);
       throw new RuntimeException(fetcherException);
     }
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 528125b..0654a38 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -48,7 +48,6 @@ import org.apache.pinot.common.metadata.segment.ColumnPartitionMetadata;
 import org.apache.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata;
 import org.apache.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
 import org.apache.pinot.common.metadata.segment.SegmentPartitionMetadata;
-import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
 import org.apache.pinot.common.metrics.ControllerMeter;
 import org.apache.pinot.common.metrics.ControllerMetrics;
 import org.apache.pinot.common.protocols.SegmentCompletionProtocol;
@@ -208,7 +207,7 @@ public class PinotLLCRealtimeSegmentManager {
   /**
    * Sets up the realtime table ideal state for a table of consumer type SHARDED
    */
-  public void setupNewShardedTable(TableConfig tableConfig, IdealState idealState) {
+  public void setupNewTable(TableConfig tableConfig, IdealState idealState) {
     Preconditions.checkState(!_isStopping, "Segment manager is stopping");
 
     String realtimeTableName = tableConfig.getTableName();
@@ -220,18 +219,8 @@ public class PinotLLCRealtimeSegmentManager {
         new PartitionLevelStreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
 
     // get new partition groups and their metadata
-    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
-    StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
-        .createStreamMetadataProvider(streamConfig.getTopicName() + "_" + System.currentTimeMillis());
-
-    List<PartitionGroupInfo> newPartitionGroupMetadataList;
-    try {
-      newPartitionGroupMetadataList =
-          streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 5000);
-    } catch (TimeoutException e) {
-      throw new IllegalStateException(e);
-    }
-    int numPartitionGroups = newPartitionGroupMetadataList.size();
+    List<PartitionGroupInfo> newPartitionGroupInfoList = getPartitionGroupInfoList(streamConfig, Collections.emptyList());
+    int numPartitionGroups = newPartitionGroupInfoList.size();
 
     InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
     int numReplicas = getNumReplicas(tableConfig, instancePartitions);
@@ -242,7 +231,7 @@ public class PinotLLCRealtimeSegmentManager {
 
     long currentTimeMs = getCurrentTimeMs();
     Map<String, Map<String, String>> instanceStatesMap = idealState.getRecord().getMapFields();
-    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupMetadataList) {
+    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupInfoList) {
       String segmentName = setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo,
           currentTimeMs, instancePartitions, numPartitionGroups, numReplicas);
       updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, segmentName, segmentAssignment,
@@ -507,18 +496,10 @@ public class PinotLLCRealtimeSegmentManager {
     List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
     PartitionLevelStreamConfig streamConfig = new PartitionLevelStreamConfig(tableConfig.getTableName(),
         IngestionConfigUtils.getStreamConfigMap(tableConfig));
-    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
-    StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
-        .createStreamMetadataProvider(streamConfig.getTopicName() + " " + System.currentTimeMillis());
 
     // find new partition groups [A],[B],[C],[D]
-    List<PartitionGroupInfo> newPartitionGroupMetadataList;
-    try {
-      newPartitionGroupMetadataList =
-          streamMetadataProvider.getPartitionGroupMetadataList(currentPartitionGroupMetadataList, 1000);
-    } catch (TimeoutException e) {
-      throw new IllegalStateException(e);
-    }
+    List<PartitionGroupInfo> newPartitionGroupMetadataList =
+        getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
 
     // create new segment metadata, only if it is not IN_PROGRESS in the current state
     Map<Integer, PartitionGroupMetadata> currentGroupIdToMetadata = currentPartitionGroupMetadataList.stream().collect(
@@ -721,9 +702,9 @@ public class PinotLLCRealtimeSegmentManager {
   }
 
   @VisibleForTesting
-  List<PartitionGroupMetadata> getPartitionGroupMetadataList(StreamConfig streamConfig,
+  List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
       List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
-    return PinotTableIdealStateBuilder.getPartitionGroupMetadataList(streamConfig, currentPartitionGroupMetadataList);
+    return PinotTableIdealStateBuilder.getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
   }
 
   @VisibleForTesting
@@ -843,7 +824,7 @@ public class PinotLLCRealtimeSegmentManager {
       if (idealState.isEnabled()) {
         List<PartitionGroupMetadata> currentPartitionGroupMetadataList =
             getCurrentPartitionGroupMetadataList(idealState);
-        int numPartitions = getPartitionGroupMetadataList(streamConfig, currentPartitionGroupMetadataList).size();
+        int numPartitions = getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList).size();
         return ensureAllPartitionsConsuming(tableConfig, streamConfig, idealState, numPartitions);
       } else {
         LOGGER.info("Skipping LLC segments validation for disabled table: {}", realtimeTableName);
@@ -1024,7 +1005,7 @@ public class PinotLLCRealtimeSegmentManager {
     //       and restart consumption from the same offset (if possible) or a newer offset (if realtime stream does not have the same offset).
     //       In latter case, report data loss.
     for (Map.Entry<Integer, LLCRealtimeSegmentZKMetadata> entry : latestSegmentZKMetadataMap.entrySet()) {
-      int partitionId = entry.getKey();
+      int partitionGroupId = entry.getKey();
       LLCRealtimeSegmentZKMetadata latestSegmentZKMetadata = entry.getValue();
       String latestSegmentName = latestSegmentZKMetadata.getSegmentName();
       LLCSegmentName latestLLCSegmentName = new LLCSegmentName(latestSegmentName);
@@ -1068,10 +1049,10 @@ public class PinotLLCRealtimeSegmentManager {
             StreamPartitionMsgOffset startOffset = offsetFactory.create(latestSegmentZKMetadata.getStartOffset());
             // Start offset must be higher than the start offset of the stream
             StreamPartitionMsgOffset partitionStartOffset =
-                getPartitionOffset(streamConfig, OffsetCriteria.SMALLEST_OFFSET_CRITERIA, partitionId);
+                getPartitionOffset(streamConfig, OffsetCriteria.SMALLEST_OFFSET_CRITERIA, partitionGroupId);
             if (partitionStartOffset.compareTo(startOffset) > 0) {
               LOGGER.error("Data lost from offset: {} to: {} for partition: {} of table: {}", startOffset,
-                  partitionStartOffset, partitionId, realtimeTableName);
+                  partitionStartOffset, partitionGroupId, realtimeTableName);
               _controllerMetrics.addMeteredTableValue(realtimeTableName, ControllerMeter.LLC_STREAM_DATA_LOSS, 1L);
               startOffset = partitionStartOffset;
             }
@@ -1108,7 +1089,7 @@ public class PinotLLCRealtimeSegmentManager {
           String previousConsumingSegment = null;
           for (Map.Entry<String, Map<String, String>> segmentEntry : instanceStatesMap.entrySet()) {
             LLCSegmentName llcSegmentName = new LLCSegmentName(segmentEntry.getKey());
-            if (llcSegmentName.getPartitionGroupId() == partitionId && segmentEntry.getValue()
+            if (llcSegmentName.getPartitionGroupId() == partitionGroupId && segmentEntry.getValue()
                 .containsValue(SegmentStateModel.CONSUMING)) {
               previousConsumingSegment = llcSegmentName.getSegmentName();
               break;
@@ -1117,7 +1098,7 @@ public class PinotLLCRealtimeSegmentManager {
           if (previousConsumingSegment == null) {
             LOGGER
                 .error("Failed to find previous CONSUMING segment for partition: {} of table: {}, potential data loss",
-                    partitionId, realtimeTableName);
+                    partitionGroupId, realtimeTableName);
             _controllerMetrics.addMeteredTableValue(realtimeTableName, ControllerMeter.LLC_STREAM_DATA_LOSS, 1L);
           }
           updateInstanceStatesForNewConsumingSegment(instanceStatesMap, previousConsumingSegment, latestSegmentName,
@@ -1130,10 +1111,14 @@ public class PinotLLCRealtimeSegmentManager {
     }
 
     // Set up new partitions if not exist
-    for (int partitionId = 0; partitionId < numPartitions; partitionId++) {
-      if (!latestSegmentZKMetadataMap.containsKey(partitionId)) {
+    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
+    List<PartitionGroupInfo> partitionGroupInfoList =
+        getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
+    for (PartitionGroupInfo partitionGroupInfo : partitionGroupInfoList) {
+      int partitionGroupId = partitionGroupInfo.getPartitionGroupId();
+      if (!latestSegmentZKMetadataMap.containsKey(partitionGroupId)) {
         String newSegmentName =
-            setupNewPartitionGroup(tableConfig, streamConfig, partitionId, currentTimeMs, instancePartitions, numPartitions,
+            setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo, currentTimeMs, instancePartitions, numPartitions,
                 numReplicas);
         updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, newSegmentName, segmentAssignment,
             instancePartitionsMap);
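
Net effect of the hunk above: rather than assuming partition ids 0..N-1, the controller asks the
stream for its current partition groups and creates a CONSUMING segment only for groups that do
not have one yet. A condensed sketch of the resulting flow, using only names that appear in this
diff:

    List<PartitionGroupInfo> partitionGroupInfoList =
        getPartitionGroupInfoList(streamConfig, getCurrentPartitionGroupMetadataList(idealState));
    for (PartitionGroupInfo partitionGroupInfo : partitionGroupInfoList) {
      if (!latestSegmentZKMetadataMap.containsKey(partitionGroupInfo.getPartitionGroupId())) {
        // Newly discovered partition group: set up its first consuming segment
        String newSegmentName = setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo,
            currentTimeMs, instancePartitions, numPartitions, numReplicas);
        updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, newSegmentName,
            segmentAssignment, instancePartitionsMap);
      }
    }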
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
index 743e719..42bdedc 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
@@ -50,7 +50,6 @@ import org.apache.pinot.controller.helix.core.assignment.segment.SegmentAssignme
 import org.apache.pinot.controller.helix.core.realtime.segment.CommittingSegmentDescriptor;
 import org.apache.pinot.controller.util.SegmentCompletionUtils;
 import org.apache.pinot.core.indexsegment.generator.SegmentVersion;
-import org.apache.pinot.core.realtime.impl.fakestream.FakePartitionGroupMetadata;
 import org.apache.pinot.core.realtime.impl.fakestream.FakeStreamConfigUtils;
 import org.apache.pinot.core.segment.index.metadata.SegmentMetadataImpl;
 import org.apache.pinot.spi.config.table.TableConfig;
@@ -60,6 +59,7 @@ import org.apache.pinot.spi.env.PinotConfiguration;
 import org.apache.pinot.spi.filesystem.PinotFSFactory;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
 import org.apache.pinot.spi.stream.StreamConfig;
@@ -914,7 +914,7 @@ public class PinotLLCRealtimeSegmentManagerTest {
     }
 
     @Override
-    List<PartitionGroupMetadata> getPartitionGroupMetadataList(StreamConfig streamConfig, List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
+    List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig, List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
       return IntStream.range(0, _numPartitions).mapToObj(FakePartitionGroupMetadata::new).collect(Collectors.toList());
     }
 
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
index 13a9ab2..0938251 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/LLRealtimeSegmentDataManager.java
@@ -1245,7 +1245,7 @@ public class LLRealtimeSegmentDataManager extends RealtimeSegmentDataManager {
         int numPartitions = columnPartitionConfig.getNumPartitions();
         try {
           // fixme: get this from ideal state
-          int numStreamPartitions = _streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 5000).size();
+          int numStreamPartitions = _streamMetadataProvider.getPartitionGroupInfoList(Collections.emptyList(), 5000).size();
           if (numStreamPartitions != numPartitions) {
             segmentLogger.warn(
                 "Number of stream partitions: {} does not match number of partitions in the partition config: {}, using number of stream partitions",
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
index 54be1b6..6121eef 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
@@ -88,7 +88,7 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
 
     // stream metadata provider
     StreamMetadataProvider streamMetadataProvider = streamConsumerFactory.createStreamMetadataProvider(clientId);
-    int partitionCount = streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 10_000).size();
+    int partitionCount = streamMetadataProvider.getPartitionGroupInfoList(Collections.emptyList(), 10_000).size();
     System.out.println(partitionCount);
 
     // Partition metadata provider
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
index c96d06a..0de0ce2 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
@@ -35,9 +36,11 @@ import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
  */
 public class FakeStreamMetadataProvider implements StreamMetadataProvider {
   private final int _numPartitions;
+  private StreamConfig _streamConfig;
 
   public FakeStreamMetadataProvider(StreamConfig streamConfig) {
     _numPartitions = FakeStreamConfigUtils.getNumPartitions(streamConfig);
+    _streamConfig = streamConfig;
   }
 
   @Override
@@ -46,11 +49,12 @@ public class FakeStreamMetadataProvider implements StreamMetadataProvider {
   }
 
   @Override
-  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis) {
-    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>();
+  public List<PartitionGroupInfo> getPartitionGroupInfoList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
+      throws TimeoutException {
+    List<PartitionGroupInfo> partitionGroupMetadataList = new ArrayList<>();
     for (int i = 0; i < _numPartitions; i++) {
-      partitionGroupMetadataList.add(new FakePartitionGroupMetadata(i));
+      partitionGroupMetadataList.add(new PartitionGroupInfo(i, fetchStreamPartitionOffset(_streamConfig.getOffsetCriteria(), 5000).toString()));
     }
     return partitionGroupMetadataList;
   }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
index 865ae96..2d0ad31 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaStreamMetadataProvider.java
@@ -40,6 +40,7 @@ import org.apache.kafka.common.errors.TimeoutException;
 import org.apache.kafka.common.protocol.Errors;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
@@ -54,13 +55,14 @@ import org.slf4j.LoggerFactory;
 public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implements StreamMetadataProvider {
   private static final Logger LOGGER = LoggerFactory.getLogger(KafkaStreamMetadataProvider.class);
 
+  private StreamConfig _streamConfig;
+
   /**
    * Create a partition specific metadata provider
-   * @param streamConfig
-   * @param partition
    */
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig, int partition) {
     super(clientId, streamConfig, partition, new KafkaSimpleConsumerFactoryImpl());
+    _streamConfig = streamConfig;
   }
 
   /**
@@ -69,18 +71,21 @@ public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implemen
    */
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig) {
     super(clientId, streamConfig, new KafkaSimpleConsumerFactoryImpl());
+    _streamConfig = streamConfig;
   }
 
   @VisibleForTesting
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig, int partition,
       KafkaSimpleConsumerFactory kafkaSimpleConsumerFactory) {
     super(clientId, streamConfig, partition, kafkaSimpleConsumerFactory);
+    _streamConfig = streamConfig;
   }
 
   @VisibleForTesting
   public KafkaStreamMetadataProvider(String clientId, StreamConfig streamConfig,
       KafkaSimpleConsumerFactory kafkaSimpleConsumerFactory) {
     super(clientId, streamConfig, kafkaSimpleConsumerFactory);
+    _streamConfig = streamConfig;
   }
 
   /**
@@ -156,19 +161,30 @@ public class KafkaStreamMetadataProvider extends KafkaConnectionHandler implemen
   }
 
   /**
-   * Fetch the partition group metadata list
+   * Fetch the partition group info list.
    * @param currentPartitionGroupsMetadata In case of Kafka, each partition group contains a single partition.
-   *                                       Hence current partition groups are not needed to compute the new partition groups
    */
   @Override
-  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
-      @Nullable List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis) {
+  public List<PartitionGroupInfo> getPartitionGroupInfoList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
+      throws java.util.concurrent.TimeoutException {
     int partitionCount = fetchPartitionCountInternal(timeoutMillis);
-    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>(partitionCount);
-    for (int i = 0; i < partitionCount; i++) {
-      partitionGroupMetadataList.add(new KafkaPartitionGroupMetadata(i));
+    List<PartitionGroupInfo> newPartitionGroupInfoList = new ArrayList<>(partitionCount);
+
+    // Add a PartitionGroupInfo to the list for each partition already present in the current
+    // metadata, using that partition group's end checkpoint as its checkpoint.
+    for (PartitionGroupMetadata currentPartitionGroupMetadata : currentPartitionGroupsMetadata) {
+      newPartitionGroupInfoList.add(new PartitionGroupInfo(currentPartitionGroupMetadata.getPartitionGroupId(),
+          currentPartitionGroupMetadata.getEndCheckpoint()));
+    }
+    // Add a PartitionGroupInfo for each newly added partition, fetching its start offset
+    // using the offset criteria from the stream config.
+    for (int i = currentPartitionGroupsMetadata.size(); i < partitionCount; i++) {
+      StreamPartitionMsgOffset streamPartitionMsgOffset =
+          fetchStreamPartitionOffset(_streamConfig.getOffsetCriteria(), 5000);
+      newPartitionGroupInfoList.add(new PartitionGroupInfo(i, streamPartitionMsgOffset.toString()));
     }
-    return partitionGroupMetadataList;
+    return newPartitionGroupInfoList;
   }
 
   public synchronized long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
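
To make the carry-over logic above concrete, here is a hypothetical walk-through of
getPartitionGroupInfoList (the ids, checkpoints, and offsets below are made up for illustration):

    // Topic has 3 partitions; groups 0 and 1 are already being consumed.
    // currentPartitionGroupsMetadata = [ {id=0, endCheckpoint="150"}, {id=1, endCheckpoint="275"} ]
    // Returned list:
    //   PartitionGroupInfo(0, "150")   -- existing group resumes from its end checkpoint
    //   PartitionGroupInfo(1, "275")
    //   PartitionGroupInfo(2, "0")     -- new partition; start offset fetched per the stream
    //                                     config's offset criteria (e.g. smallest)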
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
index 43b72a8..9d3091e 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
@@ -267,7 +267,7 @@ public class KafkaPartitionLevelConsumerTest {
   }
 
   @Test
-  public void testGetPartitionCount() {
+  public void testGetPartitionCount() throws Exception {
     String streamType = "kafka";
     String streamKafkaTopicName = "theTopic";
     String streamKafkaBrokerList = "abcd:1234,bcde:2345";
@@ -291,7 +291,7 @@ public class KafkaPartitionLevelConsumerTest {
 
     KafkaStreamMetadataProvider streamMetadataProvider =
         new KafkaStreamMetadataProvider(clientId, streamConfig, mockKafkaSimpleConsumerFactory);
-    Assert.assertEquals(streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 10000L), 2);
+    Assert.assertEquals(streamMetadataProvider.getPartitionGroupInfoList(Collections.emptyList(), 10000L).size(), 2);
   }
 
   @Test
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
index eb606f2..ef22b6a 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
@@ -58,10 +58,9 @@ public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHa
   /**
    * Fetch the partitionGroupMetadata list.
    * @param currentPartitionGroupsMetadata In case of Kafka, each partition group contains a single partition.
-   *                                       Hence current partition groups are not needed to compute the new partition groups
    */
   @Override
-  public List<PartitionGroupInfo> getPartitionGroupMetadataList(
+  public List<PartitionGroupInfo> getPartitionGroupInfoList(
       List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
       throws TimeoutException {
     int partitionCount = _consumer.partitionsFor(_topic, Duration.ofMillis(timeoutMillis)).size();
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataFetcher.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
similarity index 64%
rename from pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataFetcher.java
rename to pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
index e1ce1a6..d13be10 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataFetcher.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupInfoFetcher.java
@@ -27,27 +27,24 @@ import org.slf4j.LoggerFactory;
 /**
  * Fetches the partition count of a stream using the {@link StreamMetadataProvider}
  */
-public class PartitionGroupMetadataFetcher implements Callable<Boolean> {
+public class PartitionGroupInfoFetcher implements Callable<Boolean> {
 
-  private static final Logger LOGGER = LoggerFactory.getLogger(PartitionGroupMetadataFetcher.class);
+  private static final Logger LOGGER = LoggerFactory.getLogger(PartitionGroupInfoFetcher.class);
 
-  private int _partitionCount = -1;
-  private List<PartitionGroupMetadata> _partitionGroupMetadataList;
-  private List<PartitionGroupMetadata> _currentPartitionGroupMetadata;
-  private final StreamConfig _streamConfig;
-  private StreamConsumerFactory _streamConsumerFactory;
+  private List<PartitionGroupInfo> _partitionGroupInfoList;
+  private final List<PartitionGroupMetadata> _currentPartitionGroupMetadata;
+  private final StreamConsumerFactory _streamConsumerFactory;
   private Exception _exception;
   private final String _topicName;
 
-  public PartitionGroupMetadataFetcher(StreamConfig streamConfig, List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
-    _streamConfig = streamConfig;
-    _streamConsumerFactory = StreamConsumerFactoryProvider.create(_streamConfig);
+  public PartitionGroupInfoFetcher(StreamConfig streamConfig, List<PartitionGroupMetadata> currentPartitionGroupMetadataList) {
+    _streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
     _topicName = streamConfig.getTopicName();
     _currentPartitionGroupMetadata = currentPartitionGroupMetadataList;
   }
 
-  public List<PartitionGroupMetadata> getPartitionGroupMetadataList() {
-    return _partitionGroupMetadataList;
+  public List<PartitionGroupInfo> getPartitionGroupInfoList() {
+    return _partitionGroupInfoList;
   }
 
   public Exception getException() {
@@ -55,21 +52,19 @@ public class PartitionGroupMetadataFetcher implements Callable<Boolean> {
   }
 
   /**
-   * Callable to fetch the number of partitions of the stream given the stream metadata
-   * @return
-   * @throws Exception
+   * Callable to fetch the partition group info for the stream
    */
   @Override
   public Boolean call()
       throws Exception {
 
-    String clientId = PartitionGroupMetadataFetcher.class.getSimpleName() + "-" + _topicName;
+    String clientId = PartitionGroupInfoFetcher.class.getSimpleName() + "-" + _topicName;
     try (
         StreamMetadataProvider streamMetadataProvider = _streamConsumerFactory.createStreamMetadataProvider(clientId)) {
-      _partitionGroupMetadataList = streamMetadataProvider.getPartitionGroupMetadataList(_currentPartitionGroupMetadata, /*maxWaitTimeMs=*/5000L);
+      _partitionGroupInfoList = streamMetadataProvider.getPartitionGroupInfoList(_currentPartitionGroupMetadata, /*maxWaitTimeMs=*/5000L);
       if (_exception != null) {
         // We had at least one failure, but succeeded now. Log an info
-        LOGGER.info("Successfully retrieved partition count as {} for topic {}", _partitionCount, _topicName);
+        LOGGER.info("Successfully retrieved partition group info for topic {}", _topicName);
       }
       return Boolean.TRUE;
     } catch (TransientConsumerException e) {
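
Because the fetch can fail transiently, the fetcher is a Callable<Boolean>: it returns true on
success and, per the TransientConsumerException catch shown above, is expected to return false on
a transient failure so callers can retry. A minimal usage sketch, assuming Pinot's RetryPolicies
helper from org.apache.pinot.spi.utils.retry (the attempt count is arbitrary):

    PartitionGroupInfoFetcher fetcher =
        new PartitionGroupInfoFetcher(streamConfig, currentPartitionGroupMetadataList);
    // Re-invokes call() while it returns false, up to 3 attempts
    RetryPolicies.noDelayRetryPolicy(3).attempt(fetcher);
    List<PartitionGroupInfo> partitionGroupInfoList = fetcher.getPartitionGroupInfoList();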
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
index a9cd2d6..f595ea3 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
@@ -40,7 +40,7 @@ public interface StreamMetadataProvider extends Closeable {
   int fetchPartitionCount(long timeoutMillis);
 
   // takes the current state of partition groups (groupings of shards, the state of the consumption) and creates the new state
-  List<PartitionGroupInfo> getPartitionGroupMetadataList(
+  List<PartitionGroupInfo> getPartitionGroupInfoList(
       List<PartitionGroupMetadata> currentPartitionGroupsMetadata, long timeoutMillis)
       throws TimeoutException;
 




[incubator-pinot] 23/47: Add license header

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 29068ca2ded7066d4dda1438199718c96b2702ad
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 11:45:11 2020 +0530

    Add license header
---
 .../pinot/plugin/stream/kinesis/KinesisConfig.java     | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index 01d666a..d2e8715 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import org.apache.pinot.spi.stream.StreamConfig;




[incubator-pinot] 13/47: Add PartitionGroupMetadataMap interface

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 2546098f2ae43f4ff39acd0a48cb42b047dc1fc4
Author: KKcorps <kh...@gmail.com>
AuthorDate: Fri Dec 11 13:56:52 2020 +0530

    Add PartitionGroupMetadataMap interface
---
 .../src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java  | 7 +++++--
 .../org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java  | 4 ++++
 .../org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java    | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
index b490835..78ae5ef 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
@@ -1,7 +1,10 @@
 package org.apache.pinot.spi.stream.v2;
 
-public interface FetchResult {
+import java.util.List;
+
+
+public interface FetchResult<T> {
   Checkpoint getLastCheckpoint();
-  byte[] getMessages();
+  List<T> getMessages();
 }
 
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
new file mode 100644
index 0000000..3c344bc
--- /dev/null
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
@@ -0,0 +1,4 @@
+package org.apache.pinot.spi.stream.v2;
+
+public interface PartitionGroupMetadataMap {
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
index bd3017d..eb7f76e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
@@ -8,7 +8,7 @@ public interface StreamConsumerFactoryV2 {
   void init(StreamConfig streamConfig);
 
   // takes the current state of partition groups (groupings of shards, the state of the consumption) and creates the new state
-  Map<Long, PartitionGroupMetadata> getPartitionGroupsMetadata(Map<Long, PartitionGroupMetadata> currentPartitionGroupsMetadata);
+  PartitionGroupMetadataMap getPartitionGroupsMetadata(PartitionGroupMetadataMap currentPartitionGroupsMetadata);
 
   // creates a name generator which generates segment name for a partition group
   SegmentNameGenerator getSegmentNameGenerator();
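
With getMessages() now returning List<T>, a stream plugin can type its fetch result to its own
payload representation. A minimal sketch of a byte[]-typed implementation (the class name is
illustrative, not part of this commit):

    import java.util.List;

    public class ByteArrayFetchResult implements FetchResult<byte[]> {
      private final Checkpoint _lastCheckpoint;
      private final List<byte[]> _messages;

      public ByteArrayFetchResult(Checkpoint lastCheckpoint, List<byte[]> messages) {
        _lastCheckpoint = lastCheckpoint;
        _messages = messages;
      }

      @Override
      public Checkpoint getLastCheckpoint() {
        // Where to resume consumption after these messages are processed
        return _lastCheckpoint;
      }

      @Override
      public List<byte[]> getMessages() {
        return _messages;
      }
    }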




[incubator-pinot] 34/47: Implementation fixes

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit d079c81802adf8354523c3d6bd0b192c10e4d483
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Sat Jan 2 19:49:34 2021 -0800

    Implementation fixes
---
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml   |  2 +-
 .../plugin/stream/kinesis/KinesisCheckpoint.java   | 47 +++++++----
 .../plugin/stream/kinesis/KinesisConsumer.java     | 46 +++++------
 .../stream/kinesis/KinesisConsumerFactory.java     | 39 +++++----
 .../plugin/stream/kinesis/KinesisFetchResult.java  | 44 ----------
 .../kinesis/KinesisPartitionGroupMetadataMap.java  | 93 ----------------------
 .../stream/kinesis/KinesisShardMetadata.java       | 71 -----------------
 .../kinesis/KinesisStreamMetadataProvider.java     | 53 ++++++++++++
 .../plugin/stream/kinesis/KinesisConsumerTest.java | 18 +++--
 .../pinot/spi/stream/PartitionGroupMetadata.java   |  3 +
 .../org/apache/pinot/spi/stream/v2/Checkpoint.java | 25 ------
 .../org/apache/pinot/spi/stream/v2/ConsumerV2.java | 24 ------
 .../apache/pinot/spi/stream/v2/FetchResult.java    | 29 -------
 .../spi/stream/v2/PartitionGroupMetadata.java      | 34 --------
 .../spi/stream/v2/PartitionGroupMetadataMap.java   | 30 -------
 .../pinot/spi/stream/v2/SegmentNameGenerator.java  | 25 ------
 .../spi/stream/v2/StreamConsumerFactoryV2.java     | 37 ---------
 pinot-tools/pom.xml                                |  5 ++
 pom.xml                                            |  2 +-
 19 files changed, 148 insertions(+), 479 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
index 4fce169..38d4f73 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
@@ -109,4 +109,4 @@
     </dependency>
   </dependencies>
 
-</project>
\ No newline at end of file
+</project>
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index f3a7a49..1b8f86e 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -18,38 +18,51 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
-import org.apache.pinot.spi.stream.v2.Checkpoint;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import java.io.IOException;
+import java.util.Map;
+import org.apache.pinot.spi.stream.Checkpoint;
+import org.apache.pinot.spi.utils.JsonUtils;
 
 
 public class KinesisCheckpoint implements Checkpoint {
-  String _sequenceNumber;
-  Boolean _isEndOfPartition = false;
+  private Map<String, String> _shardToStartSequenceMap;
 
-  public KinesisCheckpoint(String sequenceNumber) {
-    _sequenceNumber = sequenceNumber;
+  public KinesisCheckpoint(Map<String, String> shardToStartSequenceMap) {
+    _shardToStartSequenceMap = shardToStartSequenceMap;
   }
 
-  public KinesisCheckpoint(String sequenceNumber, Boolean isEndOfPartition) {
-    _sequenceNumber = sequenceNumber;
-    _isEndOfPartition = isEndOfPartition;
+  public KinesisCheckpoint(String checkpointStr)
+      throws IOException {
+    _shardToStartSequenceMap = JsonUtils.stringToObject(checkpointStr, new TypeReference<Map<String, String>>() {
+    });
   }
 
-  @Override
-  public boolean isEndOfPartition() {
-    return _isEndOfPartition;
+  public Map<String, String> getShardToStartSequenceMap() {
+    return _shardToStartSequenceMap;
   }
 
-  public String getSequenceNumber() {
-    return _sequenceNumber;
+  @Override
+  public String serialize() {
+    try {
+      return JsonUtils.objectToString(_shardToStartSequenceMap);
+    } catch (JsonProcessingException e) {
+      throw new IllegalStateException(e);
+    }
   }
 
   @Override
-  public byte[] serialize() {
-    return _sequenceNumber.getBytes();
+  public KinesisCheckpoint deserialize(String blob) {
+    try {
+      return new KinesisCheckpoint(blob);
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
   }
 
   @Override
-  public KinesisCheckpoint deserialize(byte[] blob) {
-    return new KinesisCheckpoint(new String(blob));
+  public int compareTo(Object o) {
+    // Compares the first (and, for a single-shard checkpoint, only) sequence number in each map
+    String thisSequenceNumber = _shardToStartSequenceMap.values().iterator().next();
+    String thatSequenceNumber = ((KinesisCheckpoint) o)._shardToStartSequenceMap.values().iterator().next();
+    return thisSequenceNumber.compareTo(thatSequenceNumber);
   }
 }
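
The checkpoint is now a map from shard id to start sequence number, serialized as JSON. A small
round-trip illustration (the shard id and sequence number are made-up values):

    Map<String, String> shardToSequence = new HashMap<>();
    shardToSequence.put("shardId-000000000000", "12345");
    KinesisCheckpoint checkpoint = new KinesisCheckpoint(shardToSequence);

    String json = checkpoint.serialize();   // {"shardId-000000000000":"12345"}

    // The String constructor deserializes; it throws IOException on malformed input
    KinesisCheckpoint restored = new KinesisCheckpoint(json);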
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index fb414f0..8a24208 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -19,14 +19,16 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
-import org.apache.pinot.spi.stream.v2.Checkpoint;
-import org.apache.pinot.spi.stream.v2.ConsumerV2;
-import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
+import org.apache.pinot.spi.stream.Checkpoint;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import software.amazon.awssdk.services.kinesis.model.ExpiredIteratorException;
@@ -41,28 +43,25 @@ import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
-public class KinesisConsumer extends KinesisConnectionHandler implements ConsumerV2 {
+public class KinesisConsumer extends KinesisConnectionHandler implements PartitionGroupConsumer {
   private final Logger LOG = LoggerFactory.getLogger(KinesisConsumer.class);
   String _stream;
   Integer _maxRecords;
-  String _shardId;
   ExecutorService _executorService;
   ShardIteratorType _shardIteratorType;
 
-  public KinesisConsumer(KinesisConfig kinesisConfig, PartitionGroupMetadata partitionGroupMetadata) {
+  public KinesisConsumer(KinesisConfig kinesisConfig) {
     super(kinesisConfig.getStream(), kinesisConfig.getAwsRegion());
     _stream = kinesisConfig.getStream();
     _maxRecords = kinesisConfig.maxRecordsToFetch();
-    KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) partitionGroupMetadata;
-    _shardId = kinesisShardMetadata.getShardId();
     _shardIteratorType = kinesisConfig.getShardIteratorType();
     _executorService = Executors.newSingleThreadExecutor();
   }
 
   @Override
-  public KinesisFetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
+  public KinesisRecordsBatch fetchMessages(Checkpoint start, Checkpoint end, int timeout) {
     List<Record> recordList = new ArrayList<>();
-    Future<KinesisFetchResult> kinesisFetchResultFuture =
+    Future<KinesisRecordsBatch> kinesisFetchResultFuture =
         _executorService.submit(() -> getResult(start, end, recordList));
 
     try {
@@ -72,7 +71,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     }
   }
 
-  private KinesisFetchResult getResult(Checkpoint start, Checkpoint end, List<Record> recordList) {
+  private KinesisRecordsBatch getResult(Checkpoint start, Checkpoint end, List<Record> recordList) {
     KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
 
     try {
@@ -81,13 +80,14 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
         createConnection();
       }
 
-      String shardIterator = getShardIterator(kinesisStartCheckpoint.getSequenceNumber());
+      Map.Entry<String, String> next = kinesisStartCheckpoint.getShardToStartSequenceMap().entrySet().iterator().next();
+      String shardIterator = getShardIterator(next.getKey(), next.getValue());
 
       String kinesisEndSequenceNumber = null;
 
       if (end != null) {
         KinesisCheckpoint kinesisEndCheckpoint = (KinesisCheckpoint) end;
-        kinesisEndSequenceNumber = kinesisEndCheckpoint.getSequenceNumber();
+        kinesisEndSequenceNumber = kinesisEndCheckpoint.getShardToStartSequenceMap().values().iterator().next();
       }
 
       String nextStartSequenceNumber = null;
@@ -125,10 +125,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
         nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
       }
 
-      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber, isEndOfShard);
-      KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint, recordList);
-
-      return kinesisFetchResult;
+      return new KinesisRecordsBatch(recordList);
     } catch (ProvisionedThroughputExceededException e) {
       LOG.warn("The request rate for the stream is too high", e);
       return handleException(kinesisStartCheckpoint, recordList);
@@ -149,21 +146,22 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     }
   }
 
-  private KinesisFetchResult handleException(KinesisCheckpoint start, List<Record> recordList) {
+  private KinesisRecordsBatch handleException(KinesisCheckpoint start, List<Record> recordList) {
     if (recordList.size() > 0) {
       String nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
-      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber);
-      return new KinesisFetchResult(kinesisCheckpoint, recordList);
+      Map<String, String> newCheckpoint = new HashMap<>(start.getShardToStartSequenceMap());
+      newCheckpoint.put(newCheckpoint.keySet().iterator().next(), nextStartSequenceNumber);
+      return new KinesisRecordsBatch(recordList);
     } else {
-      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(start.getSequenceNumber());
-      return new KinesisFetchResult(kinesisCheckpoint, recordList);
+      return new KinesisRecordsBatch(recordList);
     }
   }
 
-  public String getShardIterator(String sequenceNumber) {
+  public String getShardIterator(String shardId, String sequenceNumber) {
 
     GetShardIteratorRequest.Builder requestBuilder =
-        GetShardIteratorRequest.builder().streamName(_stream).shardId(_shardId).shardIteratorType(_shardIteratorType);
+        GetShardIteratorRequest.builder().streamName(_stream).shardId(shardId).shardIteratorType(_shardIteratorType);
 
     if (sequenceNumber != null) {
       requestBuilder = requestBuilder.startingSequenceNumber(sequenceNumber);
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index 9bb4d0c..aa90812 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -18,36 +18,41 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
-import org.apache.pinot.spi.stream.StreamConfig;
-import org.apache.pinot.spi.stream.v2.ConsumerV2;
-import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
-import org.apache.pinot.spi.stream.v2.PartitionGroupMetadataMap;
-import org.apache.pinot.spi.stream.v2.SegmentNameGenerator;
-import org.apache.pinot.spi.stream.v2.StreamConsumerFactoryV2;
+import java.util.Set;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
+import org.apache.pinot.spi.stream.PartitionLevelConsumer;
+import org.apache.pinot.spi.stream.StreamConsumerFactory;
+import org.apache.pinot.spi.stream.StreamLevelConsumer;
+import org.apache.pinot.spi.stream.StreamMetadataProvider;
 
 
-public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
-  private KinesisConfig _kinesisConfig;
+public class KinesisConsumerFactory extends StreamConsumerFactory {
 
   @Override
-  public void init(StreamConfig streamConfig) {
-    _kinesisConfig = new KinesisConfig(streamConfig);
+  public PartitionLevelConsumer createPartitionLevelConsumer(String clientId, int partition) {
+    throw new UnsupportedOperationException();
   }
 
   @Override
-  public PartitionGroupMetadataMap getPartitionGroupsMetadata(
-      PartitionGroupMetadataMap currentPartitionGroupsMetadata) {
-    return new KinesisPartitionGroupMetadataMap(_kinesisConfig.getStream(), _kinesisConfig.getAwsRegion(),
-        currentPartitionGroupsMetadata);
+  public StreamLevelConsumer createStreamLevelConsumer(String clientId, String tableName, Set<String> fieldsToRead,
+      String groupId) {
+    throw new UnsupportedOperationException();
   }
 
   @Override
-  public SegmentNameGenerator getSegmentNameGenerator() {
+  public StreamMetadataProvider createPartitionMetadataProvider(String clientId, int partition) {
     return null;
   }
 
   @Override
-  public ConsumerV2 createConsumer(PartitionGroupMetadata metadata) {
-    return new KinesisConsumer(_kinesisConfig, metadata);
+  public StreamMetadataProvider createStreamMetadataProvider(String clientId) {
+    return new KinesisStreamMetadataProvider(clientId, new KinesisConfig(_streamConfig));
   }
+
+  @Override
+  public PartitionGroupConsumer createPartitionGroupConsumer(String clientId, PartitionGroupMetadata metadata) {
+    return new KinesisConsumer(new KinesisConfig(_streamConfig));
+  }
+
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
deleted file mode 100644
index 8da3d2e..0000000
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.plugin.stream.kinesis;
-
-import java.util.List;
-import org.apache.pinot.spi.stream.v2.FetchResult;
-import software.amazon.awssdk.services.kinesis.model.Record;
-
-
-public class KinesisFetchResult implements FetchResult<byte[]> {
-  private final KinesisCheckpoint _kinesisCheckpoint;
-  private final List<Record> _recordList;
-
-  public KinesisFetchResult(KinesisCheckpoint kinesisCheckpoint, List<Record> recordList) {
-    _kinesisCheckpoint = kinesisCheckpoint;
-    _recordList = recordList;
-  }
-
-  @Override
-  public KinesisCheckpoint getLastCheckpoint() {
-    return _kinesisCheckpoint;
-  }
-
-  @Override
-  public KinesisRecordsBatch getMessages() {
-    return new KinesisRecordsBatch(_recordList);
-  }
-}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
deleted file mode 100644
index f96533f..0000000
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.plugin.stream.kinesis;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
-import org.apache.pinot.spi.stream.v2.PartitionGroupMetadataMap;
-import software.amazon.awssdk.services.kinesis.model.Shard;
-
-
-public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler implements PartitionGroupMetadataMap {
-  private final List<PartitionGroupMetadata> _stringPartitionGroupMetadataIndex = new ArrayList<>();
-
-  public KinesisPartitionGroupMetadataMap(String stream, String awsRegion,
-      PartitionGroupMetadataMap currentPartitionGroupMetadataMap) {
-    //TODO: Handle child shards. Do not consume data from child shard unless parent is finished.
-    //Return metadata only for shards in current metadata
-    super(stream, awsRegion);
-    KinesisPartitionGroupMetadataMap currentPartitionMeta =
-        (KinesisPartitionGroupMetadataMap) currentPartitionGroupMetadataMap;
-    List<PartitionGroupMetadata> currentMetaList = currentPartitionMeta.getMetadataList();
-
-    List<Shard> shardList = getShards();
-
-    Map<String, PartitionGroupMetadata> currentMetadataMap = new HashMap<>();
-    for (PartitionGroupMetadata partitionGroupMetadata : currentMetaList) {
-      KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) partitionGroupMetadata;
-      currentMetadataMap.put(kinesisShardMetadata.getShardId(), kinesisShardMetadata);
-    }
-
-    for (Shard shard : shardList) {
-      if (currentMetadataMap.containsKey(shard.shardId())) {
-        //Return existing shard metadata
-        _stringPartitionGroupMetadataIndex.add(currentMetadataMap.get(shard.shardId()));
-      } else if (currentMetadataMap.containsKey(shard.parentShardId())) {
-        KinesisShardMetadata kinesisShardMetadata =
-            (KinesisShardMetadata) currentMetadataMap.get(shard.parentShardId());
-        if (isProcessingFinished(kinesisShardMetadata)) {
-          //Add child shards for processing since parent has finished
-          appendShardMetadata(stream, awsRegion, shard);
-        } else {
-          //Do not process this shard unless the parent shard is finished or expired
-        }
-      } else {
-        //This is a new shard with no parents. We can start processing this shard.
-        appendShardMetadata(stream, awsRegion, shard);
-      }
-    }
-  }
-
-  private boolean isProcessingFinished(KinesisShardMetadata kinesisShardMetadata) {
-    return kinesisShardMetadata.getEndCheckpoint().getSequenceNumber() != null && kinesisShardMetadata
-        .getStartCheckpoint().getSequenceNumber().equals(kinesisShardMetadata.getEndCheckpoint().getSequenceNumber());
-  }
-
-  private void appendShardMetadata(String stream, String awsRegion, Shard shard) {
-    String startSequenceNumber = shard.sequenceNumberRange().startingSequenceNumber();
-    String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
-    KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream, awsRegion);
-    shardMetadata.setStartCheckpoint(new KinesisCheckpoint(startSequenceNumber));
-    shardMetadata.setEndCheckpoint(new KinesisCheckpoint(endingSequenceNumber));
-    _stringPartitionGroupMetadataIndex.add(shardMetadata);
-  }
-
-  @Override
-  public List<PartitionGroupMetadata> getMetadataList() {
-    return _stringPartitionGroupMetadataIndex;
-  }
-
-  @Override
-  public PartitionGroupMetadata getPartitionGroupMetadata(int index) {
-    return _stringPartitionGroupMetadataIndex.get(index);
-  }
-}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
deleted file mode 100644
index e24121b..0000000
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.plugin.stream.kinesis;
-
-import org.apache.pinot.spi.stream.v2.Checkpoint;
-import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
-
-
-//TODO: Implement shardId as Array and have unique id
-public class KinesisShardMetadata extends KinesisConnectionHandler implements PartitionGroupMetadata {
-  String _shardId;
-  KinesisCheckpoint _startCheckpoint;
-  KinesisCheckpoint _endCheckpoint;
-
-  public KinesisShardMetadata(String shardId, String streamName, String awsRegion) {
-    super(streamName, awsRegion);
-    _startCheckpoint = null;
-    _endCheckpoint = null;
-    _shardId = shardId;
-  }
-
-  public String getShardId() {
-    return _shardId;
-  }
-
-  @Override
-  public KinesisCheckpoint getStartCheckpoint() {
-    return _startCheckpoint;
-  }
-
-  @Override
-  public void setStartCheckpoint(Checkpoint startCheckpoint) {
-    _startCheckpoint = (KinesisCheckpoint) startCheckpoint;
-  }
-
-  @Override
-  public KinesisCheckpoint getEndCheckpoint() {
-    return _endCheckpoint;
-  }
-
-  @Override
-  public void setEndCheckpoint(Checkpoint endCheckpoint) {
-    _endCheckpoint = (KinesisCheckpoint) endCheckpoint;
-  }
-
-  @Override
-  public byte[] serialize() {
-    return new byte[0];
-  }
-
-  @Override
-  public KinesisShardMetadata deserialize(byte[] blob) {
-    return null;
-  }
-}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
new file mode 100644
index 0000000..ba9d2b6
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
@@ -0,0 +1,53 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeoutException;
+import javax.annotation.Nonnull;
+import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupInfo;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
+import org.apache.pinot.spi.stream.StreamConfig;
+import org.apache.pinot.spi.stream.StreamMetadataProvider;
+import software.amazon.awssdk.services.kinesis.model.Shard;
+
+
+public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
+  private final KinesisConfig _kinesisConfig;
+  private KinesisConnectionHandler _kinesisConnectionHandler;
+
+  public KinesisStreamMetadataProvider(String clientId, KinesisConfig kinesisConfig) {
+    _kinesisConfig = kinesisConfig;
+    _kinesisConnectionHandler = new KinesisConnectionHandler(kinesisConfig.getStream(), kinesisConfig.getAwsRegion());
+  }
+
+  @Override
+  public int fetchPartitionCount(long timeoutMillis) {
+    return 0;
+  }
+
+  @Override
+  public long fetchPartitionOffset(@Nonnull OffsetCriteria offsetCriteria, long timeoutMillis)
+      throws TimeoutException {
+    return 0;
+  }
+
+  @Override
+  public List<PartitionGroupInfo> getPartitionGroupInfoList(String clientId, StreamConfig streamConfig,
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata, int timeoutMillis)
+      throws TimeoutException {
+    List<PartitionGroupInfo> partitionGroupInfos = new ArrayList<>();
+    List<Shard> shards = _kinesisConnectionHandler.getShards();
+    for (Shard shard : shards) {
+      partitionGroupInfos.add(new PartitionGroupInfo(shard.shardId().hashCode(), shard.sequenceNumberRange().startingSequenceNumber()));
+    }
+    return partitionGroupInfos;
+  }
+
+  @Override
+  public void close()
+      throws IOException {
+
+  }
+}
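
In this cut, each Kinesis shard maps to one partition group: the group id is derived from the
shard id's hashCode and the start offset is the shard's starting sequence number. A worked
illustration with a hypothetical shard:

    // Shard "shardId-000000000000" with starting sequence number "100" becomes:
    //   new PartitionGroupInfo("shardId-000000000000".hashCode(), "100")
    // Note: hashCode-derived ids are a stopgap, since distinct shard ids can in principle collide.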
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
index f853875..57baae9 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
@@ -17,9 +17,11 @@ package org.apache.pinot.plugin.stream.kinesis; /**
  * under the License.
  */
 
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import software.amazon.awssdk.services.kinesis.model.Shard;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
@@ -29,7 +31,8 @@ public class KinesisConsumerTest {
   private static final String STREAM_NAME = "kinesis-test";
   private static final String AWS_REGION = "us-west-2";
 
-  public static void main(String[] args) {
+  public static void main(String[] args)
+      throws IOException {
     Map<String, String> props = new HashMap<>();
     props.put(KinesisConfig.STREAM, STREAM_NAME);
     props.put(KinesisConfig.AWS_REGION, AWS_REGION);
@@ -42,18 +45,19 @@ public class KinesisConsumerTest {
       System.out.println("SHARD: " + shard.shardId());
 
       KinesisConsumer kinesisConsumer =
-          new KinesisConsumer(kinesisConfig, new KinesisShardMetadata(shard.shardId(), STREAM_NAME, AWS_REGION));
+          new KinesisConsumer(kinesisConfig);
       System.out.println(
           "Kinesis Checkpoint Range: < " + shard.sequenceNumberRange().startingSequenceNumber() + ", " + shard
               .sequenceNumberRange().endingSequenceNumber() + " >");
-      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shard.sequenceNumberRange().startingSequenceNumber());
-      KinesisFetchResult fetchResult = kinesisConsumer.fetch(kinesisCheckpoint, null, 60 * 1000L);
-      KinesisRecordsBatch list = fetchResult.getMessages();
-      int n = list.getMessageCount();
+      Map<String, String> shardIdToSeqNumMap = new HashMap<>();
+      shardIdToSeqNumMap.put(shard.shardId(), shard.sequenceNumberRange().startingSequenceNumber());
+      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardIdToSeqNumMap);
+      KinesisRecordsBatch kinesisRecordsBatch = kinesisConsumer.fetchMessages(kinesisCheckpoint, null, 60 * 1000);
+      int n = kinesisRecordsBatch.getMessageCount();
 
       System.out.println("Found " + n + " messages ");
       for (int i = 0; i < n; i++) {
-        System.out.println("SEQ-NO: " + list.getMessageOffsetAtIndex(i) + ", DATA: " + list.getMessageAtIndex(i));
+        System.out.println("SEQ-NO: " + kinesisRecordsBatch.getMessageOffsetAtIndex(i) + ", DATA: " + kinesisRecordsBatch.getMessageAtIndex(i));
       }
       kinesisConsumer.close();
     }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
index f662d99..7c4e3ef 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
@@ -18,6 +18,9 @@
  */
 package org.apache.pinot.spi.stream;
 
+import java.util.List;
+
+
 public class PartitionGroupMetadata {
 
   // fixme: Make partitionGroupId string everywhere (LLCSegmentName, StreamMetadataProvider)
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
deleted file mode 100644
index 0195684..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream.v2;
-
-public interface Checkpoint {
-  boolean isEndOfPartition();
-  byte[] serialize();
-  Checkpoint deserialize(byte[] blob);
-}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java
deleted file mode 100644
index 48b387d..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream.v2;
-
-public interface ConsumerV2 {
-  FetchResult fetch(Checkpoint start, Checkpoint end, long timeout);
-}
-
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
deleted file mode 100644
index 2188ac9..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream.v2;
-
-import java.util.List;
-import org.apache.pinot.spi.stream.MessageBatch;
-
-
-public interface FetchResult<T> {
-  Checkpoint getLastCheckpoint();
-  MessageBatch<T> getMessages();
-}
-
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java
deleted file mode 100644
index d7c44d7..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream.v2;
-
-public interface PartitionGroupMetadata {
-  Checkpoint getStartCheckpoint(); // similar to getStartOffset
-
-  Checkpoint getEndCheckpoint(); // similar to getEndOffset
-
-  void setStartCheckpoint(Checkpoint startCheckpoint);
-
-  void setEndCheckpoint(Checkpoint endCheckpoint);
-
-  byte[] serialize();
-
-  PartitionGroupMetadata deserialize(byte[] blob);
-}
-
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
deleted file mode 100644
index ba37767..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream.v2;
-
-import java.util.List;
-
-
-public interface PartitionGroupMetadataMap {
-
-  List<PartitionGroupMetadata> getMetadataList();
-
-  PartitionGroupMetadata getPartitionGroupMetadata(int index);
-
-}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java
deleted file mode 100644
index 6e65b25..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream.v2;
-
-public interface SegmentNameGenerator {
-  // generates a unique name for a partition group based on the metadata
-    String generateSegmentName(PartitionGroupMetadata metadata);
-
-}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
deleted file mode 100644
index 9e671aa..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream.v2;
-
-import java.util.Map;
-import org.apache.pinot.spi.stream.StreamConfig;
-
-
-public interface StreamConsumerFactoryV2 {
-  void init(StreamConfig streamConfig);
-
-  // takes the current state of partition groups (groupings of shards, the state of the consumption) and creates the new state
-  PartitionGroupMetadataMap getPartitionGroupsMetadata(PartitionGroupMetadataMap currentPartitionGroupsMetadata);
-
-  // creates a name generator which generates segment name for a partition group
-  SegmentNameGenerator getSegmentNameGenerator();
-
-  // creates a consumer which consumes from a partition group
-  ConsumerV2 createConsumer(PartitionGroupMetadata metadata);
-
-}
diff --git a/pinot-tools/pom.xml b/pinot-tools/pom.xml
index 43afd0f..97b3bef 100644
--- a/pinot-tools/pom.xml
+++ b/pinot-tools/pom.xml
@@ -97,6 +97,11 @@
     </dependency>
     <dependency>
       <groupId>org.apache.pinot</groupId>
+      <artifactId>pinot-kinesis</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.pinot</groupId>
       <artifactId>pinot-kafka-${kafka.version}</artifactId>
       <version>${project.version}</version>
       <scope>runtime</scope>
diff --git a/pom.xml b/pom.xml
index 5676edc..881526b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -117,7 +117,7 @@
     <parquet.version>1.8.0</parquet.version>
     <helix.version>0.9.8</helix.version>
     <zkclient.version>0.7</zkclient.version>
-    <jackson.version>2.9.8</jackson.version>
+    <jackson.version>2.12.0</jackson.version>
     <async-http-client.version>1.9.21</async-http-client.version>
     <jersey.version>2.28</jersey.version>
     <grizzly.version>2.4.4</grizzly.version>




[incubator-pinot] 20/47: Add license headers

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit b05ad0813351a7ecf5f2f2e059e0399859aa2f29
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 01:39:25 2020 +0530

    Add license headers
---
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml     | 20 ++++++++++++++++++++
 .../plugin/stream/kinesis/KinesisCheckpoint.java     | 18 ++++++++++++++++++
 .../stream/kinesis/KinesisConnectionHandler.java     | 18 ++++++++++++++++++
 .../pinot/plugin/stream/kinesis/KinesisConsumer.java | 18 ++++++++++++++++++
 .../stream/kinesis/KinesisConsumerFactory.java       | 18 ++++++++++++++++++
 .../plugin/stream/kinesis/KinesisFetchResult.java    | 18 ++++++++++++++++++
 .../kinesis/KinesisPartitionGroupMetadataMap.java    | 18 ++++++++++++++++++
 .../plugin/stream/kinesis/KinesisShardMetadata.java  | 18 ++++++++++++++++++
 .../org/apache/pinot/spi/stream/v2/Checkpoint.java   | 18 ++++++++++++++++++
 .../org/apache/pinot/spi/stream/v2/ConsumerV2.java   | 18 ++++++++++++++++++
 .../org/apache/pinot/spi/stream/v2/FetchResult.java  | 18 ++++++++++++++++++
 .../pinot/spi/stream/v2/PartitionGroupMetadata.java  | 18 ++++++++++++++++++
 .../spi/stream/v2/PartitionGroupMetadataMap.java     | 18 ++++++++++++++++++
 .../pinot/spi/stream/v2/SegmentNameGenerator.java    | 18 ++++++++++++++++++
 .../pinot/spi/stream/v2/StreamConsumerFactoryV2.java | 18 ++++++++++++++++++
 15 files changed, 272 insertions(+)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
index f863d17..1abc536 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
@@ -1,4 +1,24 @@
 <?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
 <project xmlns="http://maven.apache.org/POM/4.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 89043ea..450173c 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import org.apache.pinot.spi.stream.v2.Checkpoint;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index 554cca6..c41598e 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.List;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 1181d14..7670f06 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.ArrayList;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index 5e06a01..931fa07 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.Map;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
index 2801a09..52dab66 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.ArrayList;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
index 05d95de..9a34004 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.ArrayList;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
index e1d23da..8141cd4 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import org.apache.pinot.spi.stream.v2.Checkpoint;
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
index 0856454..030fe4e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/Checkpoint.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.spi.stream.v2;
 
 public interface Checkpoint {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java
index afc8d38..48b387d 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/ConsumerV2.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.spi.stream.v2;
 
 public interface ConsumerV2 {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
index 78ae5ef..9d14473 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/FetchResult.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.spi.stream.v2;
 
 import java.util.List;
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java
index 27c5ce7..d7c44d7 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadata.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.spi.stream.v2;
 
 public interface PartitionGroupMetadata {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
index 702f08a..ba37767 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/PartitionGroupMetadataMap.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.spi.stream.v2;
 
 import java.util.List;
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java
index 689c686..6e65b25 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/SegmentNameGenerator.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.spi.stream.v2;
 
 public interface SegmentNameGenerator {
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
index eb7f76e..9e671aa 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/v2/StreamConsumerFactoryV2.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.spi.stream.v2;
 
 import java.util.Map;




[incubator-pinot] 36/47: Fix offsets in StreamMetadataProvider impl

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 9c33895266f71512c4e8a0858d8f6eaa783faf44
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Mon Jan 4 11:59:18 2021 -0800

    Fix offsets in StreamMetadataProvider impl
---
 .../org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java | 6 +++++-
 .../plugin/stream/kinesis/KinesisStreamMetadataProvider.java      | 8 +++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index d42f899..517f8c0 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.type.TypeReference;
 import java.io.IOException;
 import java.util.Map;
-import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.utils.JsonUtils;
 
@@ -54,6 +53,11 @@ public class KinesisCheckpoint implements StreamPartitionMsgOffset {
   }
 
   @Override
+  public String toString() {
+    return serialize();
+  }
+
+  @Override
   public KinesisCheckpoint deserialize(String blob) {
     try {
       return new KinesisCheckpoint(blob);
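
The toString()/serialize() pairing added above implies the checkpoint is essentially a JSON-encoded shardId-to-sequence-number map. A rough round-trip sketch using plain Jackson (Pinot's JsonUtils wrapper is assumed to behave equivalently; the shard id and sequence number below are made up):

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.HashMap;
import java.util.Map;

public class CheckpointRoundTripSketch {
  public static void main(String[] args) throws Exception {
    ObjectMapper mapper = new ObjectMapper();
    // Hypothetical shard id and sequence number, in the Map<String, String>
    // shape that KinesisCheckpoint serializes.
    Map<String, String> shardToSeq = new HashMap<>();
    shardToSeq.put("shardId-000000000000", "49600000000000000000000000000000000000000001");
    String blob = mapper.writeValueAsString(shardToSeq);               // like serialize()
    Map<String, String> restored =
        mapper.readValue(blob, new TypeReference<Map<String, String>>() { }); // like deserialize(blob)
    System.out.println(restored.equals(shardToSeq)); // true
  }
}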
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
index ba9d2b6..f86d06c 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisStreamMetadataProvider.java
@@ -2,7 +2,9 @@ package org.apache.pinot.plugin.stream.kinesis;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
 import org.apache.pinot.spi.stream.OffsetCriteria;
@@ -40,7 +42,11 @@ public class KinesisStreamMetadataProvider implements StreamMetadataProvider {
     List<PartitionGroupInfo> partitionGroupInfos = new ArrayList<>();
     List<Shard> shards = _kinesisConnectionHandler.getShards();
     for (Shard shard : shards) {
-      partitionGroupInfos.add(new PartitionGroupInfo(shard.shardId().hashCode(), shard.sequenceNumberRange().startingSequenceNumber()));
+      Map<String, String> shardToSequenceNumMap = new HashMap<>();
+      shardToSequenceNumMap.put(shard.shardId(), shard.sequenceNumberRange().startingSequenceNumber());
+      KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shardToSequenceNumMap);
+      partitionGroupInfos
+          .add(new PartitionGroupInfo(Math.abs(shard.shardId().hashCode()), kinesisCheckpoint.serialize()));
     }
     return partitionGroupInfos;
   }
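
One caveat with Math.abs(shard.shardId().hashCode()) above: Math.abs(Integer.MIN_VALUE) is still negative, and distinct shard ids can hash to the same int. A purely illustrative alternative, not part of this patch:

public class PartitionGroupIdSketch {
  // Masking the sign bit avoids the Math.abs(Integer.MIN_VALUE) edge case,
  // though hash collisions between distinct shard ids remain possible.
  static int toPartitionGroupId(String shardId) {
    return shardId.hashCode() & Integer.MAX_VALUE;
  }

  public static void main(String[] args) {
    System.out.println(toPartitionGroupId("shardId-000000000000")); // always >= 0
  }
}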




[incubator-pinot] 38/47: Remove unused classes and changes

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 570a95a2e3a7189615433bbab23c962f6957805c
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Tue Jan 5 10:20:43 2021 -0800

    Remove unused classes and changes
---
 .../apache/pinot/common/utils/CommonConstants.java |  4 -
 .../helix/core/PinotHelixResourceManager.java      | 61 ++++++--------
 .../helix/core/PinotTableIdealStateBuilder.java    | 10 ++-
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 96 ++++++++++++----------
 .../impl/fakestream/FakeStreamConsumerFactory.java |  5 +-
 .../fakestream/FakeStreamMetadataProvider.java     |  8 +-
 ...lakyConsumerRealtimeClusterIntegrationTest.java |  2 -
 .../kafka09/KafkaPartitionLevelConsumerTest.java   |  4 +-
 .../kafka20/KafkaPartitionLevelConsumer.java       |  1 -
 .../kafka20/KafkaStreamMetadataProvider.java       |  1 -
 .../plugin/stream/kinesis/KinesisConsumer.java     |  2 +-
 .../org/apache/pinot/spi/stream/FetchResult.java   | 24 ------
 .../org/apache/pinot/spi/stream/MessageBatch.java  |  2 -
 .../spi/stream/PartitionGroupMetadataList.java     | 30 -------
 .../org/apache/pinot/spi/stream/StreamConfig.java  |  6 +-
 .../pinot/spi/stream/StreamMetadataProvider.java   |  4 +-
 16 files changed, 91 insertions(+), 169 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
index 4e81349..191ae93 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
@@ -393,10 +393,6 @@ public class CommonConstants {
     public static final String FLUSH_THRESHOLD_TIME = "segment.flush.threshold.time";
     public static final String PARTITION_METADATA = "segment.partition.metadata";
     /**
-     * Serialized {@link org.apache.pinot.spi.stream.PartitionGroupMetadata} for this segment
-     */
-    public static final String PARTITION_GROUP_METADATA = "segment.partition.group.metadata";
-    /**
      * This field is used for parallel push protection to lock the segment globally.
      * We put the segment upload start timestamp so that if the previous push failed without unlock the segment, the
      * next upload won't be blocked forever.
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index b2949e7..b50da5f 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -1351,45 +1351,34 @@ public class PinotHelixResourceManager {
         IngestionConfigUtils.getStreamConfigMap(realtimeTableConfig));
     IdealState idealState = getTableIdealState(realtimeTableName);
 
-
-    if (streamConfig.isShardedConsumerType()) {
-      idealState = PinotTableIdealStateBuilder
-          .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
-              _enableBatchMessageMode);
-      _pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
-      LOGGER.info("Successfully setup table for SHARDED consumers for {} ", realtimeTableName);
-    } else {
-
-      if (streamConfig.hasHighLevelConsumerType()) {
-        if (idealState == null) {
-          LOGGER.info("Initializing IdealState for HLC table: {}", realtimeTableName);
-          idealState = PinotTableIdealStateBuilder
-              .buildInitialHighLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, _helixZkManager,
-                  _propertyStore, _enableBatchMessageMode);
-          _helixAdmin.addResource(_helixClusterName, realtimeTableName, idealState);
-        } else {
-          // Remove LLC segments if it is not configured
-          if (!streamConfig.hasLowLevelConsumerType()) {
-            _pinotLLCRealtimeSegmentManager.removeLLCSegments(idealState);
-          }
+    if (streamConfig.hasHighLevelConsumerType()) {
+      if (idealState == null) {
+        LOGGER.info("Initializing IdealState for HLC table: {}", realtimeTableName);
+        idealState = PinotTableIdealStateBuilder
+            .buildInitialHighLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, _helixZkManager,
+                _propertyStore, _enableBatchMessageMode);
+        _helixAdmin.addResource(_helixClusterName, realtimeTableName, idealState);
+      } else {
+        // Remove LLC segments if it is not configured
+        if (!streamConfig.hasLowLevelConsumerType()) {
+          _pinotLLCRealtimeSegmentManager.removeLLCSegments(idealState);
         }
-        // For HLC table, property store entry must exist to trigger watchers to create segments
-        ensurePropertyStoreEntryExistsForHighLevelConsumer(realtimeTableName);
       }
+      // For HLC table, property store entry must exist to trigger watchers to create segments
+      ensurePropertyStoreEntryExistsForHighLevelConsumer(realtimeTableName);
+    }
 
-      // Either we have only low-level consumer, or both.
-      if (streamConfig.hasLowLevelConsumerType()) {
-        // Will either create idealstate entry, or update the IS entry with new segments
-        // (unless there are low-level segments already present)
-        if (ZKMetadataProvider.getLLCRealtimeSegments(_propertyStore, realtimeTableName).isEmpty()) {
-          idealState = PinotTableIdealStateBuilder
-              .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
-                  _enableBatchMessageMode);
-          _pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
-          LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
-        } else {
-          LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
-        }
+    // Either we have only low-level consumer, or both.
+    if (streamConfig.hasLowLevelConsumerType()) {
+      // Will either create idealstate entry, or update the IS entry with new segments
+      // (unless there are low-level segments already present)
+      if (ZKMetadataProvider.getLLCRealtimeSegments(_propertyStore, realtimeTableName).isEmpty()) {
+        PinotTableIdealStateBuilder
+            .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
+                _enableBatchMessageMode);
+        LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
+      } else {
+        LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
       }
     }
   }
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
index 8b200bb..68bcf57 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
@@ -30,6 +30,7 @@ import org.apache.pinot.common.metadata.instance.InstanceZKMetadata;
 import org.apache.pinot.common.utils.StringUtil;
 import org.apache.pinot.common.utils.config.TagNameUtils;
 import org.apache.pinot.common.utils.helix.HelixHelper;
+import org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.stream.PartitionGroupInfo;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
@@ -94,8 +95,9 @@ public class PinotTableIdealStateBuilder {
     return idealState;
   }
 
-  public static IdealState buildLowLevelRealtimeIdealStateFor(String realtimeTableName, TableConfig realtimeTableConfig,
-      IdealState idealState, boolean enableBatchMessageMode) {
+  public static void buildLowLevelRealtimeIdealStateFor(PinotLLCRealtimeSegmentManager pinotLLCRealtimeSegmentManager,
+      String realtimeTableName, TableConfig realtimeTableConfig, IdealState idealState,
+      boolean enableBatchMessageMode) {
 
     // Validate replicasPerPartition here.
     final String replicasPerPartitionStr = realtimeTableConfig.getValidationConfig().getReplicasPerPartition();
@@ -104,7 +106,7 @@ public class PinotTableIdealStateBuilder {
     }
     final int nReplicas;
     try {
-      nReplicas = Integer.parseInt(replicasPerPartitionStr);
+      nReplicas = Integer.valueOf(replicasPerPartitionStr);
     } catch (NumberFormatException e) {
       throw new PinotHelixResourceManager.InvalidTableConfigException(
           "Invalid value for replicasPerPartition, expected a number: " + replicasPerPartitionStr, e);
@@ -112,7 +114,7 @@ public class PinotTableIdealStateBuilder {
     if (idealState == null) {
       idealState = buildEmptyRealtimeIdealStateFor(realtimeTableName, nReplicas, enableBatchMessageMode);
     }
-    return idealState;
+    pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
   }
 
   public static List<PartitionGroupInfo> getPartitionGroupInfoList(StreamConfig streamConfig,
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index a6ef625..61ef719 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -24,12 +24,12 @@ import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
@@ -84,9 +84,7 @@ import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
 import org.apache.pinot.spi.stream.PartitionOffsetFetcher;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConfigProperties;
-import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
-import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffsetFactory;
 import org.apache.pinot.spi.utils.IngestionConfigUtils;
@@ -204,42 +202,6 @@ public class PinotLLCRealtimeSegmentManager {
     return partitionGroupMetadataList;
   }
 
-  /**
-   * Sets up the realtime table ideal state for a table of consumer type SHARDED
-   */
-  public void setUpNewTable(TableConfig tableConfig, IdealState idealState) {
-    Preconditions.checkState(!_isStopping, "Segment manager is stopping");
-
-    String realtimeTableName = tableConfig.getTableName();
-    LOGGER.info("Setting up new SHARDED table: {}", realtimeTableName);
-
-    _flushThresholdUpdateManager.clearFlushThresholdUpdater(realtimeTableName);
-
-    PartitionLevelStreamConfig streamConfig =
-        new PartitionLevelStreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
-
-    // get new partition groups and their metadata
-    List<PartitionGroupInfo> newPartitionGroupInfoList = getPartitionGroupInfoList(streamConfig, Collections.emptyList());
-    int numPartitionGroups = newPartitionGroupInfoList.size();
-
-    InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
-    int numReplicas = getNumReplicas(tableConfig, instancePartitions);
-
-    SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
-    Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap =
-        Collections.singletonMap(InstancePartitionsType.CONSUMING, instancePartitions);
-
-    long currentTimeMs = getCurrentTimeMs();
-    Map<String, Map<String, String>> instanceStatesMap = idealState.getRecord().getMapFields();
-    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupInfoList) {
-      String segmentName = setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo,
-          currentTimeMs, instancePartitions, numPartitionGroups, numReplicas);
-      updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, segmentName, segmentAssignment,
-          instancePartitionsMap);
-    }
-    setIdealState(realtimeTableName, idealState);
-  }
-
   public boolean getIsSplitCommitEnabled() {
     return _controllerConf.getAcceptSplitCommit();
   }
@@ -274,6 +236,50 @@ public class PinotLLCRealtimeSegmentManager {
   }
 
   /**
+   * Sets up the initial segments for a new LLC real-time table.
+   * <p>NOTE: the passed in IdealState may contain HLC segments if both HLC and LLC are configured.
+   */
+  public void setUpNewTable(TableConfig tableConfig, IdealState idealState) {
+    Preconditions.checkState(!_isStopping, "Segment manager is stopping");
+
+    String realtimeTableName = tableConfig.getTableName();
+    LOGGER.info("Setting up new LLC table: {}", realtimeTableName);
+
+    // Make sure all the existing segments are HLC segments
+    List<String> currentSegments = getAllSegments(realtimeTableName);
+    for (String segmentName : currentSegments) {
+      // TODO: Should return 4xx HTTP status code. Currently all exceptions are returning 500
+      Preconditions.checkState(SegmentName.isHighLevelConsumerSegmentName(segmentName),
+          "Cannot set up new LLC table: %s with existing non-HLC segment: %s", realtimeTableName, segmentName);
+    }
+
+    _flushThresholdUpdateManager.clearFlushThresholdUpdater(realtimeTableName);
+
+    PartitionLevelStreamConfig streamConfig =
+        new PartitionLevelStreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
+    InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
+    // get new partition groups and their metadata
+    List<PartitionGroupInfo> newPartitionGroupInfoList = getPartitionGroupInfoList(streamConfig, Collections.emptyList());
+    int numPartitionGroups = newPartitionGroupInfoList.size();
+
+    int numReplicas = getNumReplicas(tableConfig, instancePartitions);
+
+    SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
+    Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap =
+        Collections.singletonMap(InstancePartitionsType.CONSUMING, instancePartitions);
+
+    long currentTimeMs = getCurrentTimeMs();
+    Map<String, Map<String, String>> instanceStatesMap = idealState.getRecord().getMapFields();
+    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupInfoList) {
+      String segmentName = setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupInfo,
+          currentTimeMs, instancePartitions, numPartitionGroups, numReplicas);
+      updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, segmentName, segmentAssignment,
+          instancePartitionsMap);
+    }
+    setIdealState(realtimeTableName, idealState);
+  }
+
+  /**
    * Removes all LLC segments from the given IdealState.
    */
   public void removeLLCSegments(IdealState idealState) {
@@ -498,7 +504,7 @@ public class PinotLLCRealtimeSegmentManager {
         IngestionConfigUtils.getStreamConfigMap(tableConfig));
 
     // find new partition groups [A],[B],[C],[D]
-    List<PartitionGroupInfo> newPartitionGroupMetadataList =
+    List<PartitionGroupInfo> newPartitionGroupInfoList =
         getPartitionGroupInfoList(streamConfig, currentPartitionGroupMetadataList);
 
     // create new segment metadata, only if it is not IN_PROGRESS in the current state
@@ -508,7 +514,7 @@ public class PinotLLCRealtimeSegmentManager {
     List<String> newConsumingSegmentNames = new ArrayList<>();
     String rawTableName = TableNameBuilder.extractRawTableName(realtimeTableName);
     long newSegmentCreationTimeMs = getCurrentTimeMs();
-    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupMetadataList) {
+    for (PartitionGroupInfo partitionGroupInfo : newPartitionGroupInfoList) {
       int newPartitionGroupId = partitionGroupInfo.getPartitionGroupId();
       PartitionGroupMetadata currentPartitionGroupMetadata = currentGroupIdToMetadata.get(newPartitionGroupId);
       if (currentPartitionGroupMetadata == null) { // not present in current state. New partition found.
@@ -1162,14 +1168,16 @@ public class PinotLLCRealtimeSegmentManager {
     return System.currentTimeMillis();
   }
 
+  // fixme: investigate if this should only return active partitions (i.e. skip a shard if it has reached eol)
+  //  or return all unique partitions found in ideal state right from the birth of the table
   private int getNumPartitionsFromIdealState(IdealState idealState) {
-    int numPartitions = 0;
+    Set<String> uniquePartitions = new HashSet<>();
     for (String segmentName : idealState.getRecord().getMapFields().keySet()) {
       if (LLCSegmentName.isLowLevelConsumerSegmentName(segmentName)) {
-        numPartitions = Math.max(numPartitions, new LLCSegmentName(segmentName).getPartitionGroupId() + 1);
+        uniquePartitions.add(String.valueOf(new LLCSegmentName(segmentName).getPartitionGroupId()));
       }
     }
-    return numPartitions;
+    return uniquePartitions.size();
   }
 
   private int getNumReplicas(TableConfig tableConfig, InstancePartitions instancePartitions) {
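
Side note on the getNumPartitionsFromIdealState change above: with shard-based streams the partition group ids embedded in segment names are not guaranteed to be contiguous (a shard may reach end-of-life), so max(id) + 1 can differ from the number of groups actually present. A minimal illustration with hypothetical LLC segment names of the form table__<partitionGroupId>__<seq>__<timestamp>, assuming imports from java.util and org.apache.pinot.common.utils.LLCSegmentName:

    // Two live partition groups with non-contiguous ids 0 and 4:
    // unique-id counting yields 2, while the old max(id) + 1 logic yields 5.
    Set<String> uniquePartitions = new HashSet<>();
    int maxPlusOne = 0;
    for (String segmentName : Arrays.asList(
        "myTable__0__0__20201220T0000Z",
        "myTable__4__0__20201220T0000Z")) {
      LLCSegmentName llcSegmentName = new LLCSegmentName(segmentName);
      uniquePartitions.add(String.valueOf(llcSegmentName.getPartitionGroupId()));
      maxPlusOne = Math.max(maxPlusOne, llcSegmentName.getPartitionGroupId() + 1);
    }
    // uniquePartitions.size() == 2; maxPlusOne == 5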
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
index b0dc7eb..bb01e5c 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.core.realtime.impl.fakestream;
 
-import java.util.Collections;
 import java.util.Set;
 import org.apache.pinot.core.util.IngestionUtils;
 import org.apache.pinot.spi.config.table.TableConfig;
@@ -27,8 +26,6 @@ import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.OffsetCriteria;
-import org.apache.pinot.spi.stream.PartitionGroupConsumer;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
@@ -82,7 +79,7 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
 
     // stream metadata provider
     StreamMetadataProvider streamMetadataProvider = streamConsumerFactory.createStreamMetadataProvider(clientId);
-    int partitionCount = streamMetadataProvider.getPartitionGroupInfoList("clientId", streamConfig, Collections.emptyList(), 10_000).size();
+    int partitionCount = streamMetadataProvider.fetchPartitionCount(10_000);
     System.out.println(partitionCount);
 
     // Partition metadata provider
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
index 61aa01f..e0b8ebd 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamMetadataProvider.java
@@ -19,13 +19,9 @@
 package org.apache.pinot.core.realtime.impl.fakestream;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.concurrent.TimeoutException;
 import javax.annotation.Nonnull;
 import org.apache.pinot.spi.stream.OffsetCriteria;
-import org.apache.pinot.spi.stream.PartitionGroupInfo;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamMetadataProvider;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -35,12 +31,10 @@ import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
  * StreamMetadataProvider implementation for the fake stream
  */
 public class FakeStreamMetadataProvider implements StreamMetadataProvider {
-  private final int _numPartitions;
-  private StreamConfig _streamConfig;
+  private int _numPartitions;
 
   public FakeStreamMetadataProvider(StreamConfig streamConfig) {
     _numPartitions = FakeStreamConfigUtils.getNumPartitions(streamConfig);
-    _streamConfig = streamConfig;
   }
 
   @Override
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
index 4503de0..b05244f 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
@@ -22,8 +22,6 @@ import java.lang.reflect.Constructor;
 import java.util.Random;
 import java.util.Set;
 import org.apache.pinot.spi.data.readers.GenericRow;
-import org.apache.pinot.spi.stream.PartitionGroupConsumer;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
index 90dc5ad..beb82e5 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/test/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionLevelConsumerTest.java
@@ -267,7 +267,7 @@ public class KafkaPartitionLevelConsumerTest {
   }
 
   @Test
-  public void testGetPartitionCount() throws Exception {
+  public void testGetPartitionCount() {
     String streamType = "kafka";
     String streamKafkaTopicName = "theTopic";
     String streamKafkaBrokerList = "abcd:1234,bcde:2345";
@@ -291,7 +291,7 @@ public class KafkaPartitionLevelConsumerTest {
 
     KafkaStreamMetadataProvider streamMetadataProvider =
         new KafkaStreamMetadataProvider(clientId, streamConfig, mockKafkaSimpleConsumerFactory);
-    Assert.assertEquals(streamMetadataProvider.getPartitionGroupInfoList("clientId", streamConfig, Collections.emptyList(), 10000), 2);
+    Assert.assertEquals(streamMetadataProvider.fetchPartitionCount(10000L), 2);
   }
 
   @Test
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionLevelConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionLevelConsumer.java
index 25b1742..f9b4365 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionLevelConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionLevelConsumer.java
@@ -26,7 +26,6 @@ import java.util.concurrent.TimeoutException;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.clients.consumer.ConsumerRecords;
 import org.apache.kafka.common.utils.Bytes;
-import org.apache.pinot.spi.stream.Checkpoint;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
index 38c49f5..c0e2041 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaStreamMetadataProvider.java
@@ -42,7 +42,6 @@ public class KafkaStreamMetadataProvider extends KafkaPartitionLevelConnectionHa
   }
 
   @Override
-  @Deprecated
   public int fetchPartitionCount(long timeoutMillis) {
     return _consumer.partitionsFor(_topic, Duration.ofMillis(timeoutMillis)).size();
   }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index a97f3dc..70d2c8a 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -170,7 +170,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Partiti
     GetShardIteratorRequest.Builder requestBuilder =
         GetShardIteratorRequest.builder().streamName(_stream).shardId(shardId).shardIteratorType(_shardIteratorType);
 
-    if (sequenceNumber != null) {
+    if (sequenceNumber != null && _shardIteratorType.toString().contains("SEQUENCE")) {
       requestBuilder = requestBuilder.startingSequenceNumber(sequenceNumber);
     }
     return _kinesisClient.getShardIterator(requestBuilder.build()).shardIterator();
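
The extra guard above exists because the AWS SDK only accepts a startingSequenceNumber for the sequence-number-based iterator types; LATEST and TRIM_HORIZON iterators must omit it. A minimal sketch of the distinction (stream name, shard id, and sequence number are placeholders):

    // Sequence-number-based types require startingSequenceNumber:
    GetShardIteratorRequest withSequence = GetShardIteratorRequest.builder()
        .streamName("myStream")
        .shardId("shardId-000000000000")
        .shardIteratorType(ShardIteratorType.AFTER_SEQUENCE_NUMBER)
        .startingSequenceNumber("49590338271490256608559692538361571095921575989136588898")
        .build();

    // Position-based types must not set one:
    GetShardIteratorRequest fromLatest = GetShardIteratorRequest.builder()
        .streamName("myStream")
        .shardId("shardId-000000000000")
        .shardIteratorType(ShardIteratorType.LATEST)
        .build();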
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java
deleted file mode 100644
index 7e8a911..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/FetchResult.java
+++ /dev/null
@@ -1,24 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream;
-
-public interface FetchResult<T> {
-  Checkpoint getLastCheckpoint();
-  MessageBatch<T> getMessages();
-}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
index 5af72c0..3052b9e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/MessageBatch.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.spi.stream;
 
-import javax.annotation.Nullable;
 import org.apache.pinot.spi.annotations.InterfaceAudience;
 import org.apache.pinot.spi.annotations.InterfaceStability;
 
@@ -62,7 +61,6 @@ public interface MessageBatch<T> {
    * Returns the metadata associated with the message at a particular index. This typically includes the timestamp
    * when the message was ingested by the upstream stream-provider and other relevant metadata.
    */
-  @Nullable
   default RowMetadata getMetadataAtIndex(int index) {
     return null;
   }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataList.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataList.java
deleted file mode 100644
index 1568d63..0000000
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadataList.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.pinot.spi.stream;
-
-import java.util.List;
-
-
-public interface PartitionGroupMetadataList {
-
-  List<PartitionGroupMetadata> getMetadataList();
-
-  PartitionGroupMetadata getPartitionGroupMetadata(int index);
-
-}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
index a3e359e..d343203 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamConfig.java
@@ -41,7 +41,7 @@ public class StreamConfig {
    * The type of the stream consumer either HIGHLEVEL or LOWLEVEL. For backward compatibility, adding SIMPLE which is equivalent to LOWLEVEL
    */
   public enum ConsumerType {
-    HIGHLEVEL, LOWLEVEL, SHARDED
+    HIGHLEVEL, LOWLEVEL
   }
 
   public static final int DEFAULT_FLUSH_THRESHOLD_ROWS = 5_000_000;
@@ -273,10 +273,6 @@ public class StreamConfig {
     return _consumerTypes.contains(ConsumerType.LOWLEVEL);
   }
 
-  public boolean isShardedConsumerType() {
-    return _consumerTypes.size() == 1 && _consumerTypes.get(0).equals(ConsumerType.SHARDED);
-  }
-
   public String getConsumerFactoryClassName() {
     return _consumerFactoryClassName;
   }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
index 572cd02..c64f710 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMetadataProvider.java
@@ -36,9 +36,9 @@ import org.apache.pinot.spi.annotations.InterfaceStability;
 public interface StreamMetadataProvider extends Closeable {
   /**
    * Fetches the number of partitions for a topic given the stream configs
-   * @deprecated use getPartitionGroupMetadataList instead
+   * @param timeoutMillis
+   * @return
    */
-  @Deprecated
   int fetchPartitionCount(long timeoutMillis);
 
   // Issue 5953 Retain this interface for 0.5.0, remove in 0.6.0
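
With fetchPartitionCount un-deprecated, plugin callers go back to the direct call. A usage sketch, assuming an already-created StreamConsumerFactory and a caller that handles the IOException from close():

    // StreamMetadataProvider extends Closeable, so try-with-resources applies.
    try (StreamMetadataProvider metadataProvider =
        streamConsumerFactory.createStreamMetadataProvider("clientId")) {
      int partitionCount = metadataProvider.fetchPartitionCount(10_000L);
    }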




[incubator-pinot] 19/47: Reformat code

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 6c8af2b54c72407bfa8f91b2403dc317b01cc8cb
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 01:27:05 2020 +0530

    Reformat code
---
 .../plugin/stream/kinesis/KinesisCheckpoint.java   |  3 +--
 .../stream/kinesis/KinesisConnectionHandler.java   | 14 ++++++-----
 .../plugin/stream/kinesis/KinesisConsumer.java     | 28 ++++++++++++----------
 .../stream/kinesis/KinesisConsumerFactory.java     |  3 ++-
 .../plugin/stream/kinesis/KinesisFetchResult.java  |  6 ++---
 .../kinesis/KinesisPartitionGroupMetadataMap.java  |  5 ++--
 6 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index aa80b17..89043ea 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -6,7 +6,7 @@ import org.apache.pinot.spi.stream.v2.Checkpoint;
 public class KinesisCheckpoint implements Checkpoint {
   String _sequenceNumber;
 
-  public KinesisCheckpoint(String sequenceNumber){
+  public KinesisCheckpoint(String sequenceNumber) {
     _sequenceNumber = sequenceNumber;
   }
 
@@ -24,5 +24,4 @@ public class KinesisCheckpoint implements Checkpoint {
     //TODO: Implement SerDe
     return new KinesisCheckpoint(new String(blob));
   }
-
 }
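
A usage sketch for the checkpoint above (sequence number is a placeholder): until the SerDe TODO is addressed, the byte[] serde is a straight round trip of the sequence number.

    KinesisCheckpoint original = new KinesisCheckpoint("49590338271490256608");
    byte[] blob = original.serialize();
    KinesisCheckpoint restored = (KinesisCheckpoint) original.deserialize(blob);
    // restored.getSequenceNumber().equals(original.getSequenceNumber()) == true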
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index d8888fa..554cca6 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -24,19 +24,21 @@ public class KinesisConnectionHandler {
   private String _awsRegion;
   KinesisClient _kinesisClient;
 
-  public KinesisConnectionHandler(){
+  public KinesisConnectionHandler() {
 
   }
 
-  public KinesisConnectionHandler(String stream, String awsRegion){
+  public KinesisConnectionHandler(String stream, String awsRegion) {
     _stream = stream;
     _awsRegion = awsRegion;
-    _kinesisClient = KinesisClient.builder().region(Region.of(_awsRegion)).credentialsProvider(DefaultCredentialsProvider.create()).build();
+    _kinesisClient =
+        KinesisClient.builder().region(Region.of(_awsRegion)).credentialsProvider(DefaultCredentialsProvider.create())
+            .build();
   }
 
-  public List<Shard> getShards(){
-    ListShardsResponse listShardsResponse =  _kinesisClient.listShards(ListShardsRequest.builder().streamName(_stream).build());
+  public List<Shard> getShards() {
+    ListShardsResponse listShardsResponse =
+        _kinesisClient.listShards(ListShardsRequest.builder().streamName(_stream).build());
     return listShardsResponse.shards();
   }
-
 }
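
A usage sketch for the connection handler, assuming valid AWS credentials are resolvable through DefaultCredentialsProvider and a hypothetical stream name and region:

    KinesisConnectionHandler connectionHandler =
        new KinesisConnectionHandler("myStream", "us-east-1");
    for (Shard shard : connectionHandler.getShards()) {
      System.out.println(shard.shardId());  // one Kinesis shard per partition group
    }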
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index d896d67..1181d14 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -39,7 +39,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
 
     String kinesisEndSequenceNumber = null;
 
-    if(end != null) {
+    if (end != null) {
       KinesisCheckpoint kinesisEndCheckpoint = (KinesisCheckpoint) end;
       kinesisEndSequenceNumber = kinesisEndCheckpoint.getSequenceNumber();
     }
@@ -47,32 +47,34 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     String nextStartSequenceNumber = null;
     Long startTimestamp = System.currentTimeMillis();
 
-    while(shardIterator != null && !isTimedOut(startTimestamp, timeout)){
+    while (shardIterator != null && !isTimedOut(startTimestamp, timeout)) {
       GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(shardIterator).build();
       GetRecordsResponse getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
 
-      if(getRecordsResponse.records().size() > 0){
+      if (getRecordsResponse.records().size() > 0) {
         recordList.addAll(getRecordsResponse.records());
         nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
 
-        if(kinesisEndSequenceNumber != null && kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0 ){
+        if (kinesisEndSequenceNumber != null
+            && kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0) {
           nextStartSequenceNumber = kinesisEndSequenceNumber;
           break;
         }
 
-        if(recordList.size() >= _maxRecords) break;
+        if (recordList.size() >= _maxRecords) {
+          break;
+        }
       }
 
       shardIterator = getRecordsResponse.nextShardIterator();
     }
 
-    if(nextStartSequenceNumber == null && recordList.size() > 0){
+    if (nextStartSequenceNumber == null && recordList.size() > 0) {
       nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
     }
 
     KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber);
-    KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint,
-        recordList);
+    KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint, recordList);
 
     return kinesisFetchResult;
   }
@@ -80,14 +82,16 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
   private String getShardIterator(KinesisCheckpoint kinesisStartCheckpoint) {
     GetShardIteratorResponse getShardIteratorResponse;
 
-    if(kinesisStartCheckpoint.getSequenceNumber() != null) {
+    if (kinesisStartCheckpoint.getSequenceNumber() != null) {
       String kinesisStartSequenceNumber = kinesisStartCheckpoint.getSequenceNumber();
       getShardIteratorResponse = _kinesisClient.getShardIterator(
-          GetShardIteratorRequest.builder().streamName(_stream).shardId(_shardId).shardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
+          GetShardIteratorRequest.builder().streamName(_stream).shardId(_shardId)
+              .shardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
               .startingSequenceNumber(kinesisStartSequenceNumber).build());
-    } else{
+    } else {
       getShardIteratorResponse = _kinesisClient.getShardIterator(
-          GetShardIteratorRequest.builder().shardId(_shardId).streamName(_stream).shardIteratorType(ShardIteratorType.LATEST).build());
+          GetShardIteratorRequest.builder().shardId(_shardId).streamName(_stream)
+              .shardIteratorType(ShardIteratorType.LATEST).build());
     }
 
     return getShardIteratorResponse.shardIterator();
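
A fetch usage sketch, assuming a KinesisConsumer already bound to a stream, region, and shard id; a checkpoint with a null sequence number falls through to the LATEST iterator branch above:

    KinesisCheckpoint start = new KinesisCheckpoint(null);
    KinesisFetchResult fetchResult = consumer.fetch(start, null, 10_000L);
    // The returned checkpoint carries the next start sequence number.
    Checkpoint lastCheckpoint = fetchResult.getLastCheckpoint();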
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index 0608118..5e06a01 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -21,7 +21,8 @@ public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
   @Override
   public PartitionGroupMetadataMap getPartitionGroupsMetadata(
       PartitionGroupMetadataMap currentPartitionGroupsMetadata) {
-    return new KinesisPartitionGroupMetadataMap(_streamConfig.getTopicName(), _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, "us-central-1"));
+    return new KinesisPartitionGroupMetadataMap(_streamConfig.getTopicName(),
+        _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, "global"));
   }
 
   @Override
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
index 2996b28..2801a09 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
@@ -11,9 +11,9 @@ public class KinesisFetchResult implements FetchResult<Record> {
   private final KinesisCheckpoint _kinesisCheckpoint;
   private final List<Record> _recordList;
 
-  public KinesisFetchResult(KinesisCheckpoint kinesisCheckpoint, List<Record> recordList){
-     _kinesisCheckpoint = kinesisCheckpoint;
-     _recordList = recordList;
+  public KinesisFetchResult(KinesisCheckpoint kinesisCheckpoint, List<Record> recordList) {
+    _kinesisCheckpoint = kinesisCheckpoint;
+    _recordList = recordList;
   }
 
   @Override
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
index 700ec3f..05d95de 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -12,10 +12,10 @@ import software.amazon.awssdk.services.kinesis.model.Shard;
 public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler implements PartitionGroupMetadataMap {
   private final List<PartitionGroupMetadata> _stringPartitionGroupMetadataIndex = new ArrayList<>();
 
-  public KinesisPartitionGroupMetadataMap(String stream, String awsRegion){
+  public KinesisPartitionGroupMetadataMap(String stream, String awsRegion) {
     super(stream, awsRegion);
     List<Shard> shardList = getShards();
-    for(Shard shard : shardList){
+    for (Shard shard : shardList) {
       String startSequenceNumber = shard.sequenceNumberRange().startingSequenceNumber();
       String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
       KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream, awsRegion);
@@ -34,5 +34,4 @@ public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler i
   public PartitionGroupMetadata getPartitionGroupMetadata(int index) {
     return _stringPartitionGroupMetadataIndex.get(index);
   }
-
 }
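
A usage sketch with a hypothetical stream and region: the map holds one PartitionGroupMetadata per Kinesis shard, addressable by list position.

    KinesisPartitionGroupMetadataMap metadataMap =
        new KinesisPartitionGroupMetadataMap("myStream", "us-east-1");
    PartitionGroupMetadata firstShardMetadata = metadataMap.getPartitionGroupMetadata(0);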




[incubator-pinot] 17/47: Fix consumer code

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit b0d8c1b422e58013c48e07c5469698229936a621
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 00:54:16 2020 +0530

    Fix consumer code
---
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml   | 11 +--
 .../plugin/stream/kinesis/KinesisCheckpoint.java   | 15 +++-
 .../stream/kinesis/KinesisConnectionHandler.java   | 21 +++++-
 .../plugin/stream/kinesis/KinesisConsumer.java     | 88 +++++++++++++++-------
 .../stream/kinesis/KinesisConsumerFactory.java     |  2 +-
 .../plugin/stream/kinesis/KinesisFetchResult.java  |  8 +-
 .../kinesis/KinesisPartitionGroupMetadataMap.java  |  9 +--
 .../stream/kinesis/KinesisShardMetadata.java       | 11 +--
 8 files changed, 112 insertions(+), 53 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
index 97e5eef..f863d17 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
@@ -15,7 +15,7 @@
   <properties>
     <pinot.root>${basedir}/../../..</pinot.root>
     <phase.prop>package</phase.prop>
-    <aws.version>2.15.42</aws.version>
+    <aws.version>2.13.46</aws.version>
   </properties>
 
   <dependencies>
@@ -24,12 +24,13 @@
       <artifactId>kinesis</artifactId>
       <version>${aws.version}</version>
     </dependency>
+    <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
     <dependency>
-      <groupId>org.apache.pinot</groupId>
-      <artifactId>pinot-json</artifactId>
-      <version>${project.version}</version>
-      <scope>test</scope>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-core</artifactId>
+      <version>2.12.0</version>
     </dependency>
+
     <dependency>
       <groupId>org.apache.pinot</groupId>
       <artifactId>pinot-spi</artifactId>
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 77f790b..8448665 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -4,9 +4,11 @@ import org.apache.pinot.spi.stream.v2.Checkpoint;
 
 
 public class KinesisCheckpoint implements Checkpoint {
+  String _shardId;
   String _sequenceNumber;
 
-  public KinesisCheckpoint(String sequenceNumber){
+  public KinesisCheckpoint(String shardId, String sequenceNumber){
+    _shardId = shardId;
     _sequenceNumber = sequenceNumber;
   }
 
@@ -14,6 +16,14 @@ public class KinesisCheckpoint implements Checkpoint {
     return _sequenceNumber;
   }
 
+  public String getShardId() {
+    return _shardId;
+  }
+
+  public void setShardId(String shardId) {
+    _shardId = shardId;
+  }
+
   @Override
   public byte[] serialize() {
     return _sequenceNumber.getBytes();
@@ -21,7 +31,8 @@ public class KinesisCheckpoint implements Checkpoint {
 
   @Override
   public Checkpoint deserialize(byte[] blob) {
-    return new KinesisCheckpoint(new String(blob));
+    //TODO: Implement SerDe
+    return new KinesisCheckpoint("", new String(blob));
   }
 
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index 7ea24c0..d8888fa 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -1,25 +1,42 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
+import java.util.List;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.v2.ConsumerV2;
 import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
+import software.amazon.awssdk.core.SdkBytes;
 import software.amazon.awssdk.regions.Region;
 import software.amazon.awssdk.services.kinesis.KinesisClient;
+import software.amazon.awssdk.services.kinesis.model.DescribeStreamRequest;
+import software.amazon.awssdk.services.kinesis.model.DescribeStreamResponse;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
+import software.amazon.awssdk.services.kinesis.model.ListShardsRequest;
+import software.amazon.awssdk.services.kinesis.model.ListShardsResponse;
+import software.amazon.awssdk.services.kinesis.model.PutRecordRequest;
+import software.amazon.awssdk.services.kinesis.model.PutRecordResponse;
+import software.amazon.awssdk.services.kinesis.model.Shard;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
+import software.amazon.awssdk.services.kinesis.model.StreamDescription;
 
 
 public class KinesisConnectionHandler {
-  String _awsRegion = "";
+  private String _stream;
+  private String _awsRegion;
   KinesisClient _kinesisClient;
 
   public KinesisConnectionHandler(){
 
   }
 
-  public KinesisConnectionHandler(String awsRegion){
+  public KinesisConnectionHandler(String stream, String awsRegion){
+    _stream = stream;
     _awsRegion = awsRegion;
     _kinesisClient = KinesisClient.builder().region(Region.of(_awsRegion)).credentialsProvider(DefaultCredentialsProvider.create()).build();
   }
 
+  public List<Shard> getShards(){
+    ListShardsResponse listShardsResponse =  _kinesisClient.listShards(ListShardsRequest.builder().streamName(_stream).build());
+    return listShardsResponse.shards();
+  }
+
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index dc44079..7bc1006 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -3,6 +3,7 @@ package org.apache.pinot.plugin.stream.kinesis;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.ConsumerV2;
 import org.apache.pinot.spi.stream.v2.FetchResult;
@@ -16,57 +17,86 @@ import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 public class KinesisConsumer extends KinesisConnectionHandler implements ConsumerV2 {
   String _stream;
+  Integer _maxRecords;
 
   //TODO: Fetch AWS region from  Stream Config.
   public KinesisConsumer(String stream, String awsRegion) {
-    super(awsRegion);
+    super(stream, awsRegion);
     _stream = stream;
+    _maxRecords = 20;
+  }
+
+  public KinesisConsumer(String stream, String awsRegion, StreamConfig streamConfig) {
+    super(stream, awsRegion);
+    _stream = stream;
+    _maxRecords = Integer.parseInt(streamConfig.getStreamConfigsMap().getOrDefault("maxRecords", "20"));
   }
 
   @Override
-  public FetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
+  public KinesisFetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
     KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
-    KinesisCheckpoint kinesisEndCheckpoint = (KinesisCheckpoint) end;
 
-    String kinesisStartSequenceNumber = kinesisStartCheckpoint.getSequenceNumber();
-    String kinesisEndSequenceNumber = kinesisEndCheckpoint.getSequenceNumber();
+    String shardIterator = getShardIterator(kinesisStartCheckpoint);
 
-    GetShardIteratorResponse getShardIteratorResponse = _kinesisClient.getShardIterator(GetShardIteratorRequest.builder().streamName(_stream).shardIteratorType(
-        ShardIteratorType.AFTER_SEQUENCE_NUMBER).startingSequenceNumber(kinesisStartSequenceNumber).build());
-
-    String shardIterator = getShardIteratorResponse.shardIterator();
-    GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(shardIterator).build();
-    GetRecordsResponse getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
+    List<Record> recordList = new ArrayList<>();
 
-    String kinesisNextShardIterator = getRecordsResponse.nextShardIterator();
+    String kinesisEndSequenceNumber = null;
 
-    //TODO: Get records in the loop and stop when end sequence number is reached or there is an exception.
-    if(!getRecordsResponse.hasRecords()){
-      return new KinesisFetchResult(kinesisStartSequenceNumber, Collections.emptyList());
+    if(end != null) {
+      KinesisCheckpoint kinesisEndCheckpoint = (KinesisCheckpoint) end;
+      kinesisEndSequenceNumber = kinesisEndCheckpoint.getSequenceNumber();
     }
 
-    List<Record> recordList = new ArrayList<>();
-    recordList.addAll(getRecordsResponse.records());
+    String nextStartSequenceNumber = null;
+    Long startTimestamp = System.currentTimeMillis();
+
+    while(shardIterator != null && !isTimedOut(startTimestamp, timeout)){
+      GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(shardIterator).build();
+      GetRecordsResponse getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
 
-    String nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
-    while(kinesisNextShardIterator != null){
-      getRecordsRequest = GetRecordsRequest.builder().shardIterator(kinesisNextShardIterator).build();
-      getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
-      if(getRecordsResponse.hasRecords()){
+      if(getRecordsResponse.records().size() > 0){
         recordList.addAll(getRecordsResponse.records());
         nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
-      }
 
-      if(kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0 ) {
-        nextStartSequenceNumber = kinesisEndSequenceNumber;
-        break;
+        if(kinesisEndSequenceNumber != null && kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0 ){
+          nextStartSequenceNumber = kinesisEndSequenceNumber;
+          break;
+        }
+
+        if(recordList.size() >= _maxRecords) break;
       }
-      kinesisNextShardIterator = getRecordsResponse.nextShardIterator();
+
+      shardIterator = getRecordsResponse.nextShardIterator();
     }
 
-    KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(nextStartSequenceNumber,
-        getRecordsResponse.records());
+    if(nextStartSequenceNumber == null && recordList.size() > 0){
+      nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
+    }
+
+    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(kinesisStartCheckpoint.getShardId(), nextStartSequenceNumber);
+    KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint,
+        recordList);
 
     return kinesisFetchResult;
   }
+
+  private String getShardIterator(KinesisCheckpoint kinesisStartCheckpoint) {
+    GetShardIteratorResponse getShardIteratorResponse;
+
+    if(kinesisStartCheckpoint.getSequenceNumber() != null) {
+      String kinesisStartSequenceNumber = kinesisStartCheckpoint.getSequenceNumber();
+      getShardIteratorResponse = _kinesisClient.getShardIterator(
+          GetShardIteratorRequest.builder().streamName(_stream).shardId(kinesisStartCheckpoint.getShardId()).shardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
+              .startingSequenceNumber(kinesisStartSequenceNumber).build());
+    } else{
+      getShardIteratorResponse = _kinesisClient.getShardIterator(
+          GetShardIteratorRequest.builder().shardId(kinesisStartCheckpoint.getShardId()).streamName(_stream).shardIteratorType(ShardIteratorType.LATEST).build());
+    }
+
+    return getShardIteratorResponse.shardIterator();
+  }
+
+  private boolean isTimedOut(Long startTimestamp, Long timeout) {
+    return (System.currentTimeMillis() - startTimestamp) >= timeout;
+  }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index 6bd1e3a..bdbc348 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -31,6 +31,6 @@ public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
 
   @Override
   public ConsumerV2 createConsumer(PartitionGroupMetadata metadata) {
-    return new KinesisConsumer(_streamConfig.getTopicName(), _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, "us-central-1"));
+    return new KinesisConsumer(_streamConfig.getTopicName(), _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, "us-central-1"), _streamConfig);
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
index dc8e764..2996b28 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
@@ -8,17 +8,17 @@ import software.amazon.awssdk.services.kinesis.model.Record;
 
 
 public class KinesisFetchResult implements FetchResult<Record> {
-  private final String _nextShardIterator;
+  private final KinesisCheckpoint _kinesisCheckpoint;
   private final List<Record> _recordList;
 
-  public KinesisFetchResult(String nextShardIterator, List<Record> recordList){
-     _nextShardIterator = nextShardIterator;
+  public KinesisFetchResult(KinesisCheckpoint kinesisCheckpoint, List<Record> recordList){
+     _kinesisCheckpoint = kinesisCheckpoint;
      _recordList = recordList;
   }
 
   @Override
   public Checkpoint getLastCheckpoint() {
-    return new KinesisCheckpoint(_nextShardIterator);
+    return _kinesisCheckpoint;
   }
 
   @Override
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
index 87f7235..d15804e 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -13,13 +13,12 @@ public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler i
   private final List<PartitionGroupMetadata> _stringPartitionGroupMetadataIndex = new ArrayList<>();
 
   public KinesisPartitionGroupMetadataMap(String stream, String awsRegion){
-    super(awsRegion);
-    ListShardsResponse listShardsResponse = _kinesisClient.listShards(ListShardsRequest.builder().streamName(stream).build());
-    List<Shard> shardList = listShardsResponse.shards();
+    super(stream, awsRegion);
+    List<Shard> shardList = getShards();
     for(Shard shard : shardList){
       String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
-      KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream);
-      shardMetadata.setStartCheckpoint(new KinesisCheckpoint(endingSequenceNumber));
+      KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream, awsRegion);
+      shardMetadata.setStartCheckpoint(new KinesisCheckpoint(shard.shardId(), endingSequenceNumber));
       _stringPartitionGroupMetadataIndex.add(shardMetadata);
     }
   }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
index 4a19285..693b307 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -4,6 +4,7 @@ import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
+import software.amazon.awssdk.services.kinesis.model.SequenceNumberRange;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
@@ -12,11 +13,11 @@ public class KinesisShardMetadata extends KinesisConnectionHandler implements Pa
   Checkpoint _startCheckpoint;
   Checkpoint _endCheckpoint;
 
-  public KinesisShardMetadata(String shardId, String streamName) {
-    GetShardIteratorResponse getShardIteratorResponse = _kinesisClient.getShardIterator(GetShardIteratorRequest.builder().shardId(shardId).shardIteratorType(
-        ShardIteratorType.LATEST).streamName(streamName).build());
-    _startCheckpoint = new KinesisCheckpoint(getShardIteratorResponse.shardIterator());
-    _endCheckpoint = null;
+  public KinesisShardMetadata(String shardId, String streamName, String awsRegion) {
+    super(streamName, awsRegion);
+
+    _startCheckpoint = new KinesisCheckpoint(shardId, null);
+    _endCheckpoint = new KinesisCheckpoint(shardId, null);
     _shardId = shardId;
   }
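
A usage sketch with hypothetical ids: a shard's metadata starts with checkpoints that carry the shard id but no sequence number, and the start checkpoint is then filled in from the shard's sequence number range (as done in KinesisPartitionGroupMetadataMap above).

    KinesisShardMetadata shardMetadata =
        new KinesisShardMetadata("shardId-000000000000", "myStream", "us-east-1");
    shardMetadata.setStartCheckpoint(
        new KinesisCheckpoint("shardId-000000000000", "49590338271490256608"));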
 




[incubator-pinot] 02/47: StreamPartitionOffset to implement Checkpoint

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 7504c3148fe86496d1d85add9cb2452ab9f62769
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Tue Dec 29 18:43:48 2020 -0800

    StreamPartitionOffset to implement Checkpoint
---
 .../common/metadata/segment/RealtimeSegmentZKMetadata.java |  6 ++++++
 .../org/apache/pinot/common/utils/CommonConstants.java     |  4 ++++
 .../impl/fakestream/FakeStreamConsumerFactory.java         | 14 ++++++++++++++
 .../tests/FlakyConsumerRealtimeClusterIntegrationTest.java | 14 ++++++++++++++
 .../pinot/plugin/stream/kafka09/KafkaConsumerFactory.java  | 14 ++++++++++++++
 .../pinot/plugin/stream/kafka20/KafkaConsumerFactory.java  | 14 ++++++++++++++
 .../main/java/org/apache/pinot/spi/stream/Checkpoint.java  |  6 +++---
 .../java/org/apache/pinot/spi/stream/LongMsgOffset.java    | 10 ++++++++++
 .../apache/pinot/spi/stream/PartitionGroupConsumer.java    |  5 ++++-
 .../apache/pinot/spi/stream/PartitionGroupMetadata.java    |  2 --
 .../apache/pinot/spi/stream/StreamPartitionMsgOffset.java  |  2 +-
 11 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/RealtimeSegmentZKMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/RealtimeSegmentZKMetadata.java
index d88be18..c46af53 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/RealtimeSegmentZKMetadata.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/RealtimeSegmentZKMetadata.java
@@ -35,6 +35,7 @@ public class RealtimeSegmentZKMetadata extends SegmentZKMetadata {
   private Status _status = null;
   private int _sizeThresholdToFlushSegment = -1;
   private String _timeThresholdToFlushSegment = null; // store as period string for readability
+  private String _partitionGroupMetadataStr = null;
 
   public RealtimeSegmentZKMetadata() {
     setSegmentType(SegmentType.REALTIME);
@@ -49,6 +50,7 @@ public class RealtimeSegmentZKMetadata extends SegmentZKMetadata {
     if (flushThresholdTime != null && !flushThresholdTime.equals(NULL)) {
       _timeThresholdToFlushSegment = znRecord.getSimpleField(CommonConstants.Segment.FLUSH_THRESHOLD_TIME);
     }
+    _partitionGroupMetadataStr = znRecord.getSimpleField(CommonConstants.Segment.PARTITION_GROUP_METADATA);
   }
 
   @Override
@@ -141,4 +143,8 @@ public class RealtimeSegmentZKMetadata extends SegmentZKMetadata {
   public void setTimeThresholdToFlushSegment(String timeThresholdPeriodString) {
     _timeThresholdToFlushSegment = timeThresholdPeriodString;
   }
+
+  public String getPartitionGroupMetadataStr() {
+    return _partitionGroupMetadataStr;
+  }
 }
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
index 191ae93..4e81349 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
@@ -393,6 +393,10 @@ public class CommonConstants {
     public static final String FLUSH_THRESHOLD_TIME = "segment.flush.threshold.time";
     public static final String PARTITION_METADATA = "segment.partition.metadata";
     /**
+     * Serialized {@link org.apache.pinot.spi.stream.PartitionGroupMetadata} for this segment
+     */
+    public static final String PARTITION_GROUP_METADATA = "segment.partition.group.metadata";
+    /**
      * This field is used for parallel push protection to lock the segment globally.
      * We put the segment upload start timestamp so that if the previous push failed without unlock the segment, the
      * next upload won't be blocked forever.
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
index bb01e5c..9669223 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakeStreamConsumerFactory.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.core.realtime.impl.fakestream;
 
+import java.util.List;
 import java.util.Set;
 import org.apache.pinot.core.util.IngestionUtils;
 import org.apache.pinot.spi.config.table.TableConfig;
@@ -26,6 +27,8 @@ import org.apache.pinot.spi.data.readers.GenericRow;
 import org.apache.pinot.spi.stream.LongMsgOffset;
 import org.apache.pinot.spi.stream.MessageBatch;
 import org.apache.pinot.spi.stream.OffsetCriteria;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
@@ -66,6 +69,17 @@ public class FakeStreamConsumerFactory extends StreamConsumerFactory {
     return new FakeStreamMetadataProvider(_streamConfig);
   }
 
+  @Override
+  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata) {
+    return null;
+  }
+
+  @Override
+  public PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata) {
+    return null;
+  }
+
   public static void main(String[] args)
       throws Exception {
     String clientId = "client_id_localhost_tester";
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
index b05244f..808a464 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/FlakyConsumerRealtimeClusterIntegrationTest.java
@@ -19,9 +19,12 @@
 package org.apache.pinot.integration.tests;
 
 import java.lang.reflect.Constructor;
+import java.util.List;
 import java.util.Random;
 import java.util.Set;
 import org.apache.pinot.spi.data.readers.GenericRow;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
@@ -117,5 +120,16 @@ public class FlakyConsumerRealtimeClusterIntegrationTest extends RealtimeCluster
     public StreamMetadataProvider createStreamMetadataProvider(String clientId) {
       throw new UnsupportedOperationException();
     }
+
+    @Override
+    public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+        List<PartitionGroupMetadata> currentPartitionGroupsMetadata) {
+      return null;
+    }
+
+    @Override
+    public PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata) {
+      return null;
+    }
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
index 615e354..b8ed19d 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaConsumerFactory.java
@@ -18,7 +18,10 @@
  */
 package org.apache.pinot.plugin.stream.kafka09;
 
+import java.util.List;
 import java.util.Set;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamLevelConsumer;
@@ -50,4 +53,15 @@ public class KafkaConsumerFactory extends StreamConsumerFactory {
   public StreamMetadataProvider createStreamMetadataProvider(String clientId) {
     return new KafkaStreamMetadataProvider(clientId, _streamConfig);
   }
+
+  @Override
+  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata) {
+    return null;
+  }
+
+  @Override
+  public PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata) {
+    return null;
+  }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
index e0d1015..806baff 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaConsumerFactory.java
@@ -18,7 +18,10 @@
  */
 package org.apache.pinot.plugin.stream.kafka20;
 
+import java.util.List;
 import java.util.Set;
+import org.apache.pinot.spi.stream.PartitionGroupConsumer;
+import org.apache.pinot.spi.stream.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.PartitionLevelConsumer;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamLevelConsumer;
@@ -47,4 +50,15 @@ public class KafkaConsumerFactory extends StreamConsumerFactory {
   public StreamMetadataProvider createStreamMetadataProvider(String clientId) {
     return new KafkaStreamMetadataProvider(clientId, _streamConfig);
   }
+
+  @Override
+  public List<PartitionGroupMetadata> getPartitionGroupMetadataList(
+      List<PartitionGroupMetadata> currentPartitionGroupsMetadata) {
+    return null;
+  }
+
+  @Override
+  public PartitionGroupConsumer createConsumer(PartitionGroupMetadata metadata) {
+    return null;
+  }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java
index 627c964..bae8832 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/Checkpoint.java
@@ -18,7 +18,7 @@
  */
 package org.apache.pinot.spi.stream;
 
-public interface Checkpoint {
-  byte[] serialize();
-  Checkpoint deserialize(byte[] blob);
+public interface Checkpoint extends Comparable {
+  String serialize();
+  Checkpoint deserialize(String checkpointStr);
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/LongMsgOffset.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/LongMsgOffset.java
index e5025f6..e8fa275 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/LongMsgOffset.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/LongMsgOffset.java
@@ -50,4 +50,14 @@ public class LongMsgOffset implements StreamPartitionMsgOffset {
   public String toString() {
     return Long.toString(_offset);
   }
+
+  @Override
+  public String serialize() {
+    return toString();
+  }
+
+  @Override
+  public Checkpoint deserialize(String checkpointStr) {
+    return new LongMsgOffset(checkpointStr);
+  }
 }
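
A quick round-trip of the String-based serde introduced above. This is a sketch, not part of the patch; the String constructor is the same one deserialize() itself uses in the diff:

    import org.apache.pinot.spi.stream.Checkpoint;
    import org.apache.pinot.spi.stream.LongMsgOffset;

    public class LongMsgOffsetSerdeExample {
      public static void main(String[] args) {
        Checkpoint original = new LongMsgOffset("1234");        // String-based constructor
        String persisted = original.serialize();                // "1234", via toString()
        Checkpoint restored = original.deserialize(persisted);  // rebuilds a LongMsgOffset
        System.out.println(persisted.equals(restored.serialize()));  // prints "true"
      }
    }
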
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
index 2f138c2..e096e67 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupConsumer.java
@@ -18,6 +18,9 @@
  */
 package org.apache.pinot.spi.stream;
 
-public interface PartitionGroupConsumer {
+import java.io.Closeable;
+
+
+public interface PartitionGroupConsumer extends Closeable {
   FetchResult fetch(Checkpoint start, Checkpoint end, long timeout);
 }
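
Since the diff is terse, here is a skeleton of what the revised contract asks of implementors: fetch records between two checkpoints under a timeout, and release resources in close(). Everything other than the two interface methods is invented for illustration:

    import java.io.IOException;
    import org.apache.pinot.spi.stream.Checkpoint;
    import org.apache.pinot.spi.stream.FetchResult;
    import org.apache.pinot.spi.stream.PartitionGroupConsumer;

    public class SkeletonPartitionGroupConsumer implements PartitionGroupConsumer {
      @Override
      public FetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
        // a real consumer polls the underlying stream here, returning records
        // between 'start' and 'end', or whatever arrived before the timeout
        return null;
      }

      @Override
      public void close() throws IOException {
        // extending Closeable gives consumers a well-defined place to tear
        // down connections, and lets callers use try-with-resources
      }
    }
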
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
index 779c167..0f44173 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
@@ -25,8 +25,6 @@ public interface PartitionGroupMetadata {
 
   int getGroupId();
 
-  List<String> getPartitions();
-
   Checkpoint getStartCheckpoint(); // similar to getStartOffset
 
   Checkpoint getEndCheckpoint(); // similar to getEndOffset
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffset.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffset.java
index 72654bf..06a090e 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffset.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamPartitionMsgOffset.java
@@ -39,7 +39,7 @@ import org.apache.pinot.spi.annotations.InterfaceStability;
  * versions of the stream implementation
  */
 @InterfaceStability.Evolving
-public interface StreamPartitionMsgOffset extends Comparable {
+public interface StreamPartitionMsgOffset extends Checkpoint {
 
   /**
    * Compare this offset with another one.




[incubator-pinot] 18/47: Move shardId out of checkpoint to partition group metadata

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 7a4fccc3ad68f72f363f1663f6956c4b2aa6cc78
Author: KKcorps <kh...@gmail.com>
AuthorDate: Sun Dec 20 01:25:13 2020 +0530

    Move shardId out of checkpoint to partition group metadata
---
 .../plugin/stream/kinesis/KinesisCheckpoint.java    | 14 ++------------
 .../plugin/stream/kinesis/KinesisConsumer.java      | 21 +++++++++------------
 .../stream/kinesis/KinesisConsumerFactory.java      |  2 +-
 .../kinesis/KinesisPartitionGroupMetadataMap.java   |  4 +++-
 .../plugin/stream/kinesis/KinesisShardMetadata.java |  5 ++---
 5 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 8448665..aa80b17 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -4,11 +4,9 @@ import org.apache.pinot.spi.stream.v2.Checkpoint;
 
 
 public class KinesisCheckpoint implements Checkpoint {
-  String _shardId;
   String _sequenceNumber;
 
-  public KinesisCheckpoint(String shardId, String sequenceNumber){
-    _shardId = shardId;
+  public KinesisCheckpoint(String sequenceNumber){
     _sequenceNumber = sequenceNumber;
   }
 
@@ -16,14 +14,6 @@ public class KinesisCheckpoint implements Checkpoint {
     return _sequenceNumber;
   }
 
-  public String getShardId() {
-    return _shardId;
-  }
-
-  public void setShardId(String shardId) {
-    _shardId = shardId;
-  }
-
   @Override
   public byte[] serialize() {
     return _sequenceNumber.getBytes();
@@ -32,7 +22,7 @@ public class KinesisCheckpoint implements Checkpoint {
   @Override
   public Checkpoint deserialize(byte[] blob) {
     //TODO: Implement SerDe
-    return new KinesisCheckpoint("", new String(blob));
+    return new KinesisCheckpoint(new String(blob));
   }
 
 }
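
With the shard id gone, the checkpoint now carries only the sequence number. A round-trip sketch (the sequence number is a made-up example value, and the TODO above notes a proper SerDe is still pending):

    import org.apache.pinot.plugin.stream.kinesis.KinesisCheckpoint;

    public class KinesisCheckpointSerdeExample {
      public static void main(String[] args) {
        KinesisCheckpoint checkpoint =
            new KinesisCheckpoint("49590338271490256608559692538361571095921575989136588898");
        byte[] blob = checkpoint.serialize();  // raw bytes of the sequence number
        KinesisCheckpoint restored = (KinesisCheckpoint) checkpoint.deserialize(blob);
        System.out.println(restored.getSequenceNumber().equals(checkpoint.getSequenceNumber()));
      }
    }
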
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 7bc1006..d896d67 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -7,6 +7,7 @@ import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.ConsumerV2;
 import org.apache.pinot.spi.stream.v2.FetchResult;
+import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
@@ -18,18 +19,14 @@ import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 public class KinesisConsumer extends KinesisConnectionHandler implements ConsumerV2 {
   String _stream;
   Integer _maxRecords;
+  String _shardId;
 
-  //TODO: Fetch AWS region from  Stream Config.
-  public KinesisConsumer(String stream, String awsRegion) {
-    super(stream, awsRegion);
-    _stream = stream;
-    _maxRecords = 20;
-  }
-
-  public KinesisConsumer(String stream, String awsRegion, StreamConfig streamConfig) {
-    super(stream, awsRegion);
+  public KinesisConsumer(String stream, StreamConfig streamConfig, PartitionGroupMetadata partitionGroupMetadata) {
+    super(stream, streamConfig.getStreamConfigsMap().getOrDefault("aws-region", "global"));
     _stream = stream;
     _maxRecords = Integer.parseInt(streamConfig.getStreamConfigsMap().getOrDefault("maxRecords", "20"));
+    KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) partitionGroupMetadata;
+    _shardId = kinesisShardMetadata.getShardId();
   }
 
   @Override
@@ -73,7 +70,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
       nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
     }
 
-    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(kinesisStartCheckpoint.getShardId(), nextStartSequenceNumber);
+    KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber);
     KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint,
         recordList);
 
@@ -86,11 +83,11 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     if(kinesisStartCheckpoint.getSequenceNumber() != null) {
       String kinesisStartSequenceNumber = kinesisStartCheckpoint.getSequenceNumber();
       getShardIteratorResponse = _kinesisClient.getShardIterator(
-          GetShardIteratorRequest.builder().streamName(_stream).shardId(kinesisStartCheckpoint.getShardId()).shardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
+          GetShardIteratorRequest.builder().streamName(_stream).shardId(_shardId).shardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
               .startingSequenceNumber(kinesisStartSequenceNumber).build());
     } else{
       getShardIteratorResponse = _kinesisClient.getShardIterator(
-          GetShardIteratorRequest.builder().shardId(kinesisStartCheckpoint.getShardId()).streamName(_stream).shardIteratorType(ShardIteratorType.LATEST).build());
+          GetShardIteratorRequest.builder().shardId(_shardId).streamName(_stream).shardIteratorType(ShardIteratorType.LATEST).build());
     }
 
     return getShardIteratorResponse.shardIterator();
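
The two branches above correspond to the two iterator modes the consumer needs. Pulled out as standalone helpers for clarity (a sketch; in the patch the stream name and shard id come from the surrounding class):

    import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
    import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;

    final class ShardIteratorRequests {
      // Resume from a saved checkpoint: AT_SEQUENCE_NUMBER re-reads the record
      // with that sequence number (AFTER_SEQUENCE_NUMBER would skip past it).
      static GetShardIteratorRequest resume(String stream, String shardId, String sequenceNumber) {
        return GetShardIteratorRequest.builder()
            .streamName(stream)
            .shardId(shardId)
            .shardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
            .startingSequenceNumber(sequenceNumber)
            .build();
      }

      // No checkpoint yet: tail the shard, starting just after the most recent record.
      static GetShardIteratorRequest tail(String stream, String shardId) {
        return GetShardIteratorRequest.builder()
            .streamName(stream)
            .shardId(shardId)
            .shardIteratorType(ShardIteratorType.LATEST)
            .build();
      }
    }
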
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index bdbc348..0608118 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -31,6 +31,6 @@ public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
 
   @Override
   public ConsumerV2 createConsumer(PartitionGroupMetadata metadata) {
-    return new KinesisConsumer(_streamConfig.getTopicName(), _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, "us-central-1"), _streamConfig);
+    return new KinesisConsumer(_streamConfig.getTopicName(), _streamConfig, metadata);
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
index d15804e..700ec3f 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -16,9 +16,11 @@ public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler i
     super(stream, awsRegion);
     List<Shard> shardList = getShards();
     for(Shard shard : shardList){
+      String startSequenceNumber = shard.sequenceNumberRange().startingSequenceNumber();
       String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
       KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream, awsRegion);
-      shardMetadata.setStartCheckpoint(new KinesisCheckpoint(shard.shardId(), endingSequenceNumber));
+      shardMetadata.setStartCheckpoint(new KinesisCheckpoint(startSequenceNumber));
+      shardMetadata.setEndCheckpoint(new KinesisCheckpoint(endingSequenceNumber));
       _stringPartitionGroupMetadataIndex.add(shardMetadata);
     }
   }
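
One caveat in the shard-to-checkpoint mapping above: Kinesis only sets endingSequenceNumber once a shard has been closed (for example after a split or merge), so for open shards the end checkpoint is built from null. A small sketch with the same AWS SDK v2 types:

    import software.amazon.awssdk.services.kinesis.model.SequenceNumberRange;
    import software.amazon.awssdk.services.kinesis.model.Shard;

    final class ShardRanges {
      static void inspect(Shard shard) {
        SequenceNumberRange range = shard.sequenceNumberRange();
        String start = range.startingSequenceNumber();  // always present
        String end = range.endingSequenceNumber();      // null while the shard is still OPEN
        // a null end can be read as "no upper bound yet" for the end checkpoint
      }
    }
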
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
index 693b307..e1d23da 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -15,9 +15,8 @@ public class KinesisShardMetadata extends KinesisConnectionHandler implements Pa
 
   public KinesisShardMetadata(String shardId, String streamName, String awsRegion) {
     super(streamName, awsRegion);
-
-    _startCheckpoint = new KinesisCheckpoint(shardId, null);
-    _endCheckpoint = new KinesisCheckpoint(shardId, null);
+    _startCheckpoint = null;
+    _endCheckpoint = null;
     _shardId = shardId;
   }
 




[incubator-pinot] 33/47: fixing compilation

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 6cb0ebb1b775959c166cbcdeadec74ae3349e4ad
Author: Xiang Fu <fx...@gmail.com>
AuthorDate: Sat Jan 2 17:14:31 2021 -0800

    fixing compilation
---
 pinot-distribution/pinot-assembly.xml              |  4 ++
 pinot-distribution/pom.xml                         |  4 ++
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml   | 64 ++++++++++++++++++++--
 .../plugin/stream/kinesis/KinesisCheckpoint.java   |  1 +
 .../pinot/plugin/stream/kinesis/KinesisConfig.java | 23 ++++----
 .../stream/kinesis/KinesisConnectionHandler.java   | 26 +++------
 .../plugin/stream/kinesis/KinesisConsumer.java     | 50 +++++++----------
 .../stream/kinesis/KinesisConsumerFactory.java     |  4 +-
 .../plugin/stream/kinesis/KinesisFetchResult.java  |  3 -
 .../kinesis/KinesisPartitionGroupMetadataMap.java  |  7 +--
 .../plugin/stream/kinesis/KinesisRecordsBatch.java | 18 ++++++
 .../stream/kinesis/KinesisShardMetadata.java       | 13 ++---
 .../plugin/stream/kinesis/KinesisConsumerTest.java | 39 +++++++------
 13 files changed, 152 insertions(+), 104 deletions(-)

diff --git a/pinot-distribution/pinot-assembly.xml b/pinot-distribution/pinot-assembly.xml
index 2dfb36e..de7329f 100644
--- a/pinot-distribution/pinot-assembly.xml
+++ b/pinot-distribution/pinot-assembly.xml
@@ -55,6 +55,10 @@
       <source>${pinot.root}/pinot-plugins/pinot-stream-ingestion/pinot-kafka-${kafka.version}/target/pinot-kafka-${kafka.version}-${project.version}-shaded.jar</source>
       <destName>plugins/pinot-stream-ingestion/pinot-kafka-${kafka.version}/pinot-kafka-${kafka.version}-${project.version}-shaded.jar</destName>
     </file>
+    <file>
+      <source>${pinot.root}/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/target/pinot-kinesis-${project.version}-shaded.jar</source>
+      <destName>plugins/pinot-stream-ingestion/pinot-kinesis/pinot-kinesis-${project.version}-shaded.jar</destName>
+    </file>
     <!-- End Include Pinot Stream Ingestion Plugins-->
     <!-- Start Include Pinot Batch Ingestion Plugins-->
     <file>
diff --git a/pinot-distribution/pom.xml b/pinot-distribution/pom.xml
index 1a3f106..f29cae0 100644
--- a/pinot-distribution/pom.xml
+++ b/pinot-distribution/pom.xml
@@ -86,6 +86,10 @@
         </exclusion>
         <exclusion>
           <groupId>org.apache.pinot</groupId>
+          <artifactId>pinot-kinesis</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.pinot</groupId>
           <artifactId>pinot-batch-ingestion-standalone</artifactId>
         </exclusion>
         <exclusion>
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
index 0c9ae0b..4fce169 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
@@ -19,19 +19,20 @@
     under the License.
 
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns="http://maven.apache.org/POM/4.0.0"
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
   <parent>
     <artifactId>pinot-stream-ingestion</artifactId>
     <groupId>org.apache.pinot</groupId>
     <version>0.7.0-SNAPSHOT</version>
     <relativePath>..</relativePath>
   </parent>
-  <modelVersion>4.0.0</modelVersion>
 
   <artifactId>pinot-kinesis</artifactId>
-
+  <name>Pinot Kinesis</name>
+  <url>https://pinot.apache.org/</url>
   <properties>
     <pinot.root>${basedir}/../../..</pinot.root>
     <phase.prop>package</phase.prop>
@@ -43,6 +44,32 @@
       <groupId>software.amazon.awssdk</groupId>
       <artifactId>kinesis</artifactId>
       <version>${aws.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.reactivestreams</groupId>
+          <artifactId>reactive-streams</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty-codec</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty-buffer</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty-transport</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>io.netty</groupId>
+          <artifactId>netty-common</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <!-- https://mvnrepository.com/artifact/com.fasterxml.jackson.core/jackson-core -->
     <dependency>
@@ -52,8 +79,33 @@
     </dependency>
 
     <dependency>
-      <groupId>org.apache.pinot</groupId>
-      <artifactId>pinot-spi</artifactId>
+      <groupId>org.reactivestreams</groupId>
+      <artifactId>reactive-streams</artifactId>
+      <version>1.0.2</version>
+    </dependency>
+
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-codec</artifactId>
+      <version>4.1.42.Final</version>
+    </dependency>
+
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-buffer</artifactId>
+      <version>4.1.42.Final</version>
+    </dependency>
+
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-transport</artifactId>
+      <version>4.1.42.Final</version>
+    </dependency>
+
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-common</artifactId>
+      <version>4.1.42.Final</version>
     </dependency>
   </dependencies>
 
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 54e26d0..f3a7a49 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -20,6 +20,7 @@ package org.apache.pinot.plugin.stream.kinesis;
 
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 
+
 public class KinesisCheckpoint implements Checkpoint {
   String _sequenceNumber;
   Boolean _isEndOfPartition = false;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
index 82fc438..529f34f 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConfig.java
@@ -24,16 +24,14 @@ import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
 public class KinesisConfig {
-  private final Map<String, String> _props;
-
   public static final String STREAM = "stream";
-  private static final String AWS_REGION = "aws-region";
-  private static final String MAX_RECORDS_TO_FETCH = "max-records-to-fetch";
   public static final String SHARD_ITERATOR_TYPE = "shard-iterator-type";
-
-  private static final String DEFAULT_AWS_REGION = "us-central-1";
-  private static final String DEFAULT_MAX_RECORDS = "20";
-  private static final String DEFAULT_SHARD_ITERATOR_TYPE = "LATEST";
+  public static final String AWS_REGION = "aws-region";
+  public static final String MAX_RECORDS_TO_FETCH = "max-records-to-fetch";
+  public static final String DEFAULT_AWS_REGION = "us-central-1";
+  public static final String DEFAULT_MAX_RECORDS = "20";
+  public static final String DEFAULT_SHARD_ITERATOR_TYPE = ShardIteratorType.LATEST.toString();
+  private final Map<String, String> _props;
 
   public KinesisConfig(StreamConfig streamConfig) {
     _props = streamConfig.getStreamConfigsMap();
@@ -43,20 +41,19 @@ public class KinesisConfig {
     _props = props;
   }
 
-  public String getStream(){
+  public String getStream() {
     return _props.get(STREAM);
   }
 
-  public String getAwsRegion(){
+  public String getAwsRegion() {
     return _props.getOrDefault(AWS_REGION, DEFAULT_AWS_REGION);
   }
 
-  public Integer maxRecordsToFetch(){
+  public Integer maxRecordsToFetch() {
     return Integer.parseInt(_props.getOrDefault(MAX_RECORDS_TO_FETCH, DEFAULT_MAX_RECORDS));
   }
 
-  public ShardIteratorType getShardIteratorType(){
+  public ShardIteratorType getShardIteratorType() {
     return ShardIteratorType.fromValue(_props.getOrDefault(SHARD_ITERATOR_TYPE, DEFAULT_SHARD_ITERATOR_TYPE));
   }
-
 }
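
For reference, building the reworked config from raw properties with the now-public keys (the stream name and values are examples only, mirroring the test further down in this commit):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.pinot.plugin.stream.kinesis.KinesisConfig;
    import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;

    public class KinesisConfigExample {
      public static void main(String[] args) {
        Map<String, String> props = new HashMap<>();
        props.put(KinesisConfig.STREAM, "my-stream");      // example stream name
        props.put(KinesisConfig.AWS_REGION, "us-west-2");  // example region
        props.put(KinesisConfig.MAX_RECORDS_TO_FETCH, "100");
        props.put(KinesisConfig.SHARD_ITERATOR_TYPE, ShardIteratorType.AT_SEQUENCE_NUMBER.toString());
        KinesisConfig kinesisConfig = new KinesisConfig(props);
        // keys left unset fall back to the DEFAULT_* constants defined above
        System.out.println(kinesisConfig.maxRecordsToFetch());  // 100
      }
    }
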
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index 0cf4787..4d968f6 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -19,28 +19,18 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.List;
-import org.apache.pinot.spi.stream.StreamConfig;
-import org.apache.pinot.spi.stream.v2.ConsumerV2;
 import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
-import software.amazon.awssdk.core.SdkBytes;
 import software.amazon.awssdk.regions.Region;
 import software.amazon.awssdk.services.kinesis.KinesisClient;
-import software.amazon.awssdk.services.kinesis.model.DescribeStreamRequest;
-import software.amazon.awssdk.services.kinesis.model.DescribeStreamResponse;
-import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
 import software.amazon.awssdk.services.kinesis.model.ListShardsRequest;
 import software.amazon.awssdk.services.kinesis.model.ListShardsResponse;
-import software.amazon.awssdk.services.kinesis.model.PutRecordRequest;
-import software.amazon.awssdk.services.kinesis.model.PutRecordResponse;
 import software.amazon.awssdk.services.kinesis.model.Shard;
-import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
-import software.amazon.awssdk.services.kinesis.model.StreamDescription;
 
 
 public class KinesisConnectionHandler {
+  KinesisClient _kinesisClient;
   private String _stream;
   private String _awsRegion;
-  KinesisClient _kinesisClient;
 
   public KinesisConnectionHandler() {
 
@@ -58,18 +48,18 @@ public class KinesisConnectionHandler {
     return listShardsResponse.shards();
   }
 
-  public void createConnection(){
-    if(_kinesisClient == null) {
-      _kinesisClient = KinesisClient.builder().region(Region.of(_awsRegion)).credentialsProvider(DefaultCredentialsProvider.create())
-          .build();
+  public void createConnection() {
+    if (_kinesisClient == null) {
+      _kinesisClient =
+          KinesisClient.builder().region(Region.of(_awsRegion)).credentialsProvider(DefaultCredentialsProvider.create())
+              .build();
     }
   }
 
-  public void close(){
-    if(_kinesisClient != null) {
+  public void close() {
+    if (_kinesisClient != null) {
       _kinesisClient.close();
       _kinesisClient = null;
     }
   }
-
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 336468a..fb414f0 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -19,18 +19,13 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.Executor;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
-import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.ConsumerV2;
-import org.apache.pinot.spi.stream.v2.FetchResult;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -38,7 +33,6 @@ import software.amazon.awssdk.services.kinesis.model.ExpiredIteratorException;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
-import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
 import software.amazon.awssdk.services.kinesis.model.InvalidArgumentException;
 import software.amazon.awssdk.services.kinesis.model.KinesisException;
 import software.amazon.awssdk.services.kinesis.model.ProvisionedThroughputExceededException;
@@ -46,13 +40,14 @@ import software.amazon.awssdk.services.kinesis.model.Record;
 import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
+
 public class KinesisConsumer extends KinesisConnectionHandler implements ConsumerV2 {
+  private final Logger LOG = LoggerFactory.getLogger(KinesisConsumer.class);
   String _stream;
   Integer _maxRecords;
   String _shardId;
   ExecutorService _executorService;
   ShardIteratorType _shardIteratorType;
-  private final Logger LOG = LoggerFactory.getLogger(KinesisConsumer.class);
 
   public KinesisConsumer(KinesisConfig kinesisConfig, PartitionGroupMetadata partitionGroupMetadata) {
     super(kinesisConfig.getStream(), kinesisConfig.getAwsRegion());
@@ -67,12 +62,13 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
   @Override
   public KinesisFetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
     List<Record> recordList = new ArrayList<>();
-    Future<KinesisFetchResult> kinesisFetchResultFuture = _executorService.submit(() -> getResult(start, end, recordList));
+    Future<KinesisFetchResult> kinesisFetchResultFuture =
+        _executorService.submit(() -> getResult(start, end, recordList));
 
     try {
       return kinesisFetchResultFuture.get(timeout, TimeUnit.MILLISECONDS);
-    } catch(Exception e){
-        return handleException((KinesisCheckpoint) start, recordList);
+    } catch (Exception e) {
+      return handleException((KinesisCheckpoint) start, recordList);
     }
   }
 
@@ -81,7 +77,7 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
 
     try {
 
-      if(_kinesisClient == null){
+      if (_kinesisClient == null) {
         createConnection();
       }
 
@@ -105,7 +101,8 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
           recordList.addAll(getRecordsResponse.records());
           nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
 
-          if (kinesisEndSequenceNumber != null && kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0) {
+          if (kinesisEndSequenceNumber != null
+              && kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0) {
             nextStartSequenceNumber = kinesisEndSequenceNumber;
             break;
           }
@@ -115,14 +112,13 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
           }
         }
 
-        if(getRecordsResponse.hasChildShards()){
+        if (getRecordsResponse.hasChildShards()) {
           //This statement returns true only when end of current shard has reached.
           isEndOfShard = true;
           break;
         }
 
         shardIterator = getRecordsResponse.nextShardIterator();
-
       }
 
       if (nextStartSequenceNumber == null && recordList.size() > 0) {
@@ -133,28 +129,20 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
       KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisCheckpoint, recordList);
 
       return kinesisFetchResult;
-    }catch (ProvisionedThroughputExceededException e) {
-      LOG.warn(
-          "The request rate for the stream is too high"
-      , e);
+    } catch (ProvisionedThroughputExceededException e) {
+      LOG.warn("The request rate for the stream is too high", e);
       return handleException(kinesisStartCheckpoint, recordList);
-    }
-    catch (ExpiredIteratorException e) {
-      LOG.warn(
-          "ShardIterator expired while trying to fetch records",e
-      );
+    } catch (ExpiredIteratorException e) {
+      LOG.warn("ShardIterator expired while trying to fetch records", e);
       return handleException(kinesisStartCheckpoint, recordList);
-    }
-    catch (ResourceNotFoundException | InvalidArgumentException e) {
+    } catch (ResourceNotFoundException | InvalidArgumentException e) {
       // aws errors
       LOG.error("Encountered AWS error while attempting to fetch records", e);
       return handleException(kinesisStartCheckpoint, recordList);
-    }
-    catch (KinesisException e) {
+    } catch (KinesisException e) {
       LOG.warn("Encountered unknown unrecoverable AWS exception", e);
       throw new RuntimeException(e);
-    }
-    catch (Throwable e) {
+    } catch (Throwable e) {
       // non transient errors
       LOG.error("Unknown fetchRecords exception", e);
       throw new RuntimeException(e);
@@ -162,11 +150,11 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
   }
 
   private KinesisFetchResult handleException(KinesisCheckpoint start, List<Record> recordList) {
-    if(recordList.size() > 0){
+    if (recordList.size() > 0) {
       String nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
       KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(nextStartSequenceNumber);
       return new KinesisFetchResult(kinesisCheckpoint, recordList);
-    }else{
+    } else {
       KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(start.getSequenceNumber());
       return new KinesisFetchResult(kinesisCheckpoint, recordList);
     }
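
The fetch() method above bounds the blocking read with a Future. In isolation the pattern looks like this (generic Java; the lambda is a placeholder standing in for the blocking getResult() call):

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;

    public class BoundedFetchExample {
      public static void main(String[] args) {
        ExecutorService executorService = Executors.newSingleThreadExecutor();
        Future<String> future = executorService.submit(() -> "records");
        try {
          System.out.println(future.get(1000, TimeUnit.MILLISECONDS));  // wait at most 1s
        } catch (Exception e) {
          // on timeout (or any other failure) the patch falls back to a
          // partial result built from whatever records were collected so far
        }
        executorService.shutdown();
      }
    }
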
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index acac1fb..9bb4d0c 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
-import java.util.Map;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.v2.ConsumerV2;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
@@ -38,7 +37,8 @@ public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
   @Override
   public PartitionGroupMetadataMap getPartitionGroupsMetadata(
       PartitionGroupMetadataMap currentPartitionGroupsMetadata) {
-    return new KinesisPartitionGroupMetadataMap(_kinesisConfig.getStream(), _kinesisConfig.getAwsRegion(), currentPartitionGroupsMetadata);
+    return new KinesisPartitionGroupMetadataMap(_kinesisConfig.getStream(), _kinesisConfig.getAwsRegion(),
+        currentPartitionGroupsMetadata);
   }
 
   @Override
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
index 39561f3..8da3d2e 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
@@ -18,10 +18,7 @@
  */
 package org.apache.pinot.plugin.stream.kinesis;
 
-import java.util.ArrayList;
 import java.util.List;
-import org.apache.pinot.spi.stream.MessageBatch;
-import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.FetchResult;
 import software.amazon.awssdk.services.kinesis.model.Record;
 
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
index 626c8ea..f96533f 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -22,12 +22,8 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadataMap;
-import software.amazon.awssdk.services.kinesis.model.ListShardsRequest;
-import software.amazon.awssdk.services.kinesis.model.ListShardsResponse;
 import software.amazon.awssdk.services.kinesis.model.Shard;
 
 
@@ -56,7 +52,8 @@ public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler i
         //Return existing shard metadata
         _stringPartitionGroupMetadataIndex.add(currentMetadataMap.get(shard.shardId()));
       } else if (currentMetadataMap.containsKey(shard.parentShardId())) {
-        KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) currentMetadataMap.get(shard.parentShardId());
+        KinesisShardMetadata kinesisShardMetadata =
+            (KinesisShardMetadata) currentMetadataMap.get(shard.parentShardId());
         if (isProcessingFinished(kinesisShardMetadata)) {
           //Add child shards for processing since parent has finished
           appendShardMetadata(stream, awsRegion, shard);
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
index ed51f8f..04bf4e6 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisRecordsBatch.java
@@ -1,3 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.List;
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
index 1d753c3..e24121b 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -20,10 +20,7 @@ package org.apache.pinot.plugin.stream.kinesis;
 
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
-import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
-import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
-import software.amazon.awssdk.services.kinesis.model.SequenceNumberRange;
-import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
+
 
 //TODO: Implement shardId as Array and have unique id
 public class KinesisShardMetadata extends KinesisConnectionHandler implements PartitionGroupMetadata {
@@ -48,13 +45,13 @@ public class KinesisShardMetadata extends KinesisConnectionHandler implements Pa
   }
 
   @Override
-  public KinesisCheckpoint getEndCheckpoint() {
-    return _endCheckpoint;
+  public void setStartCheckpoint(Checkpoint startCheckpoint) {
+    _startCheckpoint = (KinesisCheckpoint) startCheckpoint;
   }
 
   @Override
-  public void setStartCheckpoint(Checkpoint startCheckpoint) {
-    _startCheckpoint = (KinesisCheckpoint) startCheckpoint;
+  public KinesisCheckpoint getEndCheckpoint() {
+    return _endCheckpoint;
   }
 
   @Override
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
index 6f660f7..f853875 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/test/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerTest.java
@@ -20,40 +20,43 @@ package org.apache.pinot.plugin.stream.kinesis; /**
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import software.amazon.awssdk.services.kinesis.model.Record;
 import software.amazon.awssdk.services.kinesis.model.Shard;
+import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
 public class KinesisConsumerTest {
+
+  private static final String STREAM_NAME = "kinesis-test";
+  private static final String AWS_REGION = "us-west-2";
+
   public static void main(String[] args) {
     Map<String, String> props = new HashMap<>();
-    props.put("stream", "kinesis-test");
-    props.put("aws-region", "us-west-2");
-    props.put("max-records-to-fetch", "2000");
-    props.put("shard-iterator-type", "AT-SEQUENCE-NUMBER");
-
+    props.put(KinesisConfig.STREAM, STREAM_NAME);
+    props.put(KinesisConfig.AWS_REGION, AWS_REGION);
+    props.put(KinesisConfig.MAX_RECORDS_TO_FETCH, "10");
+    props.put(KinesisConfig.SHARD_ITERATOR_TYPE, ShardIteratorType.AT_SEQUENCE_NUMBER.toString());
     KinesisConfig kinesisConfig = new KinesisConfig(props);
-
-    KinesisConnectionHandler kinesisConnectionHandler = new KinesisConnectionHandler("kinesis-test", "us-west-2");
-
+    KinesisConnectionHandler kinesisConnectionHandler = new KinesisConnectionHandler(STREAM_NAME, AWS_REGION);
     List<Shard> shardList = kinesisConnectionHandler.getShards();
-
-    for(Shard shard : shardList) {
+    for (Shard shard : shardList) {
       System.out.println("SHARD: " + shard.shardId());
 
-      KinesisConsumer kinesisConsumer = new KinesisConsumer(kinesisConfig, new KinesisShardMetadata(shard.shardId(), "kinesis-test", "us-west-2"));
-
+      KinesisConsumer kinesisConsumer =
+          new KinesisConsumer(kinesisConfig, new KinesisShardMetadata(shard.shardId(), STREAM_NAME, AWS_REGION));
+      System.out.println(
+          "Kinesis Checkpoint Range: < " + shard.sequenceNumberRange().startingSequenceNumber() + ", " + shard
+              .sequenceNumberRange().endingSequenceNumber() + " >");
       KinesisCheckpoint kinesisCheckpoint = new KinesisCheckpoint(shard.sequenceNumberRange().startingSequenceNumber());
-      KinesisFetchResult fetchResult = kinesisConsumer.fetch(kinesisCheckpoint, null, 6 * 10 * 1000L);
-
+      KinesisFetchResult fetchResult = kinesisConsumer.fetch(kinesisCheckpoint, null, 60 * 1000L);
       KinesisRecordsBatch list = fetchResult.getMessages();
       int n = list.getMessageCount();
 
-      for (int i=0;i<n;i++) {
+      System.out.println("Found " + n + " messages ");
+      for (int i = 0; i < n; i++) {
         System.out.println("SEQ-NO: " + list.getMessageOffsetAtIndex(i) + ", DATA: " + list.getMessageAtIndex(i));
       }
+      kinesisConsumer.close();
     }
+    kinesisConnectionHandler.close();
   }
 }




[incubator-pinot] 12/47: Add initial implementation of Kinesis consumer

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 72a77c61226ce5cf3abd781916f7797a3fa5a230
Author: KKcorps <kh...@gmail.com>
AuthorDate: Thu Dec 10 19:08:41 2020 +0530

    Add initial implementation of Kinesis consumer
---
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml   | 39 ++++++++++++++++++
 .../plugin/stream/kinesis/KinesisCheckpoint.java   | 28 +++++++++++++
 .../stream/kinesis/KinesisConnectionHandler.java   | 25 ++++++++++++
 .../plugin/stream/kinesis/KinesisConsumer.java     | 40 ++++++++++++++++++
 .../plugin/stream/kinesis/KinesisFetchResult.java  | 25 ++++++++++++
 .../stream/kinesis/KinesisShardMetadata.java       | 47 ++++++++++++++++++++++
 pinot-plugins/pinot-stream-ingestion/pom.xml       |  1 +
 7 files changed, 205 insertions(+)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
new file mode 100644
index 0000000..97e5eef
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <artifactId>pinot-stream-ingestion</artifactId>
+    <groupId>org.apache.pinot</groupId>
+    <version>0.7.0-SNAPSHOT</version>
+    <relativePath>..</relativePath>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>pinot-kinesis</artifactId>
+
+  <properties>
+    <pinot.root>${basedir}/../../..</pinot.root>
+    <phase.prop>package</phase.prop>
+    <aws.version>2.15.42</aws.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>software.amazon.awssdk</groupId>
+      <artifactId>kinesis</artifactId>
+      <version>${aws.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.pinot</groupId>
+      <artifactId>pinot-json</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.pinot</groupId>
+      <artifactId>pinot-spi</artifactId>
+    </dependency>
+  </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
new file mode 100644
index 0000000..a330e78
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -0,0 +1,28 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import org.apache.pinot.spi.stream.v2.Checkpoint;
+import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
+
+
+public class KinesisCheckpoint implements Checkpoint {
+  String _shardIterator;
+
+  public KinesisCheckpoint(String shardIterator){
+    _shardIterator = shardIterator;
+  }
+
+  public String getShardIterator() {
+    return _shardIterator;
+  }
+
+  @Override
+  public byte[] serialize() {
+    return _shardIterator.getBytes();
+  }
+
+  @Override
+  public Checkpoint deserialize(byte[] blob) {
+    return new KinesisCheckpoint(new String(blob));
+  }
+
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
new file mode 100644
index 0000000..7ea24c0
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -0,0 +1,25 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import org.apache.pinot.spi.stream.StreamConfig;
+import org.apache.pinot.spi.stream.v2.ConsumerV2;
+import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.kinesis.KinesisClient;
+import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
+import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
+
+
+public class KinesisConnectionHandler {
+  String _awsRegion = "";
+  KinesisClient _kinesisClient;
+
+  public KinesisConnectionHandler(){
+
+  }
+
+  public KinesisConnectionHandler(String awsRegion){
+    _awsRegion = awsRegion;
+    _kinesisClient = KinesisClient.builder().region(Region.of(_awsRegion)).credentialsProvider(DefaultCredentialsProvider.create()).build();
+  }
+
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
new file mode 100644
index 0000000..251d831
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -0,0 +1,40 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import java.util.Collections;
+import org.apache.pinot.spi.stream.v2.Checkpoint;
+import org.apache.pinot.spi.stream.v2.ConsumerV2;
+import org.apache.pinot.spi.stream.v2.FetchResult;
+import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
+import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
+import software.amazon.awssdk.services.kinesis.model.Record;
+
+
+public class KinesisConsumer extends KinesisConnectionHandler implements ConsumerV2 {
+
+  //TODO: Fetch AWS region from  Stream Config.
+  public KinesisConsumer(String awsRegion) {
+    super(awsRegion);
+  }
+
+  @Override
+  public FetchResult fetch(Checkpoint start, Checkpoint end, long timeout) {
+    KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
+    KinesisCheckpoint kinesisEndCheckpoint = (KinesisCheckpoint) end;
+
+    String kinesisShardIteratorStart = kinesisStartCheckpoint.getShardIterator();
+
+    GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(kinesisShardIteratorStart).build();
+    GetRecordsResponse getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
+
+    String kinesisNextShardIterator = getRecordsResponse.nextShardIterator();
+
+    if(!getRecordsResponse.hasRecords()){
+      return new KinesisFetchResult(kinesisNextShardIterator, Collections.emptyList());
+    }
+
+    KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisNextShardIterator,
+        getRecordsResponse.records());
+
+    return kinesisFetchResult;
+  }
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
new file mode 100644
index 0000000..5ef4e30
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
@@ -0,0 +1,25 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import java.util.List;
+import org.apache.pinot.spi.stream.v2.Checkpoint;
+import org.apache.pinot.spi.stream.v2.FetchResult;
+import software.amazon.awssdk.services.kinesis.model.Record;
+
+
+public class KinesisFetchResult implements FetchResult {
+  private String _nextShardIterator;
+
+  public KinesisFetchResult(String nextShardIterator, List<Record> recordList){
+     _nextShardIterator = nextShardIterator;
+  }
+
+  @Override
+  public Checkpoint getLastCheckpoint() {
+    return new KinesisCheckpoint(_nextShardIterator);
+  }
+
+  @Override
+  public byte[] getMessages() {
+    return new byte[0];
+  }
+}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
new file mode 100644
index 0000000..07ede73
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -0,0 +1,47 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import org.apache.pinot.spi.stream.v2.Checkpoint;
+import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
+import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
+import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
+
+
+public class KinesisShardMetadata extends KinesisConnectionHandler implements PartitionGroupMetadata {
+  Checkpoint _startCheckpoint;
+  Checkpoint _endCheckpoint;
+
+  public KinesisShardMetadata(String shardId, String streamName) {
+    GetShardIteratorResponse getShardIteratorResponse = _kinesisClient.getShardIterator(GetShardIteratorRequest.builder().shardId(shardId).streamName(streamName).build());
+    _startCheckpoint = new KinesisCheckpoint(getShardIteratorResponse.shardIterator());
+  }
+
+  @Override
+  public Checkpoint getStartCheckpoint() {
+    return _startCheckpoint;
+  }
+
+  @Override
+  public Checkpoint getEndCheckpoint() {
+    return _endCheckpoint;
+  }
+
+  @Override
+  public void setStartCheckpoint(Checkpoint startCheckpoint) {
+    _startCheckpoint = startCheckpoint;
+  }
+
+  @Override
+  public void setEndCheckpoint(Checkpoint endCheckpoint) {
+    _endCheckpoint = endCheckpoint;
+  }
+
+  @Override
+  public byte[] serialize() {
+    return new byte[0];
+  }
+
+  @Override
+  public PartitionGroupMetadata deserialize(byte[] blob) {
+    return null;
+  }
+}
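
Two wrinkles in this first cut, both tightened up in later commits on this branch: the constructor's implicit no-arg super() leaves _kinesisClient null before it is dereferenced, and the Kinesis GetShardIterator API requires a shardIteratorType, which the request above never sets. A corrected request, as a sketch:

    import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
    import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;

    final class FirstIterator {
      static GetShardIteratorRequest of(String streamName, String shardId) {
        return GetShardIteratorRequest.builder()
            .streamName(streamName)
            .shardId(shardId)
            .shardIteratorType(ShardIteratorType.TRIM_HORIZON)  // oldest record still retained
            .build();
      }
    }
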
diff --git a/pinot-plugins/pinot-stream-ingestion/pom.xml b/pinot-plugins/pinot-stream-ingestion/pom.xml
index 3a51626..e7b9a46 100644
--- a/pinot-plugins/pinot-stream-ingestion/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pom.xml
@@ -42,6 +42,7 @@
     <module>pinot-kafka-base</module>
     <module>pinot-kafka-0.9</module>
     <module>pinot-kafka-2.0</module>
+    <module>pinot-kinesis</module>
   </modules>
 
 </project>




[incubator-pinot] 14/47: Add kinesis code to handle offsets

Posted by ne...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

nehapawar pushed a commit to branch sharded_consumer_type_support_with_kinesis
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 04919035cfa9cd9e2f8d554dfd608c643b70d30e
Author: KKcorps <kh...@gmail.com>
AuthorDate: Fri Dec 11 13:57:25 2020 +0530

    Add kinesis code to handle offsets
---
 .../plugin/stream/kinesis/KinesisCheckpoint.java   | 13 ++++---
 .../plugin/stream/kinesis/KinesisConsumer.java     | 42 +++++++++++++++++++---
 .../stream/kinesis/KinesisConsumerFactory.java     | 36 +++++++++++++++++++
 .../plugin/stream/kinesis/KinesisFetchResult.java  | 11 +++---
 .../kinesis/KinesisPartitionGroupMetadataMap.java  | 31 ++++++++++++++++
 .../stream/kinesis/KinesisShardMetadata.java       |  5 ++-
 6 files changed, 121 insertions(+), 17 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index a330e78..77f790b 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -1,23 +1,22 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
 import org.apache.pinot.spi.stream.v2.Checkpoint;
-import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
 
 
 public class KinesisCheckpoint implements Checkpoint {
-  String _shardIterator;
+  String _sequenceNumber;
 
-  public KinesisCheckpoint(String shardIterator){
-    _shardIterator = shardIterator;
+  public KinesisCheckpoint(String sequenceNumber){
+    _sequenceNumber = sequenceNumber;
   }
 
-  public String getShardIterator() {
-    return _shardIterator;
+  public String getSequenceNumber() {
+    return _sequenceNumber;
   }
 
   @Override
   public byte[] serialize() {
-    return _shardIterator.getBytes();
+    return _sequenceNumber.getBytes();
   }
 
   @Override
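
The switch from a shard iterator to a sequence number as the checkpoint is more than a rename: Kinesis shard iterators expire after about five minutes, while a sequence number is a durable position in the shard that can be exchanged for a fresh iterator at any time, which is what a checkpoint persisted in ZK needs to be. One nit in the code above: _sequenceNumber.getBytes() uses the platform default charset. A round-trip sketch that pins UTF-8 so serialized checkpoints stay portable across JVMs:

    import java.nio.charset.StandardCharsets;

    public class CheckpointSerDeSketch {
      // Serialize/deserialize a sequence-number checkpoint with an explicit
      // charset, so bytes written by one JVM read back identically on another.
      static byte[] serialize(String sequenceNumber) {
        return sequenceNumber.getBytes(StandardCharsets.UTF_8);
      }

      static String deserialize(byte[] blob) {
        return new String(blob, StandardCharsets.UTF_8);
      }

      public static void main(String[] args) {
        byte[] blob = serialize("49590338271490256608559692538361571095921575989136588898");
        System.out.println(deserialize(blob)); // must round-trip losslessly
      }
    }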
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 251d831..dc44079 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -1,19 +1,26 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
+import java.util.ArrayList;
 import java.util.Collections;
+import java.util.List;
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.ConsumerV2;
 import org.apache.pinot.spi.stream.v2.FetchResult;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
 import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
+import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
+import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
 import software.amazon.awssdk.services.kinesis.model.Record;
+import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
 public class KinesisConsumer extends KinesisConnectionHandler implements ConsumerV2 {
+  String _stream;
 
   //TODO: Fetch AWS region from Stream Config.
-  public KinesisConsumer(String awsRegion) {
+  public KinesisConsumer(String stream, String awsRegion) {
     super(awsRegion);
+    _stream = stream;
   }
 
   @Override
@@ -21,18 +28,43 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     KinesisCheckpoint kinesisStartCheckpoint = (KinesisCheckpoint) start;
     KinesisCheckpoint kinesisEndCheckpoint = (KinesisCheckpoint) end;
 
-    String kinesisShardIteratorStart = kinesisStartCheckpoint.getShardIterator();
+    String kinesisStartSequenceNumber = kinesisStartCheckpoint.getSequenceNumber();
+    String kinesisEndSequenceNumber = kinesisEndCheckpoint.getSequenceNumber();
 
-    GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(kinesisShardIteratorStart).build();
+    GetShardIteratorResponse getShardIteratorResponse = _kinesisClient.getShardIterator(GetShardIteratorRequest.builder().streamName(_stream).shardIteratorType(
+        ShardIteratorType.AFTER_SEQUENCE_NUMBER).startingSequenceNumber(kinesisStartSequenceNumber).build());
+
+    String shardIterator = getShardIteratorResponse.shardIterator();
+    GetRecordsRequest getRecordsRequest = GetRecordsRequest.builder().shardIterator(shardIterator).build();
     GetRecordsResponse getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
 
     String kinesisNextShardIterator = getRecordsResponse.nextShardIterator();
 
+    //TODO: Get records in the loop and stop when end sequence number is reached or there is an exception.
     if(!getRecordsResponse.hasRecords()){
-      return new KinesisFetchResult(kinesisNextShardIterator, Collections.emptyList());
+      return new KinesisFetchResult(kinesisStartSequenceNumber, Collections.emptyList());
+    }
+
+    List<Record> recordList = new ArrayList<>();
+    recordList.addAll(getRecordsResponse.records());
+
+    String nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
+    while(kinesisNextShardIterator != null){
+      getRecordsRequest = GetRecordsRequest.builder().shardIterator(kinesisNextShardIterator).build();
+      getRecordsResponse = _kinesisClient.getRecords(getRecordsRequest);
+      if(getRecordsResponse.hasRecords()){
+        recordList.addAll(getRecordsResponse.records());
+        nextStartSequenceNumber = recordList.get(recordList.size() - 1).sequenceNumber();
+      }
+
+      if(kinesisEndSequenceNumber.compareTo(recordList.get(recordList.size() - 1).sequenceNumber()) <= 0 ) {
+        nextStartSequenceNumber = kinesisEndSequenceNumber;
+        break;
+      }
+      kinesisNextShardIterator = getRecordsResponse.nextShardIterator();
     }
 
-    KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(kinesisNextShardIterator,
+    KinesisFetchResult kinesisFetchResult = new KinesisFetchResult(nextStartSequenceNumber,
         getRecordsResponse.records());
 
     return kinesisFetchResult;
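
Three details of this loop are worth flagging. The GetShardIteratorRequest built here never sets shardId, which the Kinesis API requires alongside streamName and shardIteratorType. The final KinesisFetchResult is constructed from getRecordsResponse.records(), i.e. only the last batch, even though the loop accumulated everything into recordList. And comparing sequence numbers with String.compareTo is lexicographic, which misorders numeric strings of different lengths; BigInteger comparison is safe. A sketch of the loop with those points addressed, assuming the same AWS SDK v2 client (not the plugin's final implementation):

    import java.math.BigInteger;
    import java.util.ArrayList;
    import java.util.List;
    import software.amazon.awssdk.services.kinesis.KinesisClient;
    import software.amazon.awssdk.services.kinesis.model.GetRecordsRequest;
    import software.amazon.awssdk.services.kinesis.model.GetRecordsResponse;
    import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
    import software.amazon.awssdk.services.kinesis.model.Record;
    import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;

    public class FetchLoopSketch {
      // Collect records from one shard, from just after startSeq up to endSeq.
      static List<Record> fetch(KinesisClient client, String stream, String shardId,
          String startSeq, String endSeq) {
        String iterator = client.getShardIterator(GetShardIteratorRequest.builder()
            .streamName(stream)
            .shardId(shardId) // required by the API; missing in the patch above
            .shardIteratorType(ShardIteratorType.AFTER_SEQUENCE_NUMBER)
            .startingSequenceNumber(startSeq)
            .build()).shardIterator();

        BigInteger end = new BigInteger(endSeq);
        List<Record> records = new ArrayList<>();
        while (iterator != null) {
          GetRecordsResponse response =
              client.getRecords(GetRecordsRequest.builder().shardIterator(iterator).build());
          records.addAll(response.records());
          // Sequence numbers are numeric strings; compare as integers, not lexically.
          if (!records.isEmpty()
              && new BigInteger(records.get(records.size() - 1).sequenceNumber()).compareTo(end) >= 0) {
            break;
          }
          if (response.records().isEmpty() && response.millisBehindLatest() != null
              && response.millisBehindLatest() == 0L) {
            break; // caught up to the tip of the shard
          }
          iterator = response.nextShardIterator();
        }
        return records; // the full accumulated batch, not just the last response
      }
    }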
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
new file mode 100644
index 0000000..6bd1e3a
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -0,0 +1,36 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import java.util.Map;
+import org.apache.pinot.spi.stream.StreamConfig;
+import org.apache.pinot.spi.stream.v2.ConsumerV2;
+import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
+import org.apache.pinot.spi.stream.v2.PartitionGroupMetadataMap;
+import org.apache.pinot.spi.stream.v2.SegmentNameGenerator;
+import org.apache.pinot.spi.stream.v2.StreamConsumerFactoryV2;
+
+
+public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
+  private StreamConfig _streamConfig;
+  private final String AWS_REGION = "aws-region";
+
+  @Override
+  public void init(StreamConfig streamConfig) {
+    _streamConfig = streamConfig;
+  }
+
+  @Override
+  public PartitionGroupMetadataMap getPartitionGroupsMetadata(
+      PartitionGroupMetadataMap currentPartitionGroupsMetadata) {
+    return new KinesisPartitionGroupMetadataMap(_streamConfig.getTopicName(), _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, "us-central-1"));
+  }
+
+  @Override
+  public SegmentNameGenerator getSegmentNameGenerator() {
+    return null;
+  }
+
+  @Override
+  public ConsumerV2 createConsumer(PartitionGroupMetadata metadata) {
+    return new KinesisConsumer(_streamConfig.getTopicName(), _streamConfig.getStreamConfigsMap().getOrDefault(AWS_REGION, "us-central-1"));
+  }
+}
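
The factory pulls the region out of the raw stream-config map under an "aws-region" key, with "us-central-1" as the fallback; note that us-central-1 is not an AWS region (eu-central-1 and ca-central-1 exist, but the US partition uses us-east-1/us-east-2/us-west-1/us-west-2), so the default would fail at client creation. A small sketch of how a table's stream-config entries might carry this key; every key except aws-region is illustrative, not the plugin's final property names:

    import java.util.HashMap;
    import java.util.Map;

    public class KinesisStreamConfigSketch {
      public static void main(String[] args) {
        Map<String, String> streamConfigs = new HashMap<>();
        streamConfigs.put("streamType", "kinesis");           // illustrative key
        streamConfigs.put("topic.name", "my-kinesis-stream"); // illustrative key
        streamConfigs.put("aws-region", "us-west-2");         // a real region

        // Mirrors the factory's lookup, but with a valid default region.
        String region = streamConfigs.getOrDefault("aws-region", "us-east-1");
        System.out.println("Consuming from region: " + region);
      }
    }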
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
index 5ef4e30..dc8e764 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
@@ -1,16 +1,19 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
+import java.util.ArrayList;
 import java.util.List;
 import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.FetchResult;
 import software.amazon.awssdk.services.kinesis.model.Record;
 
 
-public class KinesisFetchResult implements FetchResult {
-  private String _nextShardIterator;
+public class KinesisFetchResult implements FetchResult<Record> {
+  private final String _nextShardIterator;
+  private final List<Record> _recordList;
 
   public KinesisFetchResult(String nextShardIterator, List<Record> recordList){
      _nextShardIterator = nextShardIterator;
+     _recordList = recordList;
   }
 
   @Override
@@ -19,7 +22,7 @@ public class KinesisFetchResult implements FetchResult {
   }
 
   @Override
-  public byte[] getMessages() {
-    return new byte[0];
+  public List<Record> getMessages() {
+    return _recordList;
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
new file mode 100644
index 0000000..bc3fef2
--- /dev/null
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -0,0 +1,31 @@
+package org.apache.pinot.plugin.stream.kinesis;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
+import org.apache.pinot.spi.stream.v2.PartitionGroupMetadataMap;
+import software.amazon.awssdk.services.kinesis.model.ListShardsRequest;
+import software.amazon.awssdk.services.kinesis.model.ListShardsResponse;
+import software.amazon.awssdk.services.kinesis.model.Shard;
+
+
+public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler implements PartitionGroupMetadataMap {
+  private Map<String, PartitionGroupMetadata> _stringPartitionGroupMetadataMap = new HashMap<>();
+
+  public KinesisPartitionGroupMetadataMap(String stream, String awsRegion){
+    super(awsRegion);
+    ListShardsResponse listShardsResponse = _kinesisClient.listShards(ListShardsRequest.builder().streamName(stream).build());
+    List<Shard> shardList = listShardsResponse.shards();
+    for(Shard shard : shardList){
+      String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
+      KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream);
+      shardMetadata.setEndCheckpoint(new KinesisCheckpoint(endingSequenceNumber));
+      _stringPartitionGroupMetadataMap.put(shard.shardId(), shardMetadata);
+    }
+  }
+
+  public Map<String, PartitionGroupMetadata> getPartitionMetadata(){
+    return _stringPartitionGroupMetadataMap;
+  }
+}
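
One operational caveat with this constructor: listShards returns a single page of shards (1,000 by default), so a stream that has been resharded many times needs the nextToken loop, and the API rejects a request that sets both streamName and nextToken. A paginated sketch using the same SDK calls:

    import java.util.ArrayList;
    import java.util.List;
    import software.amazon.awssdk.services.kinesis.KinesisClient;
    import software.amazon.awssdk.services.kinesis.model.ListShardsRequest;
    import software.amazon.awssdk.services.kinesis.model.ListShardsResponse;
    import software.amazon.awssdk.services.kinesis.model.Shard;

    public class ListShardsSketch {
      // Fetch every shard of a stream, following pagination tokens.
      static List<Shard> listAllShards(KinesisClient client, String stream) {
        List<Shard> shards = new ArrayList<>();
        ListShardsResponse response =
            client.listShards(ListShardsRequest.builder().streamName(stream).build());
        shards.addAll(response.shards());
        while (response.nextToken() != null) {
          // Follow-up pages are requested by token only: the API forbids
          // combining streamName with nextToken.
          response = client.listShards(
              ListShardsRequest.builder().nextToken(response.nextToken()).build());
          shards.addAll(response.shards());
        }
        return shards;
      }
    }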
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
index 07ede73..d50d821 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -4,6 +4,7 @@ import org.apache.pinot.spi.stream.v2.Checkpoint;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorRequest;
 import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
+import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
 
 public class KinesisShardMetadata extends KinesisConnectionHandler implements PartitionGroupMetadata {
@@ -11,8 +12,10 @@ public class KinesisShardMetadata extends KinesisConnectionHandler implements Pa
   Checkpoint _endCheckpoint;
 
   public KinesisShardMetadata(String shardId, String streamName) {
-    GetShardIteratorResponse getShardIteratorResponse = _kinesisClient.getShardIterator(GetShardIteratorRequest.builder().shardId(shardId).streamName(streamName).build());
+    GetShardIteratorResponse getShardIteratorResponse = _kinesisClient.getShardIterator(GetShardIteratorRequest.builder().shardId(shardId).shardIteratorType(
+        ShardIteratorType.LATEST).streamName(streamName).build());
     _startCheckpoint = new KinesisCheckpoint(getShardIteratorResponse.shardIterator());
+    _endCheckpoint = null;
   }
 
   @Override
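
Moving the shard-iterator type to LATEST changes what a freshly created shard metadata means: LATEST positions the iterator after the most recent record, so only data produced from now on is seen, whereas TRIM_HORIZON starts from the oldest retained record and AT_/AFTER_SEQUENCE_NUMBER resume from a checkpoint. A small sketch of mapping a user-facing offset criterion to an iterator type; the "smallest"/"largest" values are hypothetical, echoing Pinot's Kafka offset conventions:

    import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;

    public class IteratorTypeSketch {
      // Map an offset criterion string to a Kinesis iterator type.
      static ShardIteratorType fromOffsetCriteria(String criteria) {
        switch (criteria.toLowerCase()) {
          case "smallest":
            return ShardIteratorType.TRIM_HORIZON; // oldest retained record
          case "largest":
            return ShardIteratorType.LATEST;       // only records produced from now on
          default:
            throw new IllegalArgumentException("Unsupported offset criteria: " + criteria);
        }
      }
    }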




[incubator-pinot] 26/47: Handle closed connections


commit 0b8bb670bfd91e3a455a989a584388f15065b6a3
Author: KKcorps <kh...@gmail.com>
AuthorDate: Mon Dec 21 14:21:55 2020 +0530

    Handle closed connections
---
 .../plugin/stream/kinesis/KinesisConnectionHandler.java      | 12 +++++++++---
 .../apache/pinot/plugin/stream/kinesis/KinesisConsumer.java  |  8 ++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
index ba94b0a..3607787 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConnectionHandler.java
@@ -49,9 +49,7 @@ public class KinesisConnectionHandler {
   public KinesisConnectionHandler(String stream, String awsRegion) {
     _stream = stream;
     _awsRegion = awsRegion;
-    _kinesisClient =
-        KinesisClient.builder().region(Region.of(_awsRegion)).credentialsProvider(DefaultCredentialsProvider.create())
-            .build();
+    createConnection();
   }
 
   public List<Shard> getShards() {
@@ -60,9 +58,17 @@ public class KinesisConnectionHandler {
     return listShardsResponse.shards();
   }
 
+  public void createConnection(){
+    if(_kinesisClient == null) {
+      _kinesisClient = KinesisClient.builder().region(Region.of(_awsRegion)).credentialsProvider(DefaultCredentialsProvider.create())
+          .build();
+    }
+  }
+
   public void close(){
     if(_kinesisClient != null) {
       _kinesisClient.close();
+      _kinesisClient = null;
     }
   }
 }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
index 24810ba..fd48a92 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumer.java
@@ -79,6 +79,10 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
 
     try {
 
+      if(_kinesisClient == null){
+        createConnection();
+      }
+
       String shardIterator = getShardIterator(kinesisStartCheckpoint);
 
       String kinesisEndSequenceNumber = null;
@@ -176,4 +180,8 @@ public class KinesisConsumer extends KinesisConnectionHandler implements Consume
     return getShardIteratorResponse.shardIterator();
   }
 
+  @Override
+  public void close() {
+    super.close();
+  }
 }
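
The null-check-and-recreate pattern lets a consumer survive a close(), but neither createConnection() nor the check added in fetch is synchronized, so two threads sharing a handler can race a close against a fetch and either leak a client or call through a client that is mid-close. A minimal thread-safe variant of the handler, reusing the builder chain from this patch:

    import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider;
    import software.amazon.awssdk.regions.Region;
    import software.amazon.awssdk.services.kinesis.KinesisClient;

    public class SafeConnectionHandlerSketch {
      private final String _awsRegion;
      private KinesisClient _kinesisClient;

      public SafeConnectionHandlerSketch(String awsRegion) {
        _awsRegion = awsRegion;
      }

      // Lazily (re)create the client; synchronized so a fetch racing a close
      // cannot observe a half-initialized or just-closed client.
      public synchronized KinesisClient getOrCreateClient() {
        if (_kinesisClient == null) {
          _kinesisClient = KinesisClient.builder()
              .region(Region.of(_awsRegion))
              .credentialsProvider(DefaultCredentialsProvider.create())
              .build();
        }
        return _kinesisClient;
      }

      public synchronized void close() {
        if (_kinesisClient != null) {
          _kinesisClient.close();
          _kinesisClient = null;
        }
      }
    }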




[incubator-pinot] 28/47: Change shard metadata logic


commit 26085a88ac34d6b00737bbd68b8ff409ad281467
Author: KKcorps <kh...@gmail.com>
AuthorDate: Tue Dec 22 20:42:05 2020 +0530

    Change shard metadata logic
---
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml   |  2 +-
 .../plugin/stream/kinesis/KinesisCheckpoint.java   |  2 +-
 .../stream/kinesis/KinesisConsumerFactory.java     |  2 +-
 .../plugin/stream/kinesis/KinesisFetchResult.java  |  2 +-
 .../kinesis/KinesisPartitionGroupMetadataMap.java  | 55 +++++++++++++++++++---
 .../stream/kinesis/KinesisShardMetadata.java       | 16 +++----
 6 files changed, 60 insertions(+), 19 deletions(-)

diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
index 1abc536..0c9ae0b 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
@@ -35,7 +35,7 @@
   <properties>
     <pinot.root>${basedir}/../../..</pinot.root>
     <phase.prop>package</phase.prop>
-    <aws.version>2.13.46</aws.version>
+    <aws.version>2.15.50</aws.version>
   </properties>
 
   <dependencies>
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
index 450173c..8de95e2 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisCheckpoint.java
@@ -38,7 +38,7 @@ public class KinesisCheckpoint implements Checkpoint {
   }
 
   @Override
-  public Checkpoint deserialize(byte[] blob) {
+  public KinesisCheckpoint deserialize(byte[] blob) {
     //TODO: Implement SerDe
     return new KinesisCheckpoint(new String(blob));
   }
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
index da39aab..acac1fb 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisConsumerFactory.java
@@ -38,7 +38,7 @@ public class KinesisConsumerFactory implements StreamConsumerFactoryV2 {
   @Override
   public PartitionGroupMetadataMap getPartitionGroupsMetadata(
       PartitionGroupMetadataMap currentPartitionGroupsMetadata) {
-    return new KinesisPartitionGroupMetadataMap(_kinesisConfig.getStream(), _kinesisConfig.getAwsRegion());
+    return new KinesisPartitionGroupMetadataMap(_kinesisConfig.getStream(), _kinesisConfig.getAwsRegion(), currentPartitionGroupsMetadata);
   }
 
   @Override
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
index 52dab66..aedcd5d 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisFetchResult.java
@@ -35,7 +35,7 @@ public class KinesisFetchResult implements FetchResult<Record> {
   }
 
   @Override
-  public Checkpoint getLastCheckpoint() {
+  public KinesisCheckpoint getLastCheckpoint() {
     return _kinesisCheckpoint;
   }
 
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
index 9a34004..d77579e 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisPartitionGroupMetadataMap.java
@@ -19,7 +19,11 @@
 package org.apache.pinot.plugin.stream.kinesis;
 
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadata;
 import org.apache.pinot.spi.stream.v2.PartitionGroupMetadataMap;
 import software.amazon.awssdk.services.kinesis.model.ListShardsRequest;
@@ -30,19 +34,56 @@ import software.amazon.awssdk.services.kinesis.model.Shard;
 public class KinesisPartitionGroupMetadataMap extends KinesisConnectionHandler implements PartitionGroupMetadataMap {
   private final List<PartitionGroupMetadata> _stringPartitionGroupMetadataIndex = new ArrayList<>();
 
-  public KinesisPartitionGroupMetadataMap(String stream, String awsRegion) {
+  public KinesisPartitionGroupMetadataMap(String stream, String awsRegion,
+      PartitionGroupMetadataMap partitionGroupMetadataMap) {
+    //TODO: Handle child shards. Do not consume data from a child shard unless its parent is finished.
+    //Return metadata only for the shards present in the current metadata
     super(stream, awsRegion);
+    KinesisPartitionGroupMetadataMap currentPartitionMeta =
+        (KinesisPartitionGroupMetadataMap) partitionGroupMetadataMap;
+    List<PartitionGroupMetadata> currentMetaList = currentPartitionMeta.getMetadataList();
+
     List<Shard> shardList = getShards();
+
+    Map<String, PartitionGroupMetadata> metadataMap = new HashMap<>();
+    for (PartitionGroupMetadata partitionGroupMetadata : currentMetaList) {
+      KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) partitionGroupMetadata;
+      metadataMap.put(kinesisShardMetadata.getShardId(), kinesisShardMetadata);
+    }
+
     for (Shard shard : shardList) {
-      String startSequenceNumber = shard.sequenceNumberRange().startingSequenceNumber();
-      String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
-      KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream, awsRegion);
-      shardMetadata.setStartCheckpoint(new KinesisCheckpoint(startSequenceNumber));
-      shardMetadata.setEndCheckpoint(new KinesisCheckpoint(endingSequenceNumber));
-      _stringPartitionGroupMetadataIndex.add(shardMetadata);
+      if (metadataMap.containsKey(shard.shardId())) {
+        //Return existing shard metadata
+        _stringPartitionGroupMetadataIndex.add(metadataMap.get(shard.shardId()));
+      } else if (metadataMap.containsKey(shard.parentShardId())) {
+        KinesisShardMetadata kinesisShardMetadata = (KinesisShardMetadata) metadataMap.get(shard.parentShardId());
+        if (isProcessingFinished(kinesisShardMetadata)) {
+          //Add child shards for processing since parent has finished
+          appendShardMetadata(stream, awsRegion, shard);
+        } else {
+          //Do not process this shard unless the parent shard is finished or expired
+        }
+      } else {
+        //This is a new shard with no parents. We can start processing this shard.
+        appendShardMetadata(stream, awsRegion, shard);
+      }
     }
   }
 
+  private boolean isProcessingFinished(KinesisShardMetadata kinesisShardMetadata) {
+    return kinesisShardMetadata.getEndCheckpoint().getSequenceNumber() != null && kinesisShardMetadata
+        .getStartCheckpoint().getSequenceNumber().equals(kinesisShardMetadata.getEndCheckpoint().getSequenceNumber());
+  }
+
+  private void appendShardMetadata(String stream, String awsRegion, Shard shard) {
+    String startSequenceNumber = shard.sequenceNumberRange().startingSequenceNumber();
+    String endingSequenceNumber = shard.sequenceNumberRange().endingSequenceNumber();
+    KinesisShardMetadata shardMetadata = new KinesisShardMetadata(shard.shardId(), stream, awsRegion);
+    shardMetadata.setStartCheckpoint(new KinesisCheckpoint(startSequenceNumber));
+    shardMetadata.setEndCheckpoint(new KinesisCheckpoint(endingSequenceNumber));
+    _stringPartitionGroupMetadataIndex.add(shardMetadata);
+  }
+
   @Override
   public List<PartitionGroupMetadata> getMetadataList() {
     return _stringPartitionGroupMetadataIndex;
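
After a reshard, Kinesis keeps the parent shard readable until its records age out, and per-key ordering is preserved only if the parent is fully drained before its children are consumed; that is what the three-way branch above enforces (existing shard: keep its metadata; child of a tracked shard: start only once the parent is finished; otherwise: start fresh). A self-contained sketch of the gating decision, with a simplified shard record standing in for the SDK's Shard model (requires Java 16+ for records):

    import java.util.List;
    import java.util.Set;
    import java.util.stream.Collectors;

    public class ShardGatingSketch {
      // Simplified stand-in for software.amazon.awssdk.services.kinesis.model.Shard.
      record ShardInfo(String shardId, String parentShardId) {}

      // Shards that may be consumed now: already-tracked shards, shards with
      // no parent, shards whose parent is untracked (a new lineage), and
      // children whose tracked parent is fully processed.
      static List<ShardInfo> consumableShards(List<ShardInfo> allShards,
          Set<String> tracked, Set<String> finished) {
        return allShards.stream()
            .filter(s -> tracked.contains(s.shardId())
                || s.parentShardId() == null
                || !tracked.contains(s.parentShardId())
                || finished.contains(s.parentShardId()))
            .collect(Collectors.toList());
      }

      public static void main(String[] args) {
        List<ShardInfo> shards = List.of(
            new ShardInfo("shardId-000", null),
            new ShardInfo("shardId-001", "shardId-000"));
        // Parent still being consumed: the child shard is held back.
        System.out.println(consumableShards(shards, Set.of("shardId-000"), Set.of()));
        // Parent drained: the child shard becomes consumable too.
        System.out.println(consumableShards(shards, Set.of("shardId-000"), Set.of("shardId-000")));
      }
    }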
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
index 8141cd4..327e034 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/src/main/java/org/apache/pinot/plugin/stream/kinesis/KinesisShardMetadata.java
@@ -25,11 +25,11 @@ import software.amazon.awssdk.services.kinesis.model.GetShardIteratorResponse;
 import software.amazon.awssdk.services.kinesis.model.SequenceNumberRange;
 import software.amazon.awssdk.services.kinesis.model.ShardIteratorType;
 
-
+//TODO: Implement shardId as Array
 public class KinesisShardMetadata extends KinesisConnectionHandler implements PartitionGroupMetadata {
   String _shardId;
-  Checkpoint _startCheckpoint;
-  Checkpoint _endCheckpoint;
+  KinesisCheckpoint _startCheckpoint;
+  KinesisCheckpoint _endCheckpoint;
 
   public KinesisShardMetadata(String shardId, String streamName, String awsRegion) {
     super(streamName, awsRegion);
@@ -43,23 +43,23 @@ public class KinesisShardMetadata extends KinesisConnectionHandler implements Pa
   }
 
   @Override
-  public Checkpoint getStartCheckpoint() {
+  public KinesisCheckpoint getStartCheckpoint() {
     return _startCheckpoint;
   }
 
   @Override
-  public Checkpoint getEndCheckpoint() {
+  public KinesisCheckpoint getEndCheckpoint() {
     return _endCheckpoint;
   }
 
   @Override
   public void setStartCheckpoint(Checkpoint startCheckpoint) {
-    _startCheckpoint = startCheckpoint;
+    _startCheckpoint = (KinesisCheckpoint) startCheckpoint;
   }
 
   @Override
   public void setEndCheckpoint(Checkpoint endCheckpoint) {
-    _endCheckpoint = endCheckpoint;
+    _endCheckpoint = (KinesisCheckpoint) endCheckpoint;
   }
 
   @Override
@@ -68,7 +68,7 @@ public class KinesisShardMetadata extends KinesisConnectionHandler implements Pa
   }
 
   @Override
-  public PartitionGroupMetadata deserialize(byte[] blob) {
+  public KinesisShardMetadata deserialize(byte[] blob) {
     return null;
   }
 }




[incubator-pinot] 04/47: Controller side code


commit 96621f415385649b4f2ec5ea1828723eed4baa7b
Author: Neha Pawar <ne...@gmail.com>
AuthorDate: Thu Dec 31 12:04:54 2020 -0800

    Controller side code
---
 .../segment/RealtimeSegmentZKMetadata.java         |   6 -
 .../helix/core/PinotHelixResourceManager.java      |  88 ++++-----
 .../helix/core/PinotTableIdealStateBuilder.java    |   9 +-
 .../realtime/PinotLLCRealtimeSegmentManager.java   | 201 +++++++++++++--------
 .../fakestream/FakePartitionGroupMetadata.java     |  48 -----
 .../kafka09/KafkaPartitionGroupMetadata.java       |  48 -----
 .../kafka20/KafkaPartitionGroupMetadata.java       |  48 -----
 .../pinot/spi/stream/PartitionGroupMetadata.java   |  52 +++++-
 8 files changed, 207 insertions(+), 293 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/RealtimeSegmentZKMetadata.java b/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/RealtimeSegmentZKMetadata.java
index c46af53..d88be18 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/RealtimeSegmentZKMetadata.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/metadata/segment/RealtimeSegmentZKMetadata.java
@@ -35,7 +35,6 @@ public class RealtimeSegmentZKMetadata extends SegmentZKMetadata {
   private Status _status = null;
   private int _sizeThresholdToFlushSegment = -1;
   private String _timeThresholdToFlushSegment = null; // store as period string for readability
-  private String _partitionGroupMetadataStr = null;
 
   public RealtimeSegmentZKMetadata() {
     setSegmentType(SegmentType.REALTIME);
@@ -50,7 +49,6 @@ public class RealtimeSegmentZKMetadata extends SegmentZKMetadata {
     if (flushThresholdTime != null && !flushThresholdTime.equals(NULL)) {
       _timeThresholdToFlushSegment = znRecord.getSimpleField(CommonConstants.Segment.FLUSH_THRESHOLD_TIME);
     }
-    _partitionGroupMetadataStr = znRecord.getSimpleField(CommonConstants.Segment.PARTITION_GROUP_METADATA);
   }
 
   @Override
@@ -143,8 +141,4 @@ public class RealtimeSegmentZKMetadata extends SegmentZKMetadata {
   public void setTimeThresholdToFlushSegment(String timeThresholdPeriodString) {
     _timeThresholdToFlushSegment = timeThresholdPeriodString;
   }
-
-  public String getPartitionGroupMetadataStr() {
-    return _partitionGroupMetadataStr;
-  }
 }
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index a04e0bc..1f36e4f 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -127,6 +127,7 @@ import org.apache.pinot.spi.config.table.assignment.InstancePartitionsType;
 import org.apache.pinot.spi.config.tenant.Tenant;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.stream.PartitionGroupMetadata;
+import org.apache.pinot.spi.stream.PartitionLevelStreamConfig;
 import org.apache.pinot.spi.stream.StreamConfig;
 import org.apache.pinot.spi.stream.StreamConsumerFactory;
 import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider;
@@ -1355,65 +1356,50 @@ public class PinotHelixResourceManager {
         IngestionConfigUtils.getStreamConfigMap(realtimeTableConfig));
     IdealState idealState = getTableIdealState(realtimeTableName);
 
+
     if (streamConfig.isShardedConsumerType()) {
-      setupShardedRealtimeTable(streamConfig, idealState, realtimeTableConfig.getValidationConfig().getReplicasPerPartitionNumber());
-    }
+      idealState = PinotTableIdealStateBuilder
+          .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
+              _enableBatchMessageMode);
+      _pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
+      LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
+      _pinotLLCRealtimeSegmentManager.setupNewShardedTable(rawRealtimeTableConfig, idealState);
+    } else {
 
-    if (streamConfig.hasHighLevelConsumerType()) {
-      if (idealState == null) {
-        LOGGER.info("Initializing IdealState for HLC table: {}", realtimeTableName);
-        idealState = PinotTableIdealStateBuilder
-            .buildInitialHighLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, _helixZkManager,
-                _propertyStore, _enableBatchMessageMode);
-        _helixAdmin.addResource(_helixClusterName, realtimeTableName, idealState);
-      } else {
-        // Remove LLC segments if it is not configured
-        if (!streamConfig.hasLowLevelConsumerType()) {
-          _pinotLLCRealtimeSegmentManager.removeLLCSegments(idealState);
+      if (streamConfig.hasHighLevelConsumerType()) {
+        if (idealState == null) {
+          LOGGER.info("Initializing IdealState for HLC table: {}", realtimeTableName);
+          idealState = PinotTableIdealStateBuilder
+              .buildInitialHighLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, _helixZkManager,
+                  _propertyStore, _enableBatchMessageMode);
+          _helixAdmin.addResource(_helixClusterName, realtimeTableName, idealState);
+        } else {
+          // Remove LLC segments if it is not configured
+          if (!streamConfig.hasLowLevelConsumerType()) {
+            _pinotLLCRealtimeSegmentManager.removeLLCSegments(idealState);
+          }
         }
+        // For HLC table, property store entry must exist to trigger watchers to create segments
+        ensurePropertyStoreEntryExistsForHighLevelConsumer(realtimeTableName);
       }
-      // For HLC table, property store entry must exist to trigger watchers to create segments
-      ensurePropertyStoreEntryExistsForHighLevelConsumer(realtimeTableName);
-    }
-
-    // Either we have only low-level consumer, or both.
-    if (streamConfig.hasLowLevelConsumerType()) {
-      // Will either create idealstate entry, or update the IS entry with new segments
-      // (unless there are low-level segments already present)
-      if (ZKMetadataProvider.getLLCRealtimeSegments(_propertyStore, realtimeTableName).isEmpty()) {
-        PinotTableIdealStateBuilder
-            .buildLowLevelRealtimeIdealStateFor(_pinotLLCRealtimeSegmentManager, realtimeTableName, realtimeTableConfig,
-                idealState, _enableBatchMessageMode);
-        LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
-      } else {
-        LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
+
+      // Either we have only low-level consumer, or both.
+      if (streamConfig.hasLowLevelConsumerType()) {
+        // Will either create idealstate entry, or update the IS entry with new segments
+        // (unless there are low-level segments already present)
+        if (ZKMetadataProvider.getLLCRealtimeSegments(_propertyStore, realtimeTableName).isEmpty()) {
+          idealState = PinotTableIdealStateBuilder
+              .buildLowLevelRealtimeIdealStateFor(realtimeTableName, realtimeTableConfig, idealState,
+                  _enableBatchMessageMode);
+          _pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
+          LOGGER.info("Successfully added Helix entries for low-level consumers for {} ", realtimeTableName);
+        } else {
+          LOGGER.info("LLC is already set up for table {}, not configuring again", realtimeTableName);
+        }
       }
     }
   }
 
-  /**
-   * Sets up the realtime table ideal state
-   * @param streamConfig
-   */
-  private void setupShardedRealtimeTable(StreamConfig streamConfig, IdealState idealState, int numReplicas) {
-    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
-    StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
-        .createStreamMetadataProvider(streamConfig.getTopicName() + "_" + System.currentTimeMillis());
-
-    // get current partition groups and their metadata - this will be empty when creating the table
-    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = _pinotLLCRealtimeSegmentManager.getCurrentPartitionGroupMetadataList(idealState);
-
-    // get new partition groups and their metadata,
-    // Assume table has 3 shards. Say we get [0], [1], [2] groups (for now assume that each group contains only 1 shard)
-    List<PartitionGroupMetadata> newPartitionGroupMetadataList =
-        streamMetadataProvider.getPartitionGroupMetadataList(currentPartitionGroupMetadataList, 5000);
-
-    // setup segment zk metadata and ideal state for all the new found partition groups
-    _pinotLLCRealtimeSegmentManager.setupNewPartitionGroups(newPartitionGroupMetadataList, numReplicas);
-  }
-
-
-
   private void ensurePropertyStoreEntryExistsForHighLevelConsumer(String realtimeTableName) {
     String propertyStorePath = ZKMetadataProvider.constructPropertyStorePathForResource(realtimeTableName);
     if (!_propertyStore.exists(propertyStorePath, AccessOption.PERSISTENT)) {
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
index 1e95966..a7b3c9e 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotTableIdealStateBuilder.java
@@ -94,9 +94,8 @@ public class PinotTableIdealStateBuilder {
     return idealState;
   }
 
-  public static void buildLowLevelRealtimeIdealStateFor(PinotLLCRealtimeSegmentManager pinotLLCRealtimeSegmentManager,
-      String realtimeTableName, TableConfig realtimeTableConfig, IdealState idealState,
-      boolean enableBatchMessageMode) {
+  public static IdealState buildLowLevelRealtimeIdealStateFor(String realtimeTableName, TableConfig realtimeTableConfig,
+      IdealState idealState, boolean enableBatchMessageMode) {
 
     // Validate replicasPerPartition here.
     final String replicasPerPartitionStr = realtimeTableConfig.getValidationConfig().getReplicasPerPartition();
@@ -105,7 +104,7 @@ public class PinotTableIdealStateBuilder {
     }
     final int nReplicas;
     try {
-      nReplicas = Integer.valueOf(replicasPerPartitionStr);
+      nReplicas = Integer.parseInt(replicasPerPartitionStr);
     } catch (NumberFormatException e) {
       throw new PinotHelixResourceManager.InvalidTableConfigException(
           "Invalid value for replicasPerPartition, expected a number: " + replicasPerPartitionStr, e);
@@ -113,7 +112,7 @@ public class PinotTableIdealStateBuilder {
     if (idealState == null) {
       idealState = buildEmptyRealtimeIdealStateFor(realtimeTableName, nReplicas, enableBatchMessageMode);
     }
-    pinotLLCRealtimeSegmentManager.setUpNewTable(realtimeTableConfig, idealState);
+    return idealState;
   }
 
   public static List<PartitionGroupMetadata> getPartitionGroupMetadataList(StreamConfig streamConfig,
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 189be8b..9b03fa4 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -32,6 +32,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 import org.apache.helix.AccessOption;
 import org.apache.helix.HelixAdmin;
@@ -44,6 +45,7 @@ import org.apache.pinot.common.assignment.InstancePartitionsUtils;
 import org.apache.pinot.common.metadata.ZKMetadataProvider;
 import org.apache.pinot.common.metadata.segment.ColumnPartitionMetadata;
 import org.apache.pinot.common.metadata.segment.LLCRealtimeSegmentZKMetadata;
+import org.apache.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
 import org.apache.pinot.common.metadata.segment.SegmentPartitionMetadata;
 import org.apache.pinot.common.metadata.segment.SegmentZKMetadata;
 import org.apache.pinot.common.metrics.ControllerMeter;
@@ -161,82 +163,84 @@ public class PinotLLCRealtimeSegmentManager {
     _flushThresholdUpdateManager = new FlushThresholdUpdateManager();
   }
 
+
   /**
-   * The committing segment will call this.
-   * 
-   * For example, say we have 3 shards, grouped into PartitionGroups as [0], [1], [2]
-   * Now segment of PG (partition group) 0 is committing. First, we'll update the metadata to DONE, and ideal state to ONLINE
-   * Then, the currentPartitionGroupMetadata list will contain - [1], [2]
-   * The newPartitionGroupMetadata list will contain - [0], [1], [2]
-   * We then get the set of PGs for which new segments need to be made - [0]
+   * Using the ideal state and segment metadata, return a list of the current partition groups
    */
-  public void commitPartitionGroup(String realtimeTableName, CommittingSegmentDescriptor committingSegmentDescriptor) {
-    TableConfig realtimeTableConfig = getTableConfig(realtimeTableName);
-    StreamConfig streamConfig = new StreamConfig(realtimeTableName, IngestionConfigUtils.getStreamConfigMap(realtimeTableConfig));
-    int numReplicas = realtimeTableConfig.getValidationConfig().getReplicasPerPartitionNumber();
-    IdealState idealState = getIdealState(realtimeTableName);
-
-    // update status in segment metadata to DONE
-    // ..
-
-    // update Ideal State for this segment to ONLINE
-    // ..
-
-    // fetch current partition groups (which are actively CONSUMING - from example above, [1], [2])
-    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
-
-    // get new partition groups (honor any groupings which are already consuming - [0], [1], [2])
-    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
-    StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
-        .createStreamMetadataProvider(streamConfig.getTopicName() + " " + System.currentTimeMillis());
-    List<PartitionGroupMetadata> newPartitionGroupMetadataList =
-        streamMetadataProvider.getPartitionGroupMetadataList(currentPartitionGroupMetadataList, 1000);
-
-    // from the above list, remove the partition groups which are already CONSUMING
-    // i.e. newPartitionGroups - currentPartitionGroups. Therefore, ([0], [1], [2]) - ([1], [2]) = ([0])
-    // ..
-
-    // setup segment metadata and ideal state for the new found  partition groups
-    setupNewPartitionGroups(newPartitionGroupMetadataList, numReplicas);
-  }
+  public List<PartitionGroupMetadata> getCurrentPartitionGroupMetadataList(IdealState idealState) {
+    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>();
 
-  public void setupIdealStateForConsuming(List<SegmentZKMetadata> segmentZKMetadata, int numReplicas) {
-    // add all segments from the list to ideal state, with state CONSUMING
-  }
+    // from all segment names in the ideal state, find unique groups
+    Map<Integer, LLCSegmentName> groupIdToLatestSegment = new HashMap<>();
+    for (String segment : idealState.getPartitionSet()) {
+      LLCSegmentName llcSegmentName = new LLCSegmentName(segment);
+      int partitionGroupId = llcSegmentName.getPartitionGroupId();
+      groupIdToLatestSegment.compute(partitionGroupId, (k, latestSegment) -> {
+        if (latestSegment == null) {
+          return llcSegmentName;
+        } else {
+          return latestSegment.getSequenceNumber() > llcSegmentName.getSequenceNumber() ? latestSegment
+              : llcSegmentName;
+        }
+      });
+    }
 
-  public void persistSegmentMetadata(List<SegmentZKMetadata> segmentMetadata) {
-    // persist new segment metadata from list to zk
+    // create a PartitionGroupMetadata for each latest segment
+    for (Map.Entry<Integer, LLCSegmentName> entry : groupIdToLatestSegment.entrySet()) {
+      int partitionGroupId = entry.getKey();
+      LLCSegmentName llcSegmentName = entry.getValue();
+      RealtimeSegmentZKMetadata realtimeSegmentZKMetadata = ZKMetadataProvider
+          .getRealtimeSegmentZKMetadata(_propertyStore, llcSegmentName.getTableName(), llcSegmentName.getSegmentName());
+      Preconditions.checkNotNull(realtimeSegmentZKMetadata);
+      LLCRealtimeSegmentZKMetadata llRealtimeSegmentZKMetadata =
+          (LLCRealtimeSegmentZKMetadata) realtimeSegmentZKMetadata;
+      PartitionGroupMetadata partitionGroupMetadata =
+          new PartitionGroupMetadata(partitionGroupId, llcSegmentName.getSequenceNumber(),
+              llRealtimeSegmentZKMetadata.getStartOffset(), llRealtimeSegmentZKMetadata.getEndOffset(),
+              llRealtimeSegmentZKMetadata.getStatus().toString());
+      partitionGroupMetadataList.add(partitionGroupMetadata);
+    }
+    return partitionGroupMetadataList;
   }
 
   /**
-   * Using the list of partition group metadata, create a list of equivalent segment zk metadata
+   * Sets up the realtime table ideal state for a table of consumer type SHARDED
    */
-  public List<SegmentZKMetadata> constructSegmentMetadata(List<PartitionGroupMetadata> partitionGroupMetadataList) {
-    List<SegmentZKMetadata> segmentZKMetadata = new ArrayList<>();
-    // for each partition group construct a segment zk metadata object
-    return segmentZKMetadata;
-  }
+  public void setupNewShardedTable(TableConfig tableConfig, IdealState idealState) {
+    Preconditions.checkState(!_isStopping, "Segment manager is stopping");
 
-  /**
-   * Using the ideal state, return a list of the current partition groups
-   */
-  public List<PartitionGroupMetadata> getCurrentPartitionGroupMetadataList(IdealState idealState) {
-    List<PartitionGroupMetadata> partitionGroupMetadataList = new ArrayList<>();
-    // from all segment names in the ideal state, find unique groups
+    String realtimeTableName = tableConfig.getTableName();
+    LOGGER.info("Setting up new SHARDED table: {}", realtimeTableName);
 
-    // create a PartitionGroupMetadata, one for each group
-    return partitionGroupMetadataList;
-  }
+    _flushThresholdUpdateManager.clearFlushThresholdUpdater(realtimeTableName);
 
-  public void setupNewPartitionGroups(List<PartitionGroupMetadata> newPartitionGroupMetadataList, int numReplicas) {
-    // construct segment zk metadata for the new partition groups
-    List<SegmentZKMetadata> segmentMetadata = constructSegmentMetadata(newPartitionGroupMetadataList);
+    PartitionLevelStreamConfig streamConfig =
+        new PartitionLevelStreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
+
+    // get new partition groups and their metadata
+    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
+    StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
+        .createStreamMetadataProvider(streamConfig.getTopicName() + "_" + System.currentTimeMillis());
+    List<PartitionGroupMetadata> newPartitionGroupMetadataList =
+        streamMetadataProvider.getPartitionGroupMetadataList(Collections.emptyList(), 5000);
+    int numPartitionGroups = newPartitionGroupMetadataList.size();
+
+    InstancePartitions instancePartitions = getConsumingInstancePartitions(tableConfig);
+    int numReplicas = getNumReplicas(tableConfig, instancePartitions);
 
-    // create these new segments metadata
-    persistSegmentMetadata(segmentMetadata);
+    SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
+    Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap =
+        Collections.singletonMap(InstancePartitionsType.CONSUMING, instancePartitions);
 
-    // setup ideal state for the new segments
-    setupIdealStateForConsuming(segmentMetadata, numReplicas);
+    long currentTimeMs = getCurrentTimeMs();
+    Map<String, Map<String, String>> instanceStatesMap = idealState.getRecord().getMapFields();
+    for (PartitionGroupMetadata partitionGroupMetadata : newPartitionGroupMetadataList) {
+      String segmentName = setupNewPartitionGroup(tableConfig, streamConfig, partitionGroupMetadata.getPartitionGroupId(),
+          currentTimeMs, instancePartitions, numPartitionGroups, numReplicas);
+      updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, segmentName, segmentAssignment,
+          instancePartitionsMap);
+    }
+    setIdealState(realtimeTableName, idealState);
   }
 
   public boolean getIsSplitCommitEnabled() {
@@ -532,13 +536,50 @@ public class PinotLLCRealtimeSegmentManager {
     _helixResourceManager.sendSegmentRefreshMessage(realtimeTableName, committingSegmentName, false, true);
 
     // Step-2
+
+    // Say we are currently consuming from 3 shards A, B, C, of which A is the one committing. Also suppose that a new partition D has come up.
+    // Get current partition groups - this gives the current state of the latest segment for each partition: [A - DONE], [B - IN_PROGRESS], [C - IN_PROGRESS]
+    List<PartitionGroupMetadata> currentPartitionGroupMetadataList = getCurrentPartitionGroupMetadataList(idealState);
+    StreamConfig streamConfig = new StreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig));
+    StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(streamConfig);
+    StreamMetadataProvider streamMetadataProvider = streamConsumerFactory
+        .createStreamMetadataProvider(streamConfig.getTopicName() + " " + System.currentTimeMillis());
+    // find new partition groups [A],[B],[C],[D]
+    List<PartitionGroupMetadata> newPartitionGroupMetadataList =
+        streamMetadataProvider.getPartitionGroupMetadataList(currentPartitionGroupMetadataList, 1000);
+
+    // create new segment metadata, only if it is not IN_PROGRESS in the current state
+    Map<Integer, PartitionGroupMetadata> currentGroupIdToMetadata = currentPartitionGroupMetadataList.stream().collect(
+        Collectors.toMap(PartitionGroupMetadata::getPartitionGroupId, p -> p));
+
+    List<String> newConsumingSegmentNames = new ArrayList<>();
+    String rawTableName = TableNameBuilder.extractRawTableName(realtimeTableName);
     long newSegmentCreationTimeMs = getCurrentTimeMs();
-    LLCSegmentName newLLCSegmentName =
-        getNextLLCSegmentName(new LLCSegmentName(committingSegmentName), newSegmentCreationTimeMs);
-    createNewSegmentZKMetadata(tableConfig,
-        new PartitionLevelStreamConfig(tableConfig.getTableName(), IngestionConfigUtils.getStreamConfigMap(tableConfig)),
-        newLLCSegmentName, newSegmentCreationTimeMs, committingSegmentDescriptor, committingSegmentZKMetadata,
-        instancePartitions, numPartitions, numReplicas);
+    for (PartitionGroupMetadata partitionGroupMetadata : newPartitionGroupMetadataList) {
+      int newPartitionGroupId = partitionGroupMetadata.getPartitionGroupId();
+      PartitionGroupMetadata currentPartitionGroupMetadata = currentGroupIdToMetadata.get(newPartitionGroupId);
+      if (currentPartitionGroupMetadata == null) { // not present in current state
+        // make new segment
+        LLCSegmentName newLLCSegmentName =
+            new LLCSegmentName(rawTableName, newPartitionGroupId, STARTING_SEQUENCE_NUMBER, newSegmentCreationTimeMs);
+        createNewSegmentZKMetadata(tableConfig, new PartitionLevelStreamConfig(tableConfig.getTableName(),
+                IngestionConfigUtils.getStreamConfigMap(tableConfig)), newLLCSegmentName, newSegmentCreationTimeMs,
+            committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
+        newConsumingSegmentNames.add(newLLCSegmentName.getSegmentName());
+      } else {
+        String currentStatus = currentPartitionGroupMetadata.getStatus();
+        if (!currentStatus.equals(Status.IN_PROGRESS.toString())) { // not IN_PROGRESS anymore in current state
+          // make new segment
+          LLCSegmentName newLLCSegmentName = new LLCSegmentName(rawTableName, newPartitionGroupId,
+              currentPartitionGroupMetadata.getSequenceNumber() + 1, newSegmentCreationTimeMs);
+          createNewSegmentZKMetadata(tableConfig, new PartitionLevelStreamConfig(tableConfig.getTableName(),
+                  IngestionConfigUtils.getStreamConfigMap(tableConfig)), newLLCSegmentName, newSegmentCreationTimeMs,
+              committingSegmentDescriptor, committingSegmentZKMetadata, instancePartitions, numPartitions, numReplicas);
+          newConsumingSegmentNames.add(newLLCSegmentName.getSegmentName());
+        }
+      }
+    }
+
 
     // Step-3
     SegmentAssignment segmentAssignment = SegmentAssignmentFactory.getSegmentAssignment(_helixManager, tableConfig);
@@ -554,7 +595,7 @@ public class PinotLLCRealtimeSegmentManager {
     Lock lock = _idealStateUpdateLocks[lockIndex];
     try {
       lock.lock();
-      updateIdealStateOnSegmentCompletion(realtimeTableName, committingSegmentName, newLLCSegmentName.getSegmentName(),
+      updateIdealStateOnSegmentCompletion(realtimeTableName, committingSegmentName, newConsumingSegmentNames,
           segmentAssignment, instancePartitionsMap);
     } finally {
       lock.unlock();
@@ -845,7 +886,7 @@ public class PinotLLCRealtimeSegmentManager {
    */
   @VisibleForTesting
   void updateIdealStateOnSegmentCompletion(String realtimeTableName, String committingSegmentName,
-      String newSegmentName, SegmentAssignment segmentAssignment,
+      List<String> newSegmentNames, SegmentAssignment segmentAssignment,
       Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap) {
     HelixHelper.updateIdealState(_helixManager, realtimeTableName, idealState -> {
       assert idealState != null;
@@ -862,14 +903,18 @@ public class PinotLLCRealtimeSegmentManager {
             "Exceeded max segment completion time for segment " + committingSegmentName);
       }
       updateInstanceStatesForNewConsumingSegment(idealState.getRecord().getMapFields(), committingSegmentName,
-          newSegmentName, segmentAssignment, instancePartitionsMap);
+          null, segmentAssignment, instancePartitionsMap);
+      for (String newSegmentName : newSegmentNames) {
+        updateInstanceStatesForNewConsumingSegment(idealState.getRecord().getMapFields(), null,
+            newSegmentName, segmentAssignment, instancePartitionsMap);
+      }
       return idealState;
     }, RetryPolicies.exponentialBackoffRetryPolicy(10, 1000L, 1.2f));
   }
 
   @VisibleForTesting
   void updateInstanceStatesForNewConsumingSegment(Map<String, Map<String, String>> instanceStatesMap,
-      @Nullable String committingSegmentName, String newSegmentName, SegmentAssignment segmentAssignment,
+      @Nullable String committingSegmentName, @Nullable String newSegmentName, SegmentAssignment segmentAssignment,
       Map<InstancePartitionsType, InstancePartitions> instancePartitionsMap) {
     if (committingSegmentName != null) {
       // Change committing segment state to ONLINE
@@ -899,11 +944,11 @@ public class PinotLLCRealtimeSegmentManager {
       }
     }
     // Assign instances to the new segment and add instances as state CONSUMING
-    List<String> instancesAssigned =
-        segmentAssignment.assignSegment(newSegmentName, instanceStatesMap, instancePartitionsMap);
-    instanceStatesMap.put(newSegmentName,
-        SegmentAssignmentUtils.getInstanceStateMap(instancesAssigned, SegmentStateModel.CONSUMING));
-    LOGGER.info("Adding new CONSUMING segment: {} to instances: {}", newSegmentName, instancesAssigned);
+    if (newSegmentName != null) {
+      List<String> instancesAssigned =
+          segmentAssignment.assignSegment(newSegmentName, instanceStatesMap, instancePartitionsMap);
+      instanceStatesMap.put(newSegmentName,
+          SegmentAssignmentUtils.getInstanceStateMap(instancesAssigned, SegmentStateModel.CONSUMING));
+      LOGGER.info("Adding new CONSUMING segment: {} to instances: {}", newSegmentName, instancesAssigned);
+    }
   }
 
   /*
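
With both segment-name parameters now nullable, the same method covers every completion case: the updated completion path above calls it once with (committingSegmentName, null) and then once per new segment with (null, newSegmentName). A hedged sketch of the three call patterns (all variables assumed in scope; segment names illustrative):

    // Case 1: classic rollover -- committing segment goes ONLINE and its
    // successor starts CONSUMING, in a single call.
    updateInstanceStatesForNewConsumingSegment(instanceStatesMap, committingName, successorName,
        segmentAssignment, instancePartitionsMap);
    // Case 2: partition group reached end of life (status no longer
    // IN_PROGRESS) -- commit with no successor.
    updateInstanceStatesForNewConsumingSegment(instanceStatesMap, committingName, null,
        segmentAssignment, instancePartitionsMap);
    // Case 3: newly discovered partition group -- start a CONSUMING segment
    // with nothing to commit.
    updateInstanceStatesForNewConsumingSegment(instanceStatesMap, null, newGroupSegmentName,
        segmentAssignment, instancePartitionsMap);
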
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakePartitionGroupMetadata.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakePartitionGroupMetadata.java
deleted file mode 100644
index 78ee12c..0000000
--- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/fakestream/FakePartitionGroupMetadata.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package org.apache.pinot.core.realtime.impl.fakestream;
-
-import org.apache.pinot.spi.stream.Checkpoint;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
-
-
-public class FakePartitionGroupMetadata implements PartitionGroupMetadata {
-
-  private final int _groupId;
-  public FakePartitionGroupMetadata(int groupId) {
-    _groupId = groupId;
-  }
-
-  @Override
-  public int getGroupId() {
-    return getGroupId();
-  }
-
-  @Override
-  public Checkpoint getStartCheckpoint() {
-    return null;
-  }
-
-  @Override
-  public Checkpoint getEndCheckpoint() {
-    return null;
-  }
-
-  @Override
-  public void setStartCheckpoint(Checkpoint startCheckpoint) {
-
-  }
-
-  @Override
-  public void setEndCheckpoint(Checkpoint endCheckpoint) {
-
-  }
-
-  @Override
-  public byte[] serialize() {
-    return new byte[0];
-  }
-
-  @Override
-  public PartitionGroupMetadata deserialize(byte[] blob) {
-    return null;
-  }
-}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionGroupMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionGroupMetadata.java
deleted file mode 100644
index 1d792ac..0000000
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-0.9/src/main/java/org/apache/pinot/plugin/stream/kafka09/KafkaPartitionGroupMetadata.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package org.apache.pinot.plugin.stream.kafka09;
-
-import org.apache.pinot.spi.stream.Checkpoint;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
-
-
-public class KafkaPartitionGroupMetadata implements PartitionGroupMetadata {
-
-  private final int _groupId;
-  public KafkaPartitionGroupMetadata(int partitionId) {
-    _groupId = partitionId;
-  }
-
-  @Override
-  public int getGroupId() {
-    return _groupId;
-  }
-
-  @Override
-  public Checkpoint getStartCheckpoint() {
-    return null;
-  }
-
-  @Override
-  public Checkpoint getEndCheckpoint() {
-    return null;
-  }
-
-  @Override
-  public void setStartCheckpoint(Checkpoint startCheckpoint) {
-
-  }
-
-  @Override
-  public void setEndCheckpoint(Checkpoint endCheckpoint) {
-
-  }
-
-  @Override
-  public byte[] serialize() {
-    return new byte[0];
-  }
-
-  @Override
-  public PartitionGroupMetadata deserialize(byte[] blob) {
-    return null;
-  }
-}
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionGroupMetadata.java b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionGroupMetadata.java
deleted file mode 100644
index 31ae75a..0000000
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/src/main/java/org/apache/pinot/plugin/stream/kafka20/KafkaPartitionGroupMetadata.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package org.apache.pinot.plugin.stream.kafka20;
-
-import org.apache.pinot.spi.stream.Checkpoint;
-import org.apache.pinot.spi.stream.PartitionGroupMetadata;
-
-
-public class KafkaPartitionGroupMetadata implements PartitionGroupMetadata {
-
-  private final int _groupId;
-  public KafkaPartitionGroupMetadata(int partitionId) {
-    _groupId = partitionId;
-  }
-
-  @Override
-  public int getGroupId() {
-    return _groupId;
-  }
-
-  @Override
-  public Checkpoint getStartCheckpoint() {
-    return null;
-  }
-
-  @Override
-  public Checkpoint getEndCheckpoint() {
-    return null;
-  }
-
-  @Override
-  public void setStartCheckpoint(Checkpoint startCheckpoint) {
-
-  }
-
-  @Override
-  public void setEndCheckpoint(Checkpoint endCheckpoint) {
-
-  }
-
-  @Override
-  public byte[] serialize() {
-    return new byte[0];
-  }
-
-  @Override
-  public PartitionGroupMetadata deserialize(byte[] blob) {
-    return null;
-  }
-}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
index 0f44173..f662d99 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/PartitionGroupMetadata.java
@@ -18,22 +18,56 @@
  */
 package org.apache.pinot.spi.stream;
 
-import java.util.List;
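+/**
+ * Metadata for one partition group of a stream: the group id, the sequence number of the
+ * latest segment, that segment's start/end checkpoints, and the current consumption status.
+ */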
+public class PartitionGroupMetadata {
 
+  // FIXME: make partitionGroupId a String everywhere (LLCSegmentName, StreamMetadataProvider)
+  private final int _partitionGroupId;
+  private int _sequenceNumber;
+  private String _startCheckpoint;
+  private String _endCheckpoint;
+  private String _status;
 
-public interface PartitionGroupMetadata {
+  public PartitionGroupMetadata(int partitionGroupId, int sequenceNumber, String startCheckpoint,
+      String endCheckpoint, String status) {
+    _partitionGroupId = partitionGroupId;
+    _sequenceNumber = sequenceNumber;
+    _startCheckpoint = startCheckpoint;
+    _endCheckpoint = endCheckpoint;
+    _status = status;
+  }
 
-  int getGroupId();
+  public void setSequenceNumber(int sequenceNumber) {
+    _sequenceNumber = sequenceNumber;
+  }
 
-  Checkpoint getStartCheckpoint(); // similar to getStartOffset
+  public void setStartCheckpoint(String startCheckpoint) {
+    _startCheckpoint = startCheckpoint;
+  }
 
-  Checkpoint getEndCheckpoint(); // similar to getEndOffset
+  public void setEndCheckpoint(String endCheckpoint) {
+    _endCheckpoint = endCheckpoint;
+  }
 
-  void setStartCheckpoint(Checkpoint startCheckpoint);
+  public int getPartitionGroupId() {
+    return _partitionGroupId;
+  }
 
-  void setEndCheckpoint(Checkpoint endCheckpoint);
+  public int getSequenceNumber() {
+    return _sequenceNumber;
+  }
 
-  byte[] serialize();
+  public String getStartCheckpoint() {
+    return _startCheckpoint;
+  }
 
-  PartitionGroupMetadata deserialize(byte[] blob);
+  public String getEndCheckpoint() {
+    return _endCheckpoint;
+  }
+
+  public String getStatus() {
+    return _status;
+  }
+
+  public void setStatus(String status) {
+    _status = status;
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org