You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2021/01/16 10:32:28 UTC

[GitHub] [hudi] peng-xin commented on issue #2448: [SUPPORT] deltacommit for client 172.16.116.102 already exists

peng-xin commented on issue #2448:
URL: https://github.com/apache/hudi/issues/2448#issuecomment-761540885


   > Can you provide the full dump of the logs and .hoodie/ folder ?
   
   The logs have been cleared, but I still have some screenshots:
   ![image](https://user-images.githubusercontent.com/13350489/104809247-d629d780-5826-11eb-94cd-f164b93da130.png)
   ![image](https://user-images.githubusercontent.com/13350489/104809252-da55f500-5826-11eb-8941-421ff86c6133.png)
   ![image](https://user-images.githubusercontent.com/13350489/104809239-c27e7100-5826-11eb-8954-5221e7e36984.png)
   ![image](https://user-images.githubusercontent.com/13350489/104809554-26099e00-5829-11eb-86ee-47a01397b22a.png)
   
   When I set `hoodie.auto.commit = false`, the error is gone.
   But how can I limit the log file size? My log file is very large (3GB+), and the log file version is always 1.
   ![image](https://user-images.githubusercontent.com/13350489/104809283-2a34bc00-5827-11eb-950f-468d2362dbd7.png)
   
   When I change to `hoodie.cleaner.policy = KEEP_LATEST_FILE_VERSIONS` and `hoodie.cleaner.fileversions.retained = 1`, the old data files are cleaned, but how can I clean the old log files (or the old log file commits)? Setting `hoodie.cleaner.policy = KEEP_LATEST_COMMITS` and `hoodie.cleaner.commits.retained = 1` has no effect.
   My current config is:
   > hoodie.filesystem.view.incr.timeline.sync.enable -> false,
   hoodie.bulkinsert.sort.mode -> GLOBAL_SORT,
   hoodie.avro.schema.externalTransformation -> false,
   hoodie.bootstrap.parallelism -> 1500,
   hoodie.delete.shuffle.parallelism -> 1500,
   hoodie.simple.index.use.caching -> true,
   hoodie.bloom.index.filter.type -> DYNAMIC_V0,
   hoodie.filesystem.view.remote.port -> 26754,
   hoodie.datasource.write.operation -> upsert,
   hoodie.cleaner.parallelism -> 200,
   hoodie.global.simple.index.parallelism -> 100,
   hoodie.bootstrap.mode.selector.regex -> .*,
   hoodie.parquet.page.size -> 1048576,
   hoodie.datasource.write.table.type -> MERGE_ON_READ,
   hoodie.datasource.hive_sync.table -> f_mid_business_card,
   hoodie.compaction.daybased.target.partitions -> 10,
   hoodie.metrics.reporter.class -> ,
   hoodie.parquet.block.size -> 125829120,
   hoodie.cleaner.delete.bootstrap.base.file -> false,
   hoodie.consistency.check.max_interval_ms -> 300000,
   hoodie.insert.shuffle.parallelism -> 100,
   hoodie.upsert.shuffle.parallelism -> 100,
   hoodie.bulkinsert.shuffle.parallelism -> 1500,
   hoodie.write.commit.callback.on -> false,
   hoodie.cleaner.fileversions.retained -> 1,
   hoodie.datasource.hive_sync.partition_extractor_class -> org.apache.hudi.hive.NonPartitionedExtractor,
   hoodie.parquet.compression.codec -> gzip,
   hoodie.datasource.write.hive_style_partitioning -> true,
   hoodie.copyonwrite.insert.split.size -> 500000,
   hoodie.optimistic.consistency.guard.sleep_time_ms -> 500,
   hoodie.datasource.hive_sync.use_jdbc -> true,
   hoodie.metrics.reporter.type -> GRAPHITE,
   hoodie.bootstrap.index.class -> org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex,
   hoodie.filesystem.remote.backup.view.enable -> true,
   hoodie.logfile.to.parquet.compression.ratio -> 0.35,
   hoodie.filesystem.view.spillable.mem -> 104857600,
   hoodie.write.status.storage.level -> MEMORY_AND_DISK_SER,
   hoodie.write.commit.callback.http.timeout.seconds -> 3,
   hoodie.copyonwrite.insert.auto.split -> true,
   hoodie.logfile.data.block.max.size -> 268435456,
   hoodie.index.type -> BLOOM,
   hoodie.keep.min.commits -> 6,
   hoodie.memory.spillable.map.path -> /tmp/,
   hoodie.filesystem.view.rocksdb.base.path -> /tmp/hoodie_timeline_rocksdb,
   hoodie.compact.inline -> false,
   hoodie.clean.async -> true,
   hoodie.record.size.estimation.threshold -> 1.0,
   hoodie.metrics.graphite.host -> localhost,
   hoodie.simple.index.update.partition.path -> false,
   hoodie.bloom.index.filter.dynamic.max.entries -> 100000,
   hoodie.compaction.reverse.log.read -> false,
   hoodie.metrics.jmx.port -> 9889,
   hoodie.writestatus.class -> org.apache.hudi.client.WriteStatus,
   hoodie.datasource.hive_sync.enable -> true,
   hoodie.finalize.write.parallelism -> 1500,
   hoodie.rollback.parallelism -> 100,
   hoodie.index.bloom.num_entries -> 60000,
   hoodie.memory.merge.max.size -> 134217728,
   hoodie.bootstrap.mode.selector.regex.mode -> METADATA_ONLY,
   hoodie.rollback.using.markers -> false,
   hoodie.copyonwrite.record.size.estimate -> 1024,
   hoodie.bloom.index.input.storage.level -> MEMORY_AND_DISK_SER,
   hoodie.simple.index.parallelism -> 50,
   hoodie.consistency.check.enabled -> false,
   hoodie.bloom.index.use.caching -> true,
   hoodie.metrics.on -> false,
   hoodie.memory.compaction.max.size -> 1073741824,
   hoodie.parquet.small.file.limit -> 104857600,
   hoodie.combine.before.insert -> false,
   hoodie.cleaner.commits.retained -> 1,
   hoodie.embed.timeline.server -> true,
   hoodie.bootstrap.mode.selector -> org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector,
   hoodie.filesystem.view.secondary.type -> MEMORY,
   _.hoodie.allow.multi.write.on.same.instant -> false,
   hoodie.datasource.write.partitionpath.field -> ,
   _hoodie.optimistic.consistency.guard.enable -> true,
   hoodie.datasource.hive_sync.database -> hbase,
   hoodie.bloom.index.update.partition.path -> true,
   hoodie.fail.on.timeline.archiving -> true,
   hoodie.markers.delete.parallelism -> 100,
   hoodie.filesystem.view.type -> MEMORY,
   hoodie.parquet.max.file.size -> 125829120,
   hoodie.datasource.write.keygenerator.class -> org.apache.hudi.keygen.NonpartitionedKeyGenerator,
   hoodie.bootstrap.partitionpath.translator.class -> org.apache.hudi.client.bootstrap.translator.IdentityBootstrapPartitionPathTranslator,
   hoodie.bloom.index.prune.by.ranges -> true,
   hoodie.base.path -> /user/datalake/hudi/hbase/f_mid_business_card,
   hoodie.index.class -> ,
   hoodie.clean.automatic -> true,
   hoodie.filesystem.view.remote.host -> localhost,
   hoodie.compaction.lazy.block.read -> false,
   hoodie.memory.writestatus.failure.fraction -> 0.1,
   hoodie.metrics.graphite.port -> 4756,
   hoodie.cleaner.policy -> KEEP_LATEST_FILE_VERSIONS,
   hoodie.logfile.max.size -> 1073741824,
   hoodie.filesystem.view.spillable.compaction.mem.fraction -> 0.01,
   hoodie.datasource.write.recordkey.field -> datalake_rowkey,
   hoodie.avro.schema.validate -> false,
   hoodie.simple.index.input.storage.level -> MEMORY_AND_DISK_SER,
   hoodie.timeline.layout.version -> 1,
   hoodie.consistency.check.max_checks -> 7,
   hoodie.consistency.check.initial_interval_ms -> 2000,
   hoodie.keep.max.commits -> 8,
   hoodie.compact.inline.max.delta.commits -> 5,
   hoodie.parquet.compression.ratio -> 0.1,
   hoodie.memory.dfs.buffer.max.size -> 16777216,
   hoodie.auto.commit -> false,
   hoodie.write.commit.callback.http.api.key -> hudi_write_commit_http_callback,
   hoodie.assume.date.partitioning -> false,
   hoodie.filesystem.view.spillable.dir -> /tmp/view_map/,
   hoodie.compaction.strategy -> org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy,
   hoodie.combine.before.upsert -> true,
   hoodie.bloom.index.keys.per.bucket -> 10000000,
   hoodie.write.commit.callback.class -> org.apache.hudi.callback.impl.HoodieWriteCommitHttpCallback,
   hoodie.bloom.index.parallelism -> 0,
   hoodie.cleaner.incremental.mode -> true,
   hoodie.commits.archival.batch -> 5,
   hoodie.datasource.hive_sync.partition_fields -> ,
   hoodie.compaction.target.io -> 512000,
   hoodie.table.name -> f_mid_business_card,
   hoodie.bloom.index.bucketized.checking -> true,
   hoodie.compaction.payload.class -> org.apache.hudi.common.model.OverwriteWithLatestAvroPayload,
   hoodie.combine.before.delete -> true,
   hoodie.datasource.write.precombine.field -> ts,
   hoodie.filesystem.view.spillable.bootstrap.base.file.mem.fraction -> 0.05,
   hoodie.metrics.jmx.host -> localhost,
   hoodie.index.bloom.fpp -> 0.000000001,
   hoodie.datasource.hive_sync.jdbcurl -> jdbc:hive2://172.16.116.102:10000,
   hoodie.bloom.index.use.treebased.filter -> true
   
   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org