You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2021/01/16 10:32:28 UTC
[GitHub] [hudi] peng-xin commented on issue #2448: [SUPPORT] deltacommit for client 172.16.116.102 already exists
peng-xin commented on issue #2448:
URL: https://github.com/apache/hudi/issues/2448#issuecomment-761540885
> Can you provide the full dump of the logs and .hoodie/ folder ?
The logs have already been cleared, but I still have some screenshots:
![image](https://user-images.githubusercontent.com/13350489/104809247-d629d780-5826-11eb-94cd-f164b93da130.png)
![image](https://user-images.githubusercontent.com/13350489/104809252-da55f500-5826-11eb-8941-421ff86c6133.png)
![image](https://user-images.githubusercontent.com/13350489/104809239-c27e7100-5826-11eb-8954-5221e7e36984.png)
![image](https://user-images.githubusercontent.com/13350489/104809554-26099e00-5829-11eb-86ee-47a01397b22a.png)
When I set `hoodie.auto.commit = false`, the error is gone.
But how can I limit the log file size? My log file is very large (3GB+), and the log file version is always 1.
![image](https://user-images.githubusercontent.com/13350489/104809283-2a34bc00-5827-11eb-950f-468d2362dbd7.png)
When I change to `hoodie.cleaner.policy = KEEP_LATEST_FILE_VERSIONS` and `hoodie.cleaner.fileversions.retained = 1`, the old data files are cleaned, but how do I clean the old log files (or the old log file commits)? Setting `hoodie.cleaner.policy = KEEP_LATEST_COMMITS` and `hoodie.cleaner.commits.retained = 1` has no effect.
My current config is:
> hoodie.filesystem.view.incr.timeline.sync.enable -> false,
hoodie.bulkinsert.sort.mode -> GLOBAL_SORT,
hoodie.avro.schema.externalTransformation -> false,
hoodie.bootstrap.parallelism -> 1500,
hoodie.delete.shuffle.parallelism -> 1500,
hoodie.simple.index.use.caching -> true,
hoodie.bloom.index.filter.type -> DYNAMIC_V0,
hoodie.filesystem.view.remote.port -> 26754,
hoodie.datasource.write.operation -> upsert,
hoodie.cleaner.parallelism -> 200,
hoodie.global.simple.index.parallelism -> 100,
hoodie.bootstrap.mode.selector.regex -> .*,
hoodie.parquet.page.size -> 1048576,
hoodie.datasource.write.table.type -> MERGE_ON_READ,
hoodie.datasource.hive_sync.table -> f_mid_business_card,
hoodie.compaction.daybased.target.partitions -> 10,
hoodie.metrics.reporter.class -> ,
hoodie.parquet.block.size -> 125829120,
hoodie.cleaner.delete.bootstrap.base.file -> false,
hoodie.consistency.check.max_interval_ms -> 300000,
hoodie.insert.shuffle.parallelism -> 100,
hoodie.upsert.shuffle.parallelism -> 100,
hoodie.bulkinsert.shuffle.parallelism -> 1500,
hoodie.write.commit.callback.on -> false,
hoodie.cleaner.fileversions.retained -> 1,
hoodie.datasource.hive_sync.partition_extractor_class -> org.apache.hudi.hive.NonPartitionedExtractor,
hoodie.parquet.compression.codec -> gzip,
hoodie.datasource.write.hive_style_partitioning -> true,
hoodie.copyonwrite.insert.split.size -> 500000,
hoodie.optimistic.consistency.guard.sleep_time_ms -> 500,
hoodie.datasource.hive_sync.use_jdbc -> true,
hoodie.metrics.reporter.type -> GRAPHITE,
hoodie.bootstrap.index.class -> org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex,
hoodie.filesystem.remote.backup.view.enable -> true,
hoodie.logfile.to.parquet.compression.ratio -> 0.35,
hoodie.filesystem.view.spillable.mem -> 104857600,
hoodie.write.status.storage.level -> MEMORY_AND_DISK_SER,
hoodie.write.commit.callback.http.timeout.seconds -> 3,
hoodie.copyonwrite.insert.auto.split -> true,
hoodie.logfile.data.block.max.size -> 268435456,
hoodie.index.type -> BLOOM,
hoodie.keep.min.commits -> 6,
hoodie.memory.spillable.map.path -> /tmp/,
hoodie.filesystem.view.rocksdb.base.path -> /tmp/hoodie_timeline_rocksdb,
hoodie.compact.inline -> false,
hoodie.clean.async -> true,
hoodie.record.size.estimation.threshold -> 1.0,
hoodie.metrics.graphite.host -> localhost,
hoodie.simple.index.update.partition.path -> false,
hoodie.bloom.index.filter.dynamic.max.entries -> 100000,
hoodie.compaction.reverse.log.read -> false,
hoodie.metrics.jmx.port -> 9889,
hoodie.writestatus.class -> org.apache.hudi.client.WriteStatus,
hoodie.datasource.hive_sync.enable -> true,
hoodie.finalize.write.parallelism -> 1500,
hoodie.rollback.parallelism -> 100,
hoodie.index.bloom.num_entries -> 60000,
hoodie.memory.merge.max.size -> 134217728,
hoodie.bootstrap.mode.selector.regex.mode -> METADATA_ONLY,
hoodie.rollback.using.markers -> false,
hoodie.copyonwrite.record.size.estimate -> 1024,
hoodie.bloom.index.input.storage.level -> MEMORY_AND_DISK_SER,
hoodie.simple.index.parallelism -> 50,
hoodie.consistency.check.enabled -> false,
hoodie.bloom.index.use.caching -> true,
hoodie.metrics.on -> false,
hoodie.memory.compaction.max.size -> 1073741824,
hoodie.parquet.small.file.limit -> 104857600,
hoodie.combine.before.insert -> false,
hoodie.cleaner.commits.retained -> 1,
hoodie.embed.timeline.server -> true,
hoodie.bootstrap.mode.selector -> org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector,
hoodie.filesystem.view.secondary.type -> MEMORY,
_.hoodie.allow.multi.write.on.same.instant -> false,
hoodie.datasource.write.partitionpath.field -> ,
_hoodie.optimistic.consistency.guard.enable -> true,
hoodie.datasource.hive_sync.database -> hbase,
hoodie.bloom.index.update.partition.path -> true,
hoodie.fail.on.timeline.archiving -> true,
hoodie.markers.delete.parallelism -> 100,
hoodie.filesystem.view.type -> MEMORY,
hoodie.parquet.max.file.size -> 125829120,
hoodie.datasource.write.keygenerator.class -> org.apache.hudi.keygen.NonpartitionedKeyGenerator,
hoodie.bootstrap.partitionpath.translator.class -> org.apache.hudi.client.bootstrap.translator.IdentityBootstrapPartitionPathTranslator,
hoodie.bloom.index.prune.by.ranges -> true,
hoodie.base.path -> /user/datalake/hudi/hbase/f_mid_business_card,
hoodie.index.class -> ,
hoodie.clean.automatic -> true,
hoodie.filesystem.view.remote.host -> localhost,
hoodie.compaction.lazy.block.read -> false,
hoodie.memory.writestatus.failure.fraction -> 0.1,
hoodie.metrics.graphite.port -> 4756,
hoodie.cleaner.policy -> KEEP_LATEST_FILE_VERSIONS,
hoodie.logfile.max.size -> 1073741824,
hoodie.filesystem.view.spillable.compaction.mem.fraction -> 0.01,
hoodie.datasource.write.recordkey.field -> datalake_rowkey,
hoodie.avro.schema.validate -> false,
hoodie.simple.index.input.storage.level -> MEMORY_AND_DISK_SER,
hoodie.timeline.layout.version -> 1,
hoodie.consistency.check.max_checks -> 7,
hoodie.consistency.check.initial_interval_ms -> 2000,
hoodie.keep.max.commits -> 8,
hoodie.compact.inline.max.delta.commits -> 5,
hoodie.parquet.compression.ratio -> 0.1,
hoodie.memory.dfs.buffer.max.size -> 16777216,
hoodie.auto.commit -> false,
hoodie.write.commit.callback.http.api.key -> hudi_write_commit_http_callback,
hoodie.assume.date.partitioning -> false,
hoodie.filesystem.view.spillable.dir -> /tmp/view_map/,
hoodie.compaction.strategy -> org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy,
hoodie.combine.before.upsert -> true,
hoodie.bloom.index.keys.per.bucket -> 10000000,
hoodie.write.commit.callback.class -> org.apache.hudi.callback.impl.HoodieWriteCommitHttpCallback,
hoodie.bloom.index.parallelism -> 0,
hoodie.cleaner.incremental.mode -> true,
hoodie.commits.archival.batch -> 5,
hoodie.datasource.hive_sync.partition_fields -> ,
hoodie.compaction.target.io -> 512000,
hoodie.table.name -> f_mid_business_card,
hoodie.bloom.index.bucketized.checking -> true,
hoodie.compaction.payload.class -> org.apache.hudi.common.model.OverwriteWithLatestAvroPayload,
hoodie.combine.before.delete -> true,
hoodie.datasource.write.precombine.field -> ts,
hoodie.filesystem.view.spillable.bootstrap.base.file.mem.fraction -> 0.05,
hoodie.metrics.jmx.host -> localhost,
hoodie.index.bloom.fpp -> 0.000000001,
hoodie.datasource.hive_sync.jdbcurl -> jdbc:hive2://172.16.116.102:10000,
hoodie.bloom.index.use.treebased.filter -> true
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org