You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-issues@hadoop.apache.org by "Steve Loughran (JIRA)" <ji...@apache.org> on 2018/04/30 18:49:00 UTC

[jira] [Comment Edited] (HADOOP-15421) Stabilise/formalise the JSON _SUCCESS format used in the S3A committers

    [ https://issues.apache.org/jira/browse/HADOOP-15421?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16458869#comment-16458869 ] 

Steve Loughran edited comment on HADOOP-15421 at 4/30/18 6:48 PM:
------------------------------------------------------------------

Currently, the _SUCCESS file looks like this:
{code}
{
  "name" : "org.apache.hadoop.fs.s3a.commit.files.SuccessData/1",
  "timestamp" : 1525099686641,
  "date" : "Mon Apr 30 14:48:06 UTC 2018",
  "hostname" : "stevel",
  "committer" : "directory",
  "description" : "Task committer attempt_1525098749694_0003_m_000000_0",
  "metrics" : {
    "stream_write_block_uploads" : 0,
    "files_created" : 0,
    "S3guard_metadatastore_put_path_latencyNumOps" : 0,
    "stream_write_block_uploads_aborted" : 0,
    "committer_commits_reverted" : 0,
    "op_open" : 0,
    "stream_closed" : 0,
    "committer_magic_files_created" : 0,
    "object_copy_requests" : 0,
    "s3guard_metadatastore_initialization" : 1,
    "S3guard_metadatastore_put_path_latency90thPercentileLatency" : 0,
    "stream_write_block_uploads_committed" : 0,
    "S3guard_metadatastore_throttle_rate75thPercentileFrequency (Hz)" : 0,
    "S3guard_metadatastore_throttle_rate90thPercentileFrequency (Hz)" : 0,
    "committer_bytes_committed" : 5017,
    "op_create" : 0,
    "stream_read_fully_operations" : 0,
    "committer_commits_completed" : 1,
    "object_put_requests_active" : 0,
    "s3guard_metadatastore_retry" : 0,
    "stream_write_block_uploads_active" : 0,
    "stream_opened" : 0,
    "S3guard_metadatastore_throttle_rate95thPercentileFrequency (Hz)" : 0,
    "op_create_non_recursive" : 0,
    "object_continue_list_requests" : 0,
    "committer_jobs_completed" : 1,
    "S3guard_metadatastore_put_path_latency50thPercentileLatency" : 0,
    "stream_close_operations" : 0,
    "stream_read_operations" : 0,
    "object_delete_requests" : 1,
    "fake_directories_deleted" : 4,
    "stream_aborted" : 0,
    "op_rename" : 0,
    "object_multipart_aborted" : 0,
    "committer_commits_created" : 0,
    "op_get_file_status" : 2,
    "s3guard_metadatastore_put_path_request" : 1,
    "committer_commits_failed" : 0,
    "stream_bytes_read_in_close" : 0,
    "op_glob_status" : 0,
    "stream_read_exceptions" : 0,
    "op_exists" : 2,
    "S3guard_metadatastore_throttle_rate50thPercentileFrequency (Hz)" : 0,
    "S3guard_metadatastore_put_path_latency95thPercentileLatency" : 0,
    "stream_write_block_uploads_pending" : 0,
    "directories_created" : 0,
    "S3guard_metadatastore_throttle_rateNumEvents" : 0,
    "S3guard_metadatastore_put_path_latency99thPercentileLatency" : 0,
    "stream_bytes_backwards_on_seek" : 0,
    "stream_bytes_read" : 0,
    "stream_write_total_data" : 0,
    "committer_jobs_failed" : 0,
    "stream_read_operations_incomplete" : 0,
    "files_copied_bytes" : 0,
    "op_delete" : 0,
    "object_put_bytes_pending" : 0,
    "stream_write_block_uploads_data_pending" : 0,
    "op_list_located_status" : 0,
    "object_list_requests" : 2,
    "stream_forward_seek_operations" : 0,
    "committer_tasks_completed" : 0,
    "committer_commits_aborted" : 0,
    "object_metadata_requests" : 4,
    "object_put_requests_completed" : 0,
    "stream_seek_operations" : 0,
    "op_list_status" : 0,
    "store_io_throttled" : 0,
    "stream_write_failures" : 0,
    "op_get_file_checksum" : 0,
    "files_copied" : 0,
    "ignored_errors" : 0,
    "committer_bytes_uploaded" : 0,
    "committer_tasks_failed" : 0,
    "stream_bytes_skipped_on_seek" : 0,
    "op_list_files" : 0,
    "files_deleted" : 0,
    "stream_bytes_discarded_in_abort" : 0,
    "op_mkdirs" : 0,
    "op_copy_from_local_file" : 0,
    "op_is_directory" : 0,
    "s3guard_metadatastore_throttled" : 0,
    "S3guard_metadatastore_put_path_latency75thPercentileLatency" : 0,
    "stream_write_total_time" : 0,
    "stream_backward_seek_operations" : 0,
    "object_put_requests" : 0,
    "object_put_bytes" : 0,
    "directories_deleted" : 0,
    "op_is_file" : 0,
    "S3guard_metadatastore_throttle_rate99thPercentileFrequency (Hz)" : 0
  },
  "diagnostics" : {
    "fs.s3a.metadatastore.impl" : "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore",
    "fs.s3a.committer.magic.enabled" : "true",
    "fs.s3a.metadatastore.authoritative" : "false"
  },
  "filenames" : [ "/hwdev-steve-ireland/mr_job_dir/output/part-r-00000" ]
}
{code}


was (Author: stevel@apache.org):
Currently
{code}
{
  "name" : "org.apache.hadoop.fs.s3a.commit.files.SuccessData/1",
  "timestamp" : 1525099686641,
  "date" : "Mon Apr 30 14:48:06 UTC 2018",
  "hostname" : "stevel",
  "committer" : "directory",
  "description" : "Task committer attempt_1525098749694_0003_m_000000_0",
  "metrics" : {
    "stream_write_block_uploads" : 0,
    "files_created" : 0,
    "S3guard_metadatastore_put_path_latencyNumOps" : 0,
    "stream_write_block_uploads_aborted" : 0,
    "committer_commits_reverted" : 0,
    "op_open" : 0,
    "stream_closed" : 0,
    "committer_magic_files_created" : 0,
    "object_copy_requests" : 0,
    "s3guard_metadatastore_initialization" : 1,
    "S3guard_metadatastore_put_path_latency90thPercentileLatency" : 0,
    "stream_write_block_uploads_committed" : 0,
    "S3guard_metadatastore_throttle_rate75thPercentileFrequency (Hz)" : 0,
    "S3guard_metadatastore_throttle_rate90thPercentileFrequency (Hz)" : 0,
    "committer_bytes_committed" : 5017,
    "op_create" : 0,
    "stream_read_fully_operations" : 0,
    "committer_commits_completed" : 1,
    "object_put_requests_active" : 0,
    "s3guard_metadatastore_retry" : 0,
    "stream_write_block_uploads_active" : 0,
    "stream_opened" : 0,
    "S3guard_metadatastore_throttle_rate95thPercentileFrequency (Hz)" : 0,
    "op_create_non_recursive" : 0,
    "object_continue_list_requests" : 0,
    "committer_jobs_completed" : 1,
    "S3guard_metadatastore_put_path_latency50thPercentileLatency" : 0,
    "stream_close_operations" : 0,
    "stream_read_operations" : 0,
    "object_delete_requests" : 1,
    "fake_directories_deleted" : 4,
    "stream_aborted" : 0,
    "op_rename" : 0,
    "object_multipart_aborted" : 0,
    "committer_commits_created" : 0,
    "op_get_file_status" : 2,
    "s3guard_metadatastore_put_path_request" : 1,
    "committer_commits_failed" : 0,
    "stream_bytes_read_in_close" : 0,
    "op_glob_status" : 0,
    "stream_read_exceptions" : 0,
    "op_exists" : 2,
    "S3guard_metadatastore_throttle_rate50thPercentileFrequency (Hz)" : 0,
    "S3guard_metadatastore_put_path_latency95thPercentileLatency" : 0,
    "stream_write_block_uploads_pending" : 0,
    "directories_created" : 0,
    "S3guard_metadatastore_throttle_rateNumEvents" : 0,
    "S3guard_metadatastore_put_path_latency99thPercentileLatency" : 0,
    "stream_bytes_backwards_on_seek" : 0,
    "stream_bytes_read" : 0,
    "stream_write_total_data" : 0,
    "committer_jobs_failed" : 0,
    "stream_read_operations_incomplete" : 0,
    "files_copied_bytes" : 0,
    "op_delete" : 0,
    "object_put_bytes_pending" : 0,
    "stream_write_block_uploads_data_pending" : 0,
    "op_list_located_status" : 0,
    "object_list_requests" : 2,
    "stream_forward_seek_operations" : 0,
    "committer_tasks_completed" : 0,
    "committer_commits_aborted" : 0,
    "object_metadata_requests" : 4,
    "object_put_requests_completed" : 0,
    "stream_seek_operations" : 0,
    "op_list_status" : 0,
    "store_io_throttled" : 0,
    "stream_write_failures" : 0,
    "op_get_file_checksum" : 0,
    "files_copied" : 0,
    "ignored_errors" : 0,
    "committer_bytes_uploaded" : 0,
    "committer_tasks_failed" : 0,
    "stream_bytes_skipped_on_seek" : 0,
    "op_list_files" : 0,
    "files_deleted" : 0,
    "stream_bytes_discarded_in_abort" : 0,
    "op_mkdirs" : 0,
    "op_copy_from_local_file" : 0,
    "op_is_directory" : 0,
    "s3guard_metadatastore_throttled" : 0,
    "S3guard_metadatastore_put_path_latency75thPercentileLatency" : 0,
    "stream_write_total_time" : 0,
    "stream_backward_seek_operations" : 0,
    "object_put_requests" : 0,
    "object_put_bytes" : 0,
    "directories_deleted" : 0,
    "op_is_file" : 0,
    "S3guard_metadatastore_throttle_rate99thPercentileFrequency (Hz)" : 0
  },
  "diagnostics" : {
    "fs.s3a.metadatastore.impl" : "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore",
    "fs.s3a.committer.magic.enabled" : "true",
    "fs.s3a.metadatastore.authoritative" : "false"
  },
  "filenames" : [ "/hwdev-steve-ireland/mr_job_dir/output/part-r-00000" ]
}

> Stabilise/formalise the JSON _SUCCESS format used in the S3A committers
> -----------------------------------------------------------------------
>
>                 Key: HADOOP-15421
>                 URL: https://issues.apache.org/jira/browse/HADOOP-15421
>             Project: Hadoop Common
>          Issue Type: Sub-task
>    Affects Versions: 3.2.0
>            Reporter: Steve Loughran
>            Priority: Major
>
> The S3A committers rely on an atomic PUT to save a JSON summary of the job to the destination filesystem, containing files, statistics, etc. This is for internal testing, but it turns out to be useful for Spark integration testing, Hive, etc.
> IBM's Stocator also generated a manifest.
> Proposed: come up with an (extensible) design that we are happy with as a long-lived format.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-issues-help@hadoop.apache.org