You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "ASF GitHub Bot (JIRA)" <ji...@apache.org> on 2017/08/30 18:43:01 UTC
[jira] [Commented] (HIVE-16219) metastore notification_log contains
serialized message with non functional fields
[ https://issues.apache.org/jira/browse/HIVE-16219?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16147796#comment-16147796 ]
ASF GitHub Bot commented on HIVE-16219:
---------------------------------------
Github user anishek closed the pull request at:
https://github.com/apache/hive/pull/159
> metastore notification_log contains serialized message with non functional fields
> ----------------------------------------------------------------------------------
>
> Key: HIVE-16219
> URL: https://issues.apache.org/jira/browse/HIVE-16219
> Project: Hive
> Issue Type: Bug
> Components: Metastore
> Affects Versions: 2.2.0
> Reporter: anishek
> Assignee: anishek
> Fix For: 2.3.0, 3.0.0
>
> Attachments: HIVE-16219.3.patch
>
>
> The event notification logs stored in the Hive metastore have JSON-serialized messages stored in the NOTIFICATION_LOG table; these messages also store the serialized Thrift API objects in them. When doing a repl dump, however, we are serializing the metadata for the replication event + the event Message + additional helper-method getters representing the Thrift objects.
> We should only serialize metadata for replication event + event Message
> For example, for create table:
> {code}
> {
> "eventType": "CREATE_TABLE",
> "server": "",
> "servicePrincipal": "",
> "db": "default",
> "table": "a",
> "tableObjJson": "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}",
> "timestamp": 1489552350,
> "files": [],
> "tableObj": {
> "tableName": "a",
> "dbName": "default",
> "owner": "anagarwal",
> "createTime": 1489552350,
> "lastAccessTime": 0,
> "retention": 0,
> "sd": {
> "cols": [
> {
> "name": "name",
> "type": "string",
> "comment": null,
> "setName": true,
> "setType": true,
> "setComment": false
> }
> ],
> "location": "file:/tmp/warehouse/a",
> "inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
> "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
> "compressed": false,
> "numBuckets": -1,
> "serdeInfo": {
> "name": null,
> "serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
> "parameters": {
> "serialization.format": "\n",
> "field.delim": "\n"
> },
> "setName": false,
> "parametersSize": 2,
> "setParameters": true,
> "setSerializationLib": true
> },
> "bucketCols": [],
> "sortCols": [],
> "parameters": {},
> "skewedInfo": {
> "skewedColNames": [],
> "skewedColValues": [],
> "skewedColValueLocationMaps": {},
> "setSkewedColNames": true,
> "setSkewedColValues": true,
> "setSkewedColValueLocationMaps": true,
> "skewedColNamesSize": 0,
> "skewedColNamesIterator": [],
> "skewedColValuesSize": 0,
> "skewedColValuesIterator": [],
> "skewedColValueLocationMapsSize": 0
> },
> "storedAsSubDirectories": false,
> "setSkewedInfo": true,
> "parametersSize": 0,
> "colsSize": 1,
> "setParameters": true,
> "setLocation": true,
> "setInputFormat": true,
> "setCols": true,
> "setOutputFormat": true,
> "setSerdeInfo": true,
> "setBucketCols": true,
> "setSortCols": true,
> "colsIterator": [
> {
> "name": "name",
> "type": "string",
> "comment": null,
> "setName": true,
> "setType": true,
> "setComment": false
> }
> ],
> "bucketColsSize": 0,
> "bucketColsIterator": [],
> "sortColsSize": 0,
> "sortColsIterator": [],
> "setStoredAsSubDirectories": true,
> "setCompressed": true,
> "setNumBuckets": true
> },
> "partitionKeys": [],
> "parameters": {
> "totalSize": "0",
> "EXTERNAL": "TRUE",
> "numRows": "0",
> "rawDataSize": "0",
> "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}",
> "numFiles": "0",
> "transient_lastDdlTime": "1489552350"
> },
> "viewOriginalText": null,
> "viewExpandedText": null,
> "tableType": "EXTERNAL_TABLE",
> "privileges": {
> "userPrivileges": {
> "anagarwal": [
> {
> "privilege": "INSERT",
> "createTime": -1,
> "grantor": "anagarwal",
> "grantorType": "USER",
> "grantOption": true,
> "setCreateTime": true,
> "setGrantOption": true,
> "setPrivilege": true,
> "setGrantor": true,
> "setGrantorType": true
> },
> {
> "privilege": "SELECT",
> "createTime": -1,
> "grantor": "anagarwal",
> "grantorType": "USER",
> "grantOption": true,
> "setCreateTime": true,
> "setGrantOption": true,
> "setPrivilege": true,
> "setGrantor": true,
> "setGrantorType": true
> },
> {
> "privilege": "UPDATE",
> "createTime": -1,
> "grantor": "anagarwal",
> "grantorType": "USER",
> "grantOption": true,
> "setCreateTime": true,
> "setGrantOption": true,
> "setPrivilege": true,
> "setGrantor": true,
> "setGrantorType": true
> },
> {
> "privilege": "DELETE",
> "createTime": -1,
> "grantor": "anagarwal",
> "grantorType": "USER",
> "grantOption": true,
> "setCreateTime": true,
> "setGrantOption": true,
> "setPrivilege": true,
> "setGrantor": true,
> "setGrantorType": true
> }
> ]
> },
> "groupPrivileges": null,
> "rolePrivileges": null,
> "rolePrivilegesSize": 0,
> "setUserPrivileges": true,
> "setGroupPrivileges": false,
> "setRolePrivileges": false,
> "userPrivilegesSize": 1,
> "groupPrivilegesSize": 0
> },
> "temporary": false,
> "rewriteEnabled": false,
> "setTableName": true,
> "setDbName": true,
> "setOwner": true,
> "setViewOriginalText": false,
> "setViewExpandedText": false,
> "setTableType": true,
> "setPrivileges": true,
> "setCreateTime": true,
> "setLastAccessTime": true,
> "setRetention": true,
> "partitionKeysIterator": [],
> "parametersSize": 7,
> "setTemporary": true,
> "setRewriteEnabled": false,
> "setParameters": true,
> "setPartitionKeys": true,
> "setSd": true,
> "partitionKeysSize": 0
> }
> }
> {code}
> It should contain only the required JSON message, as follows:
> {code}
> {
> "eventType": "CREATE_TABLE",
> "server": "",
> "servicePrincipal": "",
> "db": "default",
> "table": "a",
> "tableObjJson": "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}",
> "timestamp": 1489552350,
> "files": []
> }
> {code}
> This will require adding serialization features to the mapper in use so that it serializes only the annotated fields.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)