You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "anishek (JIRA)" <ji...@apache.org> on 2017/03/15 08:51:41 UTC

[jira] [Created] (HIVE-16219) Replication Event Messages Contain un-intended fields

anishek created HIVE-16219:
------------------------------

             Summary: Replication Event Messages Contain un-intended fields
                 Key: HIVE-16219
                 URL: https://issues.apache.org/jira/browse/HIVE-16219
             Project: Hive
          Issue Type: Bug
          Components: Metastore
    Affects Versions: 2.2.0
            Reporter: anishek
            Assignee: anishek


The event notification logs in the Hive metastore are stored as JSON-serialized messages in the NOTIFICATION_LOG table. These messages also contain the serialized Thrift API objects; for example, for a create table event:

{code}
{
  "eventType": "CREATE_TABLE",
  "server": "",
  "servicePrincipal": "",
  "db": "default",
  "table": "a",
  "tableObjJson": "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}",
  "timestamp": 1489552350,
  "files": [],
  "tableObj": {
    "tableName": "a",
    "dbName": "default",
    "owner": "anagarwal",
    "createTime": 1489552350,
    "lastAccessTime": 0,
    "retention": 0,
    "sd": {
      "cols": [
        {
          "name": "name",
          "type": "string",
          "comment": null,
          "setName": true,
          "setType": true,
          "setComment": false
        }
      ],
      "location": "file:/tmp/warehouse/a",
      "inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
      "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
      "compressed": false,
      "numBuckets": -1,
      "serdeInfo": {
        "name": null,
        "serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
        "parameters": {
          "serialization.format": "\n",
          "field.delim": "\n"
        },
        "setName": false,
        "parametersSize": 2,
        "setParameters": true,
        "setSerializationLib": true
      },
      "bucketCols": [],
      "sortCols": [],
      "parameters": {},
      "skewedInfo": {
        "skewedColNames": [],
        "skewedColValues": [],
        "skewedColValueLocationMaps": {},
        "setSkewedColNames": true,
        "setSkewedColValues": true,
        "setSkewedColValueLocationMaps": true,
        "skewedColNamesSize": 0,
        "skewedColNamesIterator": [],
        "skewedColValuesSize": 0,
        "skewedColValuesIterator": [],
        "skewedColValueLocationMapsSize": 0
      },
      "storedAsSubDirectories": false,
      "setSkewedInfo": true,
      "parametersSize": 0,
      "colsSize": 1,
      "setParameters": true,
      "setLocation": true,
      "setInputFormat": true,
      "setCols": true,
      "setOutputFormat": true,
      "setSerdeInfo": true,
      "setBucketCols": true,
      "setSortCols": true,
      "colsIterator": [
        {
          "name": "name",
          "type": "string",
          "comment": null,
          "setName": true,
          "setType": true,
          "setComment": false
        }
      ],
      "bucketColsSize": 0,
      "bucketColsIterator": [],
      "sortColsSize": 0,
      "sortColsIterator": [],
      "setStoredAsSubDirectories": true,
      "setCompressed": true,
      "setNumBuckets": true
    },
    "partitionKeys": [],
    "parameters": {
      "totalSize": "0",
      "EXTERNAL": "TRUE",
      "numRows": "0",
      "rawDataSize": "0",
      "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}",
      "numFiles": "0",
      "transient_lastDdlTime": "1489552350"
    },
    "viewOriginalText": null,
    "viewExpandedText": null,
    "tableType": "EXTERNAL_TABLE",
    "privileges": {
      "userPrivileges": {
        "anagarwal": [
          {
            "privilege": "INSERT",
            "createTime": -1,
            "grantor": "anagarwal",
            "grantorType": "USER",
            "grantOption": true,
            "setCreateTime": true,
            "setGrantOption": true,
            "setPrivilege": true,
            "setGrantor": true,
            "setGrantorType": true
          },
          {
            "privilege": "SELECT",
            "createTime": -1,
            "grantor": "anagarwal",
            "grantorType": "USER",
            "grantOption": true,
            "setCreateTime": true,
            "setGrantOption": true,
            "setPrivilege": true,
            "setGrantor": true,
            "setGrantorType": true
          },
          {
            "privilege": "UPDATE",
            "createTime": -1,
            "grantor": "anagarwal",
            "grantorType": "USER",
            "grantOption": true,
            "setCreateTime": true,
            "setGrantOption": true,
            "setPrivilege": true,
            "setGrantor": true,
            "setGrantorType": true
          },
          {
            "privilege": "DELETE",
            "createTime": -1,
            "grantor": "anagarwal",
            "grantorType": "USER",
            "grantOption": true,
            "setCreateTime": true,
            "setGrantOption": true,
            "setPrivilege": true,
            "setGrantor": true,
            "setGrantorType": true
          }
        ]
      },
      "groupPrivileges": null,
      "rolePrivileges": null,
      "rolePrivilegesSize": 0,
      "setUserPrivileges": true,
      "setGroupPrivileges": false,
      "setRolePrivileges": false,
      "userPrivilegesSize": 1,
      "groupPrivilegesSize": 0
    },
    "temporary": false,
    "rewriteEnabled": false,
    "setTableName": true,
    "setDbName": true,
    "setOwner": true,
    "setViewOriginalText": false,
    "setViewExpandedText": false,
    "setTableType": true,
    "setPrivileges": true,
    "setCreateTime": true,
    "setLastAccessTime": true,
    "setRetention": true,
    "partitionKeysIterator": [],
    "parametersSize": 7,
    "setTemporary": true,
    "setRewriteEnabled": false,
    "setParameters": true,
    "setPartitionKeys": true,
    "setSd": true,
    "partitionKeysSize": 0
  }
}
{code}

The message should contain only the required JSON fields, as follows:
{code}
{
  "eventType": "CREATE_TABLE",
  "server": "",
  "servicePrincipal": "",
  "db": "default",
  "table": "a",
  "tableObjJson": "{\"1\":{\"str\":\"a\"},\"2\":{\"str\":\"default\"},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1489552350},\"5\":{\"i32\":0},\"6\":{\"i32\":0},\"7\":{\"rec\":{\"1\":{\"lst\":[\"rec\",1,{\"1\":{\"str\":\"name\"},\"2\":{\"str\":\"string\"}}]},\"2\":{\"str\":\"file:/tmp/warehouse/a\"},\"3\":{\"str\":\"org.apache.hadoop.mapred.TextInputFormat\"},\"4\":{\"str\":\"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat\"},\"5\":{\"tf\":0},\"6\":{\"i32\":-1},\"7\":{\"rec\":{\"2\":{\"str\":\"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe\"},\"3\":{\"map\":[\"str\",\"str\",2,{\"field.delim\":\"\\n\",\"serialization.format\":\"\\n\"}]}}},\"8\":{\"lst\":[\"str\",0]},\"9\":{\"lst\":[\"rec\",0]},\"10\":{\"map\":[\"str\",\"str\",0,{}]},\"11\":{\"rec\":{\"1\":{\"lst\":[\"str\",0]},\"2\":{\"lst\":[\"lst\",0]},\"3\":{\"map\":[\"lst\",\"str\",0,{}]}}},\"12\":{\"tf\":0}}},\"8\":{\"lst\":[\"rec\",0]},\"9\":{\"map\":[\"str\",\"str\",7,{\"totalSize\":\"0\",\"EXTERNAL\":\"TRUE\",\"numRows\":\"0\",\"rawDataSize\":\"0\",\"COLUMN_STATS_ACCURATE\":\"{\\\"BASIC_STATS\\\":\\\"true\\\"}\",\"numFiles\":\"0\",\"transient_lastDdlTime\":\"1489552350\"}]},\"12\":{\"str\":\"EXTERNAL_TABLE\"},\"13\":{\"rec\":{\"1\":{\"map\":[\"str\",\"lst\",1,{\"anagarwal\":[\"rec\",4,{\"1\":{\"str\":\"INSERT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"SELECT\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"UPDATE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}},{\"1\":{\"str\":\"DELETE\"},\"2\":{\"i32\":-1},\"3\":{\"str\":\"anagarwal\"},\"4\":{\"i32\":1},\"5\":{\"tf\":1}}]}]}}},\"14\":{\"tf\":0}}",
  "timestamp": 1489552350,
  "files": []
}
{code}

This will require configuring serialization features on the mapper in use so that it serializes only the annotated fields.




--
This message was sent by Atlassian JIRA
(v6.3.15#6346)