You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by GitBox <gi...@apache.org> on 2020/03/05 12:46:04 UTC

[GitHub] [druid] mkuthan opened a new issue #9461: SegmentMetadata query returns hasMultipleValues=true for all STRING dims in realtime segment

mkuthan opened a new issue #9461: SegmentMetadata query returns hasMultipleValues=true for all STRING dims in realtime segment
URL: https://github.com/apache/druid/issues/9461
 
 
   SegmentMetadata query returns hasMultipleValues=true for all STRING dims in realtime segment. Segments moved to the historical nodes report hasMultipleValues=false as expected.
   
   ### Affected Version
   
   0.16.0 (on 0.14.0 the issue didn't happen)
   
   ### Description
   
   Datasource is defined as follows:
   
   ```
   "dataSchema": {
       "dataSource": "dead_letter_queue",
       "parser": {
         "type": "json",
         "parseSpec": {
           "format": "json",
           "timestampSpec": {
             "format": "millis",
             "column": "collector_timestamp"
           },
           "dimensionsSpec": {
             "dimensions": [
               {
                 "type": "string",
                 "name": "destination",
                 "createBitmapIndex": false
               },
               {
                 "type": "string",
                 "name": "message",
                 "createBitmapIndex": false
               },
               {
                 "type": "string",
                 "name": "stack_trace",
                 "createBitmapIndex": false
               }
             ],
             "dimensionExclusions": null
           }
         }
       },
       "metricsSpec": [
         {
           "type": "count",
           "name": "dlq_count"
         }
       ],
       "granularitySpec": {
         "type": "uniform",
         "segmentGranularity": "HOUR",
         "queryGranularity": "MINUTE",
         "rollup": true,
         "intervals": null
       },
       "transformSpec": {
         "filter": null,
         "transforms": []
       }
     },
     "tuningConfig": {
       "type": "KafkaTuningConfig",
       "maxRowsInMemory": 500000,
       "maxBytesInMemory": 0,
       "maxRowsPerSegment": 5000000,
       "maxTotalRows": 20000000,
       "intermediatePersistPeriod": "PT10M",
       "basePersistDirectory": "/var/tmp/druid/1575281192030-0",
       "maxPendingPersists": 0,
       "indexSpec": {
         "bitmap": {
           "type": "concise"
         },
         "dimensionCompression": "lz4",
         "metricCompression": "lz4",
         "longEncoding": "longs"
       },
       "indexSpecForIntermediatePersists": {
         "bitmap": {
           "type": "concise"
         },
         "dimensionCompression": "lz4",
         "metricCompression": "lz4",
         "longEncoding": "longs"
       },
       "buildV9Directly": true,
       "reportParseExceptions": false,
       "handoffConditionTimeout": 0,
       "resetOffsetAutomatically": true,
       "segmentWriteOutMediumFactory": null,
       "intermediateHandoffPeriod": "P2147483647D",
       "logParseExceptions": false,
       "maxParseExceptions": 2147483647,
       "maxSavedParseExceptions": 0,
       "skipSequenceNumberAvailabilityCheck": false
     },
     "ioConfig": {
       "type": "kafka",
       (...)
     }
     "dataSource": "dead_letter_queue"
   }
   ```
   
   Segment metadata query for the latest hour:
   
   ```
   {"queryType":"segmentMetadata","dataSource":"dead_letter_queue","analysisTypes":["aggregators"],"intervals": ["2020-03-05T12:00:00/2020-03-06"],"merge": false}
   ```
   
   And the results:
   
   ```
   [
     {
       "id": "dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z",
       "intervals": null,
       "columns": {
         "__time": {
           "type": "LONG",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": null,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "destination": {
           "type": "STRING",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "dlq_count": {
           "type": "LONG",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": null,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "message": {
           "type": "STRING",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "stack_trace": {
           "type": "STRING",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         }
       },
       "size": 0,
       "numRows": 19,
       "aggregators": {
         "dlq_count": {
           "type": "longSum",
           "name": "dlq_count",
           "fieldName": "dlq_count",
           "expression": null
         }
       },
       "timestampSpec": null,
       "queryGranularity": null,
       "rollup": null
     },
     {
       "id": "dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z_1",
       "intervals": null,
       "columns": {
         "__time": {
           "type": "LONG",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": null,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "destination": {
           "type": "STRING",
           "hasMultipleValues": true,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "dlq_count": {
           "type": "LONG",
           "hasMultipleValues": false,
           "size": 0,
           "cardinality": null,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "message": {
           "type": "STRING",
           "hasMultipleValues": true,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         },
         "stack_trace": {
           "type": "STRING",
           "hasMultipleValues": true,
           "size": 0,
           "cardinality": 0,
           "minValue": null,
           "maxValue": null,
           "errorMessage": null
         }
       },
       "size": 0,
       "numRows": 127,
       "aggregators": {
         "dlq_count": {
           "type": "longSum",
           "name": "dlq_count",
           "fieldName": "dlq_count",
           "expression": null
         }
       },
       "timestampSpec": null,
       "queryGranularity": null,
       "rollup": null
     }
   ]
   ```
   
   For segment "dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z_1" all STRING dims are reported as multi-value ("hasMultipleValues": true). Segment metadata for all other segments (like "dead_letter_queue_2020-03-05T12:00:00.000Z_2020-03-05T13:00:00.000Z_2020-03-05T12:00:50.223Z") is as expected: all STRING dims are reported with "hasMultipleValues": false.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org