You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by GitBox <gi...@apache.org> on 2020/04/16 13:27:19 UTC

[GitHub] [druid] roopini550 commented on issue #9645: String last aggregator is not considering all the segments for calculating the aggregation

roopini550 commented on issue #9645: String last aggregator is not considering all the segments for calculating the aggregation
URL: https://github.com/apache/druid/issues/9645#issuecomment-614652144
 
 
   Hi @gianm ,
   
   Here is our simple query with "descending": "true", used to fetch sample data in descending order of the timestamp column. Here "instrument_breached_time" is our timestamp field.
   
   {
     "queryType": "select",
     "dataSource": "client_1_SLAMonitoring",
     "descending": "true",
     "dimensions": [],
     "metrics": [],
     "granularity": "all",
     "intervals": [
       "2020-04-06/2020-04-17"
     ],
     "filter": { "type": "selector", "dimension": "instrument_id", "value":"Ass_7448" },
     "pagingSpec": {
       "pagingIdentifiers": {},
       "threshold": 1000
     }
   }
   
   And here is the result of the above query:
   
   [ {
     "timestamp" : "2020-04-10T04:10:00.895Z",
     "result" : {
       "pagingIdentifiers" : {
         "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z_3" : -2,
         "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z" : -5
       },
       "dimensions" : [ "agent_name", "agent_id", "........" ],
       "metrics" : [ ],
       "events" : [ {
         "segmentId" : "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z_3",
         "offset" : -1,
         "event" : {
           "timestamp" : "2020-04-10T04:33:14.399Z",
           "instrument_id" : "Ass_7448",
           "process_uuid" : ".....",
           "task_name" : "UT1",
   		"task_status" : "ASSIGNED",
           "task_completed_time" : 1587038797612,
           "instrument_breached_time" : 1586493194399
         }
       }, {
         "segmentId" : "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z_3",
         "offset" : -2,
         "event" : {
           "timestamp" : "2020-04-10T04:33:14.399Z",
           "instrument_id" : "Ass_7448",
           "task_name" : "................",
           "task_status" : "ASSIGNED",
           "task_completed_time" : 1587038797502,
           "instrument_breached_time" : 1586493194399
   		}
       }, {
         "segmentId" : "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z",
         "offset" : -1,
         "event" : {
           "timestamp" : "2020-04-10T04:33:14.399Z",
           "instrument_breached_time" : 1586493194399,
           "instrument_id" : "Ass_7448", 
           "task_completed_time" : 1586491394783,
           "task_name" : "................",
           "task_status" : "QUEUED"
         }
       }, {
         "segmentId" : "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z",
         "offset" : -2,
         "event" : {
           "timestamp" : "2020-04-10T04:06:14.399Z",
           "instrument_breached_time" : 1586491574399,
           "instrument_id" : "Ass_7448",
           "task_completed_time" : 1586491394783,
           "task_name" : "................",
           "task_status" : "QUEUED"
         }
       }, {
         "segmentId" : "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z",
         "offset" : -3,
         "event" : {
           "timestamp" : "2020-04-10T04:05:20.399Z",
           "instrument_breached_time" : 1586491520399,
           "instrument_id" : "Ass_7448", 
           "task_completed_time" : 1586491394783,
           "task_name" : "................",
           "task_status" : "QUEUED"      }
       }, {
         "segmentId" : "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z",
         "offset" : -4,
         "event" : {
           "timestamp" : "2020-04-10T04:04:44.399Z",
           "instrument_breached_time" : 1586491484399,
           "instrument_id" : "Ass_7448",
           "task_completed_time" : 1586491394783,
           "task_name" : "................",
           "task_status" : "QUEUED"      }
       }, {
         "segmentId" : "client_1_SLAMonitoring_2020-04-10T04:00:00.000Z_2020-04-10T05:00:00.000Z_2020-04-10T04:02:46.062Z",
         "offset" : -5,
         "event" : {
           "timestamp" : "2020-04-10T04:03:14.969Z",
           "instrument_breached_time" : 1586491394969,
           "instrument_id" : "Ass_7448",
           "task_completed_time" : 1586491394783,
           "task_name" : "................",
           "task_status" : "QUEUED"
         }
       } ]
     }
   } ]
   
   And here is our stringLast groupBy query on instrument_id, for the latest value of the dimension task_status:
   
   {
     "queryType": "groupBy",
     "dataSource": "client_1_SLAMonitoring",
     "dimensions":["instrument_id"],
     "threshold": 5,
     "metric": "count",
     "granularity": "all",
     "filter": { "type": "selector", "dimension": "instrument_id", "value":"Ass_7448" },
     "aggregations": [
       { "type" : "longLast", "name" : "comptimeLast","fieldName" : "task_completed_time" },
      {
         "type": "stringLast",
         "name": "lastest_task_status",
         "fieldName": "task_status"
       }],
       "postAggregations": [],
     "intervals": [
       "2020-04-06/2020-04-17"
     ]
   }
   
   And here is the result of the above stringLast query:
   
   [ {
     "version" : "v1",
     "timestamp" : "2020-04-06T00:00:00.000Z",
     "event" : {
       "lastest_task_status" : "QUEUED",
       "instrument_id" : "Ass_7448",
       "comptimeLast" : 1586491394783
     }
   } ]
   
   As you can see, the latest task status should be "ASSIGNED", but the query is returning "QUEUED".
   
   Another observation is that the Last aggregators are not considering segments that have partition/version numbers in their segment IDs. You can observe the same in the example above.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org