You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by GitBox <gi...@apache.org> on 2019/06/03 15:57:09 UTC
[GitHub] [incubator-druid] quenlang opened a new issue #7824: Kafka index
service use a lot of direct memory during segment publish
quenlang opened a new issue #7824: Kafka index service use a lot of direct memory during segment publish
URL: https://github.com/apache/incubator-druid/issues/7824
Hi, all
I found a problem when I use the Kafka indexing service. The task process uses a lot of direct memory when publishing segments — almost 6x more than during the data ingestion stage.
I'm using druid-0.13.0.
At ingestion time, the ```RES``` of the task is 1.647g.
```
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
29321 root 20 0 12.691g 1.647g 30952 S 8.7 5.3 3:52.96 java
```
And the ```heap```, ```non-heap``` and ```direct``` memory are 221M, 120M and 864M.
```
Memory used total max usage GC
heap 221M 502M 2048M 10.80% gc.g1_young_generation.count 134
g1_eden_space 43M 248M -1 17.34% gc.g1_young_generation.time(ms) 1146
g1_survivor_space 38M 38M -1 100.00% gc.g1_old_generation.count 0
g1_old_gen 140M 216M 2048M 6.85% gc.g1_old_generation.time(ms) 0
nonheap 120M 124M -1 96.74%
code_cache 40M 42M 240M 17.05%
metaspace 70M 72M -1 96.62%
compressed_class_space 9M 9M 1024M 0.91%
direct 864M 864M - 100.00%
```
But at publishing time, the ```RES``` of the task is 6.046g.
```
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
29321 root 20 0 17.002g 6.046g 109592 S 142.7 19.3 5:31.43 java
```
And the ```heap```, ```non-heap``` and ```direct``` memory are 266M, 124M and 5204M.
```
Memory used total max usage GC
heap 266M 502M 2048M 13.01% gc.g1_young_generation.count 187
g1_eden_space 13M 198M -1 6.57% gc.g1_young_generation.time(ms) 1802
g1_survivor_space 4M 4M -1 100.00% gc.g1_old_generation.count 0
g1_old_gen 249M 300M 2048M 12.18% gc.g1_old_generation.time(ms) 0
nonheap 124M 134M -1 92.48%
code_cache 44M 44M 240M 18.47%
metaspace 70M 78M -1 89.42%
compressed_class_space 9M 10M 1024M 0.91%
direct 5204M 5204M - 100.00%
```
**The direct memory usage increases from 864M to 5204M when publishing begins. How can I control the usage of direct memory?**
Any suggestion? Thanks!
middleManager runtime.properties
```
druid.service=druid/middleManager
druid.port=8091
# Number of tasks per middleManager
druid.worker.capacity=10
# Task launch parameters
druid.indexer.runner.javaOpts=-server -Xmx2g -XX:MaxDirectMemorySize=10240g -XX:+UseG1GC -XX:MaxGCPauseMillis=100 -Duser.timezone=UTC -Dfile.encoding=UTF-8 -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager
druid.indexer.task.baseTaskDir=var/druid/task
# HTTP server threads
druid.server.http.numThreads=50
# Processing threads and buffers on Peons
druid.indexer.fork.property.druid.processing.buffer.sizeBytes=100000000
druid.indexer.fork.property.druid.processing.numThreads=7
druid.indexer.task.restoreTasksOnRestart=true
# Hadoop indexing
druid.indexer.task.hadoopWorkingPath=var/druid/hadoop-tmp
druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.7.3"]
```
Datasource has 35 dimensions and 45 metrics. The spec file
```
{
"type": "kafka",
"dataSchema": {
"dataSource": "MY_DATASOURCE_DAY",
"parser": {
"type": "string",
"parseSpec": {
"format": "json",
"timestampSpec": {
"column": "timestamp",
"format": "millis"
},
"dimensionsSpec": {
"dimensions": [
"mp_id",
"application_id",
"instance_id",
"action_name",
"message_type",
"from_path",
"path",
"open_path",
"close_path",
"scene",
"country_id",
"region_id",
"city_id",
"carrier_id",
"error_message",
"error_filename",
"request_method",
"host",
"uri",
"network_type",
"wechat_version",
"route_chain",
"uid",
"http_code",
"system",
"ip",
"device_type",
"agreement_id",
"protocol",
"error_type",
"error_detail",
"custom_code",
"api_name",
"opera_name",
"offset_left",
"offset_top",
"agent_version"
]
}
}
},
"metricsSpec": [
{
"name": "server_count",
"fieldName": "server_count",
"type": "longSum"
},
{
"name": "quit_count",
"fieldName": "quit_count",
"type": "longSum"
},
{
"name": "on_ready",
"fieldName": "on_ready",
"type": "doubleSum"
},
{
"name": "custom_time",
"fieldName": "custom_time",
"type": "longSum"
},
{
"name": "first_response_time",
"fieldName": "first_response_time",
"type": "doubleSum"
},
{
"name": "response_time",
"fieldName": "response_time",
"type": "doubleSum"
},
{
"name": "application_server_time",
"fieldName": "application_server_time",
"type": "doubleSum"
},
{
"name": "network_time",
"fieldName": "network_time",
"type": "doubleSum"
},
{
"name": "callback_time",
"fieldName": "callback_time",
"type": "longSum"
},
{
"name": "bytes_sent",
"fieldName": "bytes_sent",
"type": "longSum"
},
{
"name": "bytes_received",
"fieldName": "bytes_received",
"type": "longSum"
},
{
"name": "msg_error_pv",
"fieldName": "msg_error_pv",
"type": "longSum"
},
{
"name": "file_error_pv",
"fieldName": "file_error_pv",
"type": "longSum"
},
{
"name": "count",
"fieldName": "count",
"type": "longSum"
},
{
"name": "on_ready_count",
"fieldName": "on_ready_count",
"type": "longSum"
},
{
"name": "open_count",
"fieldName": "open_count",
"type": "longSum"
},
{
"name": "net_count",
"fieldName": "net_count",
"type": "longSum"
},
{
"name": "net_error_count",
"fieldName": "net_error_count",
"type": "longSum"
},
{
"name": "js_error_count",
"fieldName": "js_error_count",
"type": "longSum"
},
{
"name": "slow_count",
"fieldName": "slow_count",
"type": "longSum"
},
{
"name": "net_slow_count",
"fieldName": "net_slow_count",
"type": "longSum"
},
{
"name": "custom_success_count",
"fieldName": "custom_success_count",
"type": "longSum"
},
{
"name": "uv",
"fieldName": "uid",
"type": "thetaSketch"
},
{
"name": "api_success_count",
"fieldName": "api_success_count",
"type": "longSum"
},
{
"name": "api_fail_count",
"fieldName": "api_fail_count",
"type": "longSum"
},
{
"name": "api_count",
"fieldName": "api_count",
"type": "longSum"
},
{
"name": "api_cancel_count",
"fieldName": "api_cancel_count",
"type": "longSum"
},
{
"type": "longSum",
"name": "opera_time",
"fieldName": "opera_time",
"expression": null
},
{
"type": "longSum",
"name": "opera_request_time",
"fieldName": "opera_request_time",
"expression": null
},
{
"type": "longSum",
"name": "opera_server_time",
"fieldName": "opera_server_time",
"expression": null
},
{
"type": "longSum",
"name": "opera_count",
"fieldName": "opera_count",
"expression": null
},
{
"type": "longSum",
"name": "opera_request_count",
"fieldName": "opera_request_count",
"expression": null
},
{
"type": "longSum",
"name": "opera_success_count",
"fieldName": "opera_success_count",
"expression": null
},
{
"type": "longSum",
"name": "opera_fail_count",
"fieldName": "opera_fail_count",
"expression": null
},
{
"type": "longSum",
"name": "opera_slow_count",
"fieldName": "opera_slow_count",
"expression": null
},
{
"type": "longSum",
"name": "opera_server_count",
"fieldName": "opera_server_count",
"expression": null
},
{
"type": "longSum",
"name": "page_net_error_count",
"fieldName": "page_net_error_count",
"expression": null
},
{
"type" : "quantilesDoublesSketch",
"name" : "on_ready_sketch",
"fieldName" : "on_ready",
"k": 256
},
{
"type" : "quantilesDoublesSketch",
"name" : "first_response_time_sketch",
"fieldName" : "first_response_time",
"k": 256
},
{
"type" : "quantilesDoublesSketch",
"name" : "response_time_sketch",
"fieldName" : "response_time",
"k": 256
},
{
"type" : "quantilesDoublesSketch",
"name" : "network_time_sketch",
"fieldName" : "network_time",
"k": 256
},
{
"type" : "quantilesDoublesSketch",
"name" : "application_server_time_sketch",
"fieldName" : "application_server_time",
"k": 256
},
{
"type": "quantilesDoublesSketch",
"name": "opera_time_sketch",
"fieldName": "opera_time",
"k": 256
},
{
"type": "quantilesDoublesSketch",
"name": "opera_request_time_sketch",
"fieldName": "opera_request_time",
"k": 256
},
{
"type": "quantilesDoublesSketch",
"name": "opera_server_time_sketch",
"fieldName": "opera_server_time",
"k": 256
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "DAY",
"queryGranularity": "DAY"
}
},
"tuningConfig": {
"type": "kafka",
"maxRowsPerSegment": 2500000,
"resetOffsetAutomatically": true,
"targetPartitionSize": 2500000
},
"ioConfig": {
"topic": "drd-mp-webchat-applet",
"consumerProperties": {
"bootstrap.servers": "kafka-server:9092"
},
"taskCount": 1,
"taskDuration": "PT1H",
"replicas": 1,
"useEarliestOffset": true
}
}
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org