You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@skywalking.apache.org by wu...@apache.org on 2023/01/12 06:48:28 UTC

[skywalking] branch master updated: Add documentation for profiling APIs (#10269)

This is an automated email from the ASF dual-hosted git repository.

wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git


The following commit(s) were added to refs/heads/master by this push:
     new fba96452f5 Add documentation for profiling APIs (#10269)
fba96452f5 is described below

commit fba96452f59a2e04d072c97592797c04e7fa6481
Author: mrproliu <74...@qq.com>
AuthorDate: Thu Jan 12 14:48:21 2023 +0800

    Add documentation for profiling APIs (#10269)
---
 docs/en/api/profiling-protocol.md | 277 ++++++++++++++++++++++++++++++++++++++
 docs/en/api/query-protocol.md     |  61 +++++++++
 docs/en/changes/changes.md        |   1 +
 docs/menu.yml                     |   2 +
 4 files changed, 341 insertions(+)

diff --git a/docs/en/api/profiling-protocol.md b/docs/en/api/profiling-protocol.md
new file mode 100644
index 0000000000..f72b8c36e4
--- /dev/null
+++ b/docs/en/api/profiling-protocol.md
@@ -0,0 +1,277 @@
+# Profiling APIs
+
+SkyWalking offers two types of Profiling, in-process and out-process, each with its own API.
+
+## In-process profiling APIs
+
+[In-process profiling](../concepts-and-designs/profiling.md#in-process-profiling) commonly interacts with auto-instrument agents. It gathers stack traces of programs and sends the data to the OAP for further analysis.
+
+```protobuf
+syntax = "proto3";
+
+package skywalking.v3;
+
+option java_multiple_files = true;
+option java_package = "org.apache.skywalking.apm.network.language.profile.v3";
+option csharp_namespace = "SkyWalking.NetworkProtocol.V3";
+option go_package = "skywalking.apache.org/repo/goapi/collect/language/profile/v3";
+
+import "common/Command.proto";
+
+service ProfileTask {
+
+    // query all profile task commands that the sniffer needs to execute
+    rpc getProfileTaskCommands (ProfileTaskCommandQuery) returns (Commands) {
+    }
+
+    // collect dumped thread snapshot
+    rpc collectSnapshot (stream ThreadSnapshot) returns (Commands) {
+    }
+
+    // report profiling task finished
+    rpc reportTaskFinish (ProfileTaskFinishReport) returns (Commands) {
+    }
+
+}
+
+message ProfileTaskCommandQuery {
+    // current sniffer information
+    string service = 1;
+    string serviceInstance = 2;
+
+    // last command timestamp
+    int64 lastCommandTime = 3;
+}
+
+// dumped thread snapshot
+message ThreadSnapshot {
+    // profile task id
+    string taskId = 1;
+    // dumped segment id
+    string traceSegmentId = 2;
+    // dump timestamp
+    int64 time = 3;
+    // snapshot dump sequence, start with zero
+    int32 sequence = 4;
+    // snapshot stack
+    ThreadStack stack = 5;
+}
+
+message ThreadStack {
+    // stack code signature list
+    repeated string codeSignatures = 1;
+}
+
+// profile task finished report
+message ProfileTaskFinishReport {
+    // current sniffer information
+    string service = 1;
+    string serviceInstance = 2;
+
+    // profile task
+    string taskId = 3;
+}
+```
+
+## Out-process profiling
+
+[Out-process profiling](../concepts-and-designs/profiling.md#out-of-process-profiling) interacts with the eBPF agent, which receives tasks and captures data, then reports it to the OAP for further analysis.
+
+### Process APIs
+
+Similar to Service Instance, all processes must be reported to the OAP storage segment prior to analysis.
+
+```protobuf
+syntax = "proto3";
+
+package skywalking.v3;
+
+option java_multiple_files = true;
+option java_package = "org.apache.skywalking.apm.network.ebpf.profiling.process.v3";
+option go_package = "skywalking.apache.org/repo/goapi/collect/ebpf/profiling/process/v3";
+
+import "common/Common.proto";
+import "common/Command.proto";
+
+// Define the detected processes and report them.
+service EBPFProcessService {
+    // Report discovered process in Rover
+    rpc reportProcesses (EBPFProcessReportList) returns (EBPFReportProcessDownstream) {
+    }
+
+    // Keep the process alive in the backend.
+    rpc keepAlive (EBPFProcessPingPkgList) returns (Commands) {
+    }
+}
+
+message EBPFProcessReportList {
+    repeated EBPFProcessProperties processes = 1;
+    // An ID generated by eBPF agent, should be unique globally.
+    string ebpfAgentID = 2;
+}
+
+message EBPFProcessProperties {
+    // The Process metadata
+    oneof metadata {
+        EBPFHostProcessMetadata hostProcess = 1;
+        EBPFKubernetesProcessMetadata k8sProcess = 2;
+    }
+}
+
+message EBPFHostProcessMetadata {
+    // [required] Entity metadata
+    // Must ensure that entity information is unique at the time of reporting
+    EBPFProcessEntityMetadata entity = 1;
+    // [required] The Process id of the host
+    int32 pid = 2;
+    // [optional] properties of the process
+    repeated KeyStringValuePair properties = 3;
+}
+
+// Process Entity metadata
+message EBPFProcessEntityMetadata {
+    // [required] Name of the layer the process belongs to, as defined in the backend
+    string layer = 1;
+    // [required] Name of the service the process belongs to
+    string serviceName = 2;
+    // [required] Name of the service instance the process belongs to
+    string instanceName = 3;
+    // [required] Process name
+    string processName = 4;
+    // Process labels for aggregate from service
+    repeated string labels = 5;
+}
+
+// Kubernetes process metadata
+message EBPFKubernetesProcessMetadata {
+    // [required] Entity metadata
+    // Must ensure that entity information is unique at the time of reporting
+    EBPFProcessEntityMetadata entity = 1;
+    // [required] The Process id of the host
+    int32 pid = 2;
+    // [optional] properties of the process
+    repeated KeyStringValuePair properties = 3;
+}
+
+message EBPFReportProcessDownstream {
+    repeated EBPFProcessDownstream processes = 1;
+}
+
+message EBPFProcessDownstream {
+    // Generated process id
+    string processId = 1;
+    // Locate the process by basic information
+    oneof process {
+        EBPFHostProcessDownstream hostProcess = 2;
+        EBPFKubernetesProcessDownstream k8sProcess = 3;
+    }
+}
+
+message EBPFHostProcessDownstream {
+    int32 pid = 1;
+    EBPFProcessEntityMetadata entityMetadata = 2;
+}
+
+// Kubernetes process downstream
+message EBPFKubernetesProcessDownstream {
+    int32 pid = 1;
+    EBPFProcessEntityMetadata entityMetadata = 2;
+}
+
+message EBPFProcessPingPkgList {
+    repeated EBPFProcessPingPkg processes = 1;
+    // An ID generated by eBPF agent, should be unique globally.
+    string ebpfAgentID = 2;
+}
+
+message EBPFProcessPingPkg {
+    // Process entity
+    EBPFProcessEntityMetadata entityMetadata = 1;
+    // Minimal necessary properties
+    repeated KeyStringValuePair properties = 2;
+}
+```
+
+### Out-process profiling APIs
+
+```protobuf
+syntax = "proto3";
+
+package skywalking.v3;
+
+option java_multiple_files = true;
+option java_package = "org.apache.skywalking.apm.network.ebpf.profiling.v3";
+option go_package = "skywalking.apache.org/repo/goapi/collect/ebpf/profiling/v3";
+
+import "common/Command.proto";
+
+// Define the Rover Process profiling task and upload profiling data.
+service EBPFProfilingService {
+    // Query profiling (start or stop) tasks
+    rpc queryTasks (EBPFProfilingTaskQuery) returns (Commands) {
+    }
+
+    // collect profiling data
+    rpc collectProfilingData (stream EBPFProfilingData) returns (Commands) {
+    }
+}
+
+message EBPFProfilingTaskQuery {
+    // rover instance id
+    string roverInstanceId = 1;
+
+    // latest task update time
+    int64 latestUpdateTime = 2;
+}
+
+message EBPFProfilingData {
+    // task metadata
+    EBPFProfilingTaskMetadata task = 1;
+    // profiling data
+    oneof profiling {
+        EBPFOnCPUProfiling onCPU = 2;
+        EBPFOffCPUProfiling offCPU = 3;
+    }
+}
+
+message EBPFProfilingTaskMetadata {
+    // profiling task id
+    string taskId = 1;
+    // profiling process id
+    string processId = 2;
+    // the start time of this profiling process
+    int64 profilingStartTime = 3;
+    // report time
+    int64 currentTime = 4;
+}
+
+message EBPFProfilingStackMetadata {
+    // stack type
+    EBPFProfilingStackType stackType = 1;
+    // stack id provided by the kernel
+    int32 stackId = 2;
+    // stack symbols
+    repeated string stackSymbols = 3;
+}
+
+enum EBPFProfilingStackType {
+    PROCESS_KERNEL_SPACE = 0;
+    PROCESS_USER_SPACE = 1;
+}
+
+message EBPFOnCPUProfiling {
+    // stack data in one task(thread)
+    repeated EBPFProfilingStackMetadata stacks = 1;
+    // stack counts
+    int32 dumpCount = 2;
+}
+
+message EBPFOffCPUProfiling {
+    // stack data in one task(thread)
+    repeated EBPFProfilingStackMetadata stacks = 1;
+    // total number of times the process is switched off the CPU by the scheduler.
+    int32 switchCount = 2;
+    // where time(nanoseconds) is spent waiting while blocked on I/O, locks, timers, paging/swapping, etc.
+    int64 duration = 3;
+}
+```
\ No newline at end of file
diff --git a/docs/en/api/query-protocol.md b/docs/en/api/query-protocol.md
index 739f5fe443..fb7eb6a50c 100644
--- a/docs/en/api/query-protocol.md
+++ b/docs/en/api/query-protocol.md
@@ -28,6 +28,18 @@ extend type Query {
     searchEndpoint(keyword: String!, serviceId: ID!, limit: Int!): [Endpoint!]!
     getEndpointInfo(endpointId: ID!): EndpointInfo
 
+    # Process query
+    # Read process list.
+    listProcesses(duration: Duration!, instanceId: ID!): [Process!]!
+    # Find process according to given ID. Return null if not existing.
+    getProcess(processId: ID!): Process
+    # Get the number of matched processes through serviceId, labels
+    # Labels: the matched process should contain all labels
+    #
+    # The return is not a precise number, the process has its lifecycle, as it reboots and shuts down over time.
+    # The return number just gives an abstract of the scale of profiling that would be applied.
+    estimateProcessScale(serviceId: ID!, labels: [String!]!): Long!
+
     # Database related meta info.
     getAllDatabases(duration: Duration!): [Database!]!
     getTimeInfo: TimeInfo
@@ -186,6 +198,55 @@ extend type Query {
 
 Event query fetches the event list based on given sources and time range conditions.
 
+### Profiling
+SkyWalking offers two types of [profiling](../concepts-and-designs/profiling.md), in-process and out-process, allowing users to create tasks and check their execution status.
+
+#### In-process profiling
+
+```graphql
+extend type Mutation {
+    # create new profile task
+    createProfileTask(creationRequest: ProfileTaskCreationRequest): ProfileTaskCreationResult!
+}
+extend type Query {
+    # query all task list, order by ProfileTask#startTime descending
+    getProfileTaskList(serviceId: ID, endpointName: String): [ProfileTask!]!
+    # query all task logs
+    getProfileTaskLogs(taskID: String): [ProfileTaskLog!]!
+    # query all task profiled segment list
+    getProfileTaskSegmentList(taskID: String): [BasicTrace!]!
+    # query profiled segment
+    getProfiledSegment(segmentId: String): ProfiledSegment
+    # analyze profiled segment, start and end time use timestamp(millisecond)
+    getProfileAnalyze(segmentId: String!, timeRanges: [ProfileAnalyzeTimeRange!]!): ProfileAnalyzation!
+}
+```
+
+#### Out-process profiling
+
+```graphql
+extend type Mutation {
+    # create a new eBPF fixed time profiling task
+    createEBPFProfilingFixedTimeTask(request: EBPFProfilingTaskFixedTimeCreationRequest!): EBPFProfilingTaskCreationResult!
+
+    # create a new eBPF network profiling task
+    createEBPFNetworkProfiling(request: EBPFProfilingNetworkTaskRequest!): EBPFProfilingTaskCreationResult!
+    # keep alive the eBPF profiling task
+    keepEBPFNetworkProfiling(taskId: ID!): EBPFNetworkKeepProfilingResult!
+}
+extend type Query {
+    # query eBPF profiling data for preparing to create a task
+    queryPrepareCreateEBPFProfilingTaskData(serviceId: ID!): EBPFProfilingTaskPrepare!
+    # query eBPF profiling task list
+    queryEBPFProfilingTasks(serviceId: ID, serviceInstanceId: ID, targets: [EBPFProfilingTargetType!]): [EBPFProfilingTask!]!
+    # query schedules from profiling task
+    queryEBPFProfilingSchedules(taskId: ID!): [EBPFProfilingSchedule!]!
+    # analyze the profiling schedule
+    # aggregateType is "EBPFProfilingAnalyzeAggregateType#COUNT" as default. 
+    analysisEBPFProfilingResult(scheduleIdList: [ID!]!, timeRanges: [EBPFProfilingAnalyzeTimeRange!]!, aggregateType: EBPFProfilingAnalyzeAggregateType): EBPFProfilingAnalyzation!
+}
+```
+
 ## Condition
 ### Duration
 Duration is a widely used parameter type as the APM data is time-related. See the following for more details. 
diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md
index ad8aac3194..bdf33dc6cd 100644
--- a/docs/en/changes/changes.md
+++ b/docs/en/changes/changes.md
@@ -94,5 +94,6 @@
 * Reorganize the protocols docs to a more clear API docs.
 * Add documentation about replacing Zipkin server with SkyWalking OAP.
 * Add Lens UI relative docs in Zipkin trace section.
+* Add Profiling APIs.
 
 All issues and pull requests are [here](https://github.com/apache/skywalking/milestone/160?closed=1)
diff --git a/docs/menu.yml b/docs/menu.yml
index fef61034cc..a3fd6bc8d9 100644
--- a/docs/menu.yml
+++ b/docs/menu.yml
@@ -245,6 +245,8 @@ catalog:
             path: "/en/api/instance-properties"
           - name: "Event"
             path: "/en/api/event"
+          - name: "Profiling"
+            path: "/en/api/profiling-protocol"
       - name: "Query APIs"
         catalog:
           - name:  "GraphQL APIs"