You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metron.apache.org by ni...@apache.org on 2019/12/05 15:37:52 UTC

[metron] branch master updated: METRON-2332 Enable Tuning of the Profiler's Parallelism from Ambari (nickwallen) closes apache/metron#1574

This is an automated email from the ASF dual-hosted git repository.

nickallen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/metron.git


The following commit(s) were added to refs/heads/master by this push:
     new 19dd9b1  METRON-2332 Enable Tuning of the Profiler's Parallelism from Ambari (nickwallen) closes apache/metron#1574
19dd9b1 is described below

commit 19dd9b173b5fbe3c4dc1e08a5d0498a1c441c6bc
Author: nickwallen <ni...@apache.org>
AuthorDate: Thu Dec 5 10:37:30 2019 -0500

    METRON-2332 Enable Tuning of the Profiler's Parallelism from Ambari (nickwallen) closes apache/metron#1574
---
 metron-analytics/metron-profiler-storm/README.md   | 36 ++++++++++++++--
 .../src/main/config/profiler.properties            |  7 ++-
 .../src/main/flux/profiler/remote.yaml             |  7 ++-
 .../storm/integration/ProfilerIntegrationTest.java |  7 ++-
 .../CURRENT/configuration/metron-profiler-env.xml  | 36 ++++++++++++++--
 .../CURRENT/package/scripts/params/params_linux.py |  5 +++
 .../package/templates/profiler.properties.j2       |  7 ++-
 .../METRON/CURRENT/themes/metron_theme.json        | 50 ++++++++++++++++++++++
 8 files changed, 145 insertions(+), 10 deletions(-)

diff --git a/metron-analytics/metron-profiler-storm/README.md b/metron-analytics/metron-profiler-storm/README.md
index c952cb7..c31b6fa 100644
--- a/metron-analytics/metron-profiler-storm/README.md
+++ b/metron-analytics/metron-profiler-storm/README.md
@@ -325,11 +325,41 @@ The units used to specify the `profiler.window.lag`.  This value should be defin
 
 The number of worker processes to create for the Profiler topology.  This property is useful for performance tuning the Profiler.
 
-### `profiler.executors`
+### `profiler.acker.executors`
 
-*Default*: 0
+*Default*: 1
+
+The number of executors (threads) dedicated to tuple acking. This should most often be equal to the number of partitions in the inbound Kafka topic.
+
+### `profiler.spout.parallelism`
+
+*Default*: 1
+
+The initial number of executors (threads) for the Profiler's Kafka spout. This is also known as the parallelism hint.
+
+### `profiler.splitter.parallelism`
+
+*Default*: 1
+
+The initial number of executors (threads) for the Profiler's splitter component. This is also known as the parallelism hint.
+
+### `profiler.builder.parallelism`
+
+*Default*: 1
+
+The initial number of executors (threads) for the Profiler's builder component. This is also known as the parallelism hint.
+
+### `profiler.hbase.writer.parallelism`
+
+*Default*: 1
+
+The initial number of executors (threads) for the Profiler's HBase writer component. This is also known as the parallelism hint.
+
+### `profiler.kafka.writer.parallelism`
+
+*Default*: 1
 
-The number of executors to spawn per component for the Profiler topology.  This property is useful for performance tuning the Profiler.
+The initial number of executors (threads) for the Profiler's Kafka writer component. This is also known as the parallelism hint.
 
 ### `profiler.ttl`
 
diff --git a/metron-analytics/metron-profiler-storm/src/main/config/profiler.properties b/metron-analytics/metron-profiler-storm/src/main/config/profiler.properties
index dc30838..69efa93 100644
--- a/metron-analytics/metron-profiler-storm/src/main/config/profiler.properties
+++ b/metron-analytics/metron-profiler-storm/src/main/config/profiler.properties
@@ -23,7 +23,12 @@
 topology.worker.childopts=
 topology.auto-credentials=
 profiler.workers=1
-profiler.executors=0
+profiler.acker.executors=1
+profiler.spout.parallelism=1
+profiler.splitter.parallelism=1
+profiler.builder.parallelism=1
+profiler.hbase.writer.parallelism=1
+profiler.kafka.writer.parallelism=1
 topology.message.timeout.secs=30
 topology.max.spout.pending=100000
 topology.fall.back.on.java.serialization=true
diff --git a/metron-analytics/metron-profiler-storm/src/main/flux/profiler/remote.yaml b/metron-analytics/metron-profiler-storm/src/main/flux/profiler/remote.yaml
index e16a782..3921967 100644
--- a/metron-analytics/metron-profiler-storm/src/main/flux/profiler/remote.yaml
+++ b/metron-analytics/metron-profiler-storm/src/main/flux/profiler/remote.yaml
@@ -18,7 +18,7 @@ name: "profiler"
 
 config:
     topology.workers: ${profiler.workers}
-    topology.acker.executors: ${profiler.executors}
+    topology.acker.executors: ${profiler.acker.executors}
     topology.worker.childopts: ${topology.worker.childopts}
     topology.auto-credentials: ${topology.auto-credentials}
     topology.message.timeout.secs: ${topology.message.timeout.secs}
@@ -143,6 +143,7 @@ spouts:
         className: "org.apache.metron.storm.kafka.flux.StormKafkaSpout"
         constructorArgs:
             - ref: "kafkaConfig"
+        parallelism: ${profiler.spout.parallelism}
 
 bolts:
 
@@ -150,6 +151,7 @@ bolts:
         className: "org.apache.metron.profiler.storm.ProfileSplitterBolt"
         constructorArgs:
             - "${kafka.zk}"
+        parallelism: ${profiler.splitter.parallelism}
 
     -   id: "builderBolt"
         className: "org.apache.metron.profiler.storm.ProfileBuilderBolt"
@@ -172,6 +174,7 @@ bolts:
               args: [${profiler.max.routes.per.bolt}]
             - name: "withTimestampField"
               args: ["timestamp"]
+        parallelism: ${profiler.builder.parallelism}
 
     -   id: "hbaseBolt"
         className: "org.apache.metron.hbase.bolt.HBaseBolt"
@@ -185,6 +188,7 @@ bolts:
               args: [${profiler.hbase.batch}]
             - name: "withFlushIntervalSecs"
               args: [${profiler.hbase.flush.interval.seconds}]
+        parallelism: ${profiler.hbase.writer.parallelism}
 
     -   id: "kafkaBolt"
         className: "org.apache.metron.writer.bolt.BulkMessageWriterBolt"
@@ -194,6 +198,7 @@ bolts:
         configMethods:
             -   name: "withBulkMessageWriter"
                 args: [ref: "kafkaWriter"]
+        parallelism: ${profiler.kafka.writer.parallelism}
 
 streams:
 
diff --git a/metron-analytics/metron-profiler-storm/src/test/java/org/apache/metron/profiler/storm/integration/ProfilerIntegrationTest.java b/metron-analytics/metron-profiler-storm/src/test/java/org/apache/metron/profiler/storm/integration/ProfilerIntegrationTest.java
index 2452102..ad150b1 100644
--- a/metron-analytics/metron-profiler-storm/src/test/java/org/apache/metron/profiler/storm/integration/ProfilerIntegrationTest.java
+++ b/metron-analytics/metron-profiler-storm/src/test/java/org/apache/metron/profiler/storm/integration/ProfilerIntegrationTest.java
@@ -406,7 +406,12 @@ public class ProfilerIntegrationTest extends BaseIntegrationTest {
 
       // storm settings
       setProperty("profiler.workers", "1");
-      setProperty("profiler.executors", "0");
+      setProperty("profiler.acker.executors", "0");
+      setProperty("profiler.spout.parallelism", "1");
+      setProperty("profiler.splitter.parallelism", "1");
+      setProperty("profiler.builder.parallelism", "1");
+      setProperty("profiler.hbase.writer.parallelism", "1");
+      setProperty("profiler.kafka.writer.parallelism", "1");
 
       setProperty(Config.TOPOLOGY_AUTO_CREDENTIALS, "[]");
       setProperty(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS, "60");
diff --git a/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/configuration/metron-profiler-env.xml b/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/configuration/metron-profiler-env.xml
index 07b8d11..93ebba8 100644
--- a/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/configuration/metron-profiler-env.xml
+++ b/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/configuration/metron-profiler-env.xml
@@ -212,13 +212,43 @@
   <property>
     <name>profiler_topology_workers</name>
     <value>1</value>
-    <description>The profiler storm topology workers</description>
-    <display-name>Number of Workers</display-name>
+    <description>Number of worker processes to create for the Profiler topology across all machines in the cluster.</description>
+    <display-name>Number of Worker Processes</display-name>
+  </property>
+  <property>
+    <name>profiler_spout_parallelism</name>
+    <value>1</value>
+    <description>The spout parallelism hint; initial number of executors (threads).</description>
+    <display-name>Spout Parallelism Hint</display-name>
+  </property>
+  <property>
+    <name>profiler_splitter_parallelism</name>
+    <value>1</value>
+    <description>The profile splitter parallelism hint; initial number of executors (threads).</description>
+    <display-name>Splitter Parallelism Hint</display-name>
+  </property>
+  <property>
+    <name>profiler_builder_parallelism</name>
+    <value>1</value>
+    <description>The profile builder parallelism hint; initial number of executors (threads).</description>
+    <display-name>Builder Parallelism Hint</display-name>
+  </property>
+  <property>
+    <name>profiler_hbase_writer_parallelism</name>
+    <value>1</value>
+    <description>The HBase writer parallelism hint; initial number of executors (threads).</description>
+    <display-name>HBase Writer Parallelism Hint</display-name>
+  </property>
+  <property>
+    <name>profiler_kafka_writer_parallelism</name>
+    <value>1</value>
+    <description>The Kafka writer parallelism hint; initial number of executors (threads).</description>
+    <display-name>Kafka Writer Parallelism Hint</display-name>
   </property>
   <property>
     <name>profiler_acker_executors</name>
     <value>1</value>
-    <description>The profiler storm topology acker executors</description>
+    <description>Number of executors (threads) dedicated to tuple acking. This should most often be equal to the number of partitions in the inbound Kafka topic.</description>
     <display-name>Number of Acker Executors</display-name>
   </property>
   <property>
diff --git a/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/package/scripts/params/params_linux.py b/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/package/scripts/params/params_linux.py
index c4a6a7b..346d352 100755
--- a/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/package/scripts/params/params_linux.py
+++ b/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/package/scripts/params/params_linux.py
@@ -366,6 +366,11 @@ profiler_hbase_batch = config['configurations']['metron-profiler-env']['profiler
 profiler_hbase_flush_interval = config['configurations']['metron-profiler-env']['profiler_hbase_flush_interval']
 profiler_topology_workers = config['configurations']['metron-profiler-env']['profiler_topology_workers']
 profiler_acker_executors = config['configurations']['metron-profiler-env']['profiler_acker_executors']
+profiler_spout_parallelism = config['configurations']['metron-profiler-env']['profiler_spout_parallelism']
+profiler_splitter_parallelism = config['configurations']['metron-profiler-env']['profiler_splitter_parallelism']
+profiler_builder_parallelism = config['configurations']['metron-profiler-env']['profiler_builder_parallelism']
+profiler_hbase_writer_parallelism = config['configurations']['metron-profiler-env']['profiler_hbase_writer_parallelism']
+profiler_kafka_writer_parallelism = config['configurations']['metron-profiler-env']['profiler_kafka_writer_parallelism']
 profiler_hbase_table = config['configurations']['metron-profiler-env']['profiler_hbase_table']
 profiler_hbase_cf = config['configurations']['metron-profiler-env']['profiler_hbase_cf']
 profiler_configured_flag_file = status_params.profiler_configured_flag_file
diff --git a/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/package/templates/profiler.properties.j2 b/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/package/templates/profiler.properties.j2
index d8bc13d..fc665b3 100644
--- a/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/package/templates/profiler.properties.j2
+++ b/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/package/templates/profiler.properties.j2
@@ -23,7 +23,12 @@
 topology.worker.childopts={{profiler_topology_worker_childopts}}
 topology.auto-credentials={{topology_auto_credentials}}
 profiler.workers={{profiler_topology_workers}}
-profiler.executors={{profiler_acker_executors}}
+profiler.acker.executors={{profiler_acker_executors}}
+profiler.spout.parallelism={{profiler_spout_parallelism}}
+profiler.splitter.parallelism={{profiler_splitter_parallelism}}
+profiler.builder.parallelism={{profiler_builder_parallelism}}
+profiler.hbase.writer.parallelism={{profiler_hbase_writer_parallelism}}
+profiler.kafka.writer.parallelism={{profiler_kafka_writer_parallelism}}
 topology.message.timeout.secs={{profiler_topology_message_timeout_secs}}
 topology.max.spout.pending={{profiler_topology_max_spout_pending}}
 topology.fall.back.on.java.serialization=true
diff --git a/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/themes/metron_theme.json b/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/themes/metron_theme.json
index 41fd044..f8cf67d 100644
--- a/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/themes/metron_theme.json
+++ b/metron-deployment/packaging/ambari/metron-mpack/src/main/resources/common-services/METRON/CURRENT/themes/metron_theme.json
@@ -796,6 +796,26 @@
           "subsection-name": "subsection-profiler-storm"
         },
         {
+          "config": "metron-profiler-env/profiler_spout_parallelism",
+          "subsection-name": "subsection-profiler-storm"
+        },
+        {
+          "config": "metron-profiler-env/profiler_splitter_parallelism",
+          "subsection-name": "subsection-profiler-storm"
+        },
+        {
+          "config": "metron-profiler-env/profiler_builder_parallelism",
+          "subsection-name": "subsection-profiler-storm"
+        },
+        {
+          "config": "metron-profiler-env/profiler_hbase_writer_parallelism",
+          "subsection-name": "subsection-profiler-storm"
+        },
+        {
+          "config": "metron-profiler-env/profiler_kafka_writer_parallelism",
+          "subsection-name": "subsection-profiler-storm"
+        },
+        {
           "config": "metron-rest-env/metron_rest_port",
           "subsection-name": "subsection-rest"
         },
@@ -1507,6 +1527,36 @@
         }
       },
       {
+        "config": "metron-profiler-env/profiler_spout_parallelism",
+        "widget": {
+          "type": "text-field"
+        }
+      },
+      {
+        "config": "metron-profiler-env/profiler_splitter_parallelism",
+        "widget": {
+          "type": "text-field"
+        }
+      },
+      {
+        "config": "metron-profiler-env/profiler_builder_parallelism",
+        "widget": {
+          "type": "text-field"
+        }
+      },
+      {
+        "config": "metron-profiler-env/profiler_hbase_writer_parallelism",
+        "widget": {
+          "type": "text-field"
+        }
+      },
+      {
+        "config": "metron-profiler-env/profiler_kafka_writer_parallelism",
+        "widget": {
+          "type": "text-field"
+        }
+      },
+      {
         "config": "metron-profiler-env/profiler_topology_max_spout_pending",
         "widget": {
           "type": "text-field"