You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@openwhisk.apache.org by GitBox <gi...@apache.org> on 2022/01/18 11:45:38 UTC

[GitHub] [openwhisk] style95 commented on a change in pull request #5194: [New Scheduler] Run scheduler

style95 commented on a change in pull request #5194:
URL: https://github.com/apache/openwhisk/pull/5194#discussion_r786662740



##########
File path: core/scheduler/src/main/resources/application.conf
##########
@@ -37,10 +38,11 @@ akka {
     }
   }
 
-  remote.netty.tcp {
-    send-buffer-size = 3151796b
-    receive-buffer-size = 3151796b
-    maximum-frame-size = 3151796b
+  remote {
+    artery {

Review comment:
       Since the Akka version has been upgraded, remoting now uses Artery rather than the Netty-based classic transport, which is deprecated.
   https://doc.akka.io/docs/akka/current/remoting.html#classic-remoting-deprecated-
   

##########
File path: ansible/roles/schedulers/tasks/deploy.yml
##########
@@ -0,0 +1,362 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more contributor
+# license agreements; and to You under the Apache License, Version 2.0.
+---
+# This role will install Scheduler in group 'schedulers' in the environment
+# inventory
+
+- import_tasks: docker_login.yml
+
+- name: get scheduler name and index
+  set_fact:
+    scheduler_name: "{{ name_prefix ~ host_group.index(inventory_hostname) }}"
+    scheduler_index:
+      "{{ (scheduler_index_base|int) + host_group.index(inventory_hostname) }}"
+
+- name: "pull the {{ docker.image.tag }} image of scheduler"
+  shell: "docker pull {{docker_registry}}{{ docker.image.prefix }}/scheduler:{{docker.image.tag}}"
+  when: docker_registry != ""
+  register: result
+  until: (result.rc == 0)
+  retries: "{{ docker.pull.retries }}"
+  delay: "{{ docker.pull.delay }}"
+
+- name: ensure scheduler log directory is created with permissions
+  file:
+    path: "{{ whisk_logs_dir }}/{{ scheduler_name }}"
+    state: directory
+    mode: 0777
+  become: "{{ logs.dir.become }}"
+
+# We need to create the file with proper permissions because the dir creation above
+# does not result in a dir with full permissions in docker machine especially with macos mounts
+- name: ensure scheduler log file is created with permissions
+  file:
+    path: "{{ whisk_logs_dir }}/{{ scheduler_name }}/{{ scheduler_name }}_logs.log"
+    state: touch
+    mode: 0777
+  when: environment_type is defined and environment_type == "docker-machine"
+
+- name: ensure scheduler config directory is created with permissions
+  file:
+    path: "{{ scheduler.confdir }}/{{ scheduler_name }}"
+    state: directory
+    mode: 0777
+  become: "{{ scheduler.dir.become }}"
+
+- name: check, that required databases exist
+  include_tasks: "{{ openwhisk_home }}/ansible/tasks/db/checkDb.yml"
+  vars:
+    dbName: "{{ item }}"
+    dbUser: "{{ db.credentials.scheduler.user }}"
+    dbPass: "{{ db.credentials.scheduler.pass }}"
+  with_items:
+  - "{{ db.whisk.auth }}"
+
+- name: copy jmxremote password file
+  when: jmx.enabled
+  template:
+    src: "jmxremote.password.j2"
+    dest: "{{ scheduler.confdir }}/{{ scheduler_name }}/jmxremote.password"
+    mode: 0777
+
+- name: copy jmxremote access file
+  when: jmx.enabled
+  template:
+    src: "jmxremote.access.j2"
+    dest: "{{ scheduler.confdir }}/{{ scheduler_name }}/jmxremote.access"
+    mode: 0777
+
+- name: prepare scheduler port
+  set_fact:
+    scheduler_port: "{{ scheduler.basePort + (scheduler_index | int) }}"
+    ports_to_expose:
+      - "{{ scheduler.grpc.basePort + (scheduler_index | int) }}:{{ scheduler.grpc.basePort + (scheduler_index | int) }}"
+      - "{{ scheduler.basePort + (scheduler_index | int) }}:8080"
+
+- name: expose additional ports if jmxremote is enabled
+  when: jmx.enabled
+  vars:
+    jmx_remote_port: "{{ jmx.basePortScheduler + (scheduler_index|int) }}"
+    jmx_remote_rmi_port:
+      "{{ jmx.rmiBasePortScheduler + (scheduler_index|int) }}"
+  set_fact:
+    ports_to_expose: >-
+      {{ ports_to_expose }} +
+      [ '{{ jmx_remote_port }}:{{ jmx_remote_port }}' ] +
+      [ '{{ jmx_remote_rmi_port }}:{{ jmx_remote_rmi_port }}' ]
+    scheduler_args: >-
+      {{ scheduler.arguments }}
+      {{ jmx.jvmCommonArgs }}
+      -Djava.rmi.server.hostname={{ ansible_host }}
+      -Dcom.sun.management.jmxremote.rmi.port={{ jmx_remote_rmi_port }}
+      -Dcom.sun.management.jmxremote.port={{ jmx_remote_port }}
+
+- name: populate environment variables for scheduler
+  set_fact:
+    env:
+      "JAVA_OPTS":
+        -Xmx{{ scheduler.heap }}
+        -XX:+CrashOnOutOfMemoryError
+        -XX:+UseGCOverheadLimit
+        -XX:ErrorFile=/logs/java_error.log
+        -XX:+HeapDumpOnOutOfMemoryError
+        -XX:HeapDumpPath=/logs
+      "SCHEDULER_OPTS": "{{ scheduler_args | default(scheduler.arguments) }}"
+      "SCHEDULER_INSTANCES": "{{ scheduler.instances }}"
+      "JMX_REMOTE": "{{ jmx.enabled }}"
+      "PORT": "8080"
+
+      "WHISK_SCHEDULER_ENDPOINTS_HOST": "{{ ansible_host }}"

Review comment:
       These three values are used to register the scheduler endpoint in etcd so that other components can look it up.
   

##########
File path: ansible/roles/schedulers/tasks/join_akka_cluster.yml
##########
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more contributor
+# license agreements; and to You under the Apache License, Version 2.0.
+---
+#
+#  Scheduler 'plugin' that will add the items necessary to the scheduler
+#  environment to cause the scheduler to join a specified akka cluster
+#
+
+- name: add akka port to ports_to_expose
+  set_fact:
+    ports_to_expose: >-
+      {{ ports_to_expose }} +
+      [ "{{ (scheduler.akka.cluster.basePort + (scheduler_index | int)) }}:"
+      + "{{ scheduler.akka.cluster.bindPort }}" ]
+
+- name: add seed nodes to scheduler environment
+  set_fact:
+    env: >-
+      {{ env | combine({
+        'CONFIG_akka_cluster_seedNodes_' ~ seedNode.0:
+          'akka://scheduler-actor-system@'~seedNode.1~':'~(scheduler.akka.cluster.basePort+seedNode.0)
+      }) }}
+  with_indexed_items: "{{ scheduler.akka.cluster.seedNodes }}"
+  loop_control:
+    loop_var: seedNode
+
+- name: Add akka environment to scheduler environment
+  vars:
+    akka_env:
+      "CONFIG_akka_actor_provider": "{{ scheduler.akka.provider }}"
+      "CONFIG_akka_remote_artery_canonical_hostname":

Review comment:
       This is required to specify the Artery configuration.

##########
File path: common/scala/build.gradle
##########
@@ -40,6 +40,10 @@ dependencies {
     compile "com.typesafe.akka:akka-actor_${gradle.scala.depVersion}:${gradle.akka.version}"
     compile "com.typesafe.akka:akka-stream_${gradle.scala.depVersion}:${gradle.akka.version}"
     compile "com.typesafe.akka:akka-slf4j_${gradle.scala.depVersion}:${gradle.akka.version}"
+    compile "com.typesafe.akka:akka-cluster_${gradle.scala.depVersion}:${gradle.akka.version}"

Review comment:
       One thing to clarify here: in our (downstream) production system, we are still on `akka-2.5.26` and the Netty-based transport for akka-remote.
   So there may be some differences between our downstream setup and this change.

##########
File path: core/scheduler/src/main/scala/org/apache/openwhisk/core/scheduler/Scheduler.scala
##########
@@ -347,12 +341,17 @@ object Scheduler {
         // Create scheduler
         val scheduler = new Scheduler(instanceId, schedulerEndpoints)
 
-        // TODO: Add Akka-grpc handler
-        val httpsConfig =
-          if (Scheduler.protocol == "https") Some(loadConfigOrThrow[HttpsConfig]("whisk.controller.https")) else None
-
-        BasicHttpService.startHttpService(FPCSchedulerServer.instance(scheduler).route, port, httpsConfig)(actorSystem)
+        Http()
+          .newServerAt("0.0.0.0", port = rpcPort)
+          .bind(scheduler.serviceHandlers)

Review comment:
       This binds the gRPC port, which invokers use to communicate with schedulers to fetch activations.
   

##########
File path: ansible/group_vars/all
##########
@@ -435,8 +442,9 @@ metrics:
 
 user_events: "{{ user_events_enabled | default(false) | lower }}"
 
-durationChecker:
-    timeWindow: "{{ duration_checker_time_window | default('1 d') }}"
+zerodowntimeDeployment:

Review comment:
       This configuration is used downstream to deploy all components without any downtime.
   Since there are no corresponding Ansible steps on the controller and invoker sides yet, it does not work here, but I kept it as is.
   We can add the required changes in the future.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@openwhisk.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org