You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/12/07 16:52:39 UTC

[GitHub] [pinot] jadami10 commented on a change in pull request #7871: Support loading plugins from multiple directories

jadami10 commented on a change in pull request #7871:
URL: https://github.com/apache/pinot/pull/7871#discussion_r764179452



##########
File path: pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentGenerationJobRunner.java
##########
@@ -390,27 +391,41 @@ protected void addMapperJarToDistributedCache(Job job, PinotFS outputDirFS, URI
   }
 
   protected void packPluginsToDistributedCache(Job job, PinotFS outputDirFS, URI stagingDirURI) {
-    File pluginsRootDir = new File(PluginManager.get().getPluginsRootDir());
-    if (pluginsRootDir.exists()) {
-      try {
-        File pluginsTarGzFile = File.createTempFile("pinot-plugins-", ".tar.gz");
-        TarGzCompressionUtils.createTarGzFile(pluginsRootDir, pluginsTarGzFile);
-
-        // Copy to staging directory
-        Path cachedPluginsTarball = new Path(stagingDirURI.toString(), SegmentGenerationUtils.PINOT_PLUGINS_TAR_GZ);
-        outputDirFS.copyFromLocalFile(pluginsTarGzFile, cachedPluginsTarball.toUri());
-        job.addCacheFile(cachedPluginsTarball.toUri());
-      } catch (Exception e) {
-        LOGGER.error("Failed to tar plugins directory and upload to staging dir", e);
-        throw new RuntimeException(e);
-      }
+    String[] pluginDirectories = PluginManager.get().getPluginsDirectories();
+    if (pluginDirectories == null) {
+      LOGGER.warn("Plugin directories is null, nothing to pack to distributed cache");
+      return;
+    }
 
-      String pluginsIncludes = System.getProperty(PLUGINS_INCLUDE_PROPERTY_NAME);
-      if (pluginsIncludes != null) {
-        job.getConfiguration().set(PLUGINS_INCLUDE_PROPERTY_NAME, pluginsIncludes);
+    ArrayList<File> validPluginDirectories = new ArrayList();
+
+    for (String pluginsDirPath : pluginDirectories) {
+      File pluginsDir = new File(pluginsDirPath);
+      if (pluginsDir.exists()) {
+        validPluginDirectories.add(pluginsDir);
+      } else {
+        LOGGER.warn("Cannot find Pinot plugins directory at [{}]", pluginsDirPath);
+        return;
       }
-    } else {
-      LOGGER.warn("Cannot find local Pinot plugins directory at [{}]", pluginsRootDir);
+    }
+
+    File pluginsTarGzFile = new File(PINOT_PLUGINS_TAR_GZ);
+    try {
+      File[] files = validPluginDirectories.toArray(new File[0]);
+      TarGzCompressionUtils.createTarGzFile(files, pluginsTarGzFile);

Review comment:
       how does this untar later? If I have
   ```
   /a/b
   /a/c
   /b/c
   ```
   will it come back out the same way?

##########
File path: pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentGenerationJobRunner.java
##########
@@ -390,27 +391,41 @@ protected void addMapperJarToDistributedCache(Job job, PinotFS outputDirFS, URI
   }
 
   protected void packPluginsToDistributedCache(Job job, PinotFS outputDirFS, URI stagingDirURI) {
-    File pluginsRootDir = new File(PluginManager.get().getPluginsRootDir());
-    if (pluginsRootDir.exists()) {
-      try {
-        File pluginsTarGzFile = File.createTempFile("pinot-plugins-", ".tar.gz");
-        TarGzCompressionUtils.createTarGzFile(pluginsRootDir, pluginsTarGzFile);
-
-        // Copy to staging directory
-        Path cachedPluginsTarball = new Path(stagingDirURI.toString(), SegmentGenerationUtils.PINOT_PLUGINS_TAR_GZ);
-        outputDirFS.copyFromLocalFile(pluginsTarGzFile, cachedPluginsTarball.toUri());
-        job.addCacheFile(cachedPluginsTarball.toUri());
-      } catch (Exception e) {
-        LOGGER.error("Failed to tar plugins directory and upload to staging dir", e);
-        throw new RuntimeException(e);
-      }
+    String[] pluginDirectories = PluginManager.get().getPluginsDirectories();
+    if (pluginDirectories == null) {
+      LOGGER.warn("Plugin directories is null, nothing to pack to distributed cache");
+      return;
+    }
 
-      String pluginsIncludes = System.getProperty(PLUGINS_INCLUDE_PROPERTY_NAME);
-      if (pluginsIncludes != null) {
-        job.getConfiguration().set(PLUGINS_INCLUDE_PROPERTY_NAME, pluginsIncludes);
+    ArrayList<File> validPluginDirectories = new ArrayList();
+
+    for (String pluginsDirPath : pluginDirectories) {
+      File pluginsDir = new File(pluginsDirPath);
+      if (pluginsDir.exists()) {
+        validPluginDirectories.add(pluginsDir);
+      } else {
+        LOGGER.warn("Cannot find Pinot plugins directory at [{}]", pluginsDirPath);
+        return;
       }
-    } else {

Review comment:
       why remove this?

##########
File path: pinot-spi/src/main/java/org/apache/pinot/spi/plugin/PluginManager.java
##########
@@ -124,55 +124,66 @@ public synchronized void init() {
       return;
     }
     try {
-      _pluginsRootDir = System.getProperty(PLUGINS_DIR_PROPERTY_NAME);
+      _pluginsDirectories = System.getProperty(PLUGINS_DIR_PROPERTY_NAME);
     } catch (Exception e) {
       LOGGER.error("Failed to load env variable {}", PLUGINS_DIR_PROPERTY_NAME, e);
-      _pluginsRootDir = null;
+      _pluginsDirectories = null;
     }
     try {
       _pluginsInclude = System.getProperty(PLUGINS_INCLUDE_PROPERTY_NAME);
     } catch (Exception e) {
       LOGGER.error("Failed to load env variable {}", PLUGINS_INCLUDE_PROPERTY_NAME, e);
       _pluginsInclude = null;
     }
-    init(_pluginsRootDir, _pluginsInclude);
+    init(_pluginsDirectories, _pluginsInclude);
     _initialized = true;
   }
 
-  private void init(String pluginsRootDir, String pluginsInclude) {
-    if (StringUtils.isEmpty(pluginsRootDir)) {
+  private void init(String pluginsDirectories, String pluginsInclude) {
+    if (StringUtils.isEmpty(pluginsDirectories)) {
       LOGGER.info("Env variable '{}' is not specified. Set this env variable to load additional plugins.",
           PLUGINS_DIR_PROPERTY_NAME);
       return;
     } else {
-      if (!new File(pluginsRootDir).exists()) {
-        LOGGER.warn("Plugins root dir [{}] doesn't exist.", pluginsRootDir);
-        return;
-      }
-      LOGGER.info("Plugins root dir is [{}]", pluginsRootDir);
-    }
-    Collection<File> jarFiles = FileUtils.listFiles(new File(pluginsRootDir), new String[]{JAR_FILE_EXTENSION}, true);
-    List<String> pluginsToLoad = null;
-    if (!StringUtils.isEmpty(pluginsInclude)) {
-      pluginsToLoad = Arrays.asList(pluginsInclude.split(","));
-      LOGGER.info("Trying to load plugins: [{}]", Arrays.toString(pluginsToLoad.toArray()));
-    } else {
-      LOGGER.info("Please use env variable '{}' to customize plugins to load. Loading all plugins: {}",
-          PLUGINS_INCLUDE_PROPERTY_NAME, Arrays.toString(jarFiles.toArray()));
-    }
-    for (File jarFile : jarFiles) {
-      File pluginDir = jarFile.getParentFile();
-      String pluginName = pluginDir.getName();
-      if (pluginsToLoad != null) {
-        if (!pluginsToLoad.contains(pluginName)) {
-          continue;
+      String[] directories = pluginsDirectories.split(";");
+      LOGGER.info("Plugin directories env: {}, parsed directories to load: '{}'", pluginsDirectories, directories);
+
+      for (String pluginsDirectory : directories) {
+        if (!new File(pluginsDirectory).exists()) {
+          LOGGER.warn("Plugins dir [{}] doesn't exist.", pluginsDirectory);
+          return;
+        }
+
+        Collection<File> jarFiles = FileUtils.listFiles(
+            new File(pluginsDirectory),
+            new String[]{JAR_FILE_EXTENSION},
+            true
+        );
+        List<String> pluginsToLoad = null;
+        if (!StringUtils.isEmpty(pluginsInclude)) {
+          pluginsToLoad = Arrays.asList(pluginsInclude.split(";"));

Review comment:
       what happens when you accidentally have a plugin with the same name in 2 directories? My guess from reading the code is the second one actually applies. I guess the other option is to say "plugin already loaded, skipping". Either way, I think we should explicitly codify it and add a test.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org