You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2020/04/20 23:10:36 UTC

[GitHub] [beam] udim commented on a change in pull request #11428: [BEAM-9764] multi threading & optional pulling

udim commented on a change in pull request #11428:
URL: https://github.com/apache/beam/pull/11428#discussion_r411751895



##########
File path: sdks/java/container/license_scripts/pull_licenses_java.py
##########
@@ -108,89 +244,27 @@ def write_to_csv(csv_dict):
 
     maven_url_temp = 'https://repo1.maven.org/maven2/{module}/{version}'
 
-    for dep in dependencies['dependencies']:
-        '''
-        An example of a Json blob.
-        {
-            "moduleName": "antlr:antlr",
-            "moduleUrl": "http://www.antlr.org/",
-            "moduleVersion": "2.7.7",
-            "moduleLicense": "BSD License",
-            "moduleLicenseUrl": "http://www.antlr.org/license.html"
-        }
-        '''
-        name = dep['moduleName'].split(':')[1].lower()
-        version = dep['moduleVersion']
-        name_version = name + '-' + version
-        dir_name = '{license_dir}/{name_version}.jar'.format(
-            license_dir=LICENSE_DIR, name_version=name_version)
-        # if auto pulled, directory is existing at {license_dir}
-        if not os.path.isdir(dir_name):
-            # skip self dependencies
-            if dep['moduleName'].startswith('beam'):
-                print('Skippig', name + '-' + version)
-                continue
-            os.mkdir(dir_name)
-            # pull license
-            try:
-                license_url = dep_config[name][version]['license']
-            except:
-                license_url = dep['moduleLicenseUrl']
-            pull_from_url(dir_name + '/LICENSE', license_url, name_version,
-                          no_licenses)
-            # pull notice
-            try:
-                notice_url = dep_config[name][version]['notice']
-                pull_from_url(dir_name + '/NOTICE', notice_url, name_version,
-                              no_licenses)
-            except:
-                notice_url = None
-        else:
-            try:
-                license_url = dep['moduleLicenseUrl']
-            except:
-                license_url = ''
-            print(
-                'License/notice for {name_version} were pulled automatically.'.
-                format(name_version=name_version))
-
-        # get license_type to decide if pull source code.
-        try:
-            license_type = dep['moduleLicense']
-        except:
-            try:
-                license_type = dep_config[name][version]['type']
-            except:
-                no_license_type.add(name_version)
-                license_type = ''
-                continue
+    csv_list = []
+    no_licenses = []
+    no_license_type = []
+    incorrect_source_url = []
 
-        # pull source code if license_type is one of SOURCE_CODE_REQUIRED_LICENSES.
-        if any(x in license_type.lower()
-               for x in SOURCE_CODE_REQUIRED_LICENSES):
-            try:
-                base_url = dep_config[name][version]['source']
-            except:
-                module = dep['moduleName'].split(':')[0].replace('.', '/')
-                base_url = maven_url_temp.format(module=module + '/' + name,
-                                                 version=version)
-            pull_source_code(base_url, dir_name, name_version,
-                             incorrect_source_url)
-            source_included = True
-        else:
-            source_included = False
-
-        csv_dict[name_version] = {
-            'url_to_license': license_url,
-            'license_type': license_type,
-            'source_included': source_included
-        }
+    queue = Queue()
+    for x in range(THREADS):
+        worker = Worker(queue)
+        worker.daemon = True
+        worker.start()
+    for dep in dependencies['dependencies']:
+        queue.put(dep)
+    queue.join()
 

Review comment:
   Sorry, you're right. I wasn't aware of `Queue.task_done()`.
   
   I still believe that using a `ThreadPoolExecutor` would be useful here, but feel free to resolve this without any changes.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org