You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@sling.apache.org by GitBox <gi...@apache.org> on 2022/01/14 16:23:34 UTC

[GitHub] [sling-org-apache-sling-feature-cpconverter] anchela commented on a change in pull request #125: Sling 10931

anchela commented on a change in pull request #125:
URL: https://github.com/apache/sling-org-apache-sling-feature-cpconverter/pull/125#discussion_r784972325



##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/BundleSlingInitialContentExtractor.java
##########
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jackrabbit.vault.packaging.PackageType;
+import org.apache.sling.feature.cpconverter.ContentPackage2FeatureModelConverter;
+import org.apache.sling.feature.cpconverter.ConverterException;
+import org.apache.sling.feature.cpconverter.features.FeaturesManager;
+import org.apache.sling.feature.cpconverter.vltpkg.VaultPackageAssembler;
+import org.apache.sling.jcr.contentloader.PathEntry;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+import org.osgi.framework.Constants;
+import org.osgi.framework.Version;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.jar.Attributes;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+
+/**
+ * Extracts the Sling initial content from a bundle and returns the stripped bundle as a java.io.InputStream.
+ */
+public class BundleSlingInitialContentExtractor {
+
+    private static final double THRESHOLD_RATIO = 10;
+    private static final int BUFFER = 512;
+    private static final long TOOBIG = 0x6400000; // Max size of unzipped data, 100MB
+
+    protected final AssemblerProvider assemblerProvider = new AssemblerProvider();
+    protected final ContentReaderProvider contentReaderProvider = new ContentReaderProvider();
+    protected final ParentFolderRepoInitHandler parentFolderRepoInitHandler = new ParentFolderRepoInitHandler();
+    
+    static Version getModifiedOsgiVersion(@NotNull Version originalVersion) {
+        return new Version( originalVersion.getMajor(), 
+                            originalVersion.getMinor(), 
+                            originalVersion.getMicro(), 
+                    originalVersion.getQualifier() + "_" + ContentPackage2FeatureModelConverter.PACKAGE_CLASSIFIER);
+    }
+
+    /**
+     * Separates the Sling initial content of a bundle from the rest of the bundle.
+     * <p>
+     * The content header is removed from the manifest and the bundle version gets the converter suffix.
+     * Entries matching one of the context's path entries are written below the converter's temp directory
+     * and afterwards handed to a {@link BundleSlingInitialContentJarEntryExtractor}; every other entry
+     * (except the manifest itself) is copied into a new temporary JAR.
+     *
+     * @param context provides the converter, the bundle's jar file, manifest, path entries and policy
+     * @return a stream over the stripped bundle whose backing temp file is deleted when the stream is closed,
+     *         or {@code null} if the policy is {@code KEEP} or the path entry list is empty
+     * @throws IOException if reading or writing fails, or an entry would be written outside of the temp directory
+     * @throws ConverterException propagated from the per-entry extraction and the package assembly
+     * @throws IllegalStateException if the amount of unzipped data reaches the maximum ({@code TOOBIG})
+     */
+    @SuppressWarnings("java:S5042") // we already addressed this
+    @Nullable public InputStream extract(@NotNull BundleSlingInitialContentExtractorContext context) throws IOException, ConverterException {
+
+        ContentPackage2FeatureModelConverter contentPackage2FeatureModelConverter = context.getConverter();
+
+        if (context.getSlingInitialContentPolicy() == ContentPackage2FeatureModelConverter.SlingInitialContentPolicy.KEEP) {
+            return null;
+        }
+        if (CollectionUtils.isEmpty(context.getPathEntryList())) {
+            return null;
+        }
+
+        // remove header
+        final Manifest manifest = context.getManifest();
+        manifest.getMainAttributes().remove(new Attributes.Name(PathEntry.CONTENT_HEADER));
+        // change version to have suffix
+        Version originalVersion = new Version(Objects.requireNonNull(manifest.getMainAttributes().getValue(Constants.BUNDLE_VERSION)));
+        manifest.getMainAttributes().putValue(Constants.BUNDLE_VERSION, getModifiedOsgiVersion(originalVersion).toString());
+        Path newBundleFile = Files.createTempFile(contentPackage2FeatureModelConverter.getTempDirectory().toPath(), "newBundle", ".jar");
+        String basePath = contentPackage2FeatureModelConverter.getTempDirectory().getPath();
+
+        // create JAR file to prevent extracting it twice and for random access
+        try (OutputStream fileOutput = Files.newOutputStream(newBundleFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
+             JarOutputStream bundleOutput = new JarOutputStream(fileOutput, manifest)) {
+
+            Set<SlingInitialContentBundleEntryMetaData> collectedSlingInitialContentBundleEntries = new HashSet<>();
+
+            AtomicLong total = new AtomicLong(0);
+            // one scratch buffer for all entries: every read overwrites it and only the bytes read are written
+            final byte[] data = new byte[BUFFER];
+
+            final JarFile jarFile = context.getJarFile();
+            Enumeration<? extends JarEntry> entries = jarFile.entries();
+
+            // first we collect all the entries into a set, collectedSlingInitialContentBundleEntries.
+            // we need it up front to be able to perform various checks in another loop later.
+            while (entries.hasMoreElements()) {
+                JarEntry jarEntry = entries.nextElement();
+
+                if (jarEntry.getName().equals(JarFile.MANIFEST_NAME)) {
+                    continue;
+                }
+
+                long compressedSize = jarEntry.getCompressedSize();
+                if (!jarEntry.isDirectory()) {
+                    try (InputStream input = new BufferedInputStream(jarFile.getInputStream(jarEntry))) {
+                        if (containsSlingInitialContent(context, jarEntry)) {
+
+                            File targetFile = new File(contentPackage2FeatureModelConverter.getTempDirectory(), jarEntry.getName());
+
+                            // Compare whole path elements instead of raw string prefixes: a plain
+                            // String.startsWith would also accept a sibling such as "<tmp>-other/file".
+                            Path canonicalTempDirectory = contentPackage2FeatureModelConverter.getTempDirectory().getCanonicalFile().toPath();
+                            if (!targetFile.getCanonicalFile().toPath().startsWith(canonicalTempDirectory)) {
+                                throw new IOException("Entry is outside of the target directory");
+                            }
+
+                            targetFile.getParentFile().mkdirs();
+                            if (!targetFile.exists() && !targetFile.createNewFile()) {
+                                throw new IOException("Could not create placeholder file!");
+                            }
+
+                            // try-with-resources releases the file handle even if copying fails;
+                            // closing it a second time after a successful copy is harmless.
+                            try (FileOutputStream fos = new FileOutputStream(targetFile)) {
+                                safelyWriteOutputStream(compressedSize, total, data, input, fos, true);
+                            }
+
+                            SlingInitialContentBundleEntryMetaData bundleEntry = createSlingInitialContentBundleEntry(context, basePath, jarEntry, targetFile);
+                            collectedSlingInitialContentBundleEntries.add(bundleEntry);
+                        } else {
+                            bundleOutput.putNextEntry(jarEntry);
+                            safelyWriteOutputStream(compressedSize, total, data, input, bundleOutput, false);
+                            // NOTE(review): this copies whatever the guarded write above left unread and is
+                            // not counted in 'total' - confirm whether the size guard is meant to cover it.
+                            IOUtils.copy(input, bundleOutput);
+                            bundleOutput.closeEntry();
+                        }
+                    }
+                }
+
+                if (total.get() + BUFFER > TOOBIG) {
+                    throw new IllegalStateException("File being unzipped is too big.");
+                }
+
+            }
+
+            // now that we got collectedSlingInitialContentBundleEntries ready, we loop it and perform an extract for each entry.
+            BundleSlingInitialContentJarEntryExtractor jarEntryExtractor =
+                    new BundleSlingInitialContentJarEntryExtractor(assemblerProvider, contentReaderProvider, parentFolderRepoInitHandler);
+
+            for (SlingInitialContentBundleEntryMetaData slingInitialContentBundleEntryMetaData : collectedSlingInitialContentBundleEntries) {
+                jarEntryExtractor.extractSlingInitialContent(context, slingInitialContentBundleEntryMetaData, collectedSlingInitialContentBundleEntries);
+            }
+
+        }
+
+        // add additional content packages to feature model
+        finalizePackageAssembly(context);
+
+        // return stripped bundle's inputstream which must be deleted on close
+        return Files.newInputStream(newBundleFile, StandardOpenOption.READ, StandardOpenOption.DELETE_ON_CLOSE);
+    }
+
+    @NotNull
+    private SlingInitialContentBundleEntryMetaData createSlingInitialContentBundleEntry(@NotNull BundleSlingInitialContentExtractorContext context,
+                                                                                        @NotNull String basePath,
+                                                                                        @NotNull JarEntry jarEntry,
+                                                                                        @NotNull File targetFile) throws UnsupportedEncodingException {
+        final String entryName = StringUtils.substringAfter( targetFile.getPath(), basePath + "/");
+        final PathEntry pathEntryValue = context.getPathEntryList().stream().filter(p -> entryName.startsWith( p.getPath())).findFirst().orElseThrow(NullPointerException::new);
+        final String target = pathEntryValue.getTarget();
+        // https://sling.apache.org/documentation/bundles/content-loading-jcr-contentloader.html#file-name-escaping
+        String repositoryPath = (target != null ? target : "/") + URLDecoder.decode(entryName.substring(pathEntryValue.getPath().length()), "UTF-8");
+        return new SlingInitialContentBundleEntryMetaData(targetFile, pathEntryValue, repositoryPath);
+    }
+
+
+
+    /**
+     * Resets this extractor by delegating to {@code parentFolderRepoInitHandler.reset()}.
+     */
+    public void reset() {
+        parentFolderRepoInitHandler.reset();
+    }
+
+    /**
+     * Forwards both arguments unchanged to {@code parentFolderRepoInitHandler.addRepoinitExtension}.
+     *
+     * @param assemblers the package assemblers passed on to the handler
+     * @param featureManager the features manager passed on to the handler
+     * @throws IOException if the handler reports an I/O failure
+     * @throws ConverterException if the handler reports a conversion failure
+     */
+    public void addRepoinitExtension(@NotNull List<VaultPackageAssembler> assemblers, @NotNull FeaturesManager featureManager) throws IOException, ConverterException {
+        parentFolderRepoInitHandler.addRepoinitExtension(assemblers, featureManager);
+    }
+
+    protected void finalizePackageAssembly(@NotNull BundleSlingInitialContentExtractorContext context) throws IOException, ConverterException {
+        for (Map.Entry<PackageType, VaultPackageAssembler> entry : assemblerProvider.getPackageAssemblerEntrySet()) {
+            File packageFile = entry.getValue().createPackage();
+            ContentPackage2FeatureModelConverter converter = context.getConverter();
+            converter.processSubPackage(context.getPath() + "-" + entry.getKey(), context.getRunMode(), converter.open(packageFile), false);
+        }
+        assemblerProvider.clear();
+    }
+
+    private void safelyWriteOutputStream(@NotNull long compressedSize, 
+                                         @NotNull AtomicLong total, 
+                                         @NotNull byte[] data, 
+                                         @NotNull InputStream input, 
+                                         @NotNull OutputStream fos, 
+                                         boolean shouldClose) throws IOException {
+        int count;
+        BufferedOutputStream dest = new BufferedOutputStream(fos, BUFFER);
+        while (total.get() + BUFFER <= TOOBIG && (count = input.read(data, 0, BUFFER)) != -1) {
+            dest.write(data, 0, count);
+            total.addAndGet(count);
+
+            double compressionRatio = (double) count / compressedSize;
+            if(compressionRatio > THRESHOLD_RATIO) {

Review comment:
       minor nitpicking: there should be a whitespace between if and the (

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/BundleSlingInitialContentExtractor.java
##########
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jackrabbit.vault.packaging.PackageType;
+import org.apache.sling.feature.cpconverter.ContentPackage2FeatureModelConverter;
+import org.apache.sling.feature.cpconverter.ConverterException;
+import org.apache.sling.feature.cpconverter.features.FeaturesManager;
+import org.apache.sling.feature.cpconverter.vltpkg.VaultPackageAssembler;
+import org.apache.sling.jcr.contentloader.PathEntry;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+import org.osgi.framework.Constants;
+import org.osgi.framework.Version;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.jar.Attributes;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+
+/**
+ * Extracts the Sling initial content from a bundle and returns the stripped bundle as a java.io.InputStream.
+ */
+public class BundleSlingInitialContentExtractor {
+
+    private static final double THRESHOLD_RATIO = 10;
+    private static final int BUFFER = 512;
+    private static final long TOOBIG = 0x6400000; // Max size of unzipped data, 100MB
+
+    protected final AssemblerProvider assemblerProvider = new AssemblerProvider();
+    protected final ContentReaderProvider contentReaderProvider = new ContentReaderProvider();
+    protected final ParentFolderRepoInitHandler parentFolderRepoInitHandler = new ParentFolderRepoInitHandler();
+    
+    static Version getModifiedOsgiVersion(@NotNull Version originalVersion) {
+        return new Version( originalVersion.getMajor(), 
+                            originalVersion.getMinor(), 
+                            originalVersion.getMicro(), 
+                    originalVersion.getQualifier() + "_" + ContentPackage2FeatureModelConverter.PACKAGE_CLASSIFIER);
+    }
+
+    /**
+     * Separates the Sling initial content of a bundle from the rest of the bundle.
+     * <p>
+     * The content header is removed from the manifest and the bundle version gets the converter suffix.
+     * Entries matching one of the context's path entries are written below the converter's temp directory
+     * and afterwards handed to a {@link BundleSlingInitialContentJarEntryExtractor}; every other entry
+     * (except the manifest itself) is copied into a new temporary JAR.
+     *
+     * @param context provides the converter, the bundle's jar file, manifest, path entries and policy
+     * @return a stream over the stripped bundle whose backing temp file is deleted when the stream is closed,
+     *         or {@code null} if the policy is {@code KEEP} or the path entry list is empty
+     * @throws IOException if reading or writing fails, or an entry would be written outside of the temp directory
+     * @throws ConverterException propagated from the per-entry extraction and the package assembly
+     * @throws IllegalStateException if the amount of unzipped data reaches the maximum ({@code TOOBIG})
+     */
+    @SuppressWarnings("java:S5042") // we already addressed this
+    @Nullable public InputStream extract(@NotNull BundleSlingInitialContentExtractorContext context) throws IOException, ConverterException {
+
+        ContentPackage2FeatureModelConverter contentPackage2FeatureModelConverter = context.getConverter();
+
+        if (context.getSlingInitialContentPolicy() == ContentPackage2FeatureModelConverter.SlingInitialContentPolicy.KEEP) {
+            return null;
+        }
+        if (CollectionUtils.isEmpty(context.getPathEntryList())) {
+            return null;
+        }
+
+        // remove header
+        final Manifest manifest = context.getManifest();
+        manifest.getMainAttributes().remove(new Attributes.Name(PathEntry.CONTENT_HEADER));
+        // change version to have suffix
+        Version originalVersion = new Version(Objects.requireNonNull(manifest.getMainAttributes().getValue(Constants.BUNDLE_VERSION)));
+        manifest.getMainAttributes().putValue(Constants.BUNDLE_VERSION, getModifiedOsgiVersion(originalVersion).toString());
+        Path newBundleFile = Files.createTempFile(contentPackage2FeatureModelConverter.getTempDirectory().toPath(), "newBundle", ".jar");
+        String basePath = contentPackage2FeatureModelConverter.getTempDirectory().getPath();
+
+        // create JAR file to prevent extracting it twice and for random access
+        try (OutputStream fileOutput = Files.newOutputStream(newBundleFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
+             JarOutputStream bundleOutput = new JarOutputStream(fileOutput, manifest)) {
+
+            Set<SlingInitialContentBundleEntryMetaData> collectedSlingInitialContentBundleEntries = new HashSet<>();
+
+            AtomicLong total = new AtomicLong(0);
+            // one scratch buffer for all entries: every read overwrites it and only the bytes read are written
+            final byte[] data = new byte[BUFFER];
+
+            final JarFile jarFile = context.getJarFile();
+            Enumeration<? extends JarEntry> entries = jarFile.entries();
+
+            // first we collect all the entries into a set, collectedSlingInitialContentBundleEntries.
+            // we need it up front to be able to perform various checks in another loop later.
+            while (entries.hasMoreElements()) {
+                JarEntry jarEntry = entries.nextElement();
+
+                if (jarEntry.getName().equals(JarFile.MANIFEST_NAME)) {
+                    continue;
+                }
+
+                long compressedSize = jarEntry.getCompressedSize();
+                if (!jarEntry.isDirectory()) {
+                    try (InputStream input = new BufferedInputStream(jarFile.getInputStream(jarEntry))) {
+                        if (containsSlingInitialContent(context, jarEntry)) {
+
+                            File targetFile = new File(contentPackage2FeatureModelConverter.getTempDirectory(), jarEntry.getName());
+
+                            // Compare whole path elements instead of raw string prefixes: a plain
+                            // String.startsWith would also accept a sibling such as "<tmp>-other/file".
+                            Path canonicalTempDirectory = contentPackage2FeatureModelConverter.getTempDirectory().getCanonicalFile().toPath();
+                            if (!targetFile.getCanonicalFile().toPath().startsWith(canonicalTempDirectory)) {
+                                throw new IOException("Entry is outside of the target directory");
+                            }
+
+                            targetFile.getParentFile().mkdirs();
+                            if (!targetFile.exists() && !targetFile.createNewFile()) {
+                                throw new IOException("Could not create placeholder file!");
+                            }
+
+                            // try-with-resources releases the file handle even if copying fails;
+                            // closing it a second time after a successful copy is harmless.
+                            try (FileOutputStream fos = new FileOutputStream(targetFile)) {
+                                safelyWriteOutputStream(compressedSize, total, data, input, fos, true);
+                            }
+
+                            SlingInitialContentBundleEntryMetaData bundleEntry = createSlingInitialContentBundleEntry(context, basePath, jarEntry, targetFile);
+                            collectedSlingInitialContentBundleEntries.add(bundleEntry);
+                        } else {
+                            bundleOutput.putNextEntry(jarEntry);
+                            safelyWriteOutputStream(compressedSize, total, data, input, bundleOutput, false);
+                            // NOTE(review): this copies whatever the guarded write above left unread and is
+                            // not counted in 'total' - confirm whether the size guard is meant to cover it.
+                            IOUtils.copy(input, bundleOutput);
+                            bundleOutput.closeEntry();
+                        }
+                    }
+                }
+
+                if (total.get() + BUFFER > TOOBIG) {
+                    throw new IllegalStateException("File being unzipped is too big.");
+                }
+
+            }
+
+            // now that we got collectedSlingInitialContentBundleEntries ready, we loop it and perform an extract for each entry.
+            BundleSlingInitialContentJarEntryExtractor jarEntryExtractor =
+                    new BundleSlingInitialContentJarEntryExtractor(assemblerProvider, contentReaderProvider, parentFolderRepoInitHandler);
+
+            for (SlingInitialContentBundleEntryMetaData slingInitialContentBundleEntryMetaData : collectedSlingInitialContentBundleEntries) {
+                jarEntryExtractor.extractSlingInitialContent(context, slingInitialContentBundleEntryMetaData, collectedSlingInitialContentBundleEntries);
+            }
+
+        }
+
+        // add additional content packages to feature model
+        finalizePackageAssembly(context);
+
+        // return stripped bundle's inputstream which must be deleted on close
+        return Files.newInputStream(newBundleFile, StandardOpenOption.READ, StandardOpenOption.DELETE_ON_CLOSE);
+    }
+
+    @NotNull
+    private SlingInitialContentBundleEntryMetaData createSlingInitialContentBundleEntry(@NotNull BundleSlingInitialContentExtractorContext context,
+                                                                                        @NotNull String basePath,
+                                                                                        @NotNull JarEntry jarEntry,
+                                                                                        @NotNull File targetFile) throws UnsupportedEncodingException {
+        final String entryName = StringUtils.substringAfter( targetFile.getPath(), basePath + "/");
+        final PathEntry pathEntryValue = context.getPathEntryList().stream().filter(p -> entryName.startsWith( p.getPath())).findFirst().orElseThrow(NullPointerException::new);
+        final String target = pathEntryValue.getTarget();
+        // https://sling.apache.org/documentation/bundles/content-loading-jcr-contentloader.html#file-name-escaping
+        String repositoryPath = (target != null ? target : "/") + URLDecoder.decode(entryName.substring(pathEntryValue.getPath().length()), "UTF-8");
+        return new SlingInitialContentBundleEntryMetaData(targetFile, pathEntryValue, repositoryPath);
+    }
+
+
+
+    /**
+     * Resets this extractor by delegating to {@code parentFolderRepoInitHandler.reset()}.
+     */
+    public void reset() {
+        parentFolderRepoInitHandler.reset();
+    }
+
+    /**
+     * Forwards both arguments unchanged to {@code parentFolderRepoInitHandler.addRepoinitExtension}.
+     *
+     * @param assemblers the package assemblers passed on to the handler
+     * @param featureManager the features manager passed on to the handler
+     * @throws IOException if the handler reports an I/O failure
+     * @throws ConverterException if the handler reports a conversion failure
+     */
+    public void addRepoinitExtension(@NotNull List<VaultPackageAssembler> assemblers, @NotNull FeaturesManager featureManager) throws IOException, ConverterException {
+        parentFolderRepoInitHandler.addRepoinitExtension(assemblers, featureManager);
+    }
+
+    protected void finalizePackageAssembly(@NotNull BundleSlingInitialContentExtractorContext context) throws IOException, ConverterException {
+        for (Map.Entry<PackageType, VaultPackageAssembler> entry : assemblerProvider.getPackageAssemblerEntrySet()) {
+            File packageFile = entry.getValue().createPackage();
+            ContentPackage2FeatureModelConverter converter = context.getConverter();
+            converter.processSubPackage(context.getPath() + "-" + entry.getKey(), context.getRunMode(), converter.open(packageFile), false);
+        }
+        assemblerProvider.clear();
+    }
+
+    private void safelyWriteOutputStream(@NotNull long compressedSize, 
+                                         @NotNull AtomicLong total, 
+                                         @NotNull byte[] data, 
+                                         @NotNull InputStream input, 
+                                         @NotNull OutputStream fos, 
+                                         boolean shouldClose) throws IOException {
+        int count;
+        BufferedOutputStream dest = new BufferedOutputStream(fos, BUFFER);
+        while (total.get() + BUFFER <= TOOBIG && (count = input.read(data, 0, BUFFER)) != -1) {
+            dest.write(data, 0, count);
+            total.addAndGet(count);
+
+            double compressionRatio = (double) count / compressedSize;
+            if(compressionRatio > THRESHOLD_RATIO) {
+                // ratio between compressed and uncompressed data is highly suspicious, looks like a Zip Bomb Attack
+                break;
+            }
+        }
+        dest.flush();
+
+        if(shouldClose){
+            dest.close();
+        }
+
+    }
+
+    /**
+     * Returns whether the jarEntry is initial content
+     * @param jarEntry
+     * @return
+     */
+    private boolean containsSlingInitialContent( @NotNull BundleSlingInitialContentExtractorContext context, @NotNull JarEntry jarEntry){
+        final String entryName = jarEntry.getName();
+        return  context.getPathEntryList().stream().anyMatch(p -> entryName.startsWith(p.getPath()));

Review comment:
       minor nitpicking: there seems to be an extra whitespace between return and context.

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/BundleSlingInitialContentJarEntryExtractor.java
##########
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jackrabbit.util.Text;
+import org.apache.jackrabbit.vault.fs.io.Archive;
+import org.apache.jackrabbit.vault.util.PlatformNameFormat;
+import org.apache.sling.feature.cpconverter.ConverterException;
+import org.apache.sling.feature.cpconverter.vltpkg.DocViewSerializerContentHandlerException;
+import org.apache.sling.feature.cpconverter.vltpkg.SingleFileArchive;
+import org.apache.sling.feature.cpconverter.vltpkg.VaultPackageAssembler;
+import org.apache.sling.jcr.contentloader.ContentReader;
+import org.apache.sling.jcr.contentloader.PathEntry;
+import org.jetbrains.annotations.NotNull;
+
+import javax.jcr.RepositoryException;
+import javax.xml.stream.XMLStreamException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Set;
+
+/**
+ * Handles the bundle sling initial content extraction on the jarEntry level.
+ */
+public class BundleSlingInitialContentJarEntryExtractor {
+
+
+    private final AssemblerProvider assemblerProvider;
+    private final ContentReaderProvider contentReaderProvider;
+    private final ParentFolderRepoInitHandler parentFolderRepoInitHandler;
+
+    /**
+     * Creates an extractor that works with the given collaborators; the references are stored as passed in.
+     *
+     * @param assemblerProvider the assembler provider to use
+     * @param contentReaderProvider the content reader provider to use
+     * @param parentFolderRepoInitHandler the parent folder repoinit handler to use
+     */
+    public BundleSlingInitialContentJarEntryExtractor(@NotNull AssemblerProvider assemblerProvider,
+                                                      @NotNull ContentReaderProvider contentReaderProvider,
+                                                      @NotNull ParentFolderRepoInitHandler parentFolderRepoInitHandler) {
+        this.parentFolderRepoInitHandler = parentFolderRepoInitHandler;
+        this.contentReaderProvider = contentReaderProvider;
+        this.assemblerProvider = assemblerProvider;
+    }
+
+    /**
+     *
+     * @return {@code true} in case the given entry was part of the initial content otherwise {@code false}
+     * @throws Exception
+     */
+    public void extractSlingInitialContent(@NotNull BundleSlingInitialContentExtractorContext context, 
+                                           @NotNull SlingInitialContentBundleEntryMetaData slingInitialContentBundleEntryMetaData, 
+                                           @NotNull Set<SlingInitialContentBundleEntryMetaData> collectedSlingInitialContentBundleEntries) throws IOException, ConverterException {
+
+        String repositoryPath = slingInitialContentBundleEntryMetaData.getRepositoryPath();
+        File file = slingInitialContentBundleEntryMetaData.getTargetFile();
+        PathEntry pathEntryValue = slingInitialContentBundleEntryMetaData.getPathEntry();
+        // all entry paths used by entry handlers start with "/"
+        String contentPackageEntryPath = "/" + org.apache.jackrabbit.vault.util.Constants.ROOT_DIR + PlatformNameFormat.getPlatformPath(repositoryPath);
+
+        Path tmpDocViewInputFile = null;
+
+        try(InputStream bundleFileInputStream = new FileInputStream(file)) {

Review comment:
       missing space between try and (

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/ContentPackageEntryPathComputer.java
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.jetbrains.annotations.NotNull;
+
+import java.util.Set;
+
+import static org.apache.jackrabbit.vault.util.Constants.DOT_CONTENT_XML;
+
+/**
+ * Performs re-computation of the ContentPackagePath of the bundle entry (Sling Initial Content)
+ */
+public class ContentPackageEntryPathComputer {
+
+    private final Set<SlingInitialContentBundleEntryMetaData> bundleEntries;
+    private final String contentPackageEntryPath;
+    private final VaultContentXMLContentCreator contentCreator;
+
+    /**
+     * Creates a computer for a single content package entry path; the arguments are stored as passed in.
+     *
+     * @param bundleEntries the collected bundle entries the computed path is compared against
+     * @param contentPackageEntryPath the content package entry path to recompute
+     * @param contentCreator the content creator that is asked for the primary node name
+     */
+    public ContentPackageEntryPathComputer(@NotNull Set<SlingInitialContentBundleEntryMetaData> bundleEntries,
+                                           @NotNull final String contentPackageEntryPath,
+                                           @NotNull VaultContentXMLContentCreator contentCreator) {
+        this.contentCreator = contentCreator;
+        this.contentPackageEntryPath = contentPackageEntryPath;
+        this.bundleEntries = bundleEntries;
+    }
+
+    @NotNull
+    public String compute() {
+
+        String recomputedContentPackageEntryPath = FilenameUtils.removeExtension(contentPackageEntryPath);
+
+        // this covers the case of having a primary node name defined in the xml/json descriptor itself.
+        // if this is set, we need to use it in the path.
+        if(StringUtils.isNotBlank(contentCreator.getPrimaryNodeName())){
+            //custom node name
+            recomputedContentPackageEntryPath = StringUtils.substringBeforeLast(recomputedContentPackageEntryPath, "/") ;
+            recomputedContentPackageEntryPath = recomputedContentPackageEntryPath + "/" + contentCreator.getPrimaryNodeName();
+        }
+
+        final String checkIfRecomputedPathCandidate = StringUtils.removeStart(recomputedContentPackageEntryPath, "/jcr_root");
+        //  check if the resulting candidate matches one of the repositoryPaths in the bundle entries we have.
+        //  for example        /apps/testJsonFile.json.xml (descriptor entry)
+        //  will match         /apps/testJsonFile.json (file entry)
+        if(bundleEntries.stream().anyMatch(bundleEntry -> StringUtils.equals(checkIfRecomputedPathCandidate,bundleEntry.getRepositoryPath()))){
+            //we are dealing with a file descriptor here
+            recomputedContentPackageEntryPath = recomputedContentPackageEntryPath + ".dir/" + DOT_CONTENT_XML;
+        }else{

Review comment:
       and here: whitespace is missing around the else (it should read "} else {")

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/ContentPackageEntryPathComputer.java
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.jetbrains.annotations.NotNull;
+
+import java.util.Set;
+
+import static org.apache.jackrabbit.vault.util.Constants.DOT_CONTENT_XML;
+
+/**
+ * Performs re-computation of the ContentPackagePath of the bundle entry (Sling Initial Content)
+ */
+public class ContentPackageEntryPathComputer {
+
+    private final Set<SlingInitialContentBundleEntryMetaData> bundleEntries;
+    private final String contentPackageEntryPath;
+    private final VaultContentXMLContentCreator contentCreator;
+
+    /**
+     * Creates a computer for a single content package entry path.
+     *
+     * @param bundleEntries           the collected bundle entries this computer works on
+     * @param contentPackageEntryPath the content package entry path to recompute
+     * @param contentCreator          the content creator that is asked for the primary node name
+     */
+    public ContentPackageEntryPathComputer(@NotNull Set<SlingInitialContentBundleEntryMetaData> bundleEntries,
+                                           @NotNull final String contentPackageEntryPath,
+                                           @NotNull VaultContentXMLContentCreator contentCreator) {
+        this.contentCreator = contentCreator;
+        this.contentPackageEntryPath = contentPackageEntryPath;
+        this.bundleEntries = bundleEntries;
+    }
+
+    @NotNull
+    public String compute() {
+
+        String recomputedContentPackageEntryPath = FilenameUtils.removeExtension(contentPackageEntryPath);
+
+        // this covers the case of having a primary node name defined in the xml/json descriptor itself.
+        // if this is set, we need to use it in the path.
+        if(StringUtils.isNotBlank(contentCreator.getPrimaryNodeName())){

Review comment:
       missing whitespace between if and ( and between ) and {

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/BundleSlingInitialContentExtractorContext.java
##########
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.sling.feature.ArtifactId;
+import org.apache.sling.feature.cpconverter.ContentPackage2FeatureModelConverter;
+import org.apache.sling.feature.cpconverter.vltpkg.JcrNamespaceRegistry;
+import org.apache.sling.jcr.contentloader.PathEntry;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Objects;
+import java.util.jar.JarFile;
+import java.util.jar.Manifest;
+
+/**
+ * Holds various context variables for the BundleSlingInitialContentExtractor
+ */
+public class BundleSlingInitialContentExtractorContext {
+
+    private final ContentPackage2FeatureModelConverter.SlingInitialContentPolicy slingInitialContentPolicy;
+    private final String path;
+    private final ArtifactId bundleArtifactId;
+    private final JarFile jarFile;
+    private final ContentPackage2FeatureModelConverter converter;
+    private final String runMode;
+    private final Manifest manifest;
+    private final JcrNamespaceRegistry namespaceRegistry;
+    private final Iterator<PathEntry> pathEntries;
+    private final List<PathEntry> pathEntryList = new ArrayList<>();
+
+    public BundleSlingInitialContentExtractorContext(@NotNull ContentPackage2FeatureModelConverter.SlingInitialContentPolicy slingInitialContentPolicy, 
+                                                     @NotNull String path, 
+                                                     @NotNull ArtifactId bundleArtifactId, 
+                                                     @NotNull JarFile jarFile, 
+                                                     @NotNull ContentPackage2FeatureModelConverter converter, 
+                                                     @Nullable String runMode) throws IOException {
+        this.slingInitialContentPolicy = slingInitialContentPolicy;
+        this.path = path;
+        this.bundleArtifactId = bundleArtifactId;
+        this.jarFile = jarFile;
+        this.converter = converter;
+        this.runMode = runMode;
+
+        this.manifest = Objects.requireNonNull(jarFile.getManifest());
+        this.namespaceRegistry = 
+                new JcrNamespaceRegistryProvider(manifest, 
+                                                jarFile, 
+                                                converter.getFeaturesManager().getNamespaceUriByPrefix()
+                ).provideRegistryFromBundle();
+
+        pathEntries = PathEntry.getContentPaths(manifest, -1);
+
+        if(pathEntries != null){

Review comment:
       again: missing whitespace between if and ( and between ) and {

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/BundleSlingInitialContentJarEntryExtractor.java
##########
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jackrabbit.util.Text;
+import org.apache.jackrabbit.vault.fs.io.Archive;
+import org.apache.jackrabbit.vault.util.PlatformNameFormat;
+import org.apache.sling.feature.cpconverter.ConverterException;
+import org.apache.sling.feature.cpconverter.vltpkg.DocViewSerializerContentHandlerException;
+import org.apache.sling.feature.cpconverter.vltpkg.SingleFileArchive;
+import org.apache.sling.feature.cpconverter.vltpkg.VaultPackageAssembler;
+import org.apache.sling.jcr.contentloader.ContentReader;
+import org.apache.sling.jcr.contentloader.PathEntry;
+import org.jetbrains.annotations.NotNull;
+
+import javax.jcr.RepositoryException;
+import javax.xml.stream.XMLStreamException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Set;
+
+/**
+ * Handles the bundle sling initial content extraction on the jarEntry level.
+ */
+public class BundleSlingInitialContentJarEntryExtractor {
+
+
+    private final AssemblerProvider assemblerProvider;
+    private final ContentReaderProvider contentReaderProvider;
+    private final ParentFolderRepoInitHandler parentFolderRepoInitHandler;
+
+    /**
+     * Creates a jar entry extractor that works with the given collaborators.
+     *
+     * @param assemblerProvider           provides the package assembler used for an entry's repository path
+     * @param contentReaderProvider       provides the content reader (if any) for an entry
+     * @param parentFolderRepoInitHandler the parent folder repoinit handler kept by this extractor
+     */
+    public BundleSlingInitialContentJarEntryExtractor(@NotNull AssemblerProvider assemblerProvider,
+                                                      @NotNull ContentReaderProvider contentReaderProvider,
+                                                      @NotNull ParentFolderRepoInitHandler parentFolderRepoInitHandler) {
+        this.parentFolderRepoInitHandler = parentFolderRepoInitHandler;
+        this.contentReaderProvider = contentReaderProvider;
+        this.assemblerProvider = assemblerProvider;
+    }
+
+    /**
+     *
+     * @return {@code true} in case the given entry was part of the initial content otherwise {@code false}
+     * @throws Exception
+     */
+    public void extractSlingInitialContent(@NotNull BundleSlingInitialContentExtractorContext context, 
+                                           @NotNull SlingInitialContentBundleEntryMetaData slingInitialContentBundleEntryMetaData, 
+                                           @NotNull Set<SlingInitialContentBundleEntryMetaData> collectedSlingInitialContentBundleEntries) throws IOException, ConverterException {
+
+        String repositoryPath = slingInitialContentBundleEntryMetaData.getRepositoryPath();
+        File file = slingInitialContentBundleEntryMetaData.getTargetFile();
+        PathEntry pathEntryValue = slingInitialContentBundleEntryMetaData.getPathEntry();
+        // all entry paths used by entry handlers start with "/"
+        String contentPackageEntryPath = "/" + org.apache.jackrabbit.vault.util.Constants.ROOT_DIR + PlatformNameFormat.getPlatformPath(repositoryPath);
+
+        Path tmpDocViewInputFile = null;
+
+        try(InputStream bundleFileInputStream = new FileInputStream(file)) {
+            VaultPackageAssembler packageAssembler = assemblerProvider.initPackageAssemblerForPath(context, repositoryPath, pathEntryValue);
+
+            final ContentReader contentReader = contentReaderProvider.getContentReaderForEntry(file, pathEntryValue);
+            if (contentReader != null) {
+
+                // convert to docview xml
+                tmpDocViewInputFile = Files.createTempFile(context.getConverter().getTempDirectory().toPath(), "docview", ".xml");
+                try (OutputStream docViewOutput = Files.newOutputStream(tmpDocViewInputFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING)) {
+
+                    repositoryPath = FilenameUtils.removeExtension(repositoryPath);
+                    boolean isFileDescriptorEntry = isFileDescriptor(collectedSlingInitialContentBundleEntries, contentPackageEntryPath);
+                    VaultContentXMLContentCreator contentCreator = new VaultContentXMLContentCreator(StringUtils.substringBeforeLast(repositoryPath, "/"), docViewOutput, context.getNamespaceRegistry(), packageAssembler, isFileDescriptorEntry);
+
+
+                    if(file.getName().endsWith(".xml")){

Review comment:
       missing space between if and ( and between ) and {

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/ContentPackageEntryPathComputer.java
##########
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.jetbrains.annotations.NotNull;
+
+import java.util.Set;
+
+import static org.apache.jackrabbit.vault.util.Constants.DOT_CONTENT_XML;
+
+/**
+ * Performs re-computation of the ContentPackagePath of the bundle entry (Sling Initial Content)
+ */
+public class ContentPackageEntryPathComputer {
+
+    private final Set<SlingInitialContentBundleEntryMetaData> bundleEntries;
+    private final String contentPackageEntryPath;
+    private final VaultContentXMLContentCreator contentCreator;
+
+    /**
+     * Creates a computer for a single content package entry path.
+     *
+     * @param bundleEntries           the collected bundle entries whose repository paths the recomputed path is compared with
+     * @param contentPackageEntryPath the content package entry path to recompute
+     * @param contentCreator          the content creator that is asked for the primary node name
+     */
+    public ContentPackageEntryPathComputer(@NotNull Set<SlingInitialContentBundleEntryMetaData> bundleEntries,
+                                           @NotNull final String contentPackageEntryPath,
+                                           @NotNull VaultContentXMLContentCreator contentCreator) {
+        this.contentCreator = contentCreator;
+        this.contentPackageEntryPath = contentPackageEntryPath;
+        this.bundleEntries = bundleEntries;
+    }
+
+    @NotNull
+    public String compute() {
+
+        String recomputedContentPackageEntryPath = FilenameUtils.removeExtension(contentPackageEntryPath);
+
+        // this covers the case of having a primary node name defined in the xml/json descriptor itself.
+        // if this is set, we need to use it in the path.
+        if(StringUtils.isNotBlank(contentCreator.getPrimaryNodeName())){
+            //custom node name
+            recomputedContentPackageEntryPath = StringUtils.substringBeforeLast(recomputedContentPackageEntryPath, "/") ;
+            recomputedContentPackageEntryPath = recomputedContentPackageEntryPath + "/" + contentCreator.getPrimaryNodeName();
+        }
+
+        final String checkIfRecomputedPathCandidate = StringUtils.removeStart(recomputedContentPackageEntryPath, "/jcr_root");
+        //  check if the resulting candidate matches one of the repositoryPaths in the bundle entries we have.
+        //  for example        /apps/testJsonFile.json.xml (descriptor entry)
+        //  will match         /apps/testJsonFile.json (file entry)
+        if(bundleEntries.stream().anyMatch(bundleEntry -> StringUtils.equals(checkIfRecomputedPathCandidate,bundleEntry.getRepositoryPath()))){

Review comment:
       Same here: missing whitespace between if and ( and between ) and {

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/BundleSlingInitialContentExtractor.java
##########
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jackrabbit.vault.packaging.PackageType;
+import org.apache.sling.feature.cpconverter.ContentPackage2FeatureModelConverter;
+import org.apache.sling.feature.cpconverter.ConverterException;
+import org.apache.sling.feature.cpconverter.features.FeaturesManager;
+import org.apache.sling.feature.cpconverter.vltpkg.VaultPackageAssembler;
+import org.apache.sling.jcr.contentloader.PathEntry;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+import org.osgi.framework.Constants;
+import org.osgi.framework.Version;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.jar.Attributes;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+
+/**
+ * Extracts the sling initial content from a bundle to an java.io.InputStream.
+ */
+public class BundleSlingInitialContentExtractor {
+
+    private static final double THRESHOLD_RATIO = 10;
+    private static final int BUFFER = 512;
+    private static final long TOOBIG = 0x6400000; // Max size of unzipped data, 100MB
+
+    protected final AssemblerProvider assemblerProvider = new AssemblerProvider();
+    protected final ContentReaderProvider contentReaderProvider = new ContentReaderProvider();
+    protected final ParentFolderRepoInitHandler parentFolderRepoInitHandler = new ParentFolderRepoInitHandler();
+    
+    static Version getModifiedOsgiVersion(@NotNull Version originalVersion) {
+        return new Version( originalVersion.getMajor(), 
+                            originalVersion.getMinor(), 
+                            originalVersion.getMicro(), 
+                    originalVersion.getQualifier() + "_" + ContentPackage2FeatureModelConverter.PACKAGE_CLASSIFIER);
+    }
+
+    /**
+     * Strips the Sling initial content from the bundle described by the given context.
+     * <p>
+     * Unless the policy is KEEP or the bundle has no initial content path entries, the initial
+     * content header is removed from the manifest, the bundle version gets the modified qualifier and
+     * a new JAR is written that contains every entry which is not initial content. Initial content
+     * entries are unpacked into the converter's temp directory, handed one by one to a
+     * {@link BundleSlingInitialContentJarEntryExtractor} and finally assembled into sub packages.
+     *
+     * @param context the extraction context (jar file, manifest, converter, policy, path entries)
+     * @return a stream on the stripped bundle whose backing temp file is deleted on close, or
+     *         {@code null} if nothing has to be extracted
+     * @throws IOException if reading or writing fails, or a jar entry resolves outside the temp directory
+     * @throws ConverterException propagated from the jar entry extraction or the package assembly
+     * @throws IllegalStateException if the amount of unpacked data exceeds the allowed maximum
+     */
+    @SuppressWarnings("java:S5042") // we already addressed this
+    @Nullable public InputStream extract(@NotNull BundleSlingInitialContentExtractorContext context) throws IOException, ConverterException {
+
+        ContentPackage2FeatureModelConverter contentPackage2FeatureModelConverter = context.getConverter();
+
+        if (context.getSlingInitialContentPolicy() == ContentPackage2FeatureModelConverter.SlingInitialContentPolicy.KEEP) {
+            return null;
+        }
+        if (CollectionUtils.isEmpty(context.getPathEntryList())) {
+            return null;
+        }
+
+        // remove header
+        final Manifest manifest = context.getManifest();
+        manifest.getMainAttributes().remove(new Attributes.Name(PathEntry.CONTENT_HEADER));
+        // change version to have suffix
+        Version originalVersion = new Version(Objects.requireNonNull(manifest.getMainAttributes().getValue(Constants.BUNDLE_VERSION)));
+        manifest.getMainAttributes().putValue(Constants.BUNDLE_VERSION, getModifiedOsgiVersion(originalVersion).toString());
+        Path newBundleFile = Files.createTempFile(contentPackage2FeatureModelConverter.getTempDirectory().toPath(), "newBundle", ".jar");
+        String basePath = contentPackage2FeatureModelConverter.getTempDirectory().getPath();
+        // canonical temp directory, used below to reject entries that would be written outside of it
+        final Path canonicalTempDirectory = contentPackage2FeatureModelConverter.getTempDirectory().getCanonicalFile().toPath();
+
+        // create JAR file to prevent extracting it twice and for random access
+        try (OutputStream fileOutput = Files.newOutputStream(newBundleFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
+             JarOutputStream bundleOutput = new JarOutputStream(fileOutput, manifest)) {
+
+            Set<SlingInitialContentBundleEntryMetaData> collectedSlingInitialContentBundleEntries = new HashSet<>();
+
+            AtomicLong total = new AtomicLong(0);
+
+            final JarFile jarFile = context.getJarFile();
+            Enumeration<? extends JarEntry> entries = jarFile.entries();
+
+            // first we collect all the entries into a set, collectedSlingInitialContentBundleEntries.
+            // we need it up front to be able to perform various checks in another loop later.
+            while (entries.hasMoreElements()) {
+                JarEntry jarEntry = entries.nextElement();
+
+                if (jarEntry.getName().equals(JarFile.MANIFEST_NAME)) {
+                    continue;
+                }
+                byte[] data = new byte[BUFFER];
+
+                long compressedSize = jarEntry.getCompressedSize();
+                if (!jarEntry.isDirectory()) {
+                    try (InputStream input = new BufferedInputStream(jarFile.getInputStream(jarEntry))) {
+                        if (containsSlingInitialContent(context, jarEntry)) {
+
+                            File targetFile = new File(contentPackage2FeatureModelConverter.getTempDirectory(), jarEntry.getName());
+
+                            // compare whole path elements: a plain string prefix check would also accept
+                            // a sibling directory whose name merely starts with the temp directory's name
+                            if (!targetFile.getCanonicalFile().toPath().startsWith(canonicalTempDirectory)) {
+                                throw new IOException("Entry is outside of the target directory");
+                            }
+
+                            targetFile.getParentFile().mkdirs();
+                            if (!targetFile.exists() && !targetFile.createNewFile()) {
+                                throw new IOException("Could not create placeholder file!");
+                            }
+
+                            // try-with-resources makes sure the file handle is released even if copying fails
+                            try (FileOutputStream fos = new FileOutputStream(targetFile)) {
+                                safelyWriteOutputStream(compressedSize, total, data, input, fos, true);
+                            }
+
+                            SlingInitialContentBundleEntryMetaData bundleEntry = createSlingInitialContentBundleEntry(context, basePath, jarEntry, targetFile);
+                            collectedSlingInitialContentBundleEntries.add(bundleEntry);
+                        } else {
+                            bundleOutput.putNextEntry(jarEntry);
+                            safelyWriteOutputStream(compressedSize, total, data, input, bundleOutput, false);
+                            // copies whatever is still unread in the entry's input stream
+                            IOUtils.copy(input, bundleOutput);
+                            bundleOutput.closeEntry();
+                        }
+                    }
+                }
+
+                if (total.get() + BUFFER > TOOBIG) {
+                    throw new IllegalStateException("File being unzipped is too big.");
+                }
+
+            }
+
+            // now that we got collectedSlingInitialContentBundleEntries ready, we loop it and perform an extract for each entry.
+            BundleSlingInitialContentJarEntryExtractor jarEntryExtractor =
+                    new BundleSlingInitialContentJarEntryExtractor(assemblerProvider, contentReaderProvider, parentFolderRepoInitHandler);
+
+            for (SlingInitialContentBundleEntryMetaData slingInitialContentBundleEntryMetaData : collectedSlingInitialContentBundleEntries) {
+                jarEntryExtractor.extractSlingInitialContent(context, slingInitialContentBundleEntryMetaData, collectedSlingInitialContentBundleEntries);
+            }
+
+        }
+
+        // add additional content packages to feature model
+        finalizePackageAssembly(context);
+
+        // return stripped bundle's inputstream which must be deleted on close
+        return Files.newInputStream(newBundleFile, StandardOpenOption.READ, StandardOpenOption.DELETE_ON_CLOSE);
+    }
+
+    @NotNull
+    private SlingInitialContentBundleEntryMetaData createSlingInitialContentBundleEntry(@NotNull BundleSlingInitialContentExtractorContext context,
+                                                                                        @NotNull String basePath,
+                                                                                        @NotNull JarEntry jarEntry,
+                                                                                        @NotNull File targetFile) throws UnsupportedEncodingException {
+        final String entryName = StringUtils.substringAfter( targetFile.getPath(), basePath + "/");
+        final PathEntry pathEntryValue = context.getPathEntryList().stream().filter(p -> entryName.startsWith( p.getPath())).findFirst().orElseThrow(NullPointerException::new);
+        final String target = pathEntryValue.getTarget();
+        // https://sling.apache.org/documentation/bundles/content-loading-jcr-contentloader.html#file-name-escaping
+        String repositoryPath = (target != null ? target : "/") + URLDecoder.decode(entryName.substring(pathEntryValue.getPath().length()), "UTF-8");
+        return new SlingInitialContentBundleEntryMetaData(targetFile, pathEntryValue, repositoryPath);
+    }
+
+
+
+    /**
+     * Resets the parent folder repoinit handler held by this extractor.
+     */
+    public void reset() {
+        parentFolderRepoInitHandler.reset();
+    }
+
+    /**
+     * Delegates to the parent folder repoinit handler to add the repoinit extension.
+     *
+     * @param assemblers     the package assemblers passed on to the handler
+     * @param featureManager the features manager passed on to the handler
+     * @throws IOException        if thrown by the handler
+     * @throws ConverterException if thrown by the handler
+     */
+    public void addRepoinitExtension(@NotNull List<VaultPackageAssembler> assemblers, @NotNull FeaturesManager featureManager) throws IOException, ConverterException {
+        parentFolderRepoInitHandler.addRepoinitExtension(assemblers, featureManager);
+    }
+
+    protected void finalizePackageAssembly(@NotNull BundleSlingInitialContentExtractorContext context) throws IOException, ConverterException {
+        for (Map.Entry<PackageType, VaultPackageAssembler> entry : assemblerProvider.getPackageAssemblerEntrySet()) {
+            File packageFile = entry.getValue().createPackage();
+            ContentPackage2FeatureModelConverter converter = context.getConverter();
+            converter.processSubPackage(context.getPath() + "-" + entry.getKey(), context.getRunMode(), converter.open(packageFile), false);
+        }
+        assemblerProvider.clear();
+    }
+
+    private void safelyWriteOutputStream(@NotNull long compressedSize, 
+                                         @NotNull AtomicLong total, 
+                                         @NotNull byte[] data, 
+                                         @NotNull InputStream input, 
+                                         @NotNull OutputStream fos, 
+                                         boolean shouldClose) throws IOException {
+        int count;
+        BufferedOutputStream dest = new BufferedOutputStream(fos, BUFFER);
+        while (total.get() + BUFFER <= TOOBIG && (count = input.read(data, 0, BUFFER)) != -1) {
+            dest.write(data, 0, count);
+            total.addAndGet(count);
+
+            double compressionRatio = (double) count / compressedSize;
+            if(compressionRatio > THRESHOLD_RATIO) {
+                // ratio between compressed and uncompressed data is highly suspicious, looks like a Zip Bomb Attack
+                break;
+            }
+        }
+        dest.flush();
+
+        if(shouldClose){
+            dest.close();
+        }
+
+    }
+
+    /**
+     * Returns whether the jarEntry is initial content
+     * @param jarEntry
+     * @return
+     */
+    private boolean containsSlingInitialContent( @NotNull BundleSlingInitialContentExtractorContext context, @NotNull JarEntry jarEntry){

Review comment:
       minor nitpicking: there should be a whitespace between ) and {
   

##########
File path: src/main/java/org/apache/sling/feature/cpconverter/handlers/slinginitialcontent/BundleSlingInitialContentExtractor.java
##########
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.sling.feature.cpconverter.handlers.slinginitialcontent;
+
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.jackrabbit.vault.packaging.PackageType;
+import org.apache.sling.feature.cpconverter.ContentPackage2FeatureModelConverter;
+import org.apache.sling.feature.cpconverter.ConverterException;
+import org.apache.sling.feature.cpconverter.features.FeaturesManager;
+import org.apache.sling.feature.cpconverter.vltpkg.VaultPackageAssembler;
+import org.apache.sling.jcr.contentloader.PathEntry;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+import org.osgi.framework.Constants;
+import org.osgi.framework.Version;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.jar.Attributes;
+import java.util.jar.JarEntry;
+import java.util.jar.JarFile;
+import java.util.jar.JarOutputStream;
+import java.util.jar.Manifest;
+
+/**
+ * Extracts the sling initial content from a bundle to a java.io.InputStream.
+ */
+public class BundleSlingInitialContentExtractor {
+
+    // upper bound for the ratio computed in safelyWriteOutputStream; a higher value stops the copy of an entry
+    private static final double THRESHOLD_RATIO = 10;
+    // chunk size in bytes, used for the read buffer and for the BufferedOutputStream in safelyWriteOutputStream
+    private static final int BUFFER = 512;
+    private static final long TOOBIG = 0x6400000; // Max size of unzipped data, 100MB
+
+    // collaborators handed to the BundleSlingInitialContentJarEntryExtractor that extract(...) creates;
+    // assemblerProvider is additionally drained and cleared by finalizePackageAssembly(...)
+    protected final AssemblerProvider assemblerProvider = new AssemblerProvider();
+    protected final ContentReaderProvider contentReaderProvider = new ContentReaderProvider();
+    protected final ParentFolderRepoInitHandler parentFolderRepoInitHandler = new ParentFolderRepoInitHandler();
+    
+    static Version getModifiedOsgiVersion(@NotNull Version originalVersion) {
+        return new Version( originalVersion.getMajor(), 
+                            originalVersion.getMinor(), 
+                            originalVersion.getMicro(), 
+                    originalVersion.getQualifier() + "_" + ContentPackage2FeatureModelConverter.PACKAGE_CLASSIFIER);
+    }
+
+    /**
+     * Splits the bundle described by {@code context} into a stripped bundle and its initial content.
+     * <p>
+     * The {@link PathEntry#CONTENT_HEADER} attribute is removed from the manifest and the bundle version
+     * gets the qualifier produced by {@link #getModifiedOsgiVersion(Version)}. Entries for which
+     * {@code containsSlingInitialContent} answers {@code true} are written below the converter's temp
+     * directory and handed to a {@link BundleSlingInitialContentJarEntryExtractor}; all other entries
+     * (except directories and the manifest entry) are copied into a new temporary jar file. Finally
+     * {@link #finalizePackageAssembly(BundleSlingInitialContentExtractorContext)} is invoked.
+     *
+     * @param context bundle, manifest, path entries, converter and policy to work with
+     * @return a stream on the stripped bundle whose backing file is deleted on close, or {@code null}
+     *         if the policy is {@code KEEP} or the context has no path entries
+     * @throws IOException           if reading or writing fails, or an entry resolves outside of the temp directory
+     * @throws ConverterException    propagated from the entry extraction and the package assembly
+     * @throws IllegalStateException if the amount of copied data reaches the {@code TOOBIG} limit
+     * @throws NullPointerException  if the manifest has no bundle version
+     */
+    @SuppressWarnings("java:S5042") // we already addressed this
+    @Nullable public InputStream extract(@NotNull BundleSlingInitialContentExtractorContext context) throws IOException, ConverterException {
+
+        ContentPackage2FeatureModelConverter contentPackage2FeatureModelConverter = context.getConverter();
+
+        if (context.getSlingInitialContentPolicy() == ContentPackage2FeatureModelConverter.SlingInitialContentPolicy.KEEP) {
+            return null;
+        }
+        if (CollectionUtils.isEmpty(context.getPathEntryList())) {
+            return null;
+        }
+
+        // remove header
+        final Manifest manifest = context.getManifest();
+        manifest.getMainAttributes().remove(new Attributes.Name(PathEntry.CONTENT_HEADER));
+        // change version to have suffix
+        Version originalVersion = new Version(Objects.requireNonNull(manifest.getMainAttributes().getValue(Constants.BUNDLE_VERSION)));
+        manifest.getMainAttributes().putValue(Constants.BUNDLE_VERSION, getModifiedOsgiVersion(originalVersion).toString());
+
+        final File tempDirectory = contentPackage2FeatureModelConverter.getTempDirectory();
+        Path newBundleFile = Files.createTempFile(tempDirectory.toPath(), "newBundle", ".jar");
+        String basePath = tempDirectory.getPath();
+        // resolved once up front: every extracted entry has to stay below this directory
+        final Path canonicalTempDirectory = tempDirectory.getCanonicalFile().toPath();
+
+        // create JAR file to prevent extracting it twice and for random access
+        try (OutputStream fileOutput = Files.newOutputStream(newBundleFile, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING);
+             JarOutputStream bundleOutput = new JarOutputStream(fileOutput, manifest)) {
+
+            Set<SlingInitialContentBundleEntryMetaData> collectedSlingInitialContentBundleEntries = new HashSet<>();
+
+            AtomicLong total = new AtomicLong(0);
+
+            final JarFile jarFile = context.getJarFile();
+            Enumeration<? extends JarEntry> entries = jarFile.entries();
+
+            // first we collect all the entries into a set, collectedSlingInitialContentBundleEntries.
+            // we need it up front to perform various checks in another loop later.
+            while (entries.hasMoreElements()) {
+                JarEntry jarEntry = entries.nextElement();
+
+                if (jarEntry.getName().equals(JarFile.MANIFEST_NAME)) {
+                    continue;
+                }
+                byte[] data = new byte[BUFFER];
+
+                long compressedSize = jarEntry.getCompressedSize();
+                if (!jarEntry.isDirectory()) {
+                    try (InputStream input = new BufferedInputStream(jarFile.getInputStream(jarEntry))) {
+                        if (containsSlingInitialContent(context, jarEntry)) {
+
+                            File targetFile = new File(tempDirectory, jarEntry.getName());
+
+                            // compare whole path elements: a plain String#startsWith would also accept a
+                            // sibling directory whose name merely begins with the temp directory's name
+                            if (!targetFile.getCanonicalFile().toPath().startsWith(canonicalTempDirectory)) {
+                                throw new IOException("Entry is outside of the target directory");
+                            }
+
+                            targetFile.getParentFile().mkdirs();
+                            if (!targetFile.exists() && !targetFile.createNewFile()) {
+                                throw new IOException("Could not create placeholder file!");
+                            }
+
+                            // try-with-resources releases the file handle even if the copy fails;
+                            // the additional close after a successful copy is harmless
+                            try (FileOutputStream fos = new FileOutputStream(targetFile)) {
+                                safelyWriteOutputStream(compressedSize, total, data, input, fos, true);
+                            }
+
+                            SlingInitialContentBundleEntryMetaData bundleEntry = createSlingInitialContentBundleEntry(context, basePath, jarEntry, targetFile);
+                            collectedSlingInitialContentBundleEntries.add(bundleEntry);
+                        } else {
+                            bundleOutput.putNextEntry(jarEntry);
+                            safelyWriteOutputStream(compressedSize, total, data, input, bundleOutput, false);
+                            // NOTE(review): this copies whatever safelyWriteOutputStream left unread and does
+                            // not add it to total - confirm that bypassing the size accounting is intended.
+                            IOUtils.copy(input, bundleOutput);
+                            bundleOutput.closeEntry();
+                        }
+                    }
+                }
+
+                if (total.get() + BUFFER > TOOBIG) {
+                    throw new IllegalStateException("File being unzipped is too big.");
+                }
+
+            }
+
+            // now that we got collectedSlingInitialContentBundleEntries ready, we loop it and perform an extract for each entry.
+            BundleSlingInitialContentJarEntryExtractor jarEntryExtractor =
+                    new BundleSlingInitialContentJarEntryExtractor(assemblerProvider, contentReaderProvider, parentFolderRepoInitHandler);
+
+            for (SlingInitialContentBundleEntryMetaData slingInitialContentBundleEntryMetaData : collectedSlingInitialContentBundleEntries) {
+                jarEntryExtractor.extractSlingInitialContent(context, slingInitialContentBundleEntryMetaData, collectedSlingInitialContentBundleEntries);
+            }
+
+        }
+
+        // add additional content packages to feature model
+        finalizePackageAssembly(context);
+
+        // return stripped bundle's inputstream which must be deleted on close
+        return Files.newInputStream(newBundleFile, StandardOpenOption.READ, StandardOpenOption.DELETE_ON_CLOSE);
+    }
+
+    @NotNull
+    private SlingInitialContentBundleEntryMetaData createSlingInitialContentBundleEntry(@NotNull BundleSlingInitialContentExtractorContext context,
+                                                                                        @NotNull String basePath,
+                                                                                        @NotNull JarEntry jarEntry,
+                                                                                        @NotNull File targetFile) throws UnsupportedEncodingException {
+        final String entryName = StringUtils.substringAfter( targetFile.getPath(), basePath + "/");
+        final PathEntry pathEntryValue = context.getPathEntryList().stream().filter(p -> entryName.startsWith( p.getPath())).findFirst().orElseThrow(NullPointerException::new);
+        final String target = pathEntryValue.getTarget();
+        // https://sling.apache.org/documentation/bundles/content-loading-jcr-contentloader.html#file-name-escaping
+        String repositoryPath = (target != null ? target : "/") + URLDecoder.decode(entryName.substring(pathEntryValue.getPath().length()), "UTF-8");
+        return new SlingInitialContentBundleEntryMetaData(targetFile, pathEntryValue, repositoryPath);
+    }
+
+
+
+    /**
+     * Resets the {@link ParentFolderRepoInitHandler} held by this extractor.
+     */
+    public void reset() {
+        this.parentFolderRepoInitHandler.reset();
+    }
+
+    /**
+     * Forwards both arguments to {@link ParentFolderRepoInitHandler#addRepoinitExtension}.
+     *
+     * @param assemblers     the assemblers passed on to the handler
+     * @param featureManager the features manager passed on to the handler
+     * @throws IOException        propagated from the handler
+     * @throws ConverterException propagated from the handler
+     */
+    public void addRepoinitExtension(@NotNull List<VaultPackageAssembler> assemblers, @NotNull FeaturesManager featureManager) throws IOException, ConverterException {
+        this.parentFolderRepoInitHandler.addRepoinitExtension(assemblers, featureManager);
+    }
+
+    protected void finalizePackageAssembly(@NotNull BundleSlingInitialContentExtractorContext context) throws IOException, ConverterException {
+        for (Map.Entry<PackageType, VaultPackageAssembler> entry : assemblerProvider.getPackageAssemblerEntrySet()) {
+            File packageFile = entry.getValue().createPackage();
+            ContentPackage2FeatureModelConverter converter = context.getConverter();
+            converter.processSubPackage(context.getPath() + "-" + entry.getKey(), context.getRunMode(), converter.open(packageFile), false);
+        }
+        assemblerProvider.clear();
+    }
+
+    private void safelyWriteOutputStream(@NotNull long compressedSize, 
+                                         @NotNull AtomicLong total, 
+                                         @NotNull byte[] data, 
+                                         @NotNull InputStream input, 
+                                         @NotNull OutputStream fos, 
+                                         boolean shouldClose) throws IOException {
+        int count;
+        BufferedOutputStream dest = new BufferedOutputStream(fos, BUFFER);
+        while (total.get() + BUFFER <= TOOBIG && (count = input.read(data, 0, BUFFER)) != -1) {
+            dest.write(data, 0, count);
+            total.addAndGet(count);
+
+            double compressionRatio = (double) count / compressedSize;
+            if(compressionRatio > THRESHOLD_RATIO) {
+                // ratio between compressed and uncompressed data is highly suspicious, looks like a Zip Bomb Attack
+                break;
+            }
+        }
+        dest.flush();
+
+        if(shouldClose){

Review comment:
       minor nitpicking: there should be a whitespace between "if" and "(", and another one before the "{"
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@sling.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org