You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by di...@apache.org on 2021/12/30 15:12:37 UTC

[sling-org-apache-sling-sitemap] branch master updated: SLING-10870: implement support for include/exclude paths (#5)

This is an automated email from the ASF dual-hosted git repository.

diru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-sitemap.git


The following commit(s) were added to refs/heads/master by this push:
     new d3dc27e  SLING-10870: implement support for include/exclude paths (#5)
d3dc27e is described below

commit d3dc27eda13142b93172b203f5c3f513165be3e6
Author: Dirk Rudolph <di...@apache.org>
AuthorDate: Thu Dec 30 16:12:30 2021 +0100

    SLING-10870: implement support for include/exclude paths (#5)
---
 .../sling/sitemap/impl/SitemapScheduler.java       | 59 +++++++++++++++++--
 .../sling/sitemap/impl/SitemapServiceImpl.java     | 10 +---
 .../sling/sitemap/impl/SitemapSchedulerTest.java   | 68 ++++++++++++++++++++--
 3 files changed, 120 insertions(+), 17 deletions(-)

diff --git a/src/main/java/org/apache/sling/sitemap/impl/SitemapScheduler.java b/src/main/java/org/apache/sling/sitemap/impl/SitemapScheduler.java
index d0fdc72..23495ec 100644
--- a/src/main/java/org/apache/sling/sitemap/impl/SitemapScheduler.java
+++ b/src/main/java/org/apache/sling/sitemap/impl/SitemapScheduler.java
@@ -22,6 +22,7 @@ import org.apache.sling.api.resource.LoginException;
 import org.apache.sling.api.resource.Resource;
 import org.apache.sling.api.resource.ResourceResolver;
 import org.apache.sling.api.resource.ResourceResolverFactory;
+import org.apache.sling.api.resource.path.PathSet;
 import org.apache.sling.commons.scheduler.Scheduler;
 import org.apache.sling.event.jobs.Job;
 import org.apache.sling.event.jobs.JobManager;
@@ -82,11 +83,21 @@ public class SitemapScheduler implements Runnable {
         @AttributeDefinition(name = "Search Path", description = "The path under which sitemap roots should be " +
                 "searched for")
         String searchPath() default "/content";
+
+        @AttributeDefinition(name = "Include Paths", description = "A list of paths that should be included by the scheduler. "
+            + "If left empty, all sitemap roots in the configured search path will be included. Absolute paths and glob patterns "
+            + "are supported.")
+        String[] includePaths() default {};
+
+        @AttributeDefinition(name = "Exclude Paths", description = "A list of paths that should be excluded by the scheduler. "
+            + "If left empty, no sitemap roots in the configured search path will be excluded. Absolute paths and glob patterns "
+            + "are supported.")
+        String[] excludePaths() default {};
     }
 
     public static final String THREADPOOL_NAME = "org-apache-sling-sitemap";
 
-    private static final Logger LOG = LoggerFactory.getLogger(SitemapScheduler.class);
+    private Logger log;
     private static final Map<String, Object> AUTH = Collections.singletonMap(ResourceResolverFactory.SUBSERVICE,
             "sitemap-reader");
 
@@ -103,13 +114,22 @@ public class SitemapScheduler implements Runnable {
     private Set<String> includeGenerators;
     private Set<String> excludeGenerators;
     private String searchPath;
+    private PathSet includePaths;
+    private PathSet excludePaths;
 
     @Activate
     protected void activate(Configuration configuration) {
+        log = LoggerFactory.getLogger(SitemapScheduler.class.getName() + '~' + configuration.scheduler_name());
         includeGenerators = asSet(configuration.includeGenerators());
         excludeGenerators = asSet(configuration.excludeGenerators());
         names = asSet(configuration.names());
         searchPath = configuration.searchPath();
+        if (configuration.includePaths().length > 0) {
+            includePaths = PathSet.fromStringCollection(Arrays.asList(configuration.includePaths()));
+        }
+        if (configuration.excludePaths().length > 0) {
+            excludePaths = PathSet.fromStringCollection(Arrays.asList(configuration.excludePaths()));
+        }
     }
 
     @Override
@@ -124,11 +144,15 @@ public class SitemapScheduler implements Runnable {
                 schedule(sitemapRoots.next(), includeNames);
             }
         } catch (LoginException ex) {
-            LOG.warn("Failed start sitemap jobs: {}", ex.getMessage(), ex);
+            log.warn("Failed start sitemap jobs: {}", ex.getMessage(), ex);
         }
     }
 
     public void schedule(Resource sitemapRoot, @Nullable Collection<String> includeNames) {
+        if (isExcluded(sitemapRoot)) {
+            return;
+        }
+
         Set<String> configuredNames = getApplicableNames(sitemapRoot);
 
         if (includeNames != null) {
@@ -140,16 +164,19 @@ public class SitemapScheduler implements Runnable {
         }
     }
 
-
     /**
      * Returns the names for the given sitemap root this {@link SitemapScheduler} is applicable to. This depends on the
-     * configured generators. If no generators were configured the names of all are returned. If some where configured
+     * configured generators. If no generators were configured the names of all are returned. If some are configured
      * the names provided only by those where the class name matches are returned.
      *
      * @param sitemapRoot
      * @return
      */
     public Set<String> getApplicableNames(Resource sitemapRoot) {
+        if (isExcluded(sitemapRoot)) {
+            return Collections.emptySet();
+        }
+
         Set<String> onDemandNames = generatorManager.getOnDemandNames(sitemapRoot);
         Set<String> toSchedule = generatorManager.getGenerators(sitemapRoot).entrySet().stream()
                 .filter(entry -> includeGenerators == null
@@ -168,12 +195,34 @@ public class SitemapScheduler implements Runnable {
         return toSchedule;
     }
 
+    protected boolean isExcluded(Resource sitemapRoot) {
+        // verify that the sitemapRoot is in the schedulers search path
+        if (!sitemapRoot.getPath().equals(searchPath) && !sitemapRoot.getPath().startsWith(searchPath + "/")) {
+            log.debug("Exclude sitemap root outside of the configured search path '{}': {}", searchPath, sitemapRoot.getPath());
+            return true;
+        }
+
+        // verify if the sitemapRoot is included
+        if (includePaths != null && includePaths.matches(sitemapRoot.getPath()) == null) {
+            log.debug("Sitemap root is not included: {}", sitemapRoot.getPath());
+            return true;
+        }
+
+        // verify if the sitemapRoot is not excluded
+        if (excludePaths != null && excludePaths.matches(sitemapRoot.getPath()) != null) {
+            log.debug("Sitemap root is explicitly excluded: {}", sitemapRoot.getPath());
+            return true;
+        }
+
+        return false;
+    }
+
     protected void addJob(String sitemapRoot, String applicableName) {
         Map<String, Object> jobProperties = new HashMap<>();
         jobProperties.put(SitemapGeneratorExecutor.JOB_PROPERTY_SITEMAP_NAME, applicableName);
         jobProperties.put(SitemapGeneratorExecutor.JOB_PROPERTY_SITEMAP_ROOT, sitemapRoot);
         Job job = jobManager.addJob(SitemapGeneratorExecutor.JOB_TOPIC, jobProperties);
-        LOG.debug("Added job {}", job.getId());
+        log.debug("Added job {}", job.getId());
     }
 
     @Nullable
diff --git a/src/main/java/org/apache/sling/sitemap/impl/SitemapServiceImpl.java b/src/main/java/org/apache/sling/sitemap/impl/SitemapServiceImpl.java
index bb2d27c..b6a31c5 100644
--- a/src/main/java/org/apache/sling/sitemap/impl/SitemapServiceImpl.java
+++ b/src/main/java/org/apache/sling/sitemap/impl/SitemapServiceImpl.java
@@ -98,10 +98,7 @@ public class SitemapServiceImpl implements SitemapService {
             return;
         }
         for (ServiceReference<SitemapScheduler> scheduler : schedulers.getServiceReferences()) {
-            Object searchPath = scheduler.getProperty("searchPath");
-            if (searchPath instanceof String && sitemapRoot.getPath().startsWith(searchPath + "/")) {
-                schedulers.getService(scheduler).schedule(sitemapRoot, null);
-            }
+            schedulers.getService(scheduler).schedule(sitemapRoot, null);
         }
     }
 
@@ -112,10 +109,7 @@ public class SitemapServiceImpl implements SitemapService {
         }
 
         for (ServiceReference<SitemapScheduler> scheduler : schedulers.getServiceReferences()) {
-            Object searchPath = scheduler.getProperty("searchPath");
-            if (searchPath instanceof String && sitemapRoot.getPath().startsWith(searchPath + "/")) {
-                schedulers.getService(scheduler).schedule(sitemapRoot, Collections.singleton(name));
-            }
+            schedulers.getService(scheduler).schedule(sitemapRoot, Collections.singleton(name));
         }
     }
 
diff --git a/src/test/java/org/apache/sling/sitemap/impl/SitemapSchedulerTest.java b/src/test/java/org/apache/sling/sitemap/impl/SitemapSchedulerTest.java
index b8b081b..eeb8b61 100644
--- a/src/test/java/org/apache/sling/sitemap/impl/SitemapSchedulerTest.java
+++ b/src/test/java/org/apache/sling/sitemap/impl/SitemapSchedulerTest.java
@@ -44,11 +44,12 @@ import org.mockito.junit.jupiter.MockitoExtension;
 import javax.jcr.Node;
 import javax.jcr.Session;
 import javax.jcr.query.Query;
-import java.util.*;
+import java.util.Collections;
+import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Consumer;
 
-import static org.junit.jupiter.api.Assertions.fail;
+import static org.junit.jupiter.api.Assertions.*;
 import static org.mockito.ArgumentMatchers.argThat;
 import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Mockito.*;
@@ -91,7 +92,7 @@ class SitemapSchedulerTest {
 
         AtomicInteger jobCount = new AtomicInteger(0);
 
-        when(jobManager.addJob(any(), any())).then(inv -> {
+        lenient().when(jobManager.addJob(any(), any())).then(inv -> {
             Job job = mock(Job.class);
             when(job.getId()).thenReturn(String.valueOf(jobCount.incrementAndGet()));
             return job;
@@ -194,7 +195,7 @@ class SitemapSchedulerTest {
     }
 
     @Test
-    void testNothingScheduledWhenNameDoesNotNamesFromConfiguration() {
+    void testNothingScheduledWhenNameDoesNotMatchNamesFromConfiguration() {
         // given
         context.registerInjectActivateService(subject, "names", new String[]{
                 "foobar"
@@ -226,6 +227,65 @@ class SitemapSchedulerTest {
         );
     }
 
+    @Test
+    void testPathsOutsideSearchPathExcluded() {
+        // given
+        context.registerInjectActivateService(subject, "searchPath", "/content/site/en");
+
+        // when, then
+        assertFalse(subject.isExcluded(rootEn));
+        assertFalse(subject.isExcluded(rootEnContent));
+        assertTrue(subject.isExcluded(rootDe));
+    }
+
+    @Test
+    void testIncludePaths() {
+        // given
+        context.registerInjectActivateService(subject, "includePaths", "glob:/content/site/*/jcr:content");
+
+        // when, then
+        assertTrue(subject.isExcluded(rootEn));
+        assertFalse(subject.isExcluded(rootEnContent));
+        assertTrue(subject.isExcluded(rootDe));
+    }
+
+    @Test
+    void testExcludePaths() {
+        // given
+        context.registerInjectActivateService(subject, "excludePaths", "glob:/content/site/en**");
+
+        // when, then
+        assertTrue(subject.isExcluded(rootEn));
+        assertTrue(subject.isExcluded(rootEnContent));
+        assertFalse(subject.isExcluded(rootDe));
+    }
+
+    @Test
+    void testNothingScheduledForExcludedSitemapRoot() {
+        // given
+        context.registerInjectActivateService(subject, "excludePaths", "glob:/content/site/en**");
+        generator1.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
+        generator2.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
+
+        // when
+        subject.schedule(rootEn, Collections.singleton(SitemapService.DEFAULT_SITEMAP_NAME));
+
+        // then
+        verify(jobManager, never()).addJob(any(), any());
+    }
+
+    @Test
+    void testNoApplicableNamesReturnedForExcludedSitemapRoot() {
+        // given
+        context.registerInjectActivateService(subject, "excludePaths", "glob:/content/site/en**");
+        generator1.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
+        generator2.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
+
+        // when, then
+        assertEquals(0, subject.getApplicableNames(rootEn).size());
+        assertEquals(1, subject.getApplicableNames(rootDe).size());
+    }
+
     private void initResourceResolver(SitemapScheduler scheduler, Consumer<ResourceResolver> resolverConsumer) {
         initResourceResolver(context, scheduler, resolverConsumer);
     }