You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by di...@apache.org on 2021/12/30 14:11:01 UTC

[sling-org-apache-sling-sitemap] 01/01: SLING-10870: implement support for include/exclude paths

This is an automated email from the ASF dual-hosted git repository.

diru pushed a commit to branch issue/SLING-10870
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-sitemap.git

commit 00c5b92a5cd15b769608eabfcbb37aa823b9aa47
Author: Dirk Rudolph <di...@apache.org>
AuthorDate: Thu Dec 30 15:09:40 2021 +0100

    SLING-10870: implement support for include/exclude paths
---
 .../sling/sitemap/impl/SitemapScheduler.java       |  59 +++++++-
 .../sling/sitemap/impl/SitemapServiceImpl.java     |  10 +-
 .../sling/sitemap/impl/SitemapSchedulerTest.java   | 168 ++++++++++++++-------
 3 files changed, 173 insertions(+), 64 deletions(-)

diff --git a/src/main/java/org/apache/sling/sitemap/impl/SitemapScheduler.java b/src/main/java/org/apache/sling/sitemap/impl/SitemapScheduler.java
index d0fdc72..23495ec 100644
--- a/src/main/java/org/apache/sling/sitemap/impl/SitemapScheduler.java
+++ b/src/main/java/org/apache/sling/sitemap/impl/SitemapScheduler.java
@@ -22,6 +22,7 @@ import org.apache.sling.api.resource.LoginException;
 import org.apache.sling.api.resource.Resource;
 import org.apache.sling.api.resource.ResourceResolver;
 import org.apache.sling.api.resource.ResourceResolverFactory;
+import org.apache.sling.api.resource.path.PathSet;
 import org.apache.sling.commons.scheduler.Scheduler;
 import org.apache.sling.event.jobs.Job;
 import org.apache.sling.event.jobs.JobManager;
@@ -82,11 +83,21 @@ public class SitemapScheduler implements Runnable {
         @AttributeDefinition(name = "Search Path", description = "The path under which sitemap roots should be " +
                 "searched for")
         String searchPath() default "/content";
+
+        @AttributeDefinition(name = "Include Paths", description = "A list of paths that should be included by the scheduler. "
+            + "If left empty, all sitemap roots in the configured search path will be included. Absolute paths and glob patterns "
+            + "are supported.")
+        String[] includePaths() default {};
+
+        @AttributeDefinition(name = "Exclude Paths", description = "A list of paths that should be excluded by the scheduler. "
+            + "If left empty, no sitemap roots in the configured search path will be excluded. Absolute paths and glob patterns "
+            + "are supported.")
+        String[] excludePaths() default {};
     }
 
     public static final String THREADPOOL_NAME = "org-apache-sling-sitemap";
 
-    private static final Logger LOG = LoggerFactory.getLogger(SitemapScheduler.class);
+    private Logger log;
     private static final Map<String, Object> AUTH = Collections.singletonMap(ResourceResolverFactory.SUBSERVICE,
             "sitemap-reader");
 
@@ -103,13 +114,22 @@ public class SitemapScheduler implements Runnable {
     private Set<String> includeGenerators;
     private Set<String> excludeGenerators;
     private String searchPath;
+    private PathSet includePaths;
+    private PathSet excludePaths;
 
     @Activate
     protected void activate(Configuration configuration) {
+        log = LoggerFactory.getLogger(SitemapScheduler.class.getName() + '~' + configuration.scheduler_name());
         includeGenerators = asSet(configuration.includeGenerators());
         excludeGenerators = asSet(configuration.excludeGenerators());
         names = asSet(configuration.names());
         searchPath = configuration.searchPath();
+        if (configuration.includePaths().length > 0) {
+            includePaths = PathSet.fromStringCollection(Arrays.asList(configuration.includePaths()));
+        }
+        if (configuration.excludePaths().length > 0) {
+            excludePaths = PathSet.fromStringCollection(Arrays.asList(configuration.excludePaths()));
+        }
     }
 
     @Override
@@ -124,11 +144,15 @@ public class SitemapScheduler implements Runnable {
                 schedule(sitemapRoots.next(), includeNames);
             }
         } catch (LoginException ex) {
-            LOG.warn("Failed start sitemap jobs: {}", ex.getMessage(), ex);
+            log.warn("Failed start sitemap jobs: {}", ex.getMessage(), ex);
         }
     }
 
     public void schedule(Resource sitemapRoot, @Nullable Collection<String> includeNames) {
+        if (isExcluded(sitemapRoot)) {
+            return;
+        }
+
         Set<String> configuredNames = getApplicableNames(sitemapRoot);
 
         if (includeNames != null) {
@@ -140,16 +164,19 @@ public class SitemapScheduler implements Runnable {
         }
     }
 
-
     /**
      * Returns the names for the given sitemap root this {@link SitemapScheduler} is applicable to. This depends on the
-     * configured generators. If no generators were configured the names of all are returned. If some where configured
+     * configured generators. If no generators were configured the names of all are returned. If some are configured
      * the names provided only by those where the class name matches are returned.
      *
      * @param sitemapRoot
      * @return
      */
     public Set<String> getApplicableNames(Resource sitemapRoot) {
+        if (isExcluded(sitemapRoot)) {
+            return Collections.emptySet();
+        }
+
         Set<String> onDemandNames = generatorManager.getOnDemandNames(sitemapRoot);
         Set<String> toSchedule = generatorManager.getGenerators(sitemapRoot).entrySet().stream()
                 .filter(entry -> includeGenerators == null
@@ -168,12 +195,34 @@ public class SitemapScheduler implements Runnable {
         return toSchedule;
     }
 
+    protected boolean isExcluded(Resource sitemapRoot) {
+        // verify that the sitemapRoot is in the schedulers search path
+        if (!sitemapRoot.getPath().equals(searchPath) && !sitemapRoot.getPath().startsWith(searchPath + "/")) {
+            log.debug("Exclude sitemap root outside of the configured search path '{}': {}", searchPath, sitemapRoot.getPath());
+            return true;
+        }
+
+        // verify if the sitemapRoot is included
+        if (includePaths != null && includePaths.matches(sitemapRoot.getPath()) == null) {
+            log.debug("Sitemap root is not included: {}", sitemapRoot.getPath());
+            return true;
+        }
+
+        // verify if the sitemapRoot is not excluded
+        if (excludePaths != null && excludePaths.matches(sitemapRoot.getPath()) != null) {
+            log.debug("Sitemap root is explicitly excluded: {}", sitemapRoot.getPath());
+            return true;
+        }
+
+        return false;
+    }
+
     protected void addJob(String sitemapRoot, String applicableName) {
         Map<String, Object> jobProperties = new HashMap<>();
         jobProperties.put(SitemapGeneratorExecutor.JOB_PROPERTY_SITEMAP_NAME, applicableName);
         jobProperties.put(SitemapGeneratorExecutor.JOB_PROPERTY_SITEMAP_ROOT, sitemapRoot);
         Job job = jobManager.addJob(SitemapGeneratorExecutor.JOB_TOPIC, jobProperties);
-        LOG.debug("Added job {}", job.getId());
+        log.debug("Added job {}", job.getId());
     }
 
     @Nullable
diff --git a/src/main/java/org/apache/sling/sitemap/impl/SitemapServiceImpl.java b/src/main/java/org/apache/sling/sitemap/impl/SitemapServiceImpl.java
index bb2d27c..b6a31c5 100644
--- a/src/main/java/org/apache/sling/sitemap/impl/SitemapServiceImpl.java
+++ b/src/main/java/org/apache/sling/sitemap/impl/SitemapServiceImpl.java
@@ -98,10 +98,7 @@ public class SitemapServiceImpl implements SitemapService {
             return;
         }
         for (ServiceReference<SitemapScheduler> scheduler : schedulers.getServiceReferences()) {
-            Object searchPath = scheduler.getProperty("searchPath");
-            if (searchPath instanceof String && sitemapRoot.getPath().startsWith(searchPath + "/")) {
-                schedulers.getService(scheduler).schedule(sitemapRoot, null);
-            }
+            schedulers.getService(scheduler).schedule(sitemapRoot, null);
         }
     }
 
@@ -112,10 +109,7 @@ public class SitemapServiceImpl implements SitemapService {
         }
 
         for (ServiceReference<SitemapScheduler> scheduler : schedulers.getServiceReferences()) {
-            Object searchPath = scheduler.getProperty("searchPath");
-            if (searchPath instanceof String && sitemapRoot.getPath().startsWith(searchPath + "/")) {
-                schedulers.getService(scheduler).schedule(sitemapRoot, Collections.singleton(name));
-            }
+            schedulers.getService(scheduler).schedule(sitemapRoot, Collections.singleton(name));
         }
     }
 
diff --git a/src/test/java/org/apache/sling/sitemap/impl/SitemapSchedulerTest.java b/src/test/java/org/apache/sling/sitemap/impl/SitemapSchedulerTest.java
index b8b081b..9a50a9e 100644
--- a/src/test/java/org/apache/sling/sitemap/impl/SitemapSchedulerTest.java
+++ b/src/test/java/org/apache/sling/sitemap/impl/SitemapSchedulerTest.java
@@ -19,6 +19,7 @@
 package org.apache.sling.sitemap.impl;
 
 import com.google.common.collect.ImmutableList;
+
 import org.apache.sling.api.resource.LoginException;
 import org.apache.sling.api.resource.Resource;
 import org.apache.sling.api.resource.ResourceResolver;
@@ -44,16 +45,20 @@ import org.mockito.junit.jupiter.MockitoExtension;
 import javax.jcr.Node;
 import javax.jcr.Session;
 import javax.jcr.query.Query;
+
 import java.util.*;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Consumer;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 import static org.mockito.ArgumentMatchers.argThat;
 import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Mockito.*;
 
-@ExtendWith({SlingContextExtension.class, MockitoExtension.class})
+@ExtendWith({ SlingContextExtension.class, MockitoExtension.class })
 class SitemapSchedulerTest {
 
     final SlingContext context = new SlingContext(ResourceResolverType.JCR_MOCK);
@@ -62,8 +67,10 @@ class SitemapSchedulerTest {
     private final SitemapGeneratorManagerImpl generatorManager = new SitemapGeneratorManagerImpl();
     private final SitemapServiceConfiguration sitemapServiceConfiguration = new SitemapServiceConfiguration();
 
-    private final TestGenerator generator1 = new TestGenerator() {};
-    private final TestGenerator generator2 = new TestGenerator() {};
+    private final TestGenerator generator1 = new TestGenerator() {
+    };
+    private final TestGenerator generator2 = new TestGenerator() {
+    };
 
     @Mock
     private ServiceUserMapped serviceUser;
@@ -77,10 +84,10 @@ class SitemapSchedulerTest {
     @BeforeEach
     void setup() {
         rootDe = context.create().resource("/content/site/de", Collections.singletonMap(
-                SitemapService.PROPERTY_SITEMAP_ROOT, Boolean.TRUE));
+            SitemapService.PROPERTY_SITEMAP_ROOT, Boolean.TRUE));
         rootEn = context.create().resource("/content/site/en");
         rootEnContent = context.create().resource("/content/site/en/jcr:content", Collections.singletonMap(
-                SitemapService.PROPERTY_SITEMAP_ROOT, Boolean.TRUE));
+            SitemapService.PROPERTY_SITEMAP_ROOT, Boolean.TRUE));
 
         context.registerService(ServiceUserMapped.class, serviceUser, "subServiceName", "sitemap-reader");
         context.registerService(JobManager.class, jobManager);
@@ -91,7 +98,7 @@ class SitemapSchedulerTest {
 
         AtomicInteger jobCount = new AtomicInteger(0);
 
-        when(jobManager.addJob(any(), any())).then(inv -> {
+        lenient().when(jobManager.addJob(any(), any())).then(inv -> {
             Job job = mock(Job.class);
             when(job.getId()).thenReturn(String.valueOf(jobCount.incrementAndGet()));
             return job;
@@ -103,14 +110,14 @@ class SitemapSchedulerTest {
         // given
         context.registerInjectActivateService(subject);
         initResourceResolver(subject, resolver -> MockJcr.setQueryResult(
-                resolver.adaptTo(Session.class),
-                "/jcr:root/content//*[@" + SitemapService.PROPERTY_SITEMAP_ROOT + "=true]" +
-                        " option(index tag slingSitemaps)",
-                Query.XPATH,
-                ImmutableList.of(
-                        rootDe.adaptTo(Node.class),
-                        rootEnContent.adaptTo(Node.class)
-                )
+            resolver.adaptTo(Session.class),
+            "/jcr:root/content//*[@" + SitemapService.PROPERTY_SITEMAP_ROOT + "=true]" +
+                " option(index tag slingSitemaps)",
+            Query.XPATH,
+            ImmutableList.of(
+                rootDe.adaptTo(Node.class),
+                rootEnContent.adaptTo(Node.class)
+            )
         ));
         generator1.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
         generator2.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
@@ -120,12 +127,12 @@ class SitemapSchedulerTest {
 
         // then
         verify(jobManager, times(1)).addJob(
-                eq("org/apache/sling/sitemap/build"),
-                argThat(sitemapJobPropertiesMatch(SitemapService.DEFAULT_SITEMAP_NAME, "/content/site/de"))
+            eq("org/apache/sling/sitemap/build"),
+            argThat(sitemapJobPropertiesMatch(SitemapService.DEFAULT_SITEMAP_NAME, "/content/site/de"))
         );
         verify(jobManager, times(1)).addJob(
-                eq("org/apache/sling/sitemap/build"),
-                argThat(sitemapJobPropertiesMatch(SitemapService.DEFAULT_SITEMAP_NAME, "/content/site/en"))
+            eq("org/apache/sling/sitemap/build"),
+            argThat(sitemapJobPropertiesMatch(SitemapService.DEFAULT_SITEMAP_NAME, "/content/site/en"))
         );
     }
 
@@ -134,11 +141,11 @@ class SitemapSchedulerTest {
         // given
         context.registerInjectActivateService(subject);
         initResourceResolver(subject, resolver -> MockJcr.setQueryResult(
-                resolver.adaptTo(Session.class),
-                "/jcr:root/content//*[@" + SitemapService.PROPERTY_SITEMAP_ROOT + "=true]" +
-                        " option(index tag slingSitemaps)",
-                Query.XPATH,
-                Collections.singletonList(rootDe.adaptTo(Node.class))
+            resolver.adaptTo(Session.class),
+            "/jcr:root/content//*[@" + SitemapService.PROPERTY_SITEMAP_ROOT + "=true]" +
+                " option(index tag slingSitemaps)",
+            Query.XPATH,
+            Collections.singletonList(rootDe.adaptTo(Node.class))
         ));
         generator1.setNames(SitemapService.DEFAULT_SITEMAP_NAME, "sitemap1");
         generator2.setNames(SitemapService.DEFAULT_SITEMAP_NAME, "sitemap2");
@@ -148,31 +155,31 @@ class SitemapSchedulerTest {
 
         // then
         verify(jobManager, times(1)).addJob(
-                eq("org/apache/sling/sitemap/build"),
-                argThat(sitemapJobPropertiesMatch(SitemapService.DEFAULT_SITEMAP_NAME, "/content/site/de"))
+            eq("org/apache/sling/sitemap/build"),
+            argThat(sitemapJobPropertiesMatch(SitemapService.DEFAULT_SITEMAP_NAME, "/content/site/de"))
         );
         verify(jobManager, times(1)).addJob(
-                eq("org/apache/sling/sitemap/build"),
-                argThat(sitemapJobPropertiesMatch("sitemap1", "/content/site/de"))
+            eq("org/apache/sling/sitemap/build"),
+            argThat(sitemapJobPropertiesMatch("sitemap1", "/content/site/de"))
         );
         verify(jobManager, times(1)).addJob(
-                eq("org/apache/sling/sitemap/build"),
-                argThat(sitemapJobPropertiesMatch("sitemap2", "/content/site/de"))
+            eq("org/apache/sling/sitemap/build"),
+            argThat(sitemapJobPropertiesMatch("sitemap2", "/content/site/de"))
         );
     }
 
     @Test
     void testNothingScheduledWhenNameDoesNotMatchGeneratorFromConfiguration() {
         // given
-        context.registerInjectActivateService(subject, "includeGenerators", new String[]{
-                generator1.getClass().getName()
+        context.registerInjectActivateService(subject, "includeGenerators", new String[] {
+            generator1.getClass().getName()
         });
         initResourceResolver(subject, resolver -> MockJcr.setQueryResult(
-                resolver.adaptTo(Session.class),
-                "/jcr:root/content//*[@" + SitemapService.PROPERTY_SITEMAP_ROOT + "=true]" +
-                        " option(index tag slingSitemaps)",
-                Query.XPATH,
-                Collections.singletonList(rootDe.adaptTo(Node.class))
+            resolver.adaptTo(Session.class),
+            "/jcr:root/content//*[@" + SitemapService.PROPERTY_SITEMAP_ROOT + "=true]" +
+                " option(index tag slingSitemaps)",
+            Query.XPATH,
+            Collections.singletonList(rootDe.adaptTo(Node.class))
         ));
         generator1.setNames("sitemap1");
         generator2.setNames(SitemapService.DEFAULT_SITEMAP_NAME, "sitemap2");
@@ -188,23 +195,23 @@ class SitemapSchedulerTest {
 
         // then
         verify(jobManager, times(1)).addJob(
-                eq("org/apache/sling/sitemap/build"),
-                argThat(sitemapJobPropertiesMatch("sitemap1", "/content/site/de"))
+            eq("org/apache/sling/sitemap/build"),
+            argThat(sitemapJobPropertiesMatch("sitemap1", "/content/site/de"))
         );
     }
 
     @Test
-    void testNothingScheduledWhenNameDoesNotNamesFromConfiguration() {
+    void testNothingScheduledWhenNameDoesNotMatchNamesFromConfiguration() {
         // given
-        context.registerInjectActivateService(subject, "names", new String[]{
-                "foobar"
+        context.registerInjectActivateService(subject, "names", new String[] {
+            "foobar"
         });
         initResourceResolver(subject, resolver -> MockJcr.setQueryResult(
-                resolver.adaptTo(Session.class),
-                "/jcr:root/content//*[@" + SitemapService.PROPERTY_SITEMAP_ROOT + "=true]" +
-                        " option(index tag slingSitemaps)",
-                Query.XPATH,
-                Collections.singletonList(rootDe.adaptTo(Node.class))
+            resolver.adaptTo(Session.class),
+            "/jcr:root/content//*[@" + SitemapService.PROPERTY_SITEMAP_ROOT + "=true]" +
+                " option(index tag slingSitemaps)",
+            Query.XPATH,
+            Collections.singletonList(rootDe.adaptTo(Node.class))
         ));
         generator1.setNames("sitemap1");
         generator2.setNames(SitemapService.DEFAULT_SITEMAP_NAME, "sitemap2");
@@ -221,23 +228,82 @@ class SitemapSchedulerTest {
 
         // then
         verify(jobManager, times(1)).addJob(
-                eq("org/apache/sling/sitemap/build"),
-                argThat(sitemapJobPropertiesMatch("foobar", "/content/site/de"))
+            eq("org/apache/sling/sitemap/build"),
+            argThat(sitemapJobPropertiesMatch("foobar", "/content/site/de"))
         );
     }
 
+    @Test
+    public void testPathsOutsideSearchPathExcluded() {
+        // given
+        context.registerInjectActivateService(subject, "searchPath", "/content/site/en");
+
+        // when, then
+        assertFalse(subject.isExcluded(rootEn));
+        assertFalse(subject.isExcluded(rootEnContent));
+        assertTrue(subject.isExcluded(rootDe));
+    }
+
+    @Test
+    public void testIncludePaths() {
+        // given
+        context.registerInjectActivateService(subject, "includePaths", "glob:/content/site/*/jcr:content");
+
+        // when, then
+        assertTrue(subject.isExcluded(rootEn));
+        assertFalse(subject.isExcluded(rootEnContent));
+        assertTrue(subject.isExcluded(rootDe));
+    }
+
+    @Test
+    public void testExcludePaths() {
+        // given
+        context.registerInjectActivateService(subject, "excludePaths", "glob:/content/site/en**");
+
+        // when, then
+        assertTrue(subject.isExcluded(rootEn));
+        assertTrue(subject.isExcluded(rootEnContent));
+        assertFalse(subject.isExcluded(rootDe));
+    }
+
+    @Test
+    public void testNothingScheduledForExcludedSitemapRoot() {
+        // given
+        context.registerInjectActivateService(subject, "excludePaths", "glob:/content/site/en**");
+        generator1.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
+        generator2.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
+
+        // when
+        subject.schedule(rootEn, Collections.singleton(SitemapService.DEFAULT_SITEMAP_NAME));
+
+        // then
+        verify(jobManager, never()).addJob(any(), any());
+    }
+
+    @Test
+    public void testNoApplicableNamesReturnedForExcludedSitemapRoot() {
+        // given
+        context.registerInjectActivateService(subject, "excludePaths", "glob:/content/site/en**");
+        generator1.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
+        generator2.setNames(SitemapService.DEFAULT_SITEMAP_NAME);
+
+        // when, then
+        assertEquals(0, subject.getApplicableNames(rootEn).size());
+        assertEquals(1, subject.getApplicableNames(rootDe).size());
+    }
+
     private void initResourceResolver(SitemapScheduler scheduler, Consumer<ResourceResolver> resolverConsumer) {
         initResourceResolver(context, scheduler, resolverConsumer);
     }
 
     static void initResourceResolver(SlingContext context, SitemapScheduler scheduler,
-                                     Consumer<ResourceResolver> resolverConsumer) {
+        Consumer<ResourceResolver> resolverConsumer) {
         try {
             ResourceResolverFactory original = context.getService(ResourceResolverFactory.class);
             ResourceResolverFactory mock = mock(ResourceResolverFactory.class);
             Fields.allDeclaredFieldsOf(scheduler).instanceFields().stream()
-                    .filter(instanceField -> "resourceResolverFactory".equals(instanceField.name()))
-                    .forEach(instanceField -> instanceField.set(mock));
+                .filter(instanceField -> "resourceResolverFactory".equals(instanceField.name()))
+                .forEach(instanceField -> instanceField.set(mock));
 
             lenient().when(mock.getServiceResourceResolver(any())).then(inv -> {
                 ResourceResolver resolver = original.getServiceResourceResolver(inv.getArgument(0));