You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by di...@apache.org on 2021/06/07 13:32:49 UTC

[sling-whiteboard] 03/05: add sitemap storage cleanup implementation

This is an automated email from the ASF dual-hosted git repository.

diru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-whiteboard.git

commit 6ca7662b3afe9f07ebc8bc5e86614360f71943a4
Author: Dirk Rudolph <di...@apache.org>
AuthorDate: Mon Jun 7 12:02:29 2021 +0200

    add sitemap storage cleanup implementation
---
 .../apache/sling/sitemap/impl/SitemapStorage.java  | 180 +++++++++++++++------
 .../sitemap/impl/SitemapGeneratorExecutorTest.java |   2 +-
 .../sling/sitemap/impl/SitemapServletTest.java     |   4 +-
 .../sling/sitemap/impl/SitemapStorageTest.java     |  83 ++++++++++
 4 files changed, 219 insertions(+), 50 deletions(-)

diff --git a/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java b/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java
index b31786e..4ffa9c4 100644
--- a/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java
+++ b/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java
@@ -20,10 +20,13 @@ package org.apache.sling.sitemap.impl;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.jackrabbit.JcrConstants;
+import org.apache.sling.api.SlingConstants;
 import org.apache.sling.api.resource.*;
 import org.apache.sling.api.wrappers.ValueMapDecorator;
+import org.apache.sling.commons.scheduler.Scheduler;
 import org.apache.sling.serviceusermapping.ServiceUserMapped;
 import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
 import org.osgi.service.component.annotations.Activate;
 import org.osgi.service.component.annotations.Component;
 import org.osgi.service.component.annotations.Reference;
@@ -39,13 +42,21 @@ import java.io.OutputStream;
 import java.util.*;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
 import static org.apache.sling.sitemap.impl.SitemapUtil.*;
 
-@Component(service = SitemapStorage.class)
+@Component(
+        service = {SitemapStorage.class, Runnable.class},
+        property = {
+                Scheduler.PROPERTY_SCHEDULER_NAME + "=sitemap-storage-cleanup",
+                Scheduler.PROPERTY_SCHEDULER_CONCURRENT + "=false",
+                Scheduler.PROPERTY_SCHEDULER_RUN_ON + "=" + Scheduler.VALUE_RUN_ON_SINGLE
+        }
+)
 @Designate(ocd = SitemapStorage.Configuration.class)
-public class SitemapStorage {
+public class SitemapStorage implements Runnable {
 
     @ObjectClassDefinition(name = "Apache Sling Sitemap - Storage")
     @interface Configuration {
@@ -57,6 +68,10 @@ public class SitemapStorage {
         @AttributeDefinition(name = "Max State Age", description = "The number of milliseconds after which an " +
                 "intermediate state will deleted.")
         int stateMaxAge() default 60 * 30 * 1000;
+
+        @AttributeDefinition(name = "Cleanup Schedule", description = "A cron expression defining the schedule at " +
+                "which stale intermediate states and old sitemaps will be removed.")
+        String scheduler_expression() default "0 0 1 * * ?";
     }
 
     static final String PN_SITEMAP_ENTRIES = "entries";
@@ -67,11 +82,16 @@ public class SitemapStorage {
             "sitemap-writer");
     private static final String STATE_EXTENSION = ".part";
     private static final String XML_EXTENSION = ".xml";
+    private static final String RT_SITEMAP_PART = "sling/sitemap/part";
+    private static final String RT_SITEMAP_FILE = "sling/sitemap/file";
+    private static final String PN_RESOURCE_TYPE = SlingConstants.NAMESPACE_PREFIX + ':' + SlingConstants.PROPERTY_RESOURCE_TYPE;
 
     @Reference
     private ResourceResolverFactory resourceResolverFactory;
     @Reference(target = "(subServiceName=sitemap-writer)")
     private ServiceUserMapped serviceUserMapped;
+    @Reference
+    private SitemapGeneratorManager generatorManager;
 
     private String rootPath = "/var/sitemaps";
     private int maxStateAge = Integer.MAX_VALUE;
@@ -82,8 +102,30 @@ public class SitemapStorage {
         maxStateAge = configuration.stateMaxAge();
     }
 
+    @Override
+    public void run() {
+        try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) {
+            Iterator<Resource> descendants = traverse(resolver.getResource(rootPath)).iterator();
+            List<Resource> toDelete = new LinkedList<>();
+            while (descendants.hasNext()) {
+                Resource descendant = descendants.next();
+                if (descendant.isResourceType(RT_SITEMAP_PART) && isExpired(descendant)) {
+                    toDelete.add(descendant);
+                } else if (descendant.isResourceType(RT_SITEMAP_FILE) && !doesSitemapRootExist(descendant)) {
+                    toDelete.add(descendant);
+                }
+            }
+            for (Resource resource : toDelete) {
+                resolver.delete(resource);
+            }
+            resolver.commit();
+        } catch (LoginException | PersistenceException ex) {
+            LOG.warn("Failed to cleanup storage: {}", ex.getMessage(), ex);
+        }
+    }
+
     @NotNull
-    public ValueMap getState(@NotNull Resource sitemapRoot, @NotNull String name) throws IOException {
+    ValueMap getState(@NotNull Resource sitemapRoot, @NotNull String name) throws IOException {
         String statePath = getSitemapFilePath(sitemapRoot, name) + STATE_EXTENSION;
         try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) {
             Resource state = resolver.getResource(statePath);
@@ -91,19 +133,10 @@ public class SitemapStorage {
             if (state == null) {
                 return ValueMap.EMPTY;
             }
-            ValueMap stateProperties = state.getValueMap();
-            Calendar lastModified = stateProperties.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class);
-            if (lastModified != null) {
-                // advance lastModified by maxStateAge to get the point in time the state would expire
-                lastModified.add(Calendar.MILLISECOND, maxStateAge);
-                // check if the expire time is in the future, if so return the state, if not fallback to an empty state
-                if (lastModified.after(Calendar.getInstance())) {
-                    return new ValueMapDecorator(new HashMap<>(state.getValueMap()));
-                } else if (LOG.isDebugEnabled()) {
-                    LOG.debug("State at {} expired at {}", statePath, lastModified.getTime().toGMTString());
-                }
-            }
-            return ValueMap.EMPTY;
+
+            return !isExpired(state)
+                    ? new ValueMapDecorator(new HashMap<>(state.getValueMap()))
+                    : ValueMap.EMPTY;
         } catch (LoginException ex) {
             throw new IOException("Cannot read state at " + statePath, ex);
         }
@@ -116,19 +149,20 @@ public class SitemapStorage {
             Resource folder = getOrCreateFolder(resolver, ResourceUtil.getParent(statePath));
             String stateName = ResourceUtil.getName(statePath);
             Resource stateResource = folder.getChild(stateName);
-            if (stateResource != null) {
+            if (stateResource == null) {
+                Map<String, Object> properties = new HashMap<>(state.size() + 1);
+                properties.putAll(state);
+                properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
+                properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
+                properties.put(PN_RESOURCE_TYPE, RT_SITEMAP_PART);
+                resolver.create(folder, stateName, properties);
+            } else {
                 ModifiableValueMap properties = stateResource.adaptTo(ModifiableValueMap.class);
                 if (properties == null) {
                     throw new IOException("Cannot modify properties of existing state: " + statePath);
                 }
                 properties.putAll(state);
                 properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
-            } else {
-                Map<String, Object> properties = new HashMap<>(state.size() + 1);
-                properties.putAll(state);
-                properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
-                properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
-                resolver.create(folder, stateName, properties);
             }
             resolver.commit();
         } catch (LoginException | PersistenceException ex) {
@@ -163,34 +197,21 @@ public class SitemapStorage {
             if (sitemapResource == null) {
                 Map<String, Object> properties = new HashMap<>(3);
                 properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
+                properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
+                properties.put(JcrConstants.JCR_DATA, data);
                 properties.put(PN_SITEMAP_ENTRIES, entries);
                 properties.put(PN_SITEMAP_SIZE, size);
-                sitemapResource = resolver.create(folder, sitemapFileName, properties);
+                properties.put(PN_RESOURCE_TYPE, RT_SITEMAP_FILE);
+                resolver.create(folder, sitemapFileName, properties);
             } else {
                 ModifiableValueMap properties = sitemapResource.adaptTo(ModifiableValueMap.class);
                 if (properties == null) {
                     throw new IOException("Cannot overwrite existing sitemap at: " + sitemapFilePath);
                 }
-                properties.put(PN_SITEMAP_ENTRIES, entries);
-                properties.put(PN_SITEMAP_SIZE, size);
-            }
-
-            Resource sitemapContent = sitemapResource.getChild(JcrConstants.JCR_CONTENT);
-            if (sitemapContent == null) {
-                Map<String, Object> properties = new HashMap<>();
-                properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_RESOURCE);
-                properties.put(JcrConstants.JCR_ENCODING, "UTF-8");
-                properties.put(JcrConstants.JCR_MIMETYPE, "application/xml");
-                properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
-                properties.put(JcrConstants.JCR_DATA, data);
-                resolver.create(sitemapResource, JcrConstants.JCR_CONTENT, properties);
-            } else {
-                ModifiableValueMap properties = sitemapContent.adaptTo(ModifiableValueMap.class);
-                if (properties == null) {
-                    throw new IOException("Cannot overwrite existing sitemap at: " + sitemapFilePath);
-                }
                 properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
                 properties.put(JcrConstants.JCR_DATA, data);
+                properties.put(PN_SITEMAP_ENTRIES, entries);
+                properties.put(PN_SITEMAP_SIZE, size);
             }
 
             Resource stateResource = resolver.getResource(statePath);
@@ -243,13 +264,11 @@ public class SitemapStorage {
             }
             return StreamSupport.stream(storageResource.getChildren().spliterator(), false)
                     .filter(child -> child.getName().endsWith(XML_EXTENSION))
+                    .filter(child -> child.isResourceType(RT_SITEMAP_FILE))
                     .map(child -> new SitemapStorageInfo(
                             child.getPath(),
                             child.getName().substring(0, child.getName().lastIndexOf('.')),
-                            Optional.ofNullable(child.getChild(JcrConstants.JCR_CONTENT))
-                                    .map(Resource::getValueMap)
-                                    .map(content -> content.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class))
-                                    .orElse(null),
+                            child.getValueMap().get(JcrConstants.JCR_LASTMODIFIED, Calendar.class),
                             child.getValueMap().get(PN_SITEMAP_SIZE, 0),
                             child.getValueMap().get(PN_SITEMAP_ENTRIES, 0)))
                     .filter(filter)
@@ -267,7 +286,8 @@ public class SitemapStorage {
         String sitemapFilePath = rootPath + sitemapRoot.getPath() + '/' + sitemapSelector + XML_EXTENSION;
         try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) {
             InputStream data = Optional.ofNullable(resolver.getResource(sitemapFilePath))
-                    .map(r -> r.getChild(JcrConstants.JCR_CONTENT))
+                    .filter(r -> r.getName().endsWith(XML_EXTENSION))
+                    .filter(r -> r.isResourceType(RT_SITEMAP_FILE))
                     .map(r -> r.getValueMap().get(JcrConstants.JCR_DATA, InputStream.class))
                     .orElse(null);
 
@@ -284,6 +304,61 @@ public class SitemapStorage {
         }
     }
 
+    /**
+     * Returns true the sitemap root of the given file exists. This first checks if the file is in a top level sitemap's
+     * storage path and if so, checks if the selector resolves to at least one root.
+     *
+     * @param sitemapFile
+     * @return
+     */
+    private boolean doesSitemapRootExist(Resource sitemapFile) {
+        String path = sitemapFile.getPath();
+        String parentPath = ResourceUtil.getParent(path);
+        String sitemapRootPath = parentPath.substring(rootPath.length());
+        Resource sitemapRoot = sitemapFile.getResourceResolver().getResource(sitemapRootPath);
+
+        if (sitemapRoot == null || !SitemapUtil.isTopLevelSitemapRoot(sitemapRoot)) {
+            LOG.debug("Sitemap file's top level sitemap root does not exist: {}", sitemapRootPath);
+            return false;
+        }
+
+        String name = sitemapFile.getName();
+        int lastDot = name.lastIndexOf('.');
+
+        if (lastDot < 0) {
+            LOG.debug("Unexpected name, missing extension: {}", name);
+            return false;
+        }
+
+        Map<Resource, String> candidates = SitemapUtil.resolveSitemapRoots(sitemapRoot, name.substring(0, lastDot));
+        // check if for any of the candidate resource roots a generator with the name exists
+        return candidates.entrySet().stream()
+                .map(entry -> generatorManager.getApplicableNames(entry.getKey(), Collections.singleton(entry.getValue())))
+                .anyMatch(names -> names.size() > 0);
+    }
+
+    /**
+     * Returns true when the state expired according to the configured maximum age.
+     *
+     * @param state
+     * @return
+     */
+    private boolean isExpired(@NotNull Resource state) {
+        ValueMap stateProperties = state.getValueMap();
+        Calendar lastModified = stateProperties.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class);
+        if (lastModified != null) {
+            // advance lastModified by maxStateAge to get the point in time the state would expire
+            lastModified.add(Calendar.MILLISECOND, maxStateAge);
+            // check if the expire time is in the future
+            if (lastModified.after(Calendar.getInstance())) {
+                return false;
+            } else if (LOG.isDebugEnabled()) {
+                LOG.debug("State at {} expired at {}", state.getPath(), lastModified.getTime().toGMTString());
+            }
+        }
+        return true;
+    }
+
     @NotNull
     private String getSitemapFilePath(@NotNull Resource sitemapRoot, @NotNull String name) {
         Resource topLevelSitemapRoot = getTopLevelSitemapRoot(sitemapRoot);
@@ -315,4 +390,15 @@ public class SitemapStorage {
         }
         return folder;
     }
+
+    private static Stream<Resource> traverse(@Nullable Resource resource) {
+        if (resource == null) {
+            return Stream.empty();
+        }
+
+        return Stream.concat(
+                Stream.of(resource),
+                StreamSupport.stream(resource.getChildren().spliterator(), false).flatMap(SitemapStorage::traverse)
+        );
+    }
 }
diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java
index 226fb3d..d000c35 100644
--- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java
+++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java
@@ -88,8 +88,8 @@ public class SitemapGeneratorExecutorTest {
         context.registerService(ServiceUserMapped.class, serviceUser, "subServiceName", "sitemap-writer");
         context.registerService(SitemapGenerator.class, generator);
         context.registerService(JobManager.class, jobManager);
-        context.registerInjectActivateService(storage);
         context.registerInjectActivateService(generatorManager);
+        context.registerInjectActivateService(storage);
         context.registerInjectActivateService(extensionProviderManager);
         context.registerInjectActivateService(sitemapService);
 
diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java
index 02b4172..bc678ae 100644
--- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java
+++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java
@@ -230,9 +230,9 @@ public class SitemapServletTest {
         byte[] expectedOutcomeBytes = expectedOutcome.getBytes(StandardCharsets.UTF_8);
         context.registerInjectActivateService(sitemapService, "onDemandNames", "news");
         context.registerInjectActivateService(subject);
-        storage.writeSitemap(root, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(expectedOutcomeBytes),
+        storage.writeSitemap(root, "foo", new ByteArrayInputStream(expectedOutcomeBytes),
                 expectedOutcomeBytes.length, 1);
-        MockSlingHttpServletRequest request = newSitemapReq("sitemap", root);
+        MockSlingHttpServletRequest request = newSitemapReq("foo-sitemap", root);
         MockSlingHttpServletResponse response = context.response();
 
         // when
diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java
index 55991b2..3603a69 100644
--- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java
+++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java
@@ -20,6 +20,7 @@ package org.apache.sling.sitemap.impl;
 
 import com.google.common.collect.ImmutableMap;
 import org.apache.commons.io.IOUtils;
+import org.apache.sling.api.resource.ModifiableValueMap;
 import org.apache.sling.api.resource.Resource;
 import org.apache.sling.api.resource.ValueMap;
 import org.apache.sling.serviceusermapping.ServiceUserMapped;
@@ -39,12 +40,15 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringWriter;
 import java.nio.charset.StandardCharsets;
+import java.util.Collections;
 import java.util.Set;
 
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.hasItems;
 import static org.hamcrest.Matchers.hasSize;
 import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.when;
 
 @ExtendWith({SlingContextExtension.class, MockitoExtension.class})
 public class SitemapStorageTest {
@@ -52,16 +56,23 @@ public class SitemapStorageTest {
     public final SlingContext context = new SlingContext();
 
     private final SitemapStorage subject = new SitemapStorage();
+    private final SitemapGeneratorManager generatorManager = new SitemapGeneratorManager();
 
+    @Mock(lenient = true)
+    private SitemapGenerator generator;
     @Mock
     private ServiceUserMapped serviceUser;
 
     @BeforeEach
     public void setup() {
+        context.registerService(SitemapGenerator.class, generator);
         context.registerService(ServiceUserMapped.class, serviceUser, "subServiceName", "sitemap-writer");
+        context.registerInjectActivateService(generatorManager);
         context.registerInjectActivateService(subject,
                 "stateMaxAge", 100
         );
+
+        when(generator.getNames(any())).thenReturn(Collections.singleton(SitemapGenerator.DEFAULT_SITEMAP));
     }
 
     @Test
@@ -133,6 +144,78 @@ public class SitemapStorageTest {
         assertThat(sitemapNames, hasItems(storageInfo("foobar-sitemap"), storageInfo("sitemap")));
     }
 
+    @Test
+    public void testCleanupExpiredStates() throws Exception {
+        // given
+        Resource root = context.create().resource("/content/site/de", ImmutableMap.of(
+                "sitemapRoot", Boolean.TRUE
+        ));
+
+        // when
+        subject.writeState(root, SitemapGenerator.DEFAULT_SITEMAP, ImmutableMap.of("i", 1));
+
+        // then
+        assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.part"));
+
+        // and when
+        Thread.sleep(100);
+        subject.run();
+
+        // then
+        assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.part"));
+    }
+
+    @Test
+    public void testCleanupObsoleteSitemapsAfterTopLevelChanged() throws Exception {
+        // given
+        Resource newRoot = context.create().resource("/content/site/ch");
+        Resource initialRoot = context.create().resource("/content/site/ch/de-ch", ImmutableMap.of(
+                "sitemapRoot", Boolean.TRUE
+        ));
+
+        // when
+        subject.writeSitemap(initialRoot, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0);
+
+        // then
+        assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/ch/de-ch/sitemap.xml"));
+
+        // and when
+        newRoot.adaptTo(ModifiableValueMap.class).put("sitemapRoot", Boolean.TRUE);
+        context.resourceResolver().commit();
+        subject.run();
+
+        // then
+        assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/ch/de-ch/sitemap.xml"));
+    }
+
+    @Test
+    public void testCleanupObsoleteSitemapsAfterNestedSitemapRootChanged() throws Exception {
+        // given
+        Resource root = context.create().resource("/content/site/de", ImmutableMap.of(
+                "sitemapRoot", Boolean.TRUE
+        ));
+        Resource news = context.create().resource("/content/site/de/news", ImmutableMap.of(
+                "sitemapRoot", Boolean.TRUE
+        ));
+
+        // when
+        subject.writeSitemap(root, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0);
+        subject.writeSitemap(news, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0);
+
+        // then
+        assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.xml"));
+        assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/news-sitemap.xml"));
+
+        // and when
+        news.adaptTo(ModifiableValueMap.class).put("sitemapRoot", Boolean.FALSE);
+        context.resourceResolver().commit();
+        subject.run();
+
+        // then
+        assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.xml"));
+        assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/news-sitemap.xml"));
+    }
+
     static void assertResourceDataEquals(String expectedValue, Resource resource) throws IOException {
         assertNotNull(resource);
         InputStream inputStream = resource.adaptTo(InputStream.class);