You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by di...@apache.org on 2021/06/07 13:32:49 UTC
[sling-whiteboard] 03/05: add sitemap storage cleanup implementation
This is an automated email from the ASF dual-hosted git repository.
diru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-whiteboard.git
commit 6ca7662b3afe9f07ebc8bc5e86614360f71943a4
Author: Dirk Rudolph <di...@apache.org>
AuthorDate: Mon Jun 7 12:02:29 2021 +0200
add sitemap storage cleanup implementation
---
.../apache/sling/sitemap/impl/SitemapStorage.java | 180 +++++++++++++++------
.../sitemap/impl/SitemapGeneratorExecutorTest.java | 2 +-
.../sling/sitemap/impl/SitemapServletTest.java | 4 +-
.../sling/sitemap/impl/SitemapStorageTest.java | 83 ++++++++++
4 files changed, 219 insertions(+), 50 deletions(-)
diff --git a/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java b/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java
index b31786e..4ffa9c4 100644
--- a/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java
+++ b/sitemap/src/main/java/org/apache/sling/sitemap/impl/SitemapStorage.java
@@ -20,10 +20,13 @@ package org.apache.sling.sitemap.impl;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.JcrConstants;
+import org.apache.sling.api.SlingConstants;
import org.apache.sling.api.resource.*;
import org.apache.sling.api.wrappers.ValueMapDecorator;
+import org.apache.sling.commons.scheduler.Scheduler;
import org.apache.sling.serviceusermapping.ServiceUserMapped;
import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
@@ -39,13 +42,21 @@ import java.io.OutputStream;
import java.util.*;
import java.util.function.Predicate;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static org.apache.sling.sitemap.impl.SitemapUtil.*;
-@Component(service = SitemapStorage.class)
+@Component(
+ service = {SitemapStorage.class, Runnable.class},
+ property = {
+ Scheduler.PROPERTY_SCHEDULER_NAME + "=sitemap-storage-cleanup",
+ Scheduler.PROPERTY_SCHEDULER_CONCURRENT + "=false",
+ Scheduler.PROPERTY_SCHEDULER_RUN_ON + "=" + Scheduler.VALUE_RUN_ON_SINGLE
+ }
+)
@Designate(ocd = SitemapStorage.Configuration.class)
-public class SitemapStorage {
+public class SitemapStorage implements Runnable {
@ObjectClassDefinition(name = "Apache Sling Sitemap - Storage")
@interface Configuration {
@@ -57,6 +68,10 @@ public class SitemapStorage {
@AttributeDefinition(name = "Max State Age", description = "The number of milliseconds after which an " +
"intermediate state will deleted.")
int stateMaxAge() default 60 * 30 * 1000;
+
+ @AttributeDefinition(name = "Cleanup Schedule", description = "A cron expression defining the schedule at " +
+ "which stale intermediate states and old sitemaps will be removed.")
+ String scheduler_expression() default "0 0 1 * * ?";
}
static final String PN_SITEMAP_ENTRIES = "entries";
@@ -67,11 +82,16 @@ public class SitemapStorage {
"sitemap-writer");
private static final String STATE_EXTENSION = ".part";
private static final String XML_EXTENSION = ".xml";
+ private static final String RT_SITEMAP_PART = "sling/sitemap/part";
+ private static final String RT_SITEMAP_FILE = "sling/sitemap/file";
+ private static final String PN_RESOURCE_TYPE = SlingConstants.NAMESPACE_PREFIX + ':' + SlingConstants.PROPERTY_RESOURCE_TYPE;
@Reference
private ResourceResolverFactory resourceResolverFactory;
@Reference(target = "(subServiceName=sitemap-writer)")
private ServiceUserMapped serviceUserMapped;
+ @Reference
+ private SitemapGeneratorManager generatorManager;
private String rootPath = "/var/sitemaps";
private int maxStateAge = Integer.MAX_VALUE;
@@ -82,8 +102,30 @@ public class SitemapStorage {
maxStateAge = configuration.stateMaxAge();
}
+ @Override
+ public void run() {
+ try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) {
+ Iterator<Resource> descendants = traverse(resolver.getResource(rootPath)).iterator();
+ List<Resource> toDelete = new LinkedList<>();
+ while (descendants.hasNext()) {
+ Resource descendant = descendants.next();
+ if (descendant.isResourceType(RT_SITEMAP_PART) && isExpired(descendant)) {
+ toDelete.add(descendant);
+ } else if (descendant.isResourceType(RT_SITEMAP_FILE) && !doesSitemapRootExist(descendant)) {
+ toDelete.add(descendant);
+ }
+ }
+ for (Resource resource : toDelete) {
+ resolver.delete(resource);
+ }
+ resolver.commit();
+ } catch (LoginException | PersistenceException ex) {
+ LOG.warn("Failed to cleanup storage: {}", ex.getMessage(), ex);
+ }
+ }
+
@NotNull
- public ValueMap getState(@NotNull Resource sitemapRoot, @NotNull String name) throws IOException {
+ ValueMap getState(@NotNull Resource sitemapRoot, @NotNull String name) throws IOException {
String statePath = getSitemapFilePath(sitemapRoot, name) + STATE_EXTENSION;
try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) {
Resource state = resolver.getResource(statePath);
@@ -91,19 +133,10 @@ public class SitemapStorage {
if (state == null) {
return ValueMap.EMPTY;
}
- ValueMap stateProperties = state.getValueMap();
- Calendar lastModified = stateProperties.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class);
- if (lastModified != null) {
- // advance lastModified by maxStateAge to get the point in time the state would expire
- lastModified.add(Calendar.MILLISECOND, maxStateAge);
- // check if the expire time is in the future, if so return the state, if not fallback to an empty state
- if (lastModified.after(Calendar.getInstance())) {
- return new ValueMapDecorator(new HashMap<>(state.getValueMap()));
- } else if (LOG.isDebugEnabled()) {
- LOG.debug("State at {} expired at {}", statePath, lastModified.getTime().toGMTString());
- }
- }
- return ValueMap.EMPTY;
+
+ return !isExpired(state)
+ ? new ValueMapDecorator(new HashMap<>(state.getValueMap()))
+ : ValueMap.EMPTY;
} catch (LoginException ex) {
throw new IOException("Cannot read state at " + statePath, ex);
}
@@ -116,19 +149,20 @@ public class SitemapStorage {
Resource folder = getOrCreateFolder(resolver, ResourceUtil.getParent(statePath));
String stateName = ResourceUtil.getName(statePath);
Resource stateResource = folder.getChild(stateName);
- if (stateResource != null) {
+ if (stateResource == null) {
+ Map<String, Object> properties = new HashMap<>(state.size() + 1);
+ properties.putAll(state);
+ properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
+ properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
+ properties.put(PN_RESOURCE_TYPE, RT_SITEMAP_PART);
+ resolver.create(folder, stateName, properties);
+ } else {
ModifiableValueMap properties = stateResource.adaptTo(ModifiableValueMap.class);
if (properties == null) {
throw new IOException("Cannot modify properties of existing state: " + statePath);
}
properties.putAll(state);
properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
- } else {
- Map<String, Object> properties = new HashMap<>(state.size() + 1);
- properties.putAll(state);
- properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
- properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
- resolver.create(folder, stateName, properties);
}
resolver.commit();
} catch (LoginException | PersistenceException ex) {
@@ -163,34 +197,21 @@ public class SitemapStorage {
if (sitemapResource == null) {
Map<String, Object> properties = new HashMap<>(3);
properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
+ properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
+ properties.put(JcrConstants.JCR_DATA, data);
properties.put(PN_SITEMAP_ENTRIES, entries);
properties.put(PN_SITEMAP_SIZE, size);
- sitemapResource = resolver.create(folder, sitemapFileName, properties);
+ properties.put(PN_RESOURCE_TYPE, RT_SITEMAP_FILE);
+ resolver.create(folder, sitemapFileName, properties);
} else {
ModifiableValueMap properties = sitemapResource.adaptTo(ModifiableValueMap.class);
if (properties == null) {
throw new IOException("Cannot overwrite existing sitemap at: " + sitemapFilePath);
}
- properties.put(PN_SITEMAP_ENTRIES, entries);
- properties.put(PN_SITEMAP_SIZE, size);
- }
-
- Resource sitemapContent = sitemapResource.getChild(JcrConstants.JCR_CONTENT);
- if (sitemapContent == null) {
- Map<String, Object> properties = new HashMap<>();
- properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_RESOURCE);
- properties.put(JcrConstants.JCR_ENCODING, "UTF-8");
- properties.put(JcrConstants.JCR_MIMETYPE, "application/xml");
- properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
- properties.put(JcrConstants.JCR_DATA, data);
- resolver.create(sitemapResource, JcrConstants.JCR_CONTENT, properties);
- } else {
- ModifiableValueMap properties = sitemapContent.adaptTo(ModifiableValueMap.class);
- if (properties == null) {
- throw new IOException("Cannot overwrite existing sitemap at: " + sitemapFilePath);
- }
properties.put(JcrConstants.JCR_LASTMODIFIED, Calendar.getInstance());
properties.put(JcrConstants.JCR_DATA, data);
+ properties.put(PN_SITEMAP_ENTRIES, entries);
+ properties.put(PN_SITEMAP_SIZE, size);
}
Resource stateResource = resolver.getResource(statePath);
@@ -243,13 +264,11 @@ public class SitemapStorage {
}
return StreamSupport.stream(storageResource.getChildren().spliterator(), false)
.filter(child -> child.getName().endsWith(XML_EXTENSION))
+ .filter(child -> child.isResourceType(RT_SITEMAP_FILE))
.map(child -> new SitemapStorageInfo(
child.getPath(),
child.getName().substring(0, child.getName().lastIndexOf('.')),
- Optional.ofNullable(child.getChild(JcrConstants.JCR_CONTENT))
- .map(Resource::getValueMap)
- .map(content -> content.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class))
- .orElse(null),
+ child.getValueMap().get(JcrConstants.JCR_LASTMODIFIED, Calendar.class),
child.getValueMap().get(PN_SITEMAP_SIZE, 0),
child.getValueMap().get(PN_SITEMAP_ENTRIES, 0)))
.filter(filter)
@@ -267,7 +286,8 @@ public class SitemapStorage {
String sitemapFilePath = rootPath + sitemapRoot.getPath() + '/' + sitemapSelector + XML_EXTENSION;
try (ResourceResolver resolver = resourceResolverFactory.getServiceResourceResolver(AUTH)) {
InputStream data = Optional.ofNullable(resolver.getResource(sitemapFilePath))
- .map(r -> r.getChild(JcrConstants.JCR_CONTENT))
+ .filter(r -> r.getName().endsWith(XML_EXTENSION))
+ .filter(r -> r.isResourceType(RT_SITEMAP_FILE))
.map(r -> r.getValueMap().get(JcrConstants.JCR_DATA, InputStream.class))
.orElse(null);
@@ -284,6 +304,61 @@ public class SitemapStorage {
}
}
+ /**
+ * Returns true the sitemap root of the given file exists. This first checks if the file is in a top level sitemap's
+ * storage path and if so, checks if the selector resolves to at least one root.
+ *
+ * @param sitemapFile
+ * @return
+ */
+ private boolean doesSitemapRootExist(Resource sitemapFile) {
+ String path = sitemapFile.getPath();
+ String parentPath = ResourceUtil.getParent(path);
+ String sitemapRootPath = parentPath.substring(rootPath.length());
+ Resource sitemapRoot = sitemapFile.getResourceResolver().getResource(sitemapRootPath);
+
+ if (sitemapRoot == null || !SitemapUtil.isTopLevelSitemapRoot(sitemapRoot)) {
+ LOG.debug("Sitemap file's top level sitemap root does not exist: {}", sitemapRootPath);
+ return false;
+ }
+
+ String name = sitemapFile.getName();
+ int lastDot = name.lastIndexOf('.');
+
+ if (lastDot < 0) {
+ LOG.debug("Unexpected name, missing extension: {}", name);
+ return false;
+ }
+
+ Map<Resource, String> candidates = SitemapUtil.resolveSitemapRoots(sitemapRoot, name.substring(0, lastDot));
+ // check if for any of the candidate resource roots a generator with the name exists
+ return candidates.entrySet().stream()
+ .map(entry -> generatorManager.getApplicableNames(entry.getKey(), Collections.singleton(entry.getValue())))
+ .anyMatch(names -> names.size() > 0);
+ }
+
+ /**
+ * Returns true when the state expired according to the configured maximum age.
+ *
+ * @param state
+ * @return
+ */
+ private boolean isExpired(@NotNull Resource state) {
+ ValueMap stateProperties = state.getValueMap();
+ Calendar lastModified = stateProperties.get(JcrConstants.JCR_LASTMODIFIED, Calendar.class);
+ if (lastModified != null) {
+ // advance lastModified by maxStateAge to get the point in time the state would expire
+ lastModified.add(Calendar.MILLISECOND, maxStateAge);
+ // check if the expire time is in the future
+ if (lastModified.after(Calendar.getInstance())) {
+ return false;
+ } else if (LOG.isDebugEnabled()) {
+ LOG.debug("State at {} expired at {}", state.getPath(), lastModified.getTime().toGMTString());
+ }
+ }
+ return true;
+ }
+
@NotNull
private String getSitemapFilePath(@NotNull Resource sitemapRoot, @NotNull String name) {
Resource topLevelSitemapRoot = getTopLevelSitemapRoot(sitemapRoot);
@@ -315,4 +390,15 @@ public class SitemapStorage {
}
return folder;
}
+
+ private static Stream<Resource> traverse(@Nullable Resource resource) {
+ if (resource == null) {
+ return Stream.empty();
+ }
+
+ return Stream.concat(
+ Stream.of(resource),
+ StreamSupport.stream(resource.getChildren().spliterator(), false).flatMap(SitemapStorage::traverse)
+ );
+ }
}
diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java
index 226fb3d..d000c35 100644
--- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java
+++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapGeneratorExecutorTest.java
@@ -88,8 +88,8 @@ public class SitemapGeneratorExecutorTest {
context.registerService(ServiceUserMapped.class, serviceUser, "subServiceName", "sitemap-writer");
context.registerService(SitemapGenerator.class, generator);
context.registerService(JobManager.class, jobManager);
- context.registerInjectActivateService(storage);
context.registerInjectActivateService(generatorManager);
+ context.registerInjectActivateService(storage);
context.registerInjectActivateService(extensionProviderManager);
context.registerInjectActivateService(sitemapService);
diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java
index 02b4172..bc678ae 100644
--- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java
+++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapServletTest.java
@@ -230,9 +230,9 @@ public class SitemapServletTest {
byte[] expectedOutcomeBytes = expectedOutcome.getBytes(StandardCharsets.UTF_8);
context.registerInjectActivateService(sitemapService, "onDemandNames", "news");
context.registerInjectActivateService(subject);
- storage.writeSitemap(root, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(expectedOutcomeBytes),
+ storage.writeSitemap(root, "foo", new ByteArrayInputStream(expectedOutcomeBytes),
expectedOutcomeBytes.length, 1);
- MockSlingHttpServletRequest request = newSitemapReq("sitemap", root);
+ MockSlingHttpServletRequest request = newSitemapReq("foo-sitemap", root);
MockSlingHttpServletResponse response = context.response();
// when
diff --git a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java
index 55991b2..3603a69 100644
--- a/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java
+++ b/sitemap/src/test/java/org/apache/sling/sitemap/impl/SitemapStorageTest.java
@@ -20,6 +20,7 @@ package org.apache.sling.sitemap.impl;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.io.IOUtils;
+import org.apache.sling.api.resource.ModifiableValueMap;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ValueMap;
import org.apache.sling.serviceusermapping.ServiceUserMapped;
@@ -39,12 +40,15 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
+import java.util.Collections;
import java.util.Set;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.hasItems;
import static org.hamcrest.Matchers.hasSize;
import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.when;
@ExtendWith({SlingContextExtension.class, MockitoExtension.class})
public class SitemapStorageTest {
@@ -52,16 +56,23 @@ public class SitemapStorageTest {
public final SlingContext context = new SlingContext();
private final SitemapStorage subject = new SitemapStorage();
+ private final SitemapGeneratorManager generatorManager = new SitemapGeneratorManager();
+ @Mock(lenient = true)
+ private SitemapGenerator generator;
@Mock
private ServiceUserMapped serviceUser;
@BeforeEach
public void setup() {
+ context.registerService(SitemapGenerator.class, generator);
context.registerService(ServiceUserMapped.class, serviceUser, "subServiceName", "sitemap-writer");
+ context.registerInjectActivateService(generatorManager);
context.registerInjectActivateService(subject,
"stateMaxAge", 100
);
+
+ when(generator.getNames(any())).thenReturn(Collections.singleton(SitemapGenerator.DEFAULT_SITEMAP));
}
@Test
@@ -133,6 +144,78 @@ public class SitemapStorageTest {
assertThat(sitemapNames, hasItems(storageInfo("foobar-sitemap"), storageInfo("sitemap")));
}
+ @Test
+ public void testCleanupExpiredStates() throws Exception {
+ // given
+ Resource root = context.create().resource("/content/site/de", ImmutableMap.of(
+ "sitemapRoot", Boolean.TRUE
+ ));
+
+ // when
+ subject.writeState(root, SitemapGenerator.DEFAULT_SITEMAP, ImmutableMap.of("i", 1));
+
+ // then
+ assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.part"));
+
+ // and when
+ Thread.sleep(100);
+ subject.run();
+
+ // then
+ assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.part"));
+ }
+
+ @Test
+ public void testCleanupObsoleteSitemapsAfterTopLevelChanged() throws Exception {
+ // given
+ Resource newRoot = context.create().resource("/content/site/ch");
+ Resource initialRoot = context.create().resource("/content/site/ch/de-ch", ImmutableMap.of(
+ "sitemapRoot", Boolean.TRUE
+ ));
+
+ // when
+ subject.writeSitemap(initialRoot, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0);
+
+ // then
+ assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/ch/de-ch/sitemap.xml"));
+
+ // and when
+ newRoot.adaptTo(ModifiableValueMap.class).put("sitemapRoot", Boolean.TRUE);
+ context.resourceResolver().commit();
+ subject.run();
+
+ // then
+ assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/ch/de-ch/sitemap.xml"));
+ }
+
+ @Test
+ public void testCleanupObsoleteSitemapsAfterNestedSitemapRootChanged() throws Exception {
+ // given
+ Resource root = context.create().resource("/content/site/de", ImmutableMap.of(
+ "sitemapRoot", Boolean.TRUE
+ ));
+ Resource news = context.create().resource("/content/site/de/news", ImmutableMap.of(
+ "sitemapRoot", Boolean.TRUE
+ ));
+
+ // when
+ subject.writeSitemap(root, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0);
+ subject.writeSitemap(news, SitemapGenerator.DEFAULT_SITEMAP, new ByteArrayInputStream(new byte[0]), 0, 0);
+
+ // then
+ assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.xml"));
+ assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/news-sitemap.xml"));
+
+ // and when
+ news.adaptTo(ModifiableValueMap.class).put("sitemapRoot", Boolean.FALSE);
+ context.resourceResolver().commit();
+ subject.run();
+
+ // then
+ assertNotNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/sitemap.xml"));
+ assertNull(context.resourceResolver().getResource("/var/sitemaps/content/site/de/news-sitemap.xml"));
+ }
+
static void assertResourceDataEquals(String expectedValue, Resource resource) throws IOException {
assertNotNull(resource);
InputStream inputStream = resource.adaptTo(InputStream.class);