You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sis.apache.org by de...@apache.org on 2022/09/10 16:35:25 UTC

[sis] 02/03: Add an API (internal for now) for building an aggregated view from all the files found in a directory.

This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git

commit 305acc37505ae1469d36fdeaf8317d73ea096fcc
Author: Martin Desruisseaux <ma...@geomatys.com>
AuthorDate: Sat Sep 10 17:42:21 2022 +0200

    Add an API (internal for now) for building an aggregated view from all the files found in a directory.
---
 .../aggregate/ConcatenatedGridResource.java        |  1 +
 .../storage/aggregate/CoverageAggregator.java      | 87 ++++++++++++++++++++--
 .../sis/internal/storage/aggregate/Group.java      |  2 +-
 .../internal/storage/aggregate/GroupAggregate.java | 21 +++---
 .../storage/aggregate/GroupByTransform.java        | 23 +++---
 .../apache/sis/internal/storage/folder/Store.java  | 27 ++++++-
 .../storage/folder/UnstructuredAggregate.java      | 43 +++++++++++
 7 files changed, 173 insertions(+), 31 deletions(-)

diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/ConcatenatedGridResource.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/ConcatenatedGridResource.java
index 706f4f4c71..6d7ffd8fc0 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/ConcatenatedGridResource.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/ConcatenatedGridResource.java
@@ -42,6 +42,7 @@ import org.apache.sis.util.ArraysExt;
 /**
  * A grid coverage resource where a single dimension is the concatenation of many grid coverage resources.
  * All components must have the same "grid to CRS" transform.
+ * Instances of {@code ConcatenatedGridResource} are created by {@link CoverageAggregator}.
  *
  * @author  Martin Desruisseaux (Geomatys)
  * @version 1.3
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/CoverageAggregator.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/CoverageAggregator.java
index ae72ea938b..194a16f751 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/CoverageAggregator.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/CoverageAggregator.java
@@ -16,12 +16,21 @@
  */
 package org.apache.sis.internal.storage.aggregate;
 
-import java.util.List;
 import java.util.Locale;
+import java.util.List;
+import java.util.Queue;
+import java.util.ArrayDeque;
+import java.util.Set;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.Collections;
+import java.util.Optional;
 import java.util.stream.Stream;
 import org.opengis.referencing.operation.NoninvertibleTransformException;
 import org.apache.sis.storage.Resource;
 import org.apache.sis.storage.Aggregate;
+import org.apache.sis.storage.DataStore;
 import org.apache.sis.storage.DataStoreException;
 import org.apache.sis.storage.DataStoreContentException;
 import org.apache.sis.storage.GridCoverageResource;
@@ -39,11 +48,16 @@ import org.apache.sis.util.collection.BackingStoreException;
  */
 public final class CoverageAggregator extends Group<GroupBySample> {
     /**
-     * The listeners of the parent resource (typically a {@link org.apache.sis.storage.DataStore}),
-     * or {@code null} if none.
+     * The listeners of the parent resource (typically a {@link DataStore}), or {@code null} if none.
      */
     private final StoreListeners listeners;
 
+    /**
+     * The aggregates which were the sources of components added during a call to {@link #addComponents(Aggregate)}.
+     * This is used for reusing existing aggregates instead of {@link GroupAggregate} when the content is the same.
+     */
+    private final Map<Set<Resource>, Queue<Aggregate>> aggregates;
+
     /**
      * Creates an initially empty aggregator.
      *
@@ -52,6 +66,7 @@ public final class CoverageAggregator extends Group<GroupBySample> {
      */
     public CoverageAggregator(final StoreListeners listeners) {
         this.listeners = listeners;
+        aggregates = new HashMap<>();
     }
 
     /**
@@ -89,6 +104,7 @@ public final class CoverageAggregator extends Group<GroupBySample> {
 
     /**
      * Adds the given resource. This method can be invoked from any thread.
+     * This method does <em>not</em> recursively decompose an {@link Aggregate} into its components.
      *
      * @param  resource  resource to add.
      * @throws DataStoreException if the resource can not be used.
@@ -107,20 +123,79 @@ public final class CoverageAggregator extends Group<GroupBySample> {
         }
     }
 
+    /**
+     * Adds all components of the given aggregate. This method can be invoked from any thread.
+     * It delegates to {@link #add(GridCoverageResource)} for each component in the aggregate
+     * which is an instance of {@link GridCoverageResource}.
+     * Components that are themselves instances of {@link Aggregate} are decomposed recursively.
+     *
+     * @param  resource  the aggregate whose components are to be added.
+     * @throws DataStoreException if a component of the resource can not be used.
+     *
+     * @todo Instead of ignoring non-coverage instances, we should put them in a separated aggregate.
+     */
+    public void addComponents(final Aggregate resource) throws DataStoreException {
+        boolean hasDuplicated = false;
+        final Set<Resource> components = Collections.newSetFromMap(new IdentityHashMap<>());
+        for (final Resource component : resource.components()) {
+            if (components.add(component)) {
+                if (component instanceof GridCoverageResource) {
+                    add((GridCoverageResource) component);
+                } else if (component instanceof Aggregate) {
+                    addComponents((Aggregate) component);
+                }
+            } else {
+                hasDuplicated = true;       // Should never happen, but we are paranoiac.
+            }
+        }
+        if (!(hasDuplicated || components.isEmpty())) {
+            /*
+             * We should not have 2 aggregates with the same components.
+             * But if it happens anyway, put the aggregates in a queue.
+             * Each aggregate will be used at most once.
+             */
+            synchronized (aggregates) {
+                aggregates.computeIfAbsent(components, (k) -> new ArrayDeque<>(1)).add(resource);
+            }
+        }
+    }
+
+    /**
+     * If a user-supplied aggregate exists for all the given components, returns that aggregate.
+     * The returned aggregate is removed from the pool; aggregates are not returned twice.
+     * This method is thread-safe.
+     *
+     * @param  components  the components for which to get user-supplied aggregate.
+     * @return user-supplied aggregate if it exists. The returned aggregate is removed from the pool.
+     */
+    final Optional<Aggregate> existingAggregate(final Resource[] components) {
+        final Set<Resource> key = Collections.newSetFromMap(new IdentityHashMap<>());
+        if (Collections.addAll(key, components)) {
+            final Queue<Aggregate> r;
+            synchronized (aggregates) {
+                r = aggregates.get(key);
+            }
+            if (r != null) {
+                return Optional.ofNullable(r.poll());
+            }
+        }
+        return Optional.empty();
+    }
+
     /**
      * Builds a resource which is the aggregation or concatenation of all components added to this aggregator.
      * The returned resource will be an instance of {@link GridCoverageResource} if possible,
-     * or an instance of {@link Aggregate} is some heterogeneity in grid geometries or sample dimensions
+     * or an instance of {@link Aggregate} if some heterogeneity in grid geometries or sample dimensions
      * prevent the concatenation of all coverages in a single resource.
      *
      * <p>This method is not thread safe. If the {@code add(…)} and {@code addAll(…)} methods were invoked
-     * in background threads, but all additions must be finished before this method is invoked.</p>
+     * in background threads, then all additions must be finished before this method is invoked.</p>
      *
      * @return the aggregation or concatenation of all components added to this aggregator.
      */
     public Resource build() {
         final GroupAggregate aggregate = prepareAggregate(listeners);
         aggregate.fillWithChildAggregates(this, GroupBySample::createComponents);
-        return aggregate.simplify();
+        return aggregate.simplify(this);
     }
 }
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/Group.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/Group.java
index 2abf4bdfa4..e78ef1f4fa 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/Group.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/Group.java
@@ -62,7 +62,7 @@ abstract class Group<E> {
 
     /**
      * Prepares an initially empty aggregate.
-     * One of the {@code GroupAggregate.fill(…)} methods must be invoked after this method.
+     * One of the {@code GroupAggregate.fillWith…(…)} methods must be invoked after this method.
      *
      * @param listeners  listeners of the parent resource, or {@code null} if none.
      * @return an initially empty aggregate.
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/GroupAggregate.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/GroupAggregate.java
index 0688d2d978..5da7504221 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/GroupAggregate.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/GroupAggregate.java
@@ -74,7 +74,7 @@ final class GroupAggregate extends AbstractResource implements Aggregate {
      * This is used for skipping calls to {@link #simplify()} when it is known that
      * no component can be simplified.
      */
-    private boolean componentsAreCoverages;
+    private boolean componentsAreLeaves;
 
     /**
      * The envelope of this aggregate, or {@code null} if not yet computed.
@@ -123,10 +123,12 @@ final class GroupAggregate extends AbstractResource implements Aggregate {
      *                     The first {@link BiConsumer} argument is a {@code children} member (the source)
      *                     and the second argument is the sub-aggregate to initialize (the target).
      */
-    final <E> void fillWithChildAggregates(final Group<E> children, final BiConsumer<E,GroupAggregate> childFiller) {
+    final <E extends Group> void fillWithChildAggregates(final Group<E> children, final BiConsumer<E,GroupAggregate> childFiller) {
+        assert components.length == children.members.size();
         for (int i=0; i < components.length; i++) {
-            final GroupAggregate child = children.prepareAggregate(listeners);
-            childFiller.accept(children.members.get(i), child);
+            final E member = children.members.get(i);
+            final GroupAggregate child = member.prepareAggregate(listeners);
+            childFiller.accept(member, child);
             components[i] = child;
         }
     }
@@ -140,7 +142,7 @@ final class GroupAggregate extends AbstractResource implements Aggregate {
      */
     @SuppressWarnings("AssignmentToCollectionOrArrayFieldFromParameter")    // Copy done by GroupBySample constructor.
     final void fillWithCoverageComponents(final List<GroupByTransform> children, final List<SampleDimension> ranges) {
-        componentsAreCoverages = true;
+        componentsAreLeaves = true;
         for (int i=0; i < components.length; i++) {
             components[i] = children.get(i).createResource(listeners, ranges);
         }
@@ -149,21 +151,22 @@ final class GroupAggregate extends AbstractResource implements Aggregate {
     /**
      * Simplifies the resource tree by removing all aggregates of 1 component.
      *
+     * @param  aggregator  the aggregation builder which is invoking this method.
      * @return the resource to use after simplification.
      */
-    final Resource simplify() {
-        if (!componentsAreCoverages) {
+    final Resource simplify(final CoverageAggregator aggregator) {
+        if (!componentsAreLeaves) {
             for (int i=0; i < components.length; i++) {
                 final Resource r = components[i];
                 if (r instanceof GroupAggregate) {
-                    components[i] = ((GroupAggregate) r).simplify();
+                    components[i] = ((GroupAggregate) r).simplify(aggregator);
                 }
             }
         }
         if (components.length == 1) {
             return components[0];
         }
-        return this;
+        return aggregator.existingAggregate(components).orElse(this);
     }
 
     /**
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/GroupByTransform.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/GroupByTransform.java
index 77257a619b..031db6ce89 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/GroupByTransform.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/aggregate/GroupByTransform.java
@@ -112,7 +112,6 @@ final class GroupByTransform extends Group<GridSlice> {
         final DimensionSelector[] selects;
         synchronized (members) {                // Should no longer be needed at this step, but we are paranoiac.
             int i = members.size();
-            if (i < 2) return ArraysExt.EMPTY_INT;
             selects = new DimensionSelector[geometry.getDimension()];
             for (int dim = selects.length; --dim >= 0;) {
                 selects[dim] = new DimensionSelector(dim, i);
@@ -134,25 +133,21 @@ final class GroupByTransform extends Group<GridSlice> {
 
     /**
      * Sorts the slices in increasing order of low grid coordinates in the concatenated dimension.
-     * Then build a concatenated grid coverage resource capable to perform binary searches along that dimension.
+     * Then builds a concatenated grid coverage resource capable of performing binary searches along that dimension.
      *
      * @param  parentListeners   listeners of the parent resource, or {@code null} if none.
      * @param  sampleDimensions  the sample dimensions of the resource to build.
+     * @return the concatenated resource.
      */
     final GridCoverageResource createResource(final StoreListeners parentListeners, final List<SampleDimension> ranges) {
-        final int[] dimensions = findConcatenatedDimensions();
-        if (dimensions.length == 0) {
-            return null;
-        }
-        final int dim = dimensions[0];
-        final GridSliceLocator locator;
-        final GridCoverageResource[] slices;
-        final GridGeometry domain;
-        synchronized (members) {                // Should no longer be needed at this step, but we are paranoiac.
-            slices  = new GridCoverageResource[members.size()];
-            locator = new GridSliceLocator(members, dim, slices);
-            domain  = locator.union(geometry, members, GridSlice::getGridExtent);
+        final int n = members.size();
+        if (n == 1) {
+            return members.get(0).resource;
         }
+        final int[] dimensions = findConcatenatedDimensions();
+        final GridCoverageResource[] slices  = new GridCoverageResource[n];
+        final GridSliceLocator       locator = new GridSliceLocator(members, dimensions[0], slices);
+        final GridGeometry           domain  = locator.union(geometry, members, GridSlice::getGridExtent);
         return new ConcatenatedGridResource(parentListeners, domain, ranges, slices, locator);
     }
 }
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/folder/Store.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/folder/Store.java
index 22c788df69..364a1e4dda 100644
--- a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/folder/Store.java
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/folder/Store.java
@@ -55,6 +55,7 @@ import org.apache.sis.internal.storage.MetadataBuilder;
 import org.apache.sis.internal.storage.StoreUtilities;
 import org.apache.sis.internal.storage.StoreResource;
 import org.apache.sis.internal.storage.Resources;
+import org.apache.sis.internal.storage.aggregate.CoverageAggregator;
 import org.apache.sis.storage.event.StoreEvent;
 import org.apache.sis.storage.event.StoreListener;
 import org.apache.sis.storage.event.WarningEvent;
@@ -82,7 +83,7 @@ import org.apache.sis.storage.event.WarningEvent;
  * @since   0.8
  * @module
  */
-class Store extends DataStore implements StoreResource, Aggregate, DirectoryStream.Filter<Path> {
+class Store extends DataStore implements StoreResource, UnstructuredAggregate, DirectoryStream.Filter<Path> {
     /**
      * The data store for the root directory specified by the user.
      * May be {@code this} if this store instance is for the root directory.
@@ -151,6 +152,14 @@ class Store extends DataStore implements StoreResource, Aggregate, DirectoryStre
      */
     private transient boolean sharedRepositoryReported;
 
+    /**
+     * A structured view of this aggregate, or {@code null} if not yet computed.
+     * May be {@code this} if {@link CoverageAggregator} can not do better than current resource.
+     *
+     * @see #getStructuredView()
+     */
+    private transient Resource structuredView;
+
     /**
      * Creates a new folder store from the given file, path or URI.
      * The folder store will attempt to open only the files of the given format, if non-null.
@@ -397,6 +406,22 @@ class Store extends DataStore implements StoreResource, Aggregate, DirectoryStre
         }
     }
 
+    /**
+     * Returns a more structured (if possible) view of this resource.
+     *
+     * @return structured view. May be {@code this} if this method can not do better than current resource.
+     * @throws DataStoreException if an error occurred during the attempt to create a structured view.
+     */
+    @Override
+    public synchronized Resource getStructuredView() throws DataStoreException {
+        if (structuredView == null) {
+            final CoverageAggregator aggregator = new CoverageAggregator(listeners);
+            aggregator.addComponents(this);
+            structuredView = aggregator.build();
+        }
+        return structuredView;
+    }
+
     /**
      * Returns the resource bundle to use for error message in exceptions.
      */
diff --git a/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/folder/UnstructuredAggregate.java b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/folder/UnstructuredAggregate.java
new file mode 100644
index 0000000000..a3d01f3397
--- /dev/null
+++ b/storage/sis-storage/src/main/java/org/apache/sis/internal/storage/folder/UnstructuredAggregate.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sis.internal.storage.folder;
+
+import org.apache.sis.storage.Resource;
+import org.apache.sis.storage.Aggregate;
+import org.apache.sis.storage.DataStoreException;
+import org.apache.sis.internal.storage.aggregate.CoverageAggregator;
+
+
+/**
+ * A data store which may provide a more structured view of its components.
+ * This is an experimental interface that may change in any future version.
+ * Structure is inferred by {@link CoverageAggregator}.
+ *
+ * @author  Martin Desruisseaux (Geomatys)
+ * @version 1.3
+ * @since   1.3
+ * @module
+ */
+public interface UnstructuredAggregate extends Aggregate {
+    /**
+     * Returns a more structured (if possible) view of this resource.
+     *
+     * @return structured view. May be {@code this} if this method can not do better than current resource.
+     * @throws DataStoreException if an error occurred during the attempt to create a structured view.
+     */
+    Resource getStructuredView() throws DataStoreException;
+}