You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gu...@apache.org on 2019/01/29 02:29:45 UTC

[lucene-solr] branch solr-13131 updated: SOLR-13149 First draft of Category Routed Alias class. Not really useful or testable until I implement MaintainCategoryRoutedAliasCmd however

This is an automated email from the ASF dual-hosted git repository.

gus pushed a commit to branch solr-13131
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/solr-13131 by this push:
     new 3f3098e  SOLR-13149 First draft of Category Routed Alias class. Not really useful or testable until I implement MaintainCategoryRoutedAliasCmd however
3f3098e is described below

commit 3f3098ea373b5f982c1115c8c855881d5712c8da
Author: Gus Heck <gu...@apache.org>
AuthorDate: Mon Jan 28 21:29:26 2019 -0500

    SOLR-13149 First draft of Category Routed Alias class. Not really
    useful or testable until I implement MaintainCategoryRoutedAliasCmd however
---
 .../cloud/api/collections/CategoryRoutedAlias.java | 158 +++++++++++++++++++--
 .../MaintainCategoryRoutedAliasCmd.java            |  30 ++++
 .../solr/cloud/api/collections/RoutedAlias.java    |  10 +-
 .../cloud/api/collections/TimeRoutedAlias.java     |   6 +-
 4 files changed, 186 insertions(+), 18 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CategoryRoutedAlias.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CategoryRoutedAlias.java
index 5a352a6..a20cf63 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CategoryRoutedAlias.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CategoryRoutedAlias.java
@@ -17,28 +17,74 @@
 
 package org.apache.solr.cloud.api.collections;
 
-import java.time.Instant;
+import java.lang.invoke.MethodHandles;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import org.apache.solr.cloud.ZkController;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.Aliases;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.admin.CollectionsHandler;
+import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.update.AddUpdateCommand;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-public class CategoryRoutedAlias implements RoutedAlias {
+public class CategoryRoutedAlias implements RoutedAlias<String> {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  public static final String COLLECTION_INFIX = "__CRA__";
+
+  /**
+   * Parameters required for creating a category routed alias
+   */
+  public static final Set<String> REQUIRED_ROUTER_PARAMS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
+      CommonParams.NAME,
+      ROUTER_TYPE_NAME,
+      ROUTER_FIELD)));
+
+  public static final String ROUTER_MAX_CARDINALITY = "router.maxCardinality";
+  public static final String ROUTER_MUST_MATCH = "router.mustMatch";
+
+  /**
+   * Optional parameters for creating a category routed alias excluding parameters for collection creation.
+   */
+  public static final Set<String> OPTIONAL_ROUTER_PARAMS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
+      ROUTER_MAX_CARDINALITY,
+      ROUTER_MUST_MATCH)));
+
+  private List<String> collectionNames; // List of collections currently in the CRA
+  private Aliases parsedCollectionsAliases; // a cached reference to the source of what we parse into collectionNames
   private final String aliasName;
-  private final Map<String, String> aliasProperties;
+  private final Map<String, String> aliasMetadata;
 
   CategoryRoutedAlias(String aliasName, Map<String, String> aliasMetadata) {
     this.aliasName = aliasName;
-    this.aliasProperties = aliasMetadata;
+    this.aliasMetadata = aliasMetadata;
   }
 
   @Override
   public boolean updateParsedCollectionAliases(ZkController zkController) {
+    final Aliases aliases = zkController.getZkStateReader().getAliases(); // note: might be different from last request
+    if (this.parsedCollectionsAliases != aliases) {
+      if (this.parsedCollectionsAliases != null) {
+        log.debug("Observing possibly updated alias: {}", getAliasName());
+      }
+      // slightly inefficient, but not easy to make changes to the return value of parseCollections
+      this.collectionNames = parseCollections(aliases).stream().map(Map.Entry::getValue).collect(Collectors.toList());
+      this.parsedCollectionsAliases = aliases;
+      return true;
+    }
     return false;
   }
 
@@ -48,37 +94,125 @@ public class CategoryRoutedAlias implements RoutedAlias {
   }
 
   @Override
-  public List<Map.Entry<Instant, String>> parseCollections(Aliases aliases) {
-    return null;
+  public String getRouteField() {
+    return aliasMetadata.get(ROUTER_FIELD);
   }
 
   @Override
-  public void validateRouteValue(AddUpdateCommand cmd) {
+  public List<Map.Entry<String, String>> parseCollections(Aliases aliases) {
+    final List<String> collections = aliases.getCollectionAliasListMap().get(aliasName);
+    if (collections == null) {
+      throw RoutedAlias.newAliasMustExistException(getAliasName());
+    }
+    List<Map.Entry<String,String>> result = new ArrayList<>(collections.size());
+    for (String collection : collections) {
+      String collCategory = parseCategoryFromCollectionName(aliasName, collection);
+      result.add(new AbstractMap.SimpleImmutableEntry<>(collCategory, collection));
+    }
+    // TODO Think about this... is order needed? if so perhaps better if insertion maintains order?
+    result.sort((e1, e2) -> e2.getKey().compareTo(e1.getKey())); // reverse sort by key
 
+    // note that this is also sorted by value since the value corresponds to the key plus the alias name which
+    // is constant within a given alias.
+    return result;
+  }
+
+  /**
+   * Pattern for Category Routed Alias is aliasName__CRA__dataDrivenCategory. The __CRA__ infix is to
+   * reduce the chance of inadvertently duplicating (or worse yet, adopting) other collections
+   * that are not supposed to be included in the alias. With Time routed aliases the timestamp in
+   * the collection name was sufficiently unique, but given that aliasName could be anything and
+   * 2 part collection names of the form foo_bar are probably common in the wild, the infix seems
+   * necessary.
+   *
+   */
+  private String parseCategoryFromCollectionName(String aliasName, String collection) {
+    String prefix = aliasName + COLLECTION_INFIX;
+    if (!collection.startsWith(prefix)) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,"Category Routed Alias collection names " +
+          "must start with the name of the alias plus " + COLLECTION_INFIX);
+    }
+    return collection.substring(prefix.length(),collection.length());
+  }
+
+  @Override
+  public void validateRouteValue(AddUpdateCommand cmd) throws SolrException {
+    //Mostly this will be filled out by SOLR-13150 and SOLR-13151
+  }
+
+  /**
+   * Calculate a safe collection name suffix from a data value. The value is trimmed and any
+   * non-word character is replaced with an underscore.
+   *
+   * @param dataValue a value from the route field for a particular document
+   * @return the suffix value for its corresponding collection name.
+   */
+  private String safeKeyValue(String dataValue) {
+    return dataValue.trim().replaceAll("\\W", "_");
+  }
+
+  private String buildCollectionNameFromValue(String value) {
+    return aliasName + COLLECTION_INFIX + safeKeyValue(value);
   }
 
   @Override
   public String createCollectionsIfRequired(AddUpdateCommand cmd) {
-    return null;
+    SolrQueryRequest req = cmd.getReq();
+    SolrCore core = req.getCore();
+    CoreContainer coreContainer = core.getCoreContainer();
+    CollectionsHandler collectionsHandler = coreContainer.getCollectionsHandler();
+    String dataValue = String.valueOf(cmd.getSolrInputDocument().getFieldValue(getRouteField()));
+
+   String candidateCollectionName = buildCollectionNameFromValue(dataValue);
+
+    try {
+      // Note: CRA's have no way to predict values that determine collection so preemptive async creation
+      // is not possible. We have no choice but to block and wait (to do otherwise would imperil the overseer).
+      do {
+        if (this.collectionNames.contains(candidateCollectionName)) {
+          return candidateCollectionName;
+        } else {
+          // this could time out in which case we simply let it throw an error
+          MaintainCategoryRoutedAliasCmd.remoteInvoke(collectionsHandler, getAliasName(), candidateCollectionName);
+          // It's possible no collection was created because of a race and that's okay... we'll retry.
+
+          // Ensure our view of the aliases has updated. If we didn't do this, our zkStateReader might
+          //  not yet know about the new alias (thus won't see the newly added collection to it), and we might think
+          //  we failed.
+          collectionsHandler.getCoreContainer().getZkController().getZkStateReader().aliasesManager.update();
+
+          // we should see some sort of update to our aliases
+          if (!updateParsedCollectionAliases(coreContainer.getZkController())) { // thus we didn't make progress...
+            // this is not expected, even in known failure cases, but we check just in case
+            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+                "We need to create a new category routed collection but for unknown reasons were unable to do so.");
+          }
+        }
+      } while (true);
+    } catch (SolrException e) {
+      throw e;
+    } catch (Exception e) {
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+    }
   }
 
   @Override
   public Optional<String> computeInitialCollectionName() {
-    return null;
+    return Optional.empty();
   }
 
   @Override
   public Map<String, String> getAliasMetadata() {
-    return aliasProperties;
+    return aliasMetadata;
   }
 
   @Override
   public Set<String> getRequiredParams() {
-    return new HashSet<>();
+    return REQUIRED_ROUTER_PARAMS;
   }
 
   @Override
   public Set<String> getOptionalParams() {
-    return new HashSet<>();
+    return OPTIONAL_ROUTER_PARAMS;
   }
 }
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/MaintainCategoryRoutedAliasCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MaintainCategoryRoutedAliasCmd.java
index 59274d6..93a832b 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MaintainCategoryRoutedAliasCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MaintainCategoryRoutedAliasCmd.java
@@ -17,11 +17,41 @@
 
 package org.apache.solr.cloud.api.collections;
 
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.solr.client.solrj.SolrResponse;
+import org.apache.solr.cloud.Overseer;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.handler.admin.CollectionsHandler;
 
 public class MaintainCategoryRoutedAliasCmd extends AliasCmd {
+
+  public static final String IF_CATEGORY_COLLECTION_NOT_FOUND = "ifCategoryCollectionNotFound";
+
+  /**
+   * Invokes this command from the client.  If there's a problem it will throw an exception.
+   * Please note that it is important to never add async to this invocation. This method must
+   * block (up to the standard OCP timeout) to prevent large batches of adds from sending a message
+   * to the overseer for every document added in RoutedAliasUpdateProcessor.
+   */
+  public static NamedList remoteInvoke(CollectionsHandler collHandler, String aliasName, String categoryCollection)
+      throws Exception {
+    final String operation = CollectionParams.CollectionAction.MAINTAINCATEGORYROUTEDALIAS.toLower();
+    Map<String, Object> msg = new HashMap<>();
+    msg.put(Overseer.QUEUE_OPERATION, operation);
+    msg.put(CollectionParams.NAME, aliasName);
+    msg.put(IF_CATEGORY_COLLECTION_NOT_FOUND, categoryCollection);
+    final SolrResponse rsp = collHandler.sendToOCPQueue(new ZkNodeProps(msg));
+    if (rsp.getException() != null) {
+      throw rsp.getException();
+    }
+    return rsp.getResponse();
+  }
+
   @Override
   public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
     //todo
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAlias.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAlias.java
index 5e7df68..c083ba2 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAlias.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAlias.java
@@ -17,7 +17,6 @@
 
 package org.apache.solr.cloud.api.collections;
 
-import java.time.Instant;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
@@ -34,7 +33,7 @@ import org.apache.solr.update.AddUpdateCommand;
 import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
 import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
 
-public interface RoutedAlias {
+public interface RoutedAlias<K> {
 
   /**
    * Types supported. Every entry here must have a case in the switch statement in {@link #fromProps(String, Map)}
@@ -117,18 +116,20 @@ public interface RoutedAlias {
    */
   String getAliasName();
 
+  String getRouteField();
+
   /**
    * Parses the elements of the collection list. Result is returned them in sorted order (desc) if there
    * is a natural order for this type of routed alias
    */
-  List<Map.Entry<Instant, String>> parseCollections(Aliases aliases);
+  List<Map.Entry<K, String>> parseCollections(Aliases aliases);
 
   /**
    * Check that the value we will be routing on is legal for this type of routed alias.
    *
    * @param cmd the command containing the document
    */
-  void validateRouteValue(AddUpdateCommand cmd);
+  void validateRouteValue(AddUpdateCommand cmd) throws SolrException;
 
   /**
    * Create any required collections and return the name of the collection to which the current document should be sent.
@@ -147,4 +148,5 @@ public interface RoutedAlias {
   Set<String> getRequiredParams();
 
   Set<String> getOptionalParams();
+
 }
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/TimeRoutedAlias.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/TimeRoutedAlias.java
index afc9ce4..e48d406 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/TimeRoutedAlias.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/TimeRoutedAlias.java
@@ -71,7 +71,7 @@ import static org.apache.solr.common.params.CommonParams.TZ;
  * @see MaintainTimeRoutedAliasCmd
  * @see RoutedAliasUpdateProcessor
  */
-public class TimeRoutedAlias implements RoutedAlias {
+public class TimeRoutedAlias implements RoutedAlias<Instant> {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   // This class is created once per request and the overseer methods prevent duplicate create requests
@@ -252,6 +252,7 @@ public class TimeRoutedAlias implements RoutedAlias {
     return aliasName;
   }
 
+  @Override
   public String getRouteField() {
     return routeField;
   }
@@ -314,7 +315,7 @@ public class TimeRoutedAlias implements RoutedAlias {
   }
 
   @Override
-  public void validateRouteValue(AddUpdateCommand cmd) {
+  public void validateRouteValue(AddUpdateCommand cmd) throws SolrException {
     final Instant docTimestamp =
         parseRouteKey(cmd.getSolrInputDocument().getFieldValue(getRouteField()));
 
@@ -554,4 +555,5 @@ public class TimeRoutedAlias implements RoutedAlias {
     ASYNC_PREEMPTIVE,
     SYNCHRONOUS
   }
+
 }