You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gu...@apache.org on 2019/01/29 02:55:16 UTC
[lucene-solr] 05/05: SOLR-13149 First draft of Category Routed
Alias class. Not really useful or testable until I implement
MaintainCategoryRoutedAliasCmd, however
This is an automated email from the ASF dual-hosted git repository.
gus pushed a commit to branch solr-13131
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 7abca19dc17712a6e7723d8425e9a3aa77f58f56
Author: Gus Heck <gu...@apache.org>
AuthorDate: Mon Jan 28 21:29:26 2019 -0500
SOLR-13149 First draft of Category Routed Alias class. Not really
useful or testable until I implement MaintainCategoryRoutedAliasCmd, however
---
.../cloud/api/collections/CategoryRoutedAlias.java | 158 +++++++++++++++++++--
.../MaintainCategoryRoutedAliasCmd.java | 30 ++++
.../solr/cloud/api/collections/RoutedAlias.java | 10 +-
.../cloud/api/collections/TimeRoutedAlias.java | 6 +-
4 files changed, 186 insertions(+), 18 deletions(-)
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CategoryRoutedAlias.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CategoryRoutedAlias.java
index 5a352a6..a20cf63 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CategoryRoutedAlias.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CategoryRoutedAlias.java
@@ -17,28 +17,74 @@
package org.apache.solr.cloud.api.collections;
-import java.time.Instant;
+import java.lang.invoke.MethodHandles;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.solr.cloud.ZkController;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Aliases;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.admin.CollectionsHandler;
+import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.update.AddUpdateCommand;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-public class CategoryRoutedAlias implements RoutedAlias {
+public class CategoryRoutedAlias implements RoutedAlias<String> {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ public static final String COLLECTION_INFIX = "__CRA__";
+
+ /**
+ * Parameters required for creating a category routed alias
+ */
+ public static final Set<String> REQUIRED_ROUTER_PARAMS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
+ CommonParams.NAME,
+ ROUTER_TYPE_NAME,
+ ROUTER_FIELD)));
+
+ public static final String ROUTER_MAX_CARDINALITY = "router.maxCardinality";
+ public static final String ROUTER_MUST_MATCH = "router.mustMatch";
+
+ /**
+ * Optional parameters for creating a category routed alias excluding parameters for collection creation.
+ */
+ public static final Set<String> OPTIONAL_ROUTER_PARAMS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
+ ROUTER_MAX_CARDINALITY,
+ ROUTER_MUST_MATCH)));
+
+ private List<String> collectionNames; // List of collections currently in the CRA
+ private Aliases parsedCollectionsAliases; // a cached reference to the source of what we parse into parsedCollectionsDesc
private final String aliasName;
- private final Map<String, String> aliasProperties;
+ private final Map<String, String> aliasMetadata;
CategoryRoutedAlias(String aliasName, Map<String, String> aliasMetadata) {
this.aliasName = aliasName;
- this.aliasProperties = aliasMetadata;
+ this.aliasMetadata = aliasMetadata;
}
@Override
public boolean updateParsedCollectionAliases(ZkController zkController) {
+ final Aliases aliases = zkController.getZkStateReader().getAliases(); // note: might be different from last request
+ if (this.parsedCollectionsAliases != aliases) {
+ if (this.parsedCollectionsAliases != null) {
+ log.debug("Observing possibly updated alias: {}", getAliasName());
+ }
+ // slightly inefficient, but not easy to make changes to the return value of parseCollections
+ this.collectionNames = parseCollections(aliases).stream().map(Map.Entry::getValue).collect(Collectors.toList());
+ this.parsedCollectionsAliases = aliases;
+ return true;
+ }
return false;
}
@@ -48,37 +94,125 @@ public class CategoryRoutedAlias implements RoutedAlias {
}
@Override
- public List<Map.Entry<Instant, String>> parseCollections(Aliases aliases) {
- return null;
+ public String getRouteField() {
+ return aliasMetadata.get(ROUTER_FIELD);
}
@Override
- public void validateRouteValue(AddUpdateCommand cmd) {
+ public List<Map.Entry<String, String>> parseCollections(Aliases aliases) {
+ final List<String> collections = aliases.getCollectionAliasListMap().get(aliasName);
+ if (collections == null) {
+ throw RoutedAlias.newAliasMustExistException(getAliasName());
+ }
+ List<Map.Entry<String,String>> result = new ArrayList<>(collections.size());
+ for (String collection : collections) {
+ String collCategory = parseCategoryFromCollectionName(aliasName, collection);
+ result.add(new AbstractMap.SimpleImmutableEntry<>(collCategory, collection));
+ }
+ // TODO Think about this... is order needed? if so perhaps better if insertion maintains order?
+ result.sort((e1, e2) -> e2.getKey().compareTo(e1.getKey())); // reverse sort by key
+ // note that this is also sorted by value, since each value is the key prefixed by the alias name and the
+ // infix, both of which are constant within a given alias.
+ return result;
+ }
+
+ /**
+ * The naming pattern for Category Routed Alias collections is aliasName__CRA__dataDrivenCategory. The
+ * __CRA__ infix reduces the chance of inadvertently duplicating (or worse yet, adopting) other collections
+ * that are not supposed to be included in the alias. With Time Routed Aliases the timestamp in
+ * the collection name was sufficiently unique, but given that aliasName could be anything and
+ * two-part collection names of the form foo_bar are probably common in the wild, the infix seems
+ * necessary.
+ *
+ */
+ private String parseCategoryFromCollectionName(String aliasName, String collection) {
+ String prefix = aliasName + COLLECTION_INFIX;
+ if (!collection.startsWith(prefix)) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,"Category Routed Alias collection names " +
+ "must start with the name of the alias plus " + COLLECTION_INFIX);
+ }
+ return collection.substring(prefix.length(),collection.length());
+ }
+
+ @Override
+ public void validateRouteValue(AddUpdateCommand cmd) throws SolrException {
+ //Mostly this will be filled out by SOLR-13150 and SOLR-13151
+ }
+
+ /**
+ * Calculate a safe collection name suffix from a data value. The value is trimmed and any
+ * non-word character is replaced with an underscore.
+ *
+ * @param dataValue a value from the route field for a particular document
+ * @return the suffix value for its corresponding collection name.
+ */
+ private String safeKeyValue(String dataValue) {
+ return dataValue.trim().replaceAll("\\W", "_");
+ }
+
+ private String buildCollectionNameFromValue(String value) {
+ return aliasName + COLLECTION_INFIX + safeKeyValue(value);
}
@Override
public String createCollectionsIfRequired(AddUpdateCommand cmd) {
- return null;
+ SolrQueryRequest req = cmd.getReq();
+ SolrCore core = req.getCore();
+ CoreContainer coreContainer = core.getCoreContainer();
+ CollectionsHandler collectionsHandler = coreContainer.getCollectionsHandler();
+ String dataValue = String.valueOf(cmd.getSolrInputDocument().getFieldValue(getRouteField()));
+
+ String candidateCollectionName = buildCollectionNameFromValue(dataValue);
+
+ try {
+ // Note: CRAs have no way to predict the values that determine the collection, so preemptive async creation
+ // is not possible. We have no choice but to block and wait (to do otherwise would imperil the overseer).
+ do {
+ if (this.collectionNames.contains(candidateCollectionName)) {
+ return candidateCollectionName;
+ } else {
+ // this could time out in which case we simply let it throw an error
+ MaintainCategoryRoutedAliasCmd.remoteInvoke(collectionsHandler, getAliasName(), candidateCollectionName);
+ // It's possible no collection was created because of a race and that's okay... we'll retry.
+
+ // Ensure our view of the aliases has updated. If we didn't do this, our zkStateReader might
+ // not yet know about the new alias (thus won't see the newly added collection to it), and we might think
+ // we failed.
+ collectionsHandler.getCoreContainer().getZkController().getZkStateReader().aliasesManager.update();
+
+ // we should see some sort of update to our aliases
+ if (!updateParsedCollectionAliases(coreContainer.getZkController())) { // thus we didn't make progress...
+ // this is not expected, even in known failure cases, but we check just in case
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+ "We need to create a new category routed collection but for unknown reasons were unable to do so.");
+ }
+ }
+ } while (true);
+ } catch (SolrException e) {
+ throw e;
+ } catch (Exception e) {
+ throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+ }
}
@Override
public Optional<String> computeInitialCollectionName() {
- return null;
+ return Optional.empty();
}
@Override
public Map<String, String> getAliasMetadata() {
- return aliasProperties;
+ return aliasMetadata;
}
@Override
public Set<String> getRequiredParams() {
- return new HashSet<>();
+ return REQUIRED_ROUTER_PARAMS;
}
@Override
public Set<String> getOptionalParams() {
- return new HashSet<>();
+ return OPTIONAL_ROUTER_PARAMS;
}
}
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/MaintainCategoryRoutedAliasCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/MaintainCategoryRoutedAliasCmd.java
index 59274d6..93a832b 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/MaintainCategoryRoutedAliasCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/MaintainCategoryRoutedAliasCmd.java
@@ -17,11 +17,41 @@
package org.apache.solr.cloud.api.collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.solr.client.solrj.SolrResponse;
+import org.apache.solr.cloud.Overseer;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.handler.admin.CollectionsHandler;
public class MaintainCategoryRoutedAliasCmd extends AliasCmd {
+
+ public static final String IF_CATEGORY_COLLECTION_NOT_FOUND = "ifCategoryCollectionNotFound";
+
+ /**
+ * Invokes this command from the client. If there's a problem it will throw an exception.
+ * Please note that it is important to never add async to this invocation. This method must
+ * block (up to the standard OCP timeout) to prevent large batches of adds from sending a message
+ * to the overseer for every document added in RoutedAliasUpdateProcessor.
+ */
+ public static NamedList remoteInvoke(CollectionsHandler collHandler, String aliasName, String categoryCollection)
+ throws Exception {
+ final String operation = CollectionParams.CollectionAction.MAINTAINCATEGORYROUTEDALIAS.toLower();
+ Map<String, Object> msg = new HashMap<>();
+ msg.put(Overseer.QUEUE_OPERATION, operation);
+ msg.put(CollectionParams.NAME, aliasName);
+ msg.put(IF_CATEGORY_COLLECTION_NOT_FOUND, categoryCollection);
+ final SolrResponse rsp = collHandler.sendToOCPQueue(new ZkNodeProps(msg));
+ if (rsp.getException() != null) {
+ throw rsp.getException();
+ }
+ return rsp.getResponse();
+ }
+
@Override
public void call(ClusterState state, ZkNodeProps message, NamedList results) throws Exception {
//todo
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAlias.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAlias.java
index 5e7df68..c083ba2 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAlias.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RoutedAlias.java
@@ -17,7 +17,6 @@
package org.apache.solr.cloud.api.collections;
-import java.time.Instant;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
@@ -34,7 +33,7 @@ import org.apache.solr.update.AddUpdateCommand;
import static org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
-public interface RoutedAlias {
+public interface RoutedAlias<K> {
/**
* Types supported. Every entry here must have a case in the switch statement in {@link #fromProps(String, Map)}
@@ -117,18 +116,20 @@ public interface RoutedAlias {
*/
String getAliasName();
+ String getRouteField();
+
/**
* Parses the elements of the collection list. Result is returned them in sorted order (desc) if there
* is a natural order for this type of routed alias
*/
- List<Map.Entry<Instant, String>> parseCollections(Aliases aliases);
+ List<Map.Entry<K, String>> parseCollections(Aliases aliases);
/**
* Check that the value we will be routing on is legal for this type of routed alias.
*
* @param cmd the command containing the document
*/
- void validateRouteValue(AddUpdateCommand cmd);
+ void validateRouteValue(AddUpdateCommand cmd) throws SolrException;
/**
* Create any required collections and return the name of the collection to which the current document should be sent.
@@ -147,4 +148,5 @@ public interface RoutedAlias {
Set<String> getRequiredParams();
Set<String> getOptionalParams();
+
}
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/TimeRoutedAlias.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/TimeRoutedAlias.java
index afc9ce4..e48d406 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/TimeRoutedAlias.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/TimeRoutedAlias.java
@@ -71,7 +71,7 @@ import static org.apache.solr.common.params.CommonParams.TZ;
* @see MaintainTimeRoutedAliasCmd
* @see RoutedAliasUpdateProcessor
*/
-public class TimeRoutedAlias implements RoutedAlias {
+public class TimeRoutedAlias implements RoutedAlias<Instant> {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// This class is created once per request and the overseer methods prevent duplicate create requests
@@ -252,6 +252,7 @@ public class TimeRoutedAlias implements RoutedAlias {
return aliasName;
}
+ @Override
public String getRouteField() {
return routeField;
}
@@ -314,7 +315,7 @@ public class TimeRoutedAlias implements RoutedAlias {
}
@Override
- public void validateRouteValue(AddUpdateCommand cmd) {
+ public void validateRouteValue(AddUpdateCommand cmd) throws SolrException {
final Instant docTimestamp =
parseRouteKey(cmd.getSolrInputDocument().getFieldValue(getRouteField()));
@@ -554,4 +555,5 @@ public class TimeRoutedAlias implements RoutedAlias {
ASYNC_PREEMPTIVE,
SYNCHRONOUS
}
+
}