You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2017/12/22 16:05:38 UTC

lucene-solr:branch_7x: SOLR-11201: Implement autoscaling trigger for arbitrary metrics that creates events when a given metric breaches a threshold

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 1888209f3 -> 136b2581e


SOLR-11201: Implement autoscaling trigger for arbitrary metrics that creates events when a given metric breaches a threshold

(cherry picked from commit 43f17f7)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/136b2581
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/136b2581
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/136b2581

Branch: refs/heads/branch_7x
Commit: 136b2581e854a205a5a2e2a6c3dcc288138f0ce1
Parents: 1888209
Author: Shalin Shekhar Mangar <sh...@apache.org>
Authored: Fri Dec 22 21:18:37 2017 +0530
Committer: Shalin Shekhar Mangar <sh...@apache.org>
Committed: Fri Dec 22 21:35:29 2017 +0530

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../solr/cloud/autoscaling/AutoScaling.java     |   2 +
 .../cloud/autoscaling/ComputePlanAction.java    |  26 ++-
 .../solr/cloud/autoscaling/MetricTrigger.java   | 190 +++++++++++++++++++
 .../cloud/autoscaling/MetricTriggerTest.java    | 133 +++++++++++++
 .../autoscaling/TriggerIntegrationTest.java     | 132 ++++++++++++-
 .../src/solrcloud-autoscaling-triggers.adoc     |  32 +++-
 .../cloud/autoscaling/TriggerEventType.java     |   3 +-
 .../solr/common/params/AutoScalingParams.java   |   4 +
 9 files changed, 521 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 8bbca31..d8045bf 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -40,6 +40,9 @@ New Features
   processor.  (Lance Norskog, Grant Ingersoll, Joern Kottmann, Em, Kai Gülzau,
   Rene Nederhand, Robert Muir, Steven Bower, Steve Rowe)
 
+* SOLR-11201: Implement autoscaling trigger for arbitrary metrics that creates events when
+  a given metric breaches a threshold (shalin)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java
index 039067c..3ebfbd0 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java
@@ -155,6 +155,8 @@ public class AutoScaling {
           return new NodeLostTrigger(name, props, loader, cloudManager);
         case SEARCHRATE:
           return new SearchRateTrigger(name, props, loader, cloudManager);
+        case METRIC:
+          return new MetricTrigger(name, props, loader, cloudManager);
         default:
           throw new IllegalArgumentException("Unknown event type: " + type + " in trigger: " + name);
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
index 8cce976..b1e33e1 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
@@ -40,6 +40,8 @@ import org.apache.solr.common.util.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.common.params.AutoScalingParams.PREFERRED_OP;
+
 /**
  * This class is responsible for using the configured policy and preferences
  * with the hints provided by the trigger event to compute the required cluster operations.
@@ -133,8 +135,30 @@ public class ComputePlanAction extends TriggerActionBase {
           }
         }
         break;
+      case METRIC:
+        Map<String, Number> sourceNodes = (Map<String, Number>) event.getProperty(AutoScalingParams.NODE);
+        String collection = (String) event.getProperty(AutoScalingParams.COLLECTION);
+        String shard = (String) event.getProperty(AutoScalingParams.SHARD);
+        String preferredOp = (String) event.getProperty(PREFERRED_OP);
+        if (sourceNodes.isEmpty()) {
+          log.warn("No nodes reported in event: " + event);
+          return NoneSuggester.INSTANCE;
+        }
+        CollectionParams.CollectionAction action = CollectionParams.CollectionAction.get(preferredOp == null ? CollectionParams.CollectionAction.MOVEREPLICA.toLower() : preferredOp);
+        suggester = session.getSuggester(action);
+        for (String node : sourceNodes.keySet()) {
+          suggester = suggester.hint(Suggester.Hint.SRC_NODE, node);
+        }
+        if (collection != null) {
+          if (shard == null) {
+            suggester = suggester.hint(Suggester.Hint.COLL, collection);
+          } else {
+            suggester = suggester.hint(Suggester.Hint.COLL_SHARD, new Pair(collection, shard));
+          }
+        }
+        break;
       default:
-        throw new UnsupportedOperationException("No support for events other than nodeAdded, nodeLost and searchRate, received: " + event.getEventType());
+        throw new UnsupportedOperationException("No support for events other than nodeAdded, nodeLost, searchRate and metric. Received: " + event.getEventType());
     }
     return suggester;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/core/src/java/org/apache/solr/cloud/autoscaling/MetricTrigger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/MetricTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/MetricTrigger.java
new file mode 100644
index 0000000..531e4e6
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/MetricTrigger.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling;
+
+import java.lang.invoke.MethodHandles;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
+
+import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
+import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager;
+import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.params.AutoScalingParams;
+import org.apache.solr.core.SolrResourceLoader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.solr.common.params.AutoScalingParams.ABOVE;
+import static org.apache.solr.common.params.AutoScalingParams.BELOW;
+import static org.apache.solr.common.params.AutoScalingParams.METRIC;
+import static org.apache.solr.common.params.AutoScalingParams.PREFERRED_OP;
+
+public class MetricTrigger extends TriggerBase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private final String metric;
+  private final Number above, below;
+  private final String collection, shard, node, preferredOp;
+
+  private final Map<String, Long> lastNodeEvent = new ConcurrentHashMap<>();
+
+  public MetricTrigger(String name, Map<String, Object> properties, SolrResourceLoader loader, SolrCloudManager cloudManager) {
+    super(TriggerEventType.METRIC, name, properties, loader, cloudManager);
+    this.metric = (String) properties.get(METRIC);
+    this.above = (Number) properties.get(ABOVE);
+    this.below = (Number) properties.get(BELOW);
+    this.collection = (String) properties.getOrDefault(AutoScalingParams.COLLECTION, Policy.ANY);
+    shard = (String) properties.getOrDefault(AutoScalingParams.SHARD, Policy.ANY);
+    if (collection.equals(Policy.ANY) && !shard.equals(Policy.ANY)) {
+      throw new IllegalArgumentException("When 'shard' is other than #ANY then collection name must be also other than #ANY");
+    }
+    node = (String) properties.getOrDefault(AutoScalingParams.NODE, Policy.ANY);
+    preferredOp = (String) properties.getOrDefault(PREFERRED_OP, null);
+  }
+
+  @Override
+  protected Map<String, Object> getState() {
+    return null;
+  }
+
+  @Override
+  protected void setState(Map<String, Object> state) {
+    lastNodeEvent.clear();
+    Map<String, Long> nodeTimes = (Map<String, Long>) state.get("lastNodeEvent");
+    if (nodeTimes != null) {
+      lastNodeEvent.putAll(nodeTimes);
+    }
+  }
+
+  @Override
+  public void restoreState(AutoScaling.Trigger old) {
+    assert old.isClosed();
+    if (old instanceof MetricTrigger) {
+      MetricTrigger that = (MetricTrigger) old;
+      assert this.name.equals(that.name);
+      this.lastNodeEvent.clear();
+      this.lastNodeEvent.putAll(that.lastNodeEvent);
+    } else {
+      throw new SolrException(SolrException.ErrorCode.INVALID_STATE,
+          "Unable to restore state from an unknown type of trigger");
+    }
+  }
+
+  @Override
+  public void run() {
+    AutoScaling.TriggerEventProcessor processor = processorRef.get();
+    if (processor == null) {
+      return;
+    }
+
+    Set<String> liveNodes = null;
+    if (node.equals(Policy.ANY)) {
+      if (collection.equals(Policy.ANY)) {
+        liveNodes = cloudManager.getClusterStateProvider().getLiveNodes();
+      } else {
+        final Set<String> nodes = new HashSet<>();
+        ClusterState.CollectionRef ref = cloudManager.getClusterStateProvider().getState(collection);
+        DocCollection docCollection;
+        if (ref == null || (docCollection = ref.get()) == null) {
+          log.warn("MetricTrigger could not find collection: {}", collection);
+          return;
+        }
+        if (shard.equals(Policy.ANY)) {
+          docCollection.getReplicas().forEach(replica -> {
+            nodes.add(replica.getNodeName());
+          });
+        } else {
+          Slice slice = docCollection.getSlice(shard);
+          if (slice == null) {
+            log.warn("MetricTrigger could not find collection: {} shard: {}", collection, shard);
+            return;
+          }
+          slice.getReplicas().forEach(replica -> nodes.add(replica.getNodeName()));
+        }
+        liveNodes = nodes;
+      }
+    } else {
+      liveNodes = Collections.singleton(node);
+    }
+
+    Map<String, Number> rates = new HashMap<>(liveNodes.size());
+    for (String node : liveNodes) {
+      Map<String, Object> values = cloudManager.getNodeStateProvider().getNodeValues(node, Collections.singletonList(metric));
+      values.forEach((tag, rate) -> rates.computeIfAbsent(node, s -> (Number) rate));
+    }
+
+    long now = cloudManager.getTimeSource().getTime();
+    // check for exceeded rates and filter out those with less than waitFor from previous events
+    Map<String, Number> hotNodes = rates.entrySet().stream()
+        .filter(entry -> waitForElapsed(entry.getKey(), now, lastNodeEvent))
+        .filter(entry -> (below != null && Double.compare(entry.getValue().doubleValue(), below.doubleValue()) < 0) || (above != null && Double.compare(entry.getValue().doubleValue(), above.doubleValue()) > 0))
+        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+
+    if (hotNodes.isEmpty()) return;
+
+    final AtomicLong eventTime = new AtomicLong(now);
+    hotNodes.forEach((n, r) -> {
+      long time = lastNodeEvent.get(n);
+      if (eventTime.get() > time) {
+        eventTime.set(time);
+      }
+    });
+
+    if (processor.process(new MetricBreachedEvent(getName(), collection, shard, preferredOp, eventTime.get(), metric, hotNodes))) {
+      hotNodes.keySet().forEach(node -> lastNodeEvent.put(node, now));
+    }
+  }
+
+  private boolean waitForElapsed(String name, long now, Map<String, Long> lastEventMap) {
+    Long lastTime = lastEventMap.computeIfAbsent(name, s -> now);
+    long elapsed = TimeUnit.SECONDS.convert(now - lastTime, TimeUnit.NANOSECONDS);
+    log.trace("name={}, lastTime={}, elapsed={}", name, lastTime, elapsed);
+    if (TimeUnit.SECONDS.convert(now - lastTime, TimeUnit.NANOSECONDS) < getWaitForSecond()) {
+      return false;
+    }
+    return true;
+  }
+
+  public static class MetricBreachedEvent extends TriggerEvent {
+    public MetricBreachedEvent(String source, String collection, String shard, String preferredOp, long eventTime, String metric, Map<String, Number> hotNodes) {
+      super(TriggerEventType.METRIC, source, eventTime, null);
+      properties.put(METRIC, metric);
+      properties.put(AutoScalingParams.NODE, hotNodes);
+      if (!collection.equals(Policy.ANY)) {
+        properties.put(AutoScalingParams.COLLECTION, collection);
+      }
+      if (!shard.equals(Policy.ANY))  {
+        properties.put(AutoScalingParams.SHARD, shard);
+      }
+      if (preferredOp != null)  {
+        properties.put(PREFERRED_OP, preferredOp);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/core/src/test/org/apache/solr/cloud/autoscaling/MetricTriggerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/MetricTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/MetricTriggerTest.java
new file mode 100644
index 0000000..96083f4
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/MetricTriggerTest.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud.autoscaling;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.SolrClientCloudManager;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.cloud.ZkDistributedQueueFactory;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.metrics.SolrCoreMetricManager;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class MetricTriggerTest extends SolrCloudTestCase {
+
+  private AutoScaling.TriggerEventProcessor noFirstRunProcessor = event -> {
+    fail("Did not expect the listener to fire on first run!");
+    return true;
+  };
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(1)
+        .addConfig("conf", configset("cloud-minimal"))
+        .configure();
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(DEFAULT_TEST_COLLECTION_NAME,
+        "conf", 1, 1);
+    CloudSolrClient solrClient = cluster.getSolrClient();
+    create.setMaxShardsPerNode(1);
+    create.process(solrClient);
+  }
+
+  @Test
+  public void test() throws Exception {
+    CoreDescriptor coreDescriptor = cluster.getJettySolrRunner(0).getCoreContainer().getCoreDescriptors().iterator().next();
+    String shardId = coreDescriptor.getCloudDescriptor().getShardId();
+    String coreName = coreDescriptor.getName();
+    String replicaName = Utils.parseMetricsReplicaName(DEFAULT_TEST_COLLECTION_NAME, coreName);
+    long waitForSeconds = 2 + random().nextInt(5);
+    String registry = SolrCoreMetricManager.createRegistryName(true, DEFAULT_TEST_COLLECTION_NAME, shardId, replicaName, null);
+    String tag = "metrics:" + registry + ":ADMIN./admin/file.requests";
+
+    Map<String, Object> props = createTriggerProps(waitForSeconds, tag, 1.0d, null, DEFAULT_TEST_COLLECTION_NAME, null, null);
+
+    final List<TriggerEvent> events = new ArrayList<>();
+    SolrZkClient zkClient = cluster.getSolrClient().getZkStateReader().getZkClient();
+    SolrResourceLoader loader = cluster.getJettySolrRunner(0).getCoreContainer().getResourceLoader();
+    SolrCloudManager cloudManager = new SolrClientCloudManager(new ZkDistributedQueueFactory(zkClient), cluster.getSolrClient());
+
+    try (MetricTrigger metricTrigger = new MetricTrigger("metricTrigger", props, loader, cloudManager)) {
+      metricTrigger.setProcessor(noFirstRunProcessor);
+      metricTrigger.run();
+      metricTrigger.setProcessor(event -> events.add(event));
+      assertEquals(0, events.size());
+      Thread.sleep(waitForSeconds * 1000 + 2000);
+      metricTrigger.run();
+      assertEquals(1, events.size());
+    }
+
+    events.clear();
+    tag = "metrics:" + registry + ":ADMIN./admin/file.handlerStart";
+    props = createTriggerProps(waitForSeconds, tag, null, 100.0d, DEFAULT_TEST_COLLECTION_NAME, null, null);
+    try (MetricTrigger metricTrigger = new MetricTrigger("metricTrigger", props, loader, cloudManager)) {
+      metricTrigger.setProcessor(noFirstRunProcessor);
+      metricTrigger.run();
+      metricTrigger.setProcessor(event -> events.add(event));
+      assertEquals(0, events.size());
+      Thread.sleep(waitForSeconds * 1000 + 2000);
+      metricTrigger.run();
+      assertEquals(1, events.size());
+    }
+  }
+
+  private Map<String, Object> createTriggerProps(long waitForSeconds, String metric, Double below, Double above, String collection, String shard, String node) {
+    Map<String, Object> props = new HashMap<>();
+    props.put("metric", metric);
+    if (above != null) {
+      props.put("above", above);
+    }
+    if (below != null) {
+      props.put("below", below);
+    }
+    if (collection != null) {
+      props.put("collection", collection);
+    }
+    if (shard != null) {
+      props.put("shard", shard);
+    }
+    if (node != null) {
+      props.put("node", node);
+    }
+    props.put("event", "metric");
+    props.put("waitFor", waitForSeconds);
+    props.put("enabled", true);
+
+    List<Map<String, String>> actions = new ArrayList<>(3);
+    Map<String, String> map = new HashMap<>(2);
+    map.put("name", "compute_plan");
+    map.put("class", "solr.ComputePlanAction");
+    actions.add(map);
+    map = new HashMap<>(2);
+    map.put("name", "execute_plan");
+    map.put("class", "solr.ExecutePlanAction");
+    actions.add(map);
+    props.put("actions", actions);
+    return props;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
index c104a99..eb196c1 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java
@@ -45,18 +45,22 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.LiveNodesListener;
+import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.AutoScalingParams;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.metrics.SolrCoreMetricManager;
 import org.apache.solr.util.LogLevel;
 import org.apache.solr.util.TimeOut;
-import org.apache.solr.common.util.TimeSource;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.ZKUtil;
 import org.apache.zookeeper.data.Stat;
@@ -1497,4 +1501,130 @@ public class TriggerIntegrationTest extends SolrCloudTestCase {
     assertEquals(collectionRate, totalShardRate.get(), 5.0);
     assertEquals(collectionRate, totalReplicaRate.get(), 5.0);
   }
+
+  @Test
+  public void testMetricTrigger() throws Exception {
+    // at least 3 nodes
+    for (int i = cluster.getJettySolrRunners().size(); i < 3; i++) {
+      cluster.startJettySolrRunner();
+    }
+    cluster.waitForAllNodes(5);
+
+    String collectionName = "testMetricTrigger";
+    CloudSolrClient solrClient = cluster.getSolrClient();
+    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName,
+        "conf", 2, 1);
+    create.process(solrClient);
+    solrClient.setDefaultCollection(collectionName);
+
+    waitForState("Timed out waiting for collection:" + collectionName + " to become active", collectionName, clusterShape(2, 1));
+
+    DocCollection docCollection = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
+    String shardId = "shard1";
+    Replica replica = docCollection.getSlice(shardId).getReplicas().iterator().next();
+    String coreName = replica.getCoreName();
+    String replicaName = Utils.parseMetricsReplicaName(collectionName, coreName);
+    long waitForSeconds = 2 + random().nextInt(5);
+    String registry = SolrCoreMetricManager.createRegistryName(true, collectionName, shardId, replicaName, null);
+    String tag = "metrics:" + registry + ":INDEX.sizeInBytes";
+
+    String setTriggerCommand = "{" +
+        "'set-trigger' : {" +
+        "'name' : 'metric_trigger'," +
+        "'event' : 'metric'," +
+        "'waitFor' : '" + waitForSeconds + "s'," +
+        "'enabled' : true," +
+        "'metric': '" + tag + "'" +
+        "'above' : 100.0," +
+        "'collection': '" + collectionName + "'" +
+        "'shard':'"  + shardId + "'" +
+        "'actions' : [" +
+        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+        "{'name':'test','class':'" + TestSearchRateAction.class.getName() + "'}" +
+        "]" +
+        "}}";
+    SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+    NamedList<Object> response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    String setListenerCommand1 = "{" +
+        "'set-listener' : " +
+        "{" +
+        "'name' : 'srt'," +
+        "'trigger' : 'metric_trigger'," +
+        "'stage' : ['FAILED','SUCCEEDED']," +
+        "'afterAction': ['compute', 'execute', 'test']," +
+        "'class' : '" + TestTriggerListener.class.getName() + "'" +
+        "}" +
+        "}";
+    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand1);
+    response = solrClient.request(req);
+    assertEquals(response.get("result").toString(), "success");
+
+    for (int i = 0; i < 500; i++) {
+      solrClient.add(new SolrInputDocument("id", String.valueOf(i), "x_s", "x" + i));
+    }
+    solrClient.commit();
+
+    boolean await = triggerFiredLatch.await(20, TimeUnit.SECONDS);
+    assertTrue("The trigger did not fire at all", await);
+    // wait for listener to capture the SUCCEEDED stage
+    Thread.sleep(2000);
+    assertEquals(listenerEvents.toString(), 4, listenerEvents.get("srt").size());
+    CapturedEvent ev = listenerEvents.get("srt").get(0);
+    long now = timeSource.getTime();
+    // verify waitFor
+    assertTrue(TimeUnit.SECONDS.convert(waitForSeconds, TimeUnit.NANOSECONDS) - WAIT_FOR_DELTA_NANOS <= now - ev.event.getEventTime());
+    assertEquals(collectionName, ev.event.getProperties().get("collection"));
+
+    String oldReplicaName = replica.getName();
+    docCollection = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
+    assertEquals(2, docCollection.getReplicas().size());
+    assertNull(docCollection.getReplica(oldReplicaName));
+
+    // todo uncomment the following code once SOLR-11714 is fixed
+    // find a new replica and create its metric name
+//    replica = docCollection.getSlice(shardId).getReplicas().iterator().next();
+//    coreName = replica.getCoreName();
+//    replicaName = Utils.parseMetricsReplicaName(collectionName, coreName);
+//    registry = SolrCoreMetricManager.createRegistryName(true, collectionName, shardId, replicaName, null);
+//    tag = "metrics:" + registry + ":INDEX.sizeInBytes";
+//
+//    setTriggerCommand = "{" +
+//        "'set-trigger' : {" +
+//        "'name' : 'metric_trigger'," +
+//        "'event' : 'metric'," +
+//        "'waitFor' : '" + waitForSeconds + "s'," +
+//        "'enabled' : true," +
+//        "'metric': '" + tag + "'" +
+//        "'above' : 100.0," +
+//        "'collection': '" + collectionName + "'" +
+//        "'shard':'"  + shardId + "'" +
+//        "'preferredOperation':'addreplica'" +
+//        "'actions' : [" +
+//        "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," +
+//        "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," +
+//        "{'name':'test','class':'" + TestSearchRateAction.class.getName() + "'}" +
+//        "]" +
+//        "}}";
+//    req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand);
+//    response = solrClient.request(req);
+//    assertEquals(response.get("result").toString(), "success");
+//
+//    triggerFiredLatch = new CountDownLatch(1);
+//    listenerEvents.clear();
+//    await = triggerFiredLatch.await(20, TimeUnit.SECONDS);
+//    assertTrue("The trigger did not fire at all", await);
+//    // wait for listener to capture the SUCCEEDED stage
+//    Thread.sleep(2000);
+//    assertEquals(listenerEvents.toString(), 4, listenerEvents.get("srt").size());
+//    ev = listenerEvents.get("srt").get(0);
+//    now = timeSource.getTime();
+//    // verify waitFor
+//    assertTrue(TimeUnit.SECONDS.convert(waitForSeconds, TimeUnit.NANOSECONDS) - WAIT_FOR_DELTA_NANOS <= now - ev.event.getEventTime());
+//    assertEquals(collectionName, ev.event.getProperties().get("collection"));
+//    docCollection = solrClient.getZkStateReader().getClusterState().getCollection(collectionName);
+//    assertEquals(3, docCollection.getReplicas().size());
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
index 70cbb5f..5a98657 100644
--- a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
+++ b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc
@@ -34,6 +34,7 @@ Currently the following event types (and corresponding trigger implementations)
 
 * `nodeAdded` - generated when a new node joins the cluster
 * `nodeLost` - generated when a node leaves the cluster
+* `metric` - generated when the configured metric crosses a configured lower or upper threshold value
 
 Events are not necessarily generated immediately after the corresponding state change occurred - the
 maximum rate of events is controlled by the `waitFor` configuration parameter (see below).
@@ -57,6 +58,35 @@ to add replicas on the live nodes to maintain the expected replication factor).
 
 You can see the section <<solrcloud-autoscaling-auto-add-replicas.adoc#solrcloud-autoscaling-auto-add-replicas, Autoscaling Automatically Adding Replicas>> to learn more about how the `.autoAddReplicas` trigger works.
 
+== Metric Trigger
+
+The metric trigger can be used to monitor any metric exposed by the Metrics API. It supports lower and upper threshold configurations as well as optional filters to limit operation to specific collections, shards and nodes.
+
+This trigger supports the following configuration:
+
+* `metric` - (string, required) The metric property name to be watched, in the format `metrics:group:prefix`, e.g. `metrics:solr.node:CONTAINER.fs.coreRoot.usableSpace`.
+* `below` - (double, optional) The lower threshold for the metric value. The trigger produces a metric breached event if the metric's value falls below this value.
+* `above` - (double, optional) The upper threshold for the metric value. The trigger produces a metric breached event if the metric's value rises above this value.
+* `collection` - (string, optional) The collection used to limit the nodes on which the given metric is watched. When the metric is breached, trigger actions will limit operations to this collection only.
+* `shard` - (string, optional) The shard used to limit the nodes on which the given metric is watched. A `collection` must also be specified when `shard` is set. When the metric is breached, trigger actions will limit operations to this shard only.
+* `node` - (string, optional) The node on which the given metric is watched. Trigger actions will operate on this node only.
+* `preferredOperation` - (string, optional, defaults to `MOVEREPLICA`) The operation to be performed in response to an event generated by this trigger. By default, replicas will be moved from the hot node to others. The only other supported value is `ADDREPLICA`, which adds more replicas if the metric is breached.
+
+.Example: Metric Trigger that fires when total usable space on a node having replicas of "mycollection" falls below 100GB
+[source,json]
+----
+{
+  "set-trigger": {
+    "name": "metric_trigger",
+    "event": "metric",
+    "waitFor": "5s",
+    "metric": "metrics:solr.node:CONTAINER.fs.coreRoot.usableSpace",
+    "below": 107374182400,
+    "collection": "mycollection"
+  }
+}
+----
+
 == Trigger Configuration
 Trigger configurations are managed using the Autoscaling Write API and the commands `set-trigger`, `remove-trigger`,
 `suspend-trigger`, and `resume-trigger`.
@@ -74,7 +104,7 @@ Action configuration consists of the following properties:
 
 * `name` - (string, required) A unique name of the action configuration.
 * `class` - (string, required) The action implementation class.
-* A dditional implementation-specific properties may be provided
+* Additional implementation-specific properties may be provided
 
 If the Action configuration is omitted, then by default, the `ComputePlanAction` and the `ExecutePlanAction` are automatically added to the trigger configuration.
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java
index 238d7e1..96bc773 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java
@@ -27,5 +27,6 @@ public enum TriggerEventType {
   SCHEDULED,
   SEARCHRATE,
   INDEXRATE,
-  INVALID
+  INVALID,
+  METRIC
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/136b2581/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java b/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java
index cf259c6..4f00e28 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java
@@ -46,6 +46,10 @@ public interface AutoScalingParams {
   String RATE = "rate";
   String REMOVE_LISTENERS = "removeListeners";
   String ZK_VERSION = "zkVersion";
+  String METRIC = "metric";
+  String ABOVE = "above";
+  String BELOW = "below";
+  String PREFERRED_OP = "preferredOperation";
 
   // commands
   String CMD_SET_TRIGGER = "set-trigger";