You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by sa...@apache.org on 2017/07/15 00:56:22 UTC
phoenix git commit: PHOENIX-4027 Mark index as disabled during partial rebuild after configurable amount of time
Repository: phoenix
Updated Branches:
refs/heads/master 3477977f3 -> d541d6f28
PHOENIX-4027 Mark index as disabled during partial rebuild after configurable amount of time
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/d541d6f2
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/d541d6f2
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/d541d6f2
Branch: refs/heads/master
Commit: d541d6f2875a590580e8ccf05f26795083b06658
Parents: 3477977
Author: Samarth Jain <sa...@apache.org>
Authored: Fri Jul 14 17:56:16 2017 -0700
Committer: Samarth Jain <sa...@apache.org>
Committed: Fri Jul 14 17:56:16 2017 -0700
----------------------------------------------------------------------
.../phoenix/end2end/PhoenixRuntimeIT.java | 6 --
.../end2end/index/MutableIndexFailureIT.java | 3 +
.../coprocessor/MetaDataRegionObserver.java | 60 +++++++++++---------
.../org/apache/phoenix/query/QueryServices.java | 2 +
.../phoenix/query/QueryServicesOptions.java | 4 +-
5 files changed, 41 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
index 1109070..95ab906 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
@@ -187,9 +187,6 @@ public class PhoenixRuntimeIT extends ParallelStatsDisabledIT {
assertEquals(
Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER),
rebuildQueryServicesConfig.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER));
- assertEquals(
- Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE),
- rebuildQueryServicesConfig.get(HConstants.HBASE_CLIENT_PAUSE));
ConnectionQueryServices rebuildQueryServices = rebuildIndexConnection.getQueryServices();
HConnection rebuildIndexHConnection =
(HConnection) Whitebox.getInternalState(rebuildQueryServices,
@@ -212,9 +209,6 @@ public class PhoenixRuntimeIT extends ParallelStatsDisabledIT {
assertEquals(
Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER),
rebuildHConnectionConfig.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER));
- assertEquals(
- Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE),
- rebuildHConnectionConfig.get(HConstants.HBASE_CLIENT_PAUSE));
}
}
}
http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
index f47707b..8e2564d 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
@@ -127,6 +127,9 @@ public class MutableIndexFailureIT extends BaseTest {
serverProps.put("hbase.balancer.period", String.valueOf(Integer.MAX_VALUE));
serverProps.put(QueryServices.INDEX_FAILURE_HANDLING_REBUILD_ATTRIB, Boolean.TRUE.toString());
serverProps.put(QueryServices.INDEX_FAILURE_HANDLING_REBUILD_INTERVAL_ATTRIB, "4000");
+ serverProps.put(QueryServices.INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD, "30000"); // give up rebuilding after 30 seconds
+ // need to override rpc retries otherwise test doesn't pass
+ serverProps.put(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER, Long.toString(1));
Map<String, String> clientProps = Collections.singletonMap(QueryServices.TRANSACTIONS_ENABLED, Boolean.TRUE.toString());
NUM_SLAVES_BASE = 4;
setUpTestDriver(new ReadOnlyProps(serverProps.entrySet().iterator()), new ReadOnlyProps(clientProps.entrySet().iterator()));
http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
index 4c63587..5cfacfc 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
@@ -76,8 +76,10 @@ import org.apache.phoenix.schema.MetaDataClient;
import org.apache.phoenix.schema.PIndexState;
import org.apache.phoenix.schema.PName;
import org.apache.phoenix.schema.PTable;
+import org.apache.phoenix.schema.SortOrder;
import org.apache.phoenix.schema.TableNotFoundException;
import org.apache.phoenix.schema.TableRef;
+import org.apache.phoenix.schema.types.PChar;
import org.apache.phoenix.schema.types.PLong;
import org.apache.phoenix.util.ByteUtil;
import org.apache.phoenix.util.MetaDataUtil;
@@ -105,7 +107,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
private boolean enableRebuildIndex = QueryServicesOptions.DEFAULT_INDEX_FAILURE_HANDLING_REBUILD;
private long rebuildIndexTimeInterval = QueryServicesOptions.DEFAULT_INDEX_FAILURE_HANDLING_REBUILD_INTERVAL;
private static Map<PName, Long> batchExecutedPerTableMap = new HashMap<PName, Long>();
-
@GuardedBy("MetaDataRegionObserver.class")
private static Properties rebuildIndexConnectionProps;
@@ -194,8 +195,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
initRebuildIndexConnectionProps(e.getEnvironment().getConfiguration());
// starts index rebuild schedule work
BuildIndexScheduleTask task = new BuildIndexScheduleTask(e.getEnvironment());
- // run scheduled task every 10 secs
- executor.scheduleAtFixedRate(task, 10000, rebuildIndexTimeInterval, TimeUnit.MILLISECONDS);
+ executor.scheduleWithFixedDelay(task, 10000, rebuildIndexTimeInterval, TimeUnit.MILLISECONDS);
} catch (ClassNotFoundException ex) {
LOG.error("BuildIndexScheduleTask cannot start!", ex);
}
@@ -212,6 +212,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
RegionCoprocessorEnvironment env;
private long rebuildIndexBatchSize = HConstants.LATEST_TIMESTAMP;
private long configuredBatches = 10;
+ private long indexDisableTimestampThreshold;
public BuildIndexScheduleTask(RegionCoprocessorEnvironment env) {
this.env = env;
@@ -220,6 +221,9 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
QueryServices.INDEX_FAILURE_HANDLING_REBUILD_PERIOD, HConstants.LATEST_TIMESTAMP);
this.configuredBatches = configuration.getLong(
QueryServices.INDEX_FAILURE_HANDLING_REBUILD_NUMBER_OF_BATCHES_PER_TABLE, configuredBatches);
+ this.indexDisableTimestampThreshold =
+ configuration.getLong(QueryServices.INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD,
+ QueryServicesOptions.DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD);
}
@Override
@@ -309,6 +313,32 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
+ indexPTable.getName() + " are online.");
continue;
}
+ long indexDisableTimestamp =
+ PLong.INSTANCE.getCodec().decodeLong(disabledTimeStamp, 0,
+ SortOrder.ASC);
+ PIndexState state = PIndexState.fromSerializedValue(indexState[0]);
+ if (indexDisableTimestamp > 0 && System.currentTimeMillis()
+ - indexDisableTimestamp > indexDisableTimestampThreshold) {
+ /*
+ * It has been too long since the index has been disabled and any future
+ * attempts to reenable it likely will fail. So we are going to mark the
+ * index as disabled and set the index disable timestamp to 0 so that the
+ * rebuild task won't pick up this index again for rebuild.
+ */
+ try {
+ updateIndexState(conn, indexTableFullName, env, state,
+ PIndexState.DISABLE, 0l);
+ LOG.error("Unable to rebuild index " + indexTableFullName
+ + ". Won't attempt again since index disable timestamp is older than current time by "
+ + indexDisableTimestampThreshold
+ + " milliseconds. Manual intervention needed to re-build the index");
+ } catch (Throwable ex) {
+ LOG.error(
+ "Unable to mark index " + indexTableFullName + " as disabled.", ex);
+ }
+ continue; // don't attempt another rebuild irrespective of whether
+ // updateIndexState worked or not
+ }
// Allow index to begin incremental maintenance as index is back online and we
// cannot transition directly from DISABLED -> ACTIVE
if (Bytes.compareTo(PIndexState.DISABLE.getSerializedBytes(), indexState) == 0) {
@@ -430,24 +460,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
}
}
} catch (Exception e) {
- for (PTable index : indexesToPartiallyRebuild) {
- String indexTableFullName = SchemaUtil.getTableName(
- index.getSchemaName().getString(),
- index.getTableName().getString());
- try {
- /*
- * We are going to mark the index as disabled and set the index
- * disable timestamp to 0 so that the rebuild task won't pick up
- * this index again for rebuild.
- */
- updateIndexState(conn, indexTableFullName, env,
- PIndexState.INACTIVE, PIndexState.DISABLE, 0l);
- } catch (Throwable ex) {
- LOG.error("Unable to mark index " + indexTableFullName + " as disabled after rebuilding it failed", ex);
- }
- }
- LOG.error("Unable to rebuild " + dataPTable + " indexes " + indexesToPartiallyRebuild
- + ". Won't attempt again. Manual intervention needed to re-build the index", e);
+ LOG.error("Unable to rebuild " + dataPTable + " indexes " + indexesToPartiallyRebuild, e);
}
}
}
@@ -550,9 +563,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
int indexRebuildRpcRetriesCounter =
config.getInt(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER,
QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER);
- long indexRebuildRpcRetryPauseTimeMs =
- config.getLong(QueryServices.INDEX_REBUILD_RPC_RETRY_PAUSE_TIME,
- QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE);
// Set SCN so that we don't ping server and have the upper bound set back to
// the timestamp when the failure occurred.
props.setProperty(PhoenixRuntime.CURRENT_SCN_ATTRIB, Long.toString(Long.MAX_VALUE));
@@ -565,8 +575,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
Long.toString(indexRebuildRPCTimeoutMs));
props.setProperty(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
Long.toString(indexRebuildRpcRetriesCounter));
- props.setProperty(HConstants.HBASE_CLIENT_PAUSE,
- Long.toString(indexRebuildRpcRetryPauseTimeMs));
// don't run a second index populations upsert select
props.setProperty(QueryServices.INDEX_POPULATION_SLEEP_TIME, "0");
rebuildIndexConnectionProps = PropertiesUtil.combineProperties(props, config);
http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
index a0575d9..57aba16 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
@@ -142,6 +142,8 @@ public interface QueryServices extends SQLCloseable {
"phoenix.index.failure.handling.rebuild.interval";
public static final String INDEX_FAILURE_HANDLING_REBUILD_NUMBER_OF_BATCHES_PER_TABLE = "phoenix.index.rebuild.batch.perTable";
+ // If index disable timestamp is older than this threshold, then index rebuild task won't attempt to rebuild it
+ public static final String INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = "phoenix.index.rebuild.disabletimestamp.threshold";
// Block writes to data table when index write fails
public static final String INDEX_FAILURE_BLOCK_WRITE = "phoenix.index.failure.block.write";
http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
index e6be091..21d8300 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
@@ -185,8 +185,8 @@ public class QueryServicesOptions {
public static final long DEFAULT_INDEX_REBUILD_QUERY_TIMEOUT = 30000 * 60; // 30 mins
public static final long DEFAULT_INDEX_REBUILD_RPC_TIMEOUT = 30000 * 60; // 30 mins
public static final long DEFAULT_INDEX_REBUILD_CLIENT_SCANNER_TIMEOUT = 30000 * 60; // 30 mins
- public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 5;
- public static final long DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE = 3000; // 3 seconds
+ public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 1; // no retries at rpc level
+ public static final int DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = 30000 * 60; // 30 mins
/**
* HConstants#HIGH_QOS is the max we will see to a standard table. We go higher to differentiate