You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by sa...@apache.org on 2017/07/15 00:56:22 UTC

phoenix git commit: PHOENIX-4027 Mark index as disabled during partial rebuild after configurable amount of time

Repository: phoenix
Updated Branches:
  refs/heads/master 3477977f3 -> d541d6f28


PHOENIX-4027 Mark index as disabled during partial rebuild after configurable amount of time


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/d541d6f2
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/d541d6f2
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/d541d6f2

Branch: refs/heads/master
Commit: d541d6f2875a590580e8ccf05f26795083b06658
Parents: 3477977
Author: Samarth Jain <sa...@apache.org>
Authored: Fri Jul 14 17:56:16 2017 -0700
Committer: Samarth Jain <sa...@apache.org>
Committed: Fri Jul 14 17:56:16 2017 -0700

----------------------------------------------------------------------
 .../phoenix/end2end/PhoenixRuntimeIT.java       |  6 --
 .../end2end/index/MutableIndexFailureIT.java    |  3 +
 .../coprocessor/MetaDataRegionObserver.java     | 60 +++++++++++---------
 .../org/apache/phoenix/query/QueryServices.java |  2 +
 .../phoenix/query/QueryServicesOptions.java     |  4 +-
 5 files changed, 41 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
index 1109070..95ab906 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/PhoenixRuntimeIT.java
@@ -187,9 +187,6 @@ public class PhoenixRuntimeIT extends ParallelStatsDisabledIT {
                 assertEquals(
                     Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER),
                     rebuildQueryServicesConfig.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER));
-                assertEquals(
-                    Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE),
-                    rebuildQueryServicesConfig.get(HConstants.HBASE_CLIENT_PAUSE));
                 ConnectionQueryServices rebuildQueryServices = rebuildIndexConnection.getQueryServices();
                 HConnection rebuildIndexHConnection =
                         (HConnection) Whitebox.getInternalState(rebuildQueryServices,
@@ -212,9 +209,6 @@ public class PhoenixRuntimeIT extends ParallelStatsDisabledIT {
                 assertEquals(
                     Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER),
                     rebuildHConnectionConfig.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER));
-                assertEquals(
-                    Long.toString(QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE),
-                    rebuildHConnectionConfig.get(HConstants.HBASE_CLIENT_PAUSE));
             }
         }
     }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
index f47707b..8e2564d 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/MutableIndexFailureIT.java
@@ -127,6 +127,9 @@ public class MutableIndexFailureIT extends BaseTest {
         serverProps.put("hbase.balancer.period", String.valueOf(Integer.MAX_VALUE));
         serverProps.put(QueryServices.INDEX_FAILURE_HANDLING_REBUILD_ATTRIB, Boolean.TRUE.toString());
         serverProps.put(QueryServices.INDEX_FAILURE_HANDLING_REBUILD_INTERVAL_ATTRIB, "4000");
+        serverProps.put(QueryServices.INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD, "30000"); // give up rebuilding after 30 seconds
+        // need to override rpc retries otherwise test doesn't pass
+        serverProps.put(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER, Long.toString(1));
         Map<String, String> clientProps = Collections.singletonMap(QueryServices.TRANSACTIONS_ENABLED, Boolean.TRUE.toString());
         NUM_SLAVES_BASE = 4;
         setUpTestDriver(new ReadOnlyProps(serverProps.entrySet().iterator()), new ReadOnlyProps(clientProps.entrySet().iterator()));

http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
index 4c63587..5cfacfc 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataRegionObserver.java
@@ -76,8 +76,10 @@ import org.apache.phoenix.schema.MetaDataClient;
 import org.apache.phoenix.schema.PIndexState;
 import org.apache.phoenix.schema.PName;
 import org.apache.phoenix.schema.PTable;
+import org.apache.phoenix.schema.SortOrder;
 import org.apache.phoenix.schema.TableNotFoundException;
 import org.apache.phoenix.schema.TableRef;
+import org.apache.phoenix.schema.types.PChar;
 import org.apache.phoenix.schema.types.PLong;
 import org.apache.phoenix.util.ByteUtil;
 import org.apache.phoenix.util.MetaDataUtil;
@@ -105,7 +107,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
     private boolean enableRebuildIndex = QueryServicesOptions.DEFAULT_INDEX_FAILURE_HANDLING_REBUILD;
     private long rebuildIndexTimeInterval = QueryServicesOptions.DEFAULT_INDEX_FAILURE_HANDLING_REBUILD_INTERVAL;
     private static Map<PName, Long> batchExecutedPerTableMap = new HashMap<PName, Long>();
-
     @GuardedBy("MetaDataRegionObserver.class")
     private static Properties rebuildIndexConnectionProps;
 
@@ -194,8 +195,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
             initRebuildIndexConnectionProps(e.getEnvironment().getConfiguration());
             // starts index rebuild schedule work
             BuildIndexScheduleTask task = new BuildIndexScheduleTask(e.getEnvironment());
-            // run scheduled task every 10 secs
-            executor.scheduleAtFixedRate(task, 10000, rebuildIndexTimeInterval, TimeUnit.MILLISECONDS);
+            executor.scheduleWithFixedDelay(task, 10000, rebuildIndexTimeInterval, TimeUnit.MILLISECONDS);
         } catch (ClassNotFoundException ex) {
             LOG.error("BuildIndexScheduleTask cannot start!", ex);
         }
@@ -212,6 +212,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
         RegionCoprocessorEnvironment env;
         private long rebuildIndexBatchSize = HConstants.LATEST_TIMESTAMP;
         private long configuredBatches = 10;
+        private long indexDisableTimestampThreshold;
 
         public BuildIndexScheduleTask(RegionCoprocessorEnvironment env) {
             this.env = env;
@@ -220,6 +221,9 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
                     QueryServices.INDEX_FAILURE_HANDLING_REBUILD_PERIOD, HConstants.LATEST_TIMESTAMP);
             this.configuredBatches = configuration.getLong(
                     QueryServices.INDEX_FAILURE_HANDLING_REBUILD_NUMBER_OF_BATCHES_PER_TABLE, configuredBatches);
+            this.indexDisableTimestampThreshold =
+                    configuration.getLong(QueryServices.INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD,
+                        QueryServicesOptions.DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD);
         }
 
         @Override
@@ -309,6 +313,32 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
                                 + indexPTable.getName() + " are online.");
                         continue;
                     }
+                    long indexDisableTimestamp =
+                            PLong.INSTANCE.getCodec().decodeLong(disabledTimeStamp, 0,
+                                SortOrder.ASC);
+                    PIndexState state = PIndexState.fromSerializedValue(indexState[0]);
+                    if (indexDisableTimestamp > 0 && System.currentTimeMillis()
+                            - indexDisableTimestamp > indexDisableTimestampThreshold) {
+                        /*
+                         * The index has been disabled for longer than the configured threshold, so
+                         * any further attempts to re-enable it will likely fail. Mark the index as
+                         * disabled and set its index disable timestamp to 0 so that the rebuild
+                         * task won't pick up this index again for rebuild.
+                         */
+                        try {
+                            updateIndexState(conn, indexTableFullName, env, state,
+                                PIndexState.DISABLE, 0l);
+                            LOG.error("Unable to rebuild index " + indexTableFullName
+                                    + ". Won't attempt again since index disable timestamp is older than current time by "
+                                    + indexDisableTimestampThreshold
+                                    + " milliseconds. Manual intervention needed to re-build the index");
+                        } catch (Throwable ex) {
+                            LOG.error(
+                                "Unable to mark index " + indexTableFullName + " as disabled.", ex);
+                        }
+                        continue; // don't attempt another rebuild irrespective of whether
+                                  // updateIndexState worked or not
+                    }
                     // Allow index to begin incremental maintenance as index is back online and we
                     // cannot transition directly from DISABLED -> ACTIVE
                     if (Bytes.compareTo(PIndexState.DISABLE.getSerializedBytes(), indexState) == 0) {
@@ -430,24 +460,7 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
 								}
 							}
 						} catch (Exception e) {
-							for (PTable index : indexesToPartiallyRebuild) {
-						        String indexTableFullName = SchemaUtil.getTableName(
-                                    index.getSchemaName().getString(),
-                                    index.getTableName().getString());
-                                try {
-                                    /*
-                                     * We are going to mark the index as disabled and set the index
-                                     * disable timestamp to 0 so that the rebuild task won't pick up
-                                     * this index again for rebuild.
-                                     */
-                                    updateIndexState(conn, indexTableFullName, env,
-                                        PIndexState.INACTIVE, PIndexState.DISABLE, 0l);
-                                } catch (Throwable ex) {
-						            LOG.error("Unable to mark index " + indexTableFullName + " as disabled after rebuilding it failed", ex);
-						        }
-						    }
-							LOG.error("Unable to rebuild " + dataPTable + " indexes " + indexesToPartiallyRebuild
-									+ ". Won't attempt again. Manual intervention needed to re-build the index", e);
+							LOG.error("Unable to rebuild " + dataPTable + " indexes " + indexesToPartiallyRebuild, e);
 						}
 					}
 				}
@@ -550,9 +563,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
             int indexRebuildRpcRetriesCounter =
                     config.getInt(QueryServices.INDEX_REBUILD_RPC_RETRIES_COUNTER,
                         QueryServicesOptions.DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER);
-            long indexRebuildRpcRetryPauseTimeMs =
-                    config.getLong(QueryServices.INDEX_REBUILD_RPC_RETRY_PAUSE_TIME,
-                        QueryServicesOptions.DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE);
             // Set SCN so that we don't ping server and have the upper bound set back to
             // the timestamp when the failure occurred.
             props.setProperty(PhoenixRuntime.CURRENT_SCN_ATTRIB, Long.toString(Long.MAX_VALUE));
@@ -565,8 +575,6 @@ public class MetaDataRegionObserver extends BaseRegionObserver {
                 Long.toString(indexRebuildRPCTimeoutMs));
             props.setProperty(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
                 Long.toString(indexRebuildRpcRetriesCounter));
-            props.setProperty(HConstants.HBASE_CLIENT_PAUSE,
-                Long.toString(indexRebuildRpcRetryPauseTimeMs));
             // don't run a second index populations upsert select
             props.setProperty(QueryServices.INDEX_POPULATION_SLEEP_TIME, "0");
             rebuildIndexConnectionProps = PropertiesUtil.combineProperties(props, config);

http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
index a0575d9..57aba16 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServices.java
@@ -142,6 +142,8 @@ public interface QueryServices extends SQLCloseable {
         "phoenix.index.failure.handling.rebuild.interval";
     
     public static final String INDEX_FAILURE_HANDLING_REBUILD_NUMBER_OF_BATCHES_PER_TABLE = "phoenix.index.rebuild.batch.perTable";
+    // If an index's disable timestamp is older than this threshold (in milliseconds), the index rebuild task won't attempt to rebuild it
+    public static final String INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = "phoenix.index.rebuild.disabletimestamp.threshold";
 
     // Block writes to data table when index write fails
     public static final String INDEX_FAILURE_BLOCK_WRITE = "phoenix.index.failure.block.write";

http://git-wip-us.apache.org/repos/asf/phoenix/blob/d541d6f2/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
index e6be091..21d8300 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/query/QueryServicesOptions.java
@@ -185,8 +185,8 @@ public class QueryServicesOptions {
     public static final long DEFAULT_INDEX_REBUILD_QUERY_TIMEOUT = 30000 * 60; // 30 mins
     public static final long DEFAULT_INDEX_REBUILD_RPC_TIMEOUT = 30000 * 60; // 30 mins
     public static final long DEFAULT_INDEX_REBUILD_CLIENT_SCANNER_TIMEOUT = 30000 * 60; // 30 mins
-    public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 5;
-    public static final long DEFAULT_INDEX_REBULD_RPC_RETRY_PAUSE = 3000; // 3 seconds
+    public static final int DEFAULT_INDEX_REBUILD_RPC_RETRIES_COUNTER = 1; // no retries at rpc level
+    public static final int DEFAULT_INDEX_REBUILD_DISABLE_TIMESTAMP_THRESHOLD = 30000 * 60; // 30 mins
 
     /**
      * HConstants#HIGH_QOS is the max we will see to a standard table. We go higher to differentiate