You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ch...@apache.org on 2015/12/01 10:17:40 UTC

hbase git commit: HBASE-14891 Add log for uncaught exception in RegionServerMetricsWrapperRunnable (Yu Li)

Repository: hbase
Updated Branches:
  refs/heads/branch-1 c719e8c45 -> 2d7db8959


HBASE-14891 Add log for uncaught exception in RegionServerMetricsWrapperRunnable (Yu Li)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2d7db895
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2d7db895
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2d7db895

Branch: refs/heads/branch-1
Commit: 2d7db89590a768762e037b6d920a223a5d923300
Parents: c719e8c
Author: chenheng <ch...@apache.org>
Authored: Tue Dec 1 17:16:23 2015 +0800
Committer: chenheng <ch...@apache.org>
Committed: Tue Dec 1 17:16:23 2015 +0800

----------------------------------------------------------------------
 .../MetricsRegionServerWrapperImpl.java         | 231 ++++++++++---------
 1 file changed, 119 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/2d7db895/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
index 2f14edb..b179956 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
@@ -458,125 +458,132 @@ class MetricsRegionServerWrapperImpl
 
     @Override
     synchronized public void run() {
-      initBlockCache();
-      cacheStats = blockCache.getStats();
-
-      HDFSBlocksDistribution hdfsBlocksDistribution =
-          new HDFSBlocksDistribution();
-      HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions =
-          new HDFSBlocksDistribution();
-
-      long tempNumStores = 0;
-      long tempNumStoreFiles = 0;
-      long tempMemstoreSize = 0;
-      long tempStoreFileSize = 0;
-      long tempReadRequestsCount = 0;
-      long tempWriteRequestsCount = 0;
-      long tempCheckAndMutateChecksFailed = 0;
-      long tempCheckAndMutateChecksPassed = 0;
-      long tempStorefileIndexSize = 0;
-      long tempTotalStaticIndexSize = 0;
-      long tempTotalStaticBloomSize = 0;
-      long tempNumMutationsWithoutWAL = 0;
-      long tempDataInMemoryWithoutWAL = 0;
-      double tempPercentFileLocal = 0;
-      double tempPercentFileLocalSecondaryRegions = 0;
-      long tempFlushedCellsCount = 0;
-      long tempCompactedCellsCount = 0;
-      long tempMajorCompactedCellsCount = 0;
-      long tempFlushedCellsSize = 0;
-      long tempCompactedCellsSize = 0;
-      long tempMajorCompactedCellsSize = 0;
-      long tempBlockedRequestsCount = 0L;
-
-      for (Region r : regionServer.getOnlineRegionsLocalContext()) {
-        tempNumMutationsWithoutWAL += r.getNumMutationsWithoutWAL();
-        tempDataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL();
-        tempReadRequestsCount += r.getReadRequestsCount();
-        tempWriteRequestsCount += r.getWriteRequestsCount();
-        tempCheckAndMutateChecksFailed += r.getCheckAndMutateChecksFailed();
-        tempCheckAndMutateChecksPassed += r.getCheckAndMutateChecksPassed();
-        tempBlockedRequestsCount += r.getBlockedRequestsCount();
-        List<Store> storeList = r.getStores();
-        tempNumStores += storeList.size();
-        for (Store store : storeList) {
-          tempNumStoreFiles += store.getStorefilesCount();
-          tempMemstoreSize += store.getMemStoreSize();
-          tempStoreFileSize += store.getStorefilesSize();
-          tempStorefileIndexSize += store.getStorefilesIndexSize();
-          tempTotalStaticBloomSize += store.getTotalStaticBloomSize();
-          tempTotalStaticIndexSize += store.getTotalStaticIndexSize();
-          tempFlushedCellsCount += store.getFlushedCellsCount();
-          tempCompactedCellsCount += store.getCompactedCellsCount();
-          tempMajorCompactedCellsCount += store.getMajorCompactedCellsCount();
-          tempFlushedCellsSize += store.getFlushedCellsSize();
-          tempCompactedCellsSize += store.getCompactedCellsSize();
-          tempMajorCompactedCellsSize += store.getMajorCompactedCellsSize();
+      try {
+        initBlockCache();
+        cacheStats = blockCache.getStats();
+
+        HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
+        HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions =
+            new HDFSBlocksDistribution();
+
+        long tempNumStores = 0;
+        long tempNumStoreFiles = 0;
+        long tempMemstoreSize = 0;
+        long tempStoreFileSize = 0;
+        long tempReadRequestsCount = 0;
+        long tempWriteRequestsCount = 0;
+        long tempCheckAndMutateChecksFailed = 0;
+        long tempCheckAndMutateChecksPassed = 0;
+        long tempStorefileIndexSize = 0;
+        long tempTotalStaticIndexSize = 0;
+        long tempTotalStaticBloomSize = 0;
+        long tempNumMutationsWithoutWAL = 0;
+        long tempDataInMemoryWithoutWAL = 0;
+        double tempPercentFileLocal = 0;
+        double tempPercentFileLocalSecondaryRegions = 0;
+        long tempFlushedCellsCount = 0;
+        long tempCompactedCellsCount = 0;
+        long tempMajorCompactedCellsCount = 0;
+        long tempFlushedCellsSize = 0;
+        long tempCompactedCellsSize = 0;
+        long tempMajorCompactedCellsSize = 0;
+        long tempBlockedRequestsCount = 0L;
+
+        for (Region r : regionServer.getOnlineRegionsLocalContext()) {
+          tempNumMutationsWithoutWAL += r.getNumMutationsWithoutWAL();
+          tempDataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL();
+          tempReadRequestsCount += r.getReadRequestsCount();
+          tempWriteRequestsCount += r.getWriteRequestsCount();
+          tempCheckAndMutateChecksFailed += r.getCheckAndMutateChecksFailed();
+          tempCheckAndMutateChecksPassed += r.getCheckAndMutateChecksPassed();
+          tempBlockedRequestsCount += r.getBlockedRequestsCount();
+          List<Store> storeList = r.getStores();
+          tempNumStores += storeList.size();
+          for (Store store : storeList) {
+            tempNumStoreFiles += store.getStorefilesCount();
+            tempMemstoreSize += store.getMemStoreSize();
+            tempStoreFileSize += store.getStorefilesSize();
+            tempStorefileIndexSize += store.getStorefilesIndexSize();
+            tempTotalStaticBloomSize += store.getTotalStaticBloomSize();
+            tempTotalStaticIndexSize += store.getTotalStaticIndexSize();
+            tempFlushedCellsCount += store.getFlushedCellsCount();
+            tempCompactedCellsCount += store.getCompactedCellsCount();
+            tempMajorCompactedCellsCount += store.getMajorCompactedCellsCount();
+            tempFlushedCellsSize += store.getFlushedCellsSize();
+            tempCompactedCellsSize += store.getCompactedCellsSize();
+            tempMajorCompactedCellsSize += store.getMajorCompactedCellsSize();
+          }
+
+          HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution();
+          hdfsBlocksDistribution.add(distro);
+          if (r.getRegionInfo().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
+            hdfsBlocksDistributionSecondaryRegions.add(distro);
+          }
         }
 
-        HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution();
-        hdfsBlocksDistribution.add(distro);
-        if (r.getRegionInfo().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
-          hdfsBlocksDistributionSecondaryRegions.add(distro);
-        }
-      }
-
-      float localityIndex = hdfsBlocksDistribution.getBlockLocalityIndex(
-          regionServer.getServerName().getHostname());
-      tempPercentFileLocal = Double.isNaN(tempBlockedRequestsCount) ? 0 : (localityIndex * 100);
+        float localityIndex =
+            hdfsBlocksDistribution
+                .getBlockLocalityIndex(regionServer.getServerName().getHostname());
+        tempPercentFileLocal = Double.isNaN(tempBlockedRequestsCount) ? 0 : (localityIndex * 100);
 
-      float localityIndexSecondaryRegions = hdfsBlocksDistributionSecondaryRegions
-          .getBlockLocalityIndex(regionServer.getServerName().getHostname());
-      tempPercentFileLocalSecondaryRegions =
-          Double.isNaN(localityIndexSecondaryRegions) ? 0 : (localityIndexSecondaryRegions * 100);
+        float localityIndexSecondaryRegions =
+            hdfsBlocksDistributionSecondaryRegions.getBlockLocalityIndex(regionServer
+                .getServerName().getHostname());
+        tempPercentFileLocalSecondaryRegions = Double
+            .isNaN(localityIndexSecondaryRegions) ? 0 : (localityIndexSecondaryRegions * 100);
 
-      // Compute the number of requests per second
-      long currentTime = EnvironmentEdgeManager.currentTime();
+        // Compute the number of requests per second
+        long currentTime = EnvironmentEdgeManager.currentTime();
 
-      // assume that it took PERIOD seconds to start the executor.
-      // this is a guess but it's a pretty good one.
-      if (lastRan == 0) {
-        lastRan = currentTime - period;
-      }
+        // assume that it took PERIOD seconds to start the executor.
+        // this is a guess but it's a pretty good one.
+        if (lastRan == 0) {
+          lastRan = currentTime - period;
+        }
 
-      //If we've time traveled keep the last requests per second.
-      if ((currentTime - lastRan) > 0) {
-        long currentRequestCount = getTotalRequestCount();
-        requestsPerSecond = (currentRequestCount - lastRequestCount) /
-            ((currentTime - lastRan) / 1000.0);
-        lastRequestCount = currentRequestCount;
+        // If we've time traveled keep the last requests per second.
+        if ((currentTime - lastRan) > 0) {
+          long currentRequestCount = getTotalRequestCount();
+          requestsPerSecond =
+              (currentRequestCount - lastRequestCount) / ((currentTime - lastRan) / 1000.0);
+          lastRequestCount = currentRequestCount;
+        }
+        lastRan = currentTime;
+
+        WALProvider provider = regionServer.walFactory.getWALProvider();
+        WALProvider metaProvider = regionServer.walFactory.getMetaWALProvider();
+        numWALFiles =
+            (provider == null ? 0 : provider.getNumLogFiles())
+                + (metaProvider == null ? 0 : metaProvider.getNumLogFiles());
+        walFileSize =
+            (provider == null ? 0 : provider.getLogFileSize())
+                + (provider == null ? 0 : provider.getLogFileSize());
+        // Copy over computed values so that no thread sees half computed values.
+        numStores = tempNumStores;
+        numStoreFiles = tempNumStoreFiles;
+        memstoreSize = tempMemstoreSize;
+        storeFileSize = tempStoreFileSize;
+        readRequestsCount = tempReadRequestsCount;
+        writeRequestsCount = tempWriteRequestsCount;
+        checkAndMutateChecksFailed = tempCheckAndMutateChecksFailed;
+        checkAndMutateChecksPassed = tempCheckAndMutateChecksPassed;
+        storefileIndexSize = tempStorefileIndexSize;
+        totalStaticIndexSize = tempTotalStaticIndexSize;
+        totalStaticBloomSize = tempTotalStaticBloomSize;
+        numMutationsWithoutWAL = tempNumMutationsWithoutWAL;
+        dataInMemoryWithoutWAL = tempDataInMemoryWithoutWAL;
+        percentFileLocal = tempPercentFileLocal;
+        percentFileLocalSecondaryRegions = tempPercentFileLocalSecondaryRegions;
+        flushedCellsCount = tempFlushedCellsCount;
+        compactedCellsCount = tempCompactedCellsCount;
+        majorCompactedCellsCount = tempMajorCompactedCellsCount;
+        flushedCellsSize = tempFlushedCellsSize;
+        compactedCellsSize = tempCompactedCellsSize;
+        majorCompactedCellsSize = tempMajorCompactedCellsSize;
+        blockedRequestsCount = tempBlockedRequestsCount;
+      } catch (Throwable e) {
+        LOG.warn("Caught exception! Will suppress and retry.", e);
       }
-      lastRan = currentTime;
-
-      WALProvider provider = regionServer.walFactory.getWALProvider();
-      WALProvider metaProvider = regionServer.walFactory.getMetaWALProvider();
-      numWALFiles = (provider == null ? 0 : provider.getNumLogFiles()) +
-          (metaProvider == null ? 0 : metaProvider.getNumLogFiles());
-      walFileSize = (provider == null ? 0 : provider.getLogFileSize()) +
-          (provider == null ? 0 : provider.getLogFileSize());
-      // Copy over computed values so that no thread sees half computed values.
-      numStores = tempNumStores;
-      numStoreFiles = tempNumStoreFiles;
-      memstoreSize = tempMemstoreSize;
-      storeFileSize = tempStoreFileSize;
-      readRequestsCount = tempReadRequestsCount;
-      writeRequestsCount = tempWriteRequestsCount;
-      checkAndMutateChecksFailed = tempCheckAndMutateChecksFailed;
-      checkAndMutateChecksPassed = tempCheckAndMutateChecksPassed;
-      storefileIndexSize = tempStorefileIndexSize;
-      totalStaticIndexSize = tempTotalStaticIndexSize;
-      totalStaticBloomSize = tempTotalStaticBloomSize;
-      numMutationsWithoutWAL = tempNumMutationsWithoutWAL;
-      dataInMemoryWithoutWAL = tempDataInMemoryWithoutWAL;
-      percentFileLocal = tempPercentFileLocal;
-      percentFileLocalSecondaryRegions = tempPercentFileLocalSecondaryRegions;
-      flushedCellsCount = tempFlushedCellsCount;
-      compactedCellsCount = tempCompactedCellsCount;
-      majorCompactedCellsCount = tempMajorCompactedCellsCount;
-      flushedCellsSize = tempFlushedCellsSize;
-      compactedCellsSize = tempCompactedCellsSize;
-      majorCompactedCellsSize = tempMajorCompactedCellsSize;
-      blockedRequestsCount = tempBlockedRequestsCount;
     }
   }