You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by ec...@apache.org on 2014/08/27 00:04:47 UTC
[4/6] git commit: ACCUMULO-3084 just warn about long timer delays
ACCUMULO-3084 just warn about long timer delays
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/1dc77838
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/1dc77838
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/1dc77838
Branch: refs/heads/master
Commit: 1dc77838353f3b253e407cfbb27f7de3cc69c815
Parents: 13d2094 c906e4b
Author: Eric C. Newton <er...@gmail.com>
Authored: Tue Aug 26 17:47:35 2014 -0400
Committer: Eric C. Newton <er...@gmail.com>
Committed: Tue Aug 26 17:47:35 2014 -0400
----------------------------------------------------------------------
.../accumulo/server/GarbageCollectionLogger.java | 13 +++++++++++++
.../java/org/apache/accumulo/tserver/TabletServer.java | 5 +++--
2 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/1dc77838/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java
index b66d0f7,0000000..d53f240
mode 100644,000000..100644
--- a/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/server/GarbageCollectionLogger.java
@@@ -1,103 -1,0 +1,116 @@@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.server;
+
+import java.lang.management.GarbageCollectorMXBean;
+import java.lang.management.ManagementFactory;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.server.util.Halt;
+import org.apache.log4j.Logger;
+
+public class GarbageCollectionLogger {
+ private static final Logger log = Logger.getLogger(GarbageCollectionLogger.class);
+
+ private final HashMap<String,Long> prevGcTime = new HashMap<String,Long>();
+ private long lastMemorySize = 0;
+ private long gcTimeIncreasedCount = 0;
++ private static long lastMemoryCheckTime = 0;
+
+ public GarbageCollectionLogger() {
+ }
+
+ public synchronized void logGCInfo(AccumuloConfiguration conf) {
++ final long now = System.currentTimeMillis();
++
+ List<GarbageCollectorMXBean> gcmBeans = ManagementFactory.getGarbageCollectorMXBeans();
+ Runtime rt = Runtime.getRuntime();
+
+ StringBuilder sb = new StringBuilder("gc");
+
+ boolean sawChange = false;
+
+ long maxIncreaseInCollectionTime = 0;
+
+ for (GarbageCollectorMXBean gcBean : gcmBeans) {
+ Long prevTime = prevGcTime.get(gcBean.getName());
+ long pt = 0;
+ if (prevTime != null) {
+ pt = prevTime;
+ }
+
+ long time = gcBean.getCollectionTime();
+
+ if (time - pt != 0) {
+ sawChange = true;
+ }
+
+ long increaseInCollectionTime = time - pt;
+ sb.append(String.format(" %s=%,.2f(+%,.2f) secs", gcBean.getName(), time / 1000.0, increaseInCollectionTime / 1000.0));
+ maxIncreaseInCollectionTime = Math.max(increaseInCollectionTime, maxIncreaseInCollectionTime);
+ prevGcTime.put(gcBean.getName(), time);
+ }
+
+ long mem = rt.freeMemory();
+ if (maxIncreaseInCollectionTime == 0) {
+ gcTimeIncreasedCount = 0;
+ } else {
+ gcTimeIncreasedCount++;
+ if (gcTimeIncreasedCount > 3 && mem < rt.maxMemory() * 0.05) {
+ log.warn("Running low on memory");
+ gcTimeIncreasedCount = 0;
+ }
+ }
+
+ if (mem > lastMemorySize) {
+ sawChange = true;
+ }
+
+ String sign = "+";
+ if (mem - lastMemorySize <= 0) {
+ sign = "";
+ }
+
+ sb.append(String.format(" freemem=%,d(%s%,d) totalmem=%,d", mem, sign, (mem - lastMemorySize), rt.totalMemory()));
+
+ if (sawChange) {
+ log.debug(sb.toString());
+ }
+
+ final long keepAliveTimeout = conf.getTimeInMillis(Property.INSTANCE_ZK_TIMEOUT);
++ if (lastMemoryCheckTime > 0 && lastMemoryCheckTime < now) {
++ final long diff = now - lastMemoryCheckTime;
++ if (diff > keepAliveTimeout) {
++ log.warn(String.format("Check for long GC pauses not called in a timely fashion %.1f", diff / 1000.));
++ }
++ lastMemoryCheckTime = now;
++ return;
++ }
++
+ if (maxIncreaseInCollectionTime > keepAliveTimeout) {
+ Halt.halt("Garbage collection may be interfering with lock keep-alive. Halting.", -1);
+ }
+
+ lastMemorySize = mem;
++ lastMemoryCheckTime = now;
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/1dc77838/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
----------------------------------------------------------------------
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
index ce84bd3,f1a9f2e..b9505d9
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
@@@ -236,70 -235,28 +236,71 @@@ import org.apache.zookeeper.KeeperExcep
import com.google.common.net.HostAndPort;
-enum ScanRunState {
- QUEUED, RUNNING, FINISHED
-}
-
public class TabletServer extends AbstractMetricsImpl implements org.apache.accumulo.tserver.metrics.TabletServerMBean {
- private static final Logger log = Logger.getLogger(TabletServer.class);
-
- private static HashMap<String,Long> prevGcTime = new HashMap<String,Long>();
- private static long lastMemorySize = 0;
- private static long lastMemoryCheckTime = 0;
- private static long gcTimeIncreasedCount;
+ static enum ScanRunState {
+ QUEUED, RUNNING, FINISHED
+ }
+ private static final Logger log = Logger.getLogger(TabletServer.class);
+ private static final String METRICS_PREFIX = "tserver";
private static final long MAX_TIME_TO_WAIT_FOR_SCAN_RESULT_MILLIS = 1000;
private static final long RECENTLY_SPLIT_MILLIES = 60 * 1000;
+ private static final long TIME_BETWEEN_GC_CHECKS = 5000;
+ private static final Set<Column> EMPTY_COLUMNS = Collections.emptySet();
+
+ private final GarbageCollectionLogger gcLogger = new GarbageCollectionLogger();
+ private final TransactionWatcher watcher = new TransactionWatcher();
+ private final ZooCache masterLockCache = new ZooCache();
+
+ private final TabletServerLogger logger;
+
+ private final TabletServerMinCMetrics mincMetrics = new TabletServerMinCMetrics();
+ public TabletServerMinCMetrics getMinCMetrics() {
+ return mincMetrics;
+ }
+
+ private final ServerConfiguration serverConfig;
+ private final LogSorter logSorter;
+ private final TabletStatsKeeper statsKeeper;
+ private final AtomicInteger logIdGenerator = new AtomicInteger();
- private TabletServerLogger logger;
+ private final VolumeManager fs;
+ public Instance getInstance() {
+ return serverConfig.getInstance();
+ }
+
+ private final SortedMap<KeyExtent,Tablet> onlineTablets = Collections.synchronizedSortedMap(new TreeMap<KeyExtent,Tablet>());
+ private final SortedSet<KeyExtent> unopenedTablets = Collections.synchronizedSortedSet(new TreeSet<KeyExtent>());
+ private final SortedSet<KeyExtent> openingTablets = Collections.synchronizedSortedSet(new TreeSet<KeyExtent>());
+ @SuppressWarnings("unchecked")
+ private final Map<KeyExtent,Long> recentlyUnloadedCache = Collections.synchronizedMap(new LRUMap(1000));
- protected TabletServerMinCMetrics mincMetrics = new TabletServerMinCMetrics();
+ private final TabletServerResourceManager resourceManager;
+ private final SecurityOperation security;
- private ServerConfiguration serverConfig;
- private LogSorter logSorter = null;
+ private final BlockingDeque<MasterMessage> masterMessages = new LinkedBlockingDeque<MasterMessage>();
+
+ private Thread majorCompactorThread;
+
+ private HostAndPort clientAddress;
+
+ private volatile boolean serverStopRequested = false;
+ private volatile boolean majorCompactorDisabled = false;
+ private volatile boolean shutdownComplete = false;
-
++
+ private ZooLock tabletServerLock;
+
+ private TServer server;
+
+ private DistributedWorkQueue bulkFailedCopyQ;
+
+ private String lockID;
+
+ private static ObjectName OBJECT_NAME = null;
+
+ public static final AtomicLong seekCount = new AtomicLong(0);
+
+ private final AtomicLong totalMinorCompactions = new AtomicLong(0);
public TabletServer(ServerConfiguration conf, VolumeManager fs) {
super();
@@@ -3281,7 -3539,7 +3282,7 @@@
}
};
- SimpleTimer.getInstance(aconf).schedule(gcDebugTask, 0, 1000);
- SimpleTimer.getInstance().schedule(gcDebugTask, 0, 1000);
++ SimpleTimer.getInstance(aconf).schedule(gcDebugTask, 0, TIME_BETWEEN_GC_CHECKS);
Runnable constraintTask = new Runnable() {