From e9a200a95f0788aff68d37e65b294e8cfaada913 Mon Sep 17 00:00:00 2001 From: Jonathan Cobb Date: Mon, 24 Aug 2020 09:16:35 -0400 Subject: [PATCH] allow grace period for node not found in db before reaping --- .../java/bubble/cloud/compute/NodeReaper.java | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/bubble-server/src/main/java/bubble/cloud/compute/NodeReaper.java b/bubble-server/src/main/java/bubble/cloud/compute/NodeReaper.java index 55730372..8ec2aa9b 100644 --- a/bubble-server/src/main/java/bubble/cloud/compute/NodeReaper.java +++ b/bubble-server/src/main/java/bubble/cloud/compute/NodeReaper.java @@ -12,13 +12,13 @@ import bubble.server.BubbleConfiguration; import bubble.service.cloud.NetworkService; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import org.cobbzilla.util.collection.ExpirationMap; import org.cobbzilla.util.daemon.SimpleDaemon; import org.cobbzilla.util.network.NetworkUtil; import org.cobbzilla.util.string.StringUtil; import org.cobbzilla.util.time.TimeUtil; import org.springframework.beans.factory.annotation.Autowired; -import java.util.HashMap; import java.util.List; import java.util.Map; @@ -33,6 +33,7 @@ public class NodeReaper extends SimpleDaemon { private static final long STARTUP_DELAY = MINUTES.toMillis(30); private static final long KILL_CHECK_INTERVAL = MINUTES.toMillis(30); + private static final long MAX_TIME_NOT_IN_DB_BEFORE_DELETION = MINUTES.toMillis(45); private static final long MAX_DOWNTIME_BEFORE_DELETION = DAYS.toMillis(2); private final ComputeServiceDriverBase compute; @@ -48,7 +49,8 @@ public class NodeReaper extends SimpleDaemon { @Autowired private BubbleConfiguration configuration; @Autowired private NetworkService networkService; - private final Map unreachableSince = new HashMap<>(100); + private final Map noNodeInDb = new ExpirationMap<>(100, MAX_TIME_NOT_IN_DB_BEFORE_DELETION*2); + private final Map unreachableSince = new ExpirationMap<>(100, MAX_DOWNTIME_BEFORE_DELETION*2); private String prefix() { return compute.getClass().getSimpleName()+": "; } @@ -68,20 +70,27 @@ public class NodeReaper extends SimpleDaemon { if (wouldKillSelf(node)) return; final var nodeFromDB = nodeDAO.findByIp4(node.getIp4()); if (nodeFromDB == null) { - final String message = prefix() + "processNode: no node exists with ip4=" + node.getIp4() + ", killing it"; - log.warn(message); - reportError(message); - final var domain = domainDAO.findByUuid(node.getDomain()); - final var dns = domain != null ? cloudDAO.findByUuid(domain.getPublicDns()) : null; - try { - if (dns != null) dns.getDnsDriver(configuration).deleteNode(node); - compute.stop(node); - } catch (Exception e) { - final String errMessage = prefix() + "processNode: error stopping node " + node.getIp4(); - reportError(errMessage, e); - log.error(errMessage, e); + final Long notInDbSince = noNodeInDb.get(node.getIp4()); + if (notInDbSince == null) { + noNodeInDb.put(node.getIp4(), now()); + + } else if (now() - notInDbSince > MAX_TIME_NOT_IN_DB_BEFORE_DELETION) { + final String message = prefix() + "processNode: no node exists with ip4=" + node.getIp4() + ", killing it"; + log.warn(message); + reportError(message); + final var domain = domainDAO.findByUuid(node.getDomain()); + final var dns = domain != null ? cloudDAO.findByUuid(domain.getPublicDns()) : null; + try { + if (dns != null) dns.getDnsDriver(configuration).deleteNode(node); + compute.stop(node); + } catch (Exception e) { + final String errMessage = prefix() + "processNode: error stopping node " + node.getIp4(); + reportError(errMessage, e); + log.error(errMessage, e); + } } } else { + noNodeInDb.remove(nodeFromDB.getIp4()); if (networkService.isReachable(nodeFromDB)) { unreachableSince.remove(nodeFromDB.getUuid()); } else {