From a50b3ec2a138d0812de2fd3461dd10358926f3da Mon Sep 17 00:00:00 2001 From: Jonathan Cobb Date: Mon, 6 Jul 2020 19:15:42 -0400 Subject: [PATCH] improve stop network handling --- .../java/bubble/dao/bill/AccountPlanDAO.java | 2 +- .../resources/account/AccountsResource.java | 6 +- .../bubble/resources/account/MeResource.java | 6 +- .../cloud/NetworkActionsResource.java | 6 +- .../bubble/server/BubbleConfiguration.java | 2 +- .../service/cloud/NodeLaunchMonitor.java | 74 ++++++++++++++++++- .../bubble/service/cloud/NodeLauncher.java | 2 +- .../service/cloud/NodeProgressMeter.java | 21 +++++- .../cloud/NodeProgressMeterConstants.java | 1 + .../service/cloud/StandardNetworkService.java | 58 +++------------ .../post_auth/ResourceMessages.properties | 1 + bubble-web | 2 +- 12 files changed, 114 insertions(+), 67 deletions(-) diff --git a/bubble-server/src/main/java/bubble/dao/bill/AccountPlanDAO.java b/bubble-server/src/main/java/bubble/dao/bill/AccountPlanDAO.java index d1fefc18..231d1d17 100644 --- a/bubble-server/src/main/java/bubble/dao/bill/AccountPlanDAO.java +++ b/bubble-server/src/main/java/bubble/dao/bill/AccountPlanDAO.java @@ -94,7 +94,7 @@ public class AccountPlanDAO extends AccountOwnedEntityDAO { public boolean isNotDeleted(String networkUuid) { final AccountPlan accountPlan = findByNetwork(networkUuid); - return accountPlan != null && accountPlan.notDeleted() && accountPlan.notDeleted(); + return accountPlan != null && accountPlan.notDeleting() && accountPlan.notDeleted(); } @Override public Object preCreate(AccountPlan accountPlan) { diff --git a/bubble-server/src/main/java/bubble/resources/account/AccountsResource.java b/bubble-server/src/main/java/bubble/resources/account/AccountsResource.java index dd5ee85d..8fbf0bd6 100644 --- a/bubble-server/src/main/java/bubble/resources/account/AccountsResource.java +++ b/bubble-server/src/main/java/bubble/resources/account/AccountsResource.java @@ -27,7 +27,7 @@ import bubble.service.account.MitmControlService; import bubble.service.account.StandardAuthenticatorService; import bubble.service.account.download.AccountDownloadService; import bubble.service.boot.SelfNodeService; -import bubble.service.cloud.StandardNetworkService; +import bubble.service.cloud.NodeLaunchMonitor; import lombok.extern.slf4j.Slf4j; import org.cobbzilla.wizard.auth.ChangePasswordRequest; import org.cobbzilla.wizard.model.HashedPassword; @@ -168,14 +168,14 @@ public class AccountsResource { return ok(accountDAO.update(c.account)); } - @Autowired private StandardNetworkService networkService; + @Autowired private NodeLaunchMonitor launchMonitor; @GET @Path("/{id}"+EP_STATUS) public Response listLaunchStatuses(@Context Request req, @Context ContainerRequest ctx, @PathParam("id") String id) { final AccountContext c = new AccountContext(ctx, id); - return ok(networkService.listLaunchStatuses(c.account.getUuid())); + return ok(launchMonitor.listLaunchStatuses(c.account.getUuid())); } @Path("/{id}"+EP_PROMOTIONS) diff --git a/bubble-server/src/main/java/bubble/resources/account/MeResource.java b/bubble-server/src/main/java/bubble/resources/account/MeResource.java index 470ed4c2..15c50099 100644 --- a/bubble-server/src/main/java/bubble/resources/account/MeResource.java +++ b/bubble-server/src/main/java/bubble/resources/account/MeResource.java @@ -27,7 +27,7 @@ import bubble.service.account.StandardAccountMessageService; import bubble.service.account.StandardAuthenticatorService; import bubble.service.account.download.AccountDownloadService; import bubble.service.boot.BubbleModelSetupService; -import bubble.service.cloud.StandardNetworkService; +import bubble.service.cloud.NodeLaunchMonitor; import com.fasterxml.jackson.databind.JsonNode; import lombok.Cleanup; import lombok.Getter; @@ -369,13 +369,13 @@ public class MeResource { return configuration.subResource(ReferralCodesResource.class, caller); } - @Autowired private StandardNetworkService networkService; + @Autowired private NodeLaunchMonitor launchMonitor; @GET @Path(EP_STATUS) public Response listLaunchStatuses(@Context Request req, @Context ContainerRequest ctx) { final Account caller = userPrincipal(ctx); - return ok(networkService.listLaunchStatuses(caller.getUuid())); + return ok(launchMonitor.listLaunchStatuses(caller.getUuid())); } @Path(EP_PROMOTIONS) diff --git a/bubble-server/src/main/java/bubble/resources/cloud/NetworkActionsResource.java b/bubble-server/src/main/java/bubble/resources/cloud/NetworkActionsResource.java index b4d7b9c2..17a3ce54 100644 --- a/bubble-server/src/main/java/bubble/resources/cloud/NetworkActionsResource.java +++ b/bubble-server/src/main/java/bubble/resources/cloud/NetworkActionsResource.java @@ -21,6 +21,7 @@ import bubble.model.cloud.*; import bubble.server.BubbleConfiguration; import bubble.service.account.StandardAuthenticatorService; import bubble.service.backup.NetworkKeysService; +import bubble.service.cloud.NodeLaunchMonitor; import bubble.service.cloud.NodeProgressMeterTick; import bubble.service.cloud.StandardNetworkService; import lombok.extern.slf4j.Slf4j; @@ -49,6 +50,7 @@ public class NetworkActionsResource { @Autowired private BubbleNodeDAO nodeDAO; @Autowired private StandardNetworkService networkService; + @Autowired private NodeLaunchMonitor launchMonitor; @Autowired private AccountMessageDAO messageDAO; @Autowired private AccountPolicyDAO policyDAO; @Autowired private NetworkKeysService keysService; @@ -94,7 +96,7 @@ public class NetworkActionsResource { @Context ContainerRequest ctx) { final Account caller = userPrincipal(ctx); final String account = caller.admin() ? network.getAccount() : caller.getUuid(); - return ok(networkService.listLaunchStatuses(account, network.getUuid())); + return ok(launchMonitor.listLaunchStatuses(account, network.getUuid())); } @GET @Path(EP_STATUS+"/{uuid}") @@ -103,7 +105,7 @@ public class NetworkActionsResource { @PathParam("uuid") String uuid) { final Account caller = userPrincipal(ctx); final String account = caller.admin() ? network.getAccount() : caller.getUuid(); - final NodeProgressMeterTick tick = networkService.getLaunchStatus(account, uuid); + final NodeProgressMeterTick tick = launchMonitor.getLaunchStatus(account, uuid); return tick == null ? notFound(uuid) : ok(tick); } diff --git a/bubble-server/src/main/java/bubble/server/BubbleConfiguration.java b/bubble-server/src/main/java/bubble/server/BubbleConfiguration.java index 2dcd72bb..f70ff17c 100644 --- a/bubble-server/src/main/java/bubble/server/BubbleConfiguration.java +++ b/bubble-server/src/main/java/bubble/server/BubbleConfiguration.java @@ -103,7 +103,7 @@ public class BubbleConfiguration extends PgRestServerConfiguration } @Getter @Setter private Boolean backupsEnabled = true; - public boolean backupsEnabled() { return !isSelfSage() && (backupsEnabled == null || backupsEnabled); } + public boolean backupsEnabled() { return (hasSageNode() || isSelfSage()) && (backupsEnabled == null || backupsEnabled); } @Override public void registerConfigHandlerbarsHelpers(Handlebars handlebars) { registerUtilityHelpers(handlebars); } diff --git a/bubble-server/src/main/java/bubble/service/cloud/NodeLaunchMonitor.java b/bubble-server/src/main/java/bubble/service/cloud/NodeLaunchMonitor.java index 750edb55..ed4ac32d 100644 --- a/bubble-server/src/main/java/bubble/service/cloud/NodeLaunchMonitor.java +++ b/bubble-server/src/main/java/bubble/service/cloud/NodeLaunchMonitor.java @@ -6,22 +6,32 @@ package bubble.service.cloud; import bubble.model.cloud.AnsibleInstallType; import bubble.model.cloud.BubbleNetwork; +import bubble.notify.NewNodeNotification; import bubble.server.BubbleConfiguration; import lombok.Getter; +import lombok.NonNull; import lombok.ToString; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang.exception.ExceptionUtils; +import org.cobbzilla.util.collection.ExpirationEvictionPolicy; +import org.cobbzilla.util.collection.ExpirationMap; import org.cobbzilla.util.daemon.SimpleDaemon; +import org.cobbzilla.wizard.cache.redis.RedisService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import java.util.ArrayList; import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import static bubble.service.cloud.NodeProgressMeter.getProgressMeterKey; +import static bubble.service.cloud.NodeProgressMeter.getProgressMeterPrefix; +import static java.util.concurrent.TimeUnit.HOURS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.cobbzilla.util.daemon.ZillaRuntime.*; +import static org.cobbzilla.util.json.JsonUtil.json; @Service @Slf4j public class NodeLaunchMonitor extends SimpleDaemon { @@ -31,21 +41,29 @@ public class NodeLaunchMonitor extends SimpleDaemon { @Getter private final long sleepTime = SECONDS.toMillis(15); + @Override public void processException(Exception e) { log.warn("processException: "+shortError(e)); } + @Autowired private BubbleConfiguration configuration; + @Autowired private RedisService redis; + @Autowired private StandardNetworkService networkService; + + @Getter(lazy=true) private final RedisService networkSetupStatus = redis.prefixNamespace(getClass().getSimpleName()+"_status_"); private final Map launcherThreads = new ConcurrentHashMap<>(); + private final Map canceledNetworks = new ExpirationMap<>(50, HOURS.toMillis(2)); - public void register(String networkUuid, Thread t) { + public void register(String nnUuid, String networkUuid, Thread t) { startIfNotRunning(); final LauncherEntry previousLaunch = launcherThreads.get(networkUuid); if (previousLaunch != null && previousLaunch.isAlive()) { log.warn("registerLauncher("+networkUuid+"): entry thread exists, stopping it: "+previousLaunch); forceEndLauncher(previousLaunch); } - launcherThreads.put(networkUuid, new LauncherEntry(networkUuid, t)); + launcherThreads.put(networkUuid, new LauncherEntry(nnUuid, networkUuid, t)); } public void cancel(String networkUuid) { + canceledNetworks.put(networkUuid, networkUuid); final LauncherEntry previousLaunch = launcherThreads.get(networkUuid); if (previousLaunch == null || !previousLaunch.isAlive()) { log.warn("cancel("+networkUuid+"): entry does not thread exist, or is not alive: "+previousLaunch); @@ -102,21 +120,69 @@ public class NodeLaunchMonitor extends SimpleDaemon { } private void forceEndLauncher(LauncherEntry entry) { + final NodeProgressMeter meter = getProgressMeter(entry.getNnUuid()); + if (meter != null) meter.cancel(); terminate(entry.getThread(), LAUNCH_TERMINATE_TIMEOUT); launcherThreads.remove(entry.getNetworkUuid()); } - @Override public void processException(Exception e) { log.warn("processException: "+shortError(e)); } + private final Map progressMeters = new ExpirationMap<>(50, HOURS.toMillis(1), ExpirationEvictionPolicy.atime); + + public NodeProgressMeter getProgressMeter(String nnId) { return progressMeters.get(nnId); } + + public NodeProgressMeter getProgressMeter(@NonNull NewNodeNotification nn) { + return progressMeters.computeIfAbsent(nn.getUuid(), k -> new NodeProgressMeter(nn, getNetworkSetupStatus(), networkService, this)); + } + + public NodeProgressMeterTick getLaunchStatus(String accountUuid, String uuid) { + final String json = getNetworkSetupStatus().get(getProgressMeterKey(uuid, accountUuid)); + if (json == null) return null; + try { + final NodeProgressMeterTick tick = json(json, NodeProgressMeterTick.class); + if (!tick.hasAccount() || !tick.getAccount().equals(accountUuid)) { + log.warn("getLaunchStatus: tick.account != accountUuid, returning null"); + return null; + } + return tick.setPattern(null); + } catch (Exception e) { + return die("getLaunchStatus: "+e); + } + } + + public List listLaunchStatuses(String accountUuid) { + return listLaunchStatuses(accountUuid, null); + } + + public List listLaunchStatuses(String accountUuid, String networkUuid) { + final RedisService stats = getNetworkSetupStatus(); + final List ticks = new ArrayList<>(); + for (String key : stats.keys(getProgressMeterPrefix(accountUuid)+"*")) { + final String json = stats.get_withPrefix(key); + if (json != null) { + try { + final NodeProgressMeterTick tick = json(json, NodeProgressMeterTick.class).setPattern(null); + if (networkUuid != null && tick.hasNetwork() && networkUuid.equals(tick.getNetwork())) { + ticks.add(tick); + } + } catch (Exception e) { + log.warn("currentTicks (bad json?): "+e); + } + } + } + return ticks; + } @ToString private static class LauncherEntry { + @Getter private final String nnUuid; @Getter private final String networkUuid; @Getter private final Thread thread; @Getter private final long ctime; @Getter private volatile long mtime; - public LauncherEntry(String networkUuid, Thread thread) { + public LauncherEntry(String nnUuid, String networkUuid, Thread thread) { + this.nnUuid = nnUuid; this.networkUuid = networkUuid; this.thread = thread; this.ctime = now(); diff --git a/bubble-server/src/main/java/bubble/service/cloud/NodeLauncher.java b/bubble-server/src/main/java/bubble/service/cloud/NodeLauncher.java index 24f00bfd..df3ab2fa 100644 --- a/bubble-server/src/main/java/bubble/service/cloud/NodeLauncher.java +++ b/bubble-server/src/main/java/bubble/service/cloud/NodeLauncher.java @@ -47,7 +47,7 @@ public class NodeLauncher implements Runnable { final Thread launchThread = new Thread(new NodeLaunchThread(nodeRef, exceptionRef, networkService, newNodeRequest, launchMonitor)); launchThread.setDaemon(true); launchThread.setName("NodeLaunchThread(network="+networkUuid+")"); - launchMonitor.register(networkUuid, launchThread); + launchMonitor.register(newNodeRequest.getUuid(), networkUuid, launchThread); log.info("NodeLauncher.run: launching node..."+newNodeRequest.getFqdn()); launchThread.start(); diff --git a/bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeter.java b/bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeter.java index 8b3da13c..23f710d6 100644 --- a/bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeter.java +++ b/bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeter.java @@ -67,7 +67,7 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable { public NodeProgressMeter(NewNodeNotification nn, RedisService redis, StandardNetworkService networkService, - NodeLaunchMonitor launchMonitor) throws IOException { + NodeLaunchMonitor launchMonitor) { this.nn = nn; this.redis = redis; @@ -83,7 +83,11 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable { key = nn.getUuid(); final PipedInputStream pipeIn = new PipedInputStream(PIPE_SIZE); - connect(pipeIn); + try { + connect(pipeIn); + } catch (IOException e) { + die("NodeProgressMeter: error connecting pipe: "+shortError(e)); + } reader = new BufferedReader(new InputStreamReader(pipeIn)); writer = new BufferedWriter(new OutputStreamWriter(this)); @@ -199,6 +203,18 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable { return new UncloseableNodeProgressMeter(this); } + public void cancel () { + log.info("cancel: cancelling progress meter for network: "+nn.getNetworkName()); + closed.set(true); + success.set(true); + _setCurrentTick(new NodeProgressMeterTick() + .setNetwork(nn.getNetwork()) + .setAccount(nn.getAccount()) + .setMessageKey(METER_CANCELED) + .setPercent(0)); + background(this::close); + } + private class UncloseableNodeProgressMeter extends NodeProgressMeter { private final NodeProgressMeter meter; public UncloseableNodeProgressMeter(NodeProgressMeter meter) throws IOException { @@ -206,5 +222,6 @@ public class NodeProgressMeter extends PipedOutputStream implements Runnable { this.meter = meter; } @Override public void close() {} + @Override public void cancel() { meter.cancel(); } } } diff --git a/bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeterConstants.java b/bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeterConstants.java index 259bcebf..f8ddd7cf 100644 --- a/bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeterConstants.java +++ b/bubble-server/src/main/java/bubble/service/cloud/NodeProgressMeterConstants.java @@ -68,6 +68,7 @@ public class NodeProgressMeterConstants { public static final String METER_ERROR_ROLE_VALIDATION_ERRORS = "BUBBLE-ERROR: ROLE VALIDATION FAILED"; public static final String METER_COMPLETED = "meter_completed"; + public static final String METER_CANCELED = "meter_canceled"; public static final String METER_START_OR_DNS_ERROR = "meter_start_or_dns_error"; public static final String METER_UNKNOWN_ERROR = "meter_unknown_error"; diff --git a/bubble-server/src/main/java/bubble/service/cloud/StandardNetworkService.java b/bubble-server/src/main/java/bubble/service/cloud/StandardNetworkService.java index a3924199..e0a8ca09 100644 --- a/bubble-server/src/main/java/bubble/service/cloud/StandardNetworkService.java +++ b/bubble-server/src/main/java/bubble/service/cloud/StandardNetworkService.java @@ -76,10 +76,9 @@ import static bubble.server.BubbleConfiguration.ENV_DEBUG_NODE_INSTALL; import static bubble.service.boot.StandardSelfNodeService.*; import static bubble.service.cloud.NodeLaunchException.fatalLaunchFailure; import static bubble.service.cloud.NodeLaunchException.launchFailureCanRetry; -import static bubble.service.cloud.NodeProgressMeter.getProgressMeterKey; -import static bubble.service.cloud.NodeProgressMeter.getProgressMeterPrefix; import static bubble.service.cloud.NodeProgressMeterConstants.*; -import static java.util.concurrent.TimeUnit.*; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.commons.lang3.RandomStringUtils.randomAlphanumeric; import static org.cobbzilla.util.daemon.Await.awaitAll; import static org.cobbzilla.util.daemon.ZillaRuntime.*; @@ -136,9 +135,9 @@ public class StandardNetworkService implements NetworkService { @Autowired private RedisService redisService; @Getter(lazy=true) private final RedisService networkLocks = redisService.prefixNamespace(getClass().getSimpleName()+"_lock_"); - @Getter(lazy=true) private final RedisService networkSetupStatus = redisService.prefixNamespace(getClass().getSimpleName()+"_status_"); - @NonNull public BubbleNode newNode(@NonNull final NewNodeNotification nn, NodeLaunchMonitor launchMonitor) { + @NonNull public BubbleNode newNode(@NonNull final NewNodeNotification nn, + NodeLaunchMonitor launchMonitor) { final long start = now(); log.info("newNode starting:\n"+json(nn)); ComputeServiceDriver computeDriver = null; @@ -148,7 +147,7 @@ public class StandardNetworkService implements NetworkService { final BubbleNetwork network = nn.getNetworkObject(); final ExecutorService backgroundJobs = DaemonThreadFactory.fixedPool(3); try { - progressMeter = new NodeProgressMeter(nn, getNetworkSetupStatus(), this, launchMonitor); + progressMeter = launchMonitor.getProgressMeter(nn); progressMeter.write(METER_TICK_CONFIRMING_NETWORK_LOCK); if (!confirmLock(nn.getNetwork(), lock)) { @@ -536,7 +535,7 @@ public class StandardNetworkService implements NetworkService { log.info(prefix+"starting"); final NotificationReceipt receipt = notificationService.notify(node, NotificationType.health_check, null); if (receipt == null) { - log.info(prefix+" health_check failed, checking via cloud"); + log.info(prefix+"health_check failed, checking via cloud"); final CloudService cloud = cloudDAO.findByUuid(node.getCloud()); if (cloud == null) { log.warn(prefix+"cloud not found: "+node.getCloud()); @@ -705,6 +704,8 @@ public class StandardNetworkService implements NetworkService { return true; } + networkDAO.update(network.setState(BubbleNetworkState.stopping)); + background(() -> { String lock = null; final String networkUuid = network.getUuid(); @@ -726,9 +727,6 @@ public class StandardNetworkService implements NetworkService { } } - network.setState(BubbleNetworkState.stopping); - networkDAO.update(network); - final ValidationResult validationResult = new ValidationResult(); // todo: parallel shutdown? @@ -755,7 +753,7 @@ public class StandardNetworkService implements NetworkService { } catch (RuntimeException e) { log.error("stopNetwork: error stopping: "+e); - if (network != null) network.setState(BubbleNetworkState.error_stopping); + network.setState(BubbleNetworkState.error_stopping); networkDAO.update(network); throw e; @@ -786,44 +784,6 @@ public class StandardNetworkService implements NetworkService { return cloud; } - public NodeProgressMeterTick getLaunchStatus(String accountUuid, String uuid) { - final String json = getNetworkSetupStatus().get(getProgressMeterKey(uuid, accountUuid)); - if (json == null) return null; - try { - final NodeProgressMeterTick tick = json(json, NodeProgressMeterTick.class); - if (!tick.hasAccount() || !tick.getAccount().equals(accountUuid)) { - log.warn("getLaunchStatus: tick.account != accountUuid, returning null"); - return null; - } - return tick.setPattern(null); - } catch (Exception e) { - return die("getLaunchStatus: "+e); - } - } - - public List listLaunchStatuses(String accountUuid) { - return listLaunchStatuses(accountUuid, null); - } - - public List listLaunchStatuses(String accountUuid, String networkUuid) { - final RedisService stats = getNetworkSetupStatus(); - final List ticks = new ArrayList<>(); - for (String key : stats.keys(getProgressMeterPrefix(accountUuid)+"*")) { - final String json = stats.get_withPrefix(key); - if (json != null) { - try { - final NodeProgressMeterTick tick = json(json, NodeProgressMeterTick.class).setPattern(null); - if (networkUuid != null && tick.hasNetwork() && networkUuid.equals(tick.getNetwork())) { - ticks.add(tick); - } - } catch (Exception e) { - log.warn("currentTicks (bad json?): "+e); - } - } - } - return ticks; - } - private static class NodeLaunchAwait implements Runnable { private final NodeProgressMeter progressMeter; diff --git a/bubble-server/src/main/resources/message_templates/en_US/server/post_auth/ResourceMessages.properties b/bubble-server/src/main/resources/message_templates/en_US/server/post_auth/ResourceMessages.properties index 78ca3be3..e5638a76 100644 --- a/bubble-server/src/main/resources/message_templates/en_US/server/post_auth/ResourceMessages.properties +++ b/bubble-server/src/main/resources/message_templates/en_US/server/post_auth/ResourceMessages.properties @@ -399,6 +399,7 @@ meter_tick_ready_check2=And what a lovely pie it is... #meter_tick_ssh_keys=Setting up SSH keys # Launch progress meter: success marker +meter_canceled=Bubble installation was canceled meter_completed=Bubble installation completed successfully! On to your Bubble! # Launch progress meter: errors diff --git a/bubble-web b/bubble-web index 5ece6028..1ba5ed45 160000 --- a/bubble-web +++ b/bubble-web @@ -1 +1 @@ -Subproject commit 5ece6028d57ecf5b970d249e8ed35b70f32b23fb +Subproject commit 1ba5ed4572441a5ec253f455e4f00eea111bcacc