From c540795911ceb40e577e969451be0362a8eeb9c4 Mon Sep 17 00:00:00 2001 From: Siddhant Srivastava Date: Wed, 4 Sep 2024 10:10:26 -0700 Subject: [PATCH] fix: separate nrf due to nucleus exit --- scripts/loader | 5 ++++ .../KernelUpdateDeploymentTask.java | 24 ++++++++++++++++--- .../activator/KernelUpdateActivator.java | 14 +++++++++++ .../activator/KernelUpdateActivatorTest.java | 23 +++++++++++++++--- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/scripts/loader b/scripts/loader index 86b27642d4..f4dcc6c855 100755 --- a/scripts/loader +++ b/scripts/loader @@ -14,6 +14,8 @@ echo "Greengrass root: "${GG_ROOT} LAUNCH_DIR="$GG_ROOT/alts/current" CONFIG_FILE="" +echo "Absolute launch dir: "$(readlink $LAUNCH_DIR) + is_directory_link() { [ -L "$1" ] && [ -d "$1" ] } @@ -100,4 +102,7 @@ if [ $sigterm_received -eq 0 ] && is_directory_link "${GG_ROOT}/alts/old" && is_ flip_link "${GG_ROOT}/alts/old" "${LAUNCH_DIR}" fi +## Touch an empty file to indicate rollback due to unexpected Nucleus exit +touch "${GG_ROOT}/work/aws.greengrass.Nucleus/restart_panic" + exit ${kernel_exit_code} diff --git a/src/main/java/com/aws/greengrass/deployment/KernelUpdateDeploymentTask.java b/src/main/java/com/aws/greengrass/deployment/KernelUpdateDeploymentTask.java index 89fa294f52..10bca7313a 100644 --- a/src/main/java/com/aws/greengrass/deployment/KernelUpdateDeploymentTask.java +++ b/src/main/java/com/aws/greengrass/deployment/KernelUpdateDeploymentTask.java @@ -23,6 +23,7 @@ import com.aws.greengrass.util.Utils; import java.io.IOException; +import java.nio.file.Files; import java.util.Collections; import java.util.List; import java.util.concurrent.CancellationException; @@ -32,12 +33,14 @@ import java.util.stream.Collectors; import static com.aws.greengrass.deployment.DeploymentConfigMerger.DEPLOYMENT_ID_LOG_KEY; +import static com.aws.greengrass.deployment.DeviceConfiguration.DEFAULT_NUCLEUS_COMPONENT_NAME; import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART; import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ACTIVATION; import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ROLLBACK; import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.ROLLBACK_BOOTSTRAP; public class KernelUpdateDeploymentTask implements DeploymentTask { + public static final String RESTART_PANIC_FILE_NAME = "restart_panic"; private final Kernel kernel; private final Logger logger; private final Deployment deployment; @@ -149,10 +152,25 @@ private void saveDeploymentStatusDetails(Throwable failureCause) throws IOExcept private DeploymentException getDeploymentStatusDetails() { if (Utils.isEmpty(deployment.getStageDetails())) { - return new DeploymentException( - "Nucleus update workflow failed to restart Nucleus. See loader logs for more details", - DeploymentErrorCode.NUCLEUS_RESTART_FAILURE); + try { + if (Files.deleteIfExists( + kernel.getNucleusPaths().workPath(DEFAULT_NUCLEUS_COMPONENT_NAME) + .resolve(RESTART_PANIC_FILE_NAME).toAbsolutePath())) { + return new DeploymentException( + "Nucleus update workflow failed to restart Nucleus. See loader logs for more details", + DeploymentErrorCode.NUCLEUS_RESTART_FAILURE); + } else { + return new DeploymentException("Nucleus update workflow failed to restart Nucleus due to an " + + "unexpected device IO error", + DeploymentErrorCode.IO_WRITE_ERROR); + } + } catch (IOException e) { + return new DeploymentException("Nucleus update workflow failed to restart Nucleus due to an " + + "unexpected device IO error. See loader logs for more details", e, + DeploymentErrorCode.IO_WRITE_ERROR); + } } + List errorStack = deployment.getErrorStack() == null ? Collections.emptyList() : deployment.getErrorStack().stream().map(DeploymentErrorCode::valueOf).collect(Collectors.toList()); diff --git a/src/main/java/com/aws/greengrass/deployment/activator/KernelUpdateActivator.java b/src/main/java/com/aws/greengrass/deployment/activator/KernelUpdateActivator.java index 18affabefd..07cda7efd7 100644 --- a/src/main/java/com/aws/greengrass/deployment/activator/KernelUpdateActivator.java +++ b/src/main/java/com/aws/greengrass/deployment/activator/KernelUpdateActivator.java @@ -16,10 +16,12 @@ import com.aws.greengrass.lifecyclemanager.KernelAlternatives; import com.aws.greengrass.lifecyclemanager.KernelLifecycle; import com.aws.greengrass.lifecyclemanager.exceptions.DirectoryValidationException; +import com.aws.greengrass.util.NucleusPaths; import com.aws.greengrass.util.Pair; import com.aws.greengrass.util.Utils; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.util.List; import java.util.Map; @@ -28,6 +30,8 @@ import static com.aws.greengrass.deployment.DeploymentConfigMerger.DEPLOYMENT_ID_LOG_KEY; import static com.aws.greengrass.deployment.DeploymentConfigMerger.MERGE_CONFIG_EVENT_KEY; +import static com.aws.greengrass.deployment.DeviceConfiguration.DEFAULT_NUCLEUS_COMPONENT_NAME; +import static com.aws.greengrass.deployment.KernelUpdateDeploymentTask.RESTART_PANIC_FILE_NAME; import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_REBOOT; import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART; import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ROLLBACK; @@ -39,6 +43,7 @@ public class KernelUpdateActivator extends DeploymentActivator { private final BootstrapManager bootstrapManager; private final KernelAlternatives kernelAlternatives; + private final NucleusPaths nucleusPaths; /** * Constructor of KernelUpdateActivator. @@ -51,6 +56,7 @@ public KernelUpdateActivator(Kernel kernel, BootstrapManager bootstrapManager) { super(kernel); this.bootstrapManager = bootstrapManager; this.kernelAlternatives = kernel.getContext().get(KernelAlternatives.class); + this.nucleusPaths = kernel.getNucleusPaths(); } @Override @@ -81,6 +87,14 @@ public void activate(Map newConfig, Deployment deployment, updateConfiguration(deploymentDocument.getTimestamp(), newConfig); + // Try and delete restart panic file if it exists + try { + Files.deleteIfExists(nucleusPaths.workPath(DEFAULT_NUCLEUS_COMPONENT_NAME) + .resolve(RESTART_PANIC_FILE_NAME).toAbsolutePath()); + } catch (IOException e) { + logger.atWarn().log("Unable to delete an existing restart panic file", e); + } + Path bootstrapTaskFilePath; try { bootstrapTaskFilePath = deploymentDirectoryManager.getBootstrapTaskFilePath(); diff --git a/src/test/java/com/aws/greengrass/deployment/activator/KernelUpdateActivatorTest.java b/src/test/java/com/aws/greengrass/deployment/activator/KernelUpdateActivatorTest.java index d340fd8b69..3922d5e79f 100644 --- a/src/test/java/com/aws/greengrass/deployment/activator/KernelUpdateActivatorTest.java +++ b/src/test/java/com/aws/greengrass/deployment/activator/KernelUpdateActivatorTest.java @@ -23,6 +23,7 @@ import com.aws.greengrass.lifecyclemanager.exceptions.DirectoryValidationException; import com.aws.greengrass.testcommons.testutilities.GGExtension; import com.aws.greengrass.testcommons.testutilities.TestUtils; +import com.aws.greengrass.util.NucleusPaths; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -39,6 +40,7 @@ import java.util.Map; import java.util.concurrent.CompletableFuture; +import static com.aws.greengrass.deployment.DeviceConfiguration.DEFAULT_NUCLEUS_COMPONENT_NAME; import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.NO_OP; import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_REBOOT; import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART; @@ -72,6 +74,8 @@ class KernelUpdateActivatorTest { @Mock KernelAlternatives kernelAlternatives; @Mock + NucleusPaths nucleusPaths; + @Mock CompletableFuture totallyCompleteFuture; @Mock Deployment deployment; @@ -85,9 +89,10 @@ class KernelUpdateActivatorTest { KernelUpdateActivator kernelUpdateActivator; @BeforeEach - void beforeEach() { + void beforeEach() throws IOException { doReturn(deploymentDirectoryManager).when(context).get(eq(DeploymentDirectoryManager.class)); doReturn(kernelAlternatives).when(context).get(eq(KernelAlternatives.class)); + doReturn(nucleusPaths).when(kernel).getNucleusPaths(); doReturn(context).when(kernel).getContext(); lenient().doReturn(config).when(kernel).getConfig(); kernelUpdateActivator = new KernelUpdateActivator(kernel, bootstrapManager); @@ -120,6 +125,8 @@ void GIVEN_deployment_activate_WHEN_prepareBootstrap_fails_THEN_deployment_rollb doReturn(bootstrapFilePath).when(deploymentDirectoryManager).getBootstrapTaskFilePath(); Path targetConfigFilePath = mock(Path.class); doReturn(targetConfigFilePath).when(deploymentDirectoryManager).getTargetConfigFilePath(); + IOException mockNucleusWorkPathIOE = new IOException("Mock Nucleus work path IOE"); + doThrow(mockNucleusWorkPathIOE).when(nucleusPaths).workPath(eq(DEFAULT_NUCLEUS_COMPONENT_NAME)); IOException mockIOE = new IOException("mock error"); doThrow(mockIOE).when(kernelAlternatives).prepareBootstrap(eq("testId")); doThrow(new IOException()).when(deploymentDirectoryManager).writeDeploymentMetadata(eq(deployment)); @@ -146,6 +153,8 @@ void GIVEN_deployment_activate_WHEN_bootstrap_task_fails_THEN_deployment_rollbac doReturn(targetConfigFilePath).when(deploymentDirectoryManager).getTargetConfigFilePath(); ServiceUpdateException mockSUE = new ServiceUpdateException("mock error", DeploymentErrorCode.COMPONENT_BOOTSTRAP_ERROR, DeploymentErrorType.USER_COMPONENT_ERROR); + IOException mockNucleusWorkPathIOE = new IOException("Mock Nucleus work path IOE"); + doThrow(mockNucleusWorkPathIOE).when(nucleusPaths).workPath(eq(DEFAULT_NUCLEUS_COMPONENT_NAME)); doThrow(mockSUE).when(bootstrapManager).executeAllBootstrapTasksSequentially(eq(bootstrapFilePath)); doThrow(new IOException()).when(kernelAlternatives).prepareRollback(); @@ -163,10 +172,14 @@ void GIVEN_deployment_activate_WHEN_bootstrap_task_fails_THEN_deployment_rollbac } @Test - void GIVEN_deployment_activate_WHEN_bootstrap_finishes_THEN_request_restart() throws Exception { + void GIVEN_deployment_activate_WHEN_bootstrap_finishes_THEN_request_restart(ExtensionContext context) throws Exception { + ignoreExceptionOfType(context, IOException.class); + Path bootstrapFilePath = mock(Path.class); doReturn(bootstrapFilePath).when(deploymentDirectoryManager).getBootstrapTaskFilePath(); Path targetConfigFilePath = mock(Path.class); + IOException mockNucleusWorkPathIOE = new IOException("Mock Nucleus work path IOE"); + doThrow(mockNucleusWorkPathIOE).when(nucleusPaths).workPath(eq(DEFAULT_NUCLEUS_COMPONENT_NAME)); doReturn(targetConfigFilePath).when(deploymentDirectoryManager).getTargetConfigFilePath(); doReturn(NO_OP).when(bootstrapManager).executeAllBootstrapTasksSequentially(eq(bootstrapFilePath)); doReturn(false).when(bootstrapManager).hasNext(); @@ -179,11 +192,15 @@ void GIVEN_deployment_activate_WHEN_bootstrap_finishes_THEN_request_restart() th } @Test - void GIVEN_deployment_activate_WHEN_bootstrap_requires_reboot_THEN_request_reboot() throws Exception { + void GIVEN_deployment_activate_WHEN_bootstrap_requires_reboot_THEN_request_reboot(ExtensionContext context) throws Exception { + ignoreExceptionOfType(context, IOException.class); + Path bootstrapFilePath = mock(Path.class); doReturn(bootstrapFilePath).when(deploymentDirectoryManager).getBootstrapTaskFilePath(); Path targetConfigFilePath = mock(Path.class); doReturn(targetConfigFilePath).when(deploymentDirectoryManager).getTargetConfigFilePath(); + IOException mockNucleusWorkPathIOE = new IOException("Mock Nucleus work path IOE"); + doThrow(mockNucleusWorkPathIOE).when(nucleusPaths).workPath(eq(DEFAULT_NUCLEUS_COMPONENT_NAME)); doReturn(REQUEST_REBOOT).when(bootstrapManager).executeAllBootstrapTasksSequentially(eq(bootstrapFilePath)); doReturn(true).when(bootstrapManager).hasNext();