Skip to content

Commit

Permalink
fix: separate nrf due to nucleus exit
Browse files Browse the repository at this point in the history
  • Loading branch information
alter-mage committed Sep 5, 2024
1 parent 991fa35 commit c540795
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 6 deletions.
5 changes: 5 additions & 0 deletions scripts/loader
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ echo "Greengrass root: "${GG_ROOT}
# Directory the Nucleus is launched from; expected to be a symlink under alts/.
LAUNCH_DIR="$GG_ROOT/alts/current"
CONFIG_FILE=""

# Log where the launch-dir link points. Both the variable and the command
# substitution are quoted so a path containing whitespace or glob characters
# reaches readlink as one argument and is printed unmodified.
echo "Absolute launch dir: $(readlink "$LAUNCH_DIR")"

# Succeeds (exit status 0) only when "$1" is a symbolic link that resolves to
# a directory; returns non-zero otherwise.
is_directory_link() {
  if [ ! -L "$1" ]; then
    return 1
  fi
  [ -d "$1" ]
}
Expand Down Expand Up @@ -100,4 +102,7 @@ if [ $sigterm_received -eq 0 ] && is_directory_link "${GG_ROOT}/alts/old" && is_
flip_link "${GG_ROOT}/alts/old" "${LAUNCH_DIR}"
fi

## Touch an empty marker file to indicate rollback due to unexpected Nucleus exit.
## KernelUpdateDeploymentTask looks for (and deletes) a file named "restart_panic"
## in the Nucleus work path to tell a Nucleus restart failure apart from an IO error.
## NOTE(review): presumably this path is the same one the Java side resolves via
## workPath(DEFAULT_NUCLEUS_COMPONENT_NAME) -- confirm the two stay in sync.
touch "${GG_ROOT}/work/aws.greengrass.Nucleus/restart_panic"

exit ${kernel_exit_code}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.aws.greengrass.util.Utils;

import java.io.IOException;
import java.nio.file.Files;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CancellationException;
Expand All @@ -32,12 +33,14 @@
import java.util.stream.Collectors;

import static com.aws.greengrass.deployment.DeploymentConfigMerger.DEPLOYMENT_ID_LOG_KEY;
import static com.aws.greengrass.deployment.DeviceConfiguration.DEFAULT_NUCLEUS_COMPONENT_NAME;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ACTIVATION;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ROLLBACK;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.ROLLBACK_BOOTSTRAP;

public class KernelUpdateDeploymentTask implements DeploymentTask {
public static final String RESTART_PANIC_FILE_NAME = "restart_panic";
private final Kernel kernel;
private final Logger logger;
private final Deployment deployment;
Expand Down Expand Up @@ -149,10 +152,25 @@ private void saveDeploymentStatusDetails(Throwable failureCause) throws IOExcept

private DeploymentException getDeploymentStatusDetails() {
if (Utils.isEmpty(deployment.getStageDetails())) {
return new DeploymentException(
"Nucleus update workflow failed to restart Nucleus. See loader logs for more details",
DeploymentErrorCode.NUCLEUS_RESTART_FAILURE);
try {
if (Files.deleteIfExists(
kernel.getNucleusPaths().workPath(DEFAULT_NUCLEUS_COMPONENT_NAME)
.resolve(RESTART_PANIC_FILE_NAME).toAbsolutePath())) {
return new DeploymentException(
"Nucleus update workflow failed to restart Nucleus. See loader logs for more details",
DeploymentErrorCode.NUCLEUS_RESTART_FAILURE);
} else {
return new DeploymentException("Nucleus update workflow failed to restart Nucleus due to an "
+ "unexpected device IO error",
DeploymentErrorCode.IO_WRITE_ERROR);
}
} catch (IOException e) {
return new DeploymentException("Nucleus update workflow failed to restart Nucleus due to an "
+ "unexpected device IO error. See loader logs for more details", e,
DeploymentErrorCode.IO_WRITE_ERROR);
}
}

List<DeploymentErrorCode> errorStack = deployment.getErrorStack() == null ? Collections.emptyList()
: deployment.getErrorStack().stream().map(DeploymentErrorCode::valueOf).collect(Collectors.toList());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
import com.aws.greengrass.lifecyclemanager.KernelAlternatives;
import com.aws.greengrass.lifecyclemanager.KernelLifecycle;
import com.aws.greengrass.lifecyclemanager.exceptions.DirectoryValidationException;
import com.aws.greengrass.util.NucleusPaths;
import com.aws.greengrass.util.Pair;
import com.aws.greengrass.util.Utils;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
Expand All @@ -28,6 +30,8 @@

import static com.aws.greengrass.deployment.DeploymentConfigMerger.DEPLOYMENT_ID_LOG_KEY;
import static com.aws.greengrass.deployment.DeploymentConfigMerger.MERGE_CONFIG_EVENT_KEY;
import static com.aws.greengrass.deployment.DeviceConfiguration.DEFAULT_NUCLEUS_COMPONENT_NAME;
import static com.aws.greengrass.deployment.KernelUpdateDeploymentTask.RESTART_PANIC_FILE_NAME;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_REBOOT;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART;
import static com.aws.greengrass.deployment.model.Deployment.DeploymentStage.KERNEL_ROLLBACK;
Expand All @@ -39,6 +43,7 @@
public class KernelUpdateActivator extends DeploymentActivator {
private final BootstrapManager bootstrapManager;
private final KernelAlternatives kernelAlternatives;
private final NucleusPaths nucleusPaths;

/**
* Constructor of KernelUpdateActivator.
Expand All @@ -51,6 +56,7 @@ public KernelUpdateActivator(Kernel kernel, BootstrapManager bootstrapManager) {
super(kernel);
this.bootstrapManager = bootstrapManager;
this.kernelAlternatives = kernel.getContext().get(KernelAlternatives.class);
this.nucleusPaths = kernel.getNucleusPaths();
}

@Override
Expand Down Expand Up @@ -81,6 +87,14 @@ public void activate(Map<String, Object> newConfig, Deployment deployment,

updateConfiguration(deploymentDocument.getTimestamp(), newConfig);

// Try to delete the restart panic file if it exists
try {
Files.deleteIfExists(nucleusPaths.workPath(DEFAULT_NUCLEUS_COMPONENT_NAME)
.resolve(RESTART_PANIC_FILE_NAME).toAbsolutePath());
} catch (IOException e) {
logger.atWarn().log("Unable to delete an existing restart panic file", e);
}

Path bootstrapTaskFilePath;
try {
bootstrapTaskFilePath = deploymentDirectoryManager.getBootstrapTaskFilePath();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import com.aws.greengrass.lifecyclemanager.exceptions.DirectoryValidationException;
import com.aws.greengrass.testcommons.testutilities.GGExtension;
import com.aws.greengrass.testcommons.testutilities.TestUtils;
import com.aws.greengrass.util.NucleusPaths;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
Expand All @@ -39,6 +40,7 @@
import java.util.Map;
import java.util.concurrent.CompletableFuture;

import static com.aws.greengrass.deployment.DeviceConfiguration.DEFAULT_NUCLEUS_COMPONENT_NAME;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.NO_OP;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_REBOOT;
import static com.aws.greengrass.deployment.bootstrap.BootstrapSuccessCode.REQUEST_RESTART;
Expand Down Expand Up @@ -72,6 +74,8 @@ class KernelUpdateActivatorTest {
@Mock
KernelAlternatives kernelAlternatives;
@Mock
NucleusPaths nucleusPaths;
@Mock
CompletableFuture<DeploymentResult> totallyCompleteFuture;
@Mock
Deployment deployment;
Expand All @@ -85,9 +89,10 @@ class KernelUpdateActivatorTest {
KernelUpdateActivator kernelUpdateActivator;

@BeforeEach
void beforeEach() {
void beforeEach() throws IOException {
doReturn(deploymentDirectoryManager).when(context).get(eq(DeploymentDirectoryManager.class));
doReturn(kernelAlternatives).when(context).get(eq(KernelAlternatives.class));
doReturn(nucleusPaths).when(kernel).getNucleusPaths();
doReturn(context).when(kernel).getContext();
lenient().doReturn(config).when(kernel).getConfig();
kernelUpdateActivator = new KernelUpdateActivator(kernel, bootstrapManager);
Expand Down Expand Up @@ -120,6 +125,8 @@ void GIVEN_deployment_activate_WHEN_prepareBootstrap_fails_THEN_deployment_rollb
doReturn(bootstrapFilePath).when(deploymentDirectoryManager).getBootstrapTaskFilePath();
Path targetConfigFilePath = mock(Path.class);
doReturn(targetConfigFilePath).when(deploymentDirectoryManager).getTargetConfigFilePath();
IOException mockNucleusWorkPathIOE = new IOException("Mock Nucleus work path IOE");
doThrow(mockNucleusWorkPathIOE).when(nucleusPaths).workPath(eq(DEFAULT_NUCLEUS_COMPONENT_NAME));
IOException mockIOE = new IOException("mock error");
doThrow(mockIOE).when(kernelAlternatives).prepareBootstrap(eq("testId"));
doThrow(new IOException()).when(deploymentDirectoryManager).writeDeploymentMetadata(eq(deployment));
Expand All @@ -146,6 +153,8 @@ void GIVEN_deployment_activate_WHEN_bootstrap_task_fails_THEN_deployment_rollbac
doReturn(targetConfigFilePath).when(deploymentDirectoryManager).getTargetConfigFilePath();
ServiceUpdateException mockSUE = new ServiceUpdateException("mock error", DeploymentErrorCode.COMPONENT_BOOTSTRAP_ERROR,
DeploymentErrorType.USER_COMPONENT_ERROR);
IOException mockNucleusWorkPathIOE = new IOException("Mock Nucleus work path IOE");
doThrow(mockNucleusWorkPathIOE).when(nucleusPaths).workPath(eq(DEFAULT_NUCLEUS_COMPONENT_NAME));
doThrow(mockSUE).when(bootstrapManager).executeAllBootstrapTasksSequentially(eq(bootstrapFilePath));
doThrow(new IOException()).when(kernelAlternatives).prepareRollback();

Expand All @@ -163,10 +172,14 @@ void GIVEN_deployment_activate_WHEN_bootstrap_task_fails_THEN_deployment_rollbac
}

@Test
void GIVEN_deployment_activate_WHEN_bootstrap_finishes_THEN_request_restart() throws Exception {
void GIVEN_deployment_activate_WHEN_bootstrap_finishes_THEN_request_restart(ExtensionContext context) throws Exception {
ignoreExceptionOfType(context, IOException.class);

Path bootstrapFilePath = mock(Path.class);
doReturn(bootstrapFilePath).when(deploymentDirectoryManager).getBootstrapTaskFilePath();
Path targetConfigFilePath = mock(Path.class);
IOException mockNucleusWorkPathIOE = new IOException("Mock Nucleus work path IOE");
doThrow(mockNucleusWorkPathIOE).when(nucleusPaths).workPath(eq(DEFAULT_NUCLEUS_COMPONENT_NAME));
doReturn(targetConfigFilePath).when(deploymentDirectoryManager).getTargetConfigFilePath();
doReturn(NO_OP).when(bootstrapManager).executeAllBootstrapTasksSequentially(eq(bootstrapFilePath));
doReturn(false).when(bootstrapManager).hasNext();
Expand All @@ -179,11 +192,15 @@ void GIVEN_deployment_activate_WHEN_bootstrap_finishes_THEN_request_restart() th
}

@Test
void GIVEN_deployment_activate_WHEN_bootstrap_requires_reboot_THEN_request_reboot() throws Exception {
void GIVEN_deployment_activate_WHEN_bootstrap_requires_reboot_THEN_request_reboot(ExtensionContext context) throws Exception {
ignoreExceptionOfType(context, IOException.class);

Path bootstrapFilePath = mock(Path.class);
doReturn(bootstrapFilePath).when(deploymentDirectoryManager).getBootstrapTaskFilePath();
Path targetConfigFilePath = mock(Path.class);
doReturn(targetConfigFilePath).when(deploymentDirectoryManager).getTargetConfigFilePath();
IOException mockNucleusWorkPathIOE = new IOException("Mock Nucleus work path IOE");
doThrow(mockNucleusWorkPathIOE).when(nucleusPaths).workPath(eq(DEFAULT_NUCLEUS_COMPONENT_NAME));
doReturn(REQUEST_REBOOT).when(bootstrapManager).executeAllBootstrapTasksSequentially(eq(bootstrapFilePath));
doReturn(true).when(bootstrapManager).hasNext();

Expand Down

0 comments on commit c540795

Please sign in to comment.