Skip to content

Commit

Permalink
feat: Implement storage proxy status check CLI command
Browse files Browse the repository at this point in the history
  • Loading branch information
jopemachine committed Jun 24, 2024
1 parent adc7af6 commit 7eec2ba
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 7 deletions.
99 changes: 99 additions & 0 deletions src/ai/backend/storage/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,106 @@
import asyncio
import pathlib
import sys
from pprint import pformat

import click
from tabulate import tabulate

from ai.backend.cli.types import CliContextInfo
from ai.backend.common.exception import ConfigurationError
from ai.backend.common.types import LogSeverity
from ai.backend.storage.config import load_local_config


# Root click group: subcommands attach themselves via ``@main.command()``.
# The docstring doubles as the help text click prints for this group, so it
# is user-facing output and not just internal documentation.
@click.group()
def main() -> None:
    """The root entrypoint for unified CLI of storage-proxy"""


async def inspect_server_status(cli_ctx: CliContextInfo, storage_proxy_pid: int) -> None:
    """
    Print a table describing the process with the given PID, as reported by ``ps -f``.

    :param cli_ctx: The CLI context object. It is not used by this function;
        it is kept so existing callers do not have to change.
    :param storage_proxy_pid: The PID to look up with ``ps -p``.
    :raises RuntimeError: If ``ps`` cannot be started, writes anything to
        stderr, or produces no output at all.
    """
    # Pass an argv list instead of a shell string so nothing is shell-interpreted.
    command = ["ps", "-p", str(storage_proxy_pid), "-f"]
    command_repr = " ".join(command)
    try:
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            # stderr must be piped explicitly; otherwise communicate() returns
            # None for it and the failure check below could never trigger.
            stderr=asyncio.subprocess.PIPE,
        )
    except OSError as e:
        raise RuntimeError(f"Failed to execute the command: {command_repr}") from e
    stdout, stderr = await process.communicate()
    if stderr:
        detail = stderr.decode(errors="replace").strip()
        raise RuntimeError(f"Failed to execute the command: {command_repr}\n{detail}")

    lines = stdout.decode().splitlines()
    if not lines:
        # Without even a header line there is nothing to tabulate.
        raise RuntimeError(f"Failed to execute the command: {command_repr}")

    header, *rows = lines
    headers = header.split()
    # The last header column (CMD for ``ps -f``) may itself contain spaces, so
    # every token past the fixed leading columns is folded back into one cell.
    fixed_columns = len(headers) - 1
    process_list = []
    for row in rows:
        columns = row.split()
        process_list.append(columns[:fixed_columns] + [" ".join(columns[fixed_columns:])])

    print(tabulate(process_list, headers=headers, tablefmt="pretty"))


@main.command()
@click.pass_obj
@click.option(
    "-f",
    "--config-path",
    "--config",
    type=click.Path(
        file_okay=True,
        dir_okay=False,
        exists=True,
        path_type=pathlib.Path,
    ),
    default=None,
    help=(
        "The config file path. "
        "(default: ./storage-proxy.toml and /etc/backend.ai/storage-proxy.toml)"
    ),
)
@click.option(
    "--debug",
    is_flag=True,
    help="Set the logging level to DEBUG",
)
@click.option(
    "-s",
    "--systemctl",
    is_flag=True,
    help="Include the systemctl status command result in the output",
)
@click.option(
    "--log-level",
    type=click.Choice([*LogSeverity], case_sensitive=False),
    default=LogSeverity.INFO,
    help="Set the logging verbosity level",
)
def status(
    cli_ctx: CliContextInfo,
    config_path: pathlib.Path | None,
    log_level: LogSeverity,
    debug: bool = False,
    systemctl: bool = False,
) -> None:
    """
    Collect and print each storage proxy server process's status.
    """
    # NOTE: the docstring above is shown by click as this command's help text,
    # so implementation notes are kept in comments instead.
    #
    # Flow: load the local config, read the PID stored in the file named by
    # its ["storage-proxy"]["pid-file"] entry, then print the ``ps`` row for
    # that PID. Any failure is reported on stderr and ends in click.Abort.
    #
    # NOTE(review): ``systemctl`` is accepted but not acted upon anywhere in
    # this command yet -- confirm whether the systemctl output is still planned.

    if debug:
        # --debug is documented as "Set the logging level to DEBUG"; promote
        # the level before loading so the flag is not overridden by --log-level.
        log_level = LogSeverity.DEBUG

    try:
        local_config = load_local_config(config_path, log_level, debug=debug)
    except ConfigurationError as e:
        print(
            "ConfigurationError: Could not read or validate the storage-proxy local config:",
            file=sys.stderr,
        )
        print(pformat(e.invalid_data), file=sys.stderr)
        raise click.Abort()

    pid_filepath = local_config["storage-proxy"]["pid-file"]

    # Open directly instead of checking exists() first: the file may vanish
    # between the check and the read.
    try:
        with open(pid_filepath, "r") as file:
            raw_pid = file.read()
    except FileNotFoundError:
        print(
            f'The pid file "{pid_filepath}" does not exist. '
            "The storage-proxy may not be running.",
            file=sys.stderr,
        )
        raise click.Abort()

    try:
        storage_proxy_pid = int(raw_pid)
    except ValueError:
        print(
            f'The pid file "{pid_filepath}" does not contain a valid process ID.',
            file=sys.stderr,
        )
        raise click.Abort()

    try:
        asyncio.run(inspect_server_status(cli_ctx, storage_proxy_pid))
    except RuntimeError as e:
        # Report process-inspection failures like the other errors in this
        # command instead of surfacing a traceback.
        print(str(e), file=sys.stderr)
        raise click.Abort()
9 changes: 8 additions & 1 deletion src/ai/backend/storage/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from ai.backend.common.etcd import AsyncEtcd, ConfigScopes
from ai.backend.common.logging import logging_config_iv
from ai.backend.common.types import LogSeverity

from .types import VolumeInfo

Expand Down Expand Up @@ -113,7 +114,9 @@
)


def load_local_config(config_path: Path | None, debug: bool = False) -> dict[str, Any]:
def load_local_config(
config_path: Path | None, log_level: LogSeverity, debug: bool = False
) -> dict[str, Any]:
# Determine where to read configuration.
raw_cfg, cfg_src_path = read_from_file(config_path, "storage-proxy")
os.chdir(cfg_src_path.parent)
Expand All @@ -125,6 +128,10 @@ def load_local_config(config_path: Path | None, debug: bool = False) -> dict[str
if debug:
override_key(raw_cfg, ("debug", "enabled"), True)

override_key(raw_cfg, ("debug", "enabled"), log_level == LogSeverity.DEBUG)
override_key(raw_cfg, ("logging", "level"), log_level)
override_key(raw_cfg, ("logging", "pkg-ns", "ai.backend"), log_level)

try:
local_config = check(raw_cfg, local_config_iv)
local_config["_src"] = cfg_src_path
Expand Down
7 changes: 1 addition & 6 deletions src/ai/backend/storage/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from ai.backend.common.config import (
ConfigurationError,
override_key,
redis_config_iv,
)
from ai.backend.common.defs import REDIS_STREAM_DB
Expand Down Expand Up @@ -252,7 +251,7 @@ def main(
) -> int:
"""Start the storage-proxy service as a foreground process."""
try:
local_config = load_local_config(config_path, debug=debug)
local_config = load_local_config(config_path, log_level, debug=debug)
except ConfigurationError as e:
print(
"ConfigurationError: Could not read or validate the storage-proxy local config:",
Expand All @@ -262,10 +261,6 @@ def main(
raise click.Abort()
if debug:
log_level = LogSeverity.DEBUG
override_key(local_config, ("debug", "enabled"), log_level == LogSeverity.DEBUG)
override_key(local_config, ("logging", "level"), log_level)
override_key(local_config, ("logging", "pkg-ns", "ai.backend"), log_level)

multiprocessing.set_start_method("spawn")

if cli_ctx.invoked_subcommand is None:
Expand Down

0 comments on commit 7eec2ba

Please sign in to comment.