diff --git a/.idea/misc.xml b/.idea/misc.xml index 90b1042..af96df3 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/.idea/podlaunch.iml b/.idea/podlaunch.iml index 909438d..f571432 100644 --- a/.idea/podlaunch.iml +++ b/.idea/podlaunch.iml @@ -2,7 +2,7 @@ - + \ No newline at end of file diff --git a/imagerebuild.py b/imagerebuild.py index 086a884..7258d30 100755 --- a/imagerebuild.py +++ b/imagerebuild.py @@ -8,11 +8,17 @@ from typing import Set, Callable import click import sh -from sh import podman -from sh import systemctl SERVICES_BASE_PATH = "/infra/services/" +progressbar = click.progressbar + + +def hidden_progressbar(*args, **kwargs): + bar = click.progressbar(*args, **kwargs) + bar.is_hidden = True + return bar + def resolve_image_units(): services_path = pathlib.Path(SERVICES_BASE_PATH) @@ -20,33 +26,29 @@ def resolve_image_units(): logging.info(f"Found {len(services_set)} services: {str(services_set)}") - systemctl("daemon-reload") + sh.systemctl("daemon-reload") def remove_masked_unit( _item_set: Set[str], item: str, item_to_unit: Callable[[str], str] = lambda i: i, ): - load_state = systemctl.show( + load_state_output = sh.systemctl.show( "--property=LoadState", "--value", item_to_unit(item) ) - load_state = load_state.stdout.strip().decode( - encoding="utf-8", errors="replace" - ) + load_state = load_state_output.strip() logging.debug(f"{item} load state: {repr(load_state)}") if load_state == "masked": logging.info(f"Removed masked entry: {item}") _item_set.remove(item) - with click.progressbar(list(services_set), label="Checking service units..", show_pos=True) as bar: + with progressbar(list(services_set), label="Checking service units..", show_pos=True) as bar: for service in bar: remove_masked_unit(services_set, service, lambda srv: f"pod@{srv}.service") def add_wants_to_image_units(_image_units: Set[str], unit: str): - wants = systemctl.show("--property=Wants", "--value", unit) - wants_list = ( - wants.stdout.strip().decode(encoding="utf-8", errors="replace").split(" ") - ) + wants_output = sh.systemctl.show("--property=Wants", "--value", unit) + wants_list = wants_output.strip().split(" ") logging.debug(f"{unit} wants: {repr(wants_list)}") for next_unit in wants_list: if next_unit.startswith("image@") and next_unit.endswith(".service"): @@ -55,7 +57,7 @@ def resolve_image_units(): image_units: Set[str] = set() - with click.progressbar( + with progressbar( length=len(services_set) * 2, label="Collecting container image services.." ) as bar: for service in services_set: @@ -77,7 +79,7 @@ def resolve_image_units(): new_image_units ) # add new image units to all image units - with click.progressbar( + with progressbar( list(image_units), label="Checking container image units..", show_pos=True ) as bar: for image_unit in bar: @@ -88,39 +90,47 @@ def resolve_image_units(): @click.command() -@click.option("--verbose", is_flag=True, default=False, help="Enable INFO logging") -def main(verbose): - if verbose: - logging.root.setLevel(logging.INFO) +@click.option("-v", "--verbose", count=True, help="Enable INFO logging, use twice for DEBUG") +def main(verbose: int): + if verbose > 0: + if verbose > 2: + logging.root.setLevel(logging.DEBUG) + elif verbose == 2: + logging.root.setLevel(logging.DEBUG) + shlogger = logging.getLogger("sh") + shlogger.setLevel(logging.INFO) + else: + logging.root.setLevel(logging.INFO) + + global progressbar + progressbar = hidden_progressbar image_units = resolve_image_units() image_tags: Set[str] = set() - with click.progressbar(image_units, label="Collecting container image tags..") as bar: + with progressbar(image_units, label="Collecting container image tags..") as bar: for image_unit in bar: - environment = systemctl.show( + environment_output = sh.systemctl.show( "--property=Environment", "--value", image_unit, ) - environment_list = ( - environment.stdout.strip() - .decode(encoding="utf-8", errors="replace") - .split(" ") - ) + environment_list = environment_output.strip().split(" ") logging.debug(f"{image_unit} environment: {repr(environment_list)}") for envvar in environment_list: search_str = "IMAGE_TAG=" if envvar.startswith(search_str): - image_tags.add(envvar[len(search_str) :]) + keylen = len(search_str) + image_tags.add(envvar[keylen:]) started_processes = [] - with click.progressbar( + with progressbar( length=len(image_tags), label="Untagging container images..", show_pos=True ) as bar: for image_tag in image_tags: - process = podman.untag( + process = sh.podman.untag( image_tag, + _return_cmd=True, _bg=True, _err_to_out=True, _done=lambda cmd, success, exit_code: bar.update(1), @@ -131,7 +141,7 @@ def main(verbose): for p in started_processes: try: p.wait() - except sh.ErrorReturnCode as error: + except sh.ErrorReturnCode: # ignore missing image tags if "image not known".encode() in p.stdout: pass @@ -139,18 +149,16 @@ def main(verbose): raise started_processes = [] - with click.progressbar( + with progressbar( length=len(image_units), label="Building images..", show_pos=True ) as bar: semaphore = multiprocessing.Semaphore(8) for image_unit in image_units: try: - systemctl("reset-failed", image_unit, _bg=False, _err_to_out=True) + sh.systemctl("reset-failed", image_unit, _bg=False, _err_to_out=True) except sh.ErrorReturnCode as error: if f"Unit {image_unit} not loaded".encode() in error.stdout: - logging.info( - f"Not resetting failed state for {image_unit}, unit not loaded" - ) + logging.info(f"Not resetting failed state for {image_unit}, unit not loaded") else: raise @@ -162,7 +170,9 @@ def main(verbose): logging.warning(f"{cmd.cmd}{tuple(cmd.call_args)} completed with exit code {exit_code}") semaphore.release() - process = systemctl.restart(image_unit, _bg=True, _done=restart_done) + process = sh.systemctl.restart( + image_unit, _return_cmd=True, _bg=True, _done=restart_done + ) started_processes.append(process) # join processes [p.wait() for p in started_processes] diff --git a/main.py b/main.py index 3e3c490..9b1e8c2 100644 --- a/main.py +++ b/main.py @@ -1,11 +1,12 @@ +import datetime import json import logging import os import pathlib +import random import sys import threading import traceback -from datetime import datetime, timedelta from queue import SimpleQueue from signal import signal, SIGHUP, SIGINT, SIGTERM, setitimer, SIGALRM, ITIMER_REAL, SIGUSR1, SIGUSR2, strsignal @@ -14,6 +15,7 @@ import sh SERVICES_BASE_PATH = "/infra/services/" POD_CHECK_TIME = 120.0 +RAND_OFFSET_MAX_SECONDS = 10.0 shlog = sh.bake(_out=sys.stdout, _err=sys.stderr) sdnotify = sh.Command("systemd-notify") @@ -42,7 +44,7 @@ class PodKeeper: self.reloading = threading.Event() self.checking = threading.Event() self.waiter = threading.Event() - self.last_check = datetime.utcnow() + self.last_check = datetime.datetime.now(datetime.UTC) self.passing_signal = threading.Event() self.pass_signal_nums = SimpleQueue() @@ -116,7 +118,7 @@ class PodKeeper: traceback.print_exc() def check_pod(self): - new_timestamp = datetime.utcnow() + new_timestamp = datetime.datetime.now(datetime.UTC) inspect_command = sh.podman.pod.inspect(self.podname, _return_cmd=True) multiple_descriptions = json.loads(inspect_command.stdout) if not multiple_descriptions: @@ -128,7 +130,7 @@ class PodKeeper: for container in pod_description["Containers"]: if container["State"] != "running": print(f"Container {container['Name']} exited", file=sys.stderr, flush=True) - logs_since = self.last_check - timedelta(seconds=POD_CHECK_TIME) + logs_since = self.last_check - datetime.timedelta(seconds=POD_CHECK_TIME) print(f"Log since last check (-{POD_CHECK_TIME}s):\n", file=sys.stderr, flush=True) shlog.podman.logs('--since', logs_since.isoformat(), container['Name'], _out=sys.stderr) self.stopping.set() @@ -185,7 +187,8 @@ def main(network, log_driver, log_level, replace, remove, verbose, identifier): signal(SIGALRM, keeper.check) signal(SIGUSR1, keeper.passthrough) signal(SIGUSR2, keeper.passthrough) - setitimer(ITIMER_REAL, 3.0, POD_CHECK_TIME) + random_offset = random.random() * RAND_OFFSET_MAX_SECONDS + setitimer(ITIMER_REAL, 1.0 + random_offset, POD_CHECK_TIME + random_offset) keeper.run()