Improve production.py robustness with orphan detection and kill command

This commit is contained in:
ComputerTech 2026-03-31 12:19:09 +01:00
parent 8365b38673
commit a35c60ce63
1 changed file with 106 additions and 15 deletions

View File

@ -52,22 +52,65 @@ def _process_alive(pid: int) -> bool:
return False
def _find_all_pids() -> list[int]:
    """Return the PIDs of Gunicorn processes associated with this directory.

    Runs ``ps`` and keeps every process whose command line mentions
    ``gunicorn`` together with either ``BASE_DIR`` or ``CONF_FILE``. Any
    command line containing ``production.py`` is skipped, and so is the
    current process, so this script never reports itself.

    Returns:
        A sorted list of unique PIDs. The list is empty when nothing
        matches or when ``ps`` cannot be run.
    """
    markers = (str(BASE_DIR), str(CONF_FILE))
    own_pid = os.getpid()

    try:
        out = subprocess.check_output(['ps', 'wax', '-o', 'pid,command'], text=True)
    except (subprocess.SubprocessError, OSError):
        # Best effort: OSError covers a missing or non-executable ``ps``.
        # Without a process listing we simply report that nothing was found.
        return []

    pids = set()
    for line in out.splitlines():
        # Each row is "<pid> <command ...>"; the header row fails int() below.
        pid_field, _, command = line.strip().partition(' ')
        if 'gunicorn' not in command or 'production.py' in command:
            continue
        if not any(marker in command for marker in markers):
            continue
        try:
            pid = int(pid_field)
        except ValueError:
            continue
        if pid != own_pid:
            pids.add(pid)
    return sorted(pids)
def _is_port_in_use(port: int, host: str = '127.0.0.1') -> bool:
    """Check whether something is already accepting connections on a port.

    Args:
        port: TCP port number to probe.
        host: IPv4 address to probe. Defaults to the loopback address,
            which is what this check always used before.

    Returns:
        True if a TCP connection to ``(host, port)`` succeeds within half a
        second, False otherwise.
    """
    import socket

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.settimeout(0.5)
        # connect_ex returns an errno instead of raising; 0 means connected.
        return probe.connect_ex((host, port)) == 0
def cmd_status() -> None:
    """Print whether the server is running, orphaned, or stopped.

    Three outcomes are reported:

    * The PID recorded in ``PID_FILE`` is alive: print the master PID and,
      if ``ps`` found further matching processes, how many there are.
    * No live master, but ``ps`` still finds matching processes: warn that
      they are orphaned and point at the ``kill`` command.
    * Nothing is running: remove a stale ``PID_FILE`` if one was read and
      print ``Stopped``.
    """
    pid_file_val = _read_pid()
    all_pids = _find_all_pids()

    if pid_file_val and _process_alive(pid_file_val):
        print(f'Running (Master PID {pid_file_val})')
        # Count every matching process other than the master, so the number
        # stays right even when the master itself is not in the ps result.
        workers = [p for p in all_pids if p != pid_file_val]
        if workers:
            print(f'Workers ({len(workers)} processes)')
    elif all_pids:
        print(f'Warning: Found {len(all_pids)} orphaned processes not tracked in {PID_FILE}:')
        print(f' PIDs: {", ".join(map(str, all_pids))}')
        print(' Action: Use "python production.py kill" to clean these up.')
    else:
        if pid_file_val:
            # Stale PID file; removal is best effort (it may already be gone).
            try:
                os.remove(PID_FILE)
            except OSError:
                pass
        print('Stopped')
def cmd_start(host: str, port: int, workers: int) -> None:
pid = _read_pid()
if pid and _process_alive(pid):
print(f'Already running (PID {pid}). Use "restart" to reload.')
all_pids = _find_all_pids()
if (pid and _process_alive(pid)) or all_pids:
print(f'Already running (Master PID {pid or all_pids[0]}). Use "restart" to reload.')
return
if _is_port_in_use(port):
sys.exit(f'Error: Port {port} is already in use by another program (possibly an old zombie instance).')
gunicorn = _gunicorn_bin()
log = open(LOG_FILE, 'a')
@ -101,22 +144,36 @@ def cmd_start(host: str, port: int, workers: int) -> None:
print(f'Logs: {LOG_FILE}')
def cmd_stop(graceful: bool = True) -> bool:
def cmd_stop(graceful: bool = True, force: bool = False) -> bool:
pid = _read_pid()
all_pids = _find_all_pids()
if not pid or not _process_alive(pid):
print('Not running.')
if all_pids:
if force:
print(f"Cleaning up {len(all_pids)} orphaned processes...")
return cmd_kill()
print(f'Error: PID file is missing, but {len(all_pids)} orphaned processes were found.')
print('Use "python production.py kill" to force stop.')
else:
print('Not running.')
if pid:
os.remove(PID_FILE)
try: os.remove(PID_FILE)
except OSError: pass
return False
# Standard shutdown
sig = signal.SIGTERM if graceful else signal.SIGKILL
os.kill(pid, sig)
# Wait up to 10 s for a clean shutdown.
print(f'Stopping PID {pid}...', end='', flush=True)
for _ in range(20):
time.sleep(0.5)
print('.', end='', flush=True)
if not _process_alive(pid):
break
print(' Done.')
if _process_alive(pid):
# Force-kill if still alive after graceful period.
@ -124,9 +181,39 @@ def cmd_stop(graceful: bool = True) -> bool:
time.sleep(0.5)
if os.path.exists(PID_FILE):
os.remove(PID_FILE)
try: os.remove(PID_FILE)
except OSError: pass
print(f'Stopped (was PID {pid})')
# Double check for ANY remaining processes in this directory
remaining = _find_all_pids()
if remaining and force:
for p in remaining:
try: os.kill(p, signal.SIGKILL)
except OSError: pass
return True
def cmd_kill() -> bool:
    """Send SIGKILL to every Bastebin-related process and drop the PID file.

    The targets are whatever ``_find_all_pids()`` reports. ``PID_FILE`` is
    removed afterwards in every case, on a best-effort basis.

    Returns:
        True if at least one matching process was found (a kill was
        attempted), False if there was nothing to kill.
    """
    pids = _find_all_pids()
    survivors = []

    if pids:
        print(f"Killing {len(pids)} processes...")
        for pid in pids:
            print(f" - {pid}")
            try:
                os.kill(pid, signal.SIGKILL)
            except ProcessLookupError:
                # The process exited between the ps scan and the kill.
                pass
            except OSError as exc:
                # e.g. PermissionError: report it instead of claiming success.
                survivors.append(pid)
                print(f"   could not kill {pid}: {exc}")
    else:
        print("Nothing to kill.")

    # Remove directly rather than checking existence first, which would race
    # with another invocation deleting the file in between.
    try:
        os.remove(PID_FILE)
    except OSError:
        pass

    if not pids:
        return False
    if survivors:
        print(f"Warning: {len(survivors)} processes could not be killed: "
              f"{', '.join(map(str, survivors))}")
    else:
        print("Clean slate achieved.")
    return True
@ -159,9 +246,11 @@ def _build_parser() -> argparse.ArgumentParser:
p_start = sub.add_parser('start', help='Start Gunicorn in the background')
p_restart = sub.add_parser('restart', help='Gracefully reload (SIGHUP) or start if stopped')
sub.add_parser('stop', help='Stop Gunicorn gracefully')
sub.add_parser('status', help='Show whether Gunicorn is running')
p_stop = sub.add_parser('stop', help='Stop Gunicorn gracefully')
sub.add_parser('kill', help='Forcefully kill all Bastebin processes')
sub.add_parser('status', help='Show whether Gunicorn is running')
p_stop.add_argument('--force', action='store_true', help='Kill all orphaned processes')
_add_server_args(p_start)
_add_server_args(p_restart)
@ -209,7 +298,9 @@ def main() -> None:
if args.command == 'start':
cmd_start(host, port, workers)
elif args.command == 'stop':
cmd_stop()
cmd_stop(force=args.force)
elif args.command == 'kill':
cmd_kill()
elif args.command == 'restart':
cmd_restart(host, port, workers)
elif args.command == 'status':