Improve production.py robustness with orphan detection and kill command
This commit is contained in:
parent
8365b38673
commit
a35c60ce63
121
production.py
121
production.py
|
|
@ -52,22 +52,65 @@ def _process_alive(pid: int) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def _find_all_pids() -> list[int]:
    """Return the PIDs of Gunicorn processes that belong to this deployment.

    The process table is read with ``ps`` and a process is selected when its
    command line mentions ``gunicorn`` together with either ``BASE_DIR`` or
    ``CONF_FILE``. Invocations of ``production.py`` itself and the calling
    process are never reported.

    Returns:
        A sorted list of unique PIDs. The list is empty when nothing matches
        or when ``ps`` cannot be executed.
    """
    try:
        out = subprocess.check_output(['ps', 'wax', '-o', 'pid,command'], text=True)
    except (subprocess.SubprocessError, OSError):
        # Best effort: without a usable 'ps' we simply report nothing.
        return []

    # str() keeps the substring test valid even if the constants are Path objects.
    markers = (str(BASE_DIR), str(CONF_FILE))
    own_pid = os.getpid()

    pids: set[int] = set()
    for line in out.splitlines():
        pid_field, _, command = line.strip().partition(' ')
        if not pid_field.isdecimal():
            # Header row ("PID COMMAND") or a malformed line.
            continue
        if 'gunicorn' not in command or 'production.py' in command:
            continue
        if not any(marker in command for marker in markers):
            continue
        pid = int(pid_field)
        if pid != own_pid:
            pids.add(pid)
    return sorted(pids)
|
||||
|
||||
|
||||
def _is_port_in_use(port: int) -> bool:
|
||||
"""Check if the given port is already occupied."""
|
||||
import socket
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.settimeout(0.5)
|
||||
return s.connect_ex(('127.0.0.1', port)) == 0
|
||||
|
||||
|
||||
def cmd_status() -> None:
    """Print whether the server is running, orphaned, or stopped.

    Three outcomes are reported:

    * The PID recorded in ``PID_FILE`` is alive: print the master PID and,
      if other matching processes exist, how many.
    * The PID file does not point at a live process but matching processes
      were found: print a warning listing them as orphans.
    * Nothing is running: remove a stale PID file (if any) and print
      ``Stopped``.
    """
    pid_file_val = _read_pid()
    all_pids = _find_all_pids()

    if pid_file_val and _process_alive(pid_file_val):
        print(f'Running (Master PID {pid_file_val})')
        # Exclude the master explicitly rather than assuming it is always
        # part of the 'ps' result.
        workers = [p for p in all_pids if p != pid_file_val]
        if workers:
            print(f'Workers ({len(workers)} processes)')
    elif all_pids:
        print(f'Warning: Found {len(all_pids)} orphaned processes not tracked in {PID_FILE}:')
        print(f'  PIDs: {", ".join(map(str, all_pids))}')
        print('  Action: Use "python production.py kill" to clean these up.')
    else:
        if pid_file_val:
            # The PID file is stale; removing it is best effort.
            try:
                os.remove(PID_FILE)
            except OSError:
                pass
        print('Stopped')
|
||||
|
||||
|
||||
def cmd_start(host: str, port: int, workers: int) -> None:
|
||||
pid = _read_pid()
|
||||
if pid and _process_alive(pid):
|
||||
print(f'Already running (PID {pid}). Use "restart" to reload.')
|
||||
all_pids = _find_all_pids()
|
||||
|
||||
if (pid and _process_alive(pid)) or all_pids:
|
||||
print(f'Already running (Master PID {pid or all_pids[0]}). Use "restart" to reload.')
|
||||
return
|
||||
|
||||
if _is_port_in_use(port):
|
||||
sys.exit(f'Error: Port {port} is already in use by another program (possibly an old zombie instance).')
|
||||
|
||||
gunicorn = _gunicorn_bin()
|
||||
|
||||
log = open(LOG_FILE, 'a')
|
||||
|
|
@ -101,22 +144,36 @@ def cmd_start(host: str, port: int, workers: int) -> None:
|
|||
print(f'Logs: {LOG_FILE}')
|
||||
|
||||
|
||||
def cmd_stop(graceful: bool = True) -> bool:
|
||||
def cmd_stop(graceful: bool = True, force: bool = False) -> bool:
|
||||
pid = _read_pid()
|
||||
all_pids = _find_all_pids()
|
||||
|
||||
if not pid or not _process_alive(pid):
|
||||
print('Not running.')
|
||||
if all_pids:
|
||||
if force:
|
||||
print(f"Cleaning up {len(all_pids)} orphaned processes...")
|
||||
return cmd_kill()
|
||||
print(f'Error: PID file is missing, but {len(all_pids)} orphaned processes were found.')
|
||||
print('Use "python production.py kill" to force stop.')
|
||||
else:
|
||||
print('Not running.')
|
||||
if pid:
|
||||
os.remove(PID_FILE)
|
||||
try: os.remove(PID_FILE)
|
||||
except OSError: pass
|
||||
return False
|
||||
|
||||
# Standard shutdown
|
||||
sig = signal.SIGTERM if graceful else signal.SIGKILL
|
||||
os.kill(pid, sig)
|
||||
|
||||
# Wait up to 10 s for a clean shutdown.
|
||||
print(f'Stopping PID {pid}...', end='', flush=True)
|
||||
for _ in range(20):
|
||||
time.sleep(0.5)
|
||||
print('.', end='', flush=True)
|
||||
if not _process_alive(pid):
|
||||
break
|
||||
print(' Done.')
|
||||
|
||||
if _process_alive(pid):
|
||||
# Force-kill if still alive after graceful period.
|
||||
|
|
@ -124,9 +181,39 @@ def cmd_stop(graceful: bool = True) -> bool:
|
|||
time.sleep(0.5)
|
||||
|
||||
if os.path.exists(PID_FILE):
|
||||
os.remove(PID_FILE)
|
||||
try: os.remove(PID_FILE)
|
||||
except OSError: pass
|
||||
|
||||
print(f'Stopped (was PID {pid})')
|
||||
# Double check for ANY remaining processes in this directory
|
||||
remaining = _find_all_pids()
|
||||
if remaining and force:
|
||||
for p in remaining:
|
||||
try: os.kill(p, signal.SIGKILL)
|
||||
except OSError: pass
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def cmd_kill() -> bool:
    """Send SIGKILL to every process reported by ``_find_all_pids``.

    ``PID_FILE`` is removed afterwards whether or not anything was killed.

    Returns:
        True if at least one process was found and signalled, False if there
        was nothing to kill.
    """
    pids = _find_all_pids()
    failed: list[int] = []

    if not pids:
        print("Nothing to kill.")
    else:
        print(f"Killing {len(pids)} processes...")
        for pid in pids:
            print(f"  - {pid}")
            try:
                os.kill(pid, signal.SIGKILL)
            except ProcessLookupError:
                # Already exited between the 'ps' snapshot and the signal.
                pass
            except OSError as exc:
                failed.append(pid)
                print(f"    could not kill {pid}: {exc}")

    # Remove the PID file without a separate existence check, which would
    # race with other invocations of this script.
    try:
        os.remove(PID_FILE)
    except OSError:
        pass

    if not pids:
        return False
    if failed:
        print(f"Warning: {len(failed)} processes could not be killed.")
    else:
        print("Clean slate achieved.")
    return True
|
||||
|
||||
|
||||
|
|
@ -159,9 +246,11 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||
|
||||
p_start = sub.add_parser('start', help='Start Gunicorn in the background')
|
||||
p_restart = sub.add_parser('restart', help='Gracefully reload (SIGHUP) or start if stopped')
|
||||
sub.add_parser('stop', help='Stop Gunicorn gracefully')
|
||||
sub.add_parser('status', help='Show whether Gunicorn is running')
|
||||
p_stop = sub.add_parser('stop', help='Stop Gunicorn gracefully')
|
||||
sub.add_parser('kill', help='Forcefully kill all Bastebin processes')
|
||||
sub.add_parser('status', help='Show whether Gunicorn is running')
|
||||
|
||||
p_stop.add_argument('--force', action='store_true', help='Kill all orphaned processes')
|
||||
_add_server_args(p_start)
|
||||
_add_server_args(p_restart)
|
||||
|
||||
|
|
@ -209,7 +298,9 @@ def main() -> None:
|
|||
if args.command == 'start':
|
||||
cmd_start(host, port, workers)
|
||||
elif args.command == 'stop':
|
||||
cmd_stop()
|
||||
cmd_stop(force=args.force)
|
||||
elif args.command == 'kill':
|
||||
cmd_kill()
|
||||
elif args.command == 'restart':
|
||||
cmd_restart(host, port, workers)
|
||||
elif args.command == 'status':
|
||||
|
|
|
|||
Loading…
Reference in New Issue