debug logs
This commit is contained in:
@@ -256,6 +256,7 @@ class Backend:
|
|||||||
self.backend_errored(str(e))
|
self.backend_errored(str(e))
|
||||||
|
|
||||||
async def _start_tracking(self) -> None:
|
async def _start_tracking(self) -> None:
|
||||||
|
log.info("Starting tracking tasks (read_logs, send_metrics_loop, healthcheck, send_delete_requests_loop)")
|
||||||
task_names = ["read_logs", "send_metrics_loop", "healthcheck", "send_delete_requests_loop"]
|
task_names = ["read_logs", "send_metrics_loop", "healthcheck", "send_delete_requests_loop"]
|
||||||
results = await gather(
|
results = await gather(
|
||||||
self.__read_logs(),
|
self.__read_logs(),
|
||||||
@@ -265,6 +266,7 @@ class Backend:
|
|||||||
return_exceptions=True
|
return_exceptions=True
|
||||||
)
|
)
|
||||||
# If we get here, one or more tasks exited (they should run forever)
|
# If we get here, one or more tasks exited (they should run forever)
|
||||||
|
log.error(f"CRITICAL: _start_tracking gather returned! This should never happen. Results: {results}")
|
||||||
for name, result in zip(task_names, results):
|
for name, result in zip(task_names, results):
|
||||||
if isinstance(result, Exception):
|
if isinstance(result, Exception):
|
||||||
log.error(f"Tracking task '{name}' crashed with exception: {result}", exc_info=result)
|
log.error(f"Tracking task '{name}' crashed with exception: {result}", exc_info=result)
|
||||||
|
|||||||
@@ -119,9 +119,14 @@ class Metrics:
|
|||||||
await self.__send_delete_requests_and_reset()
|
await self.__send_delete_requests_and_reset()
|
||||||
|
|
||||||
async def _send_metrics_loop(self) -> Awaitable[NoReturn]:
|
async def _send_metrics_loop(self) -> Awaitable[NoReturn]:
|
||||||
|
loop_count = 0
|
||||||
while True:
|
while True:
|
||||||
await sleep(METRICS_UPDATE_INTERVAL)
|
await sleep(METRICS_UPDATE_INTERVAL)
|
||||||
|
loop_count += 1
|
||||||
elapsed = time.time() - self.last_metric_update
|
elapsed = time.time() - self.last_metric_update
|
||||||
|
# Log heartbeat every 30 seconds to confirm loop is running
|
||||||
|
if loop_count % 30 == 0:
|
||||||
|
log.debug(f"[heartbeat] metrics loop alive, loop_count={loop_count}, model_loaded={self.system_metrics.model_is_loaded}")
|
||||||
if self.system_metrics.model_is_loaded is False and elapsed >= 10:
|
if self.system_metrics.model_is_loaded is False and elapsed >= 10:
|
||||||
log.debug(f"sending loading model metrics after {int(elapsed)}s wait")
|
log.debug(f"sending loading model metrics after {int(elapsed)}s wait")
|
||||||
await self.__send_metrics_and_reset()
|
await self.__send_metrics_and_reset()
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
from typing import List
|
from typing import List
|
||||||
import ssl
|
import ssl
|
||||||
from asyncio import run, gather
|
from asyncio import run, gather
|
||||||
@@ -12,7 +14,25 @@ from aiohttp import web
|
|||||||
log = logging.getLogger(__file__)
|
log = logging.getLogger(__file__)
|
||||||
|
|
||||||
|
|
||||||
|
def _setup_signal_handlers():
    """Install handlers that log termination signals before the process exits.

    For each of SIGTERM, SIGINT and SIGHUP that this platform defines, a
    handler is registered which logs the signal at error level, flushes
    stdout/stderr and exits with status ``128 + signum``.

    Registration is best-effort: a signal the platform does not define is
    skipped, and a signal whose handler cannot be installed (``OSError`` or
    ``ValueError`` from ``signal.signal``) is left with its current handler.
    """

    def signal_handler(signum, frame):
        sig_name = signal.Signals(signum).name
        log.error(f"SIGNAL RECEIVED: {sig_name} ({signum}) - process is being terminated")
        # Flush explicitly so the message above is not lost in a buffer
        # when the process goes away.
        sys.stdout.flush()
        sys.stderr.flush()
        sys.exit(128 + signum)

    # Look the signals up by name: signal.SIGHUP is not defined on every
    # platform (e.g. Windows), and a direct attribute access would raise
    # AttributeError before the try/except below had a chance to skip it.
    for sig_name in ("SIGTERM", "SIGINT", "SIGHUP"):
        sig = getattr(signal, sig_name, None)
        if sig is None:
            continue
        try:
            signal.signal(sig, signal_handler)
        except (OSError, ValueError) as exc:
            # Deliberately non-fatal: these handlers are diagnostics only.
            log.debug(f"could not install handler for {sig_name}: {exc}")
|
||||||
|
|
||||||
|
|
||||||
def start_server(backend: Backend, routes: List[web.RouteDef], **kwargs):
|
def start_server(backend: Backend, routes: List[web.RouteDef], **kwargs):
|
||||||
|
_setup_signal_handlers()
|
||||||
try:
|
try:
|
||||||
log.debug("getting certificate...")
|
log.debug("getting certificate...")
|
||||||
use_ssl = os.environ.get("USE_SSL", "false") == "true"
|
use_ssl = os.environ.get("USE_SSL", "false") == "true"
|
||||||
|
|||||||
Reference in New Issue
Block a user