cat_gateway/service/utilities/
catch_panic.rs

1//! Handle catching panics created by endpoints, logging them and properly responding.
2use std::{any::Any, backtrace::Backtrace, cell::RefCell};
3
4use chrono::prelude::*;
5use panic_message::panic_message;
6use poem::{http::StatusCode, middleware::PanicHandler, IntoResponse};
7use poem_openapi::payload::Json;
8use serde_json::json;
9use tracing::debug;
10
11use crate::{
12    service::{
13        common::responses::code_500_internal_server_error::InternalServerError,
14        utilities::health::{get_live_counter, inc_live_counter, set_not_live},
15    },
16    settings::Settings,
17};
18
/// Panic handler used by the service.
///
/// Every panic that reaches it is logged and converted into a `500 Internal Server
/// Error` response rather than taking the process down.
/// Each handled panic bumps the liveness counter; once that counter rises above the
/// configured threshold the service is flagged as NOT LIVE.
/// Being NOT LIVE is what should prompt Kubernetes to restart the service.
#[derive(Clone)]
pub(crate) struct ServicePanicHandler;
26
27/// Implementation of the special `/panic` endpoint,
28/// which is used only for testing purposes and enabled for all networks except `Mainnet`
29#[poem::handler]
30#[allow(clippy::panic)]
31pub(crate) fn panic_endpoint() {
32    panic!("Intentional panicking")
33}
34
// Per-thread scratch storage for the customized panic handler.
// The panic hook writes its findings here so they can be added to the log entry
// once the panic has been caught.
thread_local! {
    /// Formatted backtrace recorded by the panic hook for the latest panic on this thread.
    static BACKTRACE: RefCell<Option<String>> = const { RefCell::new(None) };
    /// Formatted source location recorded by the panic hook for the latest panic on this thread.
    static LOCATION: RefCell<Option<String>> = const { RefCell::new(None) };
}
41
42/// Sets a custom panic hook to capture the Backtrace and Panic Location for logging
43/// purposes. This hook gets called BEFORE we catch it.  So the thread local variables
44/// stored here are valid when processing the panic capture.
45pub(crate) fn set_panic_hook() {
46    std::panic::set_hook(Box::new(|panic_info| {
47        // Get the backtrace and format it.
48        let raw_trace = Backtrace::force_capture();
49        let trace = format!("{raw_trace}");
50        BACKTRACE.with(move |b| b.borrow_mut().replace(trace));
51
52        // Get the location and format it.
53        let location = match panic_info.location() {
54            Some(location) => format!("{location}"),
55            None => "Unknown".to_string(),
56        };
57        LOCATION.with(move |l| l.borrow_mut().replace(location));
58    }));
59}
60
61impl PanicHandler for ServicePanicHandler {
62    type Response = poem::Response;
63
64    /// Handle a panic.
65    /// Log the panic and respond with a 500 with appropriate data.
66    fn get_response(
67        &self,
68        err: Box<dyn Any + Send + 'static>,
69    ) -> Self::Response {
70        // Increment the counter used for liveness checks.
71        inc_live_counter();
72
73        let current_count = get_live_counter();
74        debug!(
75            live_counter = current_count,
76            "Handling service panic response"
77        );
78
79        // If current count is above the threshold, then flag the system as NOT live.
80        if current_count > Settings::service_live_counter_threshold() {
81            set_not_live();
82        }
83
84        let server_err = InternalServerError::new(None);
85
86        // Get the unique identifier for this panic, so we can find it in the logs.
87        let panic_identifier = server_err.id().to_string();
88
89        // Get the message from the panic as best we can.
90        let err_msg = panic_message(&err);
91
92        // This is the location of the panic.
93        let location = match LOCATION.with(|l| l.borrow_mut().take()) {
94            Some(location) => location,
95            None => "Unknown".to_string(),
96        };
97
98        // This is the backtrace of the panic.
99        let backtrace = match BACKTRACE.with(|b| b.borrow_mut().take()) {
100            Some(backtrace) => backtrace,
101            None => "Unknown".to_string(),
102        };
103
104        // For some reason logging doesn't work here.
105        // So manually form a log message and send to stdout.
106        let time = chrono::Utc::now().to_rfc3339_opts(SecondsFormat::Nanos, true);
107
108        let json_log = json!({
109            "backtrace": backtrace,
110            "location": location,
111            "message": err_msg,
112            "id": panic_identifier,
113            "level": "PANIC",
114            "timestamp": time
115        })
116        .to_string();
117
118        println!("{json_log}");
119
120        let mut resp = Json(server_err).into_response();
121        resp.set_status(StatusCode::INTERNAL_SERVER_ERROR);
122        resp
123    }
124}