cat_gateway/service/utilities/
catch_panic.rs

1//! Handle catching panics created by endpoints, logging them and properly responding.
2use std::{any::Any, backtrace::Backtrace, cell::RefCell};
3
4use chrono::prelude::*;
5use panic_message::panic_message;
6use poem::{http::StatusCode, middleware::PanicHandler, IntoResponse};
7use poem_openapi::payload::Json;
8use serde_json::json;
9use tracing::debug;
10
11use crate::{
12    service::{
13        common::responses::code_500_internal_server_error::InternalServerError,
14        utilities::health::{get_live_counter, inc_live_counter, set_not_live},
15    },
16    settings::Settings,
17};
18
/// Panic handler installed for the service's endpoints.
///
/// Its `PanicHandler` implementation converts a caught panic into a
/// `500 Internal Server Error` response rather than letting the service crash, and
/// emits a log record describing the panic.
/// Every handled panic bumps the liveness counter; once that counter exceeds the
/// configured threshold the service is flagged as NOT LIVE, which is intended to make
/// Kubernetes restart it.
#[derive(Clone)]
pub(crate) struct ServicePanicHandler;
26
27/// Implementation of the special `/panic` endpoint,
28/// which is used only for testing purposes and enabled for all networks except `Mainnet`
29#[poem::handler]
30#[allow(clippy::panic)]
31pub(crate) fn panic_endpoint() {
32    panic!("Intentional panicking")
33}
34
// Per-thread storage used by the customized panic handling.
// The panic hook fills these slots so the details can be included in the log record
// written when the panic is subsequently handled.
thread_local! {
    /// Formatted backtrace of the most recent panic on this thread, if not yet consumed.
    static BACKTRACE: RefCell<Option<String>> = const { RefCell::new(None) };
    /// Formatted source location of the most recent panic on this thread, if not yet
    /// consumed.
    static LOCATION: RefCell<Option<String>> = const { RefCell::new(None) };
}
41
42/// Sets a custom panic hook to capture the Backtrace and Panic Location for logging
43/// purposes. This hook gets called BEFORE we catch it.  So the thread local variables
44/// stored here are valid when processing the panic capture.
45pub(crate) fn set_panic_hook() {
46    std::panic::set_hook(Box::new(|panic_info| {
47        // Get the backtrace and format it.
48        let raw_trace = Backtrace::force_capture();
49        let trace = format!("{raw_trace}");
50        BACKTRACE.with(move |b| b.borrow_mut().replace(trace));
51
52        // Get the location and format it.
53        let location = match panic_info.location() {
54            Some(location) => format!("{location}"),
55            None => "Unknown".to_string(),
56        };
57        LOCATION.with(move |l| l.borrow_mut().replace(location));
58    }));
59}
60
61impl PanicHandler for ServicePanicHandler {
62    type Response = poem::Response;
63
64    /// Handle a panic.
65    /// Log the panic and respond with a 500 with appropriate data.
66    fn get_response(&self, err: Box<dyn Any + Send + 'static>) -> Self::Response {
67        // Increment the counter used for liveness checks.
68        inc_live_counter();
69
70        let current_count = get_live_counter();
71        debug!(
72            live_counter = current_count,
73            "Handling service panic response"
74        );
75
76        // If current count is above the threshold, then flag the system as NOT live.
77        if current_count > Settings::service_live_counter_threshold() {
78            set_not_live();
79        }
80
81        let server_err = InternalServerError::new(None);
82
83        // Get the unique identifier for this panic, so we can find it in the logs.
84        let panic_identifier = server_err.id().to_string();
85
86        // Get the message from the panic as best we can.
87        let err_msg = panic_message(&err);
88
89        // This is the location of the panic.
90        let location = match LOCATION.with(|l| l.borrow_mut().take()) {
91            Some(location) => location,
92            None => "Unknown".to_string(),
93        };
94
95        // This is the backtrace of the panic.
96        let backtrace = match BACKTRACE.with(|b| b.borrow_mut().take()) {
97            Some(backtrace) => backtrace,
98            None => "Unknown".to_string(),
99        };
100
101        // For some reason logging doesn't work here.
102        // So manually form a log message and send to stdout.
103        let time = chrono::Utc::now().to_rfc3339_opts(SecondsFormat::Nanos, true);
104
105        let json_log = json!({
106            "backtrace": backtrace,
107            "location": location,
108            "message": err_msg,
109            "id": panic_identifier,
110            "level": "PANIC",
111            "timestamp": time
112        })
113        .to_string();
114
115        println!("{json_log}");
116
117        let mut resp = Json(server_err).into_response();
118        resp.set_status(StatusCode::INTERNAL_SERVER_ERROR);
119        resp
120    }
121}