cat_gateway/db/index/schema/
mod.rs

1//! Index Schema
2
3use std::sync::Arc;
4
5use anyhow::Context;
6use cardano_chain_follower::Network;
7use handlebars::Handlebars;
8use scylla::client::session::Session;
9use serde_json::json;
10use tracing::error;
11
12use crate::{settings::cassandra_db, utils::blake2b_hash::generate_uuid_string_from_data};
13
/// Keyspace creation statement (Handlebars template).
///
/// Rendered by `create_namespace` with the `keyspace` and `options` values before it is
/// prepared and executed against the database.
const CREATE_NAMESPACE_CQL: &str = include_str!("./cql/namespace.cql");
16
17/// All Schema Creation Statements
18const SCHEMAS: &[(&str, &str)] = &[
19    (
20        // Sync Status Table Schema
21        include_str!("./cql/sync_status.cql"),
22        "Create Sync Status Table",
23    ),
24    (
25        // TXO by Stake Address Table Schema
26        include_str!("./cql/txo_by_stake_table.cql"),
27        "Create Table TXO By Stake Address",
28    ),
29    (
30        // TXO Assets by Stake Address Table Schema
31        include_str!("./cql/txo_assets_by_stake_table.cql"),
32        "Create Table TXO Assets By Stake Address",
33    ),
34    (
35        // TXO Unstaked Table Schema
36        include_str!("cql/unstaked_txo_by_txn_id.cql"),
37        "Create Table Unstaked TXO By Txn Hash",
38    ),
39    (
40        // TXO Unstaked Assets Table Schema
41        include_str!("cql/unstaked_txo_assets_by_txn_id.cql"),
42        "Create Table Unstaked TXO Assets By Txn Hash",
43    ),
44    (
45        // TXI by Stake Address table schema.
46        include_str!("cql/txi_by_txn_id_table.cql"),
47        "Create Table TXI By Stake Address",
48    ),
49    (
50        // Stake Address/Registration Table Schema
51        include_str!("cql/stake_registration.cql"),
52        "Create Table Stake Registration",
53    ),
54    (
55        // CIP-36 Registration Table Schema
56        include_str!("cql/cip36_registration.cql"),
57        "Create Table CIP-36 Registration",
58    ),
59    (
60        // CIP-36 invalid registration table schema.
61        include_str!("cql/cip36_registration_invalid.cql"),
62        "Create Table CIP-36 Registration Invalid",
63    ),
64    (
65        // CIP-36 registration for vote key table schema.
66        include_str!("cql/cip36_registration_for_vote_key.cql"),
67        "Create Table CIP-36 Registration For a stake address",
68    ),
69    (
70        // RBAC registration table schema.
71        include_str!("cql/rbac_registration.cql"),
72        "Create Table RBAC Registration",
73    ),
74    (
75        // RBAC invalid registration table schema.
76        include_str!("cql/rbac_invalid_registration.cql"),
77        "Create Table Invalid RBAC Registration",
78    ),
79    (
80        // Catalyst ID for transaction ID table schema.
81        include_str!("cql/catalyst_id_for_txn_id.cql"),
82        "Create table Catalyst ID for transaction ID",
83    ),
84    (
85        // Catalyst ID for stake address table schema.
86        include_str!("cql/catalyst_id_for_stake_address.cql"),
87        "Create table Catalyst ID for stake address",
88    ),
89    (
90        // Catalyst ID for public key table schema.
91        include_str!("cql/catalyst_id_for_public_key.cql"),
92        "Create table Catalyst ID for public key",
93    ),
94];
95
/// Normalises a CQL query so that only its meaningful text remains.
///
/// Each line of `text` is processed independently:
///
/// 1. Everything from the first `--` on the line onwards is discarded as a comment. No
///    attempt is made to recognise `--` inside a quoted literal.
/// 2. Leading and trailing whitespace is dropped and every internal run of whitespace is
///    collapsed to a single space.
/// 3. Lines that end up empty are dropped entirely.
///
/// The result of this function is hashed to derive the schema version, so its output for
/// any given input must remain stable.
///
/// # Arguments
///
/// * `text`: A string slice that holds the query to be cleaned.
///
/// # Returns
///
/// A new string holding the surviving lines, in their original order, separated by a
/// single newline character (no trailing newline). Empty if nothing survives.
fn remove_comments_and_join_query_lines(text: &str) -> String {
    text.lines()
        .filter_map(|line| {
            // Keep only what precedes the first comment marker, if there is one.
            let code = line.split_once("--").map_or(line, |(before, _)| before);

            // `split_whitespace` never yields empty pieces, so re-joining with a single
            // space both trims the ends and squeezes internal whitespace runs.
            let squeezed = code.split_whitespace().collect::<Vec<&str>>().join(" ");

            (!squeezed.is_empty()).then_some(squeezed)
        })
        .collect::<Vec<String>>()
        .join("\n")
}
131
132/// Generates a unique schema version identifier based on the content of all CQL schemas.
133///
134/// This function processes each CQL schema, removes comments from its lines and joins
135/// them into a single string. It then sorts these processed strings to ensure consistency
136/// in schema versions regardless of their order in the list. Finally, it generates a UUID
137/// from a 127 bit hash of this sorted collection of schema contents, which serves as a
138/// unique identifier for the current version of all schemas.
139///
140/// # Returns
141///
142/// A string representing the UUID derived from the concatenated and cleaned CQL
143/// schema contents.
144fn generate_cql_schema_version() -> String {
145    // Where we will actually store the bytes we derive the UUID from.
146    let mut clean_schemas: Vec<String> = Vec::new();
147
148    // Iterate through each CQL schema and add it to the list of clean schemas documents.
149    for (schema, _) in SCHEMAS {
150        let schema = remove_comments_and_join_query_lines(schema);
151        if !schema.is_empty() {
152            clean_schemas.push(schema);
153        }
154    }
155
156    // make sure any re-ordering of the schemas in the list does not effect the generated
157    // schema version
158    clean_schemas.sort();
159
160    // Generate a unique hash of the clean schemas,
161    // and use it to form a UUID to identify the schema version.
162    generate_uuid_string_from_data("Catalyst-Gateway Index Database Schema", &clean_schemas)
163}
164
165/// Get the namespace for a particular db configuration
166pub(crate) fn namespace(persistent: bool, network: Network) -> String {
167    // Build and set the Keyspace to use.
168    let namespace = if persistent { "p" } else { "v" };
169    format!(
170        "{namespace}_{network}_{}",
171        generate_cql_schema_version().replace('-', "_")
172    )
173}
174
175/// Create the namespace we will use for this session
176/// Ok to run this if the namespace already exists.
177async fn create_namespace(
178    session: &mut Arc<Session>, cfg: &cassandra_db::EnvVars, persistent: bool, network: Network,
179) -> anyhow::Result<()> {
180    let keyspace = namespace(persistent, network);
181
182    let mut reg = Handlebars::new();
183    // disable default `html_escape` function
184    // which transforms `<`, `>` symbols to `&lt`, `&gt`
185    reg.register_escape_fn(|s| s.into());
186    let query = reg
187        .render_template(
188            CREATE_NAMESPACE_CQL,
189            &json!({"keyspace": keyspace,"options": cfg.deployment.clone().to_string()}),
190        )
191        .context(format!("Keyspace: {keyspace}"))?;
192
193    // Create the Keyspace if it doesn't exist already.
194    let stmt = session
195        .prepare(query)
196        .await
197        .context(format!("Keyspace: {keyspace}"))?;
198    session
199        .execute_unpaged(&stmt, ())
200        .await
201        .context(format!("Keyspace: {keyspace}"))?;
202
203    // Wait for the Schema to be ready.
204    session.await_schema_agreement().await?;
205
206    // Set the Keyspace to use for this session.
207    if let Err(error) = session.use_keyspace(keyspace.clone(), false).await {
208        error!(keyspace = keyspace, error = %error, "Failed to set keyspace");
209    }
210
211    Ok(())
212}
213
214/// Create the Schema on the connected Cassandra DB
215pub(crate) async fn create_schema(
216    session: &mut Arc<Session>, cfg: &cassandra_db::EnvVars, persistent: bool, network: Network,
217) -> anyhow::Result<()> {
218    create_namespace(session, cfg, persistent, network)
219        .await
220        .context("Creating Namespace")?;
221
222    let mut errors = Vec::with_capacity(SCHEMAS.len());
223
224    for (schema, schema_name) in SCHEMAS {
225        match session.prepare(*schema).await {
226            Ok(stmt) => {
227                if let Err(err) = session.execute_unpaged(&stmt, ()).await {
228                    error!(schema=schema_name, error=%err, "Failed to Execute Create Schema Query");
229                    errors.push(anyhow::anyhow!(
230                        "Failed to Execute Create Schema Query: {err}\n--\nSchema: {schema_name}\n--\n{schema}"
231                    ));
232                };
233            },
234            Err(err) => {
235                error!(schema=schema_name, error=%err, "Failed to Prepare Create Schema Query");
236                errors.push(anyhow::anyhow!(
237                    "Failed to Prepare Create Schema Query: {err}\n--\nSchema: {schema_name}\n--\n{schema}"
238                ));
239            },
240        }
241    }
242
243    if !errors.is_empty() {
244        let fmt_err: Vec<_> = errors.into_iter().map(|err| format!("{err}")).collect();
245        return Err(anyhow::anyhow!(format!(
246            "{} Error(s): {}",
247            fmt_err.len(),
248            fmt_err.join("\n")
249        )));
250    }
251
252    // Wait for the Schema to be ready.
253    session.await_schema_agreement().await?;
254
255    Ok(())
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    /// The version of the Index DB Schema we SHOULD BE using.
    /// DO NOT change this unless you are intentionally changing the Schema.
    ///
    /// This constant is ONLY used by Unit tests to identify when the schema version will
    /// change accidentally, and is NOT to be used directly to set the schema version of
    /// the table namespaces.
    const SCHEMA_VERSION: &str = "69e28bc1-be89-8407-83dc-9c4cc408d3a9";

    /// This test is designed to fail if the schema version has changed.
    /// It is used to help detect inadvertent schema version changes.
    /// If you did NOT intend to change the index db schema and this test fails,
    /// then revert or fix your changes to the schema files.
    #[test]
    fn check_schema_version_has_not_changed() {
        assert_eq!(SCHEMA_VERSION, generate_cql_schema_version());
    }

    /// A persistent namespace is `p_<network>_<schema version with '-' as '_'>`.
    #[test]
    fn test_namespace_persistent() {
        let network = Network::Preprod;
        let version = generate_cql_schema_version().replace('-', "_");
        assert_eq!(namespace(true, network), format!("p_{network}_{version}"));
    }

    /// A volatile namespace is `v_<network>_<schema version with '-' as '_'>`.
    #[test]
    fn test_namespace_volatile() {
        let network = Network::Preprod;
        let version = generate_cql_schema_version().replace('-', "_");
        assert_eq!(namespace(false, network), format!("v_{network}_{version}"));
    }

    /// A query without comments is returned unchanged.
    #[test]
    fn test_no_comments() {
        assert_eq!(
            remove_comments_and_join_query_lines("SELECT * FROM table1;"),
            "SELECT * FROM table1;"
        );
    }

    /// Everything after `--` on a line is dropped, even if it looks like query text.
    #[test]
    fn test_single_line_comment() {
        assert_eq!(
            remove_comments_and_join_query_lines("SELECT -- some comment * FROM table1;"),
            "SELECT"
        );
    }

    /// A comment only swallows the rest of its own line, not the following line.
    #[test]
    fn test_multi_line_comment() {
        assert_eq!(
            remove_comments_and_join_query_lines("SELECT -- some comment\n* FROM table1;"),
            "SELECT\n* FROM table1;"
        );
    }

    /// A comment-only line between two statements is removed entirely.
    #[test]
    fn test_multiple_lines() {
        assert_eq!(
            remove_comments_and_join_query_lines(
                "SELECT * FROM table1;\n-- another comment\nSELECT * FROM table2;"
            ),
            "SELECT * FROM table1;\nSELECT * FROM table2;"
        );
    }

    /// Blank lines before and after the query do not appear in the output.
    #[test]
    fn test_empty_lines() {
        assert_eq!(
            remove_comments_and_join_query_lines(
                "\n\nSELECT * FROM table1;\n-- comment here\n\n"
            ),
            "SELECT * FROM table1;"
        );
    }

    /// Input made only of whitespace and comments produces an empty string.
    #[test]
    fn test_whitespace_only() {
        assert_eq!(
            remove_comments_and_join_query_lines("   \n  -- comment here\n   "),
            ""
        );
    }
}