futu_auth/
metrics.rs

1//! 全局 metrics registry —— 供 `/metrics` Prometheus 端点消费
2//!
3//! ## 设计
4//!
//! - 全局 `OnceLock<Arc<Registry>>`,进程初始化时调用一次本模块的自由函数 [`install`](不是 `Registry` 的方法)
6//! - [`audit::reject`] / [`audit::allow`] / [`audit::trade`] 在写日志的同时
7//!   增 counter
8//! - [`RuntimeCounters::check_and_commit`] 拒时也会通知 metrics(限额 hit)
9//! - `Registry::render_prometheus()` 输出 Prometheus text exposition 格式
10//!
11//! ## 为什么不用 `prometheus` crate
12//!
13//! 输出格式简单(几类 counter),手写避免再引一份依赖。真要升级到更复杂的
14//! metric(histogram / summary)再换。
15//!
16//! ## 维度
17//!
18//! 所有 counter 的 label:`iface`(grpc/rest/ws/mcp)+ `outcome`
19//! (allow/reject/success/failure/unknown)+ `key_id`(未配 key 时 `<none>`)。
20//! 维度超细会爆 cardinality,但 `key_id` 上限就是 keys.json 里的条数(几十级),
21//! 可接受。
22
23use std::sync::{Arc, OnceLock};
24
25use dashmap::DashMap;
26
27/// 事件计数:(iface, endpoint_or_tool 这里不放 —— 放会爆维度, outcome, key_id) → count
28type EventKey = (String, String, String);
29
30/// 限额拒 counter:(iface, key_id, reason_category) → count
31///
32/// reason_category 只枚举大类:rate / daily / per_order / market / symbol /
33/// side / hours / other,字符串固定不爆维度
34type LimitRejectKey = (String, String, String);
35
36#[derive(Debug, Default)]
37pub struct Registry {
38    /// auth / trade 事件:(iface, outcome, key_id) → count
39    events: DashMap<EventKey, u64>,
40    /// 限额拒:(iface, key_id, reason_cat) → count
41    limit_rejects: DashMap<LimitRejectKey, u64>,
42    /// WS 按 scope 过滤掉的推送:(required_scope, client_key_id) → count
43    ///
44    /// 例:`(trade, key_bot_a)` 多 = bot_a 只有 qot:read,但服务端在推 trade 事件
45    ws_filtered: DashMap<(String, String), u64>,
46}
47
48impl Registry {
49    /// 记录一次 auth/trade 事件
50    pub fn record_event(&self, iface: &str, outcome: &str, key_id: &str) {
51        let key = (iface.to_string(), outcome.to_string(), key_id.to_string());
52        *self.events.entry(key).or_insert(0) += 1;
53    }
54
55    /// 记录一次限额拒(reason_cat 见文档)
56    pub fn record_limit_reject(&self, iface: &str, key_id: &str, reason_cat: &str) {
57        let key = (
58            iface.to_string(),
59            key_id.to_string(),
60            reason_cat.to_string(),
61        );
62        *self.limit_rejects.entry(key).or_insert(0) += 1;
63    }
64
65    /// 记录 WS 因 scope 不足被过滤掉的一条推送
66    pub fn record_ws_filtered(&self, required_scope: &str, client_key_id: &str) {
67        let key = (required_scope.to_string(), client_key_id.to_string());
68        *self.ws_filtered.entry(key).or_insert(0) += 1;
69    }
70
71    /// Prometheus text exposition 格式输出
72    pub fn render_prometheus(&self) -> String {
73        let mut s = String::with_capacity(4096);
74
75        s.push_str("# HELP futu_auth_events_total Auth / trade events by iface, outcome, key_id\n");
76        s.push_str("# TYPE futu_auth_events_total counter\n");
77        for kv in self.events.iter() {
78            let (iface, outcome, key_id) = kv.key();
79            let v = *kv.value();
80            s.push_str(&format!(
81                "futu_auth_events_total{{iface={},outcome={},key_id={}}} {}\n",
82                prom_label(iface),
83                prom_label(outcome),
84                prom_label(key_id),
85                v
86            ));
87        }
88
89        s.push_str(
90            "# HELP futu_auth_limit_rejects_total Limit-check rejects by iface, key_id, reason\n",
91        );
92        s.push_str("# TYPE futu_auth_limit_rejects_total counter\n");
93        for kv in self.limit_rejects.iter() {
94            let (iface, key_id, reason) = kv.key();
95            let v = *kv.value();
96            s.push_str(&format!(
97                "futu_auth_limit_rejects_total{{iface={},key_id={},reason={}}} {}\n",
98                prom_label(iface),
99                prom_label(key_id),
100                prom_label(reason),
101                v
102            ));
103        }
104
105        s.push_str(
106            "# HELP futu_ws_filtered_pushes_total Pushes filtered out for client lacking the required scope\n",
107        );
108        s.push_str("# TYPE futu_ws_filtered_pushes_total counter\n");
109        for kv in self.ws_filtered.iter() {
110            let (req_scope, key_id) = kv.key();
111            let v = *kv.value();
112            s.push_str(&format!(
113                "futu_ws_filtered_pushes_total{{required_scope={},key_id={}}} {}\n",
114                prom_label(req_scope),
115                prom_label(key_id),
116                v
117            ));
118        }
119
120        s
121    }
122}
123
124static GLOBAL: OnceLock<Arc<Registry>> = OnceLock::new();
125
126/// 安装全局 registry。只会生效第一次 install;测试场景可能多次调用,
127/// 后续 install 被忽略(保持 first-writer 语义)
128pub fn install(reg: Arc<Registry>) {
129    let _ = GLOBAL.set(reg);
130}
131
132/// 取全局 registry;未 install 时返回 None(调用方 no-op)
133pub fn global() -> Option<Arc<Registry>> {
134    GLOBAL.get().cloned()
135}
136
/// Maps a limit-reject reason message onto a small, fixed set of categories.
///
/// Matching is done on the ASCII-lower-cased message prefix. The prefixes
/// mirror the reject messages produced by `limits.rs::check_and_commit`; when a
/// new kind of reject is added there, add its prefix here too, otherwise it
/// lands in the `"other"` bucket and is indistinguishable on dashboards.
pub fn classify_limit_reason(reason: &str) -> &'static str {
    // (lower-case prefixes, category), checked top to bottom; first hit wins.
    const BUCKETS: &[(&[&str], &str)] = &[
        (&["rate limit"], "rate"),
        (&["daily value"], "daily"),
        (&["order value", "per-order"], "per_order"),
        (&["market "], "market"),
        (&["symbol "], "symbol"),
        (&["trd_side"], "side"),
        (&["outside hours", "invalid hours_window"], "hours"),
    ];

    let lowered = reason.to_ascii_lowercase();
    BUCKETS
        .iter()
        .find(|(prefixes, _)| prefixes.iter().any(|p| lowered.starts_with(*p)))
        .map_or("other", |(_, category)| *category)
}
161
/// Quotes and escapes a Prometheus label value.
///
/// The exposition format requires label values to sit inside double quotes
/// with backslash, double quote and line feed escaped as `\\`, `\"` and `\n`.
/// Every other character is copied through unchanged. Used by
/// [`Registry::render_prometheus`].
fn prom_label(v: &str) -> String {
    // +2 accounts for the surrounding quotes; escapes may still grow the buffer.
    let mut quoted = String::with_capacity(v.len() + 2);
    quoted.push('"');
    for ch in v.chars() {
        let escape = match ch {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            _ => None,
        };
        match escape {
            Some(seq) => quoted.push_str(seq),
            None => quoted.push(ch),
        }
    }
    quoted.push('"');
    quoted
}
178
179/// 在 [`audit::reject`] / [`audit::allow`] 里同步调用,保证日志和 metrics 一致
180///
181/// 单独写在这里(而不是 audit.rs)是为了让 `audit` 只关心 tracing,
182/// 这个模块专管数值聚合
183pub(crate) fn bump_auth_event(iface: &str, outcome: &str, key_id: &str) {
184    if let Some(r) = global() {
185        r.record_event(iface, outcome, key_id);
186    }
187}
188
189pub(crate) fn bump_limit_reject(iface: &str, key_id: &str, reason: &str) {
190    if let Some(r) = global() {
191        r.record_limit_reject(iface, key_id, classify_limit_reason(reason));
192    }
193}
194
195/// 便捷:记录 ws 因 scope 过滤掉的事件
196pub fn bump_ws_filtered(required_scope: &str, client_key_id: &str) {
197    if let Some(r) = global() {
198        r.record_ws_filtered(required_scope, client_key_id);
199    }
200}
201
#[cfg(test)]
mod tests {
    use super::*;

    /// All three escapes (quote, backslash, line feed) plus the pass-through
    /// and empty-value cases.
    #[test]
    fn prom_label_escapes() {
        assert_eq!(prom_label(r#"a"b\c"#), r#""a\"b\\c""#);
        assert_eq!(prom_label("ok"), r#""ok""#);
        // A raw line feed must come out as the two characters `\` and `n`.
        assert_eq!(prom_label("a\nb"), r#""a\nb""#);
        assert_eq!(prom_label(""), "\"\"");
    }

    /// One assertion per prefix handled by `classify_limit_reason`, including
    /// the alternative prefixes of the `per_order` and `hours` buckets.
    #[test]
    fn classify_limit_reason_covers_all_buckets() {
        assert_eq!(
            classify_limit_reason("rate limit exceeded: 3 in 60s"),
            "rate"
        );
        assert_eq!(
            classify_limit_reason("daily value cap exceeded: 100 > 50"),
            "daily"
        );
        assert_eq!(
            classify_limit_reason("order value 200 exceeds per-order cap 100"),
            "per_order"
        );
        assert_eq!(
            classify_limit_reason("per-order cap 100 exceeded"),
            "per_order"
        );
        assert_eq!(
            classify_limit_reason(r#"market "US" not in allowed list ["HK"]"#),
            "market"
        );
        assert_eq!(
            classify_limit_reason(r#"symbol "HK.09988" not in allowed list"#),
            "symbol"
        );
        assert_eq!(
            classify_limit_reason(r#"trd_side "BUY" not in allowed list"#),
            "side"
        );
        assert_eq!(
            classify_limit_reason("outside hours window 09:30-16:00 (now=08:15)"),
            "hours"
        );
        assert_eq!(
            classify_limit_reason(r#"invalid hours_window "25:00-26:00""#),
            "hours"
        );
        assert_eq!(classify_limit_reason("weird reason"), "other");
    }

    /// Prefix matching ignores ASCII case, and the `market ` / `symbol `
    /// prefixes include the trailing space, so look-alike words fall through.
    #[test]
    fn classify_limit_reason_is_case_insensitive_and_prefix_exact() {
        assert_eq!(classify_limit_reason("Rate Limit exceeded"), "rate");
        assert_eq!(classify_limit_reason("marketplace closed"), "other");
        assert_eq!(classify_limit_reason(""), "other");
    }

    #[test]
    fn registry_render_prometheus_is_well_formed() {
        let r = Registry::default();
        r.record_event("rest", "allow", "key_1");
        r.record_event("rest", "allow", "key_1");
        r.record_event("rest", "reject", "<missing>");
        r.record_limit_reject("mcp", "key_1", "rate");
        r.record_ws_filtered("trade", "key_2");

        let s = r.render_prometheus();
        assert!(
            s.contains(r#"futu_auth_events_total{iface="rest",outcome="allow",key_id="key_1"} 2"#)
        );
        assert!(s.contains(
            r#"futu_auth_events_total{iface="rest",outcome="reject",key_id="<missing>"} 1"#
        ));
        assert!(s.contains(
            r#"futu_auth_limit_rejects_total{iface="mcp",key_id="key_1",reason="rate"} 1"#
        ));
        assert!(
            s.contains(r#"futu_ws_filtered_pushes_total{required_scope="trade",key_id="key_2"} 1"#)
        );
        // The HELP / TYPE header lines must be present for every family.
        assert!(s.contains("# HELP futu_auth_events_total"));
        assert!(s.contains("# TYPE futu_auth_events_total counter"));
        assert!(s.contains("# HELP futu_auth_limit_rejects_total"));
        assert!(s.contains("# TYPE futu_auth_limit_rejects_total counter"));
        assert!(s.contains("# HELP futu_ws_filtered_pushes_total"));
        assert!(s.contains("# TYPE futu_ws_filtered_pushes_total counter"));
        // Every line, including the last one, is newline-terminated.
        assert!(s.ends_with('\n'));
    }

    /// An empty registry still renders the six header lines and nothing else.
    #[test]
    fn registry_render_prometheus_empty_has_only_headers() {
        let s = Registry::default().render_prometheus();
        assert_eq!(s.lines().count(), 6);
        assert!(s.lines().all(|line| line.starts_with("# ")));
    }
}
271}