Skip to main content

futu_auth/
store.rs

1//! KeyStore: keys.json 加载 + 热替换 + 明文验证
2
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6
7use arc_swap::ArcSwap;
8use chrono::Utc;
9use serde::{Deserialize, Serialize};
10
11use crate::key::KeyRecord;
12
/// Errors produced while loading, validating, or persisting `keys.json`.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum KeyStoreError {
    /// Reading the file at `path` failed.
    #[error("read {path:?}: {source}")]
    Read {
        path: PathBuf,
        source: std::io::Error,
    },
    /// The file at `path` was read but is not valid JSON for [`KeysFile`].
    #[error("parse {path:?}: {source}")]
    Parse {
        path: PathBuf,
        source: serde_json::Error,
    },
    /// A write-side filesystem operation on `path` failed. In this file it is
    /// used for directory creation, the lock file, the temp file, and the
    /// final rename.
    #[error("write {path:?}: {source}")]
    Write {
        path: PathBuf,
        source: std::io::Error,
    },
    /// Serializing a [`KeysFile`] to JSON failed (converted via `?` / `From`).
    #[error("serialize: {0}")]
    Serialize(#[from] serde_json::Error),
    /// The file declares a `version` other than the supported one.
    #[error("unsupported keys.json version {0} (supported: 1)")]
    UnsupportedVersion(u32),
    /// Two records share the same id, or an appended record reuses an existing id.
    #[error("duplicate key id {0:?}")]
    DuplicateId(String),
}
38
/// Top-level structure of the `keys.json` file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeysFile {
    /// Schema version; `load_file` rejects anything other than `CURRENT_VERSION`.
    pub version: u32,
    /// All key records, in file order.
    pub keys: Vec<KeyRecord>,
}
45
/// The only `keys.json` schema version this module reads and writes.
const CURRENT_VERSION: u32 = 1;
47
/// KeyStore: a hot-swappable set of keys.
#[derive(Debug)]
pub struct KeyStore {
    /// Source file path; `None` for a store created via `empty()`.
    path: Option<PathBuf>,
    /// Current snapshot; replaced wholesale on reload / expand.
    current: ArcSwap<KeysFile>,
}
54
55impl KeyStore {
56    /// 空 store(没有 keys 文件时)
57    pub fn empty() -> Self {
58        Self {
59            path: None,
60            current: ArcSwap::from_pointee(KeysFile {
61                version: CURRENT_VERSION,
62                keys: vec![],
63            }),
64        }
65    }
66
67    /// 从文件加载
68    pub fn load(path: impl Into<PathBuf>) -> Result<Self, KeyStoreError> {
69        let path = path.into();
70        let file = Self::load_file(&path)?;
71        Ok(Self {
72            path: Some(path),
73            current: ArcSwap::from_pointee(file),
74        })
75    }
76
77    fn load_file(path: &Path) -> Result<KeysFile, KeyStoreError> {
78        let text = fs::read_to_string(path).map_err(|source| KeyStoreError::Read {
79            path: path.to_path_buf(),
80            source,
81        })?;
82        let mut file: KeysFile =
83            serde_json::from_str(&text).map_err(|source| KeyStoreError::Parse {
84                path: path.to_path_buf(),
85                source,
86            })?;
87        if file.version != CURRENT_VERSION {
88            return Err(KeyStoreError::UnsupportedVersion(file.version));
89        }
90        // 检查重复 id
91        let mut seen = std::collections::HashSet::new();
92        for k in &file.keys {
93            if !seen.insert(k.id.clone()) {
94                return Err(KeyStoreError::DuplicateId(k.id.clone()));
95            }
96        }
97        // v1.4.104 eli S-002 (P0) fix: load 时立即注入 fail-closed sentinel —
98        //
99        // 之前 expand_allowed_card_nums 只在 daemon 启动 + SIGHUP 跑, 但 MCP 等
100        // keystore consumer **不调** expand → 受 `allowed_card_nums` 限制的 key
101        // 加载后 `allowed_acc_ids = None`, 被限额引擎当作 "无限制" silent allow.
102        //
103        // **修法**: load_file 时对每条 key, 若 `allowed_card_nums` 非空但
104        // `allowed_acc_ids` 是 None / empty → 注入 sentinel `Some({0})`. 真实
105        // expansion (e.g. opend daemon GetAccList 之后) 会以 resolved acc_ids 覆盖.
106        // MCP 等不跑 expand 的消费方仍受 sentinel 保护 (fail-closed: real acc_id
107        // ≠ 0 → 永远 reject).
108        //
109        // 这是架构层 fix (与 v1.4.103 codex F1 P1 expand-time sentinel 同语义,
110        // 但提前到 load 时让所有 consumer 受益, 不依赖每个消费方都调 expand).
111        for rec in &mut file.keys {
112            // v1.4.106 F-P2-D: snapshot 文件源原始 allowed_acc_ids (sentinel
113            // 注入和 card_num expansion 之前). expand_allowed_card_nums 用
114            // 此字段作起步, 防止累积 stale resolutions.
115            rec.raw_explicit_acc_ids = rec.allowed_acc_ids.clone();
116
117            let has_card_nums = rec
118                .allowed_card_nums
119                .as_ref()
120                .is_some_and(|v| !v.is_empty());
121            let has_acc_ids = rec.allowed_acc_ids.as_ref().is_some_and(|s| !s.is_empty());
122            if has_card_nums && !has_acc_ids {
123                // 写 sentinel acc_id=0; expand_allowed_card_nums 后续会用
124                // resolved acc_ids 覆盖. 此期间任何 acc_id ≠ 0 的 query 全 reject.
125                let mut sentinel = rec.allowed_acc_ids.clone().unwrap_or_default();
126                sentinel.insert(0);
127                rec.allowed_acc_ids = Some(sentinel);
128                tracing::warn!(
129                    key_id = %rec.id,
130                    card_nums = ?rec.allowed_card_nums,
131                    "v1.4.104 eli S-002 (P0): keystore load 注入 fail-closed sentinel \
132                     allowed_acc_ids={{0}} (caller 配 allowed_card_nums 但 daemon 还没\
133                     expand). expand_allowed_card_nums 跑完后真实 resolved acc_ids 覆盖. \
134                     MCP / 不跑 expand 的 consumer 仍按 sentinel 保护."
135                );
136            }
137        }
138        Ok(file)
139    }
140
141    /// SIGHUP 热重载:用同一路径重新读文件
142    pub fn reload(&self) -> Result<(), KeyStoreError> {
143        let Some(path) = &self.path else {
144            return Ok(());
145        };
146        let file = Self::load_file(path)?;
147        self.current.store(Arc::new(file));
148        Ok(())
149    }
150
151    /// v1.4.103 (B10): 把每条 key 的 `allowed_card_nums` (string format) 通过
152    /// `resolver` 解析成 acc_id, **合并**进 `allowed_acc_ids` (in-memory only,
153    /// 不写回 keys.json — 文件源不变, 重载后再 expand).
154    ///
155    /// `resolver(card_num) -> Vec<u64>` 由 caller 提供 (典型 closure 持
156    /// `Arc<TrdCache>` 调 `find_acc_ids_by_card_num`).
157    ///
158    /// **行为**:
159    /// - resolver 返 1 个 acc_id → 加入 allowed_acc_ids (resolved)
160    /// - 返 0 个 → 通过 `unresolved_callback` 通知 caller (e.g. log warn)
161    /// - 返 ≥ 2 个 → 通过 `ambiguous_callback` 通知 caller (loud, skip 该条)
162    ///
163    /// 返 `(resolved_count, unresolved_count, ambiguous_count)`.
164    ///
165    /// **典型调用 (daemon 启动 GetAccList 成功后)**:
166    /// ```ignore
167    /// let cache_clone = trd_cache.clone();
168    /// key_store.expand_allowed_card_nums(
169    ///     |cn: &str| cache_clone.find_acc_ids_by_card_num(cn),
170    ///     |key_id, cn| tracing::warn!(key_id, card_num=cn, "card_num not found"),
171    ///     |key_id, cn, candidates| tracing::warn!(key_id, card_num=cn, ?candidates, "ambiguous card_num"),
172    /// );
173    /// ```
174    pub fn expand_allowed_card_nums<R, FU, FA>(
175        &self,
176        resolver: R,
177        mut unresolved_callback: FU,
178        mut ambiguous_callback: FA,
179    ) -> (usize, usize, usize)
180    where
181        R: Fn(&str) -> Vec<u64>,
182        FU: FnMut(&str, &str),         // key_id, card_num
183        FA: FnMut(&str, &str, &[u64]), // key_id, card_num, candidates
184    {
185        let current = self.current.load();
186        let mut new_keys = current.keys.clone();
187        let mut resolved = 0;
188        let mut unresolved = 0;
189        let mut ambiguous = 0;
190        for rec in &mut new_keys {
191            let Some(card_nums) = rec.allowed_card_nums.clone() else {
192                continue;
193            };
194            // v1.4.106 F-P2-D: 从 raw_explicit_acc_ids 起步重新 resolve, 不
195            // 累积 stale resolutions. 之前 `rec.allowed_acc_ids.clone()` 起
196            // 步会让连续 expand 累积 — 若 keys.json 没动但 cache 里某 acc 不
197            // 再可见, 旧 resolved acc_id 仍留在 allowed set 中. 现在每次
198            // expand 都从 file 源原始集合重新计算. raw 为 None → 空集起步.
199            let mut acc_ids = rec.raw_explicit_acc_ids.clone().unwrap_or_default();
200            for cn in &card_nums {
201                let candidates = resolver(cn);
202                match candidates.len() {
203                    0 => {
204                        unresolved += 1;
205                        unresolved_callback(&rec.id, cn);
206                    }
207                    1 => {
208                        acc_ids.insert(candidates[0]);
209                        resolved += 1;
210                    }
211                    _ => {
212                        ambiguous += 1;
213                        ambiguous_callback(&rec.id, cn, &candidates);
214                    }
215                }
216            }
217            // v1.4.103 codex F1 (P1) fail-closed: 无论 acc_ids 是否非空, 都
218            // **必须** 写入 Some(...) — 哪怕是空 HashSet (= "denylist 全部",
219            // 限额引擎 step 0 acc_id 白名单非空 + 不含 ctx.acc_id → reject).
220            //
221            // 旧逻辑 (silent unrestricted): `if !acc_ids.is_empty() { rec.allowed_acc_ids = Some(acc_ids); }`
222            // 当 caller 配置 allowed_card_nums 但**全部 unresolved/ambiguous** 时,
223            // acc_ids 留空, allowed_acc_ids 仍 None → 限额引擎按 "无限制" 处理 →
224            // 受限 key silent unrestricted (反模式 D / pitfall #45).
225            //
226            // 新逻辑: 只要 caller 显式写了 allowed_card_nums (说明 *intent* 是限制),
227            // 就强制 Some(acc_ids) — 即便空集. 限额引擎检测到 Some(empty) 时
228            // 视为 "全 reject" (限额 step 0 `allowed.is_empty()` 已 short-circuit
229            // 不 reject, 但 contains check 永远 false → reject).
230            //
231            // **wait**: 看 limits.rs:332 `check_full_skip_rate`, step 0 是
232            // `if let (Some(allowed), Some(id)) = (&limits.allowed_acc_ids, ctx.acc_id) && !allowed.is_empty() && !allowed.contains(&id)`.
233            // 关键: `!allowed.is_empty()` short-circuit empty set, 等于 "无限制".
234            // 这就是 silent-unrestricted 的根源. 但 `allowed.is_empty()` 短路是
235            // **故意的语义** (允许 None / 空集都视为 "不限制") — 改这个会破坏
236            // 现有 user contract.
237            //
238            // **正确修法**: 既然空集语义 = 无限制不能改, 我们要把 caller intent
239            // (allowed_card_nums 非空) → 限额能识别 "想限但无法 resolve" 的状态.
240            // 选: 把 sentinel acc_id (e.g. 0) 写入 acc_ids 触发 reject — 因为
241            // 没有真账户 acc_id == 0. 限额检查时 acc_ids = {0}, ctx.acc_id =
242            // <real id> ≠ 0 → reject. legitimate id 也 reject — 这是
243            // fail-closed 保守语义.
244            if !card_nums.is_empty() {
245                if acc_ids.is_empty() {
246                    // 全部 unresolved/ambiguous → 写 sentinel 0 让限额 reject 一切
247                    acc_ids.insert(0u64);
248                }
249                rec.allowed_acc_ids = Some(acc_ids);
250            } else if !acc_ids.is_empty() {
251                rec.allowed_acc_ids = Some(acc_ids);
252            }
253        }
254        self.current.store(Arc::new(KeysFile {
255            version: current.version,
256            keys: new_keys,
257        }));
258        (resolved, unresolved, ambiguous)
259    }
260
261    /// 明文校验:遍历所有未过期 key,匹配则返回 KeyRecord 快照
262    ///
263    /// 如果 key 设置了 `allowed_machines` 且本机不在白名单,会打 warn 日志并视为未匹配。
264    /// 这样做法的代价:攻击者可以通过"能不能过"侧信道区分 key 是否存在 — 我们接受,
265    /// 因为 plaintext 空间是 256 bit 随机 hex,侧信道没意义。
266    pub fn verify(&self, plaintext: &str) -> Option<Arc<KeyRecord>> {
267        let snap = self.current.load_full();
268        let now = Utc::now();
269        for k in snap.keys.iter() {
270            if k.is_expired(now) {
271                continue;
272            }
273            if k.matches(plaintext) {
274                if let Err(e) = k.check_machine() {
275                    tracing::warn!(
276                        key_id = %k.id,
277                        error = %e,
278                        "api key matched but machine binding failed; rejecting"
279                    );
280                    return None;
281                }
282                return Some(Arc::new(k.clone()));
283            }
284        }
285        None
286    }
287
288    /// 是否已加载 keys 文件(非 empty)
289    #[must_use]
290    pub fn is_configured(&self) -> bool {
291        self.path.is_some()
292    }
293
294    pub fn path(&self) -> Option<&Path> {
295        self.path.as_deref()
296    }
297
298    #[must_use]
299    pub fn len(&self) -> usize {
300        self.current.load().keys.len()
301    }
302
303    #[must_use]
304    pub fn is_empty(&self) -> bool {
305        self.len() == 0
306    }
307
308    /// v1.4.105 eli #4 fix: 当前 KeyStore 是否有任意 key 配置了
309    /// `allowed_card_nums` 限制. 用于 standalone MCP / gRPC / 任何不持
310    /// `TrdCache` 的 keystore consumer 在启动时判断:
311    /// - `false` → 没有 card_num 限制, 跳过 daemon `GetAccList` + expand 全流程
312    ///   (避免无意义的 daemon 请求)
313    /// - `true` → 必须连 daemon, 调 `GetAccList`, 通过
314    ///   [`Self::expand_allowed_card_nums`] 把 card_num resolve 成 acc_id;
315    ///   否则 fail-closed sentinel `{0}` 会让所有真账户 reject (eli BUG
316    ///   v1.4.104-002: standalone MCP 漏调 expand 导致 `0757` 配置的 key
317    ///   全 reject).
318    ///
319    /// 注意: 本方法只检查 raw `allowed_card_nums` 是否非空 — load_file 阶段
320    /// 注入的 sentinel `allowed_acc_ids = {0}` **不**算"已 expand"; 只有
321    /// caller 真跑过 [`Self::expand_allowed_card_nums`] 后才会用 resolved
322    /// acc_ids 覆盖 sentinel.
323    #[must_use]
324    pub fn has_any_card_num_restrictions(&self) -> bool {
325        self.current
326            .load()
327            .keys
328            .iter()
329            .any(|k| k.allowed_card_nums.as_ref().is_some_and(|v| !v.is_empty()))
330    }
331
332    /// 按 id 查询当前快照中的 key(**不做 expiry / machine 校验**,调用方自己做)
333    ///
334    /// 典型用法:MCP 在启动时 `verify(plaintext)` 拿到 id,后续每个请求用
335    /// `get_by_id` 取最新记录,这样 SIGHUP 重载 keys.json 后 scope / 限额 /
336    /// expires_at 的变更能立刻生效(不用重启进程)。
337    ///
338    /// 返回 None 表示 id 在当前文件里不存在(被 remove_key 删掉了),调用方应
339    /// 视为"key 已吊销"直接拒绝。
340    ///
341    /// **注意**:此方法**不做 machine binding 校验**。对于跨 SIGHUP 的 per-msg /
342    /// per-tool 复检场景应改用 [`Self::get_by_id_for_current_machine`],确保
343    /// SIGHUP 后新加的 `allowed_machines` 限制立即生效(避免 startup 验过 →
344    /// SIGHUP 收紧 → 仍按老 record 放行的语义漂移)。
345    pub fn get_by_id(&self, id: &str) -> Option<Arc<KeyRecord>> {
346        let snap = self.current.load_full();
347        snap.keys
348            .iter()
349            .find(|k| k.id == id)
350            .map(|k| Arc::new(k.clone()))
351    }
352
353    /// 按 id 查询当前快照中的 key + 立即校验本机 machine binding。
354    ///
355    /// **统一生命周期入口**: 任何 surface (WS / MCP / REST / gRPC) 在
356    /// 已 verify-once → 跨 SIGHUP 复检场景下应使用此方法替代裸 [`Self::get_by_id`],
357    /// 避免如下漂移:
358    ///
359    /// - startup 时 `verify(plaintext)` 检查 machine binding ✅
360    /// - SIGHUP reload 把该 key 的 `allowed_machines` 收紧(移除本机指纹)
361    /// - 后续 per-msg / per-tool 仅调 `get_by_id` → **绕过 machine binding** →
362    ///   silent unrestricted (反模式 D / pitfall #45 silent-success 同模式)
363    ///
364    /// 行为:
365    /// - id 不存在 → `None`(key 已被 remove_key 吊销,caller 视为吊销拒绝)
366    /// - id 存在 + machine 校验通过 → `Some(rec)`
367    /// - id 存在 + machine 校验失败 → `None` + warn log(与 `verify` 同语义)
368    ///
369    /// **不做 expiry 校验** —— pipeline.rs Step 1.5 / caller 自己做(与
370    /// `get_by_id` 行为对齐,仅差 machine 一层)。
371    pub fn get_by_id_for_current_machine(&self, id: &str) -> Option<Arc<KeyRecord>> {
372        let rec = self.get_by_id(id)?;
373        if let Err(e) = rec.check_machine() {
374            tracing::warn!(
375                key_id = %rec.id,
376                error = %e,
377                "api key get_by_id_for_current_machine: machine binding failed; \
378                 treating as revoked (caller should reject as if key not found)"
379            );
380            return None;
381        }
382        Some(rec)
383    }
384
385    /// 导出当前所有 keys 的 id(用于调试 / 审计)
386    #[must_use]
387    pub fn ids(&self) -> Vec<String> {
388        self.current
389            .load()
390            .keys
391            .iter()
392            .map(|k| k.id.clone())
393            .collect()
394    }
395}
396
397/// 追加一条新 key 到 keys.json(atomic rename)
398/// v1.4.106 codex 0558 F5 (P2): RMW (read-modify-write) helper that holds the
399/// flock for the **entire** sequence — load, mutate, write. Without this,
400/// concurrent append_key callers can load → load → write → write and lose
401/// the first writer's record.
402fn with_keys_lock<F, R>(path: &Path, f: F) -> Result<R, KeyStoreError>
403where
404    F: FnOnce(&Path) -> Result<R, KeyStoreError>,
405{
406    if let Some(parent) = path.parent()
407        && !parent.as_os_str().is_empty()
408    {
409        fs::create_dir_all(parent).map_err(|source| KeyStoreError::Write {
410            path: parent.to_path_buf(),
411            source,
412        })?;
413    }
414    let _guard = AdvisoryLockGuard::acquire_exclusive(path)?;
415    f(path)
416}
417
418pub fn append_key(path: &Path, new_record: KeyRecord) -> Result<(), KeyStoreError> {
419    with_keys_lock(path, |path| {
420        let mut file = match fs::metadata(path) {
421            Ok(_) => KeyStore::load_file(path)?,
422            Err(_) => KeysFile {
423                version: CURRENT_VERSION,
424                keys: vec![],
425            },
426        };
427        if file.keys.iter().any(|k| k.id == new_record.id) {
428            return Err(KeyStoreError::DuplicateId(new_record.id));
429        }
430        file.keys.push(new_record);
431        write_atomic_inner(path, &file)
432    })
433}
434
435/// 读取 keys.json 并返回所有记录快照(展示用;不暴露 hash 以外的敏感位)
436pub fn list_keys(path: &Path) -> Result<Vec<KeyRecord>, KeyStoreError> {
437    let file = KeyStore::load_file(path)?;
438    Ok(file.keys)
439}
440
441/// 按 id 编辑一条 key(atomic rename);闭包返回 `false` 代表未改动 → 跳过落盘
442///
443/// 适用于就地修改 `allowed_machines` / `expires_at` / `note` 等配置,
444/// 而不想走 "revoke + regen" 流程(否则 plaintext 会换)。
445pub fn update_key<F>(path: &Path, id: &str, mutate: F) -> Result<bool, KeyStoreError>
446where
447    F: FnOnce(&mut KeyRecord) -> bool,
448{
449    // v1.4.106 F5: 整个 RMW 在 flock 内, 防 concurrent update_key 互相覆盖
450    with_keys_lock(path, |path| {
451        let mut file = KeyStore::load_file(path)?;
452        let Some(rec) = file.keys.iter_mut().find(|k| k.id == id) else {
453            return Ok(false);
454        };
455        let changed = mutate(rec);
456        if changed {
457            write_atomic_inner(path, &file)?;
458        }
459        Ok(changed)
460    })
461}
462
463/// 按 id 删除一条 key(atomic rename);返回是否真的删掉了一条
464pub fn remove_key(path: &Path, id: &str) -> Result<bool, KeyStoreError> {
465    // v1.4.106 F5: 整个 RMW 在 flock 内
466    with_keys_lock(path, |path| {
467        let mut file = KeyStore::load_file(path)?;
468        let before = file.keys.len();
469        file.keys.retain(|k| k.id != id);
470        let removed = before != file.keys.len();
471        if removed {
472            write_atomic_inner(path, &file)?;
473        }
474        Ok(removed)
475    })
476}
477
478/// v1.4.106 codex 0558 F5 (P2): atomic write with advisory flock + unique
479/// tempfile + fsync.
480///
481/// **背景**: 之前的 write_atomic 有 3 个问题:
482///   1. `tmp = path.with_extension("json.tmp")` — 同 path **每个 process 共
483///      享**, 并发写 (daemon + futucli + multiple admin reload) 会互相覆写
484///      tempfile, 最后 rename 时数据交错.
485///   2. **无 advisory flock** — 没有跨 process coordination, race 可让 reader
486///      看到部分写完的 `keys.json` (rename 前如果别的 process 也在 truncate).
487///   3. **无 fsync** — 写完立刻 rename 在 ext4 数据=writeback 模式 / SSD power
488///      loss 下可能丢内容 (file 在 inode 层面存在但 data block 没 flush).
489///
490/// **修法**: 1) tempfile 名带 pid + nanos: `keys.json.<pid>.<nanos>.tmp` — race-free.
491/// 2) 对 `keys.json.lock` (sibling lock file) 取 LOCK_EX flock; 读路径
492/// (load_file) 取 LOCK_SH (本 commit 暂只写路径用 LOCK_EX, 读端后续可加 —
493/// 写不会破坏 reader 因为 rename 是原子 inode 替换). 3) tempfile open 后
494/// write_all → sync_all → close → set_permissions → rename → 持有 lock 期间.
495///
496/// v1.4.106 codex 0558 F5: 写盘只做 unique tempfile + fsync + rename. flock 由
497/// caller (with_keys_lock) 在 RMW 范围统一持有, 这里**不再**单独加锁 (避免
498/// 与 with_keys_lock 重入).
499fn write_atomic_inner(path: &Path, file: &KeysFile) -> Result<(), KeyStoreError> {
500    let text = serde_json::to_string_pretty(file)?;
501
502    // v1.4.106 F5: unique tempfile (pid + nanos), 防 concurrent rename 战 tempfile.
503    let tmp_name = match path.file_name().and_then(|n| n.to_str()) {
504        Some(name) => format!(
505            "{name}.{pid}.{nanos}.tmp",
506            pid = std::process::id(),
507            nanos = std::time::SystemTime::now()
508                .duration_since(std::time::UNIX_EPOCH)
509                .map(|d| d.as_nanos())
510                .unwrap_or(0),
511        ),
512        None => format!(
513            "keys.{pid}.{nanos}.tmp",
514            pid = std::process::id(),
515            nanos = std::time::SystemTime::now()
516                .duration_since(std::time::UNIX_EPOCH)
517                .map(|d| d.as_nanos())
518                .unwrap_or(0),
519        ),
520    };
521    let tmp = path
522        .parent()
523        .map(|p| p.join(&tmp_name))
524        .unwrap_or_else(|| Path::new(&tmp_name).to_path_buf());
525
526    // 写入 tempfile + fsync — 0600 mode 通过 OpenOptions (Unix) 创建时即生效.
527    use std::io::Write;
528    #[cfg(unix)]
529    let mut f = {
530        use std::os::unix::fs::OpenOptionsExt;
531        fs::OpenOptions::new()
532            .create_new(true)
533            .write(true)
534            .mode(0o600)
535            .open(&tmp)
536            .map_err(|source| KeyStoreError::Write {
537                path: tmp.clone(),
538                source,
539            })?
540    };
541    #[cfg(not(unix))]
542    let mut f = fs::OpenOptions::new()
543        .create_new(true)
544        .write(true)
545        .open(&tmp)
546        .map_err(|source| KeyStoreError::Write {
547            path: tmp.clone(),
548            source,
549        })?;
550
551    let write_res = f
552        .write_all(text.as_bytes())
553        .and_then(|_| f.sync_all())
554        .map_err(|source| KeyStoreError::Write {
555            path: tmp.clone(),
556            source,
557        });
558    drop(f);
559
560    if let Err(e) = write_res {
561        let _ = fs::remove_file(&tmp);
562        return Err(e);
563    }
564
565    // 防御 chmod (Unix), OpenOptions.mode 已 0600, 但部分 fs / umask 异常时兜底.
566    #[cfg(unix)]
567    {
568        use std::os::unix::fs::PermissionsExt;
569        let _ = fs::set_permissions(&tmp, fs::Permissions::from_mode(0o600));
570    }
571
572    // atomic rename → tempfile 替换 inode, reader 看到的永远是完整 file.
573    fs::rename(&tmp, path).map_err(|source| KeyStoreError::Write {
574        path: path.to_path_buf(),
575        source,
576    })?;
577
578    Ok(())
579}
580
581// ============================================================================
582// v1.4.106 codex 0558 F5 (P2): advisory file lock helper (Unix flock-based)
583// ============================================================================
584//
585// 用 sibling `<path>.lock` 文件作锁文件 (空文件 just for inode), LOCK_EX 时
586// 阻塞其他 acquire_exclusive caller. drop 时 LOCK_UN 释放.
587//
588// 选择 sibling lock file (而非 lock keys.json 本身):
589//   - 避免 lock + rename 互动 (rename 替换 inode, 老 fd 上的 lock 实际作用于
590//     老 inode, 新 reader 拿不到锁信号)
591//   - 跨 process 一致 — 任何 process open `<path>.lock` 拿同 inode
592
593#[cfg(unix)]
594struct AdvisoryLockGuard {
595    fd: std::os::fd::OwnedFd,
596}
597
598#[cfg(unix)]
599impl AdvisoryLockGuard {
600    fn acquire_exclusive(path: &Path) -> Result<Self, KeyStoreError> {
601        use std::os::fd::AsRawFd;
602        use std::os::unix::fs::OpenOptionsExt;
603
604        // sibling lock path: `<dir>/<file_name>.lock` (不复用
605        // path.with_extension — 它替换最后一个扩展名, 我们要追加一个新扩展名).
606        let lock_path = path
607            .parent()
608            .map(|p| {
609                let mut name = path
610                    .file_name()
611                    .and_then(|n| n.to_str())
612                    .unwrap_or("keys")
613                    .to_string();
614                name.push_str(".lock");
615                p.join(name)
616            })
617            .unwrap_or_else(|| std::path::PathBuf::from("keys.lock"));
618
619        let file = fs::OpenOptions::new()
620            .create(true)
621            .write(true)
622            .truncate(false)
623            .mode(0o600)
624            .open(&lock_path)
625            .map_err(|source| KeyStoreError::Write {
626                path: lock_path.clone(),
627                source,
628            })?;
629        let raw = file.as_raw_fd();
630        // SAFETY: flock(2) is async-signal-safe; we hold OwnedFd via File
631        // until the guard drops, so fd is valid.
632        let rc = unsafe { libc::flock(raw, libc::LOCK_EX) };
633        if rc != 0 {
634            return Err(KeyStoreError::Write {
635                path: lock_path,
636                source: std::io::Error::last_os_error(),
637            });
638        }
639        let owned: std::os::fd::OwnedFd = file.into();
640        Ok(Self { fd: owned })
641    }
642}
643
644#[cfg(unix)]
645impl Drop for AdvisoryLockGuard {
646    fn drop(&mut self) {
647        use std::os::fd::AsRawFd;
648        // best-effort unlock on close; closing fd also implicitly releases lock.
649        let _ = unsafe { libc::flock(self.fd.as_raw_fd(), libc::LOCK_UN) };
650    }
651}
652
/// Placeholder guard for non-Unix targets: carries no state and takes no lock.
#[cfg(not(unix))]
struct AdvisoryLockGuard;

#[cfg(not(unix))]
impl AdvisoryLockGuard {
    /// Always succeeds without locking anything.
    ///
    /// NOTE(review): there is no cross-process exclusion on these targets
    /// yet. The original author assumes single-user usage where writers do
    /// not race, and mentions `LockFileEx` as a possible follow-up on Windows.
    fn acquire_exclusive(_path: &Path) -> Result<Self, KeyStoreError> {
        Ok(AdvisoryLockGuard)
    }
}
663
664#[cfg(test)]
665mod tests;