futu_auth/store.rs
1//! KeyStore: keys.json 加载 + 热替换 + 明文验证
2
3use std::fs;
4use std::path::{Path, PathBuf};
5use std::sync::Arc;
6
7use arc_swap::ArcSwap;
8use chrono::Utc;
9use serde::{Deserialize, Serialize};
10
11use crate::key::KeyRecord;
12
/// Errors produced while loading, validating or persisting `keys.json`.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum KeyStoreError {
    /// Reading the file at `path` failed.
    #[error("read {path:?}: {source}")]
    Read {
        path: PathBuf,
        source: std::io::Error,
    },
    /// The file at `path` was read but is not valid JSON for [`KeysFile`].
    #[error("parse {path:?}: {source}")]
    Parse {
        path: PathBuf,
        source: serde_json::Error,
    },
    /// Creating, writing, locking or renaming `path` failed.
    #[error("write {path:?}: {source}")]
    Write {
        path: PathBuf,
        source: std::io::Error,
    },
    /// Serializing the in-memory [`KeysFile`] to JSON failed.
    #[error("serialize: {0}")]
    Serialize(#[from] serde_json::Error),
    /// The file declares a `version` other than the single supported one.
    #[error("unsupported keys.json version {0} (supported: 1)")]
    UnsupportedVersion(u32),
    /// Two records share the same id (on load), or an appended record
    /// reuses an id that already exists.
    #[error("duplicate key id {0:?}")]
    DuplicateId(String),
}
38
/// Top-level structure of the `keys.json` file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeysFile {
    /// Schema version of the file; loading rejects anything other than
    /// `CURRENT_VERSION`.
    pub version: u32,
    /// All key records, in file order.
    pub keys: Vec<KeyRecord>,
}
45
/// The only `keys.json` schema version this module reads and writes.
const CURRENT_VERSION: u32 = 1;
47
/// KeyStore: a hot-swappable collection of keys.
#[derive(Debug)]
pub struct KeyStore {
    /// Backing file, or `None` for a store created with [`KeyStore::empty`].
    path: Option<PathBuf>,
    /// Current snapshot; replaced wholesale on reload / card-number expansion.
    current: ArcSwap<KeysFile>,
}
54
55impl KeyStore {
56 /// 空 store(没有 keys 文件时)
57 pub fn empty() -> Self {
58 Self {
59 path: None,
60 current: ArcSwap::from_pointee(KeysFile {
61 version: CURRENT_VERSION,
62 keys: vec![],
63 }),
64 }
65 }
66
67 /// 从文件加载
68 pub fn load(path: impl Into<PathBuf>) -> Result<Self, KeyStoreError> {
69 let path = path.into();
70 let file = Self::load_file(&path)?;
71 Ok(Self {
72 path: Some(path),
73 current: ArcSwap::from_pointee(file),
74 })
75 }
76
77 fn load_file(path: &Path) -> Result<KeysFile, KeyStoreError> {
78 let text = fs::read_to_string(path).map_err(|source| KeyStoreError::Read {
79 path: path.to_path_buf(),
80 source,
81 })?;
82 let mut file: KeysFile =
83 serde_json::from_str(&text).map_err(|source| KeyStoreError::Parse {
84 path: path.to_path_buf(),
85 source,
86 })?;
87 if file.version != CURRENT_VERSION {
88 return Err(KeyStoreError::UnsupportedVersion(file.version));
89 }
90 // 检查重复 id
91 let mut seen = std::collections::HashSet::new();
92 for k in &file.keys {
93 if !seen.insert(k.id.clone()) {
94 return Err(KeyStoreError::DuplicateId(k.id.clone()));
95 }
96 }
97 // v1.4.104 eli S-002 (P0) fix: load 时立即注入 fail-closed sentinel —
98 //
99 // 之前 expand_allowed_card_nums 只在 daemon 启动 + SIGHUP 跑, 但 MCP 等
100 // keystore consumer **不调** expand → 受 `allowed_card_nums` 限制的 key
101 // 加载后 `allowed_acc_ids = None`, 被限额引擎当作 "无限制" silent allow.
102 //
103 // **修法**: load_file 时对每条 key, 若 `allowed_card_nums` 非空但
104 // `allowed_acc_ids` 是 None / empty → 注入 sentinel `Some({0})`. 真实
105 // expansion (e.g. opend daemon GetAccList 之后) 会以 resolved acc_ids 覆盖.
106 // MCP 等不跑 expand 的消费方仍受 sentinel 保护 (fail-closed: real acc_id
107 // ≠ 0 → 永远 reject).
108 //
109 // 这是架构层 fix (与 v1.4.103 codex F1 P1 expand-time sentinel 同语义,
110 // 但提前到 load 时让所有 consumer 受益, 不依赖每个消费方都调 expand).
111 for rec in &mut file.keys {
112 // v1.4.106 F-P2-D: snapshot 文件源原始 allowed_acc_ids (sentinel
113 // 注入和 card_num expansion 之前). expand_allowed_card_nums 用
114 // 此字段作起步, 防止累积 stale resolutions.
115 rec.raw_explicit_acc_ids = rec.allowed_acc_ids.clone();
116
117 let has_card_nums = rec
118 .allowed_card_nums
119 .as_ref()
120 .is_some_and(|v| !v.is_empty());
121 let has_acc_ids = rec.allowed_acc_ids.as_ref().is_some_and(|s| !s.is_empty());
122 if has_card_nums && !has_acc_ids {
123 // 写 sentinel acc_id=0; expand_allowed_card_nums 后续会用
124 // resolved acc_ids 覆盖. 此期间任何 acc_id ≠ 0 的 query 全 reject.
125 let mut sentinel = rec.allowed_acc_ids.clone().unwrap_or_default();
126 sentinel.insert(0);
127 rec.allowed_acc_ids = Some(sentinel);
128 tracing::warn!(
129 key_id = %rec.id,
130 card_nums = ?rec.allowed_card_nums,
131 "v1.4.104 eli S-002 (P0): keystore load 注入 fail-closed sentinel \
132 allowed_acc_ids={{0}} (caller 配 allowed_card_nums 但 daemon 还没\
133 expand). expand_allowed_card_nums 跑完后真实 resolved acc_ids 覆盖. \
134 MCP / 不跑 expand 的 consumer 仍按 sentinel 保护."
135 );
136 }
137 }
138 Ok(file)
139 }
140
141 /// SIGHUP 热重载:用同一路径重新读文件
142 pub fn reload(&self) -> Result<(), KeyStoreError> {
143 let Some(path) = &self.path else {
144 return Ok(());
145 };
146 let file = Self::load_file(path)?;
147 self.current.store(Arc::new(file));
148 Ok(())
149 }
150
151 /// v1.4.103 (B10): 把每条 key 的 `allowed_card_nums` (string format) 通过
152 /// `resolver` 解析成 acc_id, **合并**进 `allowed_acc_ids` (in-memory only,
153 /// 不写回 keys.json — 文件源不变, 重载后再 expand).
154 ///
155 /// `resolver(card_num) -> Vec<u64>` 由 caller 提供 (典型 closure 持
156 /// `Arc<TrdCache>` 调 `find_acc_ids_by_card_num`).
157 ///
158 /// **行为**:
159 /// - resolver 返 1 个 acc_id → 加入 allowed_acc_ids (resolved)
160 /// - 返 0 个 → 通过 `unresolved_callback` 通知 caller (e.g. log warn)
161 /// - 返 ≥ 2 个 → 通过 `ambiguous_callback` 通知 caller (loud, skip 该条)
162 ///
163 /// 返 `(resolved_count, unresolved_count, ambiguous_count)`.
164 ///
165 /// **典型调用 (daemon 启动 GetAccList 成功后)**:
166 /// ```ignore
167 /// let cache_clone = trd_cache.clone();
168 /// key_store.expand_allowed_card_nums(
169 /// |cn: &str| cache_clone.find_acc_ids_by_card_num(cn),
170 /// |key_id, cn| tracing::warn!(key_id, card_num=cn, "card_num not found"),
171 /// |key_id, cn, candidates| tracing::warn!(key_id, card_num=cn, ?candidates, "ambiguous card_num"),
172 /// );
173 /// ```
174 pub fn expand_allowed_card_nums<R, FU, FA>(
175 &self,
176 resolver: R,
177 mut unresolved_callback: FU,
178 mut ambiguous_callback: FA,
179 ) -> (usize, usize, usize)
180 where
181 R: Fn(&str) -> Vec<u64>,
182 FU: FnMut(&str, &str), // key_id, card_num
183 FA: FnMut(&str, &str, &[u64]), // key_id, card_num, candidates
184 {
185 let current = self.current.load();
186 let mut new_keys = current.keys.clone();
187 let mut resolved = 0;
188 let mut unresolved = 0;
189 let mut ambiguous = 0;
190 for rec in &mut new_keys {
191 let Some(card_nums) = rec.allowed_card_nums.clone() else {
192 continue;
193 };
194 // v1.4.106 F-P2-D: 从 raw_explicit_acc_ids 起步重新 resolve, 不
195 // 累积 stale resolutions. 之前 `rec.allowed_acc_ids.clone()` 起
196 // 步会让连续 expand 累积 — 若 keys.json 没动但 cache 里某 acc 不
197 // 再可见, 旧 resolved acc_id 仍留在 allowed set 中. 现在每次
198 // expand 都从 file 源原始集合重新计算. raw 为 None → 空集起步.
199 let mut acc_ids = rec.raw_explicit_acc_ids.clone().unwrap_or_default();
200 for cn in &card_nums {
201 let candidates = resolver(cn);
202 match candidates.len() {
203 0 => {
204 unresolved += 1;
205 unresolved_callback(&rec.id, cn);
206 }
207 1 => {
208 acc_ids.insert(candidates[0]);
209 resolved += 1;
210 }
211 _ => {
212 ambiguous += 1;
213 ambiguous_callback(&rec.id, cn, &candidates);
214 }
215 }
216 }
217 // v1.4.103 codex F1 (P1) fail-closed: 无论 acc_ids 是否非空, 都
218 // **必须** 写入 Some(...) — 哪怕是空 HashSet (= "denylist 全部",
219 // 限额引擎 step 0 acc_id 白名单非空 + 不含 ctx.acc_id → reject).
220 //
221 // 旧逻辑 (silent unrestricted): `if !acc_ids.is_empty() { rec.allowed_acc_ids = Some(acc_ids); }`
222 // 当 caller 配置 allowed_card_nums 但**全部 unresolved/ambiguous** 时,
223 // acc_ids 留空, allowed_acc_ids 仍 None → 限额引擎按 "无限制" 处理 →
224 // 受限 key silent unrestricted (反模式 D / pitfall #45).
225 //
226 // 新逻辑: 只要 caller 显式写了 allowed_card_nums (说明 *intent* 是限制),
227 // 就强制 Some(acc_ids) — 即便空集. 限额引擎检测到 Some(empty) 时
228 // 视为 "全 reject" (限额 step 0 `allowed.is_empty()` 已 short-circuit
229 // 不 reject, 但 contains check 永远 false → reject).
230 //
231 // **wait**: 看 limits.rs:332 `check_full_skip_rate`, step 0 是
232 // `if let (Some(allowed), Some(id)) = (&limits.allowed_acc_ids, ctx.acc_id) && !allowed.is_empty() && !allowed.contains(&id)`.
233 // 关键: `!allowed.is_empty()` short-circuit empty set, 等于 "无限制".
234 // 这就是 silent-unrestricted 的根源. 但 `allowed.is_empty()` 短路是
235 // **故意的语义** (允许 None / 空集都视为 "不限制") — 改这个会破坏
236 // 现有 user contract.
237 //
238 // **正确修法**: 既然空集语义 = 无限制不能改, 我们要把 caller intent
239 // (allowed_card_nums 非空) → 限额能识别 "想限但无法 resolve" 的状态.
240 // 选: 把 sentinel acc_id (e.g. 0) 写入 acc_ids 触发 reject — 因为
241 // 没有真账户 acc_id == 0. 限额检查时 acc_ids = {0}, ctx.acc_id =
242 // <real id> ≠ 0 → reject. legitimate id 也 reject — 这是
243 // fail-closed 保守语义.
244 if !card_nums.is_empty() {
245 if acc_ids.is_empty() {
246 // 全部 unresolved/ambiguous → 写 sentinel 0 让限额 reject 一切
247 acc_ids.insert(0u64);
248 }
249 rec.allowed_acc_ids = Some(acc_ids);
250 } else if !acc_ids.is_empty() {
251 rec.allowed_acc_ids = Some(acc_ids);
252 }
253 }
254 self.current.store(Arc::new(KeysFile {
255 version: current.version,
256 keys: new_keys,
257 }));
258 (resolved, unresolved, ambiguous)
259 }
260
261 /// 明文校验:遍历所有未过期 key,匹配则返回 KeyRecord 快照
262 ///
263 /// 如果 key 设置了 `allowed_machines` 且本机不在白名单,会打 warn 日志并视为未匹配。
264 /// 这样做法的代价:攻击者可以通过"能不能过"侧信道区分 key 是否存在 — 我们接受,
265 /// 因为 plaintext 空间是 256 bit 随机 hex,侧信道没意义。
266 pub fn verify(&self, plaintext: &str) -> Option<Arc<KeyRecord>> {
267 let snap = self.current.load_full();
268 let now = Utc::now();
269 for k in snap.keys.iter() {
270 if k.is_expired(now) {
271 continue;
272 }
273 if k.matches(plaintext) {
274 if let Err(e) = k.check_machine() {
275 tracing::warn!(
276 key_id = %k.id,
277 error = %e,
278 "api key matched but machine binding failed; rejecting"
279 );
280 return None;
281 }
282 return Some(Arc::new(k.clone()));
283 }
284 }
285 None
286 }
287
288 /// 是否已加载 keys 文件(非 empty)
289 #[must_use]
290 pub fn is_configured(&self) -> bool {
291 self.path.is_some()
292 }
293
294 pub fn path(&self) -> Option<&Path> {
295 self.path.as_deref()
296 }
297
298 #[must_use]
299 pub fn len(&self) -> usize {
300 self.current.load().keys.len()
301 }
302
303 #[must_use]
304 pub fn is_empty(&self) -> bool {
305 self.len() == 0
306 }
307
308 /// v1.4.105 eli #4 fix: 当前 KeyStore 是否有任意 key 配置了
309 /// `allowed_card_nums` 限制. 用于 standalone MCP / gRPC / 任何不持
310 /// `TrdCache` 的 keystore consumer 在启动时判断:
311 /// - `false` → 没有 card_num 限制, 跳过 daemon `GetAccList` + expand 全流程
312 /// (避免无意义的 daemon 请求)
313 /// - `true` → 必须连 daemon, 调 `GetAccList`, 通过
314 /// [`Self::expand_allowed_card_nums`] 把 card_num resolve 成 acc_id;
315 /// 否则 fail-closed sentinel `{0}` 会让所有真账户 reject (eli BUG
316 /// v1.4.104-002: standalone MCP 漏调 expand 导致 `0757` 配置的 key
317 /// 全 reject).
318 ///
319 /// 注意: 本方法只检查 raw `allowed_card_nums` 是否非空 — load_file 阶段
320 /// 注入的 sentinel `allowed_acc_ids = {0}` **不**算"已 expand"; 只有
321 /// caller 真跑过 [`Self::expand_allowed_card_nums`] 后才会用 resolved
322 /// acc_ids 覆盖 sentinel.
323 #[must_use]
324 pub fn has_any_card_num_restrictions(&self) -> bool {
325 self.current
326 .load()
327 .keys
328 .iter()
329 .any(|k| k.allowed_card_nums.as_ref().is_some_and(|v| !v.is_empty()))
330 }
331
332 /// 按 id 查询当前快照中的 key(**不做 expiry / machine 校验**,调用方自己做)
333 ///
334 /// 典型用法:MCP 在启动时 `verify(plaintext)` 拿到 id,后续每个请求用
335 /// `get_by_id` 取最新记录,这样 SIGHUP 重载 keys.json 后 scope / 限额 /
336 /// expires_at 的变更能立刻生效(不用重启进程)。
337 ///
338 /// 返回 None 表示 id 在当前文件里不存在(被 remove_key 删掉了),调用方应
339 /// 视为"key 已吊销"直接拒绝。
340 ///
341 /// **注意**:此方法**不做 machine binding 校验**。对于跨 SIGHUP 的 per-msg /
342 /// per-tool 复检场景应改用 [`Self::get_by_id_for_current_machine`],确保
343 /// SIGHUP 后新加的 `allowed_machines` 限制立即生效(避免 startup 验过 →
344 /// SIGHUP 收紧 → 仍按老 record 放行的语义漂移)。
345 pub fn get_by_id(&self, id: &str) -> Option<Arc<KeyRecord>> {
346 let snap = self.current.load_full();
347 snap.keys
348 .iter()
349 .find(|k| k.id == id)
350 .map(|k| Arc::new(k.clone()))
351 }
352
353 /// 按 id 查询当前快照中的 key + 立即校验本机 machine binding。
354 ///
355 /// **统一生命周期入口**: 任何 surface (WS / MCP / REST / gRPC) 在
356 /// 已 verify-once → 跨 SIGHUP 复检场景下应使用此方法替代裸 [`Self::get_by_id`],
357 /// 避免如下漂移:
358 ///
359 /// - startup 时 `verify(plaintext)` 检查 machine binding ✅
360 /// - SIGHUP reload 把该 key 的 `allowed_machines` 收紧(移除本机指纹)
361 /// - 后续 per-msg / per-tool 仅调 `get_by_id` → **绕过 machine binding** →
362 /// silent unrestricted (反模式 D / pitfall #45 silent-success 同模式)
363 ///
364 /// 行为:
365 /// - id 不存在 → `None`(key 已被 remove_key 吊销,caller 视为吊销拒绝)
366 /// - id 存在 + machine 校验通过 → `Some(rec)`
367 /// - id 存在 + machine 校验失败 → `None` + warn log(与 `verify` 同语义)
368 ///
369 /// **不做 expiry 校验** —— pipeline.rs Step 1.5 / caller 自己做(与
370 /// `get_by_id` 行为对齐,仅差 machine 一层)。
371 pub fn get_by_id_for_current_machine(&self, id: &str) -> Option<Arc<KeyRecord>> {
372 let rec = self.get_by_id(id)?;
373 if let Err(e) = rec.check_machine() {
374 tracing::warn!(
375 key_id = %rec.id,
376 error = %e,
377 "api key get_by_id_for_current_machine: machine binding failed; \
378 treating as revoked (caller should reject as if key not found)"
379 );
380 return None;
381 }
382 Some(rec)
383 }
384
385 /// 导出当前所有 keys 的 id(用于调试 / 审计)
386 #[must_use]
387 pub fn ids(&self) -> Vec<String> {
388 self.current
389 .load()
390 .keys
391 .iter()
392 .map(|k| k.id.clone())
393 .collect()
394 }
395}
396
397/// 追加一条新 key 到 keys.json(atomic rename)
398/// v1.4.106 codex 0558 F5 (P2): RMW (read-modify-write) helper that holds the
399/// flock for the **entire** sequence — load, mutate, write. Without this,
400/// concurrent append_key callers can load → load → write → write and lose
401/// the first writer's record.
402fn with_keys_lock<F, R>(path: &Path, f: F) -> Result<R, KeyStoreError>
403where
404 F: FnOnce(&Path) -> Result<R, KeyStoreError>,
405{
406 if let Some(parent) = path.parent()
407 && !parent.as_os_str().is_empty()
408 {
409 fs::create_dir_all(parent).map_err(|source| KeyStoreError::Write {
410 path: parent.to_path_buf(),
411 source,
412 })?;
413 }
414 let _guard = AdvisoryLockGuard::acquire_exclusive(path)?;
415 f(path)
416}
417
418pub fn append_key(path: &Path, new_record: KeyRecord) -> Result<(), KeyStoreError> {
419 with_keys_lock(path, |path| {
420 let mut file = match fs::metadata(path) {
421 Ok(_) => KeyStore::load_file(path)?,
422 Err(_) => KeysFile {
423 version: CURRENT_VERSION,
424 keys: vec![],
425 },
426 };
427 if file.keys.iter().any(|k| k.id == new_record.id) {
428 return Err(KeyStoreError::DuplicateId(new_record.id));
429 }
430 file.keys.push(new_record);
431 write_atomic_inner(path, &file)
432 })
433}
434
435/// 读取 keys.json 并返回所有记录快照(展示用;不暴露 hash 以外的敏感位)
436pub fn list_keys(path: &Path) -> Result<Vec<KeyRecord>, KeyStoreError> {
437 let file = KeyStore::load_file(path)?;
438 Ok(file.keys)
439}
440
441/// 按 id 编辑一条 key(atomic rename);闭包返回 `false` 代表未改动 → 跳过落盘
442///
443/// 适用于就地修改 `allowed_machines` / `expires_at` / `note` 等配置,
444/// 而不想走 "revoke + regen" 流程(否则 plaintext 会换)。
445pub fn update_key<F>(path: &Path, id: &str, mutate: F) -> Result<bool, KeyStoreError>
446where
447 F: FnOnce(&mut KeyRecord) -> bool,
448{
449 // v1.4.106 F5: 整个 RMW 在 flock 内, 防 concurrent update_key 互相覆盖
450 with_keys_lock(path, |path| {
451 let mut file = KeyStore::load_file(path)?;
452 let Some(rec) = file.keys.iter_mut().find(|k| k.id == id) else {
453 return Ok(false);
454 };
455 let changed = mutate(rec);
456 if changed {
457 write_atomic_inner(path, &file)?;
458 }
459 Ok(changed)
460 })
461}
462
463/// 按 id 删除一条 key(atomic rename);返回是否真的删掉了一条
464pub fn remove_key(path: &Path, id: &str) -> Result<bool, KeyStoreError> {
465 // v1.4.106 F5: 整个 RMW 在 flock 内
466 with_keys_lock(path, |path| {
467 let mut file = KeyStore::load_file(path)?;
468 let before = file.keys.len();
469 file.keys.retain(|k| k.id != id);
470 let removed = before != file.keys.len();
471 if removed {
472 write_atomic_inner(path, &file)?;
473 }
474 Ok(removed)
475 })
476}
477
478/// v1.4.106 codex 0558 F5 (P2): atomic write with advisory flock + unique
479/// tempfile + fsync.
480///
481/// **背景**: 之前的 write_atomic 有 3 个问题:
482/// 1. `tmp = path.with_extension("json.tmp")` — 同 path **每个 process 共
483/// 享**, 并发写 (daemon + futucli + multiple admin reload) 会互相覆写
484/// tempfile, 最后 rename 时数据交错.
485/// 2. **无 advisory flock** — 没有跨 process coordination, race 可让 reader
486/// 看到部分写完的 `keys.json` (rename 前如果别的 process 也在 truncate).
487/// 3. **无 fsync** — 写完立刻 rename 在 ext4 数据=writeback 模式 / SSD power
488/// loss 下可能丢内容 (file 在 inode 层面存在但 data block 没 flush).
489///
490/// **修法**: 1) tempfile 名带 pid + nanos: `keys.json.<pid>.<nanos>.tmp` — race-free.
491/// 2) 对 `keys.json.lock` (sibling lock file) 取 LOCK_EX flock; 读路径
492/// (load_file) 取 LOCK_SH (本 commit 暂只写路径用 LOCK_EX, 读端后续可加 —
493/// 写不会破坏 reader 因为 rename 是原子 inode 替换). 3) tempfile open 后
494/// write_all → sync_all → close → set_permissions → rename → 持有 lock 期间.
495///
496/// v1.4.106 codex 0558 F5: 写盘只做 unique tempfile + fsync + rename. flock 由
497/// caller (with_keys_lock) 在 RMW 范围统一持有, 这里**不再**单独加锁 (避免
498/// 与 with_keys_lock 重入).
499fn write_atomic_inner(path: &Path, file: &KeysFile) -> Result<(), KeyStoreError> {
500 let text = serde_json::to_string_pretty(file)?;
501
502 // v1.4.106 F5: unique tempfile (pid + nanos), 防 concurrent rename 战 tempfile.
503 let tmp_name = match path.file_name().and_then(|n| n.to_str()) {
504 Some(name) => format!(
505 "{name}.{pid}.{nanos}.tmp",
506 pid = std::process::id(),
507 nanos = std::time::SystemTime::now()
508 .duration_since(std::time::UNIX_EPOCH)
509 .map(|d| d.as_nanos())
510 .unwrap_or(0),
511 ),
512 None => format!(
513 "keys.{pid}.{nanos}.tmp",
514 pid = std::process::id(),
515 nanos = std::time::SystemTime::now()
516 .duration_since(std::time::UNIX_EPOCH)
517 .map(|d| d.as_nanos())
518 .unwrap_or(0),
519 ),
520 };
521 let tmp = path
522 .parent()
523 .map(|p| p.join(&tmp_name))
524 .unwrap_or_else(|| Path::new(&tmp_name).to_path_buf());
525
526 // 写入 tempfile + fsync — 0600 mode 通过 OpenOptions (Unix) 创建时即生效.
527 use std::io::Write;
528 #[cfg(unix)]
529 let mut f = {
530 use std::os::unix::fs::OpenOptionsExt;
531 fs::OpenOptions::new()
532 .create_new(true)
533 .write(true)
534 .mode(0o600)
535 .open(&tmp)
536 .map_err(|source| KeyStoreError::Write {
537 path: tmp.clone(),
538 source,
539 })?
540 };
541 #[cfg(not(unix))]
542 let mut f = fs::OpenOptions::new()
543 .create_new(true)
544 .write(true)
545 .open(&tmp)
546 .map_err(|source| KeyStoreError::Write {
547 path: tmp.clone(),
548 source,
549 })?;
550
551 let write_res = f
552 .write_all(text.as_bytes())
553 .and_then(|_| f.sync_all())
554 .map_err(|source| KeyStoreError::Write {
555 path: tmp.clone(),
556 source,
557 });
558 drop(f);
559
560 if let Err(e) = write_res {
561 let _ = fs::remove_file(&tmp);
562 return Err(e);
563 }
564
565 // 防御 chmod (Unix), OpenOptions.mode 已 0600, 但部分 fs / umask 异常时兜底.
566 #[cfg(unix)]
567 {
568 use std::os::unix::fs::PermissionsExt;
569 let _ = fs::set_permissions(&tmp, fs::Permissions::from_mode(0o600));
570 }
571
572 // atomic rename → tempfile 替换 inode, reader 看到的永远是完整 file.
573 fs::rename(&tmp, path).map_err(|source| KeyStoreError::Write {
574 path: path.to_path_buf(),
575 source,
576 })?;
577
578 Ok(())
579}
580
581// ============================================================================
582// v1.4.106 codex 0558 F5 (P2): advisory file lock helper (Unix flock-based)
583// ============================================================================
584//
585// 用 sibling `<path>.lock` 文件作锁文件 (空文件 just for inode), LOCK_EX 时
586// 阻塞其他 acquire_exclusive caller. drop 时 LOCK_UN 释放.
587//
588// 选择 sibling lock file (而非 lock keys.json 本身):
589// - 避免 lock + rename 互动 (rename 替换 inode, 老 fd 上的 lock 实际作用于
590// 老 inode, 新 reader 拿不到锁信号)
591// - 跨 process 一致 — 任何 process open `<path>.lock` 拿同 inode
592
593#[cfg(unix)]
594struct AdvisoryLockGuard {
595 fd: std::os::fd::OwnedFd,
596}
597
598#[cfg(unix)]
599impl AdvisoryLockGuard {
600 fn acquire_exclusive(path: &Path) -> Result<Self, KeyStoreError> {
601 use std::os::fd::AsRawFd;
602 use std::os::unix::fs::OpenOptionsExt;
603
604 // sibling lock path: `<dir>/<file_name>.lock` (不复用
605 // path.with_extension — 它替换最后一个扩展名, 我们要追加一个新扩展名).
606 let lock_path = path
607 .parent()
608 .map(|p| {
609 let mut name = path
610 .file_name()
611 .and_then(|n| n.to_str())
612 .unwrap_or("keys")
613 .to_string();
614 name.push_str(".lock");
615 p.join(name)
616 })
617 .unwrap_or_else(|| std::path::PathBuf::from("keys.lock"));
618
619 let file = fs::OpenOptions::new()
620 .create(true)
621 .write(true)
622 .truncate(false)
623 .mode(0o600)
624 .open(&lock_path)
625 .map_err(|source| KeyStoreError::Write {
626 path: lock_path.clone(),
627 source,
628 })?;
629 let raw = file.as_raw_fd();
630 // SAFETY: flock(2) is async-signal-safe; we hold OwnedFd via File
631 // until the guard drops, so fd is valid.
632 let rc = unsafe { libc::flock(raw, libc::LOCK_EX) };
633 if rc != 0 {
634 return Err(KeyStoreError::Write {
635 path: lock_path,
636 source: std::io::Error::last_os_error(),
637 });
638 }
639 let owned: std::os::fd::OwnedFd = file.into();
640 Ok(Self { fd: owned })
641 }
642}
643
644#[cfg(unix)]
645impl Drop for AdvisoryLockGuard {
646 fn drop(&mut self) {
647 use std::os::fd::AsRawFd;
648 // best-effort unlock on close; closing fd also implicitly releases lock.
649 let _ = unsafe { libc::flock(self.fd.as_raw_fd(), libc::LOCK_UN) };
650 }
651}
652
/// Placeholder lock guard for non-Unix targets; it holds no resource.
#[cfg(not(unix))]
struct AdvisoryLockGuard;

#[cfg(not(unix))]
impl AdvisoryLockGuard {
    /// Always succeeds without taking any lock.
    fn acquire_exclusive(_path: &Path) -> Result<Self, KeyStoreError> {
        // No locking on Windows for now; could be implemented with
        // LockFileEx later. The original authors assume a single-user setup
        // where writers do not race.
        Ok(Self)
    }
}
663
664#[cfg(test)]
665mod tests;