-
Notifications
You must be signed in to change notification settings - Fork 10
Shared fetch state 2 #727
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Shared fetch state 2 #727
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,7 @@ use super::cli::{self, GitError}; | |
| use super::refs::{branch_name_without_origin, origin_ref_for_branch}; | ||
| use super::status_parse::is_conflicted_status; | ||
| use serde::Serialize; | ||
| use std::collections::HashMap; | ||
| use std::collections::{HashMap, HashSet}; | ||
| use std::path::Path; | ||
| use std::sync::{Mutex, OnceLock}; | ||
| use std::time::{SystemTime, UNIX_EPOCH}; | ||
|
|
@@ -154,6 +154,35 @@ fn fetch_cache() -> &'static Mutex<HashMap<String, FetchCacheEntry>> { | |
| CACHE.get_or_init(|| Mutex::new(HashMap::new())) | ||
| } | ||
|
|
||
| /// Repo-level fetch tracking for local projects. | ||
| /// | ||
| /// A single `git fetch` updates all remote refs for a repo, so when multiple | ||
| /// branches share the same local `.git` directory we only need to hit the | ||
| /// network once per TTL window. This cache is keyed by repo path and tracks | ||
| /// *which* refspecs were included in the last fetch so we can do a cheap | ||
| /// narrow fetch for any new refspecs that appear. | ||
| #[derive(Debug, Clone)] | ||
| struct RepoFetchEntry { | ||
| fetched_at: i64, | ||
| fetched_refspecs: HashSet<String>, | ||
| } | ||
|
|
||
| fn repo_fetch_cache() -> &'static Mutex<HashMap<String, RepoFetchEntry>> { | ||
| static CACHE: OnceLock<Mutex<HashMap<String, RepoFetchEntry>>> = OnceLock::new(); | ||
| CACHE.get_or_init(|| Mutex::new(HashMap::new())) | ||
| } | ||
|
|
||
| /// Extract the repo path portion from a local cache key (`local:{repo_path}:…`). | ||
| fn repo_key_from_local_cache_key(cache_key: &str) -> Option<String> { | ||
| let rest = cache_key.strip_prefix("local:")?; | ||
| // The key format is `local:{repo_path}:{branch}:{base}`. | ||
| // repo_path may contain colons (e.g. Windows paths, though unlikely on | ||
| // macOS/Linux). We split from the right to peel off base and branch. | ||
| let (without_base, _base) = rest.rsplit_once(':')?; | ||
| let (repo_path, _branch) = without_base.rsplit_once(':')?; | ||
| Some(format!("local:{repo_path}")) | ||
| } | ||
|
|
||
| fn now_ms() -> i64 { | ||
| SystemTime::now() | ||
| .duration_since(UNIX_EPOCH) | ||
|
|
@@ -180,6 +209,47 @@ fn refspec_for(remote_branch: &str) -> String { | |
| format!("+refs/heads/{branch}:refs/remotes/origin/{branch}") | ||
| } | ||
|
|
||
| /// Determine which refspecs still need fetching given repo-level cache state. | ||
| /// | ||
| /// Returns `None` if no fetch is needed at all (repo fresh + all refspecs covered). | ||
| /// Returns `Some(refspecs)` with the list of refspecs to fetch — either all of | ||
| /// them (repo stale) or just the missing ones (repo fresh, new refspecs). | ||
| fn refspecs_to_fetch( | ||
| repo_key: Option<&str>, | ||
| needed: &[&str], | ||
| fetch_mode: FetchMode, | ||
| now: i64, | ||
| ) -> Option<Vec<String>> { | ||
| match fetch_mode { | ||
| FetchMode::Never => None, | ||
| FetchMode::Force => Some(needed.iter().map(|s| s.to_string()).collect()), | ||
| FetchMode::Ttl => { | ||
| let repo_entry = repo_key.and_then(|key| { | ||
| repo_fetch_cache() | ||
| .lock() | ||
| .ok() | ||
| .and_then(|cache| cache.get(key).cloned()) | ||
| }); | ||
| match repo_entry { | ||
| Some(entry) if now.saturating_sub(entry.fetched_at) <= FETCH_TTL_MS => { | ||
| // Repo is fresh — only fetch refspecs not already covered. | ||
| let missing: Vec<String> = needed | ||
| .iter() | ||
| .filter(|rs| !entry.fetched_refspecs.contains(**rs)) | ||
| .map(|s| s.to_string()) | ||
| .collect(); | ||
| if missing.is_empty() { | ||
| None | ||
| } else { | ||
| Some(missing) | ||
| } | ||
| } | ||
| _ => Some(needed.iter().map(|s| s.to_string()).collect()), | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| fn refresh_refs_if_needed<F>( | ||
| cache_key: &str, | ||
| run_git: &F, | ||
|
|
@@ -196,15 +266,45 @@ where | |
| .ok() | ||
| .and_then(|cache| cache.get(cache_key).cloned()); | ||
|
|
||
| let should_fetch = match (fetch_mode, &previous) { | ||
| (FetchMode::Never, _) => false, | ||
| (FetchMode::Force, _) => true, | ||
| (FetchMode::Ttl, Some(entry)) => now.saturating_sub(entry.fetched_at) > FETCH_TTL_MS, | ||
| (FetchMode::Ttl, None) => true, | ||
| let base_refspec = refspec_for(base_branch); | ||
| let branch_refspec = refspec_for(branch_name); | ||
|
|
||
| // Build the list of needed refspecs (deduplicated). | ||
| let needed: Vec<&str> = if branch_refspec != base_refspec { | ||
| vec![base_refspec.as_str(), branch_refspec.as_str()] | ||
| } else { | ||
| vec![base_refspec.as_str()] | ||
| }; | ||
|
|
||
| // For local keys, consult the repo-level cache to avoid redundant fetches | ||
| // when multiple branches share the same repo. | ||
| let repo_key = repo_key_from_local_cache_key(cache_key); | ||
| let to_fetch = refspecs_to_fetch(repo_key.as_deref(), &needed, fetch_mode, now); | ||
|
|
||
| // Also check the per-branch cache for the legacy should_fetch decision | ||
| // (used when there's no repo key, i.e. remote branches). | ||
| let should_fetch = if repo_key.is_some() { | ||
| to_fetch.is_some() | ||
| } else { | ||
| match (fetch_mode, &previous) { | ||
| (FetchMode::Never, _) => false, | ||
| (FetchMode::Force, _) => true, | ||
| (FetchMode::Ttl, Some(entry)) => now.saturating_sub(entry.fetched_at) > FETCH_TTL_MS, | ||
| (FetchMode::Ttl, None) => true, | ||
| } | ||
| }; | ||
|
|
||
| if !should_fetch { | ||
| let fetched_at = previous.as_ref().map(|entry| entry.fetched_at); | ||
| let fetched_at = previous.as_ref().map(|entry| entry.fetched_at).or_else(|| { | ||
| // For local keys the repo-level cache may have a timestamp even | ||
| // if the per-branch cache doesn't yet. | ||
| repo_key.as_deref().and_then(|key| { | ||
| repo_fetch_cache() | ||
| .lock() | ||
| .ok() | ||
| .and_then(|cache| cache.get(key).map(|e| e.fetched_at)) | ||
| }) | ||
| }); | ||
| return RefreshOutcome { | ||
| fetch: FetchGitState { | ||
| status: if fetched_at.is_some() { | ||
|
|
@@ -222,25 +322,36 @@ where | |
| }; | ||
| } | ||
|
|
||
| let base_refspec = refspec_for(base_branch); | ||
| let branch_refspec = refspec_for(branch_name); | ||
| // Determine refspecs to actually send over the wire. | ||
| let fetch_refspecs: Vec<String> = to_fetch.unwrap_or_else(|| { | ||
| // Fallback for remote keys (no repo-level cache) — fetch all needed. | ||
| needed.iter().map(|s| s.to_string()).collect() | ||
| }); | ||
|
|
||
| // Is this a narrow supplemental fetch (repo fresh, just filling in gaps)? | ||
| let is_narrow = repo_key.is_some() && fetch_refspecs.len() < needed.len(); | ||
|
|
||
| let mut upstream_known_missing = false; | ||
|
|
||
| // Fetch both refspecs in a single network call when they differ. | ||
| if branch_refspec != base_refspec { | ||
| match run_git(&[ | ||
| "fetch", | ||
| "--prune", | ||
| "origin", | ||
| base_refspec.as_str(), | ||
| branch_refspec.as_str(), | ||
| ]) { | ||
| if fetch_refspecs.len() > 1 { | ||
| let refs: Vec<&str> = fetch_refspecs.iter().map(String::as_str).collect(); | ||
| let mut args = vec!["fetch"]; | ||
| if !is_narrow { | ||
| args.push("--prune"); | ||
| } | ||
| args.push("origin"); | ||
| args.extend(refs.iter()); | ||
| match run_git(&args) { | ||
| Err(error) if is_missing_remote_ref(&error) => { | ||
| // The branch refspec is missing on the remote. Re-fetch with | ||
| // just the base refspec so we still get base branch updates. | ||
| if let Err(base_err) = | ||
| run_git(&["fetch", "--prune", "origin", base_refspec.as_str()]) | ||
| { | ||
| let mut retry_args = vec!["fetch"]; | ||
| if !is_narrow { | ||
| retry_args.push("--prune"); | ||
| } | ||
| retry_args.push("origin"); | ||
| retry_args.push(base_refspec.as_str()); | ||
| if let Err(base_err) = run_git(&retry_args) { | ||
| return RefreshOutcome { | ||
| fetch: FetchGitState { | ||
| status: FetchStatus::Failed, | ||
|
|
@@ -264,17 +375,34 @@ where | |
| } | ||
| Ok(_) => {} | ||
| } | ||
| } else if let Err(error) = run_git(&["fetch", "--prune", "origin", base_refspec.as_str()]) { | ||
| return RefreshOutcome { | ||
| fetch: FetchGitState { | ||
| status: FetchStatus::Failed, | ||
| fetched_at: previous.map(|entry| entry.fetched_at), | ||
| error: Some(error.trim().to_string()), | ||
| }, | ||
| upstream_known_missing: false, | ||
| }; | ||
| } else { | ||
| let refspec = fetch_refspecs | ||
| .first() | ||
| .map(String::as_str) | ||
| .unwrap_or(base_refspec.as_str()); | ||
| let mut args = vec!["fetch"]; | ||
| if !is_narrow { | ||
| args.push("--prune"); | ||
| } | ||
| args.push("origin"); | ||
| args.push(refspec); | ||
| if let Err(error) = run_git(&args) { | ||
| if is_missing_remote_ref(&error) { | ||
| upstream_known_missing = true; | ||
|
Comment on lines
+389
to
+391
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When the repo cache is fresh and the only uncovered refspec is the base branch, Useful? React with 👍 / 👎. |
||
| } else { | ||
| return RefreshOutcome { | ||
| fetch: FetchGitState { | ||
| status: FetchStatus::Failed, | ||
| fetched_at: previous.map(|entry| entry.fetched_at), | ||
| error: Some(error.trim().to_string()), | ||
| }, | ||
| upstream_known_missing: false, | ||
| }; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Update per-branch cache. | ||
| if let Ok(mut cache) = fetch_cache().lock() { | ||
| cache.insert( | ||
| cache_key.to_string(), | ||
|
|
@@ -285,6 +413,24 @@ where | |
| ); | ||
| } | ||
|
|
||
| // Update repo-level cache for local keys. | ||
| if let Some(ref rk) = repo_key { | ||
| if let Ok(mut cache) = repo_fetch_cache().lock() { | ||
| let entry = cache.entry(rk.clone()).or_insert_with(|| RepoFetchEntry { | ||
| fetched_at: now, | ||
| fetched_refspecs: HashSet::new(), | ||
| }); | ||
| entry.fetched_at = now; | ||
| if is_narrow { | ||
| for rs in &fetch_refspecs { | ||
| entry.fetched_refspecs.insert(rs.clone()); | ||
| } | ||
| } else { | ||
| entry.fetched_refspecs = needed.iter().map(|s| s.to_string()).collect(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| RefreshOutcome { | ||
| fetch: FetchGitState { | ||
| status: FetchStatus::Fresh, | ||
|
|
@@ -521,8 +667,34 @@ pub fn compute_local_branch_git_state( | |
|
|
||
| /// Check whether a fetch is needed for the given cache key and mode. | ||
| /// Used by timeline to decide whether to use the two-stream path. | ||
| /// | ||
| /// For local keys this consults the repo-level cache so the decision is | ||
| /// consistent with `refresh_refs_if_needed` — if another branch on the same | ||
| /// repo recently fetched, this returns `false`. | ||
| pub fn needs_fetch(cache_key: &str, fetch_mode: FetchMode) -> bool { | ||
| let now = now_ms(); | ||
|
|
||
| // For local keys, check the repo-level cache first. | ||
| if let Some(repo_key) = repo_key_from_local_cache_key(cache_key) { | ||
| return match fetch_mode { | ||
| FetchMode::Never => false, | ||
| FetchMode::Force => true, | ||
| FetchMode::Ttl => { | ||
| let repo_fresh = repo_fetch_cache() | ||
| .lock() | ||
| .ok() | ||
| .and_then(|cache| cache.get(&repo_key).cloned()) | ||
| .map(|entry| now.saturating_sub(entry.fetched_at) <= FETCH_TTL_MS) | ||
| .unwrap_or(false); | ||
| // Even if the repo is fresh, we might still need a narrow | ||
| // fetch for uncovered refspecs — but that's fast enough that | ||
| // we don't need the two-stream split for it. | ||
| !repo_fresh | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| // Remote / non-local keys: fall back to per-branch cache. | ||
| let previous = fetch_cache() | ||
| .lock() | ||
| .ok() | ||
|
|
@@ -1395,4 +1567,38 @@ mod tests { | |
| assert!(!is_conflicted_status('R', ' ')); | ||
| assert!(!is_conflicted_status('?', '?')); | ||
| } | ||
|
|
||
| #[test] | ||
| fn repo_key_from_local_cache_key_normal_path() { | ||
| assert_eq!( | ||
| repo_key_from_local_cache_key("local:/Users/me/project:feature:main"), | ||
| Some("local:/Users/me/project".to_string()), | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn repo_key_from_local_cache_key_windows_path() { | ||
| assert_eq!( | ||
| repo_key_from_local_cache_key("local:C:\\Users\\me\\project:feature:main"), | ||
| Some("local:C:\\Users\\me\\project".to_string()), | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn repo_key_from_local_cache_key_branch_equals_base() { | ||
| assert_eq!( | ||
| repo_key_from_local_cache_key("local:/repo:main:main"), | ||
| Some("local:/repo".to_string()), | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn repo_key_from_local_cache_key_not_local() { | ||
| assert_eq!(repo_key_from_local_cache_key("remote:foo:bar:baz"), None); | ||
| } | ||
|
|
||
| #[test] | ||
| fn repo_key_from_local_cache_key_too_few_segments() { | ||
| assert_eq!(repo_key_from_local_cache_key("local:only_one"), None); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When local branches are stored as separate git worktrees,
cache_keyis built from the per-branch worktree path (compute_local_branch_git_statepassesrepo.display()for that worktree), so deriving the repo-level key from this string still produces a different key for every branch. In that common local-project flow the new repo cache never gets shared, and each branch can still perform its own network fetch inside the TTL instead of reusing the fetch done for the shared.gitcommon dir. Consider keying this cache fromgit rev-parse --git-common-dir(or the original clone path) rather than the worktree path.Useful? React with 👍 / 👎.