Skip to content

Commit 54e0907

Browse files
AaronMoat and codex committed
feat: Use untracked cache when dir walking
Co-authored-by: GPT 5.4 <codex@openai.com>
1 parent ed1e75b commit 54e0907

19 files changed

Lines changed: 1117 additions & 11 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crate-status.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -843,7 +843,7 @@ A git directory walk.
843843
* [x] pathspec based filtering
844844
* [ ] multi-threaded initialization of icase hash table is always used to accelerate index lookups, even if ignoreCase = false for performance
845845
* [ ] special handling of submodules (for now, submodules or nested repositories are detected, but they can't be walked into naturally)
846-
* [ ] accelerated walk with `untracked`-cache (as provided by `UNTR` extension of `gix_index::File`)
846+
* [x] accelerated walk with `untracked`-cache (as provided by `UNTR` extension of `gix_index::File`)
847847

848848
### gix-index
849849

gix-dir/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,10 @@ gix-utils = { version = "^0.3.1", path = "../gix-utils", features = ["bstr"] }
3333
bstr = { version = "1.12.0", default-features = false }
3434
thiserror = "2.0.18"
3535

36+
[target.'cfg(unix)'.dependencies]
37+
libc = { version = "0.2.182" }
38+
rustix = { version = "1.1.2", default-features = false, features = ["std", "system"] }
39+
3640
[dev-dependencies]
3741
gix-testtools = { path = "../tests/tools" }
3842
gix-fs = { path = "../gix-fs" }

gix-dir/src/walk/function.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use bstr::{BStr, BString, ByteSlice};
77

88
use crate::{
99
entry,
10-
walk::{classify, readdir, Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome},
10+
walk::{classify, readdir, untracked_cache, Action, Context, Delegate, Error, ForDeletionMode, Options, Outcome},
1111
EntryRef,
1212
};
1313

@@ -106,6 +106,10 @@ pub fn walk(
106106
}
107107

108108
let mut state = readdir::State::new(worktree_root, ctx.current_dir, options.for_deletion.is_some());
109+
let untracked_cache = options
110+
.use_untracked_cache
111+
.then(|| untracked_cache::validate(worktree_root, ctx.index, &ctx, options))
112+
.flatten();
109113
let may_collapse = root != worktree_root && state.may_collapse(&current);
110114
let (action, _) = readdir::recursive(
111115
may_collapse,
@@ -117,6 +121,10 @@ pub fn walk(
117121
delegate,
118122
&mut out,
119123
&mut state,
124+
untracked_cache.as_ref(),
125+
untracked_cache
126+
.as_ref()
127+
.map(|cache: &untracked_cache::Validated<'_>| cache.root_dir()),
120128
)?;
121129
if action.is_continue() {
122130
state.emit_remaining(may_collapse, options, &mut out, delegate);

gix-dir/src/walk/mod.rs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ pub enum ForDeletionMode {
138138
}
139139

140140
/// Options for use in [`walk()`](function::walk()) function.
141-
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
141+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)]
142142
pub struct Options<'a> {
143143
/// If `true`, the filesystem will store paths as decomposed unicode, i.e. `ä` becomes `"a\u{308}"`, which means that
144144
/// we have to turn these forms back from decomposed to precomposed unicode before storing it in the index or generally
@@ -188,13 +188,36 @@ pub struct Options<'a> {
188188
///
189189
/// In other words, for Git compatibility this flag should be `false`, the default, for `git2` compatibility it should be `true`.
190190
pub symlinks_to_directories_are_ignored_like_directories: bool,
191+
/// If `true`, consult the untracked cache if it is present and otherwise applicable.
192+
pub use_untracked_cache: bool,
191193
/// A set of all git worktree checkouts that are located within the main worktree directory.
192194
///
193195
/// They will automatically be detected as 'tracked', but without providing index information (as there is no actual index entry).
194196
/// Note that the unicode composition must match the `precompose_unicode` field so that paths will match verbatim.
195197
pub worktree_relative_worktree_dirs: Option<&'a BTreeSet<BString>>,
196198
}
197199

200+
/// Hand-written `Default` for [`Options`]: every flag is `false`, every optional value is `None`,
/// and `emit_untracked` uses its own default, with the exception of `use_untracked_cache`
/// which is enabled.
impl Default for Options<'_> {
201+
/// Return options with everything turned off except for the use of the untracked cache.
fn default() -> Self {
202+
Self {
203+
precompose_unicode: false,
204+
ignore_case: false,
205+
recurse_repositories: false,
206+
emit_pruned: false,
207+
emit_ignored: None,
208+
for_deletion: None,
209+
classify_untracked_bare_repositories: false,
210+
emit_tracked: false,
211+
emit_untracked: Default::default(),
212+
emit_empty_directories: false,
213+
emit_collapsed: None,
214+
symlinks_to_directories_are_ignored_like_directories: false,
215+
// The only field that differs from what `#[derive(Default)]` would produce (`false` for a `bool`).
use_untracked_cache: true,
216+
worktree_relative_worktree_dirs: None,
217+
}
218+
}
219+
}
220+
198221
/// All information that is required to perform a dirwalk, and classify paths properly.
199222
pub struct Context<'a> {
200223
/// If not `None`, it will be checked before entering any directory to trigger early interruption.
@@ -306,3 +329,4 @@ pub enum Error {
306329
mod classify;
307330
pub(crate) mod function;
308331
mod readdir;
332+
mod untracked_cache;

gix-dir/src/walk/readdir.rs

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,34 @@ pub(super) fn recursive(
3434
delegate: &mut dyn Delegate,
3535
out: &mut Outcome,
3636
state: &mut State,
37+
untracked_cache: Option<&walk::untracked_cache::Validated<'_>>,
38+
untracked_cache_dir: Option<usize>,
3739
) -> Result<(Action, bool), Error> {
3840
if ctx.should_interrupt.is_some_and(|flag| flag.load(Ordering::Relaxed)) {
3941
return Err(Error::Interrupted);
4042
}
43+
if let Some((action, prevent_collapse)) = untracked_cache
44+
.zip(untracked_cache_dir)
45+
.filter(|(cache, dir)| cache.is_dir_valid(*dir, current))
46+
.map(|(cache, dir)| {
47+
recursive_from_untracked_cache(
48+
dir,
49+
may_collapse,
50+
current,
51+
current_bstr,
52+
current_info,
53+
ctx,
54+
opts,
55+
delegate,
56+
out,
57+
state,
58+
cache,
59+
)
60+
})
61+
.transpose()?
62+
{
63+
return Ok((action, prevent_collapse));
64+
}
4165
out.read_dir_calls += 1;
4266
let entries = gix_fs::read_dir(current, opts.precompose_unicode).map_err(|err| Error::ReadDir {
4367
path: current.to_owned(),
@@ -96,6 +120,15 @@ pub(super) fn recursive(
96120
delegate,
97121
out,
98122
state,
123+
untracked_cache,
124+
untracked_cache_dir.and_then(|dir| {
125+
untracked_cache.and_then(|cache| {
126+
let component = current_bstr
127+
.rfind_byte(b'/')
128+
.map_or(current_bstr.as_bstr(), |pos| current_bstr[pos + 1..].as_bstr());
129+
cache.child_dir(dir, component)
130+
})
131+
}),
99132
)?;
100133
prevent_collapse |= subdir_prevent_collapse;
101134
if action.is_break() {
@@ -141,6 +174,126 @@ pub(super) fn recursive(
141174
Ok((res, prevent_collapse))
142175
}
143176

177+
/// Walk the directory at `current` using the entries recorded for `cache_dir` in the validated
/// untracked cache; this function itself does not list the directory on disk.
///
/// Sub-directories listed by the cache are classified as directories and are either recursed into
/// via `recursive()` (passing the cache and their own cache index along) or emitted. Untracked
/// entries listed by the cache are classified and emitted. Entries that `state` holds back for
/// directory collapsing are not emitted here; `mark.reduce_held_entries()` is called last.
///
/// Returns the resulting [`Action`] together with the `prevent_collapse` flag, which callers
/// OR into their own flag. If the cache has no directory for `cache_dir`, nothing is emitted and
/// `(Continue, false)` is returned.
///
/// `current` and `current_bstr` are extended for each entry and restored afterwards, except when
/// returning early because an action requested a break or classification failed.
#[allow(clippy::too_many_arguments)]
178+
fn recursive_from_untracked_cache(
179+
cache_dir: usize,
180+
may_collapse: bool,
181+
current: &mut PathBuf,
182+
current_bstr: &mut BString,
183+
current_info: classify::Outcome,
184+
ctx: &mut Context<'_>,
185+
opts: Options<'_>,
186+
delegate: &mut dyn Delegate,
187+
out: &mut Outcome,
188+
state: &mut State,
189+
untracked_cache: &walk::untracked_cache::Validated<'_>,
190+
) -> Result<(Action, bool), Error> {
191+
// Without a cached directory for this index there is nothing to replay.
let Some(cached) = untracked_cache.directory(cache_dir) else {
192+
return Ok((std::ops::ControlFlow::Continue(()), false));
193+
};
194+
195+
// Counts every sub-directory and untracked entry visited; handed to `reduce_held_entries()` below.
let mut num_entries = 0;
196+
let mark = state.mark(may_collapse);
197+
// Starts out `true` if the directory itself is classified as tracked.
let mut prevent_collapse = current_info.status == Status::Tracked;
198+
199+
// First pass: the sub-directories the cache lists for this directory.
for &subdir_idx in cached.sub_directories() {
200+
// Indices the cache cannot resolve are skipped and not counted.
let Some(subdir) = untracked_cache.directory(subdir_idx) else {
201+
continue;
202+
};
203+
num_entries += 1;
204+
// Remember the length so the path can be restored after handling this entry.
let prev_len = current_bstr.len();
205+
// A separator is only needed if there already is a parent path.
if prev_len != 0 {
206+
current_bstr.push(b'/');
207+
}
208+
current_bstr.extend_from_slice(subdir.name());
209+
current.push(gix_path::from_bstr(subdir.name()));
210+
211+
let info = classify::path(
212+
current,
213+
current_bstr,
214+
// The offset at which the just-appended name starts within `current_bstr`.
if prev_len == 0 { 0 } else { prev_len + 1 },
215+
// The cache lists this entry as a directory, so the kind is provided up front.
Some(entry::Kind::Directory),
216+
|| Some(entry::Kind::Directory),
217+
opts,
218+
ctx,
219+
)?;
220+
if can_recurse(current_bstr.as_bstr(), info, opts.for_deletion, false, delegate) {
221+
let subdir_may_collapse = state.may_collapse(current);
222+
let (action, subdir_prevent_collapse) = recursive(
223+
subdir_may_collapse,
224+
current,
225+
current_bstr,
226+
info,
227+
ctx,
228+
opts,
229+
delegate,
230+
out,
231+
state,
232+
// Keep using the cache, now positioned at the sub-directory's own index.
Some(untracked_cache),
233+
Some(subdir_idx),
234+
)?;
235+
prevent_collapse |= subdir_prevent_collapse;
236+
if action.is_break() {
237+
// NOTE(review): returns without restoring `current`/`current_bstr` - presumably fine as the walk stops here; confirm.
return Ok((action, prevent_collapse));
238+
}
239+
} else if !state.held_for_directory_collapse(current_bstr.as_bstr(), info, &opts) {
240+
// Not recursed into and not held back: emit the directory entry itself.
let action = emit_entry(Cow::Borrowed(current_bstr.as_bstr()), info, None, opts, out, delegate);
241+
if action.is_break() {
242+
return Ok((action, prevent_collapse));
243+
}
244+
}
245+
// Undo the path extension before moving on to the next sibling.
current_bstr.truncate(prev_len);
246+
current.pop();
247+
}
248+
249+
// Second pass: the untracked entries the cache lists for this directory.
for file in cached.untracked_entries() {
250+
num_entries += 1;
251+
let prev_len = current_bstr.len();
252+
if prev_len != 0 {
253+
current_bstr.push(b'/');
254+
}
255+
current_bstr.extend_from_slice(file.as_ref());
256+
current.push(gix_path::from_bstr(file.as_bstr()));
257+
// Owned copy for the closure below; presumably needed because `current` is also passed to `classify::path()` - confirm.
let current_path = current.clone();
258+
259+
let info = classify::path(
260+
current,
261+
current_bstr,
262+
// The offset at which the just-appended name starts within `current_bstr`.
if prev_len == 0 { 0 } else { prev_len + 1 },
263+
// No kind is provided up front for these entries.
None,
264+
// Obtains the kind from `symlink_metadata()` of the entry; a failed lookup yields `None`.
|| {
265+
std::fs::symlink_metadata(&current_path)
266+
.ok()
267+
.map(|ft| ft.file_type().into())
268+
},
269+
opts,
270+
ctx,
271+
)?;
272+
if !state.held_for_directory_collapse(current_bstr.as_bstr(), info, &opts) {
273+
let action = emit_entry(Cow::Borrowed(current_bstr.as_bstr()), info, None, opts, out, delegate);
274+
if action.is_break() {
275+
return Ok((action, prevent_collapse));
276+
}
277+
}
278+
// Undo the path extension before moving on to the next entry.
current_bstr.truncate(prev_len);
279+
current.pop();
280+
}
281+
282+
// Presumably settles what happens to entries held back since `mark` was taken (emit or collapse) - see `reduce_held_entries()`.
let res = mark.reduce_held_entries(
283+
num_entries,
284+
state,
285+
&mut prevent_collapse,
286+
current,
287+
current_bstr.as_bstr(),
288+
current_info,
289+
opts,
290+
out,
291+
ctx,
292+
delegate,
293+
);
294+
Ok((res, prevent_collapse))
295+
}
296+
144297
pub(super) struct State {
145298
/// The entries to hold back until it's clear what to do with them.
146299
pub on_hold: Vec<Entry>,

0 commit comments

Comments
 (0)