storage_benchmarks/
directory_benchmarks.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{Benchmark, CacheClearableFilesystem, Filesystem, OperationDuration, OperationTimer};
6use async_trait::async_trait;
7use std::collections::VecDeque;
8use std::ffi::{CStr, CString};
9use std::mem::MaybeUninit;
10use std::ops::Range;
11use std::os::fd::RawFd;
12use std::os::unix::ffi::OsStringExt;
13use std::path::{Path, PathBuf};
14
15/// Describes the structure of a directory tree to be benchmarked.
16#[derive(Copy, Clone)]
17pub struct DirectoryTreeStructure {
18    /// The number of files that will be created within each directory.
19    pub files_per_directory: u64,
20
21    /// The number of subdirectories that will be created within each directory. No subdirectories
22    /// are created once |max_depth| is reached.
23    pub directories_per_directory: u64,
24
25    /// Specifies how many levels of directories to create.
26    pub max_depth: u32,
27}
28
29impl DirectoryTreeStructure {
30    /// Returns the maximum size of queue required to do a breadth first traversal of a directory
31    /// tree with this structure.
32    fn max_pending(&self) -> u64 {
33        self.directories_per_directory.pow(self.max_depth)
34    }
35
36    /// Creates a directory tree as described by this object starting from |root|.
37    fn create_directory_tree(&self, root: PathBuf) {
38        struct DirInfo {
39            path: PathBuf,
40            depth: u32,
41        }
42        let mut pending = VecDeque::new();
43        pending.push_back(DirInfo { path: root, depth: 0 });
44        while let Some(dir) = pending.pop_front() {
45            for i in 0..self.files_per_directory {
46                let path = dir.path.join(file_name(i));
47                std::fs::File::create(path).unwrap();
48            }
49            if self.max_depth == dir.depth {
50                continue;
51            }
52            for i in 0..self.directories_per_directory {
53                let path = dir.path.join(dir_name(i));
54                std::fs::create_dir(&path).unwrap();
55                pending.push_back(DirInfo { path, depth: dir.depth + 1 });
56            }
57        }
58    }
59
60    /// Generates a list of paths that would be created by `create_directory_tree`.
61    fn enumerate_paths(&self) -> Vec<PathBuf> {
62        let mut paths = Vec::new();
63        self.enumerate_paths_impl(Path::new(""), &mut paths);
64        paths
65    }
66
67    fn enumerate_paths_impl(&self, base: &Path, paths: &mut Vec<PathBuf>) {
68        for i in 0..self.files_per_directory {
69            paths.push(base.join(file_name(i)));
70        }
71        if self.max_depth == 0 {
72            return;
73        }
74        for i in 0..self.directories_per_directory {
75            let path = base.join(dir_name(i));
76            paths.push(path.clone());
77            Self { max_depth: self.max_depth - 1, ..*self }.enumerate_paths_impl(&path, paths);
78        }
79    }
80}
81
82/// A benchmark that measures how long it takes to walk a directory that should not already be
83/// cached in memory.
84#[derive(Clone)]
85pub struct WalkDirectoryTreeCold {
86    dts: DirectoryTreeStructure,
87    iterations: u64,
88}
89
90impl WalkDirectoryTreeCold {
91    pub fn new(dts: DirectoryTreeStructure, iterations: u64) -> Self {
92        Self { dts, iterations }
93    }
94}
95
96#[async_trait]
97impl<T: CacheClearableFilesystem> Benchmark<T> for WalkDirectoryTreeCold {
98    async fn run(&self, fs: &mut T) -> Vec<OperationDuration> {
99        storage_trace::duration!(
100            c"benchmark",
101            c"WalkDirectoryTreeCold",
102            "files_per_directory" => self.dts.files_per_directory,
103            "directories_per_directory" => self.dts.directories_per_directory,
104            "max_depth" => self.dts.max_depth
105        );
106        let root = fs.benchmark_dir().to_path_buf();
107
108        self.dts.create_directory_tree(root.clone());
109        let max_pending = self.dts.max_pending() as usize;
110        let mut durations = Vec::new();
111        for i in 0..self.iterations {
112            fs.clear_cache().await;
113            storage_trace::duration!(c"benchmark", c"WalkDirectoryTree", "iteration" => i);
114            let timer = OperationTimer::start();
115            walk_directory_tree(root.clone(), max_pending);
116            durations.push(timer.stop());
117        }
118        durations
119    }
120
121    fn name(&self) -> String {
122        format!(
123            "WalkDirectoryTreeCold/Files{}/Dirs{}/Depth{}",
124            self.dts.files_per_directory, self.dts.directories_per_directory, self.dts.max_depth
125        )
126    }
127}
128
129/// A benchmark that measures how long it takes to walk a directory that was recently accessed and
130/// may be cached in memory.
131#[derive(Clone)]
132pub struct WalkDirectoryTreeWarm {
133    dts: DirectoryTreeStructure,
134    iterations: u64,
135}
136
137impl WalkDirectoryTreeWarm {
138    pub fn new(dts: DirectoryTreeStructure, iterations: u64) -> Self {
139        Self { dts, iterations }
140    }
141}
142
143#[async_trait]
144impl<T: Filesystem> Benchmark<T> for WalkDirectoryTreeWarm {
145    async fn run(&self, fs: &mut T) -> Vec<OperationDuration> {
146        storage_trace::duration!(
147            c"benchmark",
148            c"WalkDirectoryTreeWarm",
149            "files_per_directory" => self.dts.files_per_directory,
150            "directories_per_directory" => self.dts.directories_per_directory,
151            "max_depth" => self.dts.max_depth
152        );
153        let root = fs.benchmark_dir().to_path_buf();
154
155        self.dts.create_directory_tree(root.clone());
156        let max_pending = self.dts.max_pending() as usize;
157        let mut durations = Vec::new();
158        for i in 0..self.iterations {
159            storage_trace::duration!(c"benchmark", c"WalkDirectoryTree", "iteration" => i);
160            let timer = OperationTimer::start();
161            walk_directory_tree(root.clone(), max_pending);
162            durations.push(timer.stop());
163        }
164        durations
165    }
166
167    fn name(&self) -> String {
168        format!(
169            "WalkDirectoryTreeWarm/Files{}/Dirs{}/Depth{}",
170            self.dts.files_per_directory, self.dts.directories_per_directory, self.dts.max_depth
171        )
172    }
173}
174
175/// Performs a breadth first traversal of the directory tree starting at |root|. As an optimization,
176/// |max_pending| can be supplied to pre-allocate the queue needed for the breadth first traversal.
177fn walk_directory_tree(root: PathBuf, max_pending: usize) {
178    let mut pending = VecDeque::new();
179    pending.reserve(max_pending);
180    pending.push_back(root);
181    while let Some(dir) = pending.pop_front() {
182        storage_trace::duration!(c"benchmark", c"read_dir");
183        for entry in std::fs::read_dir(&dir).unwrap() {
184            let entry = entry.unwrap();
185            if entry.file_type().unwrap().is_dir() {
186                pending.push_back(entry.path());
187            }
188        }
189    }
190}
191
192/// A benchmark that measures how long it takes to call stat on a path to a file. A distinct file is
193/// used for each iteration.
194#[derive(Clone)]
195pub struct StatPath {
196    file_count: u64,
197}
198
199impl StatPath {
200    pub fn new() -> Self {
201        Self { file_count: 100 }
202    }
203}
204
205#[async_trait]
206impl<T: Filesystem> Benchmark<T> for StatPath {
207    async fn run(&self, fs: &mut T) -> Vec<OperationDuration> {
208        storage_trace::duration!(c"benchmark", c"StatPath");
209
210        let root = fs.benchmark_dir().to_path_buf();
211        for i in 0..self.file_count {
212            std::fs::File::create(root.join(file_name(i))).unwrap();
213        }
214
215        let root_fd =
216            open_path(&path_buf_to_c_string(root), libc::O_DIRECTORY | libc::O_RDONLY).unwrap();
217
218        let mut durations = Vec::with_capacity(self.file_count as usize);
219        for i in 0..self.file_count {
220            let path = path_buf_to_c_string(file_name(i));
221            storage_trace::duration!(c"benchmark", c"stat", "file" => i);
222            let timer = OperationTimer::start();
223            stat_path_at(&root_fd, &path).unwrap();
224            durations.push(timer.stop());
225        }
226        durations
227    }
228
229    fn name(&self) -> String {
230        "StatPath".to_string()
231    }
232}
233
234/// A benchmark that measures how long it takes to open a file. A distinct file is used for each
235/// iteration.
236#[derive(Clone)]
237pub struct OpenFile {
238    file_count: u64,
239}
240
241impl OpenFile {
242    pub fn new() -> Self {
243        Self { file_count: 100 }
244    }
245}
246
247#[async_trait]
248impl<T: Filesystem> Benchmark<T> for OpenFile {
249    async fn run(&self, fs: &mut T) -> Vec<OperationDuration> {
250        storage_trace::duration!(c"benchmark", c"OpenFile");
251
252        let root = fs.benchmark_dir().to_path_buf();
253        for i in 0..self.file_count {
254            std::fs::File::create(root.join(file_name(i))).unwrap();
255        }
256
257        let root_fd =
258            open_path(&path_buf_to_c_string(root), libc::O_DIRECTORY | libc::O_RDONLY).unwrap();
259
260        let mut durations = Vec::with_capacity(self.file_count as usize);
261        for i in 0..self.file_count {
262            let path = path_buf_to_c_string(file_name(i));
263            // Pull the file outside of the trace so it doesn't capture the close call.
264            let _file = {
265                storage_trace::duration!(c"benchmark", c"open", "file" => i);
266                let timer = OperationTimer::start();
267                let file = open_path_at(&root_fd, &path, libc::O_RDWR);
268                durations.push(timer.stop());
269                file
270            };
271        }
272        durations
273    }
274
275    fn name(&self) -> String {
276        "OpenFile".to_string()
277    }
278}
279
280/// A benchmark that measures how long it takes to open a file from a path that contains multiple
281/// directories. A distinct path and file is used for each iteration.
282#[derive(Clone)]
283pub struct OpenDeeplyNestedFile {
284    file_count: u64,
285    depth: u64,
286}
287
288impl OpenDeeplyNestedFile {
289    pub fn new() -> Self {
290        Self { file_count: 100, depth: 5 }
291    }
292
293    fn dir_path(&self, n: u64) -> PathBuf {
294        let mut path = PathBuf::new();
295        for i in 0..self.depth {
296            path = path.join(format!("dir-{:03}-{:03}", n, i));
297        }
298        path
299    }
300}
301
302#[async_trait]
303impl<T: Filesystem> Benchmark<T> for OpenDeeplyNestedFile {
304    async fn run(&self, fs: &mut T) -> Vec<OperationDuration> {
305        storage_trace::duration!(c"benchmark", c"OpenDeeplyNestedFile");
306
307        let root = fs.benchmark_dir().to_path_buf();
308        for i in 0..self.file_count {
309            let dir_path = root.join(self.dir_path(i));
310            std::fs::create_dir_all(&dir_path).unwrap();
311            std::fs::File::create(dir_path.join(file_name(i))).unwrap();
312        }
313
314        let root_fd =
315            open_path(&path_buf_to_c_string(root), libc::O_DIRECTORY | libc::O_RDONLY).unwrap();
316
317        let mut durations = Vec::with_capacity(self.file_count as usize);
318        for i in 0..self.file_count {
319            let path = path_buf_to_c_string(self.dir_path(i).join(file_name(i)));
320            // Pull the file outside of the trace so it doesn't capture the close call.
321            let _file = {
322                storage_trace::duration!(c"benchmark", c"open", "file" => i);
323                let timer = OperationTimer::start();
324                let file = open_path_at(&root_fd, &path, libc::O_RDWR);
325                durations.push(timer.stop());
326                file
327            };
328        }
329        durations
330    }
331
332    fn name(&self) -> String {
333        "OpenDeeplyNestedFile".to_string()
334    }
335}
336
337/// A benchmark that mimics the filesystem usage pattern of `git status`.
338#[derive(Clone)]
339pub struct GitStatus {
340    dts: DirectoryTreeStructure,
341    iterations: u64,
342
343    /// The number of threads to use when stat'ing all of the files.
344    stat_threads: usize,
345}
346
347impl GitStatus {
348    pub fn new() -> Self {
349        // This will create 254 directories and 1020 files. A depth of 13 would create 16382
350        // directories and 65532 files which is close to the size of fuchsia.git.
351        let dts = DirectoryTreeStructure {
352            files_per_directory: 4,
353            directories_per_directory: 2,
354            max_depth: 7,
355        };
356
357        // Git uses many threads when stat'ing large repositories but using multiple threads in the
358        // benchmarks significantly increases the performance variation between runs.
359        Self { dts, iterations: 5, stat_threads: 1 }
360    }
361
362    /// Stat all of the paths in `paths`. This mimics git checking to see if any of the files in its
363    /// index have been modified.
364    fn stat_paths(&self, root: &OpenFd, paths: &Vec<CString>) {
365        storage_trace::duration!(c"benchmark", c"GitStatus::stat_paths");
366        std::thread::scope(|scope| {
367            for thread in 0..self.stat_threads {
368                scope.spawn(move || {
369                    let paths = &paths;
370                    for path in batch_range(paths.len(), self.stat_threads, thread) {
371                        storage_trace::duration!(c"benchmark", c"stat");
372                        stat_path_at(root, &paths[path]).unwrap();
373                    }
374                });
375            }
376        });
377    }
378
379    /// Performs a recursive depth first traversal of the directory tree.
380    fn walk_repo(&self, dir: &Path) {
381        storage_trace::duration!(c"benchmark", c"GitStatus::walk_repo");
382        for entry in std::fs::read_dir(&dir).unwrap() {
383            let entry = entry.unwrap();
384            if entry.file_type().unwrap().is_dir() {
385                self.walk_repo(&entry.path());
386            }
387        }
388    }
389}
390
391#[async_trait]
392impl<T: Filesystem> Benchmark<T> for GitStatus {
393    async fn run(&self, fs: &mut T) -> Vec<OperationDuration> {
394        let root = &fs.benchmark_dir().to_path_buf();
395
396        self.dts.create_directory_tree(root.clone());
397        let paths = self.dts.enumerate_paths().into_iter().map(path_buf_to_c_string).collect();
398
399        let root_fd =
400            open_path(&path_buf_to_c_string(root.clone()), libc::O_DIRECTORY | libc::O_RDONLY)
401                .unwrap();
402
403        let mut durations = Vec::new();
404        for i in 0..self.iterations {
405            storage_trace::duration!(c"benchmark", c"GitStatus", "iteration" => i);
406            let timer = OperationTimer::start();
407            self.stat_paths(&root_fd, &paths);
408            self.walk_repo(&root);
409            durations.push(timer.stop());
410        }
411        durations
412    }
413
414    fn name(&self) -> String {
415        "GitStatus".to_owned()
416    }
417}
418
419pub fn file_name(n: u64) -> PathBuf {
420    format!("file-{:03}.txt", n).into()
421}
422
423pub fn dir_name(n: u64) -> PathBuf {
424    format!("dir-{:03}", n).into()
425}
426
427pub fn path_buf_to_c_string(path: PathBuf) -> CString {
428    CString::new(path.into_os_string().into_vec()).unwrap()
429}
430
431pub fn stat_path_at(dir: &OpenFd, path: &CStr) -> Result<libc::stat, std::io::Error> {
432    unsafe {
433        let mut stat = MaybeUninit::uninit();
434        // `libc::fstatat` is directly used instead of `std::fs::metadata` because
435        // `std::fs::metadata` uses `statx` on Linux.
436        let result =
437            libc::fstatat(dir.0, path.as_ptr(), stat.as_mut_ptr(), libc::AT_SYMLINK_NOFOLLOW);
438
439        if result == 0 {
440            Ok(stat.assume_init())
441        } else {
442            Err(std::io::Error::last_os_error())
443        }
444    }
445}
446
447/// Splits a collection of items into batches and returns the range of items that `batch_num`
448/// contains.
449fn batch_range(item_count: usize, batch_count: usize, batch_num: usize) -> Range<usize> {
450    let items_per_batch = (item_count + (batch_count - 1)) / batch_count;
451    let start = items_per_batch * batch_num;
452    let end = std::cmp::min(item_count, start + items_per_batch);
453    start..end
454}
455
456pub struct OpenFd(RawFd);
457
458impl Drop for OpenFd {
459    fn drop(&mut self) {
460        unsafe { libc::close(self.0) };
461    }
462}
463
464pub fn open_path(path: &CStr, flags: libc::c_int) -> Result<OpenFd, std::io::Error> {
465    let result = unsafe { libc::open(path.as_ptr(), flags) };
466    if result >= 0 {
467        Ok(OpenFd(result))
468    } else {
469        Err(std::io::Error::last_os_error())
470    }
471}
472
473pub fn open_path_at(
474    dir: &OpenFd,
475    path: &CStr,
476    flags: libc::c_int,
477) -> Result<OpenFd, std::io::Error> {
478    let result = unsafe { libc::openat(dir.0, path.as_ptr(), flags) };
479    if result >= 0 {
480        Ok(OpenFd(result))
481    } else {
482        Err(std::io::Error::last_os_error())
483    }
484}
485
486#[cfg(test)]
487mod tests {
488    use super::*;
489    use crate::testing::TestFilesystem;
490
491    const ITERATION_COUNT: u64 = 3;
492
493    #[derive(PartialEq, Debug)]
494    struct DirectoryTree {
495        files: u64,
496        directories: Vec<DirectoryTree>,
497    }
498
499    fn read_in_directory_tree(root: PathBuf) -> DirectoryTree {
500        let mut tree = DirectoryTree { files: 0, directories: vec![] };
501        for entry in std::fs::read_dir(root).unwrap() {
502            let entry = entry.unwrap();
503            if entry.file_type().unwrap().is_dir() {
504                tree.directories.push(read_in_directory_tree(entry.path()));
505            } else {
506                tree.files += 1;
507            }
508        }
509        tree
510    }
511
512    #[fuchsia::test]
513    async fn create_directory_tree_with_depth_of_zero_test() {
514        let test_fs = Box::new(TestFilesystem::new());
515        let dts = DirectoryTreeStructure {
516            files_per_directory: 3,
517            directories_per_directory: 2,
518            max_depth: 0,
519        };
520        let root = test_fs.benchmark_dir().to_owned();
521        dts.create_directory_tree(root.clone());
522
523        let directory_tree = read_in_directory_tree(root);
524        assert_eq!(directory_tree, DirectoryTree { files: 3, directories: vec![] });
525        test_fs.shutdown().await;
526    }
527
528    #[fuchsia::test]
529    async fn create_directory_tree_with_depth_of_two_test() {
530        let test_fs = Box::new(TestFilesystem::new());
531        let dts = DirectoryTreeStructure {
532            files_per_directory: 4,
533            directories_per_directory: 2,
534            max_depth: 2,
535        };
536        let root = test_fs.benchmark_dir().to_owned();
537        dts.create_directory_tree(root.clone());
538
539        let directory_tree = read_in_directory_tree(root);
540        assert_eq!(
541            directory_tree,
542            DirectoryTree {
543                files: 4,
544                directories: vec![
545                    DirectoryTree {
546                        files: 4,
547                        directories: vec![
548                            DirectoryTree { files: 4, directories: vec![] },
549                            DirectoryTree { files: 4, directories: vec![] }
550                        ]
551                    },
552                    DirectoryTree {
553                        files: 4,
554                        directories: vec![
555                            DirectoryTree { files: 4, directories: vec![] },
556                            DirectoryTree { files: 4, directories: vec![] }
557                        ]
558                    }
559                ]
560            }
561        );
562        test_fs.shutdown().await;
563    }
564
565    #[fuchsia::test]
566    fn enumerate_paths_test() {
567        let dts = DirectoryTreeStructure {
568            files_per_directory: 2,
569            directories_per_directory: 2,
570            max_depth: 0,
571        };
572        let paths = dts.enumerate_paths();
573        assert_eq!(paths, vec![PathBuf::from("file-000.txt"), PathBuf::from("file-001.txt")],);
574
575        let dts = DirectoryTreeStructure {
576            files_per_directory: 2,
577            directories_per_directory: 2,
578            max_depth: 1,
579        };
580        let paths = dts.enumerate_paths();
581        assert_eq!(
582            paths,
583            vec![
584                PathBuf::from("file-000.txt"),
585                PathBuf::from("file-001.txt"),
586                PathBuf::from("dir-000"),
587                PathBuf::from("dir-000/file-000.txt"),
588                PathBuf::from("dir-000/file-001.txt"),
589                PathBuf::from("dir-001"),
590                PathBuf::from("dir-001/file-000.txt"),
591                PathBuf::from("dir-001/file-001.txt"),
592            ],
593        );
594    }
595
596    #[fuchsia::test]
597    fn batch_range_test() {
598        assert_eq!(batch_range(10, 1, 0), 0..10);
599
600        assert_eq!(batch_range(10, 3, 0), 0..4);
601        assert_eq!(batch_range(10, 3, 1), 4..8);
602        assert_eq!(batch_range(10, 3, 2), 8..10);
603
604        assert_eq!(batch_range(10, 4, 3), 9..10);
605        assert_eq!(batch_range(12, 4, 3), 9..12);
606    }
607
608    #[fuchsia::test]
609    async fn walk_directory_tree_cold_test() {
610        let mut test_fs = Box::new(TestFilesystem::new());
611        let dts = DirectoryTreeStructure {
612            files_per_directory: 2,
613            directories_per_directory: 2,
614            max_depth: 2,
615        };
616        let results = WalkDirectoryTreeCold::new(dts, ITERATION_COUNT).run(test_fs.as_mut()).await;
617
618        assert_eq!(results.len(), ITERATION_COUNT as usize);
619        assert_eq!(test_fs.clear_cache_count().await, ITERATION_COUNT);
620        test_fs.shutdown().await;
621    }
622
623    #[fuchsia::test]
624    async fn walk_directory_tree_warm_test() {
625        let mut test_fs = Box::new(TestFilesystem::new());
626        let dts = DirectoryTreeStructure {
627            files_per_directory: 2,
628            directories_per_directory: 2,
629            max_depth: 2,
630        };
631        let results = WalkDirectoryTreeWarm::new(dts, ITERATION_COUNT).run(test_fs.as_mut()).await;
632
633        assert_eq!(results.len(), ITERATION_COUNT as usize);
634        assert_eq!(test_fs.clear_cache_count().await, 0);
635        test_fs.shutdown().await;
636    }
637
638    #[fuchsia::test]
639    async fn stat_path_test() {
640        let mut test_fs = Box::new(TestFilesystem::new());
641        let benchmark = StatPath { file_count: 5 };
642        let results = benchmark.run(test_fs.as_mut()).await;
643        assert_eq!(results.len(), 5);
644        assert_eq!(test_fs.clear_cache_count().await, 0);
645        test_fs.shutdown().await;
646    }
647
648    #[fuchsia::test]
649    async fn open_file_test() {
650        let mut test_fs = Box::new(TestFilesystem::new());
651        let benchmark = OpenFile { file_count: 5 };
652        let results = benchmark.run(test_fs.as_mut()).await;
653        assert_eq!(results.len(), 5);
654        assert_eq!(test_fs.clear_cache_count().await, 0);
655        test_fs.shutdown().await;
656    }
657
658    #[fuchsia::test]
659    async fn open_deeply_nested_file_test() {
660        let mut test_fs = Box::new(TestFilesystem::new());
661        let benchmark = OpenDeeplyNestedFile { file_count: 5, depth: 3 };
662        let results = benchmark.run(test_fs.as_mut()).await;
663        assert_eq!(results.len(), 5);
664        assert_eq!(test_fs.clear_cache_count().await, 0);
665        test_fs.shutdown().await;
666    }
667
668    #[fuchsia::test]
669    async fn git_status_test() {
670        let mut test_fs = Box::new(TestFilesystem::new());
671        let dts = DirectoryTreeStructure {
672            files_per_directory: 2,
673            directories_per_directory: 2,
674            max_depth: 2,
675        };
676        let benchmark = GitStatus { dts, iterations: ITERATION_COUNT, stat_threads: 1 };
677        let results = benchmark.run(test_fs.as_mut()).await;
678
679        assert_eq!(results.len(), ITERATION_COUNT as usize);
680        assert_eq!(test_fs.clear_cache_count().await, 0);
681        test_fs.shutdown().await;
682    }
683}