use crate::{Branch, BranchIter, Commit, HashId};
use atomptr::AtomPtr;
use git2::{ObjectType, TreeWalkMode, TreeWalkResult};
use std::collections::{BTreeMap, BTreeSet};
use std::{path::PathBuf, sync::Arc};

/// A tree of files
///
/// Holds a handle to the underlying repository and an index that maps
/// every repo-internal path of one commit to its tree entry.  The
/// index lives behind an [`AtomPtr`] so that [`FileTree::parse`] can
/// replace it as a whole.
pub struct FileTree {
    repo: Arc<git2::Repository>,
    tree: AtomPtr<BTreeMap<String, Arc<TreeEntry>>>,
}

impl FileTree {
    /// Utility function to create a tree, and then parse it too
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`FileTree::parse`].
    pub(crate) fn new(repo: &Arc<git2::Repository>, commit: HashId) -> Arc<Self> {
        Arc::new(Self {
            repo: Arc::clone(repo),
            tree: AtomPtr::new(BTreeMap::new()),
        })
        .parse(commit)
    }

    /// Parse a tree from a specific commit
    ///
    /// Walks the complete tree of `commit`, builds a fresh path index
    /// and swaps it into place.  Entries that are neither blobs nor
    /// trees (for example submodule links), and entries whose name is
    /// not valid UTF-8, are left out of the index instead of aborting
    /// the whole parse.
    ///
    /// # Panics
    ///
    /// Panics if `commit` can't be found in the repository, if its
    /// tree can't be read, or if walking the tree fails.
    pub(crate) fn parse(self: Arc<Self>, commit: HashId) -> Arc<Self> {
        let mut new_tree = BTreeMap::new();

        let tree = self
            .repo
            .find_commit(commit.to_oid())
            .expect("commit not found in repository")
            .tree()
            .expect("failed to read the tree of the commit");

        tree.walk(TreeWalkMode::PreOrder, |root, entry| {
            // `TreeEntry::generate` only understands named blobs and
            // trees; anything else is skipped (together with whatever
            // is below it) so it can't bring down the whole parse.
            match entry.kind() {
                Some(ObjectType::Blob) | Some(ObjectType::Tree) if entry.name().is_some() => {}
                _ => return TreeWalkResult::Skip,
            }

            let path_segs: Vec<_> = root.split('/').filter(|s| !s.is_empty()).collect();
            let path = if path_segs.is_empty() {
                None
            } else {
                Some(path_segs)
            };

            let te = TreeEntry::generate(path, entry);
            new_tree.insert(te.path(), Arc::new(te));
            TreeWalkResult::Ok
        })
        .expect("failed to walk the tree of the commit");

        // Add a special entry for the root of the repo
        new_tree.insert(
            "".into(),
            Arc::new(TreeEntry::Dir(Directory {
                id: tree.id().into(),
                path: "".into(),
                name: "".into(),
            })),
        );

        // `tree` borrows from `self.repo`, so it has to be dropped
        // explicitly before `self` can be moved out as return value
        drop(tree);

        // Atomically swap new tree into place
        self.tree.swap(new_tree);
        self
    }

    /// Look up the index entry stored under `path`, if there is one
    fn get_entry(&self, path: &str) -> Option<Arc<TreeEntry>> {
        self.tree.get_ref().get(path).cloned()
    }

    /// Load a file or directory entry in this `FileTree`
    ///
    /// For a file this yields its contents, for a directory it yields
    /// the list of its direct children.  Use the empty path `""` to
    /// address the root of the repository.
    ///
    /// Returns `None` if `path` is not part of this tree, or if the
    /// underlying object can't be found in the repository.
    pub fn load(self: &Arc<Self>, path: &str) -> Option<Yield> {
        self.get_entry(path).and_then(|entry| entry.load(self))
    }

    /// Get the history of a path with a branch iterator
    ///
    /// This function is very computationally intensive, because it
    /// will step through the entire iterator to pull commits from,
    /// and see if they touch the respective path.
    pub fn history(&self, iter: BranchIter, path: &str) -> Vec<Commit> {
        iter.filter_map(|c| {
            if c.commit()
                .get_paths()
                .into_iter()
                .collect::<BTreeSet<_>>()
                .contains(path)
            {
                Some(c.commit().clone())
            } else {
                None
            }
        })
        .collect()
    }
}

/// Data yielded from loading a part of the file tree
///
/// This type is returned when fetching a path via `FileTree::load()`,
/// and can either be a single file read into memory, or an
/// enumeration of direct children of a directory.  In this case, the
/// information about child status (file or directory) is fetched
/// during creation from the underlying repository.
///
/// In order to traverse the next level of the subtree, you need to
/// append a child name to the existing path, and run
/// [`FileTree::load()`](FileTree::load) again.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum Yield { /// Load a single file into a buffer File(Vec), /// Enumerate children in a directory Dir(Vec), } /// Simple type to disambiguate between files and directories #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum YieldEntry { /// A file name File(String), /// A directory name Dir(String), } #[derive(Debug)] enum TreeEntry { /// A single file File(File), /// A sub-tree Dir(Directory), } impl TreeEntry { fn generate(path_segments: Option>, entry: &git2::TreeEntry) -> Self { let path = path_segments.map_or("".into(), |p| path_segs_join(p)); let id = entry.id().into(); let name = entry.name().unwrap().into(); match entry.kind() { Some(ObjectType::Blob) => Self::File(File::new(id, path, name)), Some(ObjectType::Tree) => Self::Dir(Directory::new(id, path, name)), _ => unimplemented!(), } } fn load(&self, ft: &Arc) -> Option { let id = self.id(); let repo = &ft.repo; match self { Self::File(_) => repo .find_blob(id.into()) .ok() .map(|b| Yield::File(b.content().into())), Self::Dir(_) => repo .find_tree(id.into()) .ok() .map(|tree| { let mut children = vec![]; // Iterate the tree, but only as long as there are no // additional path segments tree.walk(TreeWalkMode::PreOrder, |p, entry| { let path_segs: Vec<_> = p.split("/").filter(|s| s != &"").collect(); if path_segs.len() > 0 { TreeWalkResult::Skip } else { // Take the current tree path and append // the name of the current entry on it let path = PathBuf::new().join(self.path()).join(entry.name().unwrap()); let s: String = path.as_path().to_str().unwrap().into(); // Construct a YieldEntry via a FileTree lookup children.push(if ft.get_entry(s.as_str()).unwrap().is_directory() { YieldEntry::Dir(s) } else { YieldEntry::File(s) }); TreeWalkResult::Ok } }) .unwrap(); children }) .map(|c| Yield::Dir(c)), } } fn is_directory(&self) -> bool { match self { Self::Dir(_) => true, Self::File(_) => false, } } fn id(&self) -> HashId { match self { 
Self::File(ref f) => f.id.clone(), Self::Dir(ref d) => d.id.clone(), } } /// Get the repo-internal path (including name) /// /// This is used to index files in a file tree, to allow O(1) /// access to deeply nested items. fn path(&self) -> String { match self { Self::File(ref f) => PathBuf::new().join(&f.path).join(&f.name), Self::Dir(ref d) => PathBuf::new().join(&d.path).join(&d.name), } .as_path() .to_str() .unwrap() .into() } } #[derive(Debug)] struct File { id: HashId, path: String, name: String, } impl File { fn new(id: HashId, path: String, name: String) -> Self { Self { id, path, name } } } #[derive(Debug)] struct Directory { id: HashId, path: String, name: String, } impl Directory { fn new(id: HashId, path: String, name: String) -> Self { Self { id, path, name } } #[allow(unused)] fn enumerate(&self, repo: git2::Repository) -> Vec { vec![] } } ////////////////////////////////