diff --git a/apps/servers/octopus/supergit/src/bin/test.rs b/apps/servers/octopus/supergit/src/bin/test.rs index c4fcc2fbb3a..830c8b62f13 100644 --- a/apps/servers/octopus/supergit/src/bin/test.rs +++ b/apps/servers/octopus/supergit/src/bin/test.rs @@ -22,5 +22,6 @@ fn main() { let head = main.get_head(); let tree = head.get_tree(); - + + println!("{:?}", tree.load("")); } diff --git a/apps/servers/octopus/supergit/src/branch.rs b/apps/servers/octopus/supergit/src/branch.rs index dd92aea93cb..43222790024 100644 --- a/apps/servers/octopus/supergit/src/branch.rs +++ b/apps/servers/octopus/supergit/src/branch.rs @@ -4,7 +4,26 @@ use std::{mem, sync::Arc}; /// Abstraction for a branch history slice /// +/// Git implements an acyclical graph, where branches can be split, +/// and re-merge later. Traversal always happens from some point +/// onwards, backwards through the history. Because git repositories +/// can get quite large and this is a recursive process, it's very +/// quickly possible to overflow your program stack. To avoid this, +/// `supergit` uses an iterator design to enumerate commits. /// +/// Use the API on this type to specify your starting point. By +/// default, it will be the head of the branch you are looking at. +/// Note: not all branches have names! +/// +/// After creating a `BranchIter` you can then call `next()` on it, +/// yielding `BranchCommit` objects. These can either be single +/// commits, or various types of merge commits. Each merge commit +/// yields some set of `Branch` handles, that you can traverse +/// by building another `BranchIter`. +/// +/// A branch iterator is therefore always first-parent, meaning that +/// merged branches can simply be ignored by only ever inspecting the +/// current `Commit` contained by a `BranchCommit`. 
#[derive(Clone)] pub struct Branch { repo: Arc, @@ -56,6 +75,7 @@ impl Branch { } } + /// Create a branch iterator that stops when reaching a commit pub fn get_to(&self, commit: HashId) -> BranchIter { BranchIter::new( Arc::clone(&self.repo), @@ -64,12 +84,10 @@ impl Branch { ) } - /// Get the primary branch history as far back as it goes - pub fn get_all(&self) -> BranchIter { - BranchIter::new(Arc::clone(&self.repo), self.head.clone(), SegLimit::None) - } - - /// Get a branch segment of a certain length + /// Create a step-limited branch iterator + /// + /// This type of iterator is especially useful when combined with + /// `skip()`, to create a paginated view onto commits. pub fn get(&self, num: usize) -> BranchIter { BranchIter::new( Arc::clone(&self.repo), @@ -78,7 +96,17 @@ impl Branch { ) } - /// Get the commit pointed at by HEAD + /// Create an endless branch iterator + /// + /// While the creation of the iterator is instantaneous, actually + /// enumerating all commits in a repository can be quite + /// computationally intensive and is almost never what you + /// actually want. + pub fn get_all(&self) -> BranchIter { + BranchIter::new(Arc::clone(&self.repo), self.head.clone(), SegLimit::None) + } + + /// Get the current HEAD commit pub fn get_head(&self) -> Commit { Commit::new(&self.repo, self.head.clone()).unwrap() } @@ -89,10 +117,12 @@ impl Branch { } } -/// A branch segment iterator +/// A branch slice iterator, created via `Branch` handle /// -/// Each iterator is first-parent, but will notify you about a split -/// parent by setting +/// This iterator yields `BranchCommit` objects, that can either be +/// simple commits, or various types of merge commits with new Branch +/// handles. This means that without explicitly branching, this +/// iterator is first-parent. 
pub struct BranchIter { repo: Arc, curr: Option, diff --git a/apps/servers/octopus/supergit/src/commit.rs b/apps/servers/octopus/supergit/src/commit.rs index 14f2d9bafdc..bc7383d1ed6 100644 --- a/apps/servers/octopus/supergit/src/commit.rs +++ b/apps/servers/octopus/supergit/src/commit.rs @@ -26,7 +26,7 @@ impl Commit { self.id.to_string() } - /// Get the summary line as a utf-7 string + /// Get the summary line as a utf-8 string pub fn summary(&self) -> String { self.find().summary().unwrap().into() } @@ -55,6 +55,10 @@ impl Commit { .and_then(|c| Self::new(&self.repo, c.id().into())) } + /// Get the set of parents as a vector + /// + /// Use this function if you suspect a commit has more than one + /// parent. pub fn parents(&self) -> Vec { self.find() .parents() diff --git a/apps/servers/octopus/supergit/src/files.rs b/apps/servers/octopus/supergit/src/files.rs index d86a82306ee..fa68fbc2f3d 100644 --- a/apps/servers/octopus/supergit/src/files.rs +++ b/apps/servers/octopus/supergit/src/files.rs @@ -1,13 +1,13 @@ use crate::{Branch, BranchIter, Commit, HashId}; -use git2::{ObjectType, TreeWalkMode, TreeWalkResult}; use atomptr::AtomPtr; +use git2::{ObjectType, TreeWalkMode, TreeWalkResult}; use std::collections::BTreeMap; use std::{path::PathBuf, sync::Arc}; /// A tree of files pub struct FileTree { repo: Arc, - tree: AtomPtr>, + tree: AtomPtr>>, } impl FileTree { @@ -23,39 +23,78 @@ impl FileTree { /// Parse a tree from a specific commit pub(crate) fn parse(self: Arc, commit: HashId) -> Arc { let mut new_tree = BTreeMap::new(); - + let tree = (&self.repo) .find_commit(commit.to_oid()) .unwrap() .tree() .unwrap(); - tree.walk(TreeWalkMode::PreOrder, |what, entry| { - let path_segs: Vec<_> = what.split("/").filter(|s| s != &"").collect(); + tree.walk(TreeWalkMode::PreOrder, |p, entry| { + let path_segs: Vec<_> = p.split("/").filter(|s| s != &"").collect(); let path = if path_segs.len() == 0 { None } else { Some(path_segs) }; - println!("{:?} {}", path, 
entry.name().unwrap()); + let te = TreeEntry::generate(path, entry); + new_tree.insert(te.path(), Arc::new(te)); TreeWalkResult::Ok }) .unwrap(); + + // Add a special entry for the root of the repo + new_tree.insert( + "".into(), + Arc::new(TreeEntry::Dir(Directory { + id: tree.id().into(), + path: "".into(), + name: "".into(), + })), + ); + + // This is needed to make borrowchk shut up drop(tree); // Atomicly swap new tree into place self.tree.swap(new_tree); - + self } + + fn get_entry(&self, path: &str) -> Option> { + self.tree.get_ref().get(path).map(|e| Arc::clone(&e)) + } + + /// Load a file entry in this `FileTree` from disk + /// + /// When calling this function on a directory, nothing will happen + /// (returns `None`), because directories can't be loaded. If you + /// want to get a list of children for a directory, use + /// [`FileTree::enumerate()`]() instead! + pub fn load(&self, path: &str) -> Option { + self.get_entry(path).and_then(|e| e.load(&self.repo)) + } } -/// An entry in a file tree +/// Data yielded from loading a part of the file tree +/// +/// This type is returned when fetching a path via `FileTree::load()`, +/// and can either be a single file read into memory, or an +/// enumeration of direct children of a directory. /// -/// It's variants can either be a file (leaf), or a subtree, with it's -/// own path handles, and children. -pub enum TreeEntry { +/// To get all children of a subtree, use `Yield::into_tree()` to +/// create a new, recursive `FileTree` to enumerate. 
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum Yield { + /// Load a single file into a buffer + File(Vec), + /// Enumerate children in a directory + Dir(Vec), +} + +enum TreeEntry { /// A single file File(File), /// A sub-tree @@ -63,74 +102,114 @@ pub enum TreeEntry { } impl TreeEntry { - /// Create a tree entry from a path and `git2::TreeEntry` - fn generate(root: PathBuf, path_segments: Option>, entry: git2::TreeEntry) -> Self { + fn generate(path_segments: Option>, entry: &git2::TreeEntry) -> Self { let path = path_segments.map_or("".into(), |p| path_segs_join(p)); + let id = entry.id().into(); + let name = entry.name().unwrap().into(); match entry.kind() { - Some(ObjectType::Blob) => Self::File(File::new(root, path)), - Some(ObjectType::Tree) => Self::Dir(Directory::new(root, path)), + Some(ObjectType::Blob) => Self::File(File::new(id, path, name)), + Some(ObjectType::Tree) => Self::Dir(Directory::new(id, path, name)), _ => unimplemented!(), } } - /// Load this tree entry from disk, if it is a file - /// - /// When calling this function on a directory, nothing will - /// happen, because directories can't be loaded. If you want to - /// get a list of children for a directory, use - /// [`FileTree::enumerate()`]() instead! 
- pub fn load(&self) -> Option> { - if !self.is_file() { - return None; - } + fn load(&self, repo: &Arc) -> Option { + let id = self.id(); - let obj = + match self { + Self::File(ref f) => repo + .find_blob(id.into()) + .ok() + .map(|b| Yield::File(b.content().into())), + Self::Dir(ref d) => repo + .find_tree(id.into()) + .ok() + .map(|tree| { + let mut children = vec![]; + + // Iterate the tree, but only as long as there are no + // additional path segments + tree.walk(TreeWalkMode::PreOrder, |p, entry| { + let path_segs: Vec<_> = p.split("/").filter(|s| s != &"").collect(); + if path_segs.len() > 0 { + TreeWalkResult::Skip + } else { + // Take the current tree path, and append the + // name of whatever we're currently iterating + // over + let path = PathBuf::new().join(self.path()).join(entry.name().unwrap()); + children.push(path.as_path().to_str().unwrap().into()); + TreeWalkResult::Ok + } + }); + + children + }) + .map(|c| Yield::Dir(c)), + } } - /// Check if this tree entry is a file - pub fn is_file(&self) -> bool { + fn is_file(&self) -> bool { match self { Self::File(_) => true, Self::Dir(_) => false, } } + + fn id(&self) -> HashId { + match self { + Self::File(ref f) => f.id.clone(), + Self::Dir(ref d) => d.id.clone(), + } + } + + /// Get the repo-internal path (including name) + /// + /// This is used to index files in a file tree, to allow O(log n) + /// access to deeply nested items. 
+ fn path(&self) -> String { + match self { + Self::File(ref f) => PathBuf::new().join(&f.path).join(&f.name), + Self::Dir(ref d) => PathBuf::new().join(&d.path).join(&d.name), + } + .as_path() + .to_str() + .unwrap() + .into() + } } -/// A file to have ever existed in a git repo -pub struct File { - root: PathBuf, +struct File { + id: HashId, path: String, + name: String, } impl File { - pub(crate) fn new(root: PathBuf, path: String) -> Self { - Self { root, path } - } - - /// Get the history of a file from a branch iterator - pub fn get_history(&self, branch: BranchIter) -> Vec { - todo!() + fn new(id: HashId, path: String, name: String) -> Self { + Self { id, path, name } } } -/// A subdirectory in a file tree -/// -/// A directory has a set of children, which can either be Files, or -/// other directories. Many of the functions to retrieve metadata -/// (such as the last commit, count, etc) will be deferred to the -/// children of this directory. -pub struct Directory { - root: PathBuf, +struct Directory { + id: HashId, path: String, + name: String, } impl Directory { - pub(crate) fn new(root: PathBuf, path: String) -> Self { - Self { root, path } + fn new(id: HashId, path: String, name: String) -> Self { + Self { id, path, name } + } + + fn enumerate(&self, repo: git2::Repository) -> Vec { + vec![] } } +//////////////////////////////// + /// Take a vector of path segments, and turn it into a valid offset path /// /// There are tests to make sure this function works properly. 
@@ -139,7 +218,7 @@ impl Directory { /// * vec![] -> "" /// * vec!["foo"] -> "foo" /// * vec!["foo", "bar", "baz"] -> "foo/bar/baz" -fn path_segs_join(segments: Vec) -> String { +fn path_segs_join(segments: Vec<&str>) -> String { segments .into_iter() .fold(PathBuf::new(), |buf, seg| buf.join(seg)) diff --git a/apps/servers/octopus/supergit/src/lib.rs b/apps/servers/octopus/supergit/src/lib.rs index df30d996ebe..17c9094c32d 100644 --- a/apps/servers/octopus/supergit/src/lib.rs +++ b/apps/servers/octopus/supergit/src/lib.rs @@ -5,8 +5,10 @@ //! repository, consider using that library instead. //! //! supergit aims to make queries into a git repo as typed and easy as -//! possible. Start by creating a [`Repository`](), and enumerating -//! or fetching [`Branch`]()es that you are interested in. +//! possible. Start by creating a +//! [`Repository`](struct.Repository.html), and enumerating or +//! fetching [`Branch`](struct.Branch.html)es that you are interested +//! in. //! //! Unlike `libgit2`, this library can resolve reverse dependencies //! between files, and their commit history. 
Some of these functions @@ -27,7 +29,7 @@ pub(crate) use repo::HashId; pub use repo::Repository; mod files; -pub use files::{File, FileTree}; +pub use files::{Yield, FileTree}; use async_std::sync::{Arc, RwLock}; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/apps/servers/octopus/supergit/src/repo.rs b/apps/servers/octopus/supergit/src/repo.rs index 37991c3a560..3d802a929e7 100644 --- a/apps/servers/octopus/supergit/src/repo.rs +++ b/apps/servers/octopus/supergit/src/repo.rs @@ -2,7 +2,7 @@ use crate::{Branch, BranchCommit}; use git2::{self, Oid}; -use std::sync::Arc; +use std::{fmt, sync::Arc}; pub type GitResult = Result; @@ -10,6 +10,12 @@ pub type GitResult = Result; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct HashId(String); +impl fmt::Display for HashId { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + impl HashId { pub fn to_oid(&self) -> Oid { self.clone().into() @@ -63,6 +69,7 @@ pub struct Repository { } impl Repository { + /// Open a repository read-only at a specific path pub fn open(path: &str) -> GitResult { Ok(Self { inner: Arc::new(git2::Repository::open(path)?), @@ -71,9 +78,12 @@ impl Repository { /// Parse branch data from repository /// + /// If you only care about a single branch, you can also use the + /// convenience function `get_branch()`. + /// /// ## Panics /// - /// If there is an error around getting the name, or head commit. + /// This function can panic when branch metadata is missing. pub fn branches(&self) -> GitResult> { Ok(self .inner @@ -88,11 +98,17 @@ impl Repository { .collect()) } - /// Get the files touched by a commit - pub fn get_files_for(&self, id: HashId) -> GitResult> { - let c = self.inner.find_commit(id.into())?; - let tree = c.tree()?; - - todo!() + /// Get a single branch by name + /// + /// This function will enumerate all branches, and then select the + /// desired one. 
If you want to make repeated queries onto the + /// branch set, it's recommended you call `branches()`, and cache + /// the data yourself. + pub fn get_branch(&self, name: String) -> Option { + self.branches().ok().and_then(|ok| { + ok.into_iter() + .filter(|b| b.name().is_some()) + .find(|b| &b.name().unwrap() == &name) + }) } }