My personal project and infrastructure archive
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
nomicon/apps/servers/octopus/supergit/src/branch.rs

461 lines
15 KiB

//! Type system for working with git branches
use crate::{Commit, HashId};
use atomptr::AtomPtr;
use git2::Repository;
use std::{
mem,
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
};
/// Abstraction for a branch history slice
///
/// Git implements an acyclic graph, where branches can be split,
/// and re-merged later. Traversal always happens from some point
/// onwards, backwards through the history. Because git repositories
/// can get quite large and this is a recursive process, it's very
/// quickly possible to overflow your program stack. To avoid this,
/// `supergit` uses an iterator design to enumerate commits.
///
/// Use the API on this type to specify your starting point. By
/// default, it will be the head of the branch you are looking at.
/// Note: not all branches have names!
///
/// After creating a `BranchIter` you can then call `next()` on it,
/// yielding `BranchCommit` objects. These can either be single
/// commits, or various types of merge commits. Each merge commit
/// yields some set of `Branch` handles, that you can traverse by
/// building another `BranchIter`.
///
/// A branch iterator is therefore always first-parent, meaning that
/// merged branches can simply be ignored by only ever inspecting the
/// current `Commit` contained by a `BranchCommit`.
#[derive(Clone)]
pub struct Branch {
/// Shared handle to the underlying git repository
repo: Arc<Repository>,
/// Branch name; `None` for handles created via `without_name`
name: Option<String>,
/// Id of the commit this handle starts from
head: HashId,
}
impl Branch {
/// Create a new branch handle
pub(crate) fn new(repo: &Arc<Repository>, name: String, head: HashId) -> Self {
Self {
repo: Arc::clone(repo),
name: Some(name),
head,
}
}
pub(crate) fn without_name(repo: &Arc<Repository>, head: HashId) -> Self {
Self {
repo: Arc::clone(repo),
name: None,
head,
}
}
/// Get a branch handle starting at a certain commit
// TODO: do we want to check if this is actually a child?
pub fn skip_to(&self, from: HashId) -> Self {
match self.name {
Some(ref name) => Self::new(&self.repo, name.clone(), from),
None => Self::without_name(&self.repo, from),
}
}
/// Create a branch handle that skips a certain number of commits
///
/// This walker always picks the first parent.
pub fn skip(&self, num: usize) -> Self {
let mut head = self.repo.find_commit(self.head.clone().into()).unwrap();
for _ in 0..num {
if let Ok(p) = head.parent(0) {
head = p;
}
}
match self.name {
Some(ref name) => Self::new(&self.repo, name.clone(), head.id().into()),
None => Self::without_name(&self.repo, head.id().into()),
}
}
/// Create a branch iterator that stops when reaching a commit
pub fn get_to(&self, commit: HashId) -> BranchIter {
BranchIter::new(
Arc::clone(&self.repo),
self.head.clone(),
SegLimit::Commit(false, commit),
)
}
/// Create a step-limited branch iterator
///
/// This type of iterator is especially useful when combined with
/// `skip()`, to create a paginated view onto commits.
pub fn get(&self, num: usize) -> BranchIter {
BranchIter::new(
Arc::clone(&self.repo),
self.head.clone(),
SegLimit::Length(0, num),
)
}
/// Create an endless branch iterator
///
/// While the creation of the iterator is instantanious, actually
/// enumerating all commits in a repository can be quite
/// computationally intensive and is almost never what you
/// actually want.
pub fn get_all(&self) -> BranchIter {
BranchIter::new(Arc::clone(&self.repo), self.head.clone(), SegLimit::None)
}
/// Get the current HEAD commit
pub fn head(&self) -> Commit {
Commit::new(&self.repo, self.head.clone()).unwrap()
}
/// Get the branch name, if it exists
pub fn name(&self) -> Option<String> {
self.name.clone()
}
}
/// A branch slice iterator, created via `Branch` handle
///
/// This iterator yields `BranchCommit` objects, that can either be
/// simple commits, or various types of merge commits with new Branch
/// handles. This means that without explicitly branching, this
/// iterator is first-parent.
pub struct BranchIter {
/// Iteration strategy; set to `IterMode::FirstParent` on creation
mode: AtomPtr<IterMode>,
/// Split point bookkeeping, consulted by the non first-parent modes
splits: IterData,
/// Shared handle to the underlying git repository
repo: Arc<Repository>,
/// Id of the commit to visit next; `None` once the walk ran out
curr: Option<HashId>,
/// Limit that decides when the iterator stops yielding commits
limit: SegLimit,
}
impl BranchIter {
/// Create a new branch segment iterator
fn new(repo: Arc<Repository>, last: HashId, limit: SegLimit) -> Self {
Self {
mode: AtomPtr::new(IterMode::FirstParent),
splits: IterData::new(),
repo,
curr: Some(last),
limit,
}
}
pub fn current(&self) -> Commit {
Commit::new(&self.repo, self.curr.as_ref().unwrap().clone()).unwrap()
}
/// Get a commit object, if it exists
fn find_commit(&self, id: &HashId) -> Option<Commit> {
Commit::new(&self.repo, id.clone())
}
/// For a current commit, get it's parents if they exists
fn parents(&self, curr: &Commit) -> (Option<Commit>, Option<Commit>) {
(curr.first_parent(), curr.parent(1))
}
/// Take an optional commit and turn it into a branch commit
fn make_branch_commit(&self, curr: Commit) -> BranchCommit {
match curr.parent_count() {
0 | 1 => BranchCommit::Commit(curr),
2 => {
let p2 = self.parents(&curr).1.unwrap();
BranchCommit::Merge(curr, Branch::without_name(&self.repo, p2.id))
}
_ => BranchCommit::Octopus(
curr.clone(),
curr.parents()
.into_iter()
.map(|c| Branch::without_name(&self.repo, c.id))
.collect(),
),
}
}
/// Determine which commit should be looked at next
///
/// FirstParent and DepthFirst iterators will take the next
/// first_parent if available. DepthFirst iterators will fall
/// back to the last split point, if no `first_parent()` exists.
/// BreathFirst iterators will always prefer a new branch over
/// first_parent, and jump back to the last split if there are no
/// parents.
fn determine_next(&mut self, current: Commit) -> Commit {
let mode = &**self.mode.get_ref();
self.curr = match mode {
IterMode::FirstParent => match current.first_parent() {
Some(p1) => Some(p1.id),
None => None,
},
// DepthFirst iterates normally until we hit the end of
// the branch, then we "jump back" to an earlier commit.
IterMode::DepthFirst => match current.first_parent() {
Some(p1) => Some(p1.id),
None => {
// Get the last split point. If there are no
// more, terminate the iterator. If there are,
// increment brnum and keep going. If brnum is
// higher than the parent count, call this
// function again to get the next split point (and
// reset brnum)
self.splits.next().map(|id| {
let brnum = self.splits.incr_brnum();
// ALSO: when brnum is LOWER than parent point,
// re-insert split, to allow the commit to be
// jumped to again!
if brnum < current.parent_count() {
self.splits.re_insert(id.clone());
}
let com = self.find_commit(&id).unwrap();
if brnum > current.parent_count() {
self.splits.reset_brnum();
self.determine_next(com).id
} else {
id
}
})
}
},
// // When there is only one parent, chose that parent. When
// // there is none, jump back to the last split point,
// // according to brnum. If brnum is then greater than the
// // number of branches, reset brnum and re-call this
// // function.
// IterMode::BreadthFirst if current.parent_count() <= 1 => match current.first_parent() {
// Some(p1) => Some(p1.id),
// None => self.splits.next().map(|id| {
// let brnum = self.splits.incr_brnum();
// }),
// },
// // When there are is more than 1 parent, set this commit
// // as a split point, and chose the last parent commit as
// // the next commit to walk. This shifts the active branch
// // over. Important: we set brnum to the _highest_ branch,
// // and iterate inwards.
// IterMode::BreadthFirst => {
// },
_ => todo!(),
};
self.curr = match current.first_parent() {
Some(p1) => Some(p1.id),
None => None,
};
current
}
}
impl Iterator for BranchIter {
    type Item = BranchCommit;

    /// Yield the next commit of the walk, wrapped as a `BranchCommit`
    ///
    /// The steps are: pick the id to visit, load its commit, record
    /// where to go afterwards, apply the segment limit and finally
    /// classify the commit by its number of parents.
    fn next(&mut self) -> Option<Self::Item> {
        // First-parent walks only ever follow the stored id.  The other
        // modes do the same while the current branch has commits left,
        // and fall back to the most recent split point once it ran out.
        // How split points and the "current point" are stored is what
        // tells the two non first-parent strategies apart.
        let pending = match &**self.mode.get_ref() {
            IterMode::FirstParent => self.curr.take(),
            IterMode::DepthFirst | IterMode::BreadthFirst => match self.curr.take() {
                Some(id) => Some(id),
                None => self.splits.next(),
            },
        };

        let commit = self.find_commit(&pending?)?;
        // The walk position is advanced before the limit is checked
        let commit = self.determine_next(commit);

        let within_limit = match &mut self.limit {
            SegLimit::None => true,
            SegLimit::Commit(reached, target) => {
                if *reached {
                    false
                } else {
                    // The target itself is still yielded; only the
                    // commits after it are cut off.
                    if commit.id == *target {
                        *reached = true;
                    }
                    true
                }
            }
            SegLimit::Length(seen, max) => {
                if *seen < *max {
                    *seen += 1;
                    true
                } else {
                    false
                }
            }
        };
        if !within_limit {
            return None;
        }

        Some(self.make_branch_commit(commit))
    }
}
/// Specify the mode of a branch iterator
///
/// The default value is `FirstParent`, meaning that merged branches
/// will create a new `Branch` handle that the user must iterate
/// manually.
///
/// This is a reasonable default, but means that history searches for
/// files become more tedious. To improve this use-case, iterators
/// can internally be set to change their iteration behaviour, meaning
/// that returned commits are always `BranchCommit::Commit`.
///
/// NOTE(review): the sentence above was cut off in the original
/// comment; confirm the intended guarantees of the non-default modes.
pub enum IterMode {
/// Default value, iterating only first-parent commits
FirstParent,
/// Iterate a branch to completion, before picking the next
DepthFirst,
/// Iterate branches as they come up
BreadthFirst,
}
/// Limiter applied to a branch segment
pub enum SegLimit {
/// No limit, enumerating all children
None,
/// Run until a certain commit is found
///
/// The `bool` starts out as `false` and is set once the commit with
/// the given id has been yielded; from then on the iterator yields
/// nothing more.
Commit(bool, HashId),
/// Run to collect a certain number of commits
///
/// The first field counts the commits yielded so far, the second
/// field is the maximum number of commits to yield.
Length(usize, usize),
}
/// A commit represented as a relationship to a branch
///
/// Most commits will be simple, meaning they are in sequence on the
/// branch. Two types of merge commits exist: normal, and octopus.
/// All branches leading into this branch are a reverse tree.
///
/// In every variant the first field is the commit on the branch that
/// is being iterated.
pub enum BranchCommit {
/// A single commit
Commit(Commit),
/// A merge commit from one other branch
///
/// The `Branch` is the handle for the branch that was merged in.
Merge(Commit, Branch),
/// An octopus merge with multiple branches
///
/// The `Vec` holds handles for the branches taking part in the merge.
Octopus(Commit, Vec<Branch>),
}
impl BranchCommit {
pub fn id(&self) -> HashId {
use BranchCommit::*;
match self {
Commit(ref c) => &c.id,
Merge(_, ref b) => &b.head,
Octopus(ref c, _) => &c.id,
}
.clone()
}
/// Get the underlying commit, regardless of type
pub fn commit(&self) -> &Commit {
use BranchCommit::*;
match self {
Commit(ref c) => c,
Merge(ref c, _) => c,
Octopus(ref c, _) => c,
}
}
}
/// Additional iterator data
///
/// This structure tracks split points on the iterator. When
/// traversing a branch, the iterator can reach branch points. While
/// in `IterMode::FirstParent`, these are irrelevant. However: when
/// iterating breadth or depth first these need to be tracked.
///
/// ## Depth first
///
/// - When a branch is encountered, append it to this data set.
/// - When the end of the current branch is reached, get the next
/// split point to resume from
///
/// ## Breadth first
///
/// - When a branch is encountered, add the previous commit to this set
/// - When reaching the end of a branch, get the next split point to
/// resume from
///
/// ---
///
/// In essence, the usage of this structure for BreadthFirst and
/// DepthFirst are inverted!
struct IterData {
/// Split points on the iterator
///
/// The list is replaced as a whole on every change, which is why
/// the methods working on it only need `&self`.
splits: AtomPtr<Vec<HashId>>,
/// Additional data for octopus merges
///
/// A counter that is incremented, decremented and reset through the
/// `*_brnum` helper methods.
brnum: AtomicUsize,
}
impl IterData {
    /// Create an empty data set: no split points and a branch counter of 0
    fn new() -> Self {
        Self {
            splits: AtomPtr::new(vec![]),
            brnum: AtomicUsize::new(0),
        }
    }

    /// Check if the split set is empty
    fn empty(&self) -> bool {
        self.splits.get_ref().is_empty()
    }

    /// Overwrite the branch counter
    fn set_brnum(&self, num: usize) {
        self.brnum.store(num, Ordering::Relaxed);
    }

    /// Increment the branch counter, returning the new value
    fn incr_brnum(&self) -> usize {
        self.brnum.fetch_add(1, Ordering::Relaxed) + 1
    }

    /// Decrement the branch counter, returning the new value
    fn decr_brnum(&self) -> usize {
        self.brnum.fetch_sub(1, Ordering::Relaxed) - 1
    }

    /// Set the branch counter back to 0
    fn reset_brnum(&self) {
        self.set_brnum(0);
    }

    /// Add a split point
    ///
    /// Despite the name, the id ends up at the _front_ of the list,
    /// which makes it the first one handed out by `next()`.
    fn append(&self, id: HashId) {
        let mut vec = (**self.splits.get_ref()).clone();
        let mut new = vec![id];
        new.append(&mut vec);
        self.splits.swap(new);
    }

    /// Insert a hashID to the front of the splits list
    fn re_insert(&self, id: HashId) {
        let mut vec = (**self.splits.get_ref()).clone();
        vec.insert(0, id);
        self.splits.swap(vec);
    }

    /// Remove and return the split point at the front of the list
    ///
    /// Returns `None` when no split points are stored.
    fn next(&self) -> Option<HashId> {
        let mut vec = (**self.splits.get_ref()).clone();
        // This used to test `vec.len() < 0`, which can never hold for
        // an unsigned length, so no split point was ever handed out.
        if vec.is_empty() {
            return None;
        }
        let next = vec.remove(0);
        self.splits.swap(vec);
        Some(next)
    }
}