nomicon/apps/servers/octopus/supergit/src/branch.rs

//! Type system for working with git branches

use crate::{Commit, HashId};
use atomptr::AtomPtr;
use git2::Repository;
use std::{
    mem,
    sync::{
        atomic::{AtomicUsize, Ordering},
        Arc,
    },
};

/// Abstraction for a branch history slice
///
/// Git implements an acyclic graph, where branches can split and be
/// re-merged later.  Traversal always happens from some point
/// onwards, backwards through the history.  Because git repositories
/// can get quite large and this is a recursive process, it is very
/// easy to overflow your program stack.  To avoid this, `supergit`
/// uses an iterator design to enumerate commits.
///
/// Use the API on this type to specify your starting point.  By
/// default, it will be the head of the branch you are looking at.
/// Note: not all branches have names!
///
/// After creating a `BranchIter` you can then call `next()` on it,
/// yielding `BranchCommit` objects.  These can either be single
/// commits, or various types of merge commits.  Each merge commit
/// yields some set of `Branch` handles, which you can traverse by
/// building another `BranchIter`.
///
/// A branch iterator is therefore always first-parent, meaning that
/// merged branches can simply be ignored by only ever inspecting the
/// current `Commit` contained by a `BranchCommit`.
#[derive(Clone)]
pub struct Branch {
    /// Shared handle to the repository this branch belongs to
    repo: Arc<Repository>,
    /// Name of the branch; `None` for handles built via `without_name`
    name: Option<String>,
    /// Id of the commit this handle starts at
    head: HashId,
}

impl Branch {
    /// Create a new branch handle
    pub(crate) fn new(repo: &Arc<Repository>, name: String, head: HashId) -> Self {
        Self {
            repo: Arc::clone(repo),
            name: Some(name),
            head,
        }
    }

    pub(crate) fn without_name(repo: &Arc<Repository>, head: HashId) -> Self {
        Self {
            repo: Arc::clone(repo),
            name: None,
            head,
        }
    }

    /// Get a branch handle starting at a certain commit
    // TODO: do we want to check if this is actually a child?
    pub fn skip_to(&self, from: HashId) -> Self {
        match self.name {
            Some(ref name) => Self::new(&self.repo, name.clone(), from),
            None => Self::without_name(&self.repo, from),
        }
    }

    /// Create a branch handle that skips a certain number of commits
    ///
    /// This walker always picks the first parent.
    pub fn skip(&self, num: usize) -> Self {
        let mut head = self.repo.find_commit(self.head.clone().into()).unwrap();
        for _ in 0..num {
            if let Ok(p) = head.parent(0) {
                head = p;
            }
        }

        match self.name {
            Some(ref name) => Self::new(&self.repo, name.clone(), head.id().into()),
            None => Self::without_name(&self.repo, head.id().into()),
        }
    }

    /// Create a branch iterator that stops when reaching a commit
    pub fn get_to(&self, commit: HashId) -> BranchIter {
        BranchIter::new(
            Arc::clone(&self.repo),
            self.head.clone(),
            SegLimit::Commit(false, commit),
        )
    }

    /// Create a step-limited branch iterator
    ///
    /// This type of iterator is especially useful when combined with
    /// `skip()`, to create a paginated view onto commits.
    pub fn get(&self, num: usize) -> BranchIter {
        BranchIter::new(
            Arc::clone(&self.repo),
            self.head.clone(),
            SegLimit::Length(0, num),
        )
    }

    /// Create an endless branch iterator
    ///
    /// While the creation of the iterator is instantanious, actually
    /// enumerating all commits in a repository can be quite
    /// computationally intensive and is almost never what you
    /// actually want.
    pub fn get_all(&self) -> BranchIter {
        BranchIter::new(Arc::clone(&self.repo), self.head.clone(), SegLimit::None)
    }

    /// Get the current HEAD commit
    pub fn head(&self) -> Commit {
        Commit::new(&self.repo, self.head.clone()).unwrap()
    }

    /// Get the branch name, if it exists
    pub fn name(&self) -> Option<String> {
        self.name.clone()
    }
}

/// A branch slice iterator, created via `Branch` handle
///
/// This iterator yields `BranchCommit` objects, which can either be
/// simple commits, or various types of merge commits with new `Branch`
/// handles.  This means that without explicitly branching, this
/// iterator is first-parent.
pub struct BranchIter {
    /// Traversal strategy; `BranchIter::new` always starts out with
    /// `IterMode::FirstParent`
    mode: AtomPtr<IterMode>,
    /// Split points remembered for the non-first-parent modes
    splits: IterData,
    /// Repository that commits are loaded from
    repo: Arc<Repository>,
    /// Id of the commit that the next call to `next()` will look at,
    /// if there is one
    curr: Option<HashId>,
    /// Limit deciding when the iterator stops yielding commits
    limit: SegLimit,
}

impl BranchIter {
    /// Create a new branch segment iterator
    ///
    /// The iterator starts at the commit `last`, uses
    /// `IterMode::FirstParent` and stops according to `limit`.
    fn new(repo: Arc<Repository>, last: HashId, limit: SegLimit) -> Self {
        Self {
            mode: AtomPtr::new(IterMode::FirstParent),
            splits: IterData::new(),
            repo,
            curr: Some(last),
            limit,
        }
    }

    /// Get the commit this iterator will look at next
    ///
    /// # Panics
    ///
    /// Panics if the iterator holds no current commit id any more
    /// (it has walked past the end of the history), or if
    /// `Commit::new` yields no commit for the stored id.
    pub fn current(&self) -> Commit {
        let id = self
            .curr
            .as_ref()
            .expect("branch iterator has no current commit")
            .clone();
        Commit::new(&self.repo, id).expect("current commit must exist in the repository")
    }

    /// Get a commit object, if it exists
    fn find_commit(&self, id: &HashId) -> Option<Commit> {
        Commit::new(&self.repo, id.clone())
    }

    /// For a current commit, get its first and second parent, if
    /// they exist
    fn parents(&self, curr: &Commit) -> (Option<Commit>, Option<Commit>) {
        (curr.first_parent(), curr.parent(1))
    }

    /// Take a commit and turn it into a branch commit
    ///
    /// Commits with zero or one parent become `BranchCommit::Commit`,
    /// commits with exactly two parents become `BranchCommit::Merge`
    /// (carrying a nameless branch starting at the second parent),
    /// and everything else becomes `BranchCommit::Octopus`.
    fn make_branch_commit(&self, curr: Commit) -> BranchCommit {
        match curr.parent_count() {
            0 | 1 => BranchCommit::Commit(curr),
            2 => {
                let p2 = self
                    .parents(&curr)
                    .1
                    .expect("commit with two parents must have a second parent");
                BranchCommit::Merge(curr, Branch::without_name(&self.repo, p2.id))
            }
            // NOTE(review): this creates a branch for _every_ entry
            // returned by `parents()`.  If that list includes the
            // first parent, it is treated as a merged branch here,
            // unlike in the `Merge` case above -- confirm intent.
            _ => BranchCommit::Octopus(
                curr.clone(),
                curr.parents()
                    .into_iter()
                    .map(|c| Branch::without_name(&self.repo, c.id))
                    .collect(),
            ),
        }
    }

    /// Determine which commit should be looked at next
    ///
    /// Stores the id of the next commit in `self.curr` and hands
    /// `current` back to the caller.
    ///
    /// FirstParent and DepthFirst iterators will take the next
    /// first_parent if available.  DepthFirst iterators will fall
    /// back to the last split point, if no `first_parent()` exists.
    /// BreadthFirst iterators will always prefer a new branch over
    /// first_parent, and jump back to the last split if there are no
    /// parents.
    ///
    /// # Panics
    ///
    /// `IterMode::BreadthFirst` is not implemented and panics via
    /// `todo!()`.
    fn determine_next(&mut self, current: Commit) -> Commit {
        let mode = &**self.mode.get_ref();
        self.curr = match mode {
            IterMode::FirstParent => current.first_parent().map(|p1| p1.id),
            // DepthFirst iterates normally until we hit the end of
            // the branch, then we "jump back" to an earlier commit.
            IterMode::DepthFirst => match current.first_parent() {
                Some(p1) => Some(p1.id),
                None => {
                    // Get the last split point.  If there are no
                    // more, terminate the iterator.  If there are,
                    // increment brnum and keep going.  If brnum is
                    // higher than the parent count, call this
                    // function again to get the next split point (and
                    // reset brnum)
                    //
                    // NOTE(review): the comparisons below use the
                    // parent count of `current` (the commit without a
                    // first parent), not of the split commit `com` --
                    // confirm that this is intended.
                    self.splits.next().map(|id| {
                        let brnum = self.splits.incr_brnum();

                        // ALSO: when brnum is LOWER than parent point,
                        // re-insert split, to allow the commit to be
                        // jumped to again!
                        if brnum < current.parent_count() {
                            self.splits.re_insert(id.clone());
                        }

                        let com = self.find_commit(&id).unwrap();
                        if brnum > current.parent_count() {
                            self.splits.reset_brnum();
                            self.determine_next(com).id
                        } else {
                            id
                        }
                    })
                }
            },
            // BreadthFirst is not implemented yet.  Planned behaviour:
            //
            // - With at most one parent, choose that parent.  With no
            //   parent, jump back to the last split point according
            //   to brnum.  If brnum is then greater than the number
            //   of branches, reset brnum and re-call this function.
            // - With more than one parent, set this commit as a
            //   split point and choose the last parent commit as the
            //   next commit to walk.  This shifts the active branch
            //   over.  Important: brnum is set to the _highest_
            //   branch, and iterates inwards.
            IterMode::BreadthFirst => todo!(),
        };

        // `self.curr` is deliberately not touched again here: writing
        // the first parent into it unconditionally would throw away
        // the mode-specific result computed above.
        current
    }
}

impl Iterator for BranchIter {
    type Item = BranchCommit;

    /// Yield the next commit of this branch slice
    ///
    /// Picks the id to look at, loads the commit, records which
    /// commit follows it, and finally applies the segment limit
    /// before wrapping the commit into a `BranchCommit`.
    fn next(&mut self) -> Option<Self::Item> {
        let mode = &**self.mode.get_ref();

        let next_id = match mode {
            // First-parent iteration only ever follows the stored
            // current id.
            IterMode::FirstParent => mem::take(&mut self.curr),
            // Depth and breadth first iteration follow the current
            // branch while it lasts, and resume from the last split
            // point once it has ended.  The two strategies differ in
            // how split points and the "current point" are stored.
            IterMode::DepthFirst | IterMode::BreadthFirst => {
                mem::take(&mut self.curr).or_else(|| self.splits.next())
            }
        };

        let id = next_id?;
        let found = self.find_commit(&id)?;
        let commit = self.determine_next(found);

        // Decide whether the segment limit still allows this commit
        let within_limit = match self.limit {
            SegLimit::None => true,
            SegLimit::Commit(ref mut ended, ref target) => {
                if *ended {
                    false
                } else {
                    // The target itself is still yielded; everything
                    // after it is cut off.
                    if &commit.id == target {
                        *ended = true;
                    }
                    true
                }
            }
            SegLimit::Length(ref mut seen, ref max) => {
                if *seen < *max {
                    *seen += 1;
                    true
                } else {
                    false
                }
            }
        };

        if within_limit {
            Some(self.make_branch_commit(commit))
        } else {
            None
        }
    }
}

/// Specify the mode of a branch iterator
///
/// The default value is `FirstParent`, meaning that merged branches
/// will create a new `Branch` handle that the user must iterate
/// manually.
///
/// This is a reasonable default, but means that history searches for
/// files become more tedious.  To improve this use-case, iterators
/// can internally be set to change their iteration behaviour, so
/// that merged branches are walked by the iterator itself.
///
/// NOTE(review): the original description ended mid-sentence after
/// stating that returned commits are then always
/// `BranchCommit::Commit`; confirm the intended behaviour.
/// `BreadthFirst` is not implemented yet.
pub enum IterMode {
    /// Default value, iterating only first-parent commits
    FirstParent,
    /// Iterate a branch to completion, before picking the next
    DepthFirst,
    /// Iterate branches as they come up
    BreadthFirst,
}

/// Limiter applied to a branch segment
///
/// The limit carries its own progress state, which the iterator
/// updates while it runs.
pub enum SegLimit {
    /// No limit, enumerating all children
    None,
    /// Run until a certain commit is found
    ///
    /// The flag starts out as `false` and is set once the commit with
    /// the given id has been yielded; no commits are yielded after
    /// that.
    Commit(bool, HashId),
    /// Run to collect a certain number of commits
    ///
    /// The first value counts the commits yielded so far, the second
    /// is the maximum number of commits to yield.
    Length(usize, usize),
}

/// A commit represented as a relationship to a branch
///
/// Most commits will be simple, meaning they are in sequence on the
/// branch.  Two types of merge commits exist: normal, and octopus.
/// All branches leading into this branch form a reverse tree.
pub enum BranchCommit {
    /// A single commit
    Commit(Commit),
    /// A merge commit from one other branch
    ///
    /// Holds the merge commit itself and a handle to the branch that
    /// was merged in.
    Merge(Commit, Branch),
    /// An octopus merge with multiple branches
    ///
    /// Holds the merge commit itself and handles to the branches
    /// leading into it.
    Octopus(Commit, Vec<Branch>),
}

impl BranchCommit {
    pub fn id(&self) -> HashId {
        use BranchCommit::*;
        match self {
            Commit(ref c) => &c.id,
            Merge(_, ref b) => &b.head,
            Octopus(ref c, _) => &c.id,
        }
        .clone()
    }

    /// Get the underlying commit, regardless of type
    pub fn commit(&self) -> &Commit {
        use BranchCommit::*;
        match self {
            Commit(ref c) => c,
            Merge(ref c, _) => c,
            Octopus(ref c, _) => c,
        }
    }
}

/// Additional iterator data
///
/// This structure tracks split points on the iterator.  When
/// traversing a branch, the iterator can reach branch points.  While
/// in `IterMode::FirstParent`, these are irrelevant.  However: when
/// iterating breadth or depth first these need to be tracked.
///
/// ## Depth first
///
/// - When a branch is encountered, append it to this data set.
/// - When the end of the current branch is reached, get the next
///   split point to resume from
///
/// ## Breadth first
///
/// - When a branch is encountered, add the previous commit to this set
/// - When reaching the end of a branch, get the next split point to
///   resume from
///
/// ---
///
/// In essence, the usage of this structure for BreadthFirst and
/// DepthFirst are inverted!
struct IterData {
    /// Split points on the iterator
    ///
    /// Updates replace the whole list: the vector is cloned, changed
    /// and swapped back in.
    splits: AtomPtr<Vec<HashId>>,
    /// Additional data for octopus merges
    ///
    /// A branch counter, starting at zero.
    brnum: AtomicUsize,
}

impl IterData {
    /// Create an empty data set with the branch counter at zero
    fn new() -> Self {
        Self {
            splits: AtomPtr::new(vec![]),
            brnum: AtomicUsize::new(0),
        }
    }

    /// Check if the split set is empty
    fn empty(&self) -> bool {
        self.splits.get_ref().is_empty()
    }

    /// Overwrite the branch counter
    fn set_brnum(&self, num: usize) {
        self.brnum.store(num, Ordering::Relaxed);
    }

    /// Increment the branch counter and return the new value
    fn incr_brnum(&self) -> usize {
        self.brnum.fetch_add(1, Ordering::Relaxed) + 1
    }

    /// Decrement the branch counter and return the new value
    ///
    /// Must not be called while the counter is zero: the stored
    /// value would wrap around, and computing the return value
    /// underflows.
    fn decr_brnum(&self) -> usize {
        self.brnum.fetch_sub(1, Ordering::Relaxed) - 1
    }

    /// Set the branch counter back to zero
    fn reset_brnum(&self) {
        self.set_brnum(0);
    }

    /// Add a split point
    ///
    /// The id is placed at the front of the splits list, which makes
    /// it the first one returned by `next()`.
    fn append(&self, id: HashId) {
        let mut vec = (**self.splits.get_ref()).clone();
        vec.insert(0, id);
        self.splits.swap(vec);
    }

    /// Insert a hashID to the front of the splits list
    fn re_insert(&self, id: HashId) {
        let mut vec = (**self.splits.get_ref()).clone();
        vec.insert(0, id);
        self.splits.swap(vec);
    }

    /// Remove and return the split point at the front of the list
    ///
    /// Returns `None` if no split points are stored.
    fn next(&self) -> Option<HashId> {
        let mut vec = (**self.splits.get_ref()).clone();
        if vec.is_empty() {
            return None;
        }

        let next = vec.remove(0);
        self.splits.swap(vec);
        Some(next)
    }
}