supergit: WIP, trying to add branching iterators

This commit is here for the history of my madness. Rather than trying to teach the iterator API to be branching, I should add a management wrapper around it, which can decide for each commit it sees whether it would like to follow a branch or not. This way it's also much easier to terminate a branch when we realise that we've seen it before (or disable that functionality), without having to cram more features into the same abstraction.
4 years ago · a35411dc74
parent 1e9c862f4c
commit a35411dc74
2 changed files with 162 additions and 20 deletions
--- a/apps/servers/octopus/supergit/src/bin/test.rs
+++ b/apps/servers/octopus/supergit/src/bin/test.rs
@ -23,11 +23,11 @@ fn main() {
    let head = main.get_head();
    let tree = head.get_tree();
-    // println!(
+    println!(
-    //     "{:?}",
+        "{:#?}",
-    //     tree.history(main.get_all(), "infra/libkookie/nixpkgs/nixos/modules/module-list.nix")
+        tree.history(main.get_all(), "Cargo.toml")
-    //         .into_iter()
+            .into_iter()
-    //         .map(|c| c.summary())
+            .map(|c| c.summary())
-    //         .collect::<Vec<_>>()
+            .collect::<Vec<_>>()
-    // );
+    );
 }
--- a/apps/servers/octopus/supergit/src/branch.rs
+++ b/apps/servers/octopus/supergit/src/branch.rs
@ -1,7 +1,13 @@
 use crate::{Commit, HashId};
 use atomptr::AtomPtr;
 use git2::Repository;
-use std::{mem, sync::Arc};
+use std::{
    mem,
    sync::{
        atomic::{AtomicUsize, Ordering},
        Arc,
    },
 };
 /// Abstraction for a branch history slice
 ///
@ -125,8 +131,8 @@ impl Branch {
 /// handles.  This means that without explicitly branching, this
 /// iterator is first-parent.
 pub struct BranchIter {
-    rec: AtomPtr<IterMode>,
+    mode: AtomPtr<IterMode>,
-    splits: AtomPtr<Vec<HashId>>,
+    splits: IterData,
    repo: Arc<Repository>,
    curr: Option<HashId>,
    limit: SegLimit,
@ -136,8 +142,8 @@ impl BranchIter {
    /// Create a new branch segment iterator
    fn new(repo: Arc<Repository>, last: HashId, limit: SegLimit) -> Self {
        Self {
-            rec: AtomPtr::new(IterMode::FirstParent),
+            mode: AtomPtr::new(IterMode::FirstParent),
-            splits: AtomPtr::new(vec![]),
+            splits: IterData::new(),
            repo,
            curr: Some(last),
            limit,
@ -176,12 +182,75 @@ impl BranchIter {
        }
    }
-    /// Get the current commit
+    /// Determine which commit should be looked at next
    ///
-    /// This function looks either at the "curr" field, or takes the
+    /// FirstParent and DepthFirst iterators will take the next
-    /// ID from `cmd`, if it is set to `IterCmd::Jump(...)`, which
+    /// first_parent if available.  DepthFirst iterators will fall
-    /// indicates that the previous commit was a merge, and we need to escape
+    /// back to the last split point, if no `first_parent()` exists.
-    fn set_next(&mut self, current: Commit) -> Commit {
+    /// BreathFirst iterators will always prefer a new branch over
    /// first_parent, and jump back to the last split if there are no
    /// parents.
    fn determine_next(&mut self, current: Commit) -> Commit {
        let mode = &**self.mode.get_ref();
        self.curr = match mode {
            IterMode::FirstParent => match current.first_parent() {
                Some(p1) => Some(p1.id),
                None => None,
            },
            // DepthFirst iterates normally until we hit the end of
            // the branch, then we "jump back" to an earlier commit.
            IterMode::DepthFirst => match current.first_parent() {
                Some(p1) => Some(p1.id),
                None => {
                    // Get the last split point.  If there are no
                    // more, terminate the iterator.  If there are,
                    // increment brnum and keep going.  If brnum is
                    // higher than the parent count, call this
                    // function again to get the next split point (and
                    // reset brnum)
                    self.splits.next().map(|id| {
                        let brnum = self.splits.incr_brnum();
                        // ALSO: when brnum is LOWER than parent point,
                        // re-insert split, to allow the commit to be
                        // jumped to again!
                        if brnum < current.parent_count() {
                            self.splits.re_insert(id.clone());
                        }
                        let com = self.find_commit(&id).unwrap();
                        if brnum > current.parent_count() {
                            self.splits.reset_brnum();
                            self.determine_next(com).id
                        } else {
                            id
                        }
                    })
                }
            },
            // // When there is only one parent, chose that parent.  When
            // // there is none, jump back to the last split point,
            // // according to brnum.  If brnum is then greater than the
            // // number of branches, reset brnum and re-call this
            // // function.
            // IterMode::BreadthFirst if current.parent_count() <= 1 => match current.first_parent() {
            //     Some(p1) => Some(p1.id),
            //     None => self.splits.next().map(|id| {
            //         let brnum = self.splits.incr_brnum();
            //     }),
            // },
            // // When there are is more than 1 parent, set this commit
            // // as a split point, and chose the last parent commit as
            // // the next commit to walk.  This shifts the active branch
            // // over.  Important: we set brnum to the _highest_ branch,
            // // and iterate inwards.
            // IterMode::BreadthFirst => {
            // },
            _ => todo!(),
        };
        self.curr = match current.first_parent() {
            Some(p1) => Some(p1.id),
            None => None,
@ -195,9 +264,28 @@ impl Iterator for BranchIter {
    type Item = BranchCommit;
    fn next(&mut self) -> Option<Self::Item> {
-        mem::replace(&mut self.curr, None)
+        let mode = &**self.mode.get_ref();
-            .and_then(|id| self.find_commit(&id))
+
-            .map(|c| self.set_next(c))
+        let id = match mode {
            // When iterating first-parent OR when going depth first,
            // while the current branch still has children, take the
            // next child of the current branch.
            IterMode::FirstParent => mem::replace(&mut self.curr, None),
            IterMode::DepthFirst | IterMode::BreadthFirst if self.curr.is_some() => {
                mem::replace(&mut self.curr, None)
            }
            // When going both BreadthFirst or DepthFirst, reaching
            // the end of the current branch means getting the last
            // split point.  The difference between these two
            // strategies is in how the split points and "current
            // point" are stored.
            _ if self.curr.is_none() => self.splits.next(),
            _ => unreachable!(), // can't be reached
        };
        // The chain of events
        id.and_then(|id| self.find_commit(&id))
            .map(|c| self.determine_next(c))
            .and_then(|c| match self.limit {
                SegLimit::None => Some(c),
                SegLimit::Commit(ended, _) if ended => None,
@ -285,17 +373,64 @@ impl BranchCommit {
 }
 /// Additional iterator data
 ///
 /// This structure tracks split points on the iterator.  When
 /// traversing a branch, the iterator can reach branch points.  While
 /// in `IterMode::FirstParent`, these are irrelevant.  However: when
 /// iterating breadth or depth first these need to be tracked.
 ///
 /// ## Depth first
 ///
 /// - When a branch is encountered, append it to this data set.
 /// - When the end of the current branch is reached, get the next
 ///   split point to resume from
 ///
 /// ## Breadth first
 ///
 /// - When a branch is encountered, add the previous commit to this set
 /// - When reaching the end of a branch, get the next split point to
 ///   resume from
 ///
 /// ---
 ///
 /// In essense, the usage of this structure for BreadthFirst and
 /// DepthFirst are inverted!
 struct IterData {
    /// Split points on the iterator
    splits: AtomPtr<Vec<HashId>>,
    /// Additional data for octopus merges
    brnum: AtomicUsize,
 }
 impl IterData {
    fn new() -> Self {
        Self {
            splits: AtomPtr::new(vec![]),
            brnum: AtomicUsize::new(0),
        }
    }
    /// Check if the split set is empty
    fn empty(&self) -> bool {
        self.splits.get_ref().len() == 0
    }
    fn set_brnum(&self, num: usize) {
        self.brnum.swap(num, Ordering::Relaxed);
    }
    fn incr_brnum(&self) -> usize {
        self.brnum.fetch_add(1, Ordering::Relaxed) + 1
    }
    fn decr_brnum(&self) -> usize {
        self.brnum.fetch_sub(1, Ordering::Relaxed) - 1
    }
    fn reset_brnum(&self) {
        self.set_brnum(0);
    }
    fn append(&self, id: HashId) {
        let mut vec = (**self.splits.get_ref()).clone();
        let mut new = vec![id];
@ -303,6 +438,13 @@ impl IterData {
        self.splits.swap(new);
    }
    /// Insert a hashID to the front of the splits list
    fn re_insert(&self, id: HashId) {
        let mut vec = (**self.splits.get_ref()).clone();
        vec.insert(0, id);
        self.splits.swap(vec);
    }
    fn next(&self) -> Option<HashId> {
        let mut vec = (**self.splits.get_ref()).clone();
        let next = if vec.len() < 0 {