supergit: WIP, trying to add branching iterators

This commit is here for the history of my madness. Rather than trying to teach the iterator API to be branching, I should add a management wrapper around it, which can decide for each commit it sees whether it would like to follow a branch or not. This way it's also much easier to terminate a branch when we realise that we've seen it before (or disable that functionality), without having to cram more features into the same abstraction.
4 years ago · a35411dc74
parent 1e9c862f4c
commit a35411dc74
2 changed files with 162 additions and 20 deletions
--- a/apps/servers/octopus/supergit/src/bin/test.rs
+++ b/apps/servers/octopus/supergit/src/bin/test.rs
@ -23,11 +23,11 @@ fn main() {
    let head = main.get_head();
    let tree = head.get_tree();
    
-    // println!(
-    //     "{:?}",
-    //     tree.history(main.get_all(), "infra/libkookie/nixpkgs/nixos/modules/module-list.nix")
-    //         .into_iter()
-    //         .map(|c| c.summary())
-    //         .collect::<Vec<_>>()
-    // );
+    println!(
+        "{:#?}",
+        tree.history(main.get_all(), "Cargo.toml")
+            .into_iter()
+            .map(|c| c.summary())
+            .collect::<Vec<_>>()
+    );
 }
--- a/apps/servers/octopus/supergit/src/branch.rs
+++ b/apps/servers/octopus/supergit/src/branch.rs
@ -1,7 +1,13 @@
 use crate::{Commit, HashId};
 use atomptr::AtomPtr;
 use git2::Repository;
-use std::{mem, sync::Arc};
+use std::{
+    mem,
+    sync::{
+        atomic::{AtomicUsize, Ordering},
+        Arc,
+    },
+};

 /// Abstraction for a branch history slice
 ///
@ -125,8 +131,8 @@ impl Branch {
 /// handles.  This means that without explicitly branching, this
 /// iterator is first-parent.
 pub struct BranchIter {
-    rec: AtomPtr<IterMode>,
-    splits: AtomPtr<Vec<HashId>>,
+    mode: AtomPtr<IterMode>,
+    splits: IterData,
    repo: Arc<Repository>,
    curr: Option<HashId>,
    limit: SegLimit,
@ -136,8 +142,8 @@ impl BranchIter {
    /// Create a new branch segment iterator
    fn new(repo: Arc<Repository>, last: HashId, limit: SegLimit) -> Self {
        Self {
-            rec: AtomPtr::new(IterMode::FirstParent),
-            splits: AtomPtr::new(vec![]),
+            mode: AtomPtr::new(IterMode::FirstParent),
+            splits: IterData::new(),
            repo,
            curr: Some(last),
            limit,
@ -176,12 +182,75 @@ impl BranchIter {
        }
    }

-    /// Get the current commit
+    /// Determine which commit should be looked at next
    ///
-    /// This function looks either at the "curr" field, or takes the
-    /// ID from `cmd`, if it is set to `IterCmd::Jump(...)`, which
-    /// indicates that the previous commit was a merge, and we need to escape
-    fn set_next(&mut self, current: Commit) -> Commit {
+    /// FirstParent and DepthFirst iterators will take the next
+    /// first_parent if available.  DepthFirst iterators will fall
+    /// back to the last split point, if no `first_parent()` exists.
+    /// BreathFirst iterators will always prefer a new branch over
+    /// first_parent, and jump back to the last split if there are no
+    /// parents.
+    fn determine_next(&mut self, current: Commit) -> Commit {
+        let mode = &**self.mode.get_ref();
+        self.curr = match mode {
+            IterMode::FirstParent => match current.first_parent() {
+                Some(p1) => Some(p1.id),
+                None => None,
+            },
+            // DepthFirst iterates normally until we hit the end of
+            // the branch, then we "jump back" to an earlier commit.
+            IterMode::DepthFirst => match current.first_parent() {
+                Some(p1) => Some(p1.id),
+                None => {
+                    // Get the last split point.  If there are no
+                    // more, terminate the iterator.  If there are,
+                    // increment brnum and keep going.  If brnum is
+                    // higher than the parent count, call this
+                    // function again to get the next split point (and
+                    // reset brnum)
+                    self.splits.next().map(|id| {
+                        let brnum = self.splits.incr_brnum();
+
+                        // ALSO: when brnum is LOWER than parent point,
+                        // re-insert split, to allow the commit to be
+                        // jumped to again!
+                        if brnum < current.parent_count() {
+                            self.splits.re_insert(id.clone());
+                        }
+
+                        let com = self.find_commit(&id).unwrap();
+                        if brnum > current.parent_count() {
+                            self.splits.reset_brnum();
+                            self.determine_next(com).id
+                        } else {
+                            id
+                        }
+                    })
+                }
+            },
+            // // When there is only one parent, chose that parent.  When
+            // // there is none, jump back to the last split point,
+            // // according to brnum.  If brnum is then greater than the
+            // // number of branches, reset brnum and re-call this
+            // // function.
+            // IterMode::BreadthFirst if current.parent_count() <= 1 => match current.first_parent() {
+            //     Some(p1) => Some(p1.id),
+            //     None => self.splits.next().map(|id| {
+            //         let brnum = self.splits.incr_brnum();
+            //     }),
+            // },
+
+            // // When there are is more than 1 parent, set this commit
+            // // as a split point, and chose the last parent commit as
+            // // the next commit to walk.  This shifts the active branch
+            // // over.  Important: we set brnum to the _highest_ branch,
+            // // and iterate inwards.
+            // IterMode::BreadthFirst => {
+
+            // },
+            _ => todo!(),
+        };
+
        self.curr = match current.first_parent() {
            Some(p1) => Some(p1.id),
            None => None,
@ -195,9 +264,28 @@ impl Iterator for BranchIter {
    type Item = BranchCommit;

    fn next(&mut self) -> Option<Self::Item> {
-        mem::replace(&mut self.curr, None)
-            .and_then(|id| self.find_commit(&id))
-            .map(|c| self.set_next(c))
+        let mode = &**self.mode.get_ref();
+
+        let id = match mode {
+            // When iterating first-parent OR when going depth first,
+            // while the current branch still has children, take the
+            // next child of the current branch.
+            IterMode::FirstParent => mem::replace(&mut self.curr, None),
+            IterMode::DepthFirst | IterMode::BreadthFirst if self.curr.is_some() => {
+                mem::replace(&mut self.curr, None)
+            }
+            // When going both BreadthFirst or DepthFirst, reaching
+            // the end of the current branch means getting the last
+            // split point.  The difference between these two
+            // strategies is in how the split points and "current
+            // point" are stored.
+            _ if self.curr.is_none() => self.splits.next(),
+            _ => unreachable!(), // can't be reached
+        };
+
+        // The chain of events
+        id.and_then(|id| self.find_commit(&id))
+            .map(|c| self.determine_next(c))
            .and_then(|c| match self.limit {
                SegLimit::None => Some(c),
                SegLimit::Commit(ended, _) if ended => None,
@ -285,17 +373,64 @@ impl BranchCommit {
 }

 /// Additional iterator data
+///
+/// This structure tracks split points on the iterator.  When
+/// traversing a branch, the iterator can reach branch points.  While
+/// in `IterMode::FirstParent`, these are irrelevant.  However: when
+/// iterating breadth or depth first these need to be tracked.
+///
+/// ## Depth first
+///
+/// - When a branch is encountered, append it to this data set.
+/// - When the end of the current branch is reached, get the next
+///   split point to resume from
+///
+/// ## Breadth first
+///
+/// - When a branch is encountered, add the previous commit to this set
+/// - When reaching the end of a branch, get the next split point to
+///   resume from
+///
+/// ---
+///
+/// In essense, the usage of this structure for BreadthFirst and
+/// DepthFirst are inverted!
 struct IterData {
+    /// Split points on the iterator
    splits: AtomPtr<Vec<HashId>>,
+    /// Additional data for octopus merges
+    brnum: AtomicUsize,
 }

 impl IterData {
    fn new() -> Self {
        Self {
            splits: AtomPtr::new(vec![]),
+            brnum: AtomicUsize::new(0),
        }
    }

+    /// Check if the split set is empty
+    fn empty(&self) -> bool {
+        self.splits.get_ref().len() == 0
+    }
+
+    fn set_brnum(&self, num: usize) {
+        self.brnum.swap(num, Ordering::Relaxed);
+    }
+
+    fn incr_brnum(&self) -> usize {
+        self.brnum.fetch_add(1, Ordering::Relaxed) + 1
+    }
+
+    fn decr_brnum(&self) -> usize {
+        self.brnum.fetch_sub(1, Ordering::Relaxed) - 1
+    }
+
+    fn reset_brnum(&self) {
+        self.set_brnum(0);
+    }
+
    fn append(&self, id: HashId) {
        let mut vec = (**self.splits.get_ref()).clone();
        let mut new = vec![id];
@ -303,6 +438,13 @@ impl IterData {
        self.splits.swap(new);
    }

+    /// Insert a hashID to the front of the splits list
+    fn re_insert(&self, id: HashId) {
+        let mut vec = (**self.splits.get_ref()).clone();
+        vec.insert(0, id);
+        self.splits.swap(vec);
+    }
+
    fn next(&self) -> Option<HashId> {
        let mut vec = (**self.splits.get_ref()).clone();
        let next = if vec.len() < 0 {