supergit: WIP, trying to add branching iterators

This commit is here for the history of my madness.  Rather than trying
to teach the iterator API to be branching, I should add a management
wrapper around it, which can decide for each commit it sees whether it
would like to follow a branch or not.  This way it's also much easier
to terminate a branch when we realise that we've seen it before (or
disable that functionality), without having to cram more features into
the same abstraction.
wip/yesman
Katharina Fey 4 years ago
parent 1e9c862f4c
commit a35411dc74
  1. 14
      apps/servers/octopus/supergit/src/bin/test.rs
  2. 168
      apps/servers/octopus/supergit/src/branch.rs

@ -23,11 +23,11 @@ fn main() {
let head = main.get_head(); let head = main.get_head();
let tree = head.get_tree(); let tree = head.get_tree();
// println!( println!(
// "{:?}", "{:#?}",
// tree.history(main.get_all(), "infra/libkookie/nixpkgs/nixos/modules/module-list.nix") tree.history(main.get_all(), "Cargo.toml")
// .into_iter() .into_iter()
// .map(|c| c.summary()) .map(|c| c.summary())
// .collect::<Vec<_>>() .collect::<Vec<_>>()
// ); );
} }

@ -1,7 +1,13 @@
use crate::{Commit, HashId}; use crate::{Commit, HashId};
use atomptr::AtomPtr; use atomptr::AtomPtr;
use git2::Repository; use git2::Repository;
use std::{mem, sync::Arc}; use std::{
mem,
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
};
/// Abstraction for a branch history slice /// Abstraction for a branch history slice
/// ///
@ -125,8 +131,8 @@ impl Branch {
/// handles. This means that without explicitly branching, this /// handles. This means that without explicitly branching, this
/// iterator is first-parent. /// iterator is first-parent.
pub struct BranchIter { pub struct BranchIter {
rec: AtomPtr<IterMode>, mode: AtomPtr<IterMode>,
splits: AtomPtr<Vec<HashId>>, splits: IterData,
repo: Arc<Repository>, repo: Arc<Repository>,
curr: Option<HashId>, curr: Option<HashId>,
limit: SegLimit, limit: SegLimit,
@ -136,8 +142,8 @@ impl BranchIter {
/// Create a new branch segment iterator /// Create a new branch segment iterator
fn new(repo: Arc<Repository>, last: HashId, limit: SegLimit) -> Self { fn new(repo: Arc<Repository>, last: HashId, limit: SegLimit) -> Self {
Self { Self {
rec: AtomPtr::new(IterMode::FirstParent), mode: AtomPtr::new(IterMode::FirstParent),
splits: AtomPtr::new(vec![]), splits: IterData::new(),
repo, repo,
curr: Some(last), curr: Some(last),
limit, limit,
@ -176,12 +182,75 @@ impl BranchIter {
} }
} }
/// Get the current commit /// Determine which commit should be looked at next
/// ///
/// This function looks either at the "curr" field, or takes the /// FirstParent and DepthFirst iterators will take the next
/// ID from `cmd`, if it is set to `IterCmd::Jump(...)`, which /// first_parent if available. DepthFirst iterators will fall
/// indicates that the previous commit was a merge, and we need to escape /// back to the last split point, if no `first_parent()` exists.
fn set_next(&mut self, current: Commit) -> Commit { /// BreadthFirst iterators will always prefer a new branch over
/// first_parent, and jump back to the last split if there are no
/// parents.
fn determine_next(&mut self, current: Commit) -> Commit {
let mode = &**self.mode.get_ref();
self.curr = match mode {
IterMode::FirstParent => match current.first_parent() {
Some(p1) => Some(p1.id),
None => None,
},
// DepthFirst iterates normally until we hit the end of
// the branch, then we "jump back" to an earlier commit.
IterMode::DepthFirst => match current.first_parent() {
Some(p1) => Some(p1.id),
None => {
// Get the last split point. If there are no
// more, terminate the iterator. If there are,
// increment brnum and keep going. If brnum is
// higher than the parent count, call this
// function again to get the next split point (and
// reset brnum)
self.splits.next().map(|id| {
let brnum = self.splits.incr_brnum();
// ALSO: when brnum is LOWER than the parent count,
// re-insert the split, to allow the commit to be
// jumped to again!
if brnum < current.parent_count() {
self.splits.re_insert(id.clone());
}
let com = self.find_commit(&id).unwrap();
if brnum > current.parent_count() {
self.splits.reset_brnum();
self.determine_next(com).id
} else {
id
}
})
}
},
// // When there is only one parent, choose that parent. When
// // there is none, jump back to the last split point,
// // according to brnum. If brnum is then greater than the
// // number of branches, reset brnum and re-call this
// // function.
// IterMode::BreadthFirst if current.parent_count() <= 1 => match current.first_parent() {
// Some(p1) => Some(p1.id),
// None => self.splits.next().map(|id| {
// let brnum = self.splits.incr_brnum();
// }),
// },
// // When there is more than 1 parent, set this commit
// // as a split point, and choose the last parent commit as
// // the next commit to walk. This shifts the active branch
// // over. Important: we set brnum to the _highest_ branch,
// // and iterate inwards.
// IterMode::BreadthFirst => {
// },
_ => todo!(),
};
self.curr = match current.first_parent() { self.curr = match current.first_parent() {
Some(p1) => Some(p1.id), Some(p1) => Some(p1.id),
None => None, None => None,
@ -195,9 +264,28 @@ impl Iterator for BranchIter {
type Item = BranchCommit; type Item = BranchCommit;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
mem::replace(&mut self.curr, None) let mode = &**self.mode.get_ref();
.and_then(|id| self.find_commit(&id))
.map(|c| self.set_next(c)) let id = match mode {
// When iterating first-parent OR when going depth first,
// while the current branch still has children, take the
// next child of the current branch.
IterMode::FirstParent => mem::replace(&mut self.curr, None),
IterMode::DepthFirst | IterMode::BreadthFirst if self.curr.is_some() => {
mem::replace(&mut self.curr, None)
}
// When going either BreadthFirst or DepthFirst, reaching
// the end of the current branch means getting the last
// split point. The difference between these two
// strategies is in how the split points and "current
// point" are stored.
_ if self.curr.is_none() => self.splits.next(),
_ => unreachable!(), // can't be reached
};
// The chain of events
id.and_then(|id| self.find_commit(&id))
.map(|c| self.determine_next(c))
.and_then(|c| match self.limit { .and_then(|c| match self.limit {
SegLimit::None => Some(c), SegLimit::None => Some(c),
SegLimit::Commit(ended, _) if ended => None, SegLimit::Commit(ended, _) if ended => None,
@ -285,17 +373,64 @@ impl BranchCommit {
} }
/// Additional iterator data /// Additional iterator data
///
/// This structure tracks split points on the iterator. When
/// traversing a branch, the iterator can reach branch points. While
/// in `IterMode::FirstParent`, these are irrelevant. However: when
/// iterating breadth or depth first these need to be tracked.
///
/// ## Depth first
///
/// - When a branch is encountered, append it to this data set.
/// - When the end of the current branch is reached, get the next
/// split point to resume from
///
/// ## Breadth first
///
/// - When a branch is encountered, add the previous commit to this set
/// - When reaching the end of a branch, get the next split point to
/// resume from
///
/// ---
///
/// In essence, the usage of this structure for BreadthFirst and
/// DepthFirst is inverted!
struct IterData { struct IterData {
/// Split points on the iterator
splits: AtomPtr<Vec<HashId>>, splits: AtomPtr<Vec<HashId>>,
/// Additional data for octopus merges
brnum: AtomicUsize,
} }
impl IterData { impl IterData {
fn new() -> Self { fn new() -> Self {
Self { Self {
splits: AtomPtr::new(vec![]), splits: AtomPtr::new(vec![]),
brnum: AtomicUsize::new(0),
} }
} }
/// Report whether no split points are currently recorded.
///
/// Reads the current snapshot of the split list and returns `true`
/// when it holds no entries.
fn empty(&self) -> bool {
    let snapshot = self.splits.get_ref();
    snapshot.is_empty()
}
/// Overwrite the branch counter with `num`.
///
/// The previous counter value is not needed by any caller, so it is
/// simply replaced.
fn set_brnum(&self, num: usize) {
    self.brnum.store(num, Ordering::Relaxed);
}
/// Increase the branch counter by one.
///
/// Returns the counter value *after* the increment.
fn incr_brnum(&self) -> usize {
    let previous = self.brnum.fetch_add(1, Ordering::Relaxed);
    previous + 1
}
/// Decrease the branch counter by one, stopping at zero.
///
/// Returns the counter value *after* the decrement.
///
/// The counter saturates at `0`: a plain `fetch_sub(1) - 1` would wrap
/// the stored value to `usize::MAX` when the counter is already zero,
/// and the trailing subtraction would panic in debug builds.
fn decr_brnum(&self) -> usize {
    let updated = self
        .brnum
        .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |n| {
            Some(n.saturating_sub(1))
        });
    // The closure always returns `Some`, so both arms carry the value
    // that was stored before the update.
    match updated {
        Ok(previous) | Err(previous) => previous.saturating_sub(1),
    }
}
/// Put the branch counter back to zero.
fn reset_brnum(&self) {
    self.brnum.swap(0, Ordering::Relaxed);
}
fn append(&self, id: HashId) { fn append(&self, id: HashId) {
let mut vec = (**self.splits.get_ref()).clone(); let mut vec = (**self.splits.get_ref()).clone();
let mut new = vec![id]; let mut new = vec![id];
@ -303,6 +438,13 @@ impl IterData {
self.splits.swap(new); self.splits.swap(new);
} }
/// Put a split point back at the front of the splits list
///
/// Takes a copy of the current list, places `id` at index 0 and then
/// swaps the modified copy in as the new list.
fn re_insert(&self, id: HashId) {
    let mut updated: Vec<HashId> = Vec::clone(&**self.splits.get_ref());
    updated.insert(0, id);
    self.splits.swap(updated);
}
fn next(&self) -> Option<HashId> { fn next(&self) -> Option<HashId> {
let mut vec = (**self.splits.get_ref()).clone(); let mut vec = (**self.splits.get_ref()).clone();
let next = if vec.len() < 0 { let next = if vec.len() < 0 {

Loading…
Cancel
Save