octopus: refactoring & typed branch parsing

This code is work-in-progress, and doesn't work on a repo that has a
branched history.  The issue here is that after handling a merge
commit, keeping track of which commit to look at next is non-trivial.
This solution tries to issue a "skip" command on the walker, but this
can accidentally skip commits, when two merges have happened in
succession (maybe a bug with the impl, not the concept).

But also, the actual merge commit seems to already be part of the
normal history? So maybe we can omit the merge commit explicitly, and
simply return a new branch handle instead.
wip/yesman
Katharina Fey 4 years ago
parent 8be6dc679e
commit 70fe187f1e
  1. 38
      apps/servers/octopus/supergit/src/bin/test.rs
  2. 223
      apps/servers/octopus/supergit/src/branch.rs
  3. 58
      apps/servers/octopus/supergit/src/commit.rs
  4. 42
      apps/servers/octopus/supergit/src/lib.rs
  5. 1
      apps/servers/octopus/supergit/src/raw/#tree_walk.rs#
  6. 1
      apps/servers/octopus/supergit/src/raw/.#tree_walk.rs
  7. 51
      apps/servers/octopus/supergit/src/raw/branch.rs
  8. 19
      apps/servers/octopus/supergit/src/raw/branch_walk.rs
  9. 168
      apps/servers/octopus/supergit/src/raw/tree_walk.rs
  10. 48
      apps/servers/octopus/supergit/src/repo.rs

@ -1,6 +1,7 @@
//! A test binary to use during development
use supergit::raw::RawRepository;
use std::sync::mpsc::channel;
use supergit::{BranchCommit, Repository};
fn main() {
let path = match std::env::args().nth(1) {
@ -11,13 +12,36 @@ fn main() {
}
};
let rr = RawRepository::open(path.as_str()).unwrap();
let branches = rr.parse_branches().unwrap();
let repo = Repository::open(path.as_str()).unwrap();
for branch in branches {
if branch.name.as_str() != "main" && continue {}
println!("Branch: {}", branch.name);
let (tx, rx) = channel();
let branches = repo.branches().unwrap();
branch.enumerate(" ".into(), &rr.inner);
branches
.into_iter()
.filter(|b| b.name == Some("main".to_string()))
.for_each(|b| tx.send(b.get_all()).unwrap());
// Iterate over all branch iterators we get
while let Some(biter) = rx.recv().ok() {
use BranchCommit::*;
biter.for_each(|bc| match bc {
Commit(c) => println!("{}: {}", c.id_str(), c.summary()),
Merge(c, b) => {
// println!("[MERGE] {}: {}", c.id_str(), c.summary());
// tx.send(b.get_all()).unwrap();
}
_ => todo!(),
});
}
// let rr = RawRepository::open(path.as_str()).unwrap();
// let branches = rr.parse_branches().unwrap();
// for branch in branches {
// if branch.name.as_str() != "main" && continue {}
// println!("Branch: {}", branch.name);
// branch.enumerate(" ".into(), &rr.inner);
// }
}

@ -1,10 +1,209 @@
use crate::{Commit, CommitId};
use crate::{Commit, HashId};
use git2::Repository;
use std::sync::Arc;
/// Abstraction for a branch history slice
///
///
#[derive(Clone)]
pub struct Branch {
name: String,
head: CommitId,
history: Vec<BranchCommit>,
repo: Arc<Repository>,
pub name: Option<String>,
pub head: HashId,
}
impl Branch {
/// Create a new branch handle
pub(crate) fn new(repo: &Arc<Repository>, name: String, head: HashId) -> Self {
Self {
repo: Arc::clone(repo),
name: Some(name),
head,
}
}
pub(crate) fn without_name(repo: &Arc<Repository>, head: HashId) -> Self {
Self {
repo: Arc::clone(repo),
name: None,
head,
}
}
/// Get a branch handle starting at a certain commit
pub fn skip_to(&self, from: HashId) -> Self {
match self.name {
Some(ref name) => Self::new(&self.repo, name.clone(), from),
None => Self::without_name(&self.repo, from),
}
}
/// Create a branch handle that skips a certain number of commits
///
/// This walker always picks the first parent.
pub fn skip(&self, num: usize) -> Self {
let mut head = self.repo.find_commit(self.head.clone().into()).unwrap();
for _ in 0..num {
if let Ok(p) = head.parent(0) {
head = p;
}
}
match self.name {
Some(ref name) => Self::new(&self.repo, name.clone(), head.id().into()),
None => Self::without_name(&self.repo, head.id().into()),
}
}
pub fn get_to(&self, commit: HashId) -> BranchIter {
BranchIter::new(
Arc::clone(&self.repo),
self.head.clone(),
SegLimit::Commit(false, commit),
)
}
/// Get the primary branch history as far back as it goes
pub fn get_all(&self) -> BranchIter {
BranchIter::new(Arc::clone(&self.repo), self.head.clone(), SegLimit::None)
}
/// Get a branch segment of a certain length
pub fn get(&self, num: usize) -> BranchIter {
BranchIter::new(
Arc::clone(&self.repo),
self.head.clone(),
SegLimit::Length(0, num),
)
}
}
/// A branch segment iterator
///
/// Each iterator is first-parent, but will notify you about a split
/// parent by yielding a `BranchCommit::Merge` item that carries a
/// handle to the other parent.
pub struct BranchIter {
/// Shared handle to the underlying git2 repository
repo: Arc<Repository>,
/// Id stored by the most recent `set_last` call (initially the id
/// passed to `new`)
last: HashId,
/// Pending command, consumed and reset to `Step` at the start of
/// every `next()` call
cmd: IterCmd,
/// How far this iterator is allowed to run
limit: SegLimit,
}
impl BranchIter {
    /// Create a new branch segment iterator
    ///
    /// `last` is the commit the walk starts from.  The iterator yields
    /// the ancestors of that commit, not the commit itself.
    fn new(repo: Arc<Repository>, last: HashId, limit: SegLimit) -> Self {
        Self {
            repo,
            last,
            cmd: IterCmd::Step,
            limit,
        }
    }

    /// Get a commit object, if it exists
    fn find_commit(&self, id: &HashId) -> Option<Commit> {
        Commit::new(&self.repo, id.clone())
    }

    /// Utility function to set last commit
    ///
    /// Records the id of `bc` and the follow-up command, then hands
    /// `bc` back so it can be returned from `next()`.
    fn set_last(&mut self, (bc, cmd): (BranchCommit, IterCmd)) -> BranchCommit {
        self.last = bc.id();
        self.cmd = cmd;
        bc
    }

    /// Get the first parent of `last` and classify it
    ///
    /// Returns `None` when `last` is `None` or has no first parent,
    /// i.e. the end of the branch has been reached.
    ///
    /// The commit that is *yielded* is the one being classified: a
    /// commit with two parents is reported as `BranchCommit::Merge`
    /// together with a handle to its second parent.  Classifying the
    /// previously yielded commit instead (and then skipping to its
    /// first parent) reported every merge twice and never yielded the
    /// commit that was skipped to.
    ///
    /// ## Panics
    ///
    /// If the yielded commit has more than two parents.
    fn get_parent(&self, last: Option<Commit>) -> Option<(BranchCommit, IterCmd)> {
        let next = last?.first_parent()?;

        let item = match next.parent_count() {
            // A root commit or a normal commit
            0 | 1 => BranchCommit::Commit(next),
            // Two parents is a normal merge commit
            2 => {
                let merged = next
                    .parent(1)
                    .expect("a commit with two parents must have a second parent");
                BranchCommit::Merge(next, Branch::without_name(&self.repo, merged.id))
            }
            // More parents means the universe is ending
            _ => panic!("Octopus merges are not implemented yet!"),
        };

        // The walk always continues from the commit that was just
        // yielded, so no skip target is ever needed.
        Some((item, IterCmd::Step))
    }
}
impl Iterator for BranchIter {
    type Item = BranchCommit;

    /// Yield the next entry of the segment, or `None` once the limit
    /// or the end of the branch has been reached
    fn next(&mut self) -> Option<Self::Item> {
        // A pending `Skip` overrides the stored position exactly once
        let pending = std::mem::replace(&mut self.cmd, IterCmd::Step);
        let cursor = match pending.take() {
            Some(id) => id,
            None => self.last.clone(),
        };
        let current = self.find_commit(&cursor);

        // Check (and update) the limit before looking at the parent
        match &mut self.limit {
            SegLimit::None | SegLimit::Commit(false, _) => {}
            // The target commit was yielded on an earlier call
            SegLimit::Commit(true, _) => return None,
            SegLimit::Length(seen, max) => {
                if *seen >= *max {
                    return None;
                }
                *seen += 1;
            }
        }

        let (bc, cmd) = self.get_parent(current)?;

        // Mark the iterator as done when the target commit is reached;
        // the commit itself is still yielded
        if let SegLimit::Commit(done, target) = &mut self.limit {
            if bc.id() == *target {
                *done = true;
            }
        }

        Some(self.set_last((bc, cmd)))
    }
}
/// Specify where the iterator resumes on its next call
enum IterCmd {
/// No override: the next call resumes from the iterator's stored
/// `last` id
Step,
/// Resume the next call from this id instead of the stored `last` id
Skip(HashId),
}
impl IterCmd {
    /// Consume the command and return the id carried by `Skip`;
    /// `Step` carries nothing and gives `None`
    fn take(self) -> Option<HashId> {
        if let Self::Skip(target) = self {
            Some(target)
        } else {
            None
        }
    }
}
/// The limit applied to a branch segment
pub enum SegLimit {
/// No limit: iteration only stops when no further parent is found
None,
/// Run until a certain commit is found
///
/// The flag is set to `true` once an item with the given id has been
/// yielded; every later call then returns `None`.
Commit(bool, HashId),
/// Run to collect a certain number of commits
///
/// The fields are (calls counted so far, maximum).
Length(usize, usize),
}
/// A commit represented as a relationship to a branch
@ -16,7 +215,19 @@ pub enum BranchCommit {
/// A single commit
Commit(Commit),
/// A merge commit from one other branch
Merge(Branch),
Merge(Commit, Branch),
/// An octopus merge with multiple branches
Octopus(Vec<Branch>),
Octopus(Commit, Vec<Branch>),
}
impl BranchCommit {
pub fn id(&self) -> HashId {
use BranchCommit::*;
match self {
Commit(ref c) => &c.id,
Merge(ref c, _) => &c.id,
Octopus(ref c, _) => &c.id,
}
.clone()
}
}

@ -1,11 +1,57 @@
pub type CommitId = usize;
use crate::HashId;
use git2::Repository;
use std::sync::Arc;
/// Represent a commit on a repository
///
/// This abstraction only contains metadata required to fetch the full
/// commit from disk, if it is queried. Any operation on this type
/// will block to first load
/// When creating a commit object, it is guaranteed that it exists in
/// the repository.
#[derive(Clone)]
pub struct Commit {
pub id: CommitId,
hash: String,
pub id: HashId,
repo: Arc<Repository>,
}
impl Commit {
    /// Create a commit object and check if it exists in the repo
    ///
    /// Returns `None` when the repository has no commit with this id.
    pub fn new(r: &Arc<Repository>, id: HashId) -> Option<Self> {
        r.find_commit(id.to_oid()).ok().map(|_| Self {
            id,
            repo: Arc::clone(r),
        })
    }

    /// The commit id as a string
    pub fn id_str(&self) -> String {
        self.id.to_string()
    }

    /// The summary (first line) of the commit message
    ///
    /// Commit messages are arbitrary bytes, so this never panics on
    /// them: invalid UTF-8 is replaced lossily, and a commit without a
    /// readable summary gives an empty string.
    pub fn summary(&self) -> String {
        let raw = self.find();
        raw.summary_bytes()
            .map(|bytes| String::from_utf8_lossy(bytes).into_owned())
            .unwrap_or_default()
    }

    /// Number of parents this commit has
    pub fn parent_count(&self) -> usize {
        self.find().parent_count()
    }

    /// Return the first parent, if it exists
    pub fn first_parent(&self) -> Option<Self> {
        self.parent(0)
    }

    /// Return the parent with index `num`, if it exists
    pub fn parent(&self, num: usize) -> Option<Self> {
        self.find()
            .parent(num)
            .ok()
            .and_then(|c| Self::new(&self.repo, c.id().into()))
    }

    /// Load the underlying git2 commit
    ///
    /// ## Panics
    ///
    /// If the commit can't be found.  `new` only hands out objects for
    /// commits that exist, so this indicates a broken invariant.
    fn find(&self) -> git2::Commit<'_> {
        self.repo
            .find_commit(self.id.to_oid())
            .expect("commit was verified to exist when this handle was created")
    }
}

@ -6,50 +6,20 @@
//! update, call `sync()` again. If you want the sync operation to be
//! blocking, call `sync_blocking()` instead.
//!
//!
//!
mod branch;
pub use branch::{Branch, BranchCommit};
mod commit;
pub use commit::{CommitId, Commit};
pub use commit::Commit;
mod diff;
pub use diff::Diff;
pub mod raw;
mod repo;
pub use repo::Repository;
pub(crate) use repo::HashId;
use std::sync::atomic::{AtomicUsize, Ordering};
use async_std::sync::{Arc, RwLock};
use raw::RawRepository;
/// Represents a git repository with lazy data loading
pub struct Repository {
/// The wrapped raw repository (see `raw::RawRepository`)
raw: RawRepository,
}
impl Repository {
/// Presumably opens the repository located at `path`.
///
/// NOTE(review): not implemented -- the body is `todo!()`, so
/// calling this panics.
pub fn open(path: &std::path::Path) -> Arc<Self> {
todo!()
}
/// Sync the repository with the backing git files
///
/// This function can be invoked manually, but should be invoked
/// basically every time your program expects changes to have
/// happened. Polling this function is not recommended.
///
/// NOTE(review): not implemented -- the body is `todo!()`, so
/// calling this panics.
pub fn sync(&self) {
todo!()
}
}
/////////// IDs are created from the same pool to save on code size ////////////

/// Process-wide counter backing [`id`]
///
/// This has to be a `static`.  A `const` of an atomic type is
/// instantiated afresh at every use site, so the counter would never
/// advance and every caller would receive `0`.
static ID_CTR: AtomicUsize = AtomicUsize::new(0);

/// Get monotonically increasing IDs for objects
pub(crate) fn id() -> usize {
    ID_CTR.fetch_add(1, Ordering::Relaxed)
}
use std::sync::atomic::{AtomicUsize, Ordering};

@ -1 +0,0 @@
//! Walk the file tree for a particular commit

@ -1,51 +0,0 @@
use super::{HashId, RawRepository};
use crate::Branch;
use git2::{Commit, Repository};
/// Represent some raw branch metadata
pub struct RawBranch {
/// Name of the branch
pub name: String,
/// Id of the commit the branch points at
pub head: HashId,
}
/// Print one line for `c`: the prefix `i`, the commit id, and the
/// first line of the trimmed commit message
///
/// ## Panics
///
/// If `message()` returns `None` for the commit.
fn print_commit(i: &String, c: &Commit) {
    let id = c.id().to_string();
    let message = c.message().unwrap();
    let first_line = message.trim().split('\n').next().unwrap();
    println!("{}{}: {}", i, id, first_line);
}
/// Recursively print every ancestor of `c`, depth first, one line per
/// commit and each line prefixed with `indent`
///
/// ## Panics
///
/// If `message()` returns `None` for any visited commit.
fn print_parent_tree(c: &Commit, indent: String) {
    for parent in c.parents() {
        let id = parent.id().to_string();
        let message = parent.message().unwrap();
        let first_line = message.trim().split('\n').next().unwrap();
        println!("{}{}: {}", indent, id, first_line);

        print_parent_tree(&parent, indent.clone());
    }
}
impl RawBranch {
/// Consume branch reference and enumerate real branch history
pub fn into_branch(self, repo: &mut RawRepository) -> Branch {
todo!()
}
/// **REMOVE ME** A test function to do some test things
pub fn enumerate(&self, indent: String, repo: &Repository) {
let c = repo.find_commit((&self.head).into()).unwrap();
println!(
"{}{}: {}",
indent,
c.id().to_string(),
c.message().unwrap().trim().split("\n").nth(0).unwrap()
);
print_parent_tree(&c, indent);
}
}

@ -1,19 +0,0 @@
//! Walk along a branch parsing commit metadata
use std::collections::{BTreeMap, BTreeSet};
/// Ordered commit IDs together with the metadata of each commit
///
/// NOTE(review): nothing in the visible code constructs or reads this
/// type yet.
pub struct CommitHistory {
/// The correct order of commit IDs
order: Vec<String>,
/// Map of commit IDs to commit metadata
meta: BTreeMap<String, CommitNode>,
}
/// Metadata of a single commit, stored as the values of
/// `CommitHistory::meta`
pub struct CommitNode {
/// Presumably the same ID string used as the key in
/// `CommitHistory::meta` -- TODO confirm
id: String,
author: String,
// NOTE(review): field name is misspelled ("committer")
commiter: String,
message: String,
/// Presumably the paths changed by the commit -- TODO confirm
touches: BTreeSet<String>,
/// Presumably the commit time; unit and epoch are not visible here
/// -- TODO confirm
time: u64,
}

@ -1,168 +0,0 @@
//! Walk the file tree for a particular commit
use git2::{self, ObjectType, TreeWalkMode};
use std::collections::BTreeMap;
/// A cache of a repository tree
#[derive(Default, Debug, Clone)]
pub(crate) struct Tree {
/// Entries of this level, keyed by entry name
inner: BTreeMap<String, TreeNode>,
}
impl Tree {
    /// Insert a node into a subtree with its full path
    ///
    /// `path` lists the directories to descend through, starting at
    /// this tree; an empty path inserts into this tree itself.
    ///
    /// ## Panics
    ///
    /// If a path segment does not exist, or names a file.
    fn insert_to_subtree(&mut self, path: Vec<String>, name: String, node: TreeNode) {
        let mut target: &mut Tree = self;
        for segment in path {
            target = match target.inner.get_mut(&segment).unwrap() {
                TreeNode::Dir(dir) => &mut dir.children,
                TreeNode::File(_) => panic!("Not a tree!"),
            };
        }
        target.inner.insert(name, node);
    }

    /// Walk through the tree and only return filenode objects
    pub(crate) fn flatten(&self) -> Vec<FileNode> {
        let mut files = Vec::new();
        for node in self.inner.values() {
            match node {
                TreeNode::File(file) => files.push(file.clone()),
                TreeNode::Dir(dir) => files.extend(dir.children.flatten()),
            }
        }
        files
    }

    /// Look up the file at `path` and return its stored id
    ///
    /// Empty path segments (leading, trailing or doubled `/`) are
    /// ignored.  The descent stops at the first file it reaches, even
    /// if path segments remain.
    ///
    /// ## Panics
    ///
    /// If the path has no segments, a segment does not exist, or the
    /// path ends on a directory.
    pub(crate) fn grab_path_history(&self, path: String) -> String {
        let mut segments = path.split('/').filter(|seg| !seg.is_empty());

        let first = match segments.next() {
            Some(seg) => seg,
            None => panic!("No valid path!"),
        };

        let mut node = self.inner.get(first).unwrap();
        for seg in segments {
            match node {
                TreeNode::Dir(dir) => node = dir.children.inner.get(seg).unwrap(),
                // we reached the leaf
                TreeNode::File(_) => break,
            }
        }

        match node {
            TreeNode::File(file) => file.id.clone(),
            _ => panic!("Not a leaf!"),
        }
    }
}
/// A single entry of a `Tree`: either a file or a directory
#[derive(Clone, Debug)]
pub(crate) enum TreeNode {
/// A file entry
File(FileNode),
/// A directory entry, including its own children
Dir(DirNode),
}
impl TreeNode {
    /// Owned copy of the entry's name, for files and directories alike
    fn name(&self) -> String {
        let name = match self {
            Self::File(file) => &file.name,
            Self::Dir(dir) => &dir.name,
        };
        name.clone()
    }
}
/// A file entry of a `Tree`
#[derive(Clone, Debug)]
pub(crate) struct FileNode {
/// Id of the file's git object, rendered as a string
pub id: String,
/// Names of the directories leading to this file, outermost first
pub path: Vec<String>,
/// Name of the file
pub name: String,
}
/// A directory entry of a `Tree`
#[derive(Clone, Debug)]
pub(crate) struct DirNode {
/// Names of the directories leading to this one, outermost first
pub path: Vec<String>,
/// Name of the directory
pub name: String,
/// Entries contained in this directory
pub children: Tree,
}
impl DirNode {
    /// Add `node` as a direct child, keyed by its name; an existing
    /// child with the same name is replaced
    fn append(&mut self, node: TreeNode) {
        let key = node.name();
        self.children.inner.insert(key, node);
    }
}
/// Walk a git tree in pre-order and mirror its directories and files
/// into a `Tree` cache
///
/// Entries that are neither trees nor blobs are ignored.
///
/// ## Panics
///
/// If an entry name can't be read as a string, or the walk fails.
pub(crate) fn parse_tree(tree: git2::Tree) -> Tree {
    let mut root = Tree::default();

    let walk = tree.walk(TreeWalkMode::PreOrder, |dir, entry| {
        let path: Vec<String> = dir
            .split('/')
            .filter(|seg| !seg.is_empty())
            .map(String::from)
            .collect();
        let name = entry.name().unwrap().to_string();

        let node = match entry.kind() {
            // Every tree becomes a directory node at the path we know about
            Some(ObjectType::Tree) => Some(TreeNode::Dir(DirNode {
                path: path.clone(),
                name: name.clone(),
                children: Tree::default(),
            })),
            // A blob is a file that can simply be inserted into the tree
            Some(ObjectType::Blob) => Some(TreeNode::File(FileNode {
                id: entry.id().to_string(),
                path: path.clone(),
                name: name.clone(),
            })),
            _ => None,
        };

        if let Some(node) = node {
            root.insert_to_subtree(path, name, node);
        }

        // Keep walking
        0
    });
    walk.unwrap();

    root
}

@ -1,20 +1,25 @@
//! Raw representation wrappers for libgit2
mod branch;
pub use branch::RawBranch;
mod branch_walk;
mod tree_walk;
use crate::{Branch, BranchCommit};
use git2::{self, Oid, Repository};
use git2::{self, Oid};
use std::sync::Arc;
pub type RawResult<T> = Result<T, RawError>;
pub type GitResult<T> = Result<T, GitError>;
/// The hex ID of a commit
#[derive(Debug)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct HashId(String);
impl HashId {
    /// Convert this id into a git2 `Oid`, working on a clone of the id
    pub fn to_oid(&self) -> Oid {
        let owned: HashId = self.clone();
        owned.into()
    }

    /// Owned copy of the hex string
    pub fn to_string(&self) -> String {
        self.0.to_owned()
    }
}
impl From<Oid> for HashId {
fn from(o: Oid) -> Self {
Self(o.to_string())
@ -39,28 +44,28 @@ impl<'any> From<&'any HashId> for Oid {
}
}
/// An error abstraction for raw git operations
#[derive(Debug)]
pub enum RawError {
pub enum GitError {
AllBad,
}
impl From<git2::Error> for RawError {
impl From<git2::Error> for GitError {
fn from(_: git2::Error) -> Self {
Self::AllBad
}
}
/// Wrap a libgit2 repository to provide an API facade
pub struct RawRepository {
pub inner: Repository,
/// Represents a git repository with lazy data loading
#[derive(Clone)]
pub struct Repository {
inner: Arc<git2::Repository>,
}
impl RawRepository {
pub fn open(path: &str) -> RawResult<Self> {
impl Repository {
pub fn open(path: &str) -> GitResult<Self> {
Ok(Self {
inner: Repository::open(path)?,
inner: Arc::new(git2::Repository::open(path)?),
})
}
@ -69,7 +74,7 @@ impl RawRepository {
/// ## Panics
///
/// If there is an error around getting the name, or head commit.
pub fn parse_branches(&self) -> RawResult<Vec<RawBranch>> {
pub fn branches(&self) -> GitResult<Vec<Branch>> {
Ok(self
.inner
.branches(None)?
@ -78,14 +83,13 @@ impl RawRepository {
.map(|(branch, _)| {
let name = branch.name().unwrap().unwrap().into();
let head = branch.get().peel_to_commit().unwrap().id().into();
RawBranch { name, head }
Branch::new(&self.inner, name, head)
})
.collect())
}
/// Get the files touched by a commit
pub fn get_files_for(&self, id: HashId) -> RawResult<Vec<()>> {
pub fn get_files_for(&self, id: HashId) -> GitResult<Vec<()>> {
let c = self.inner.find_commit(id.into())?;
let tree = c.tree()?;
Loading…
Cancel
Save