supergit: implementing simple file-history search

This is done by stepping through a branch iterator and diffing each
commit to see which paths it changed.  This is somewhat
computationally intensive, and it also doesn't work well yet: iterator
stepping is done internally, and there is no API to extend iterators
before they are run.  That means that histories can only be read from
first-parent iterators.

Ideally we would do two things here:

1. build an API to let iterators extend themselves, either breadth
   first, or depth first.  But maybe that would be too much
   complexity for the iterator module?

2. figure out a better way to get the history of a file.  At the
   moment we are stepping through commits and diffing them with
   parents to find the set of changed paths.  I don't _think_ there is
   a way to simply compare refs, but maybe there is.
wip/yesman
Katharina Fey 4 years ago committed by Mx Kookie
parent 9cc32b516d
commit a2513a72e6
  1. 8
      apps/servers/octopus/supergit/src/bin/test.rs
  2. 10
      apps/servers/octopus/supergit/src/branch.rs
  3. 40
      apps/servers/octopus/supergit/src/commit.rs
  4. 29
      apps/servers/octopus/supergit/src/diff.rs
  5. 23
      apps/servers/octopus/supergit/src/files.rs

@ -23,5 +23,11 @@ fn main() {
let head = main.get_head();
let tree = head.get_tree();
println!("{:?}", tree.load(""));
println!(
"{:?}",
tree.history(main.get_all(), "Cargo.toml")
.into_iter()
.map(|c| c.summary())
.collect::<Vec<_>>()
);
}

@ -248,4 +248,14 @@ impl BranchCommit {
}
.clone()
}
/// Get the underlying commit, regardless of type
pub fn commit(&self) -> &Commit {
use BranchCommit::*;
match self {
Commit(ref c) => c,
Merge(ref c, _) => c,
Octopus(ref c, _) => c,
}
}
}

@ -1,4 +1,4 @@
use crate::{FileTree, HashId};
use crate::{Diff, FileTree, HashId};
use git2::Repository;
use std::sync::Arc;
@ -71,6 +71,44 @@ impl Commit {
FileTree::new(&self.repo, self.id.clone())
}
/// Get the list of paths in the repository touched by this commit
///
/// Using this function directly is often not what you want.
/// Instead, use the `history(...)` function on `FileTree`, which
/// calls this function for every commit it inspects.
///
/// Returns an empty list when no diff could be computed for this
/// commit.
pub fn get_paths(&self) -> Vec<String> {
    match self.get_diff() {
        Some(raw) => Diff::from(raw).get_paths(),
        None => Vec::new(),
    }
}
/// Utility function to get a merged diff from all parents
///
/// Each parent's tree is diffed against this commit's tree (parent
/// as the old side, this commit as the new side), and the resulting
/// per-parent diffs are merged into a single diff.  Parents whose
/// tree can't be loaded or diffed are skipped.  If no parent diff
/// could be produced at all (for example because there are no
/// parents), this commit's tree is diffed against an empty tree.
///
/// Returns `None` if this commit's tree can't be loaded, if the
/// fallback diff fails, or if merging two diffs fails.
fn get_diff(&self) -> Option<git2::Diff> {
    let stree = self.find().tree().ok()?;

    // Lazily produce one diff per parent; failures are filtered out
    let mut diffs = self.parents().into_iter().filter_map(|p| {
        let ptree = p.find().tree().ok()?;
        // `diff_tree_to_tree` takes (old, new): the parent is the old side
        self.repo
            .diff_tree_to_tree(Some(&ptree), Some(&stree), None)
            .ok()
    });

    // Take the first diff as the merge target, or fall back to a diff
    // against the empty tree if there is nothing to merge
    let first = match diffs.next() {
        Some(diff) => diff,
        None => return self.repo.diff_tree_to_tree(None, Some(&stree), None).ok(),
    };

    // Merge every remaining diff onto the first one
    diffs.try_fold(first, |mut acc, diff| {
        acc.merge(&diff).ok()?;
        Some(acc)
    })
}
/// Look up the underlying `git2::Commit` for this commit's id
///
/// # Panics
///
/// Panics if the repository lookup fails.
fn find(&self) -> git2::Commit {
    let oid = self.id.to_oid();
    self.repo.find_commit(oid).unwrap()
}

@ -1,5 +1,32 @@
/// The set of paths that differ between two commits
///
/// This is not a full representation of a diff yet: it only keeps
/// the paths that were touched, which is what the file-history
/// lookup needs.
pub struct Diff {
    paths: Vec<String>,
}

impl Diff {
    /// Build a `Diff` by collecting the paths of a `git2::Diff`
    ///
    /// For every delta, the path of the old file and the path of the
    /// new file are both recorded (in that order), so a path may be
    /// listed more than once.
    pub(crate) fn from(d: git2::Diff) -> Self {
        let mut paths = Vec::new();
        for delta in d.deltas() {
            append(&mut paths, delta.old_file());
            append(&mut paths, delta.new_file());
        }
        Self { paths }
    }

    /// Return a copy of every path recorded for this diff
    pub fn get_paths(&self) -> Vec<String> {
        self.paths.to_vec()
    }
}
/// Push the path of a diff file onto `vec`, if the file has one
///
/// Paths that are not valid UTF-8 are converted lossily (invalid
/// sequences become U+FFFD) instead of panicking.
fn append(vec: &mut Vec<String>, f: git2::DiffFile) {
    if let Some(path) = f.path() {
        vec.push(path.to_string_lossy().into_owned());
    }
}

@ -1,7 +1,7 @@
use crate::{Branch, BranchIter, Commit, HashId};
use atomptr::AtomPtr;
use git2::{ObjectType, TreeWalkMode, TreeWalkResult};
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::{path::PathBuf, sync::Arc};
/// A tree of files
@ -76,6 +76,27 @@ impl FileTree {
pub fn load(&self, path: &str) -> Option<Yield> {
    // No entry for this path means there is nothing to load
    let entry = self.get_entry(path)?;
    entry.load(&self.repo)
}
/// Get the history of a path with a branch iterator
///
/// Returns every commit yielded by `iter` whose set of touched paths
/// contains `path`, in the order the iterator yields them.
///
/// This function is very computationally intensive: it consumes the
/// whole iterator and computes the touched paths of every commit in
/// order to check whether it touches `path`.
pub fn history(&self, iter: BranchIter, path: &str) -> Vec<Commit> {
    iter.filter(|c| c.commit().get_paths().iter().any(|p| p == path))
        .map(|c| c.commit().clone())
        .collect()
}
}
/// Data yielded from loading a part of the file tree

Loading…
Cancel
Save