From a2513a72e699f0db1c345e952b336c6cbc912a3e Mon Sep 17 00:00:00 2001 From: Kaiden Fey Date: Tue, 10 Nov 2020 11:12:29 +0100 Subject: [PATCH] supergit: implementing simple file-history search This is done by stepping through a branch iterator to diff commits to see where they changed. This is somewhat computationally intensive, and also doesn't work well yet because iterator stepping is done internally and there is no API to extend iterators before they are run. That means that histories can only be read from first-parent iterators. Ideally we would do two things here: 1. build an API to let iterators extend themselves, either breadth first, or depth first. But maybe that would be too much complexity for the iterator module? 2. figure out a better way to get the history of a file. At the moment we are stepping through commits and diffing them with parents to find the set of changed paths. I don't _think_ there is a way to simply compare refs, but maybe there is. --- apps/servers/octopus/supergit/src/bin/test.rs | 8 +++- apps/servers/octopus/supergit/src/branch.rs | 10 +++++ apps/servers/octopus/supergit/src/commit.rs | 40 ++++++++++++++++++- apps/servers/octopus/supergit/src/diff.rs | 29 +++++++++++++- apps/servers/octopus/supergit/src/files.rs | 23 ++++++++++- 5 files changed, 106 insertions(+), 4 deletions(-) diff --git a/apps/servers/octopus/supergit/src/bin/test.rs b/apps/servers/octopus/supergit/src/bin/test.rs index 830c8b62f13..b7dba0379df 100644 --- a/apps/servers/octopus/supergit/src/bin/test.rs +++ b/apps/servers/octopus/supergit/src/bin/test.rs @@ -23,5 +23,11 @@ fn main() { let head = main.get_head(); let tree = head.get_tree(); - println!("{:?}", tree.load("")); + println!( + "{:?}", + tree.history(main.get_all(), "Cargo.toml") + .into_iter() + .map(|c| c.summary()) + .collect::>() + ); } diff --git a/apps/servers/octopus/supergit/src/branch.rs b/apps/servers/octopus/supergit/src/branch.rs index 43222790024..3261d23b917 100644 --- 
a/apps/servers/octopus/supergit/src/branch.rs +++ b/apps/servers/octopus/supergit/src/branch.rs @@ -248,4 +248,14 @@ impl BranchCommit { } .clone() } + + /// Get the underlying commit, regardless of type + pub fn commit(&self) -> &Commit { + use BranchCommit::*; + match self { + Commit(ref c) => c, + Merge(ref c, _) => c, + Octopus(ref c, _) => c, + } + } } diff --git a/apps/servers/octopus/supergit/src/commit.rs b/apps/servers/octopus/supergit/src/commit.rs index bc7383d1ed6..99c4dbba6fc 100644 --- a/apps/servers/octopus/supergit/src/commit.rs +++ b/apps/servers/octopus/supergit/src/commit.rs @@ -1,4 +1,4 @@ -use crate::{FileTree, HashId}; +use crate::{Diff, FileTree, HashId}; use git2::Repository; use std::sync::Arc; @@ -71,6 +71,44 @@ impl Commit { FileTree::new(&self.repo, self.id.clone()) } + /// Get the list of paths in the repository touched by this commit + /// + /// Using this function directly is often not what you want. + /// Instead, use the `history(...)` function on `FileTree`, + /// which uses this function. 
+ pub fn get_paths(&self) -> Vec { + self.get_diff() + .map(|d| Diff::from(d)) + .map_or(vec![], |d| d.get_paths()) + } + + /// Utility function to get a merged diff from all parents + fn get_diff(&self) -> Option { + // Get all diffs with parents + let stree = self.find().tree().unwrap(); + let mut vec = self + .parents() + .into_iter() + .filter_map(|p| { + self.repo + .diff_tree_to_tree(Some(&stree), Some(p.find().tree().as_ref().unwrap()), None) + .ok() + }) + .collect::>(); + + // If there are no parents + if vec.len() == 0 { + vec = vec![self.repo.diff_tree_to_tree(Some(&stree), None, None).ok()?]; + } + + // Take the first and merge onto + let first = vec.remove(0); + Some(vec.iter().fold(first, |mut acc, diff| { + acc.merge(diff).unwrap(); + acc + })) + } + fn find(&self) -> git2::Commit { self.repo.find_commit(self.id.to_oid()).unwrap() } diff --git a/apps/servers/octopus/supergit/src/diff.rs b/apps/servers/octopus/supergit/src/diff.rs index e92a4cd1804..e83903cf6eb 100644 --- a/apps/servers/octopus/supergit/src/diff.rs +++ b/apps/servers/octopus/supergit/src/diff.rs @@ -1,5 +1,32 @@ - /// A diff between two commits +/// +/// At the moment this type doesn't properly express a Diff, and is +/// only used to compute the change set between commits to generate a +/// file history. 
pub struct Diff { + paths: Vec, +} + +impl Diff { + /// Generate a new Diff from a git2::Diff + pub(crate) fn from(d: git2::Diff) -> Self { + Self { + paths: d.deltas().fold(vec![], |mut vec, delta| { + append(&mut vec, delta.old_file()); + append(&mut vec, delta.new_file()); + vec + }), + } + } + + /// Get all paths touched by a diff + pub fn get_paths(&self) -> Vec { + self.paths.clone() + } +} +fn append(vec: &mut Vec, f: git2::DiffFile) { + if let Some(path) = f.path().map(|p| p.to_str().unwrap().into()) { + vec.push(path); + } } diff --git a/apps/servers/octopus/supergit/src/files.rs b/apps/servers/octopus/supergit/src/files.rs index fa68fbc2f3d..c593a492392 100644 --- a/apps/servers/octopus/supergit/src/files.rs +++ b/apps/servers/octopus/supergit/src/files.rs @@ -1,7 +1,7 @@ use crate::{Branch, BranchIter, Commit, HashId}; use atomptr::AtomPtr; use git2::{ObjectType, TreeWalkMode, TreeWalkResult}; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::{path::PathBuf, sync::Arc}; /// A tree of files @@ -76,6 +76,27 @@ impl FileTree { pub fn load(&self, path: &str) -> Option { self.get_entry(path).and_then(|e| e.load(&self.repo)) } + + /// Get the history of a path with a branch iterator + /// + /// This function is very computationally intensive, because it + /// will step through the entire iterator to pull commits from, + /// and see if they touch the respective path. + pub fn history(&self, iter: BranchIter, path: &str) -> Vec { + iter.filter_map(|c| { + if c.commit() + .get_paths() + .into_iter() + .collect::>() + .contains(path) + { + Some(c.commit().clone()) + } else { + None + } + }) + .collect() + } } /// Data yielded from loading a part of the file tree