cassiopeia: implementing basic file parser

wip/yesman
Katharina Fey 3 years ago
parent afd8a74e43
commit 5502c6d320
  1. 29
      apps/cassiopeia/src/data.rs
  2. 145
      apps/cassiopeia/src/file.rs
  3. 91
      apps/cassiopeia/src/format/lexer.rs
  4. 26
      apps/cassiopeia/src/format/mod.rs
  5. 78
      apps/cassiopeia/src/format/parser.rs
  6. 5
      apps/cassiopeia/src/main.rs

@ -0,0 +1,29 @@
//! Typed time file for cassiopeia
//!
//! This data gets generated by the `format` module, and can later be
//! used to generate new files, and perform various lookups and
//! analysis tasks.
use crate::format::LineCfg;
use chrono::{Date, DateTime, FixedOffset as Offset};
use std::collections::BTreeMap;
/// In-memory representation of a parsed cassiopeia time file.
///
/// Built up incrementally by folding parsed lines into it via
/// [`TimeFile::append`].
#[derive(Default)]
pub struct TimeFile {
    /// Header metadata as key → value pairs (e.g. `version`).
    header: BTreeMap<String, String>,
    /// All recorded work sessions.
    sessions: Vec<Session>,
    /// Invoice dates — NOTE(review): `Date<Offset>` is date-only;
    /// confirm time-of-day really isn't needed for invoices.
    invoices: Vec<Date<Offset>>,
}
impl TimeFile {
    /// Fold one parsed line into the time file (builder style: consumes
    /// and returns `self`).
    ///
    /// Currently a WIP stub: it only debug-prints the line and returns
    /// the file unchanged — the actual bookkeeping is still TODO.
    pub(crate) fn append(self, line: LineCfg) -> Self {
        println!("{:?}", line);
        self
    }
}
/// A single work session, bounded by a START and a STOP entry in the
/// time file.
pub struct Session {
    /// When the session began.
    start: DateTime<Offset>,
    /// When the session ended.
    stop: DateTime<Offset>,
}

@ -1,145 +0,0 @@
//! Parse the cassiopeia file format
//!
//! Each file is associated with a single project. This way there is
//! no need to associate session entries with multiple customers and
//! projects. Currently there's also no way to cross-relate sessions
//! between projects or clients, although the metadata in the header
//! is available to do so in the future
//!
//! ## Structure
//!
//! `cassiopeia` files should use the `.cass` extension, although this
//! implementation is not opinionated on that.
//!
//! A line starting with `;` is a comment and can be ignored. A line
//! can have a comment anywhere, which means that everything after it
//! gets ignored. There are no block comments.
//!
//! A regular statement has two parts: a key, and a value. Available
//! keys are:
//!
//! - HEADER
//! - START
//! - STOP
//! - FINISH
//!
//! A file has to have at least one `HEADER` key, containing a certain
//! number of fields to be considered valid. The required number of
//! fields may vary between versions.
//!
//! ### HEADER
//!
//! `cassiopeia` in principle only needs a single value to parse a
//! file, which is `version`. It is however recommended to add
//! additional metadata to allow future processing into clients and
//! cross-referencing projects. Importantly: header keys that are not
//! expected will be ignored.
//!
//! The general header format is a comma-separated list with a key
//! value pair, separated by an equals sign. You can use spaces in
//! both keys and values without having to escape them or use special
//! quotes. Leading and trailing spaces will be removed.
//!
//! ```
//! HEADER version=0.0.0,location=Berlin
//! HEADER work schedule=mon tue wed
//! ```
//!
//! When re-writing the file format, known/ accepted keys should go
//! first. All other unknown keys will be printed alphabetically at
//! the end. This way it's possible for an outdated implementation to
//! pass through unknown keys, or users to add their own keys.
use chrono::{DateTime, Utc};
use std::{fs::File, io::Read, path::Path};
/// A cassiopeia file that has been successfully parsed
pub struct TimeFile {
    // NOTE(review): `PathBuf` is not imported above (only `Path`); this
    // needs `std::path::PathBuf` in scope to compile.
    path: PathBuf,
    /// The parsed statements, in file order.
    content: Vec<Statement>,
}
impl TimeFile {
/// Open an existing `.cass` file on disk. Panics!
pub fn open(p: impl Into<Path>) -> Self {
let mut f = File::open(p).unwrap();
let mut cont = String::new();
f.read_to_string(&mut cont).unwrap();
}
}
/// A statement in a `.cass` line
///
/// While the whole file gets re-written on every run to update
/// version numbers and header values, the structure of the file is
/// preserved.
pub enum Statement {
    /// A blank line
    Blank,
    /// A comment line that is echo-ed back out
    Comment(String),
    /// Header value
    Header(Vec<HeaderVal>),
    /// A session start value
    Start(DateTime<Utc>),
    /// A session stop value
    Stop(DateTime<Utc>),
    /// A project finish value
    Finish(DateTime<Utc>),
}
/// A set of header values
pub struct HeaderVal {
    /// Header key
    key: String,
    /// Header value
    val: String,
}

impl HeaderVal {
    /// Build a header pair from anything convertible into owned strings.
    ///
    /// Key and value use independent generic parameters so mixed calls
    /// like `new("version", some_string)` work; the old single-`S`
    /// signature forced both arguments to share one concrete type.
    fn new<K: Into<String>, V: Into<String>>(key: K, val: V) -> Self {
        Self {
            key: key.into(),
            val: val.into(),
        }
    }

    /// Test if a header key is known to this implementation.
    fn known(&self) -> bool {
        // Match on `&str`: matching a `String` directly against string
        // literal patterns does not compile.
        match self.key.as_str() {
            "version" => true,
            _ => false,
        }
    }
}
/// A builder for cass files, used to render test fixtures.
// `#[cfg(tests)]` never matches — the harness defines `test`, not
// `tests` — so this struct was silently compiled out of test builds.
#[cfg(test)]
struct FileBuilder {
    /// Statements accumulated so far, in output order.
    acc: Vec<Statement>,
}

// The impl must be gated too, or non-test builds fail to resolve the
// (test-only) type.
#[cfg(test)]
impl FileBuilder {
    /// Start with an empty statement list.
    fn new() -> Self {
        Self { acc: vec![] }
    }

    /// Append a HEADER statement assembled from `(key, value)` pairs.
    fn header(mut self, data: Vec<(&str, &str)>) -> Self {
        self.acc.push(Statement::Header(
            data.into_iter()
                .map(|(key, val)| HeaderVal::new(key, val))
                .collect(),
        ));
        self
    }

    /// Render the accumulated statements into the on-disk file format,
    /// prefixed with the generator banner comment.
    ///
    /// NOTE(review): `Statement::render` is not defined in this diff —
    /// confirm it exists elsewhere.
    fn build(self) -> String {
        format!(
            ";; This file was generated by cassiopeia (reference)\n{}",
            self.acc
                .into_iter()
                .map(|s| s.render())
                // original was missing the closing `>` on `Vec<_>>()`
                .collect::<Vec<_>>()
                .join("\n")
        )
    }
}
#[test]
fn empty_file() {
    // Build a minimal file and pin down the generated banner line — the
    // original test constructed the builder but asserted nothing.
    let file = FileBuilder::new()
        .header(vec![("version", "0.3.0"), ("project", "testing")])
        .build();
    assert!(file.starts_with(";; This file was generated by cassiopeia (reference)"));
}

@ -1,6 +1,7 @@
//! Cassiopeia file lexer
use logos::Logos;
use logos::{Lexer, Logos};
use std::iter::Iterator;
/// A basic line lexer type
///
@ -8,8 +9,7 @@ use logos::Logos;
/// does not attempt to parse the line specifics. This is what the
/// content lexer is for.
#[derive(Logos, Debug, PartialEq)]
enum Line {
pub(crate) enum Token {
#[token("HEADER")]
Header,
@ -21,7 +21,7 @@ enum Line {
#[token("INVOICE")]
Invoice,
#[regex(r"\w+=[^,$]+[,$]")]
HeaderData,
@ -32,85 +32,120 @@ enum Line {
#[token(" ", logos::skip)]
Space,
#[regex(";;.*")]
Comment,
#[error]
Error,
}
/// A single token type on a line
///
/// Couples the token kind with the exact source text it matched, so the
/// parser can later extract data (e.g. header key/value fragments).
#[derive(Debug)]
pub(crate) struct LineToken<'l> {
    /// The lexed token kind.
    pub(crate) tt: Token,
    /// The matched source text for this token.
    pub(crate) slice: &'l str,
}
// pub fn test_this() {
// // let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin,");
// let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00");
/// A lexer wrapped for a single line
///
/// Thin wrapper around the `logos` lexer so the rest of the crate does
/// not depend on `logos` types directly.
pub(crate) struct LineLexer<'l> {
    // Underlying token stream for one line of input.
    lexer: Lexer<'l, Token>,
}
impl<'l> LineLexer<'l> {
    /// Consume the lexer and return every remaining token on the line.
    pub(crate) fn get_all(self) -> Vec<LineToken<'l>> {
        // `LineLexer` is itself an iterator, so collecting replaces the
        // manual accumulator loop.
        self.collect()
    }
}
// while let Some(t) = lex.next() {
// println!("{:?}: {}", t, lex.slice());
// }
// }
impl<'l> Iterator for LineLexer<'l> {
type Item = LineToken<'l>;
fn next(&mut self) -> Option<Self::Item> {
self.lexer.next().map(|tt| Self::Item {
tt,
slice: self.lexer.slice(),
})
}
}
/// Take a line of input and lex it into a stream of tokens
///
/// Lexing only reads the line, so a shared `&str` borrow is enough; the
/// old `&'l mut String` requirement was overly strict. Existing callers
/// passing `&mut String` still compile via deref coercion.
pub(crate) fn lex<'l>(line: &'l str) -> LineLexer<'l> {
    LineLexer {
        lexer: Token::lexer(line),
    }
}
#[test]
fn basic_header() {
let mut lex = Line::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");
let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");
assert_eq!(lex.next(), Some(Line::Header));
assert_eq!(lex.next(), Some(Token::Header));
assert_eq!(lex.span(), 0..6);
assert_eq!(lex.slice(), "HEADER");
assert_eq!(lex.next(), Some(Line::HeaderData));
assert_eq!(lex.next(), Some(Token::HeaderData));
assert_eq!(lex.span(), 7..21);
assert_eq!(lex.slice(), "version=0.0.0,");
assert_eq!(lex.next(), Some(Line::HeaderData));
assert_eq!(lex.next(), Some(Token::HeaderData));
assert_eq!(lex.span(), 21..49);
assert_eq!(lex.slice(), "location=Berlin Lichtenberg,");
assert_eq!(lex.next(), None);
}
#[test]
fn basic_start() {
let mut lex = Line::lexer("START 2020-11-11 13:00:00+01:00");
let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), Some(Line::Start));
assert_eq!(lex.next(), Some(Token::Start));
assert_eq!(lex.span(), 0..5);
assert_eq!(lex.slice(), "START");
assert_eq!(lex.next(), Some(Line::Date));
assert_eq!(lex.next(), Some(Token::Date));
assert_eq!(lex.span(), 5..31);
assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), None);
}
#[test]
fn basic_stop() {
let mut lex = Line::lexer("STOP 2020-11-11 13:00:00+01:00");
let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), Some(Line::Stop));
assert_eq!(lex.next(), Some(Token::Stop));
assert_eq!(lex.span(), 0..4);
assert_eq!(lex.slice(), "STOP");
assert_eq!(lex.next(), Some(Line::Date));
assert_eq!(lex.next(), Some(Token::Date));
assert_eq!(lex.span(), 4..30);
assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), None);
}
#[test]
fn basic_invoice() {
let mut lex = Line::lexer("INVOICE 2020-11-11 13:00:00+01:00");
let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), Some(Line::Invoice));
assert_eq!(lex.next(), Some(Token::Invoice));
assert_eq!(lex.span(), 0..7);
assert_eq!(lex.slice(), "INVOICE");
assert_eq!(lex.next(), Some(Line::Date));
assert_eq!(lex.next(), Some(Token::Date));
assert_eq!(lex.span(), 7..33);
assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");
assert_eq!(lex.next(), None);
}
#[test]
fn basic_comment() {
    // A `;;` line lexes as a single Comment token covering the rest of
    // the line, per the `;;.*` regex on the Comment variant.
    let mut lex = Token::lexer(";; This file is auto generated!");
    assert_eq!(lex.next(), Some(Token::Comment));
}

@ -1,3 +1,25 @@
//! cassiopeia file format handling
//! cassiopeia file format
pub(crate) mod lexer;
mod lexer;
mod parser;
pub(crate) use lexer::{LineLexer, LineToken, Token};
pub(crate) use parser::LineCfg;
use crate::TimeFile;
use std::{fs::File, io::Read};
/// Read a `.cass` file from disk, lex and parse it line by line, and
/// fold the valid lines into a [`TimeFile`].
///
/// Panics if the file cannot be opened or read. The folded result is
/// currently discarded; returning it is a TODO (changing the return
/// type would ripple into `main`'s tail expression).
pub(crate) fn load_file(path: &str) {
    let mut f = File::open(path).unwrap();
    let mut content = String::new();
    f.read_to_string(&mut content).unwrap();

    // `lines()` handles both `\n` and `\r\n` endings; the previous
    // split("\n") left a trailing `\r` on every line of a CRLF file and
    // produced a spurious empty final line. Owned Strings are kept
    // because the lexer borrows `&mut String` entries.
    let mut lines: Vec<String> = content.lines().map(|l| l.to_owned()).collect();
    lines
        .iter_mut()
        .map(|line| lexer::lex(line))
        .map(|lex| parser::parse(lex))
        .filter(|line| line.valid())
        .fold(TimeFile::default(), |file, line| file.append(line));
}

@ -0,0 +1,78 @@
//! cassiopeia parser
//!
//! Takes a lexer's token stream as an input, and outputs a fully
//! parsed time file.
use crate::format::{LineLexer, LineToken, Token};
use chrono::{DateTime, FixedOffset as Offset};
use logos::Lexer;
use std::collections::BTreeMap;
use std::iter::Iterator;
/// A type-parsed line in a time file
#[derive(Debug)]
pub enum LineCfg {
    /// A header line with a set of keys and values
    Header(BTreeMap<String, String>),
    /// A session start line with a date and time (`None` until the
    /// date token has been consumed by the parser fold)
    Start(Option<DateTime<Offset>>),
    /// A session stop line with a date and time
    Stop(Option<DateTime<Offset>>),
    /// An invoice line with a date
    Invoice(Option<DateTime<Offset>>),
    /// An empty line
    Empty,
    /// A temporary value that is invalid; filtered out by callers via
    /// [`LineCfg::valid`]
    #[doc(hidden)]
    Ignore,
}
impl LineCfg {
    /// A line is valid when parsing produced anything other than the
    /// internal `Ignore` placeholder.
    pub(crate) fn valid(&self) -> bool {
        !matches!(self, LineCfg::Ignore)
    }
}
/// Fold one line's token stream into a typed [`LineCfg`].
///
/// The fold starts at `Ignore`; the first keyword token selects the
/// variant, and a subsequent data token fills it in. Any unexpected
/// token sequence collapses back to `Ignore`, which callers filter out
/// via [`LineCfg::valid`].
pub(crate) fn parse<'l>(lex: LineLexer<'l>) -> LineCfg {
    use LineCfg::*;
    use Token as T;

    #[cfg_attr(rustfmt, rustfmt_skip)]
    lex.get_all().into_iter().fold(Ignore, |cfg, tok| match (cfg, tok) {
        // If the first token is a comment, we ignore it
        (Ignore, LineToken { tt: T::Comment, .. }, ) => Ignore,
        // If the first token is a keyword, we wait for more data
        (Ignore, LineToken { tt: T::Header, .. }) => Header(Default::default()),
        (Ignore, LineToken { tt: T::Start, .. }) => Start(None),
        (Ignore, LineToken { tt: T::Stop, .. }) => Stop(None),
        (Ignore, LineToken { tt: T::Invoice, .. }) => Invoice(None),
        // If the first token _was_ a keyword, fill in the data
        (Header(map), LineToken { tt: T::HeaderData, slice }) => Header(append_data(map, slice)),
        (Start(_), LineToken { tt: T::Date, slice }) => Start(parse_date(slice)),
        (Stop(_), LineToken { tt: T::Date, slice }) => Stop(parse_date(slice)),
        (Invoice(_), LineToken { tt: T::Date, slice }) => Invoice(parse_date(slice)),
        // Pass empty lines through,
        (Empty, _) => Empty,
        // Ignore everything else (which will be filtered)
        _ => Ignore,
    })
}
/// Split a lexed `key=value[,]` header fragment and insert it into the
/// given map, returning the updated map.
///
/// The lexer's `HeaderData` regex leaves the trailing `,` on the slice
/// (see the `basic_header` test: `"version=0.0.0,"`), so it is stripped
/// here rather than stored in the value. Only the *first* `=` separates
/// key from value, so values may themselves contain `=`; both sides are
/// trimmed, as the format documentation promises for header fields.
fn append_data(mut map: BTreeMap<String, String>, slice: &str) -> BTreeMap<String, String> {
    let body = slice.trim_end_matches(',');
    let mut parts = body.splitn(2, '=');
    let key = parts.next().unwrap_or("").trim();
    let val = parts.next().unwrap_or("").trim();
    map.insert(key.into(), val.into());
    map
}
/// Parse a date token slice (e.g. ` 2020-11-11 13:00:00+01:00`) into a
/// timezone-aware timestamp.
///
/// The lexer hands the slice over with leading whitespace attached (see
/// the lexer tests), and chrono's `parse_from_str` is strict about
/// stray characters, so the slice is trimmed first. A malformed date
/// now yields `None` instead of panicking — this function already
/// returned `Option`, so the `expect` defeated the caller's ability to
/// treat the line as invalid.
fn parse_date(slice: &str) -> Option<DateTime<Offset>> {
    DateTime::parse_from_str(slice.trim(), "%Y-%m-%d %H:%M:%S%:z").ok()
}

@ -1,5 +1,8 @@
mod format;
mod data;
fn main() {
pub use data::{TimeFile, Session};
fn main() {
    // Allow the time file to be passed as the first CLI argument; fall
    // back to the previously hard-coded development path so existing
    // usage keeps working. A proper CLI is a TODO.
    let path = std::env::args()
        .nth(1)
        .unwrap_or_else(|| "/home/projects/clients/nyantec-nix-workshops/time.cass".to_owned());
    format::load_file(&path)
}

Loading…
Cancel
Save