//! Cassiopeia file lexer
use logos::{Lexer, Logos};
use std::iter::Iterator;
/// A basic line lexer type
///
/// This lexer distinguishes between comments, and keyword lines. It
/// does not attempt to parse the line specifics. This is what the
/// content lexer is for.
#[derive(Logos, Debug, PartialEq)]
pub(crate) enum Token {
    /// The literal keyword `HEADER`
    #[token("HEADER")]
    Header,

    /// The literal keyword `START`
    #[token("START")]
    Start,

    /// The literal keyword `STOP`
    #[token("STOP")]
    Stop,

    /// The literal keyword `INVOICE`
    #[token("INVOICE")]
    Invoice,

    /// A single `key=value` pair, terminated by a `,` or `$` character.
    ///
    /// NOTE(review): inside a character class `$` matches a literal
    /// dollar sign, not end-of-input — as written, a trailing `,` (or
    /// `$`) is required after every pair; confirm that is intended.
    #[regex(r"\w+=[^,$]+[,$]")]
    HeaderData,

    // FIXME: this will have a leading whitespace that we could remove
    // with ^\w, but logos does not support this at the moment
    /// A run of digits, `-`, `:`, `+` and spaces (date/time payload)
    #[regex(r"[0-9-:+ ]+")]
    Date,

    /// A single space; skipped by the lexer so it never reaches callers
    #[token(" ", logos::skip)]
    Space,

    /// A `;;` comment running to the end of the line
    #[regex(";;.*")]
    Comment,

    /// Fallback variant produced for input matching no other rule
    #[error]
    Error,
}
/// A single token type on a line
#[derive(Debug)]
pub(crate) struct LineToken<'l> {
    /// The token type that was matched
    pub(crate) tt: Token,
    /// The exact source text the token was matched from
    pub(crate) slice: &'l str,
}
/// A lexer wrapped for a single line
pub(crate) struct LineLexer<'l> {
    // Underlying logos lexer over the line's text
    lexer: Lexer<'l, Token>,
}
impl<'l> LineLexer<'l> {
|
|
pub(crate) fn get_all(self) -> Vec<LineToken<'l>> {
|
|
let mut acc = vec![];
|
|
for l in self {
|
|
acc.push(l);
|
|
}
|
|
acc
|
|
}
|
|
}
|
|
|
|
impl<'l> Iterator for LineLexer<'l> {
|
|
type Item = LineToken<'l>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.lexer.next().map(|tt| Self::Item {
|
|
tt,
|
|
slice: self.lexer.slice(),
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Take a line of input and lex it into a stream of tokens
|
|
pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> {
|
|
LineLexer {
|
|
lexer: Token::lexer(line),
|
|
}
|
|
}
|
|
|
|
#[test]
fn basic_header() {
    let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");

    // Expected (token, span, slice) triples, in lexing order.
    let expected = vec![
        (Token::Header, 0..6, "HEADER"),
        (Token::HeaderData, 7..21, "version=0.0.0,"),
        (Token::HeaderData, 21..49, "location=Berlin Lichtenberg,"),
    ];

    for (tt, span, slice) in expected {
        assert_eq!(lex.next(), Some(tt));
        assert_eq!(lex.span(), span);
        assert_eq!(lex.slice(), slice);
    }

    assert_eq!(lex.next(), None);
}
#[test]
fn basic_start() {
    let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00");

    // Expected (token, span, slice) triples, in lexing order. The Date
    // token keeps its leading space (see the FIXME on the Date regex).
    let expected = vec![
        (Token::Start, 0..5, "START"),
        (Token::Date, 5..31, " 2020-11-11 13:00:00+01:00"),
    ];

    for (tt, span, slice) in expected {
        assert_eq!(lex.next(), Some(tt));
        assert_eq!(lex.span(), span);
        assert_eq!(lex.slice(), slice);
    }

    assert_eq!(lex.next(), None);
}
#[test]
fn basic_stop() {
    let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00");

    // Expected (token, span, slice) triples, in lexing order. The Date
    // token keeps its leading space (see the FIXME on the Date regex).
    let expected = vec![
        (Token::Stop, 0..4, "STOP"),
        (Token::Date, 4..30, " 2020-11-11 13:00:00+01:00"),
    ];

    for (tt, span, slice) in expected {
        assert_eq!(lex.next(), Some(tt));
        assert_eq!(lex.span(), span);
        assert_eq!(lex.slice(), slice);
    }

    assert_eq!(lex.next(), None);
}
#[test]
fn basic_invoice() {
    let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00");

    // Expected (token, span, slice) triples, in lexing order. The Date
    // token keeps its leading space (see the FIXME on the Date regex).
    let expected = vec![
        (Token::Invoice, 0..7, "INVOICE"),
        (Token::Date, 7..33, " 2020-11-11 13:00:00+01:00"),
    ];

    for (tt, span, slice) in expected {
        assert_eq!(lex.next(), Some(tt));
        assert_eq!(lex.span(), span);
        assert_eq!(lex.slice(), slice);
    }

    assert_eq!(lex.next(), None);
}
#[test]
fn basic_comment() {
    // A `;;` line comment lexes as a single Comment token.
    let first = Token::lexer(";; This file is auto generated!").next();
    assert_eq!(first, Some(Token::Comment));
}