//! Cassiopeia file lexer
use logos::{Lexer, Logos};
use std::iter::Iterator;
/// A basic line lexer type
///
/// This lexer distinguishes between comments, and keyword lines. It
/// does not attempt to parse the line specifics. This is what the
/// content lexer is for.
#[derive(Logos, Debug, PartialEq)]
pub(crate) enum Token {
    /// The literal keyword `HEADER`
    #[token("HEADER")]
    Header,

    /// The literal keyword `START`
    #[token("START")]
    Start,

    /// The literal keyword `STOP`
    #[token("STOP")]
    Stop,

    /// The literal keyword `INVOICE`
    #[token("INVOICE")]
    Invoice,

    /// A single `key=value` pair, terminated by a `,` or `$` character.
    ///
    /// NOTE(review): inside a character class `$` matches a literal
    /// dollar sign, not end-of-input — as written, a trailing `,` (or
    /// `$`) is required after every pair; confirm that is intended.
    #[regex(r"\w+=[^,$]+[,$]")]
    HeaderData,

    // FIXME: this will have a leading whitespace that we could remove
    // with ^\w, but logos does not support this at the moment
    /// A run of digits, `-`, `:`, `+` and spaces (date/time payload)
    #[regex(r"[0-9-:+ ]+")]
    Date,

    /// A single space; skipped by the lexer so it never reaches callers
    #[token(" ", logos::skip)]
    Space,

    /// A `;;` comment running to the end of the line
    #[regex(";;.*")]
    Comment,

    /// Fallback variant produced for input matching no other rule
    #[error]
    Error,
}
/// A single token type on a line
#[derive(Debug)]
pub(crate) struct LineToken<'l> {
    /// The token type that was matched
    pub(crate) tt: Token,
    /// The exact source text the token was matched from
    pub(crate) slice: &'l str,
}
/// A lexer wrapped for a single line
pub(crate) struct LineLexer<'l> {
    // Underlying logos lexer over the line's text
    lexer: Lexer<'l, Token>,
}
impl<'l> LineLexer<'l> {
|
|
pub(crate) fn get_all(self) -> Vec<LineToken<'l>> {
|
|
let mut acc = vec![];
|
|
for l in self {
|
|
acc.push(l);
|
|
}
|
|
acc
|
|
}
|
|
}
|
|
|
|
impl<'l> Iterator for LineLexer<'l> {
|
|
type Item = LineToken<'l>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.lexer.next().map(|tt| Self::Item {
|
|
tt,
|
|
slice: self.lexer.slice(),
|
|
})
|
|
}
|
|
}
|
|
|
|
/// Take a line of input and lex it into a stream of tokens
|
|
pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> {
|
|
LineLexer {
|
|
lexer: Token::lexer(line),
|
|
}
|
|
}
|
|
|
|
#[test]
fn basic_header() {
    let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");

    // Expected (token, span, slice) triples, in lexing order.
    let expected = vec![
        (Token::Header, 0..6, "HEADER"),
        (Token::HeaderData, 7..21, "version=0.0.0,"),
        (Token::HeaderData, 21..49, "location=Berlin Lichtenberg,"),
    ];

    for (tt, span, slice) in expected {
        assert_eq!(lex.next(), Some(tt));
        assert_eq!(lex.span(), span);
        assert_eq!(lex.slice(), slice);
    }

    assert_eq!(lex.next(), None);
}
#[test]
fn basic_start() {
    let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00");

    // Expected (token, span, slice) triples, in lexing order. The Date
    // token keeps its leading space (see the FIXME on the Date regex).
    let expected = vec![
        (Token::Start, 0..5, "START"),
        (Token::Date, 5..31, " 2020-11-11 13:00:00+01:00"),
    ];

    for (tt, span, slice) in expected {
        assert_eq!(lex.next(), Some(tt));
        assert_eq!(lex.span(), span);
        assert_eq!(lex.slice(), slice);
    }

    assert_eq!(lex.next(), None);
}
#[test]
fn basic_stop() {
    let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00");

    // Expected (token, span, slice) triples, in lexing order. The Date
    // token keeps its leading space (see the FIXME on the Date regex).
    let expected = vec![
        (Token::Stop, 0..4, "STOP"),
        (Token::Date, 4..30, " 2020-11-11 13:00:00+01:00"),
    ];

    for (tt, span, slice) in expected {
        assert_eq!(lex.next(), Some(tt));
        assert_eq!(lex.span(), span);
        assert_eq!(lex.slice(), slice);
    }

    assert_eq!(lex.next(), None);
}
#[test]
fn basic_invoice() {
    let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00");

    // Expected (token, span, slice) triples, in lexing order. The Date
    // token keeps its leading space (see the FIXME on the Date regex).
    let expected = vec![
        (Token::Invoice, 0..7, "INVOICE"),
        (Token::Date, 7..33, " 2020-11-11 13:00:00+01:00"),
    ];

    for (tt, span, slice) in expected {
        assert_eq!(lex.next(), Some(tt));
        assert_eq!(lex.span(), span);
        assert_eq!(lex.slice(), slice);
    }

    assert_eq!(lex.next(), None);
}
#[test]
fn basic_comment() {
    // A `;;` line comment lexes as a single Comment token.
    let first = Token::lexer(";; This file is auto generated!").next();
    assert_eq!(first, Some(Token::Comment));
}