My personal project and infrastructure archive
nomicon/apps/koffice/libko/src/cass/format/lexer.rs


//! Cassiopeia file lexer

use logos::{Lexer, Logos};
use std::iter::Iterator;

/// A basic line lexer type
///
/// This lexer distinguishes between comment and keyword lines. It
/// does not attempt to parse the specifics of each line; that is the
/// content lexer's job.
#[derive(Logos, Debug, PartialEq)]
pub(crate) enum Token {
    #[token("HEADER")]
    Header,

    #[token("START")]
    Start,

    #[token("STOP")]
    Stop,

    #[token("INVOICE")]
    Invoice,

    #[regex(r"\w+=[^,$]+[,$]")]
    HeaderData,

    // FIXME: this will have a leading whitespace that we could remove
    // with ^\w, but logos does not support this at the moment
    #[regex(r"[0-9-:+ ]+")]
    Date,

    #[token(" ", logos::skip)]
    Space,

    #[regex(";;.*")]
    Comment,

    #[error]
    Error,
}
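
// For example, a header line such as
//
//   HEADER version=0.0.0,location=Berlin Lichtenberg,
//
// lexes into `Header` followed by two `HeaderData` tokens, while the
// timestamp in a START/STOP/INVOICE line comes out as a single `Date`
// token, leading whitespace included (see the tests below).
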
/// A single token type on a line
#[derive(Debug)]
pub(crate) struct LineToken<'l> {
    pub(crate) tt: Token,
    pub(crate) slice: &'l str,
}

/// A lexer wrapped for a single line
pub(crate) struct LineLexer<'l> {
    lexer: Lexer<'l, Token>,
}

impl<'l> LineLexer<'l> {
    /// Drain the lexer and collect all tokens on this line
    pub(crate) fn get_all(self) -> Vec<LineToken<'l>> {
        let mut acc = vec![];
        for l in self {
            acc.push(l);
        }
        acc
    }
}

impl<'l> Iterator for LineLexer<'l> {
    type Item = LineToken<'l>;

    fn next(&mut self) -> Option<Self::Item> {
        self.lexer.next().map(|tt| Self::Item {
            tt,
            slice: self.lexer.slice(),
        })
    }
}

/// Take a line of input and lex it into a stream of tokens
pub(crate) fn lex<'l>(line: &'l mut String) -> LineLexer<'l> {
    LineLexer {
        lexer: Token::lexer(line),
    }
}
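
// A minimal usage sketch (hypothetical caller, not part of this
// file): lex one line and walk the resulting tokens. `lex` hands
// back a `LineLexer`, and `get_all` drains it into a Vec.
//
//     let mut line = String::from("START 2020-11-11 13:00:00+01:00");
//     for tok in lex(&mut line).get_all() {
//         println!("{:?} -> {:?}", tok.tt, tok.slice);
//     }
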
#[test]
fn basic_header() {
    let mut lex = Token::lexer("HEADER version=0.0.0,location=Berlin Lichtenberg,");

    assert_eq!(lex.next(), Some(Token::Header));
    assert_eq!(lex.span(), 0..6);
    assert_eq!(lex.slice(), "HEADER");

    assert_eq!(lex.next(), Some(Token::HeaderData));
    assert_eq!(lex.span(), 7..21);
    assert_eq!(lex.slice(), "version=0.0.0,");

    assert_eq!(lex.next(), Some(Token::HeaderData));
    assert_eq!(lex.span(), 21..49);
    assert_eq!(lex.slice(), "location=Berlin Lichtenberg,");

    assert_eq!(lex.next(), None);
}

#[test]
fn basic_start() {
    let mut lex = Token::lexer("START 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Start));
    assert_eq!(lex.span(), 0..5);
    assert_eq!(lex.slice(), "START");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 5..31);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}

#[test]
fn basic_stop() {
    let mut lex = Token::lexer("STOP 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Stop));
    assert_eq!(lex.span(), 0..4);
    assert_eq!(lex.slice(), "STOP");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 4..30);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}

#[test]
fn basic_invoice() {
    let mut lex = Token::lexer("INVOICE 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), Some(Token::Invoice));
    assert_eq!(lex.span(), 0..7);
    assert_eq!(lex.slice(), "INVOICE");

    assert_eq!(lex.next(), Some(Token::Date));
    assert_eq!(lex.span(), 7..33);
    assert_eq!(lex.slice(), " 2020-11-11 13:00:00+01:00");

    assert_eq!(lex.next(), None);
}

#[test]
fn basic_comment() {
    let mut lex = Token::lexer(";; This file is auto generated!");
    assert_eq!(lex.next(), Some(Token::Comment));
}