lwcb/blang/src/parser.rs (533 lines of code) (raw):

use crate::call::Call; use crate::constant::Constant; use crate::lexer::Token; use crate::lexer::Tokens; use anyhow::bail; use anyhow::Result; use bpfir::types::BinaryOp; use bpfir::types::UnaryOp; use bpfir::Type; use bpfir::TypeKind; use logos::Span; #[derive(Clone, Debug, PartialEq)] pub struct MemberAttr { pub offset: u32, // in bytes pub bitfield_offset: u16, // in bits pub bitfield_size: u16, // in bits } #[derive(Clone, Debug, PartialEq)] pub enum ExprKind { Compound(Vec<Expr>), // compound statement ExprStmt(Box<Expr>), If(Box<Expr>, Box<Expr>, Box<Option<Expr>>), // condition, then, else Return, Type(Type), Ident(String), // identifier LitStr(String), // string literal Constant(i64), Unary(UnaryOp, Box<Expr>), // unary expression Binary(BinaryOp, Box<Expr>, Box<Expr>), // binary expression Cast(Box<Expr>, Box<Expr>), Call(Call, Vec<Expr>), Member(Box<Expr>, Box<Expr>, Option<MemberAttr>), //member access Trace(Box<Expr>, Box<Expr>), // bpf program, tracing point definition and program body } #[derive(Clone, Debug, PartialEq)] pub struct Expr { pub kind: ExprKind, pub span: Span, pub ty: Type, } impl Expr { pub fn new(kind: ExprKind, span: Span) -> Self { Self { kind, span, ty: Type::new(TypeKind::Undef), } } pub fn from_compound(compound: Vec<Expr>, span: Span) -> Self { Self::new(ExprKind::Compound(compound), span) } pub fn from_exprstmt(stmt: Expr, span: Span) -> Self { Self::new(ExprKind::ExprStmt(Box::new(stmt)), span) } pub fn from_if(c: Expr, t: Expr, e: Option<Expr>, span: Span) -> Self { Self::new(ExprKind::If(Box::new(c), Box::new(t), Box::new(e)), span) } pub fn from_return(span: Span) -> Self { Self::new(ExprKind::Return, span) } pub fn new_type(ty: Type, span: Span) -> Self { Self::new(ExprKind::Type(ty), span) } pub fn from_trace(types: Vec<Expr>, program: Expr, span: Span) -> Self { Self::new( ExprKind::Trace(Box::new(types[0].clone()), Box::new(program)), span, ) } pub fn new_cast(cast: Expr, ty: Expr, span: Span) -> Self { Self::new(ExprKind::Cast(Box::new(cast), Box::new(ty)), span) } pub fn new_constant(c: Constant, span: Span) -> Self { let mut expr = Self::new(ExprKind::Constant(c.value()), span); expr.ty = c.ty().clone(); expr } pub fn new_binary(op: BinaryOp, l: Expr, r: Expr, span: Span) -> Self { Self::new(ExprKind::Binary(op, Box::new(l), Box::new(r)), span) } pub fn new_unary(op: UnaryOp, e: Expr, span: Span) -> Self { Self::new(ExprKind::Unary(op, Box::new(e)), span) } pub fn new_ident(ident: String, span: Span) -> Self { Self::new(ExprKind::Ident(ident), span) } pub fn new_litstr(str: String, span: Span) -> Self { Self::new(ExprKind::LitStr(str), span) } pub fn new_call(call: Call, args: Vec<Expr>, span: Span) -> Self { Self::new(ExprKind::Call(call, args), span) } pub fn new_member(expr1: Expr, expr2: Expr, span: Span) -> Self { Self::new( ExprKind::Member(Box::new(expr1), Box::new(expr2), None), span, ) } pub fn ty(&self) -> &Type { &self.ty } } macro_rules! parsed_debug { ($tokens: ident, $left: expr, $right: expr) => { let span = merge_span($left, $right); log::debug!("Parsed expression: {}", $tokens.span_string(span)); }; } fn merge_span(left: &Span, right: &Span) -> Span { let mut span = Span::default(); span.start = left.start; span.end = right.end; span } fn type_expression(tokens: &mut Tokens) -> Result<Expr> { let mut ty = match tokens.read() { Token::Bool => Type::bool(), Token::Char => Type::char(), Token::I8 => Type::i8(), Token::U8 => Type::u8(), Token::I16 => Type::i16(), Token::U16 => Type::u16(), Token::I32 => Type::i32(), Token::U32 => Type::u32(), Token::I64 => Type::i64(), Token::U64 => Type::u64(), Token::Struct => Type::struct_(tokens.eat_identifier()?), Token::Union => Type::union(tokens.eat_identifier()?), _ => todo!(), }; while tokens.try_eat(Token::Star) { ty = Type::ptr(ty); } tokens.eat(Token::RightParen)?; return Ok(Expr::new_type(ty, tokens.span())); } /// /// ```grammar /// postfix_expression /// : primary_expression /// | postfix_expression '[' expression ']' /// | postfix_expression '(' ')' /// | postfix_expression '(' argument_expression_list ')' /// | postfix_expression '.' IDENTIFIER /// | postfix_expression PTR_OP IDENTIFIER /// | postfix_expression INC_OP /// | postfix_expression DEC_OP /// ; /// ``` pub fn postfix_expression(tokens: &mut Tokens) -> Result<Expr> { let mut rename_expression = primary_expression(tokens)?; // if tokens.try_eat(Token::LeftBracket) { // [] operator // } loop { let base = tokens.span(); if tokens.try_eat(Token::LeftBracket) { let expression = expression(tokens)?; tokens.eat(Token::RightBracket)?; rename_expression = Expr::new_binary( BinaryOp::Index, rename_expression, expression, merge_span(&base, &tokens.span()), ); continue; } // member expression if tokens.try_eat(Token::Dot) { rename_expression = Expr::new_member( rename_expression, Expr::new_ident(tokens.eat_identifier()?, tokens.span()), merge_span(&base, &tokens.span()), ); continue; } if tokens.try_eat(Token::Deref) { // replace "->" with an unary expression rename_expression = Expr::new_unary( UnaryOp::Deref, rename_expression, merge_span(&base, &tokens.span()), ); rename_expression = Expr::new_member( rename_expression, Expr::new_ident(tokens.eat_identifier()?, tokens.span()), merge_span(&base, &tokens.span()), ); continue; } return Ok(rename_expression); } } /// /// /// /// CFG grammar: /// /// ``` /// primary_expression /// : IDENTIFIER /// | CONSTANT /// | STRING_LITERAL /// | '(' expression ')' /// ; /// ``` pub fn primary_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); if tokens.try_eat(Token::LeftParen) { let expression = expression(tokens); tokens.eat(Token::RightParen)?; return expression; } match tokens.read() { Token::Identifier(i) => { if let Ok(func) = Call::try_from(i.as_str()) { if tokens.try_eat(Token::LeftParen) { return Ok(Expr::new_call( func, argument_expression_list(tokens)?, merge_span(&base, &tokens.span()), )); } } return Ok(Expr::new_ident( i.clone(), merge_span(&base, &tokens.span()), )); } Token::Constant(c) => Ok(Expr::new_constant(c, merge_span(&base, &tokens.span()))), // Token:: Token::StringLiteral(s) => Ok(Expr::new_litstr(s, merge_span(&base, &tokens.span()))), _ => bail!("wrong params {:?}", tokens), } } /// ```grammar /// argument_expression_list /// : assignment_expression /// | argument_expression_list ',' assignment_expression /// ; /// ``` pub fn argument_expression_list(tokens: &mut Tokens) -> Result<Vec<Expr>> { let _ = tokens.span(); if tokens.try_eat(Token::RightParen) { return Ok(vec![]); } let mut args = Vec::new(); args.push(assignment_expression(tokens)?); while tokens.try_eat(Token::Comma) { args.push(assignment_expression(tokens)?); } tokens.eat(Token::RightParen)?; return Ok(args); } ///```grammar /// multiplicative_expression /// : cast_expression /// | multiplicative_expression '*' cast_expression /// | multiplicative_expression '/' cast_expression /// | multiplicative_expression '%' cast_expression /// ; /// ``` pub fn multiplicative_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); let mut expression = cast_expression(tokens)?; loop { if tokens.try_eat(Token::Star) { expression = Expr::new_binary( BinaryOp::Mult, expression, cast_expression(tokens)?, merge_span(&base, &tokens.span()), ); } else if tokens.try_eat(Token::Slash) { expression = Expr::new_binary( BinaryOp::Div, expression, cast_expression(tokens)?, merge_span(&base, &tokens.span()), ); } else { return Ok(expression); } } } /// /// ```grammar /// additive_expression /// : multiplicative_expression /// | additive_expression '+' multiplicative_expression /// | additive_expression '-' multiplicative_expression /// ; /// ``` /// pub fn additive_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); let mut expression = multiplicative_expression(tokens)?; loop { if tokens.try_eat(Token::Plus) { expression = Expr::new_binary( BinaryOp::Add, expression, multiplicative_expression(tokens)?, merge_span(&base, &tokens.span()), ); } else if tokens.try_eat(Token::Minus) { expression = Expr::new_binary( BinaryOp::Sub, expression, multiplicative_expression(tokens)?, merge_span(&base, &tokens.span()), ); } else { return Ok(expression); } } } /// /// ```grammar /// unary_expression /// : postfix_expression /// | INC_OP unary_expression /// | DEC_OP unary_expression /// | unary_operator cast_expression /// | SIZEOF unary_expression /// | SIZEOF '(' type_name ')' /// ; /// ``` /// /// pub fn unary_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); if tokens.try_eat(Token::Plus) { return cast_expression(tokens); } if tokens.try_eat(Token::Minus) { return Ok(Expr::new_unary( UnaryOp::Neg, cast_expression(tokens)?, merge_span(&base, &tokens.span()), )); } if tokens.try_eat(Token::Star) { return Ok(Expr::new_unary( UnaryOp::Deref, cast_expression(tokens)?, merge_span(&base, &tokens.span()), )); } return postfix_expression(tokens); } ///```grammar /// cast_expression /// : unary_expression /// | '(' type_name ')' cast_expression /// ; /// ``` pub fn cast_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); if tokens.peek() == Token::LeftParen && tokens.peek_offset(1).is_type_name() { tokens.eat(Token::LeftParen)?; let ty = type_expression(tokens)?; return Ok(Expr::new_cast( cast_expression(tokens)?, ty, merge_span(&base, &tokens.span()), )); } return unary_expression(tokens); } /// /// ```grammar /// equality_expression /// : relational_expression /// | equality_expression EQ_OP relational_expression /// | equality_expression NE_OP relational_expression /// ; /// ``` pub fn equality_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); let expression = relational_expression(tokens)?; if tokens.try_eat(Token::TwoEqual) { return Ok(Expr::new_binary( BinaryOp::Equal, expression, relational_expression(tokens)?, merge_span(&base, &tokens.span()), )); } if tokens.try_eat(Token::NotEqual) { return Ok(Expr::new_binary( BinaryOp::NonEqual, expression, relational_expression(tokens)?, merge_span(&base, &tokens.span()), )); } return Ok(expression); } /// /// ```grammar /// relational_expression /// : shift_expression /// | relational_expression '<' shift_expression /// | relational_expression '>' shift_expression /// | relational_expression LE_OP shift_expression /// | relational_expression GE_OP shift_expression /// ; /// ``` pub fn relational_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); let expression = shift_expression(tokens)?; if tokens.try_eat(Token::LessThan) { return Ok(Expr::new_binary( BinaryOp::LT, expression, shift_expression(tokens)?, merge_span(&base, &tokens.span()), )); } if tokens.try_eat(Token::GreaterThan) { return Ok(Expr::new_binary( BinaryOp::GT, expression, shift_expression(tokens)?, merge_span(&base, &tokens.span()), )); } if tokens.try_eat(Token::LessThanEqual) { return Ok(Expr::new_binary( BinaryOp::LTE, expression, shift_expression(tokens)?, merge_span(&base, &tokens.span()), )); } if tokens.try_eat(Token::GreaterThanEqual) { return Ok(Expr::new_binary( BinaryOp::GTE, expression, shift_expression(tokens)?, merge_span(&base, &tokens.span()), )); } return Ok(expression); } /// /// ```grammar /// shift_expression /// : additive_expression /// | shift_expression LEFT_OP additive_expression /// | shift_expression RIGHT_OP additive_expression /// ; /// ``` pub fn shift_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); let expression = additive_expression(tokens)?; if tokens.try_eat(Token::LShift) { return Ok(Expr::new_binary( BinaryOp::LShift, expression, additive_expression(tokens)?, merge_span(&base, &tokens.span()), )); } if tokens.try_eat(Token::RShift) { return Ok(Expr::new_binary( BinaryOp::RShift, expression, additive_expression(tokens)?, merge_span(&base, &tokens.span()), )); } return Ok(expression); } ///```grammar /// assignment_expression /// : conditional_expression /// | unary_expression assignment_operator assignment_expression /// ; /// ``` pub fn assignment_expression(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); let expression = equality_expression(tokens)?; if tokens.try_eat(Token::Equal) { return Ok(Expr::new_binary( BinaryOp::Assign, expression, assignment_expression(tokens)?, merge_span(&base, &tokens.span()), )); } return Ok(expression); } /// /// ```grammar /// expression /// : assignment_expression /// | expression ',' assignment_expression /// ; /// ``` pub fn expression(tokens: &mut Tokens) -> Result<Expr> { assignment_expression(tokens) } ///```grammar /// expression_statement /// : ';' /// | expression ';' /// ; /// ``` pub fn expression_statement(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); let expression = expression(tokens)?; tokens.eat(Token::Semicolon)?; parsed_debug!(tokens, &base, &tokens.span()); return Ok(Expr::from_exprstmt( expression, merge_span(&base, &tokens.span()), )); } /// /// ```grammar /// statement /// : labeled_statement /// | compound_statement /// | expression_statement /// | selection_statement /// | iteration_statement /// | jump_statement /// ; /// ``` /// pub fn statement(tokens: &mut Tokens) -> Result<Expr> { let base = tokens.span(); if tokens.try_eat(Token::Return) { tokens.eat(Token::Semicolon)?; return Ok(Expr::from_return(merge_span(&base, &tokens.span()))); } if tokens.try_eat(Token::LeftBrace) { let mut stmts = vec![]; while !tokens.try_eat(Token::RightBrace) { stmts.push(statement(tokens)?); } return Ok(Expr::from_compound( stmts, merge_span(&base, &tokens.span()), )); } if tokens.try_eat(Token::If) { tokens.eat(Token::LeftParen)?; let condition = expression(tokens)?; tokens.eat(Token::RightParen)?; let then_statement = statement(tokens)?; let else_statement = if tokens.try_eat(Token::Else) { Some(statement(tokens)?) } else { None }; return Ok(Expr::from_if( condition, then_statement, else_statement, merge_span(&base, &tokens.span()), )); } return expression_statement(tokens); } fn program_expression(tokens: &mut Tokens) -> Result<Expr> { let mut program_types = vec![]; let base = tokens.span(); match tokens.read() { Token::Kprobe => { tokens.eat(Token::Colon).unwrap(); let ident = tokens.eat_identifier().unwrap(); program_types.push(Expr::new_type( Type::kprobe(ident), merge_span(&base, &tokens.span()), )); } Token::Kretprobe => { tokens.eat(Token::Colon).unwrap(); let ident = tokens.eat_identifier().unwrap(); program_types.push(Expr::new_type( Type::kretprobe(ident), merge_span(&base, &tokens.span()), )); } _ => { bail!("Please specify bpf program type, such as begin, end, k(ret)probe or tracepoint") } } loop { if tokens.peek() == Token::LeftBrace { return Ok(Expr::from_trace( program_types, statement(tokens)?, merge_span(&base, &tokens.span()), )); } match tokens.read() { _ => { bail!("begin or end bpf program type is single") } } } } fn generate_ast(tokens: &mut Tokens) -> Result<Ast> { let mut expressions = vec![]; loop { if tokens.is_eof() { return Ok(Ast { exprs: expressions }); } expressions.push(program_expression(tokens)?); } } #[derive(Clone, Debug, PartialEq)] pub struct Ast { pub exprs: Vec<Expr>, } impl<'text> From<&mut Tokens<'text>> for Ast { fn from(tokens: &mut Tokens) -> Self { match generate_ast(tokens) { Ok(ast) => { return ast; } Err(e) => { log::error!( "Failed to parse, left string: {}, error: {}", tokens.left_str(), e ); panic!(); } } } } impl From<&str> for Ast { fn from(source: &str) -> Self { let mut tokens = Tokens::from(source); Ast::from(&mut tokens) } } #[cfg(test)] mod tests { use super::*; #[test] fn basic_statement() { let _ = Ast::from("kprobe:tcp_sendmsg { }"); let _ = Ast::from("kprobe:tcp_sendmsg { a = 0; }"); let _ = Ast::from("kprobe:tcp_sendmsg { a = 0; if (a == 0) { a = 2; } }"); let _ = Ast::from("kprobe:tcp_sendmsg { a = 0; if (a == 0) { a = 2; } else { a = 3; } }"); } #[test] fn map_operation() { let _ = Ast::from("kprobe:tcp_sendmsg { a[2] = 0; }"); let _ = Ast::from("kprobe:tcp_sendmsg { print(a[2]); }"); let _ = Ast::from("kprobe:tcp_sendmsg { print(a[2 + 3]); }"); } #[test] fn member_access() { let _ = Ast::from("kprobe:tcp_sendmsg { a = skb.head; }"); let _ = Ast::from("kprobe:tcp_sendmsg { a = skb->head; }"); } }