cli/src/generate/prepare_grammar/process_inlines.rs (443 lines of code) (raw):

use crate::generate::grammars::{InlinedProductionMap, Production, ProductionStep, SyntaxGrammar}; use hashbrown::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] struct ProductionStepId { // A `None` value here means that the production itself was produced via inlining, // and is stored in the the builder's `productions` vector, as opposed to being // stored in one of the grammar's variables. variable_index: Option<usize>, production_index: usize, step_index: usize, } struct InlinedProductionMapBuilder { production_indices_by_step_id: HashMap<ProductionStepId, Vec<usize>>, productions: Vec<Production>, } impl InlinedProductionMapBuilder { fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap { let mut step_ids_to_process = Vec::new(); for (variable_index, variable) in grammar.variables.iter().enumerate() { for production_index in 0..variable.productions.len() { step_ids_to_process.push(ProductionStepId { variable_index: Some(variable_index), production_index, step_index: 0, }); while !step_ids_to_process.is_empty() { let mut i = 0; while i < step_ids_to_process.len() { let step_id = step_ids_to_process[i]; if let Some(step) = self.production_step_for_id(step_id, grammar) { if grammar.variables_to_inline.contains(&step.symbol) { let inlined_step_ids = self .inline_production_at_step(step_id, grammar) .into_iter() .cloned() .map(|production_index| ProductionStepId { variable_index: None, production_index, step_index: step_id.step_index, }); step_ids_to_process.splice(i..i + 1, inlined_step_ids); } else { step_ids_to_process[i] = ProductionStepId { variable_index: step_id.variable_index, production_index: step_id.production_index, step_index: step_id.step_index + 1, }; i += 1; } } else { step_ids_to_process.remove(i); } } } } } let productions = self.productions; let production_indices_by_step_id = self.production_indices_by_step_id; let production_map = production_indices_by_step_id .into_iter() .map(|(step_id, production_indices)| { let production = if let Some(variable_index) = step_id.variable_index { &grammar.variables[variable_index].productions[step_id.production_index] } else { &productions[step_id.production_index] } as *const Production; ((production, step_id.step_index as u32), production_indices) }) .collect(); InlinedProductionMap { productions, production_map, } } fn inline_production_at_step<'a>( &'a mut self, step_id: ProductionStepId, grammar: &'a SyntaxGrammar, ) -> &'a Vec<usize> { // Build a list of productions produced by inlining rules. let mut i = 0; let step_index = step_id.step_index; let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()]; while i < productions_to_add.len() { if let Some(step) = productions_to_add[i].steps.get(step_index) { let symbol = step.symbol.clone(); if grammar.variables_to_inline.contains(&symbol) { // Remove the production from the vector, replacing it with a placeholder. let production = productions_to_add .splice(i..i + 1, [Production::default()].iter().cloned()) .next() .unwrap(); // Replace the placeholder with the inlined productions. productions_to_add.splice( i..i + 1, grammar.variables[symbol.index].productions.iter().map(|p| { let mut production = production.clone(); let removed_step = production .steps .splice(step_index..(step_index + 1), p.steps.iter().cloned()) .next() .unwrap(); let inserted_steps = &mut production.steps[step_index..(step_index + p.steps.len())]; if let Some(alias) = removed_step.alias { for inserted_step in inserted_steps.iter_mut() { inserted_step.alias = Some(alias.clone()); } } if let Some(field_name) = removed_step.field_name { for inserted_step in inserted_steps.iter_mut() { inserted_step.field_name = Some(field_name.clone()); } } if let Some(last_inserted_step) = inserted_steps.last_mut() { if last_inserted_step.precedence == 0 { last_inserted_step.precedence = removed_step.precedence; } if last_inserted_step.associativity == None { last_inserted_step.associativity = removed_step.associativity; } } production }), ); continue; } } i += 1; } // Store all the computed productions. let result = productions_to_add .into_iter() .map(|production| { self.productions .iter() .position(|p| *p == production) .unwrap_or({ self.productions.push(production); self.productions.len() - 1 }) }) .collect(); // Cache these productions based on the original production step. self.production_indices_by_step_id .entry(step_id) .or_insert(result) } fn production_for_id<'a>( &'a self, id: ProductionStepId, grammar: &'a SyntaxGrammar, ) -> &'a Production { if let Some(variable_index) = id.variable_index { &grammar.variables[variable_index].productions[id.production_index] } else { &self.productions[id.production_index] } } fn production_step_for_id<'a>( &'a self, id: ProductionStepId, grammar: &'a SyntaxGrammar, ) -> Option<&'a ProductionStep> { self.production_for_id(id, grammar).steps.get(id.step_index) } } pub(super) fn process_inlines(grammar: &SyntaxGrammar) -> InlinedProductionMap { InlinedProductionMapBuilder { productions: Vec::new(), production_indices_by_step_id: HashMap::new(), } .build(grammar) } #[cfg(test)] mod tests { use super::*; use crate::generate::grammars::{ProductionStep, SyntaxVariable, VariableType}; use crate::generate::rules::{Associativity, Symbol}; #[test] fn test_basic_inlining() { let grammar = SyntaxGrammar { expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, variables_to_inline: vec![Symbol::non_terminal(1)], variables: vec![ SyntaxVariable { name: "non-terminal-0".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::non_terminal(1)), // inlined ProductionStep::new(Symbol::terminal(11)), ], }], }, SyntaxVariable { name: "non-terminal-1".to_string(), kind: VariableType::Named, productions: vec![ Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(12)), ProductionStep::new(Symbol::terminal(13)), ], }, Production { dynamic_precedence: 0, steps: vec![ProductionStep::new(Symbol::terminal(14))], }, ], }, ], }; let inline_map = process_inlines(&grammar); // Nothing to inline at step 0. assert!(inline_map .inlined_productions(&grammar.variables[0].productions[0], 0) .is_none()); // Inlining variable 1 yields two productions. assert_eq!( inline_map .inlined_productions(&grammar.variables[0].productions[0], 1) .unwrap() .cloned() .collect::<Vec<_>>(), vec![ Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(12)), ProductionStep::new(Symbol::terminal(13)), ProductionStep::new(Symbol::terminal(11)), ], }, Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(14)), ProductionStep::new(Symbol::terminal(11)), ], }, ] ); } #[test] fn test_nested_inlining() { let grammar = SyntaxGrammar { variables: vec![ SyntaxVariable { name: "non-terminal-0".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::non_terminal(1)), // inlined ProductionStep::new(Symbol::terminal(11)), ProductionStep::new(Symbol::non_terminal(2)), // inlined ProductionStep::new(Symbol::terminal(12)), ], }], }, SyntaxVariable { name: "non-terminal-1".to_string(), kind: VariableType::Named, productions: vec![ Production { dynamic_precedence: 0, steps: vec![ProductionStep::new(Symbol::terminal(13))], }, Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::non_terminal(3)), // inlined ProductionStep::new(Symbol::terminal(14)), ], }, ], }, SyntaxVariable { name: "non-terminal-2".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, steps: vec![ProductionStep::new(Symbol::terminal(15))], }], }, SyntaxVariable { name: "non-terminal-3".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, steps: vec![ProductionStep::new(Symbol::terminal(16))], }], }, ], variables_to_inline: vec![ Symbol::non_terminal(1), Symbol::non_terminal(2), Symbol::non_terminal(3), ], expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, }; let inline_map = process_inlines(&grammar); let productions: Vec<&Production> = inline_map .inlined_productions(&grammar.variables[0].productions[0], 1) .unwrap() .collect(); assert_eq!( productions.iter().cloned().cloned().collect::<Vec<_>>(), vec![ Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(13)), ProductionStep::new(Symbol::terminal(11)), ProductionStep::new(Symbol::non_terminal(2)), ProductionStep::new(Symbol::terminal(12)), ], }, Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(16)), ProductionStep::new(Symbol::terminal(14)), ProductionStep::new(Symbol::terminal(11)), ProductionStep::new(Symbol::non_terminal(2)), ProductionStep::new(Symbol::terminal(12)), ], }, ] ); assert_eq!( inline_map .inlined_productions(productions[0], 3) .unwrap() .cloned() .collect::<Vec<_>>(), vec![Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::terminal(13)), ProductionStep::new(Symbol::terminal(11)), ProductionStep::new(Symbol::terminal(15)), ProductionStep::new(Symbol::terminal(12)), ], },] ); } #[test] fn test_inlining_with_precedence_and_alias() { let grammar = SyntaxGrammar { variables_to_inline: vec![Symbol::non_terminal(1), Symbol::non_terminal(2)], variables: vec![ SyntaxVariable { name: "non-terminal-0".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, steps: vec![ // inlined ProductionStep::new(Symbol::non_terminal(1)) .with_prec(1, Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), // inlined ProductionStep::new(Symbol::non_terminal(2)) .with_alias("outer_alias", true), ], }], }, SyntaxVariable { name: "non-terminal-1".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(11)) .with_prec(2, None) .with_alias("inner_alias", true), ProductionStep::new(Symbol::terminal(12)), ], }], }, SyntaxVariable { name: "non-terminal-2".to_string(), kind: VariableType::Named, productions: vec![Production { dynamic_precedence: 0, steps: vec![ProductionStep::new(Symbol::terminal(13))], }], }, ], expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, }; let inline_map = process_inlines(&grammar); let productions: Vec<_> = inline_map .inlined_productions(&grammar.variables[0].productions[0], 0) .unwrap() .collect(); assert_eq!( productions.iter().cloned().cloned().collect::<Vec<_>>(), vec![Production { dynamic_precedence: 0, steps: vec![ // The first step in the inlined production retains its precedence // and alias. ProductionStep::new(Symbol::terminal(11)) .with_prec(2, None) .with_alias("inner_alias", true), // The final step of the inlined production inherits the precedence of // the inlined step. ProductionStep::new(Symbol::terminal(12)) .with_prec(1, Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), ProductionStep::new(Symbol::non_terminal(2)).with_alias("outer_alias", true), ] }], ); assert_eq!( inline_map .inlined_productions(productions[0], 3) .unwrap() .cloned() .collect::<Vec<_>>(), vec![Production { dynamic_precedence: 0, steps: vec![ ProductionStep::new(Symbol::terminal(11)) .with_prec(2, None) .with_alias("inner_alias", true), ProductionStep::new(Symbol::terminal(12)) .with_prec(1, Some(Associativity::Left)), ProductionStep::new(Symbol::terminal(10)), // All steps of the inlined production inherit their alias from the // inlined step. ProductionStep::new(Symbol::terminal(13)).with_alias("outer_alias", true), ] }], ); } }