diff options
author | Jesse Morgan <jesse@jesterpm.net> | 2023-12-31 09:50:08 -0800 |
---|---|---|
committer | Jesse Morgan <jesse@jesterpm.net> | 2023-12-31 09:50:08 -0800 |
commit | d8d335d0cfd0a420b25ecc6c97bf5c0020e7ac16 (patch) | |
tree | 6c57dc9d840d03a6333fb4d63e378c23f5d48fd7 /src/js.rs | |
parent | 9ea74605c9537321a35f96bf41e667c3bf02cd7a (diff) |
Move Compiler into new module
Diffstat (limited to 'src/js.rs')
-rw-r--r-- | src/js.rs | 666 |
1 files changed, 666 insertions, 0 deletions
diff --git a/src/js.rs b/src/js.rs new file mode 100644 index 0000000..8de188b --- /dev/null +++ b/src/js.rs @@ -0,0 +1,666 @@ +use std::collections::BTreeMap; + +use boa_ast::Declaration; +use boa_ast::Expression; +use boa_ast::Script; +use boa_ast::Statement; +use boa_ast::StatementList; +use boa_ast::StatementListItem; +use boa_ast::declaration::Binding; +use boa_ast::expression::access::PropertyAccess; +use boa_ast::expression::access::PropertyAccessField; +use boa_ast::expression::literal::Literal; +use boa_ast::expression::operator::Assign; +use boa_ast::expression::operator::Binary; +use boa_ast::expression::operator::assign::AssignTarget; +use boa_ast::expression::operator::binary::BinaryOp; +use boa_ast::expression::operator::binary::LogicalOp; +use boa_ast::expression::operator::binary::RelationalOp; +use boa_ast::property::PropertyDefinition; +use boa_ast::property::PropertyName; +use boa_ast::statement::iteration::IterableLoopInitializer; +use boa_interner::Interner; +use boa_interner::Sym; + +use ion_rs::SymbolTable; + +use crate::ion::IonValue; +use crate::runtime::Function; +use crate::runtime::OpCode; +use crate::runtime::Runtime; + + +pub struct JsCompiler { + interner: Interner, + symbols: SymbolTable, + vars: Vec<BTreeMap<String, usize>>, + max_var: Vec<usize>, + bytecode: Vec<u8>, + functions: Vec<Function>, +} + +impl JsCompiler { + + pub fn new(interner: Interner) -> JsCompiler { + let mut compiler = JsCompiler { + interner, + symbols: SymbolTable::new(), + vars: Vec::new(), + max_var: vec![0], + bytecode: Vec::new(), + functions: Vec::new(), + }; + compiler.enter_block(); + compiler + } + + fn enter_block(&mut self) { + let mut vars = if let Some(vars) = self.vars.last() { + vars.clone() + } else { + BTreeMap::new() + }; + vars.insert("this".to_string(), 0); + self.vars.push(vars); + } + + fn exit_block(&mut self) { + self.vars.pop(); + } + + fn get_or_create_variable(&mut self, sym: Sym) -> usize { + let name = self.interner.resolve_expect(sym).to_string(); + let idmaybe = self.vars.last().unwrap().len(); + let id = *self.vars.last_mut().unwrap().entry(name).or_insert(idmaybe); + let max_var = *self.max_var.last().unwrap().max(&id); + self.max_var.pop(); + self.max_var.push(max_var); + id + } + + fn set_jump_target(&mut self, jump_pc: usize, target_pc: usize) { + let target_32: u32 = target_pc.try_into().unwrap(); + for (i, b) in target_32.to_be_bytes().iter().enumerate() { + self.bytecode[jump_pc + 1 + i] = *b; + } + } + + /// Attach a previous jump instruction to the next instruction. + fn set_jump(&mut self, jump_pc: usize) { + self.set_jump_target(jump_pc, self.bytecode.len()) + } + + fn push_jump(&mut self) -> usize { + self.bytecode.push(OpCode::Jump.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + + fn push_dup(&mut self) { + self.bytecode.push(OpCode::Dup.into()); + } + + fn push_dup2(&mut self) { + self.bytecode.push(OpCode::Dup2.into()); + } + + fn push_swappop(&mut self) { + self.bytecode.push(OpCode::SwapPop.into()); + } + + fn push_push(&mut self, value: &IonValue) { + self.bytecode.push(OpCode::TypePush.into()); + self.bytecode.extend(value.bytes()); + } + + fn push_pop(&mut self) { + self.bytecode.push(OpCode::TypePop.into()); + } + + fn push_load(&mut self, varid: usize) { + self.bytecode.push(OpCode::TypeLoad.into()); + self.push_varuint(varid); + } + + fn push_store(&mut self, varid: usize) { + self.bytecode.push(OpCode::TypeStore.into()); + self.push_varuint(varid); + } + + fn push_iterate(&mut self) { + self.bytecode.push(OpCode::Iterator.into()); + } + + fn push_next(&mut self) { + self.bytecode.push(OpCode::Next.into()); + } + + fn push_step_in(&mut self, varid: usize) { + self.bytecode.push(OpCode::StepIn.into()); + self.push_varuint(varid); + } + + fn push_step_out(&mut self) { + self.bytecode.push(OpCode::StepOut.into()); + } + + fn push_load_value(&mut self) { + self.bytecode.push(OpCode::LoadValue.into()); + } + + fn push_load_field(&mut self) { + self.bytecode.push(OpCode::StructField.into()); + } + + fn push_ifeq2(&mut self) -> usize { + self.bytecode.push(OpCode::IfEq2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + fn push_ifne2(&mut self) -> usize { + self.bytecode.push(OpCode::IfNe2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + fn push_ifgt2(&mut self) -> usize { + self.bytecode.push(OpCode::IfGt2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + fn push_ifge2(&mut self) -> usize { + self.bytecode.push(OpCode::IfGe2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + fn push_iflt2(&mut self) -> usize { + self.bytecode.push(OpCode::IfLt2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + + fn push_ifle2(&mut self) -> usize { + self.bytecode.push(OpCode::IfLe2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + + + fn push_newlist(&mut self) { + self.bytecode.push(OpCode::NewList.into()); + } + + fn push_newstruct(&mut self) { + self.bytecode.push(OpCode::NewStruct.into()); + } + + fn push_varuint(&mut self, mut value: usize) { + let mut buf = [0; (usize::BITS / 7 + 1) as usize]; + let mut pos = 0; + while value != 0 { + buf[pos] = (value & 0x7F) as u8; + value >>= 7; + pos += 1; + } + buf[0] |= 0x80; + pos = pos.max(1); + + for i in (0..pos).rev() { + self.bytecode.push(buf[i]); + } + } + + fn push_u32(&mut self, value: u32) { + self.bytecode.extend(value.to_be_bytes()) + } + + + pub fn compile_declaration(&mut self, decl: &Declaration) { + match decl { + Declaration::Function(func) => { + self.max_var.push(*self.max_var.last().unwrap()); + let pc = self.bytecode.len(); + self.enter_block(); + self.compile_statement_list(func.body().statements()); + self.exit_block(); + + let name_symbol = if let Some(name) = func.name() { + self.to_symbol_id(name.sym()) + } else { + self.new_symbol_id(format!("anonFunction{}", self.functions.len())) + }; + + self.functions.push(Function { + name_symbol, + arguments: func.parameters().length() as u16, + variables: (self.max_var.pop().unwrap() + 1) as u16, + pc, + expected_stack_depth: 32, // TODO FIXME + }); + }, + Declaration::Generator(_) => todo!(), + Declaration::AsyncFunction(_) => todo!(), + Declaration::AsyncGenerator(_) => todo!(), + Declaration::Class(_) => todo!(), + Declaration::Lexical(decl) => { + for v in decl.variable_list().as_ref() { + match v.binding() { + Binding::Pattern(_) => todo!(), + Binding::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + if let Some(init) = v.init() { + self.compile_expression(init); + } else { + self.push_push(&IonValue::new_null()); + } + self.push_store(varid); + } + } + } + }, + } + } + + fn new_symbol_id(&mut self, s: String) -> usize { + self.symbols.intern(s) + } + + fn to_symbol_id(&mut self, sym: Sym) -> usize { + let s = self.interner.resolve_expect(sym).to_string(); + self.symbols.intern(s) + } + + fn to_ion_symbol(&mut self, sym: Sym) -> IonValue { + IonValue::new_symbol(self.to_symbol_id(sym)) + } + + fn literal_to_ion(&mut self, literal: &Literal) -> IonValue { + match literal { + Literal::String(sym) => self.to_ion_symbol(*sym), + Literal::Num(v) => IonValue::new_f64(*v), + Literal::Int(v) => IonValue::new_i32(*v), + Literal::BigInt(v) => todo!(), + Literal::Bool(v) => IonValue::new_bool(*v), + Literal::Null => IonValue::new_null(), + Literal::Undefined => IonValue::new_null(), // TODO: undefined? + } + } + + fn compile_assignment(&mut self, assignment: &Assign) { + self.compile_expression(assignment.rhs()); + match assignment.op() { + boa_ast::expression::operator::assign::AssignOp::Assign => {}, + boa_ast::expression::operator::assign::AssignOp::Add => todo!(), + boa_ast::expression::operator::assign::AssignOp::Sub => todo!(), + boa_ast::expression::operator::assign::AssignOp::Mul => todo!(), + boa_ast::expression::operator::assign::AssignOp::Div => todo!(), + boa_ast::expression::operator::assign::AssignOp::Mod => todo!(), + boa_ast::expression::operator::assign::AssignOp::Exp => todo!(), + boa_ast::expression::operator::assign::AssignOp::And => todo!(), + boa_ast::expression::operator::assign::AssignOp::Or => todo!(), + boa_ast::expression::operator::assign::AssignOp::Xor => todo!(), + boa_ast::expression::operator::assign::AssignOp::Shl => todo!(), + boa_ast::expression::operator::assign::AssignOp::Shr => todo!(), + boa_ast::expression::operator::assign::AssignOp::Ushr => todo!(), + boa_ast::expression::operator::assign::AssignOp::BoolAnd => todo!(), + boa_ast::expression::operator::assign::AssignOp::BoolOr => todo!(), + boa_ast::expression::operator::assign::AssignOp::Coalesce => todo!(), + } + self.push_dup(); + match assignment.lhs() { + AssignTarget::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_store(varid); + }, + AssignTarget::Access(_) => todo!(), + AssignTarget::Pattern(_) => todo!(), + } + } + + fn compile_binary(&mut self, binary: &Binary) { + match binary.op() { + BinaryOp::Arithmetic(_) => todo!(), + BinaryOp::Bitwise(_) => todo!(), + BinaryOp::Relational(op) => { + self.compile_expression(binary.lhs()); + self.compile_expression(binary.rhs()); + let jump_true = match op { + RelationalOp::Equal => self.push_ifeq2(), + RelationalOp::NotEqual => self.push_ifne2(), + RelationalOp::StrictEqual => self.push_ifeq2(), + RelationalOp::StrictNotEqual => self.push_ifne2(), + RelationalOp::GreaterThan => self.push_ifle2(), + RelationalOp::GreaterThanOrEqual => self.push_iflt2(), + RelationalOp::LessThan => self.push_ifge2(), + RelationalOp::LessThanOrEqual => self.push_ifgt2(), + RelationalOp::In => todo!(), + RelationalOp::InstanceOf => todo!(), + }; + self.push_push(&IonValue::new_bool(false)); + let jump_false = self.push_jump(); + self.set_jump(jump_true); + self.push_push(&IonValue::new_bool(true)); + self.set_jump(jump_false); + }, + BinaryOp::Logical(op) => { + match op { + LogicalOp::And => { + self.compile_expression(binary.lhs()); + self.push_dup(); + self.push_push(&IonValue::new_bool(false)); + let jump_done = self.push_ifeq2(); + self.push_pop(); + self.compile_expression(binary.rhs()); + self.set_jump(jump_done) + }, + LogicalOp::Or => { + self.compile_expression(binary.lhs()); + self.push_dup(); + self.push_push(&IonValue::new_bool(true)); + let jump_done = self.push_ifeq2(); + self.push_pop(); + self.compile_expression(binary.rhs()); + self.set_jump(jump_done) + }, + LogicalOp::Coalesce => { + self.compile_expression(binary.lhs()); + self.push_dup(); + self.push_push(&IonValue::new_null()); + let jump_done = self.push_ifeq2(); + self.push_pop(); + self.compile_expression(binary.rhs()); + self.set_jump(jump_done) + }, + } + }, + BinaryOp::Comma => todo!(), + } + } + + fn compile_expression(&mut self, expression: &Expression) { + match expression { + Expression::This => self.push_load(0), + Expression::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_load(varid); + }, + Expression::Literal(lit) => { + let value = self.literal_to_ion(lit); + self.push_push(&value); + }, + Expression::ArrayLiteral(alit) => { + // TODO: there's an opportunity here inject static arrays into the bytecode. + for exprmaybe in alit.as_ref() { + if let Some(expr) = exprmaybe { + self.compile_expression(expr); + } else { + self.push_push(&IonValue::new_null()); + } + } + self.push_push(&alit.as_ref().len().into()); + self.push_newlist(); + }, + Expression::ObjectLiteral(olit) => { + // TODO: there's an opportunity here inject static objects into the bytecode. + for propdef in olit.properties() { + match propdef { + PropertyDefinition::IdentifierReference(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_load(varid); + let symb = self.to_ion_symbol(ident.sym()); + self.push_push(&symb); + }, + PropertyDefinition::Property(name, expr) => { + self.compile_expression(expr); + match name { + PropertyName::Literal(sym) => { + let symb = self.to_ion_symbol(*sym); + self.push_push(&symb); + }, + PropertyName::Computed(_) => todo!(), + } + }, + PropertyDefinition::MethodDefinition(_, _) => todo!(), + PropertyDefinition::SpreadObject(_) => todo!(), + PropertyDefinition::CoverInitializedName(_, _) => todo!(), + } + } + self.push_push(&olit.properties().len().into()); + self.push_newstruct(); + }, + Expression::Spread(_) => todo!(), + Expression::Function(_) => todo!(), + Expression::ArrowFunction(_) => todo!(), + Expression::AsyncArrowFunction(_) => todo!(), + Expression::Generator(_) => todo!(), + Expression::AsyncFunction(_) => todo!(), + Expression::AsyncGenerator(_) => todo!(), + Expression::Class(_) => todo!(), + Expression::TemplateLiteral(_) => todo!(), + Expression::PropertyAccess(atype) => { + match atype { + PropertyAccess::Simple(access) => { + self.compile_expression(access.target()); + self.push_iterate(); + let label_loop = self.bytecode.len(); + self.push_dup2(); + let jump_undef = self.push_ifge2(); + self.push_load_field(); + match access.field() { + PropertyAccessField::Const(sym) => { + let symbol = self.to_ion_symbol(*sym); + self.push_push(&symbol); + }, + PropertyAccessField::Expr(expr) => { + self.compile_expression(expr); + todo!("cast to symbol") + }, + } + let jump_found = self.push_ifeq2(); + self.push_next(); + let jump_loop = self.push_jump(); + self.set_jump_target(jump_loop, label_loop); + + // Found the field + self.set_jump(jump_found); + self.push_load_value(); + let jump_cleanup = self.push_jump(); + + // Undefined field + self.set_jump(jump_undef); + self.push_push(&IonValue::new_null()); + + // Cleanup + // We need to pop four values under the top value: + // three for the iterator, one for the object. + self.set_jump(jump_cleanup); + for _ in 0..4 { + self.push_swappop(); + } + }, + PropertyAccess::Private(_) => todo!(), + PropertyAccess::Super(_) => todo!(), + } + }, + Expression::New(_) => todo!(), + Expression::Call(_) => todo!(), + Expression::SuperCall(_) => todo!(), + Expression::ImportCall(_) => todo!(), + Expression::Optional(_) => todo!(), + Expression::TaggedTemplate(_) => todo!(), + Expression::NewTarget => todo!(), + Expression::ImportMeta => todo!(), + Expression::Assign(assignment) => self.compile_assignment(assignment), + Expression::Unary(_) => todo!(), + Expression::Update(_) => todo!(), + Expression::Binary(binary) => self.compile_binary(binary), + Expression::BinaryInPrivate(_) => todo!(), + Expression::Conditional(_) => todo!(), + Expression::Await(_) => todo!(), + Expression::Yield(_) => todo!(), + Expression::Parenthesized(_) => todo!(), + _ => todo!(), + } + } + + fn compile_statement_list(&mut self, statements: &StatementList) { + for item in statements.as_ref() { + match item { + StatementListItem::Statement(statement) => self.compile_statement(statement), + StatementListItem::Declaration(decl) => self.compile_declaration(decl), + } + } + } + + fn compile_statement(&mut self, statement: &Statement) { + match statement { + boa_ast::Statement::Block(block) => { + self.enter_block(); + self.compile_statement_list(block.statement_list()); + self.exit_block(); + }, + boa_ast::Statement::Var(_) => todo!(), + boa_ast::Statement::Empty => {}, + boa_ast::Statement::Expression(expression) => { + self.compile_expression(expression); + self.push_pop(); + }, + boa_ast::Statement::If(flow) => { + self.compile_expression(flow.cond()); + self.push_push(&IonValue::new_bool(false)); + let jump_else = self.push_ifeq2(); + self.compile_statement(flow.body()); + if let Some(statement) = flow.else_node() { + let jump_done = self.push_jump(); + self.set_jump(jump_else); + self.compile_statement(statement); + self.set_jump(jump_done) + } else { + self.set_jump(jump_else); + } + }, + boa_ast::Statement::DoWhileLoop(_) => todo!(), + boa_ast::Statement::WhileLoop(_) => todo!(), + boa_ast::Statement::ForLoop(_) => todo!(), + boa_ast::Statement::ForInLoop(_) => todo!(), + boa_ast::Statement::ForOfLoop(flow) => { + self.compile_expression(flow.iterable()); + self.push_iterate(); + + // Top of the loop + let label_loop = self.bytecode.len(); + self.push_dup2(); + let jump_done = self.push_ifge2(); + + self.push_load_value(); + + // Save the request data in the binding + match flow.initializer() { + IterableLoopInitializer::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_store(varid); + }, + IterableLoopInitializer::Access(_) => todo!(), + IterableLoopInitializer::Var(_) => todo!(), + IterableLoopInitializer::Let(binding) | IterableLoopInitializer::Const(binding) => { + self.enter_block(); + match binding { + Binding::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_store(varid); + }, + Binding::Pattern(_) => todo!(), + } + self.exit_block(); + }, + IterableLoopInitializer::Pattern(_) => todo!(), + } + + // Run the loop body + self.compile_statement(flow.body()); + + // Back to the top + self.push_next(); + let jump_loop = self.push_jump(); + self.set_jump_target(jump_loop, label_loop); + + // Cleanup + // We need to pop four values under the top value: + // three for the iterator, one for the object. + self.set_jump(jump_done); + for _ in 0..4 { + self.push_pop(); + } + + }, + boa_ast::Statement::Switch(_) => todo!(), + boa_ast::Statement::Continue(_) => todo!(), + boa_ast::Statement::Break(_) => todo!(), + boa_ast::Statement::Return(_) => todo!(), + boa_ast::Statement::Labelled(_) => todo!(), + boa_ast::Statement::Throw(_) => todo!(), + boa_ast::Statement::Try(_) => todo!(), + boa_ast::Statement::With(_) => todo!(), + } + } + + pub fn dump(&self) { + println!("Symbols"); + println!("-------"); + for (id, text) in self.symbols.symbols().iter().enumerate() { + println!("{id:04X} {text}"); + } + println!(); + + println!("Functions"); + println!("---------"); + for func in &self.functions { + println!("{:04X} pc: {:04X}", func.name_symbol, func.pc); + } + println!(); + + println!("Bytecode"); + println!("--------"); + println!("ADDRESS 0 1 2 3 4 5 6 7 8 9 A B C D E F TEXT"); + for (i, bytes) in self.bytecode.chunks(16).enumerate() { + print!("{:08X} ", i * 16); + for b in bytes { + print!("{b:02X} "); + } + for _ in bytes.len()..16 { + print!(" "); + } + print!(" "); + for b in bytes { + if (0x40..0x7F).contains(b) { + unsafe { + print!("{}", char::from_u32_unchecked(*b as u32)); + } + } else { + print!("."); + } + } + println!(); + } + println!(); + } + + pub fn compile_script(&mut self, script: Script) { + for item in script.statements().statements() { + if let StatementListItem::Declaration(decl) = item { + if let Declaration::Function(_) = decl { + self.compile_declaration(decl); + } else { + todo!("Not supported in the global scope."); + } + } else { + todo!("Not supported in the global scope."); + } + } + } +} + +impl From<JsCompiler> for Runtime { + fn from(compiler: JsCompiler) -> Self { + Runtime::new(compiler.symbols, compiler.bytecode, compiler.functions) + } +} |