From 9ea74605c9537321a35f96bf41e667c3bf02cd7a Mon Sep 17 00:00:00 2001 From: Jesse Morgan Date: Fri, 29 Dec 2023 23:34:38 -0800 Subject: Checkpoint: naive JS compilation --- .gitignore | 1 + demo/script.js | 15 ++ src/ion.rs | 201 ++++++++++++++-- src/main.rs | 706 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- src/runtime.rs | 327 ++++++++++++++++---------- 5 files changed, 1092 insertions(+), 158 deletions(-) create mode 100644 .gitignore create mode 100644 demo/script.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/demo/script.js b/demo/script.js new file mode 100644 index 0000000..c829cc0 --- /dev/null +++ b/demo/script.js @@ -0,0 +1,15 @@ + +function lowest_price() { + const offers = [{ id: 1, price: 2.01}, { id: 2, price: 2.99 }]; + + let min_price = null; + let best_offer = null; + + for (const offer of offers) { + if (min_price === null || offer.price < min_price) { + min_price = offer.price; + best_offer = offer; + } + } +} + diff --git a/src/ion.rs b/src/ion.rs index 6773539..cbfae24 100644 --- a/src/ion.rs +++ b/src/ion.rs @@ -1,4 +1,4 @@ -use std::iter::Copied; +use std::{iter::Copied, cmp::Ordering}; pub enum IonType { Null = 0x00, @@ -26,6 +26,56 @@ impl IonValue { IonValue(vec![0x0F]) } + pub fn new_symbol(value: usize) -> IonValue { + let mut buf = Vec::new(); + if value == 0 { + buf.push(0x70); + } else { + let octets = ((usize::BITS - value.leading_zeros() + 7) >> 3) as usize; + let tl = 0x70 | octets as u8; + buf.push(tl); + let be = value.to_be_bytes(); + let start = be.len() - octets; + for b in &be[start..] { + buf.push(*b); + } + } + IonValue(buf) + } + + pub fn new_f64(value: f64) -> IonValue { + let b = value.to_be_bytes(); + IonValue(vec![0x48, b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]) + } + + pub fn new_i32(value: i32) -> IonValue { + IonValue::from(value as i64) + } + + pub fn new_bool(value: bool) -> IonValue { + if value { + IonValue(vec![0x11]) + } else { + IonValue(vec![0x10]) + } + } + + pub fn new_string(value: S) -> IonValue + where S: AsRef + { + let bytes = value.as_ref().as_bytes(); + let mut v = Vec::with_capacity(2 + bytes.len()); + if bytes.len() < 14 { + v.push(0x80 | (bytes.len() as u8)); + } else { + v.push(0x8E); + push_varuint(&mut v, bytes.len()); + } + v.extend(bytes); + IonValue(v) + } + + pub fn ion_type(&self) -> u8 { self.0[0] >> 4 } @@ -62,6 +112,10 @@ impl IonValue { self.reader().next_usize() } + pub fn to_symbol_id(&self) -> usize { + self.reader().next_symbol_id() + } + } impl From> for IonValue { @@ -74,6 +128,47 @@ impl From> for IonValue { } } +impl From for IonValue { + fn from(value: usize) -> Self { + let mut buf = Vec::new(); + if value == 0 { + buf.push(0x20); + } else { + let v = value; + let octets = ((usize::BITS - v.leading_zeros() + 7) >> 3) as usize; + let tl = 0x20 | octets as u8; + buf.push(tl); + let be = v.to_be_bytes(); + let start = be.len() - octets; + for b in &be[start..] { + buf.push(*b); + } + } + IonValue(buf) + } +} + +impl From for IonValue { + fn from(value: i64) -> Self { + let mut buf = Vec::new(); + let (mut tl, v) = match value.cmp(&0) { + Ordering::Equal => return IonValue(vec![0x20]), + Ordering::Greater => (0x20, value), + Ordering::Less => (0x30, -value), + }; + + let octets = ((usize::BITS - v.leading_zeros() + 7) >> 3) as usize; + tl |= octets as u8; + buf.push(tl); + let be = v.to_be_bytes(); + let start = be.len() - octets; + for b in &be[start..] { + buf.push(*b); + } + IonValue(buf) + } +} + pub struct IonReader { iter: T, @@ -141,21 +236,49 @@ impl IonReader pub fn next_value(&mut self) -> IonValue { self.prepare_next(); let tl = self.next_byte(); - let (mut buf, len) = match tl & 0x0F { - 0 | 15 => (vec![tl], 0), - 14 => { - let l = self.next_varuint(); - let mut v = Vec::with_capacity(5 + l); - v.push(tl); - push_varuint(&mut v, l); - (v, l) + let (mut buf, len) = match tl & 0xF0 { + 0x10 => (vec![tl], 0), // bool + 0xD0 => { // struct + match tl & 0x0F { + 0 | 15 => (vec![tl], 0), + 1 | 14 => { + let l = self.next_varuint(); + let mut v = Vec::with_capacity(5 + l); + v.push(tl); + push_varuint(&mut v, l); + (v, l) + }, + len => { + let l = len.into(); + let mut v = Vec::with_capacity(5 + l); + v.push(tl); + (v, l) + } + } }, - len => { - let l = len.into(); - let mut v = Vec::with_capacity(5 + l); - v.push(tl); - (v, l) - } + 0x00 | 0x20 | 0x30 | 0x40 | + 0x50 | 0x60 | 0x70 | 0x80 | + 0x90 | 0xA0 | 0xB0 | 0xC0 | + 0xE0 => + { + match tl & 0x0F { + 0 | 15 => (vec![tl], 0), + 14 => { + let l = self.next_varuint(); + let mut v = Vec::with_capacity(5 + l); + v.push(tl); + push_varuint(&mut v, l); + (v, l) + }, + len => { + let l = len.into(); + let mut v = Vec::with_capacity(5 + l); + v.push(tl); + (v, l) + } + } + }, + _ => panic!("unsupported ion type"), }; for _ in 0..len { let b = self.next_byte(); @@ -185,16 +308,55 @@ impl IonReader value } + pub fn next_symbol_id(&mut self) -> usize { + self.prepare_next(); + let tl = self.next_byte(); + if tl & 0xF0 != 0x70 { + panic!("Not a symbol"); + } + + let len = self.extract_length(tl); + if len * 8 > usize::BITS as usize { + panic!("Symbol id too large for usize"); + } + + let mut value = 0; + for _ in 0..len { + let b = self.next_byte(); + value <<= 8; + value |= b as usize; + } + value + } + + fn next_byte(&mut self) -> u8 { self.offset += 1; self.iter.next().expect("Missing data") } fn extract_length(&mut self, tl: u8) -> usize { - match tl & 0x0F { - 0 | 15 => 0, - 14 => self.next_varuint(), - len => len.into(), + match tl & 0xF0 { + 0x10 => 0, // bool + 0xD0 => { // struct + match tl & 0x0F { + 0 | 15 => 0, + 1 | 14 => self.next_varuint(), + len => len.into(), + } + }, + 0x00 | 0x20 | 0x30 | 0x40 | + 0x50 | 0x60 | 0x70 | 0x80 | + 0x90 | 0xA0 | 0xB0 | 0xC0 | + 0xE0 => + { + match tl & 0x0F { + 0 | 15 => 0, + 14 => self.next_varuint(), + len => len.into(), + } + } + _ => panic!("Unsupported Ion type"), } } @@ -239,3 +401,4 @@ fn parse_varuint(buf: &[u8]) -> usize { } value } + diff --git a/src/main.rs b/src/main.rs index 78d1822..fa4225d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,38 @@ +use std::collections::BTreeMap; use std::fs::File; use std::io; use std::io::prelude::*; use std::path::Path; +use boa_ast::Declaration; +use boa_ast::Expression; +use boa_ast::Statement; +use boa_ast::StatementList; +use boa_ast::StatementListItem; +use boa_ast::declaration::Binding; +use boa_ast::expression::access::PropertyAccess; +use boa_ast::expression::access::PropertyAccessField; +use boa_ast::expression::literal::Literal; +use boa_ast::expression::operator::Assign; +use boa_ast::expression::operator::Binary; +use boa_ast::expression::operator::assign::AssignTarget; +use boa_ast::expression::operator::binary::BinaryOp; +use boa_ast::expression::operator::binary::LogicalOp; +use boa_ast::expression::operator::binary::RelationalOp; +use boa_ast::property::PropertyDefinition; +use boa_ast::property::PropertyName; +use boa_ast::statement::iteration::IterableLoopInitializer; +use boa_ast::visitor::VisitWith; +use boa_ast::visitor::Visitor; use boa_interner::Interner; +use boa_interner::Sym; use boa_parser::Parser; use boa_parser::Source; use ion::IonValue; +use ion_rs::SymbolTable; +use runtime::ByteCode; +use runtime::Function; +use runtime::OpCode; use runtime::Runtime; mod ion; @@ -31,35 +57,675 @@ impl From for RelJsError { } } +pub struct Compiler { + interner: Interner, + symbols: SymbolTable, + vars: Vec>, + max_var: Vec, + bytecode: Vec, + jumps: BTreeMap, + functions: Vec, +} +impl Compiler { -fn main() -> Result<(), RelJsError> { - //let src = Source::from_filepath(Path::new("script.js"))?; - //let mut parser = Parser::new(src); - //let mut interner = Interner::new(); - //let script = parser.parse_script(&mut interner)?; + pub fn new(interner: Interner) -> Compiler { + let mut compiler = Compiler { + interner, + symbols: SymbolTable::new(), + vars: Vec::new(), + max_var: vec![0], + bytecode: Vec::new(), + jumps: BTreeMap::new(), + functions: Vec::new(), + }; + compiler.enter_block(); + compiler + } + + fn enter_block(&mut self) { + let mut vars = if let Some(vars) = self.vars.last() { + vars.clone() + } else { + BTreeMap::new() + }; + vars.insert("this".to_string(), 0); + self.vars.push(vars); + } + + fn exit_block(&mut self) { + self.vars.pop(); + } + + fn get_or_create_variable(&mut self, sym: Sym) -> usize { + let name = self.interner.resolve_expect(sym).to_string(); + let idmaybe = self.vars.last().unwrap().len(); + let id = *self.vars.last_mut().unwrap().entry(name).or_insert(idmaybe); + let max_var = *self.max_var.last().unwrap().max(&id); + self.max_var.pop(); + self.max_var.push(max_var); + id + } + + fn set_jump_target(&mut self, jump_pc: usize, target_pc: usize) { + let target_32: u32 = target_pc.try_into().unwrap(); + for (i, b) in target_32.to_be_bytes().iter().enumerate() { + self.bytecode[jump_pc + 1 + i] = *b; + } + } + + /// Attach a previous jump instruction to the next instruction. + fn set_jump(&mut self, jump_pc: usize) { + self.set_jump_target(jump_pc, self.bytecode.len()) + } + + fn push_jump(&mut self) -> usize { + self.bytecode.push(OpCode::Jump.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + + fn push_dup(&mut self) { + self.bytecode.push(OpCode::Dup.into()); + } + + fn push_dup2(&mut self) { + self.bytecode.push(OpCode::Dup2.into()); + } + + fn push_swappop(&mut self) { + self.bytecode.push(OpCode::SwapPop.into()); + } + + fn push_push(&mut self, value: &IonValue) { + self.bytecode.push(OpCode::TypePush.into()); + self.bytecode.extend(value.bytes()); + } + + fn push_pop(&mut self) { + self.bytecode.push(OpCode::TypePop.into()); + } + + fn push_load(&mut self, varid: usize) { + self.bytecode.push(OpCode::TypeLoad.into()); + self.push_varuint(varid); + } + + fn push_store(&mut self, varid: usize) { + self.bytecode.push(OpCode::TypeStore.into()); + self.push_varuint(varid); + } + + fn push_iterate(&mut self) { + self.bytecode.push(OpCode::Iterator.into()); + } + + fn push_next(&mut self) { + self.bytecode.push(OpCode::Next.into()); + } + + fn push_step_in(&mut self, varid: usize) { + self.bytecode.push(OpCode::StepIn.into()); + self.push_varuint(varid); + } + + fn push_step_out(&mut self) { + self.bytecode.push(OpCode::StepOut.into()); + } + + fn push_load_value(&mut self) { + self.bytecode.push(OpCode::LoadValue.into()); + } + + fn push_load_field(&mut self) { + self.bytecode.push(OpCode::StructField.into()); + } + + fn push_ifeq2(&mut self) -> usize { + self.bytecode.push(OpCode::IfEq2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + fn push_ifne2(&mut self) -> usize { + self.bytecode.push(OpCode::IfNe2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + fn push_ifgt2(&mut self) -> usize { + self.bytecode.push(OpCode::IfGt2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + fn push_ifge2(&mut self) -> usize { + self.bytecode.push(OpCode::IfGe2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + fn push_iflt2(&mut self) -> usize { + self.bytecode.push(OpCode::IfLt2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + + fn push_ifle2(&mut self) -> usize { + self.bytecode.push(OpCode::IfLe2.into()); + self.push_u32(0xFFFFFFFF); + self.bytecode.len() - 5 + } + + + fn push_newlist(&mut self) { + self.bytecode.push(OpCode::NewList.into()); + } + + fn push_newstruct(&mut self) { + self.bytecode.push(OpCode::NewStruct.into()); + } + + fn push_varuint(&mut self, mut value: usize) { + let mut buf = [0; (usize::BITS / 7 + 1) as usize]; + let mut pos = 0; + while value != 0 { + buf[pos] = (value & 0x7F) as u8; + value >>= 7; + pos += 1; + } + buf[0] |= 0x80; + pos = pos.max(1); + + for i in (0..pos).rev() { + self.bytecode.push(buf[i]); + } + } + + fn push_u32(&mut self, value: u32) { + self.bytecode.extend(value.to_be_bytes()) + } + + + fn compile_declaration(&mut self, decl: &Declaration) { + match decl { + Declaration::Function(func) => { + self.max_var.push(*self.max_var.last().unwrap()); + let pc = self.bytecode.len(); + self.enter_block(); + self.compile_statement_list(func.body().statements()); + self.exit_block(); + + let name_symbol = if let Some(name) = func.name() { + self.to_symbol_id(name.sym()) + } else { + self.new_symbol_id(format!("anonFunction{}", self.functions.len())) + }; + + self.functions.push(Function { + name_symbol, + arguments: func.parameters().length() as u16, + variables: (self.max_var.pop().unwrap() + 1) as u16, + pc, + expected_stack_depth: 32, // TODO FIXME + }); + }, + Declaration::Generator(_) => todo!(), + Declaration::AsyncFunction(_) => todo!(), + Declaration::AsyncGenerator(_) => todo!(), + Declaration::Class(_) => todo!(), + Declaration::Lexical(decl) => { + for v in decl.variable_list().as_ref() { + match v.binding() { + Binding::Pattern(_) => todo!(), + Binding::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + if let Some(init) = v.init() { + self.compile_expression(init); + } else { + self.push_push(&IonValue::new_null()); + } + self.push_store(varid); + } + } + } + }, + } + } + + fn new_symbol_id(&mut self, s: String) -> usize { + self.symbols.intern(s) + } + + fn to_symbol_id(&mut self, sym: Sym) -> usize { + let s = self.interner.resolve_expect(sym).to_string(); + self.symbols.intern(s) + } + + fn to_ion_symbol(&mut self, sym: Sym) -> IonValue { + IonValue::new_symbol(self.to_symbol_id(sym)) + } - let mut bin = File::open("demo/test-1.bin")?; - let mut data = Vec::new(); - bin.read_to_end(&mut data)?; + fn literal_to_ion(&mut self, literal: &Literal) -> IonValue { + match literal { + Literal::String(sym) => self.to_ion_symbol(*sym), + Literal::Num(v) => IonValue::new_f64(*v), + Literal::Int(v) => IonValue::new_i32(*v), + Literal::BigInt(v) => todo!(), + Literal::Bool(v) => IonValue::new_bool(*v), + Literal::Null => IonValue::new_null(), + Literal::Undefined => IonValue::new_null(), // TODO: undefined? + } + } + + fn compile_assignment(&mut self, assignment: &Assign) { + self.compile_expression(assignment.rhs()); + match assignment.op() { + boa_ast::expression::operator::assign::AssignOp::Assign => {}, + boa_ast::expression::operator::assign::AssignOp::Add => todo!(), + boa_ast::expression::operator::assign::AssignOp::Sub => todo!(), + boa_ast::expression::operator::assign::AssignOp::Mul => todo!(), + boa_ast::expression::operator::assign::AssignOp::Div => todo!(), + boa_ast::expression::operator::assign::AssignOp::Mod => todo!(), + boa_ast::expression::operator::assign::AssignOp::Exp => todo!(), + boa_ast::expression::operator::assign::AssignOp::And => todo!(), + boa_ast::expression::operator::assign::AssignOp::Or => todo!(), + boa_ast::expression::operator::assign::AssignOp::Xor => todo!(), + boa_ast::expression::operator::assign::AssignOp::Shl => todo!(), + boa_ast::expression::operator::assign::AssignOp::Shr => todo!(), + boa_ast::expression::operator::assign::AssignOp::Ushr => todo!(), + boa_ast::expression::operator::assign::AssignOp::BoolAnd => todo!(), + boa_ast::expression::operator::assign::AssignOp::BoolOr => todo!(), + boa_ast::expression::operator::assign::AssignOp::Coalesce => todo!(), + } + self.push_dup(); + match assignment.lhs() { + AssignTarget::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_store(varid); + }, + AssignTarget::Access(_) => todo!(), + AssignTarget::Pattern(_) => todo!(), + } + } - let mut runtime = Runtime::new(); - let fnref = runtime.load_bytecode(data); + fn compile_binary(&mut self, binary: &Binary) { + match binary.op() { + BinaryOp::Arithmetic(_) => todo!(), + BinaryOp::Bitwise(_) => todo!(), + BinaryOp::Relational(op) => { + self.compile_expression(binary.lhs()); + self.compile_expression(binary.rhs()); + let jump_true = match op { + RelationalOp::Equal => self.push_ifeq2(), + RelationalOp::NotEqual => self.push_ifne2(), + RelationalOp::StrictEqual => self.push_ifeq2(), + RelationalOp::StrictNotEqual => self.push_ifne2(), + RelationalOp::GreaterThan => self.push_ifle2(), + RelationalOp::GreaterThanOrEqual => self.push_iflt2(), + RelationalOp::LessThan => self.push_ifge2(), + RelationalOp::LessThanOrEqual => self.push_ifgt2(), + RelationalOp::In => todo!(), + RelationalOp::InstanceOf => todo!(), + }; + self.push_push(&IonValue::new_bool(false)); + let jump_false = self.push_jump(); + self.set_jump(jump_true); + self.push_push(&IonValue::new_bool(true)); + self.set_jump(jump_false); + }, + BinaryOp::Logical(op) => { + match op { + LogicalOp::And => { + self.compile_expression(binary.lhs()); + self.push_dup(); + self.push_push(&IonValue::new_bool(false)); + let jump_done = self.push_ifeq2(); + self.push_pop(); + self.compile_expression(binary.rhs()); + self.set_jump(jump_done) + }, + LogicalOp::Or => { + self.compile_expression(binary.lhs()); + self.push_dup(); + self.push_push(&IonValue::new_bool(true)); + let jump_done = self.push_ifeq2(); + self.push_pop(); + self.compile_expression(binary.rhs()); + self.set_jump(jump_done) + }, + LogicalOp::Coalesce => { + self.compile_expression(binary.lhs()); + self.push_dup(); + self.push_push(&IonValue::new_null()); + let jump_done = self.push_ifeq2(); + self.push_pop(); + self.compile_expression(binary.rhs()); + self.set_jump(jump_done) + }, + } + }, + BinaryOp::Comma => todo!(), + } + } - let mut args = Vec::with_capacity(3); - args.push(IonValue::new_null()); - args.push(IonValue::new_null()); - args.push(IonValue::new_null()); + fn compile_expression(&mut self, expression: &Expression) { + match expression { + Expression::This => self.push_load(0), + Expression::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_load(varid); + }, + Expression::Literal(lit) => { + let value = self.literal_to_ion(lit); + self.push_push(&value); + }, + Expression::ArrayLiteral(alit) => { + // TODO: there's an opportunity here inject static arrays into the bytecode. + for exprmaybe in alit.as_ref() { + if let Some(expr) = exprmaybe { + self.compile_expression(expr); + } else { + self.push_push(&IonValue::new_null()); + } + } + self.push_push(&alit.as_ref().len().into()); + self.push_newlist(); + }, + Expression::ObjectLiteral(olit) => { + // TODO: there's an opportunity here inject static objects into the bytecode. + for propdef in olit.properties() { + match propdef { + PropertyDefinition::IdentifierReference(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_load(varid); + let symb = self.to_ion_symbol(ident.sym()); + self.push_push(&symb); + }, + PropertyDefinition::Property(name, expr) => { + self.compile_expression(expr); + match name { + PropertyName::Literal(sym) => { + let symb = self.to_ion_symbol(*sym); + self.push_push(&symb); + }, + PropertyName::Computed(_) => todo!(), + } + }, + PropertyDefinition::MethodDefinition(_, _) => todo!(), + PropertyDefinition::SpreadObject(_) => todo!(), + PropertyDefinition::CoverInitializedName(_, _) => todo!(), + } + } + self.push_push(&olit.properties().len().into()); + self.push_newstruct(); + }, + Expression::Spread(_) => todo!(), + Expression::Function(_) => todo!(), + Expression::ArrowFunction(_) => todo!(), + Expression::AsyncArrowFunction(_) => todo!(), + Expression::Generator(_) => todo!(), + Expression::AsyncFunction(_) => todo!(), + Expression::AsyncGenerator(_) => todo!(), + Expression::Class(_) => todo!(), + Expression::TemplateLiteral(_) => todo!(), + Expression::PropertyAccess(atype) => { + match atype { + PropertyAccess::Simple(access) => { + self.compile_expression(access.target()); + self.push_iterate(); + let label_loop = self.bytecode.len(); + self.push_dup2(); + let jump_undef = self.push_ifge2(); + self.push_load_field(); + match access.field() { + PropertyAccessField::Const(sym) => { + let symbol = self.to_ion_symbol(*sym); + self.push_push(&symbol); + }, + PropertyAccessField::Expr(expr) => { + self.compile_expression(expr); + todo!("cast to symbol") + }, + } + let jump_found = self.push_ifeq2(); + self.push_next(); + let jump_loop = self.push_jump(); + self.set_jump_target(jump_loop, label_loop); - let result = runtime.invoke(fnref, args); + // Found the field + self.set_jump(jump_found); + self.push_load_value(); + let jump_cleanup = self.push_jump(); - for (i, v) in result.iter().enumerate() { - println!("Result {i}:"); - for b in v.bytes() { - print!("{b:02x} "); + // Undefined field + self.set_jump(jump_undef); + self.push_push(&IonValue::new_null()); + + // Cleanup + // We need to pop four values under the top value: + // three for the iterator, one for the object. + self.set_jump(jump_cleanup); + for _ in 0..4 { + self.push_swappop(); + } + }, + PropertyAccess::Private(_) => todo!(), + PropertyAccess::Super(_) => todo!(), + } + }, + Expression::New(_) => todo!(), + Expression::Call(_) => todo!(), + Expression::SuperCall(_) => todo!(), + Expression::ImportCall(_) => todo!(), + Expression::Optional(_) => todo!(), + Expression::TaggedTemplate(_) => todo!(), + Expression::NewTarget => todo!(), + Expression::ImportMeta => todo!(), + Expression::Assign(assignment) => self.compile_assignment(assignment), + Expression::Unary(_) => todo!(), + Expression::Update(_) => todo!(), + Expression::Binary(binary) => self.compile_binary(binary), + Expression::BinaryInPrivate(_) => todo!(), + Expression::Conditional(_) => todo!(), + Expression::Await(_) => todo!(), + Expression::Yield(_) => todo!(), + Expression::Parenthesized(_) => todo!(), + _ => todo!(), + } + } + + fn compile_statement_list(&mut self, statements: &StatementList) { + for item in statements.as_ref() { + match item { + StatementListItem::Statement(statement) => self.compile_statement(statement), + StatementListItem::Declaration(decl) => self.compile_declaration(decl), + } } - println!("\n"); } + + fn compile_statement(&mut self, statement: &Statement) { + match statement { + boa_ast::Statement::Block(block) => { + self.enter_block(); + self.compile_statement_list(block.statement_list()); + self.exit_block(); + }, + boa_ast::Statement::Var(_) => todo!(), + boa_ast::Statement::Empty => {}, + boa_ast::Statement::Expression(expression) => { + self.compile_expression(expression); + self.push_pop(); + }, + boa_ast::Statement::If(flow) => { + self.compile_expression(flow.cond()); + self.push_push(&IonValue::new_bool(false)); + let jump_else = self.push_ifeq2(); + self.compile_statement(flow.body()); + if let Some(statement) = flow.else_node() { + let jump_done = self.push_jump(); + self.set_jump(jump_else); + self.compile_statement(statement); + self.set_jump(jump_done) + } else { + self.set_jump(jump_else); + } + }, + boa_ast::Statement::DoWhileLoop(_) => todo!(), + boa_ast::Statement::WhileLoop(_) => todo!(), + boa_ast::Statement::ForLoop(_) => todo!(), + boa_ast::Statement::ForInLoop(_) => todo!(), + boa_ast::Statement::ForOfLoop(flow) => { + self.compile_expression(flow.iterable()); + self.push_iterate(); + + // Top of the loop + let label_loop = self.bytecode.len(); + self.push_dup2(); + let jump_done = self.push_ifge2(); + + self.push_load_value(); + + // Save the request data in the binding + match flow.initializer() { + IterableLoopInitializer::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_store(varid); + }, + IterableLoopInitializer::Access(_) => todo!(), + IterableLoopInitializer::Var(_) => todo!(), + IterableLoopInitializer::Let(binding) | IterableLoopInitializer::Const(binding) => { + self.enter_block(); + match binding { + Binding::Identifier(ident) => { + let varid = self.get_or_create_variable(ident.sym()); + self.push_store(varid); + }, + Binding::Pattern(_) => todo!(), + } + self.exit_block(); + }, + IterableLoopInitializer::Pattern(_) => todo!(), + } + + // Run the loop body + self.compile_statement(flow.body()); + + // Back to the top + self.push_next(); + let jump_loop = self.push_jump(); + self.set_jump_target(jump_loop, label_loop); + + // Cleanup + // We need to pop four values under the top value: + // three for the iterator, one for the object. + self.set_jump(jump_done); + for _ in 0..4 { + self.push_pop(); + } + + }, + boa_ast::Statement::Switch(_) => todo!(), + boa_ast::Statement::Continue(_) => todo!(), + boa_ast::Statement::Break(_) => todo!(), + boa_ast::Statement::Return(_) => todo!(), + boa_ast::Statement::Labelled(_) => todo!(), + boa_ast::Statement::Throw(_) => todo!(), + boa_ast::Statement::Try(_) => todo!(), + boa_ast::Statement::With(_) => todo!(), + } + } + + pub fn dump(&self) { + println!("Symbols"); + println!("-------"); + for (id, text) in self.symbols.symbols().iter().enumerate() { + println!("{id:04X} {text}"); + } + println!(); + + println!("Functions"); + println!("---------"); + for func in &self.functions { + println!("{:04X} pc: {:04X}", func.name_symbol, func.pc); + } + println!(); + + println!("Bytecode"); + println!("--------"); + println!("ADDRESS 0 1 2 3 4 5 6 7 8 9 A B C D E F TEXT"); + for (i, bytes) in self.bytecode.chunks(16).enumerate() { + print!("{:08X} ", i * 16); + for b in bytes { + print!("{b:02X} "); + } + for _ in bytes.len()..16 { + print!(" "); + } + print!(" "); + for b in bytes { + if (0x40..0x7F).contains(b) { + unsafe { + print!("{}", char::from_u32_unchecked(*b as u32)); + } + } else { + print!("."); + } + } + println!(); + } + println!(); + } +} + + +fn main() -> Result<(), RelJsError> { + let src = Source::from_filepath(Path::new("demo/script.js"))?; + let mut parser = Parser::new(src); + let mut interner = Interner::new(); + let script = parser.parse_script(&mut interner)?; + + let mut compiler = Compiler::new(interner); + + for item in script.statements().statements() { + if let StatementListItem::Declaration(decl) = item { + if let Declaration::Function(_) = decl { + compiler.compile_declaration(decl); + } else { + todo!("Not supported in the global scope."); + } + } else { + todo!("Not supported in the global scope."); + } + } + + compiler.dump(); + + let func = compiler.functions[0].name_symbol; + + let mut runtime = Runtime::new(compiler.symbols, compiler.bytecode, compiler.functions); + + let result = runtime.invoke(func, vec![]); + + + /* + let mut bin = File::open("demo/test-1.bin")?; + let mut data = Vec::new(); + bin.read_to_end(&mut data)?; + + let mut runtime = Runtime::new(); + let fnref = runtime.load_bytecode(data); + + let mut args = Vec::with_capacity(3); + args.push(IonValue::new_null()); + args.push(IonValue::new_null()); + args.push(IonValue::new_null()); + + let result = runtime.invoke(fnref, args); + */ + for (i, v) in result.iter().enumerate() { + println!("Result {i}:"); + for b in v.bytes() { + print!("{b:02x} "); + } + println!("\n"); + } Ok(()) } diff --git a/src/runtime.rs b/src/runtime.rs index 3954a80..b041561 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -1,5 +1,6 @@ -use std::{collections::LinkedList, cmp::Ordering, sync::Arc}; +use std::{collections::{LinkedList, BTreeMap}, cmp::Ordering, sync::Arc, iter::{Copied, Rev}}; +use boa_interner::Sym; use ion_rs::SymbolTable; use crate::ion::{IonReader, IonValue}; @@ -12,7 +13,14 @@ pub enum OpCode { Jump = 0x01, + Dup = 0x02, Dup2 = 0x03, + SwapPop = 0x04, + + Invoke = 0x05, + Yield = 0x06, + Return = 0x07, + /// Push a typed Ion value onto the stack. /// operands: T and L @@ -221,6 +229,10 @@ pub enum OpCode { /// stack: [{value1}, {value2}] -> [] IfLt2 = 0x3b, + + NewList = 0xB1, + NewStruct = 0xD1, + } impl OpCode { @@ -229,10 +241,17 @@ impl OpCode { } } +impl From for u8 { + fn from(value: OpCode) -> Self { + value.as_u8() + } +} + pub struct Runtime { symbols: SymbolTable, + bytecode: Arc, + functions: BTreeMap, stacks: LinkedList, - functions: Vec>, } pub enum ExecutionState { @@ -240,11 +259,13 @@ pub enum ExecutionState { Halt, } +#[derive(Copy, Clone)] pub struct Function { - bytecode: ByteCode, - arguments: u16, - variables: u16, - expected_stack_depth: usize, + pub(crate) name_symbol: usize, + pub(crate) arguments: u16, + pub(crate) variables: u16, + pub(crate) pc: usize, + pub(crate) expected_stack_depth: usize, } pub type FnRef = usize; @@ -253,42 +274,30 @@ pub type ByteCode = Vec; struct Stack(Vec); pub struct StackFrame { - func: Arc, + bytecode: Arc, pc: usize, variables: Vec, stack: Stack, } -impl Runtime { - pub fn new() -> Runtime { +impl Runtime { + pub fn new(symbols: SymbolTable, bytecode: ByteCode, functions: Vec) -> Runtime { Runtime { - symbols: SymbolTable::new(), + symbols, + functions: functions.into_iter().map(|f| (f.name_symbol, f)).collect(), + bytecode: bytecode.into(), stacks: LinkedList::new(), - functions: Vec::new(), } } - pub fn load_bytecode(&mut self, data: Vec) -> FnRef { - let arguments = (data[0] as u16) << 8 | (data[1] as u16); - let variables = (data[2] as u16) << 8 | (data[3] as u16); - let func = Function { - bytecode: data.into_iter().skip(4).collect(), - arguments, - variables, - expected_stack_depth: 32, - }; - self.functions.push(func.into()); - self.functions.len() - 1 - } - pub fn invoke(&mut self, fnref: FnRef, arguments: Vec) -> Vec { - let func = self.functions.get(fnref).expect("Undefined function").clone(); - let mut frame = StackFrame::new(func.clone(), arguments); + let func = *self.functions.get(&fnref).expect("Undefined function"); + let mut frame = StackFrame::new(self.bytecode.clone(), &func, arguments); loop { match self.execute(&mut frame) { ExecutionState::Continue => (), ExecutionState::Halt => { - frame.variables.truncate(func.arguments.into()); + //frame.variables.truncate(func.arguments.into()); return frame.variables; }, } @@ -301,17 +310,22 @@ impl Runtime { } let op = frame.next_byte(); - println!("PC: {:02X}, OP: {:02X}, Stack: {:02X}", frame.pc, op, frame.stack.0.len()); for x in &frame.stack.0 { print!(" {x:02X}"); } println!(); + println!("PC: {:02X}, OP: {:02X}", frame.pc - 1, op); match op { 0x01 => { // OpType::Jump - let pc = frame.next_varuint(); + let pc = frame.next_u32().try_into().unwrap(); frame.jump(pc); }, + 0x02 => { // OpType::Dup + let value1 = frame.stack.pop_value(); + frame.stack.push_value(&value1); + frame.stack.push_value(&value1); + }, 0x03 => { // OpType::Dup2 let value1 = frame.stack.pop_value(); let value2 = frame.stack.pop_value(); @@ -320,8 +334,13 @@ impl Runtime { frame.stack.push_value(&value2); frame.stack.push_value(&value1); }, + 0x04 => { // OpType::SwapPop + let value1 = frame.stack.pop_value(); + frame.stack.drop_value(); + frame.stack.push_value(&value1); + }, 0x10 => { // OpType::Push - let buf = &frame.func.bytecode[frame.pc..]; + let buf = &self.bytecode[frame.pc..]; let mut reader = IonReader::new(buf.iter().copied(), 0); let value = reader.next_value(); frame.pc += reader.offset(); @@ -349,14 +368,12 @@ impl Runtime { 0x17 => todo!(), // OpCode::ListLoad 0x18 => todo!(), // OpCode::FieldAppend 0x19 => { // OpCode::Iterate - let varid = frame.next_varuint(); - let value = frame.variables - .get(varid) - .expect("Undefined variable"); - - let mut reader = value.reader(); - let (ion_type, end) = reader.step_in(); - let pos = reader.offset(); + let (ion_type, end, pos) = { + let mut reader = frame.stack.peek_reader(); + let (ion_type, end) = reader.step_in(); + let pos = reader.offset(); + (ion_type, end, pos) + }; frame.stack.push_usize(ion_type as usize); frame.stack.push_usize(end); @@ -368,20 +385,16 @@ impl Runtime { let len = frame.stack.pop_value().to_usize(); let ion_type = frame.stack.pop_value().to_usize(); - let varid = frame.next_varuint(); - let mut value = if ion_type == 0x0D { - frame.variables - .get(varid) - .expect("Undefined variable") - .struct_reader_at(pos) - } else { - frame.variables - .get(varid) - .expect("Undefined variable") - .reader_at(pos) + let next = { + let mut reader = if ion_type == 0x0D { + frame.stack.peek_struct_reader_at(pos) + } else { + frame.stack.peek_reader_at(pos) + }; + reader.skip_value(); + reader.offset() }; - value.skip_value(); - let next = value.offset(); + frame.stack.push_usize(ion_type); // TODO peek frame.stack.push_usize(len); // TODO peek frame.stack.push_usize(next); @@ -391,26 +404,21 @@ impl Runtime { let len = frame.stack.pop_value().to_usize(); let ion_type = frame.stack.pop_value().to_usize(); + let (next_type, next_end, next_pos) = { + let mut reader = if ion_type == 0x0D { + frame.stack.peek_struct_reader_at(pos) + } else { + frame.stack.peek_reader_at(pos) + }; + let (next_type, next_end) = reader.step_in(); + let next_pos = reader.offset(); + (next_type, next_end, next_pos) + }; + frame.stack.push_usize(ion_type); // TODO peek frame.stack.push_usize(len); // TODO peek frame.stack.push_usize(pos); // TODO peek - let varid = frame.next_varuint(); - let mut value = if ion_type == 0x0D { - frame.variables - .get(varid) - .expect("Undefined variable") - .struct_reader_at(pos) - } else { - frame.variables - .get(varid) - .expect("Undefined variable") - .reader_at(pos) - }; - - let (next_type, next_end) = value.step_in(); - let next_pos = value.offset(); - frame.stack.push_usize(next_type as usize); frame.stack.push_usize(next_end); frame.stack.push_usize(next_pos); @@ -425,24 +433,19 @@ impl Runtime { let len = frame.stack.pop_value().to_usize(); let ion_type = frame.stack.pop_value().to_usize(); + let next_value = { + let mut reader = if ion_type == 0x0D { + frame.stack.peek_struct_reader_at(pos) + } else { + frame.stack.peek_reader_at(pos) + }; + reader.next_value() + }; + frame.stack.push_usize(ion_type); // TODO peek frame.stack.push_usize(len); // TODO peek frame.stack.push_usize(pos); // TODO peek - let varid = frame.next_varuint(); - let mut value = if ion_type == 0x0D { - frame.variables - .get(varid) - .expect("Undefined variable") - .struct_reader_at(pos) - } else { - frame.variables - .get(varid) - .expect("Undefined variable") - .reader_at(pos) - }; - - let next_value = value.next_value(); frame.stack.push_value(&next_value); }, 0x1E => { // OpCode::StructField @@ -450,25 +453,21 @@ impl Runtime { let len = frame.stack.pop_value().to_usize(); let ion_type = frame.stack.pop_value().to_usize(); + let field_id = { + let mut reader = if ion_type == 0x0D { + frame.stack.peek_struct_reader_at(pos) + } else { + frame.stack.peek_reader_at(pos) + }; + reader.skip_value(); + reader.field_id() + }; + frame.stack.push_usize(ion_type); // TODO peek frame.stack.push_usize(len); // TODO peek frame.stack.push_usize(pos); // TODO peek - let varid = frame.next_varuint(); - let mut value = if ion_type == 0x0D { - frame.variables - .get(varid) - .expect("Undefined variable") - .struct_reader_at(pos) - } else { - frame.variables - .get(varid) - .expect("Undefined variable") - .reader_at(pos) - }; - - value.skip_value(); - if let Some(field_id) = value.field_id() { + if let Some(field_id) = field_id { frame.stack.push_symbol(field_id); } else { panic!("Not a struct"); @@ -493,7 +492,7 @@ impl Runtime { 0x34 => todo!(), // OpCode::IfLe 0x35 => todo!(), // OpCode::IfLt 0x36 => { // OpCode::IfEq2 - let next_pc = frame.next_varuint(); + let next_pc = frame.next_u32().try_into().unwrap(); let value1 = frame.stack.pop_value(); let value2 = frame.stack.pop_value(); if value1 == value2 { @@ -501,7 +500,7 @@ impl Runtime { } }, 0x37 => { // OpCode::IfNe2 - let next_pc = frame.next_varuint(); + let next_pc = frame.next_u32().try_into().unwrap(); let value1 = frame.stack.pop_value(); let value2 = frame.stack.pop_value(); if value1 != value2 { @@ -509,7 +508,7 @@ impl Runtime { } }, 0x38 => { // OpCode::IfGe2 - let next_pc = frame.next_varuint(); + let next_pc = frame.next_u32().try_into().unwrap(); let value1 = frame.stack.pop_value(); let value2 = frame.stack.pop_value(); if value1 >= value2 { @@ -517,7 +516,7 @@ impl Runtime { } }, 0x39 => { // OpCode::IfGt2 - let next_pc = frame.next_varuint(); + let next_pc = frame.next_u32().try_into().unwrap(); let value1 = frame.stack.pop_value(); let value2 = frame.stack.pop_value(); if value1 > value2 { @@ -525,7 +524,7 @@ impl Runtime { } }, 0x3A => { // OpCode::IfLe2 - let next_pc = frame.next_varuint(); + let next_pc = frame.next_u32().try_into().unwrap(); let value1 = frame.stack.pop_value(); let value2 = frame.stack.pop_value(); if value1 <= value2 { @@ -533,14 +532,59 @@ impl Runtime { } }, 0x3B => { // OpCode::IfLt2 - let next_pc = frame.next_varuint(); + let next_pc = frame.next_u32().try_into().unwrap(); let value1 = frame.stack.pop_value(); let value2 = frame.stack.pop_value(); if value1 < value2 { frame.pc = next_pc; } }, - _ => panic!("Invalid opcode {:02x} at {}", op, frame.pc), + + 0xB1 => { // OpCode::NewList + // TODO: Yuck + let len = frame.stack.pop_usize(); + let mut list = Vec::new(); + let mut sz = 0; + for _ in 0..len { + let value = frame.stack.pop_value(); + sz += value.len(); + list.push(value); + } + for v in list { + frame.stack.push_value(&v); + } + if sz >= 14 { + frame.stack.push_varuint(sz); + frame.stack.push_byte(0xBE); + } else { + frame.stack.push_byte(0xB0 | (sz as u8)); + } + }, + + 0xD1 => { // OpCode::NewStruct + // TODO: Yuck + let len = frame.stack.pop_usize(); + let mut pairs = Vec::new(); + for _ in 0..len { + let sym = frame.stack.pop_value(); + let value = frame.stack.pop_value(); + pairs.push((sym, value)); + } + let mark = frame.stack.len(); + for (sym, v) in pairs { + frame.stack.push_value(&v); + frame.stack.push_varuint(sym.reader().next_symbol_id()); + } + let sz = frame.stack.len() - mark; + if sz >= 14 { + frame.stack.push_varuint(sz); + frame.stack.push_byte(0xDE); + } else { + frame.stack.push_byte(0xD0 | (sz as u8)); + } + }, + + _ => panic!("Invalid opcode {:02x} at {:02x}", op, frame.pc - 1), } ExecutionState::Continue @@ -551,17 +595,17 @@ impl Runtime { } } -impl StackFrame { - pub fn new(func: Arc, arguments: Vec) -> StackFrame { +impl StackFrame { + pub fn new(bytecode: Arc, func: &Function, arguments: Vec) -> StackFrame { let stack = Stack::new(func.expected_stack_depth); let mut frame = StackFrame { - func, - pc: 0, + bytecode, + pc: func.pc, variables: arguments, stack, }; - for _ in frame.func.arguments..frame.func.variables { + for _ in func.arguments..func.variables { frame.variables.push(IonValue::new_null()); } @@ -573,22 +617,31 @@ impl StackFrame { } fn eof(&self) -> bool { - self.pc >= self.func.bytecode.len() + self.pc >= self.bytecode.len() } fn next_byte(&mut self) -> u8 { - let b = self.func.bytecode[self.pc]; + let b = self.bytecode[self.pc]; self.pc += 1; b } fn next_bytes(&mut self, len: usize) -> &[u8] { let end = self.pc + len; - let bytes = &self.func.bytecode[self.pc..end]; + let bytes = &self.bytecode[self.pc..end]; self.pc = end; bytes } + fn next_u32(&mut self) -> u32 { + let mut v: u32 = 0; + for _ in 0..4 { + v <<= 8; + v |= self.next_byte() as u32; + } + v + } + fn next_varuint(&mut self) -> usize { let mut b: usize = self.next_byte().into(); let mut v = b & 0x7f; @@ -606,10 +659,24 @@ impl Stack { Stack(Vec::with_capacity(expected_depth)) } - pub fn peak_reader(&self) -> IonReader + '_> { + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn peek_reader(&self) -> IonReader + '_> { IonReader::new(self.0.iter().copied().rev(), 0) } + pub fn peek_reader_at(&self, offset: usize) -> IonReader>>> { + let end = self.0.len() - offset; + IonReader::new(self.0[..end].iter().copied().rev(), offset) + } + + pub fn peek_struct_reader_at(&self, offset: usize) -> IonReader>>> { + let end = self.0.len() - offset; + IonReader::new_struct(self.0[..end].iter().copied().rev(), offset) + } + pub fn drop_value(&mut self) { let mut it = IonReader::new(self.0.iter().copied().rev(), 0); it.skip_value(); @@ -625,25 +692,47 @@ impl Stack { value } + pub fn pop_usize(&mut self) -> usize { + let mut it = IonReader::new(self.0.iter().copied().rev(), 0); + let value = it.next_usize(); + let offset = it.offset(); + self.0.truncate(self.0.len() - offset); + value + } + + #[inline] + pub fn push_byte(&mut self, value: u8) { + self.0.push(value); + } + + pub fn push_varuint(&mut self, mut value: usize) { + self.push_byte((value & 0x7F | 0x80) as u8); + value >>= 7; + while value != 0 { + self.push_byte((value & 0x7F) as u8); + value >>= 7; + } + } + pub fn push_value(&mut self, value: &IonValue) { for byte in value.bytes().rev() { - self.0.push(byte); + self.push_byte(byte); } } pub fn push_symbol(&mut self, value: usize) { match value.cmp(&0) { - Ordering::Equal => self.0.push(0x70), + Ordering::Equal => self.push_byte(0x70), Ordering::Greater => { let mut v = value; let mut octets = 0; while v != 0 { - self.0.push((v & 0xFF) as u8); + self.push_byte((v & 0xFF) as u8); octets += 1; v >>= 8; } let tl = 0x70 | octets; - self.0.push(tl); + self.push_byte(tl); }, Ordering::Less => (), } @@ -651,17 +740,17 @@ impl Stack { pub fn push_usize(&mut self, value: usize) { match value.cmp(&0) { - Ordering::Equal => self.0.push(0x20), + Ordering::Equal => self.push_byte(0x20), Ordering::Greater => { let mut v = value; let mut octets = 0; while v != 0 { - self.0.push((v & 0xFF) as u8); + self.push_byte((v & 0xFF) as u8); octets += 1; v >>= 8; } let tl = 0x20 | octets; - self.0.push(tl); + self.push_byte(tl); }, Ordering::Less => (), } -- cgit v1.2.3