From c639bf850ed97fe0149a33b3a0190c6269f85e26 Mon Sep 17 00:00:00 2001 From: Steve Akinyemi Date: Sat, 24 Nov 2018 15:55:21 +0100 Subject: [PATCH] Add some syscalls --- examples/sys.wat | 47 ++++++ src/apis/emscripten/README.md | 2 + src/apis/emscripten/errno.rs | 266 ++++++++++++++++++++++++++++++++ src/apis/emscripten/lock.rs | 8 + src/apis/emscripten/memory.rs | 8 +- src/apis/emscripten/mod.rs | 130 +++++++++++++++- src/apis/emscripten/nullfunc.rs | 39 +++++ src/apis/emscripten/process.rs | 21 +-- src/apis/emscripten/storage.rs | 28 ++++ src/apis/emscripten/syscalls.rs | 196 +++++++++++++++++++++-- src/apis/mod.rs | 2 +- src/webassembly/instance.rs | 62 +++++++- src/webassembly/memory.rs | 13 +- 13 files changed, 770 insertions(+), 52 deletions(-) create mode 100644 examples/sys.wat create mode 100644 src/apis/emscripten/errno.rs create mode 100644 src/apis/emscripten/lock.rs create mode 100644 src/apis/emscripten/nullfunc.rs create mode 100644 src/apis/emscripten/storage.rs diff --git a/examples/sys.wat b/examples/sys.wat new file mode 100644 index 000000000..916352a48 --- /dev/null +++ b/examples/sys.wat @@ -0,0 +1,47 @@ +(module + (type $t1 (func (param i32))) + (type $t2 (func (param i32 i32 i32) (result i32))) + (type $t3 (func (param i32) (result i32))) + (type $t4 (func (param i32 i32) (result i32))) + (func $putchar (import "env" "putchar") (type $t1)) + (func $printf (import "env" "printf") (type $t4)) + (func $sys_open (import "env" "sys_open") (type $t2)) + (func $sys_read (import "env" "sys_read") (type $t2)) + (func $sys_close (import "env" "sys_close") (type $t3)) + (func $sys_exit (import "env" "sys_exit") (type $t1)) + (memory 1) + (data $filename (i32.const 0) "/Users/xxxx/Desktop/hello.txt\00") + (func $main (export "_main") + ;; declare variables + (local $string_buf_addr i32) + (local $string_buf_len i32) + (local $file_access_flag i32) + (local $file_permission_flag i32) + (local $file_descriptor i32) + + ;; set variables + (set_local 
$string_buf_addr (i32.const 72)) ;; string_buf_addr at offset 72 + (set_local $string_buf_len (i32.const 10)) ;; string_buf_len is 10 + (set_local $file_access_flag (i32.const 02)) ;; file_access_flag has O_RDWR permission + (set_local $file_permission_flag (i32.const 448)) ;; file_permission_flag has S_IRWXU permission (octal 700 = decimal 448) + + ;; open file + (call $sys_open (i32.const 0) (get_local $file_access_flag) (get_local $file_permission_flag)) ;; (path: u32, flags: c_int, mode: c_int) -> c_int + (set_local $file_descriptor) ;; set file_descriptor to the value returned by sys_open + + ;; read file content + (call $sys_read (get_local $file_descriptor) (get_local $string_buf_addr) (get_local $string_buf_len)) ;; (fd: c_int, buf: u32, count: size_t) -> ssize_t + (drop) ;; ignoring errors + + ;; close file + (call $sys_close (get_local $file_descriptor)) ;; (fd: c_int) -> c_int + (drop) ;; ignoring errors + + ;; print file content + (call $printf (get_local $string_buf_addr) (i32.const 0)) + (drop) ;; ignoring errors + + ;; exit + (call $sys_exit (i32.const 0)) + ) +) diff --git a/src/apis/emscripten/README.md b/src/apis/emscripten/README.md index b0b6d32d3..dce02bc24 100644 --- a/src/apis/emscripten/README.md +++ b/src/apis/emscripten/README.md @@ -520,3 +520,5 @@ 🔥 - Possible memory access violation 📥 - Access to external memory + +📝 - External write to internal memory diff --git a/src/apis/emscripten/errno.rs b/src/apis/emscripten/errno.rs new file mode 100644 index 000000000..9a671af24 --- /dev/null +++ b/src/apis/emscripten/errno.rs @@ -0,0 +1,266 @@ +// use std::collections::HashMap; + +pub extern "C" fn ___seterrno(value: i32) -> i32 { + debug!("emscripten::___seterrno"); + // TODO: Incomplete impl + eprintln!("failed to set errno!"); + value +} + +// pub enum ErrnoCodes { +// EPERM = 1, +// ENOENT = 2, +// ESRCH = 3, +// EINTR = 4, +// EIO = 5, +// ENXIO = 6, +// E2BIG = 7, +// ENOEXEC = 8, +// EBADF = 9, +// ECHILD = 10, +// EAGAIN = 11, +// EWOULDBLOCK = 11, +// ENOMEM = 12, 
+// EACCES = 13, +// EFAULT = 14, +// ENOTBLK = 15, +// EBUSY = 16, +// EEXIST = 17, +// EXDEV = 18, +// ENODEV = 19, +// ENOTDIR = 20, +// EISDIR = 21, +// EINVAL = 22, +// ENFILE = 23, +// EMFILE = 24, +// ENOTTY = 25, +// ETXTBSY = 26, +// EFBIG = 27, +// ENOSPC = 28, +// ESPIPE = 29, +// EROFS = 30, +// EMLINK = 31, +// EPIPE = 32, +// EDOM = 33, +// ERANGE = 34, +// ENOMSG = 42, +// EIDRM = 43, +// ECHRNG = 44, +// EL2NSYNC = 45, +// EL3HLT = 46, +// EL3RST = 47, +// ELNRNG = 48, +// EUNATCH = 49, +// ENOCSI = 50, +// EL2HLT = 51, +// EDEADLK = 35, +// ENOLCK = 37, +// EBADE = 52, +// EBADR = 53, +// EXFULL = 54, +// ENOANO = 55, +// EBADRQC = 56, +// EBADSLT = 57, +// EDEADLOCK = 35, +// EBFONT = 59, +// ENOSTR = 60, +// ENODATA = 61, +// ETIME = 62, +// ENOSR = 63, +// ENONET = 64, +// ENOPKG = 65, +// EREMOTE = 66, +// ENOLINK = 67, +// EADV = 68, +// ESRMNT = 69, +// ECOMM = 70, +// EPROTO = 71, +// EMULTIHOP = 72, +// EDOTDOT = 73, +// EBADMSG = 74, +// ENOTUNIQ = 76, +// EBADFD = 77, +// EREMCHG = 78, +// ELIBACC = 79, +// ELIBBAD = 80, +// ELIBSCN = 81, +// ELIBMAX = 82, +// ELIBEXEC = 83, +// ENOSYS = 38, +// ENOTEMPTY = 39, +// ENAMETOOLONG = 36, +// ELOOP = 40, +// EOPNOTSUPP = 95, +// EPFNOSUPPORT = 96, +// ECONNRESET = 104, +// ENOBUFS = 105, +// EAFNOSUPPORT = 97, +// EPROTOTYPE = 91, +// ENOTSOCK = 88, +// ENOPROTOOPT = 92, +// ESHUTDOWN = 108, +// ECONNREFUSED = 111, +// EADDRINUSE = 98, +// ECONNABORTED = 103, +// ENETUNREACH = 101, +// ENETDOWN = 100, +// ETIMEDOUT = 110, +// EHOSTDOWN = 112, +// EHOSTUNREACH = 113, +// EINPROGRESS = 115, +// EALREADY = 114, +// EDESTADDRREQ = 89, +// EMSGSIZE = 90, +// EPROTONOSUPPORT = 93, +// ESOCKTNOSUPPORT = 94, +// EADDRNOTAVAIL = 99, +// ENETRESET = 102, +// EISCONN = 106, +// ENOTCONN = 107, +// ETOOMANYREFS = 109, +// EUSERS = 87, +// EDQUOT = 122, +// ESTALE = 116, +// ENOTSUP = 95, +// ENOMEDIUM = 123, +// EILSEQ = 84, +// EOVERFLOW = 75, +// ECANCELED = 125, +// ENOTRECOVERABLE = 131, +// 
EOWNERDEAD = 130, +// ESTRPIPE = 86, +// } + +// pub struct ErrnoMessages<'a> { +// message_map: HashMap +// } + +// impl<'a> ErrnoMessages<'a> { +// fn new() -> Self { +// let mut message_map = HashMap::new(); +// message_map.insert(0, "Success"); +// message_map.insert(1, "Not super-user"); +// message_map.insert(2, "No such file or directory"); +// message_map.insert(3, "No such process"); +// message_map.insert(4, "Interrupted system call"); +// message_map.insert(5, "I/O error"); +// message_map.insert(6, "No such device or address"); +// message_map.insert(7, "Arg list too long"); +// message_map.insert(8, "Exec format error"); +// message_map.insert(9, "Bad file number"); +// message_map.insert(10, "No children"); +// message_map.insert(11, "No more processes"); +// message_map.insert(12, "Not enough core"); +// message_map.insert(13, "Permission denied"); +// message_map.insert(14, "Bad address"); +// message_map.insert(15, "Block device required"); +// message_map.insert(16, "Mount device busy"); +// message_map.insert(17, "File exists"); +// message_map.insert(18, "Cross-device link"); +// message_map.insert(19, "No such device"); +// message_map.insert(20, "Not a directory"); +// message_map.insert(21, "Is a directory"); +// message_map.insert(22, "Invalid argument"); +// message_map.insert(23, "Too many open files in system"); +// message_map.insert(24, "Too many open files"); +// message_map.insert(25, "Not a typewriter"); +// message_map.insert(26, "Text file busy"); +// message_map.insert(27, "File too large"); +// message_map.insert(28, "No space left on device"); +// message_map.insert(29, "Illegal seek"); +// message_map.insert(30, "Read only file system"); +// message_map.insert(31, "Too many links"); +// message_map.insert(32, "Broken pipe"); +// message_map.insert(33, "Math arg out of domain of func"); +// message_map.insert(34, "Math result not representable"); +// message_map.insert(35, "File locking deadlock error"); +// 
message_map.insert(36, "File or path name too long"); +// message_map.insert(37, "No record locks available"); +// message_map.insert(38, "Function not implemented"); +// message_map.insert(39, "Directory not empty"); +// message_map.insert(40, "Too many symbolic links"); +// message_map.insert(42, "No message of desired type"); +// message_map.insert(43, "Identifier removed"); +// message_map.insert(44, "Channel number out of range"); +// message_map.insert(45, "Level 2 not synchronized"); +// message_map.insert(46, "Level 3 halted"); +// message_map.insert(47, "Level 3 reset"); +// message_map.insert(48, "Link number out of range"); +// message_map.insert(49, "Protocol driver not attached"); +// message_map.insert(50, "No CSI structure available"); +// message_map.insert(51, "Level 2 halted"); +// message_map.insert(52, "Invalid exchange"); +// message_map.insert(53, "Invalid request descriptor"); +// message_map.insert(54, "Exchange full"); +// message_map.insert(55, "No anode"); +// message_map.insert(56, "Invalid request code"); +// message_map.insert(57, "Invalid slot"); +// message_map.insert(59, "Bad font file fmt"); +// message_map.insert(60, "Device not a stream"); +// message_map.insert(61, "No data (for no delay io)"); +// message_map.insert(62, "Timer expired"); +// message_map.insert(63, "Out of streams resources"); +// message_map.insert(64, "Machine is not on the network"); +// message_map.insert(65, "Package not installed"); +// message_map.insert(66, "The object is remote"); +// message_map.insert(67, "The link has been severed"); +// message_map.insert(68, "Advertise error"); +// message_map.insert(69, "Srmount error"); +// message_map.insert(70, "Communication error on send"); +// message_map.insert(71, "Protocol error"); +// message_map.insert(72, "Multihop attempted"); +// message_map.insert(73, "Cross mount point (not really error)"); +// message_map.insert(74, "Trying to read unreadable message"); +// message_map.insert(75, "Value too large 
for defined data type"); +// message_map.insert(76, "Given log. name not unique"); +// message_map.insert(77, "f.d. invalid for this operation"); +// message_map.insert(78, "Remote address changed"); +// message_map.insert(79, "Can access a needed shared lib"); +// message_map.insert(80, "Accessing a corrupted shared lib"); +// message_map.insert(81, ".lib section in a.out corrupted"); +// message_map.insert(82, "Attempting to link in too many libs"); +// message_map.insert(83, "Attempting to exec a shared library"); +// message_map.insert(84, "Illegal byte sequence"); +// message_map.insert(86, "Streams pipe error"); +// message_map.insert(87, "Too many users"); +// message_map.insert(88, "Socket operation on non-socket"); +// message_map.insert(89, "Destination address required"); +// message_map.insert(90, "Message too long"); +// message_map.insert(91, "Protocol wrong type for socket"); +// message_map.insert(92, "Protocol not available"); +// message_map.insert(93, "Unknown protocol"); +// message_map.insert(94, "Socket type not supported"); +// message_map.insert(95, "Not supported"); +// message_map.insert(96, "Protocol family not supported"); +// message_map.insert(97, "Address family not supported by protocol family"); +// message_map.insert(98, "Address already in use"); +// message_map.insert(99, "Address not available"); +// message_map.insert(100, "Network interface is not configured"); +// message_map.insert(101, "Network is unreachable"); +// message_map.insert(102, "Connection reset by network"); +// message_map.insert(103, "Connection aborted"); +// message_map.insert(104, "Connection reset by peer"); +// message_map.insert(105, "No buffer space available"); +// message_map.insert(106, "Socket is already connected"); +// message_map.insert(107, "Socket is not connected"); +// message_map.insert(108, "Can't send after socket shutdown"); +// message_map.insert(109, "Too many references"); +// message_map.insert(110, "Connection timed out"); +// 
message_map.insert(111, "Connection refused"); +// message_map.insert(112, "Host is down"); +// message_map.insert(113, "Host is unreachable"); +// message_map.insert(114, "Socket already connected"); +// message_map.insert(115, "Connection already in progress"); +// message_map.insert(116, "Stale file handle"); +// message_map.insert(122, "Quota exceeded"); +// message_map.insert(123, "No medium (in tape drive)"); +// message_map.insert(125, "Operation canceled"); +// message_map.insert(130, "Previous owner died"); +// message_map.insert(131, "State not recoverable"); + +// ErrnoMessages { +// message_map, +// } +// } +// } + diff --git a/src/apis/emscripten/lock.rs b/src/apis/emscripten/lock.rs new file mode 100644 index 000000000..cf15a7e02 --- /dev/null +++ b/src/apis/emscripten/lock.rs @@ -0,0 +1,8 @@ +use libc::c_int; +use crate::webassembly::Instance; + +// NOTE: Not implemented by Emscripten +pub extern "C" fn ___lock(_which: c_int, _varargs: c_int, _instance: &mut Instance) {} + +// NOTE: Not implemented by Emscripten +pub extern "C" fn ___unlock(_which: c_int, _varargs: c_int, _instance: &mut Instance) {} diff --git a/src/apis/emscripten/memory.rs b/src/apis/emscripten/memory.rs index e568108b3..77eb3f4a5 100644 --- a/src/apis/emscripten/memory.rs +++ b/src/apis/emscripten/memory.rs @@ -1,6 +1,6 @@ use libc::{c_void, memcpy, size_t}; - use crate::webassembly::Instance; +use super::process::abort_with_message; /// emscripten: _emscripten_memcpy_big pub extern "C" fn _emscripten_memcpy_big( @@ -29,3 +29,9 @@ pub extern "C" fn enlarge_memory(_instance: &mut Instance) { debug!("emscripten::enlarge_memory"); // instance.memories[0].grow(100); } + +/// emscripten: abortOnCannotGrowMemory +pub extern "C" fn abort_on_cannot_grow_memory() { + debug!("emscripten::abort_on_cannot_grow_memory"); + abort_with_message("Cannot enlarge memory arrays!"); +} diff --git a/src/apis/emscripten/mod.rs b/src/apis/emscripten/mod.rs index 14c0b67b1..bdb5b4c51 100644 --- 
a/src/apis/emscripten/mod.rs +++ b/src/apis/emscripten/mod.rs @@ -1,3 +1,4 @@ +/// NOTE: TODO: These emscripten api implementation only support wasm32 for now because they assume offsets are u32 use crate::webassembly::{ImportObject, ImportValue}; // EMSCRIPTEN APIS @@ -6,27 +7,70 @@ mod io; mod memory; mod process; mod syscalls; +mod lock; mod utils; mod varargs; +mod errno; +mod storage; +mod nullfunc; -// SYSCALLS pub use self::utils::is_emscripten_module; +pub use self::storage::{align_memory, static_alloc}; pub fn generate_emscripten_env<'a, 'b>() -> ImportObject<&'a str, &'b str> { let mut import_object = ImportObject::new(); + // Global + import_object.set( + "env", + "global1", + ImportValue::Global(24), // TODO + ); + import_object.set( + "env", + "global2", + ImportValue::Global(50), // TODO + ); + import_object.set( + "env", + "global3", + ImportValue::Global(67), // TODO + ); + // Print functions import_object.set("env", "printf", ImportValue::Func(io::printf as *const u8)); import_object.set( "env", "putchar", ImportValue::Func(io::putchar as *const u8), ); - // Emscripten Env + // Lock + import_object.set( + "env", + "___lock", + ImportValue::Func(lock::___lock as *const u8), + ); + import_object.set( + "env", + "___unlock", + ImportValue::Func(lock::___unlock as *const u8), + ); + // Env import_object.set( "env", "_getenv", ImportValue::Func(env::_getenv as *const u8), ); - // Emscripten syscalls + // Errno + import_object.set( + "env", + "___setErrNo", + ImportValue::Func(errno::___seterrno as *const u8), + ); + // Syscalls + import_object.set( + "env", + "___syscall1", + ImportValue::Func(syscalls::___syscall1 as *const u8), + ); import_object.set( "env", "___syscall3", @@ -42,6 +86,11 @@ pub fn generate_emscripten_env<'a, 'b>() -> ImportObject<&'a str, &'b str> { "___syscall5", ImportValue::Func(syscalls::___syscall5 as *const u8), ); + import_object.set( + "env", + "___syscall6", + ImportValue::Func(syscalls::___syscall6 as *const u8), + ); 
import_object.set( "env", "___syscall54", @@ -49,10 +98,25 @@ pub fn generate_emscripten_env<'a, 'b>() -> ImportObject<&'a str, &'b str> { ); import_object.set( "env", - "___syscall122", - ImportValue::Func(syscalls::___syscall122 as *const u8), + "___syscall140", + ImportValue::Func(syscalls::___syscall140 as *const u8), ); - // Emscripten other APIs + import_object.set( + "env", + "___syscall145", + ImportValue::Func(syscalls::___syscall145 as *const u8), + ); + import_object.set( + "env", + "___syscall146", + ImportValue::Func(syscalls::___syscall146 as *const u8), + ); + import_object.set( + "env", + "___syscall221", + ImportValue::Func(syscalls::___syscall221 as *const u8), + ); + // Process import_object.set( "env", "abort", @@ -63,10 +127,16 @@ pub fn generate_emscripten_env<'a, 'b>() -> ImportObject<&'a str, &'b str> { "_abort", ImportValue::Func(process::_abort as *const u8), ); + import_object.set( + "env", + "abortStackOverflow", + ImportValue::Func(process::abort_stack_overflow as *const u8), + ); + // Memory import_object.set( "env", "abortOnCannotGrowMemory", - ImportValue::Func(process::abort_on_cannot_grow_memory as *const u8), + ImportValue::Func(memory::abort_on_cannot_grow_memory as *const u8), ); import_object.set( "env", @@ -83,6 +153,52 @@ pub fn generate_emscripten_env<'a, 'b>() -> ImportObject<&'a str, &'b str> { "getTotalMemory", ImportValue::Func(memory::get_total_memory as *const u8), ); + // NullFuncs + import_object.set( + "env", + "nullFunc_ii", + ImportValue::Func(nullfunc::nullfunc_ii as *const u8), + ); + import_object.set( + "env", + "nullFunc_iii", + ImportValue::Func(nullfunc::nullfunc_iii as *const u8), + ); + import_object.set( + "env", + "nullFunc_iiii", + ImportValue::Func(nullfunc::nullfunc_iiii as *const u8), + ); + import_object.set( + "env", + "nullFunc_iiiii", + ImportValue::Func(nullfunc::nullfunc_iiiii as *const u8), + ); + import_object.set( + "env", + "nullFunc_iiiiii", + ImportValue::Func(nullfunc::nullfunc_iiiiii 
as *const u8), + ); + import_object.set( + "env", + "nullFunc_vi", + ImportValue::Func(nullfunc::nullfunc_vi as *const u8), + ); + import_object.set( + "env", + "nullFunc_vii", + ImportValue::Func(nullfunc::nullfunc_vii as *const u8), + ); + import_object.set( + "env", + "nullFunc_viii", + ImportValue::Func(nullfunc::nullfunc_viii as *const u8), + ); + import_object.set( + "env", + "nullFunc_viiii", + ImportValue::Func(nullfunc::nullfunc_viiii as *const u8), + ); import_object } diff --git a/src/apis/emscripten/nullfunc.rs b/src/apis/emscripten/nullfunc.rs new file mode 100644 index 000000000..43d3ebd05 --- /dev/null +++ b/src/apis/emscripten/nullfunc.rs @@ -0,0 +1,39 @@ +use super::process::abort_with_message; +use crate::webassembly::Instance; + +pub extern "C" fn nullfunc_ii(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'ii'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? (it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + +pub extern "C" fn nullfunc_iii(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'iii'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? (it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + +pub extern "C" fn nullfunc_iiii(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'iiii'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? 
(it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + +pub extern "C" fn nullfunc_iiiii(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'iiiii'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? (it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + +pub extern "C" fn nullfunc_iiiiii(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'iiiiii'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? (it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + +pub extern "C" fn nullfunc_vi(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'vi'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? (it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + +pub extern "C" fn nullfunc_vii(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'vii'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? 
(it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + +pub extern "C" fn nullfunc_viii(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'viii'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? (it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + +pub extern "C" fn nullfunc_viiii(x: u32, instance: &Instance) { + abort_with_message("Invalid function pointer called with signature 'viiii'. Perhaps this is an invalid value (e.g. caused by calling a virtual method on a NULL pointer)? Or calling a function with an incorrect type, which will fail? (it is worth building your source files with -Werror (warnings are errors), as warnings can indicate undefined behavior which can cause this)"); +} + diff --git a/src/apis/emscripten/process.rs b/src/apis/emscripten/process.rs index 89ebffade..95a389ce8 100644 --- a/src/apis/emscripten/process.rs +++ b/src/apis/emscripten/process.rs @@ -1,22 +1,14 @@ -use libc::{ - // size_t, - // ssize_t, - abort, - // c_int, - // c_void, - c_char, -}; +use libc::{abort, c_char}; use crate::webassembly::Instance; use std::ffi::CStr; -extern "C" fn abort_with_message(message: &str) { +pub extern "C" fn abort_with_message(message: &str) { debug!("emscripten::abort_with_message"); println!("{}", message); _abort(); } -/// emscripten: _abort pub extern "C" fn _abort() { debug!("emscripten::_abort"); unsafe { @@ -24,7 +16,6 @@ pub extern "C" fn _abort() { } } -/// emscripten: abort pub extern "C" fn em_abort(message: u32, instance: &mut Instance) { debug!("emscripten::em_abort"); let message_addr = instance.memory_offset_addr(0, message as usize) as *mut c_char; @@ -37,8 +28,8 @@ pub extern "C" fn 
em_abort(message: u32, instance: &mut Instance) { } } -/// emscripten: abortOnCannotGrowMemory -pub extern "C" fn abort_on_cannot_grow_memory() { - debug!("emscripten::abort_on_cannot_grow_memory"); - abort_with_message("Cannot enlarge memory arrays!"); +pub extern "C" fn abort_stack_overflow() { + debug!("emscripten::abort_stack_overflow"); + // TODO: Message incomplete. Need to finish em runtime data first + abort_with_message("Stack overflow! Attempted to allocate some bytes on the stack"); } diff --git a/src/apis/emscripten/storage.rs b/src/apis/emscripten/storage.rs new file mode 100644 index 000000000..380fb3556 --- /dev/null +++ b/src/apis/emscripten/storage.rs @@ -0,0 +1,28 @@ + +use crate::webassembly::{LinearMemory, Instance}; + +pub fn align_memory(size: u32, factor: u32) -> u32 { + assert!(factor != 0, "memory cannot be aligned by 0 offset!"); + if size % factor != 0 { + (size) - (size % factor) + (factor) + } else { + size + } +} + +// pub fn static_alloc(size: u32, instance: &mut Instance) -> u32 { +// let static_top = instance.emscripten_data.static_top; +// let total_memory = instance.memories[0].maximum.unwrap_or(LinearMemory::DEFAULT_HEAP_SIZE as u32); +// instance.emscripten_data.static_top = (static_top + size + 15) & 4294967280; +// assert!(static_top < total_memory, "not enough memory for static allocation - increase total_memory!"); +// static_top +// } + +pub fn static_alloc(size: u32, static_top: &mut u32, memory: &LinearMemory) -> u32 { + let old_static_top = *static_top; + let total_memory = memory.maximum.unwrap_or(LinearMemory::MAX_PAGES as u32) * LinearMemory::PAGE_SIZE; + // NOTE: The `4294967280` is a u32 conversion of -16 as gotten from emscripten. 
+ *static_top = (*static_top + size + 15) & 4294967280; + assert!(*static_top < total_memory, "not enough memory for static allocation - increase total_memory!"); + old_static_top +} diff --git a/src/apis/emscripten/syscalls.rs b/src/apis/emscripten/syscalls.rs index d9d5c395f..9d7b73088 100644 --- a/src/apis/emscripten/syscalls.rs +++ b/src/apis/emscripten/syscalls.rs @@ -1,24 +1,31 @@ -/// NOTE: TODO: These syscalls only support wasm_32 for now because they take u32 offset +/// NOTE: TODO: These syscalls only support wasm_32 for now because they assume offsets are u32 /// Syscall list: https://www.cs.utexas.edu/~bismith/test/syscalls/syscalls32.html use libc::{ - c_int, c_void, utsname, + c_int, c_void, utsname, off_t, ssize_t, write, exit, read, open, close, ioctl, - uname, + uname, fcntl, lseek, readv, + iovec, writev, socklen_t, + sockaddr, socket, bind, + connect, listen, accept, + getsockname, getpeername, + sendto, recvfrom, setsockopt, + getsockopt, sendmsg, recvmsg, + msghdr, }; -#[macro_use] + use macros; use crate::webassembly::Instance; /// sys_exit -pub extern "C" fn ___syscall1(_which: c_int, varargs: c_int, instance: &mut Instance) { +pub extern "C" fn ___syscall1(_which: c_int, mut varargs: c_int, instance: &mut Instance) { debug!("emscripten::___syscall1"); let status = vararg!(i32, instance, varargs); unsafe { exit(status); } } /// sys_read -pub extern "C" fn ___syscall3(_which: c_int, varargs: c_int, instance: &mut Instance) -> ssize_t { +pub extern "C" fn ___syscall3(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> ssize_t { debug!("emscripten::___syscall3"); let fd = vararg!(i32, instance, varargs); let buf = vararg!(u32, instance, varargs); @@ -29,7 +36,7 @@ pub extern "C" fn ___syscall3(_which: c_int, varargs: c_int, instance: &mut Inst } /// sys_write -pub extern "C" fn ___syscall4(_which: c_int, varargs: c_int, instance: &mut Instance) -> c_int { +pub extern "C" fn ___syscall4(_which: c_int, mut varargs: c_int, instance: &mut 
Instance) -> c_int { debug!("emscripten::___syscall4"); let fd = vararg!(i32, instance, varargs); let buf = vararg!(u32, instance, varargs); @@ -40,7 +47,7 @@ pub extern "C" fn ___syscall4(_which: c_int, varargs: c_int, instance: &mut Inst } /// sys_open -pub extern "C" fn ___syscall5(_which: c_int, varargs: c_int, instance: &mut Instance) -> c_int { +pub extern "C" fn ___syscall5(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> c_int { debug!("emscripten::___syscall5"); let pathname = vararg!(u32, instance, varargs); let flags = vararg!(i32, instance, varargs); @@ -51,15 +58,15 @@ pub extern "C" fn ___syscall5(_which: c_int, varargs: c_int, instance: &mut Inst } /// sys_close -pub extern "C" fn ___syscall6(_which: c_int, varargs: c_int, instance: &mut Instance) -> c_int { +pub extern "C" fn ___syscall6(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> c_int { debug!("emscripten::___syscall1"); let fd = vararg!(i32, instance, varargs); debug!("fd: {}", fd); unsafe { close(fd) } } -// sys_ioctl -pub extern "C" fn ___syscall54(_which: c_int, varargs: c_int, instance: &mut Instance) -> c_int { +/// sys_ioctl +pub extern "C" fn ___syscall54(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> c_int { debug!("emscripten::___syscall54"); let fd = vararg!(i32, instance, varargs); let request = vararg!(u64, instance, varargs); @@ -67,11 +74,172 @@ pub extern "C" fn ___syscall54(_which: c_int, varargs: c_int, instance: &mut Ins unsafe { ioctl(fd, request) } } -// sys_newuname -pub extern "C" fn ___syscall122(_which: c_int, varargs: c_int, instance: &mut Instance) -> c_int { +/// sys_uname +// NOTE: Wondering if we should return custom utsname, like Emscripten. 
+pub extern "C" fn ___syscall122(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> c_int { debug!("emscripten::___syscall122"); let buf = vararg!(u32, instance, varargs); debug!("buf: {}", buf); let buf_addr = instance.memory_offset_addr(0, buf as usize) as *mut utsname; - unsafe { uname(buf_addr) } // TODO: Fix implementation + unsafe { uname(buf_addr) } +} + +/// sys_lseek +pub extern "C" fn ___syscall140(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> off_t { + debug!("emscripten::___syscall140"); + let fd = vararg!(i32, instance, varargs); + let offset = vararg!(i64, instance, varargs); + let whence = vararg!(i32, instance, varargs); + debug!("fd: {}, offset: {}, whence = {}", fd, offset, whence); + unsafe { lseek(fd, offset, whence) } +} + +/// sys_readv +pub extern "C" fn ___syscall145(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> ssize_t { + debug!("emscripten::___syscall145"); + let fd = vararg!(i32, instance, varargs); + let iov = vararg!(u32, instance, varargs); // TODO: struct iovec { iov_base: *mut c_void, iov_len: size_t } + let iovcnt = vararg!(i32, instance, varargs); + debug!("fd: {}, iov: {}, iovcnt = {}", fd, iov, iovcnt); + let iov_addr = instance.memory_offset_addr(0, iov as usize) as *mut iovec; + unsafe { readv(fd, iov_addr, iovcnt) } +} + +/// sys_writev +pub extern "C" fn ___syscall146(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> ssize_t { + debug!("emscripten::___syscall146"); + let fd = vararg!(i32, instance, varargs); + let iov = vararg!(u32, instance, varargs); // TODO: struct iovec { iov_base: *mut c_void, iov_len: size_t } + let iovcnt = vararg!(i32, instance, varargs); + debug!("fd: {}, iov: {}, iovcnt = {}", fd, iov, iovcnt); + let iov_addr = instance.memory_offset_addr(0, iov as usize) as *mut iovec; + unsafe { writev(fd, iov_addr, iovcnt) } +} + +/// sys_fcntl64 +pub extern "C" fn ___syscall221(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> c_int { + 
debug!("emscripten::___syscall221"); + let fd = vararg!(i32, instance, varargs); + let cmd = vararg!(i32, instance, varargs); + debug!("fd: {}, cmd: {}", fd, cmd); + unsafe { fcntl(fd, cmd) } +} + +// sys_socketcall +pub extern "C" fn ___syscall102(_which: c_int, mut varargs: c_int, instance: &mut Instance) -> c_int { + debug!("emscripten::___syscall102"); + let call = vararg!(u32, instance, varargs); + match call { + 1 => { // socket (domain: c_int, ty: c_int, protocol: c_int) -> c_int + let domain = vararg!(i32, instance, varargs); + let ty = vararg!(i32, instance, varargs); + let protocol = vararg!(i32, instance, varargs); // NOTE: Emscripten asserts protocol to be TCP (i.e 0x6) + unsafe { socket(domain, ty, protocol) } + }, + 2 => { // bind (socket: c_int, address: *const sockaddr, address_len: socklen_t) -> c_int + // TODO: Emscripten has a different signature. + let socket = vararg!(i32, instance, varargs); + let address = vararg!(u32, instance, varargs); + let address_len = vararg!(u32, instance, varargs); + let address = instance.memory_offset_addr(0, address as usize) as *mut sockaddr; + unsafe { bind(socket, address, address_len) } + }, + 3 => { // connect (socket: c_int, address: *const sockaddr, len: socklen_t) -> c_int + // TODO: Emscripten has a different signature. 
+ let socket = vararg!(i32, instance, varargs); + let address = vararg!(u32, instance, varargs); + let address_len = vararg!(u32, instance, varargs); + let address = instance.memory_offset_addr(0, address as usize) as *mut sockaddr; + unsafe { connect(socket, address, address_len) } + }, + 4 => { // listen (socket: c_int, backlog: c_int) -> c_int + let socket = vararg!(i32, instance, varargs); + let backlog = vararg!(i32, instance, varargs); + unsafe { listen(socket, backlog) } + }, + 5 => { // accept (socket: c_int, address: *mut sockaddr, address_len: *mut socklen_t) -> c_int + let socket = vararg!(i32, instance, varargs); + let address = vararg!(u32, instance, varargs); // TODO: sockaddr has ptr + let address_len = vararg!(u32, instance, varargs); + let address = instance.memory_offset_addr(0, address as usize) as *mut sockaddr; + let address_len_addr = instance.memory_offset_addr(0, address_len as usize) as *mut socklen_t; + unsafe { accept(socket, address, address_len_addr) } + }, + 6 => { // getsockname (socket: c_int, address: *mut sockaddr, address_len: *mut socklen_t) -> c_int + let socket = vararg!(i32, instance, varargs); + let address = vararg!(u32, instance, varargs); // TODO: sockaddr has ptr + let address_len = vararg!(u32, instance, varargs); + let address = instance.memory_offset_addr(0, address as usize) as *mut sockaddr; + let address_len_addr = instance.memory_offset_addr(0, address_len as usize) as *mut socklen_t; + unsafe { getsockname(socket, address, address_len_addr) } + }, + 7 => { // getpeername (socket: c_int, address: *mut sockaddr, address_len: *mut socklen_t) -> c_int + let socket = vararg!(i32, instance, varargs); + let address = vararg!(u32, instance, varargs); // TODO: sockaddr has ptr + let address_len = vararg!(u32, instance, varargs); + let address = instance.memory_offset_addr(0, address as usize) as *mut sockaddr; + let address_len_addr = instance.memory_offset_addr(0, address_len as usize) as *mut socklen_t; + unsafe { 
getpeername(socket, address, address_len_addr) } + }, + 11 => { // sendto (socket: c_int, buf: *const c_void, len: size_t, flags: c_int, addr: *const sockaddr, addrlen: socklen_t) -> ssize_t + let socket = vararg!(i32, instance, varargs); + let buf = vararg!(u32, instance, varargs); + let flags = vararg!(usize, instance, varargs); + let len = vararg!(i32, instance, varargs); + let address = vararg!(u32, instance, varargs); // TODO: sockaddr has ptr + let address_len = vararg!(u32, instance, varargs); + let buf_addr = instance.memory_offset_addr(0, buf as usize) as *mut c_void; + let address = instance.memory_offset_addr(0, address as usize) as *mut sockaddr; + unsafe { sendto(socket, buf_addr, flags, len, address, address_len) as i32 } + }, + 12 => { // recvfrom (socket: c_int, buf: *const c_void, len: size_t, flags: c_int, addr: *const sockaddr, addrlen: socklen_t) -> ssize_t + let socket = vararg!(i32, instance, varargs); + let buf = vararg!(u32, instance, varargs); + let flags = vararg!(usize, instance, varargs); + let len = vararg!(i32, instance, varargs); + let address = vararg!(u32, instance, varargs); // TODO: sockaddr has ptr + let address_len = vararg!(u32, instance, varargs); + let buf_addr = instance.memory_offset_addr(0, buf as usize) as *mut c_void; + let address = instance.memory_offset_addr(0, address as usize) as *mut sockaddr; + let address_len_addr = instance.memory_offset_addr(0, address_len as usize) as *mut socklen_t; + unsafe { recvfrom(socket, buf_addr, flags, len, address, address_len_addr) as i32 } + }, + 14 => { // setsockopt (socket: c_int, level: c_int, name: c_int, value: *const c_void, option_len: socklen_t) -> c_int + let socket = vararg!(i32, instance, varargs); + let level = vararg!(i32, instance, varargs); + let name = vararg!(i32, instance, varargs); + let value = vararg!(u32, instance, varargs); + let option_len = vararg!(u32, instance, varargs); + let value_addr = instance.memory_offset_addr(0, value as usize) as *const c_void; 
+ unsafe { setsockopt(socket, level, name, value_addr, option_len) } + + }, + 15 => { // getsockopt (sockfd: c_int, level: c_int, optname: c_int, optval: *mut c_void, optlen: *mut socklen_t) -> c_int + let socket = vararg!(i32, instance, varargs); + let level = vararg!(i32, instance, varargs); + let name = vararg!(i32, instance, varargs); + let value = vararg!(u32, instance, varargs); + let option_len = vararg!(u32, instance, varargs); + let value_addr = instance.memory_offset_addr(0, value as usize) as *mut c_void; + let option_len_addr = instance.memory_offset_addr(0, option_len as usize) as *mut socklen_t; + unsafe { getsockopt(socket, level, name, value_addr, option_len_addr) } + }, + 16 => { // sendmsg (fd: c_int, msg: *const msghdr, flags: c_int) -> ssize_t + let socket = vararg!(i32, instance, varargs); + let msg = vararg!(u32, instance, varargs); // TODO: msghdr has ptr + let flags = vararg!(i32, instance, varargs); + let msg_addr = instance.memory_offset_addr(0, msg as usize) as *const msghdr; + unsafe { sendmsg(socket, msg_addr, flags) as i32 } + }, + 17 => { // recvmsg (fd: c_int, msg: *mut msghdr, flags: c_int) -> ssize_t + let socket = vararg!(i32, instance, varargs); + let msg = vararg!(u32, instance, varargs); // TODO: msghdr has ptr + let flags = vararg!(i32, instance, varargs); + let msg_addr = instance.memory_offset_addr(0, msg as usize) as *mut msghdr; + unsafe { recvmsg(socket, msg_addr, flags) as i32 } + }, + _ => { // others + -1 + }, + } } diff --git a/src/apis/mod.rs b/src/apis/mod.rs index 964fa86d6..ca6aa9838 100644 --- a/src/apis/mod.rs +++ b/src/apis/mod.rs @@ -1,4 +1,4 @@ pub mod emscripten; pub mod host; -pub use self::emscripten::{generate_emscripten_env, is_emscripten_module}; +pub use self::emscripten::{generate_emscripten_env, is_emscripten_module, align_memory}; diff --git a/src/webassembly/instance.rs b/src/webassembly/instance.rs index 62a458c0c..83dd617d0 100644 --- a/src/webassembly/instance.rs +++ 
b/src/webassembly/instance.rs @@ -29,6 +29,8 @@ use super::memory::LinearMemory; use super::module::{Export, ImportableExportable, Module}; use super::relocation::{Reloc, RelocSink, RelocationType}; +use crate::apis::emscripten::{align_memory, static_alloc}; + type TablesSlice = UncheckedSlice>; type MemoriesSlice = UncheckedSlice>; type GlobalsSlice = UncheckedSlice; @@ -69,7 +71,6 @@ fn get_function_addr( /// An Instance of a WebAssembly module /// NOTE: There is an assumption that data_pointers is always the /// first field -#[repr(C)] #[derive(Debug)] #[repr(C)] pub struct Instance { @@ -97,12 +98,14 @@ pub struct Instance { pub start_func: Option, // Region start memory location // code_base: *const (), + + /// TODO: This should probably be passed as globals to the module. + pub emscripten_data: EmscriptenData, } /// Contains pointers to data (heaps, globals, tables) needed /// by Cranelift. /// NOTE: Rearranging the fields will break the memory arrangement model -#[repr(C)] #[derive(Debug)] #[repr(C)] pub struct DataPointers { @@ -116,6 +119,27 @@ pub struct DataPointers { pub globals: GlobalsSlice, } +#[derive(Debug)] +#[repr(C)] +pub struct EmscriptenData { + pub static_sealed: bool, + + // global section + pub global_base: u32, + pub static_base: u32, + pub static_top: u32, + + // stack + pub total_stack: u32, + pub stack_base: u32, + pub stack_max: u32, + pub stack_top: u32, + + // heap + pub dynamic_base: u32, + pub dynamictop_ptr: u32, +} + pub struct InstanceOptions { // Shall we mock automatically the imported functions if they don't exist? 
pub mock_missing_imports: bool, @@ -467,7 +491,6 @@ impl Instance { _ => None, }); - // TODO: Refactor repetitive code let tables_pointer: Vec> = tables.iter().map(|table| table[..].into()).collect(); let memories_pointer: Vec> = memories @@ -486,7 +509,32 @@ impl Instance { tables: tables_pointer[..].into(), }; - // let mem = data_pointers.memories; + // Emscripten runtime data + // TODO: Find a better implementation. Use global values. + let static_sealed = false; + let global_base = 1024; + let static_base = global_base; + let mut static_top = static_base + 5536; + let total_stack = 5242880; + let stack_base = align_memory(static_top, 16); + let stack_top = stack_base; + let stack_max = stack_base + total_stack; + let dynamic_base = align_memory(stack_max, 16); + let dynamictop_ptr = static_alloc(4, &mut static_top, &memories[0]); + + + let emscripten_data = EmscriptenData { + static_sealed, + global_base, + static_base, + static_top, + total_stack, + stack_base, + stack_top, + stack_max, + dynamic_base, + dynamictop_ptr, + }; Ok(Instance { data_pointers, @@ -496,6 +544,7 @@ impl Instance { functions, import_functions, start_func, + emscripten_data, }) } @@ -532,9 +581,8 @@ impl Instance { .as_ref()[address..address + len] } - pub fn memory_offset_addr(&self, index: usize, offset: usize) -> *const usize { - let mem = &self.memories[index]; - unsafe { mem.mmap.as_ptr().offset(offset as isize) as *const usize } + pub fn memory_offset_addr(&self, index: usize, offset: usize) -> *const u8 { + &self.data_pointers.memories.get_unchecked(index)[offset] as *const u8 } // Shows the value of a global variable. diff --git a/src/webassembly/memory.rs b/src/webassembly/memory.rs index dc50eba91..f559102c5 100644 --- a/src/webassembly/memory.rs +++ b/src/webassembly/memory.rs @@ -7,9 +7,6 @@ use memmap::MmapMut; use std::fmt; use std::ops::{Deref, DerefMut}; -const PAGE_SIZE: u32 = 65536; -const MAX_PAGES: u32 = 65536; - /// A linear memory instance. 
/// pub struct LinearMemory { @@ -28,6 +25,8 @@ pub struct LinearMemory { /// It holds the raw bytes of memory accessed by a WebAssembly Instance impl LinearMemory { + pub const PAGE_SIZE: u32 = 65536; + pub const MAX_PAGES: u32 = 65536; pub const WASM_PAGE_SIZE: usize = 1 << 16; // 64 KiB pub const DEFAULT_HEAP_SIZE: usize = 1 << 32; // 4 GiB pub const DEFAULT_GUARD_SIZE: usize = 1 << 31; // 2 GiB @@ -37,8 +36,8 @@ impl LinearMemory { /// /// `maximum` cannot be set to more than `65536` pages. pub fn new(initial: u32, maximum: Option) -> Self { - assert!(initial <= MAX_PAGES); - assert!(maximum.is_none() || maximum.unwrap() <= MAX_PAGES); + assert!(initial <= Self::MAX_PAGES); + assert!(maximum.is_none() || maximum.unwrap() <= Self::MAX_PAGES); debug!( "Instantiate LinearMemory(initial={:?}, maximum={:?})", initial, maximum @@ -95,8 +94,8 @@ impl LinearMemory { return None; } - let prev_bytes = (prev_pages * PAGE_SIZE) as usize; - let new_bytes = (new_pages * PAGE_SIZE) as usize; + let prev_bytes = (prev_pages * Self::PAGE_SIZE) as usize; + let new_bytes = (new_pages * Self::PAGE_SIZE) as usize; // Updating self.current if new_bytes > prev_bytes if new_bytes > prev_bytes {