Skip to content

Commit

Permalink
Move bytecode db serialization to runtime lib
Browse files Browse the repository at this point in the history
  • Loading branch information
acweathersby committed Sep 26, 2024
1 parent 58b73fd commit c2733a5
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 170 deletions.
2 changes: 1 addition & 1 deletion crates/radlr-bytecode/build_bytecode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ fn build_match<'db>(
// What remains are hash collisions. We use simple linear
// probing to find the next available slot, and
// attach it to the probing chain using a signed
// delta index.
// delta offset.
for (val, offset) in leftover_pairs {
let mut pointer;
let mut prev_node = (val & mod_mask) as usize;
Expand Down
203 changes: 203 additions & 0 deletions crates/radlr-rust-runtime/types/bytecode_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use std::{
rc::Rc,
};

/// Stores information, such as bytecode instructions and symbols to
/// non-terminal id maps, used by parsers and other "runtime tools".
#[derive(Clone, Default, Debug)]
pub struct BytecodeParserDB {
pub bytecode: Vec<u8>,
Expand Down Expand Up @@ -79,3 +81,204 @@ impl<T: ParserInput> ParserProducer<T> for BytecodeParserDB {
Ok(Box::new(ByteCodeParserNew::new(Rc::new(self.bytecode.clone()), self.nonterm_id_to_address.clone())))
}
}

// Export and import functions
pub mod serialize {
use crate::types::{BytecodeParserDB, ParserError};
use std::{collections::HashMap, hash::Hash};

use super::{EntryPoint, Token};

impl BytecodeParserDB {
/// Rebuilds a `BytecodeParserDB` from the byte layout emitted by
/// `export_bytecode_db`.
///
/// The buffer is consumed front to back in this order: a `u32` bytecode
/// length, the raw bytecode, `address_to_state_name`, `token_id_to_str`,
/// `state_name_to_address`, `nonterm_name_to_id`, `state_to_token_ids_map`,
/// `nonterm_id_to_address`, and finally `default_entry`. Any field not listed
/// keeps its `Default` value.
///
/// NOTE(review): this function never returns `Err`; malformed or truncated
/// input is not reported through the `Result`.
pub fn import_bytecode_db(buffer: &[u8]) -> Result<BytecodeParserDB, ParserError> {
    // Read position, advanced by every helper call below.
    let mut cursor = 0;

    // Length-prefixed bytecode section.
    let bytecode_len = read_primitive_at_offset::<u32>(buffer, &mut cursor) as usize;
    let bytecode_end = cursor + bytecode_len;
    let bytecode = buffer[cursor..bytecode_end].to_vec();
    cursor = bytecode_end;

    // The lookup tables follow in exactly the order the exporter wrote them.
    let address_to_state_name = read_hash_id_str(buffer, &mut cursor);
    let token_id_to_str = read_hash_id_str(buffer, &mut cursor);
    let state_name_to_address = read_hash_of_str_id(buffer, &mut cursor);
    let nonterm_name_to_id = read_hash_of_str_id(buffer, &mut cursor);
    let state_to_token_ids_map = read_hash_of_id_vecu32(buffer, &mut cursor);
    let nonterm_id_to_address = read_primitive_hash(buffer, &mut cursor);
    let default_entry = read_primitive_at_offset(buffer, &mut cursor);

    let mut db = BytecodeParserDB::default();
    db.bytecode = bytecode;
    db.address_to_state_name = address_to_state_name;
    db.token_id_to_str = token_id_to_str;
    db.state_name_to_address = state_name_to_address;
    db.nonterm_name_to_id = nonterm_name_to_id;
    db.state_to_token_ids_map = state_to_token_ids_map;
    db.nonterm_id_to_address = nonterm_id_to_address;
    db.default_entry = default_entry;

    Ok(db)
}

/// Serializes the database into a flat byte buffer that
/// `import_bytecode_db` can read back.
///
/// Sections are written in this order: `u32` bytecode length, the raw
/// bytecode, `address_to_state_name`, `token_id_to_str`,
/// `state_name_to_address`, `nonterm_name_to_id`, `state_to_token_ids_map`,
/// `nonterm_id_to_address`, `default_entry`.
///
/// NOTE(review): values are copied byte-for-byte from memory, so the output
/// uses the byte order of the machine that produced it.
pub fn export_bytecode_db(&self) -> Vec<u8> {
    // Width of every count, length prefix and u32 key/value in the format.
    const U32: usize = std::mem::size_of::<u32>();

    // Pre-compute the output size. The write helpers append into the
    // buffer's spare capacity, so this estimate must never be smaller than
    // what is actually written.
    let mut size = U32 + self.bytecode.len();

    // address_to_state_name: HashMap<u32, String> -> count + (key, len, bytes)*
    size += U32 + self.address_to_state_name.values().map(|name| 2 * U32 + name.len()).sum::<usize>();

    // token_id_to_str: HashMap<u32, String> -> count + (key, len, bytes)*
    size += U32 + self.token_id_to_str.values().map(|text| 2 * U32 + text.len()).sum::<usize>();

    // state_name_to_address: HashMap<String, u32> -> count + (value, len, bytes)*
    size += U32 + self.state_name_to_address.keys().map(|name| 2 * U32 + name.len()).sum::<usize>();

    // nonterm_name_to_id: HashMap<String, u32> -> count + (value, len, bytes)*
    size += U32 + self.nonterm_name_to_id.keys().map(|name| 2 * U32 + name.len()).sum::<usize>();

    // state_to_token_ids_map: HashMap<u32, Vec<u32>> -> count + (key, len, ids)*
    // Both the key and the length prefix are written for each entry, so the
    // fixed per-entry overhead is two u32s, not one.
    size += U32 + self.state_to_token_ids_map.values().map(|ids| 2 * U32 + ids.len() * U32).sum::<usize>();

    // nonterm_id_to_address: HashMap<u32, u32> -> count + (key, value)*
    size += U32 + self.nonterm_id_to_address.len() * 2 * U32;

    // ir_token_lookup: BTreeMap<u32, Token>
    // NOTE(review): space is reserved for this table but it is not written
    // below (and the importer does not read it) — confirm whether it is
    // meant to be part of the format.
    size += U32 + self.ir_token_lookup.len() * (U32 + std::mem::size_of::<Token>());

    // default_entry
    size += std::mem::size_of::<EntryPoint>();

    let mut buffer = Vec::<u8>::with_capacity(size);

    write_primitive_to_bytes(&mut buffer, self.bytecode.len() as u32);
    write_bytes(&mut buffer, &self.bytecode);
    write_hash_of_id_str(&mut buffer, &self.address_to_state_name);
    write_hash_of_id_str(&mut buffer, &self.token_id_to_str);
    write_hash_of_str_id(&mut buffer, &self.state_name_to_address);
    write_hash_of_str_id(&mut buffer, &self.nonterm_name_to_id);
    write_hash_of_id_vecu32(&mut buffer, &self.state_to_token_ids_map);
    write_primitive_hash(&mut buffer, &self.nonterm_id_to_address);
    write_primitive_to_bytes(&mut buffer, self.default_entry);

    buffer
}
}

/// Appends an id -> string map to `buffer`.
///
/// Layout: a `u32` entry count, then for every entry the id, the string's
/// byte length as `u32`, and the string's bytes. Entries are emitted in the
/// map's iteration order.
fn write_hash_of_id_str<T: Clone + Copy>(buffer: &mut Vec<u8>, data: &HashMap<T, String>) {
    let entry_count = data.len() as u32;
    write_primitive_to_bytes(buffer, entry_count);

    data.iter().for_each(|(&key, text)| {
        let text_bytes = text.as_bytes();
        write_primitive_to_bytes(buffer, key);
        write_primitive_to_bytes(buffer, text_bytes.len() as u32);
        write_bytes(buffer, text_bytes);
    });
}

/// Reads an id -> string map starting at `*offset` and advances `*offset`
/// past it.
///
/// Expected layout: a `u32` entry count, then for every entry the id, a
/// `u32` byte length, and that many string bytes.
///
/// String bytes are validated: well-formed UTF-8 is returned unchanged, and
/// any invalid sequence is replaced with U+FFFD rather than producing a
/// `String` that violates its UTF-8 invariant.
///
/// # Panics
/// Panics if a string's declared length runs past the end of `buffer`.
fn read_hash_id_str<T: Copy + Clone + Default + Eq + Hash>(buffer: &[u8], offset: &mut usize) -> HashMap<T, String> {
    let entry_count = read_primitive_at_offset::<u32>(buffer, offset) as usize;

    // The count comes from the input, so never pre-allocate more slots than
    // there are bytes left to describe them.
    let remaining = buffer.len().saturating_sub(*offset);
    let mut hash = HashMap::with_capacity(entry_count.min(remaining));

    for _ in 0..entry_count {
        let k = read_primitive_at_offset::<T>(buffer, offset);
        let str_len = read_primitive_at_offset::<u32>(buffer, offset) as usize;
        let end = (*offset).checked_add(str_len).expect("string length overflows the read offset");
        let v = String::from_utf8_lossy(&buffer[*offset..end]).into_owned();
        *offset = end;
        hash.insert(k, v);
    }
    hash
}

/// Appends a string -> id map to `buffer`.
///
/// Layout: a `u32` entry count, then for every entry the id (written
/// first), the string's byte length as `u32`, and the string's bytes.
/// Entries are emitted in the map's iteration order.
fn write_hash_of_str_id<T: Clone + Copy>(buffer: &mut Vec<u8>, data: &HashMap<String, T>) {
    let entry_count = data.len() as u32;
    write_primitive_to_bytes(buffer, entry_count);

    data.iter().for_each(|(name, &id)| {
        let name_bytes = name.as_bytes();
        write_primitive_to_bytes(buffer, id);
        write_primitive_to_bytes(buffer, name_bytes.len() as u32);
        write_bytes(buffer, name_bytes);
    });
}

/// Reads a string -> id map starting at `*offset` and advances `*offset`
/// past it.
///
/// Expected layout: a `u32` entry count, then for every entry the id, a
/// `u32` byte length, and that many string bytes.
///
/// String bytes are validated: well-formed UTF-8 is returned unchanged, and
/// any invalid sequence is replaced with U+FFFD rather than producing a
/// `String` that violates its UTF-8 invariant.
///
/// # Panics
/// Panics if a string's declared length runs past the end of `buffer`.
fn read_hash_of_str_id<T: Copy + Clone + Default + Eq + Hash>(buffer: &[u8], offset: &mut usize) -> HashMap<String, T> {
    let entry_count = read_primitive_at_offset::<u32>(buffer, offset) as usize;

    // The count comes from the input, so never pre-allocate more slots than
    // there are bytes left to describe them.
    let remaining = buffer.len().saturating_sub(*offset);
    let mut hash = HashMap::with_capacity(entry_count.min(remaining));

    for _ in 0..entry_count {
        let v = read_primitive_at_offset::<T>(buffer, offset);
        let str_len = read_primitive_at_offset::<u32>(buffer, offset) as usize;
        let end = (*offset).checked_add(str_len).expect("string length overflows the read offset");
        let k = String::from_utf8_lossy(&buffer[*offset..end]).into_owned();
        *offset = end;
        hash.insert(k, v);
    }
    hash
}

/// Reads one `T` from `buffer` at `*offset` by copying `size_of::<T>()` raw
/// bytes, then advances `*offset` by that many bytes.
///
/// The read is bounds-checked and does not require the source bytes to be
/// aligned for `T`.
///
/// NOTE(review): the bytes are reinterpreted as `T` without validation, so
/// this is only meaningful for types where every bit pattern is a valid
/// value (plain integers and structs of them) — confirm for every `T` used
/// by callers.
///
/// # Panics
/// Panics if fewer than `size_of::<T>()` bytes remain at `*offset`.
fn read_primitive_at_offset<T: Copy + Default>(buffer: &[u8], offset: &mut usize) -> T {
    let size = std::mem::size_of::<T>();
    let end = (*offset).checked_add(size).expect("read offset overflows usize");

    // Slice indexing performs the bounds check; a short buffer panics here
    // instead of reading past the allocation.
    let bytes = &buffer[*offset..end];

    // SAFETY: `bytes` is exactly `size_of::<T>()` readable bytes, and
    // `read_unaligned` places no alignment requirement on the source pointer.
    let value = unsafe { std::ptr::read_unaligned(bytes.as_ptr().cast::<T>()) };

    *offset = end;
    value
}

/// Appends a map of plain-data keys and values to `buffer`.
///
/// Layout: a `u32` entry count followed by each key immediately followed by
/// its value, in the map's iteration order.
fn write_primitive_hash<K: Clone + Copy, V: Clone + Copy>(buffer: &mut Vec<u8>, data: &HashMap<K, V>) {
    let entry_count = data.len() as u32;
    write_primitive_to_bytes(buffer, entry_count);

    data.iter().for_each(|(&key, &value)| {
        write_primitive_to_bytes(buffer, key);
        write_primitive_to_bytes(buffer, value);
    });
}

/// Reads a map of plain-data keys and values starting at `*offset` and
/// advances `*offset` past it.
///
/// Expected layout: a `u32` entry count followed by that many key/value
/// pairs, each key immediately followed by its value.
fn read_primitive_hash<K: Clone + Copy + Eq + Hash + Default, V: Clone + Copy + Default>(
    buffer: &[u8],
    offset: &mut usize,
) -> HashMap<K, V> {
    let entry_count = read_primitive_at_offset::<u32>(buffer, offset) as usize;

    // The count comes from the input; cap the up-front allocation by the
    // number of bytes left so a corrupt count cannot request a huge table.
    // This is only a capacity hint — the map still grows as needed.
    let remaining = buffer.len().saturating_sub(*offset);
    let mut hash = HashMap::with_capacity(entry_count.min(remaining));

    for _ in 0..entry_count {
        let k = read_primitive_at_offset::<K>(buffer, offset);
        let v = read_primitive_at_offset::<V>(buffer, offset);
        hash.insert(k, v);
    }
    hash
}

/// Appends an id -> `Vec<u32>` map to `buffer`.
///
/// Layout: a `u32` entry count, then for every entry the id, the element
/// count as `u32`, and the elements themselves. Entries are emitted in the
/// map's iteration order.
fn write_hash_of_id_vecu32<T: Clone + Copy>(buffer: &mut Vec<u8>, data: &HashMap<T, Vec<u32>>) {
    let entry_count = data.len() as u32;
    write_primitive_to_bytes(buffer, entry_count);

    data.iter().for_each(|(&key, ids)| {
        let id_count = ids.len() as u32;
        write_primitive_to_bytes(buffer, key);
        write_primitive_to_bytes(buffer, id_count);
        write_bytes(buffer, ids.as_slice());
    });
}

/// Reads an id -> `Vec<u32>` map starting at `*offset` and advances
/// `*offset` past it.
///
/// Expected layout: a `u32` entry count, then for every entry the id, a
/// `u32` element count, and that many `u32` elements stored in the byte
/// order of the machine that wrote them.
///
/// # Panics
/// Panics if an entry's declared element count runs past the end of
/// `buffer`.
fn read_hash_of_id_vecu32<T: Copy + Clone + Default + Eq + Hash>(buffer: &[u8], offset: &mut usize) -> HashMap<T, Vec<u32>> {
    const WORD: usize = std::mem::size_of::<u32>();

    let entry_count = read_primitive_at_offset::<u32>(buffer, offset) as usize;

    // The count comes from the input, so never pre-allocate more slots than
    // there are bytes left to describe them.
    let remaining = buffer.len().saturating_sub(*offset);
    let mut hash = HashMap::with_capacity(entry_count.min(remaining));

    for _ in 0..entry_count {
        let k = read_primitive_at_offset::<T>(buffer, offset);
        let vec_size = read_primitive_at_offset::<u32>(buffer, offset) as usize;

        let start = *offset;
        let end = vec_size
            .checked_mul(WORD)
            .and_then(|byte_len| start.checked_add(byte_len))
            .expect("vector length overflows the read offset");

        // Slice indexing bounds-checks the payload; each 4-byte chunk is then
        // decoded in native byte order, matching how the writer copied it.
        let v: Vec<u32> = buffer[start..end]
            .chunks_exact(WORD)
            .map(|word| u32::from_ne_bytes([word[0], word[1], word[2], word[3]]))
            .collect();

        *offset = end;

        hash.insert(k, v);
    }
    hash
}

/// Appends the in-memory bytes of every element of `data` to `buffer`.
///
/// The buffer is grown as needed, so the function is sound regardless of
/// how much capacity the caller pre-allocated.
///
/// NOTE(review): elements are copied byte-for-byte, so multi-byte values
/// land in the byte order of the current machine, and any padding inside `T`
/// would be copied too — intended for padding-free plain data such as `u8`
/// and `u32`.
fn write_bytes<T: Copy + Clone>(buffer: &mut Vec<u8>, data: &[T]) {
    let byte_size = std::mem::size_of_val(data);
    let start = buffer.len();

    // Guarantee room for the copy before touching the spare capacity.
    buffer.reserve(byte_size);

    // SAFETY: `reserve` ensured at least `byte_size` writable bytes after
    // `start`; `data` is a live slice spanning exactly `byte_size` bytes; the
    // two regions cannot overlap because `buffer` is exclusively borrowed.
    // The length is raised only after the bytes have been written.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr().cast::<u8>(), buffer.as_mut_ptr().add(start), byte_size);
        buffer.set_len(start + byte_size);
    }
}

/// Appends the in-memory bytes of a single value to `buffer`.
///
/// The buffer is grown as needed, so the function is sound regardless of
/// how much capacity the caller pre-allocated.
///
/// NOTE(review): the value is copied byte-for-byte, so it is stored in the
/// byte order of the current machine, and any padding inside `T` would be
/// copied too — intended for padding-free plain data.
fn write_primitive_to_bytes<T: Copy>(buffer: &mut Vec<u8>, data: T) {
    let size = std::mem::size_of::<T>();
    let start = buffer.len();

    // Guarantee room for the copy before touching the spare capacity.
    buffer.reserve(size);

    // SAFETY: `reserve` ensured at least `size` writable bytes after `start`;
    // `data` is a live local occupying exactly `size` bytes; the regions
    // cannot overlap. The length is raised only after the bytes are written.
    unsafe {
        std::ptr::copy_nonoverlapping((&data as *const T).cast::<u8>(), buffer.as_mut_ptr().add(start), size);
        buffer.set_len(start + size);
    }
}
}
14 changes: 14 additions & 0 deletions crates/radlr-wasm/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,20 @@ impl From<&Vec<RadlrError>> for PositionedErrors {
}
}

impl From<Vec<RadlrError>> for PositionedErrors {
    /// Converts an owned list of errors by starting from an empty
    /// `PositionedErrors` and passing the list to `extend`.
    fn from(mut errors: Vec<RadlrError>) -> Self {
        let mut positioned = PositionedErrors { vec: Vec::new() };
        positioned.extend(&mut errors);
        positioned
    }
}

impl From<RadlrError> for PositionedErrors {
    /// Converts a single error by wrapping the result of `convert_error` for
    /// it in a `PositionedErrors`.
    fn from(err: RadlrError) -> Self {
        // `err` is only borrowed, so the binding does not need to be `mut`.
        PositionedErrors { vec: convert_error(&err) }
    }
}

impl From<&Vec<&RadlrError>> for PositionedErrors {
fn from(errors: &Vec<&RadlrError>) -> Self {
let mut out = PositionedErrors { vec: vec![] };
Expand Down
Loading

0 comments on commit c2733a5

Please sign in to comment.