diff options
Diffstat (limited to 'src/ion.rs')
-rw-r--r-- | src/ion.rs | 241 |
1 files changed, 241 insertions, 0 deletions
// Minimal reader for values in the Amazon Ion binary encoding.
//
// Every value starts with a type descriptor byte: the high nibble is the type
// code (see `IonType`) and the low nibble `L` describes the length of the
// representation that follows.

use std::iter::Copied;

/// Type codes found in the high nibble of a type descriptor byte.
pub enum IonType {
    Null = 0x00,
    Bool = 0x01,
    IntPos = 0x02,
    IntNeg = 0x03,
    Float = 0x04,
    Decimal = 0x05,
    Timestamp = 0x06,
    Symbol = 0x07,
    String = 0x08,
    Clob = 0x09,
    Blob = 0x0a,
    List = 0x0b,
    Sexp = 0x0c,
    Struct = 0x0d,
    Annotations = 0x0e,
}

/// An owned, encoded value: type descriptor, optional length field and body.
///
/// The wrapped buffer is never empty; an empty input is replaced by the
/// single byte `0x0F`.
#[derive(PartialOrd, PartialEq)]
pub struct IonValue(Vec<u8>);

impl IonValue {
    /// Creates the one-byte value `0x0F` (type code 0, length nibble 15).
    pub fn new_null() -> IonValue {
        IonValue(vec![0x0F])
    }

    /// Returns the type code, i.e. the high nibble of the first byte.
    pub fn ion_type(&self) -> u8 {
        self.0[0] >> 4
    }

    /// Returns the number of encoded bytes held, header included.
    pub fn len(&self) -> usize {
        // TODO: Should this read the type length?
        self.0.len()
    }

    /// Returns the offset of the first byte after the type descriptor and its
    /// optional VarUInt length field.
    pub fn repr_offset(&self) -> usize {
        let mut reader = self.reader();
        let tl = reader.next_byte();
        // Only the side effect matters: decoding the length advances the reader.
        let _len = reader.extract_length(tl);
        reader.offset()
    }

    /// Iterates over all encoded bytes of this value.
    pub fn bytes(&self) -> impl DoubleEndedIterator<Item = u8> + '_ {
        self.0.iter().copied()
    }

    /// Returns a reader positioned at the first byte of this value.
    pub fn reader(&self) -> IonReader<impl DoubleEndedIterator<Item = u8> + '_> {
        IonReader::new(self.0.iter().copied(), 0)
    }

    /// Returns a reader positioned at `offset` within this value.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is greater than [`IonValue::len`].
    pub fn reader_at(&self, offset: usize) -> IonReader<Copied<std::slice::Iter<'_, u8>>> {
        IonReader::new(self.0[offset..].iter().copied(), offset)
    }

    /// Like [`IonValue::reader_at`], but the reader expects a field name
    /// VarUInt in front of every value.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is greater than [`IonValue::len`].
    pub fn struct_reader_at(&self, offset: usize) -> IonReader<Copied<std::slice::Iter<'_, u8>>> {
        IonReader::new_struct(self.0[offset..].iter().copied(), offset)
    }

    /// Decodes this value as a positive integer.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as [`IonReader::next_usize`].
    pub fn to_usize(&self) -> usize {
        self.reader().next_usize()
    }
}

impl From<Vec<u8>> for IonValue {
    /// Wraps already-encoded bytes; an empty vector becomes `0x0F`.
    fn from(value: Vec<u8>) -> Self {
        if value.is_empty() {
            IonValue::new_null()
        } else {
            IonValue(value)
        }
    }
}

/// Sequential decoder over a byte iterator.
///
/// `offset` counts every byte consumed, starting from the value passed to the
/// constructor.
pub struct IonReader<T> {
    iter: T,
    offset: usize,
    is_struct: bool,
    field_name: Option<usize>,
}

impl<T> IonReader<T>
where
    T: Iterator<Item = u8>,
{
    /// Creates a reader over `iter` whose first byte is at position `offset`.
    pub fn new(iter: T, offset: usize) -> IonReader<T> {
        IonReader {
            iter,
            offset,
            is_struct: false,
            field_name: None,
        }
    }

    /// Creates a reader that reads a field name VarUInt before every value.
    pub fn new_struct(iter: T, offset: usize) -> IonReader<T> {
        IonReader {
            iter,
            offset,
            is_struct: true,
            field_name: None,
        }
    }

    /// Returns the position of the next unread byte.
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Returns the field name read in front of the most recent value, if the
    /// reader was in struct mode at that point.
    pub fn field_id(&self) -> Option<usize> {
        self.field_name
    }

    /// Move the iterator to the first nested value.
    ///
    /// Returns the type code of the value stepped into and the offset at
    /// which its body ends. Stepping into a struct switches the reader to
    /// struct mode.
    pub fn step_in(&mut self) -> (u8, usize) {
        self.prepare_next();
        let tl = self.next_byte();
        let len = self.extract_length(tl);
        // NOTE(review): the flag is only ever set, never cleared, so stepping
        // into a list from struct mode keeps field-name parsing on — confirm.
        if (tl & 0xF0) == 0xD0 {
            self.is_struct = true;
        }
        (tl >> 4, self.offset + len)
    }

    /// In struct mode, consumes the field name that precedes the next value.
    fn prepare_next(&mut self) {
        if self.is_struct {
            // Symbol ids wider than usize make `next_varuint` panic.
            self.field_name = Some(self.next_varuint());
        }
    }

    /// Consumes the next value without keeping its bytes.
    ///
    /// # Panics
    ///
    /// Panics if the input ends before the value does.
    pub fn skip_value(&mut self) {
        self.prepare_next();
        let tl = self.next_byte();
        let len = self.extract_length(tl);
        for _ in 0..len {
            self.next_byte();
        }
    }

    /// Consumes the next value and returns its bytes (header, length field
    /// and body) as an [`IonValue`].
    ///
    /// The length field is re-encoded in its shortest VarUInt form.
    ///
    /// # Panics
    ///
    /// Panics if the input ends before the value does.
    pub fn next_value(&mut self) -> IonValue {
        self.prepare_next();
        let tl = self.next_byte();
        let len = self.extract_length(tl);

        let mut buf = Vec::with_capacity(len.saturating_add(5));
        buf.push(tl);
        if has_varuint_length(tl) {
            push_varuint(&mut buf, len);
        }
        for _ in 0..len {
            buf.push(self.next_byte());
        }
        buf.into()
    }

    /// Consumes the next value and decodes it as a big-endian positive
    /// integer.
    ///
    /// # Panics
    ///
    /// Panics if the value's type code is not 2, if its magnitude is wider
    /// than `usize`, or if the input is truncated.
    pub fn next_usize(&mut self) -> usize {
        self.prepare_next();
        let tl = self.next_byte();
        if tl & 0xF0 != 0x20 {
            panic!("Not a positive integer");
        }

        let len = self.extract_length(tl);
        // Compare in bytes: `len * 8` could wrap for a huge declared length.
        if len > (usize::BITS / 8) as usize {
            panic!("Integer too large for usize");
        }

        let mut value: usize = 0;
        for _ in 0..len {
            value = (value << 8) | usize::from(self.next_byte());
        }
        value
    }

    /// Consumes one byte, panicking with "Missing data" at end of input.
    fn next_byte(&mut self) -> u8 {
        self.offset += 1;
        self.iter.next().expect("Missing data")
    }

    /// Returns the body length announced by descriptor `tl`, consuming the
    /// VarUInt length field when the descriptor has one.
    fn extract_length(&mut self, tl: u8) -> usize {
        if has_varuint_length(tl) {
            return self.next_varuint();
        }
        match (tl >> 4, tl & 0x0F) {
            // Bool keeps its value in L and never has a body; L = 0 is an
            // empty body and L = 15 a typed null.
            (0x1, _) | (_, 0) | (_, 15) => 0,
            (_, len) => len.into(),
        }
    }

    /// Reads a VarUInt: big-endian 7-bit groups, high bit set on the last one.
    ///
    /// # Panics
    ///
    /// Panics if the value does not fit in `usize` or the input ends before
    /// the terminating byte.
    fn next_varuint(&mut self) -> usize {
        let mut v: usize = 0;
        for b in self.iter.by_ref() {
            self.offset += 1;
            // Shifting would silently drop the top bits past this bound.
            if v > (usize::MAX >> 7) {
                panic!("VarUInt too large for usize");
            }
            v = (v << 7) | usize::from(b & 0x7f);
            if b & 0x80 != 0 {
                return v;
            }
        }
        panic!("Truncated varuint");
    }
}

/// Returns true when descriptor `tl` is followed by a VarUInt length field:
/// L = 14 for every type except bool, plus L = 1 for structs (sorted struct).
fn has_varuint_length(tl: u8) -> bool {
    let (ty, l) = (tl >> 4, tl & 0x0F);
    ty != 0x1 && (l == 14 || (ty == 0xD && l == 1))
}

/// Appends `value` to `v` as a VarUInt in its shortest form.
fn push_varuint(v: &mut Vec<u8>, mut value: usize) {
    // Groups are produced least significant first, then emitted reversed.
    let mut buf = [0u8; (usize::BITS / 7 + 1) as usize];
    let mut pos = 0;
    while value != 0 {
        buf[pos] = (value & 0x7F) as u8;
        value >>= 7;
        pos += 1;
    }
    // The least significant group is written last, so it carries the end flag.
    buf[0] |= 0x80;
    // Zero still needs one group.
    pos = pos.max(1);

    v.extend(buf[..pos].iter().rev());
}

/// Decodes a VarUInt from the start of `buf`, stopping at the first byte with
/// the high bit set or at the end of the slice.
fn parse_varuint(buf: &[u8]) -> usize {
    let mut value: usize = 0;
    for b in buf {
        value <<= 7;
        value |= (b & 0x7F) as usize;
        if b & 0x80 != 0 {
            break;
        }
    }
    value
}