summaryrefslogtreecommitdiff
path: root/src/ion.rs
diff options
context:
space:
mode:
authorJesse Morgan <jesse@jesterpm.net>2023-12-28 17:12:54 -0800
committerJesse Morgan <jesse@jesterpm.net>2023-12-28 17:12:54 -0800
commitd04b4dfa993681060d363b5ef994c1a635f3c1f9 (patch)
tree6d7ae65ed7c14e32df50858c0bc28bf4996cd730 /src/ion.rs
Checkpoint
Diffstat (limited to 'src/ion.rs')
-rw-r--r--src/ion.rs241
1 files changed, 241 insertions, 0 deletions
diff --git a/src/ion.rs b/src/ion.rs
new file mode 100644
index 0000000..6773539
--- /dev/null
+++ b/src/ion.rs
@@ -0,0 +1,241 @@
+use std::iter::Copied;
+
/// Type codes of the Ion binary encoding.
///
/// Each discriminant is the 4-bit code stored in the high nibble of a value's
/// type descriptor byte, so `IonType::Struct as u8` (`0x0d`) corresponds to
/// descriptor bytes of the form `0xD_`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IonType {
    Null = 0x00,
    Bool = 0x01,
    /// Positive integer (magnitude stored big-endian).
    IntPos = 0x02,
    /// Negative integer.
    IntNeg = 0x03,
    Float = 0x04,
    Decimal = 0x05,
    Timestamp = 0x06,
    Symbol = 0x07,
    String = 0x08,
    Clob = 0x09,
    Blob = 0x0a,
    List = 0x0b,
    Sexp = 0x0c,
    Struct = 0x0d,
    Annotations = 0x0e,
}
+
/// A single Ion value stored as its raw encoded bytes.
///
/// The first byte is the type descriptor; any remaining bytes are the rest of
/// the encoding. Equality and ordering compare the raw bytes lexicographically.
#[derive(Debug, Clone, PartialOrd, PartialEq, Eq)]
pub struct IonValue(Vec<u8>);
+
+impl IonValue {
+ pub fn new_null() -> IonValue {
+ IonValue(vec![0x0F])
+ }
+
+ pub fn ion_type(&self) -> u8 {
+ self.0[0] >> 4
+ }
+
+ pub fn len(&self) -> usize {
+ // TODO: Should this read the type length?
+ self.0.len()
+ }
+
+ pub fn repr_offset(&self) -> usize {
+ let mut reader = self.reader();
+ let tl = reader.next_byte();
+ let _len = reader.extract_length(tl);
+ reader.offset()
+ }
+
+ pub fn bytes(&self) -> impl DoubleEndedIterator<Item = u8> + '_ {
+ self.0.iter().copied()
+ }
+
+ pub fn reader(&self) -> IonReader<impl DoubleEndedIterator<Item = u8> + '_> {
+ IonReader::new(self.0.iter().copied(), 0)
+ }
+
+ pub fn reader_at(&self, offset: usize) -> IonReader<Copied<std::slice::Iter<'_, u8>>> {
+ IonReader::new(self.0[offset..].iter().copied(), offset)
+ }
+
+ pub fn struct_reader_at(&self, offset: usize) -> IonReader<Copied<std::slice::Iter<'_, u8>>> {
+ IonReader::new_struct(self.0[offset..].iter().copied(), offset)
+ }
+
+ pub fn to_usize(&self) -> usize {
+ self.reader().next_usize()
+ }
+
+}
+
+impl From<Vec<u8>> for IonValue {
+ fn from(value: Vec<u8>) -> Self {
+ if value.len() > 0 {
+ IonValue(value)
+ } else {
+ IonValue(vec![0x0F])
+ }
+ }
+}
+
+
/// Pull-style decoder over a stream of Ion binary bytes.
#[derive(Debug, Clone)]
pub struct IonReader<T> {
    /// Source of encoded bytes.
    iter: T,
    /// Starting offset passed at construction plus the number of bytes consumed.
    offset: usize,
    /// When true, a VarUInt field name is read before each value.
    is_struct: bool,
    /// Field name read ahead of the most recent value, if any.
    field_name: Option<usize>,
}
+
+impl <T> IonReader<T>
+ where T: Iterator<Item = u8>
+{
+ pub fn new(iter: T, offset: usize) -> IonReader<T> {
+ IonReader {
+ iter,
+ offset,
+ is_struct: false,
+ field_name: None,
+ }
+ }
+
+ pub fn new_struct(iter: T, offset: usize) -> IonReader<T> {
+ IonReader {
+ iter,
+ offset,
+ is_struct: true,
+ field_name: None,
+ }
+ }
+
+ pub fn offset(&self) -> usize {
+ self.offset
+ }
+
+ pub fn field_id(&self) -> Option<usize> {
+ self.field_name
+ }
+
+ /// Move the iterator to the first nested value.
+ pub fn step_in(&mut self) -> (u8, usize) {
+ self.prepare_next();
+ let tl = self.next_byte();
+ let len = self.extract_length(tl);
+ if (tl & 0xF0) == 0xD0 {
+ self.is_struct = true;
+ }
+ (tl >> 4, self.offset + len)
+ }
+
+ fn prepare_next(&mut self) {
+ if self.is_struct {
+ // TODO: symbol length can be greater than max usize.
+ self.field_name = Some(self.next_varuint());
+ }
+ }
+
+ pub fn skip_value(&mut self) {
+ self.prepare_next();
+ let tl = self.next_byte();
+ let len = self.extract_length(tl);
+ for _ in 0..len {
+ self.next_byte();
+ }
+ }
+
+ pub fn next_value(&mut self) -> IonValue {
+ self.prepare_next();
+ let tl = self.next_byte();
+ let (mut buf, len) = match tl & 0x0F {
+ 0 | 15 => (vec![tl], 0),
+ 14 => {
+ let l = self.next_varuint();
+ let mut v = Vec::with_capacity(5 + l);
+ v.push(tl);
+ push_varuint(&mut v, l);
+ (v, l)
+ },
+ len => {
+ let l = len.into();
+ let mut v = Vec::with_capacity(5 + l);
+ v.push(tl);
+ (v, l)
+ }
+ };
+ for _ in 0..len {
+ let b = self.next_byte();
+ buf.push(b);
+ }
+ buf.into()
+ }
+
+ pub fn next_usize(&mut self) -> usize {
+ self.prepare_next();
+ let tl = self.next_byte();
+ if tl & 0xF0 != 0x20 {
+ panic!("Not a positive integer");
+ }
+
+ let len = self.extract_length(tl);
+ if len * 8 > usize::BITS as usize {
+ panic!("Integer too large for usize");
+ }
+
+ let mut value = 0;
+ for _ in 0..len {
+ let b = self.next_byte();
+ value <<= 8;
+ value |= b as usize;
+ }
+ value
+ }
+
+ fn next_byte(&mut self) -> u8 {
+ self.offset += 1;
+ self.iter.next().expect("Missing data")
+ }
+
+ fn extract_length(&mut self, tl: u8) -> usize {
+ match tl & 0x0F {
+ 0 | 15 => 0,
+ 14 => self.next_varuint(),
+ len => len.into(),
+ }
+ }
+
+ fn next_varuint(&mut self) -> usize {
+ let mut v: usize = 0;
+ while let Some(b) = self.iter.next() {
+ self.offset += 1;
+ v <<= 7;
+ v |= (b & 0x7f) as usize;
+ if b & 0x80 != 0 {
+ return v;
+ }
+ }
+ panic!("Truncated varuint");
+ }
+}
+
/// Appends `value` to `v` as a VarUInt: 7-bit groups, most significant group
/// first, with the high bit set only on the final byte. Zero is written as the
/// single byte `0x80`.
fn push_varuint(v: &mut Vec<u8>, value: usize) {
    // Number of 7-bit groups needed for the significant bits; at least one so
    // that zero still produces a terminator byte.
    let significant_bits = usize::BITS - value.leading_zeros();
    let groups = ((significant_bits + 6) / 7).max(1);

    for group in (0..groups).rev() {
        let septet = ((value >> (7 * group)) & 0x7F) as u8;
        let byte = if group == 0 { septet | 0x80 } else { septet };
        v.push(byte);
    }
}
+
/// Decodes a VarUInt from the front of `buf`.
///
/// Groups of seven bits are accumulated most significant first, up to and
/// including the first byte whose high bit is set. If no byte has the high bit
/// set, every byte of `buf` is used. Bits shifted beyond the width of `usize`
/// are discarded.
fn parse_varuint(buf: &[u8]) -> usize {
    // One past the terminating byte, or the whole slice when there is none.
    let end = buf
        .iter()
        .position(|byte| byte & 0x80 != 0)
        .map_or(buf.len(), |index| index + 1);

    buf[..end]
        .iter()
        .fold(0usize, |acc, byte| (acc << 7) | (byte & 0x7F) as usize)
}