diff options
Diffstat (limited to 'src/de/parse.rs')
-rw-r--r-- | src/de/parse.rs | 367 |
1 files changed, 207 insertions, 160 deletions
diff --git a/src/de/parse.rs b/src/de/parse.rs index a4d33b9..b296087 100644 --- a/src/de/parse.rs +++ b/src/de/parse.rs @@ -1,5 +1,9 @@ +use percent_encoding; use serde::de; +use std::borrow::Cow; +use std::slice::Iter; + use super::*; /// To override the default serialization parameters, first construct a new @@ -56,107 +60,165 @@ impl Config { impl Config { /// Deserializes a querystring from a `&[u8]` using this `Config`. pub fn deserialize_bytes<'de, T: de::Deserialize<'de>>(&self, - input: &[u8]) + input: &'de [u8]) -> Result<T> { T::deserialize(QsDeserializer::with_config(self, input)) } /// Deserializes a querystring from a `&str` using this `Config`. pub fn deserialize_str<'de, T: de::Deserialize<'de>>(&self, - input: &str) + input: &'de str) -> Result<T> { self.deserialize_bytes(input.as_bytes()) } - - /// Deserializes a querystring from a reader using this `Config`. - pub fn deserialize_reader<'de, T, R>(&self, mut reader: R) -> Result<T> - where T: de::Deserialize<'de>, - R: Read, - { - let mut buf = vec![]; - let _ = reader.read_to_end(&mut buf).map_err(Error::from)?; - self.deserialize_bytes(&buf) - } } macro_rules! tu { ($x:expr) => ( match $x { - Some(x) => x, + Some(x) => *x, None => return Err( de::Error::custom("query string ended before expected")) } ) } +impl<'a> Level<'a> { + /// If this `Level` value is indeed a map, then attempt to insert + /// `value` for key `key`. + /// Returns error if `self` is not a map, or already has an entry for that + /// key. + fn insert_map_value(&mut self, key: Cow<'a, str>, value: Cow<'a, str>) { + if let Level::Nested(ref mut map) = *self { + match map.entry(key) { + Entry::Occupied(mut o) => { + // Throw away old result; map is now invalid anyway. + let _ = o.insert(Level::Invalid("Multiple values for one key")); + }, + Entry::Vacant(vm) => { + // Map is empty, result is None + let _ = vm.insert(Level::Flat(value)); + }, + } + } else if let Level::Uninitialised = *self { + let mut map = BTreeMap::default(); + let _ = map.insert(key, Level::Flat(value)); + *self = Level::Nested(map); + } else { + *self = Level::Invalid("Attempted to insert map value into non-map structure"); + } + } + + /// If this `Level` value is indeed a seq, then attempt to insert + /// `value` for key `key`. + /// Returns error if `self` is not a seq, or already has an entry for that + /// key. + fn insert_ord_seq_value(&mut self, key: usize, value: Cow<'a, str>) { + if let Level::OrderedSeq(ref mut map) = *self { + match map.entry(key) { + Entry::Occupied(mut o) => { + // Throw away old result; map is now invalid anyway. + let _ = o.insert(Level::Invalid("Multiple values for one key")); + }, + Entry::Vacant(vm) => { + // Map is empty, result is None + let _ = vm.insert(Level::Flat(value)); + }, + } + } else if let Level::Uninitialised = *self { + // To reach here, self is either an OrderedSeq or nothing. + let mut map = BTreeMap::default(); + let _ = map.insert(key, Level::Flat(value)); + *self = Level::OrderedSeq(map); + } else { + *self = Level::Invalid("Attempted to insert seq value into non-seq structure"); + } + } +} + use std::iter::Iterator; use std::str; -pub struct Parser<I: Iterator<Item = u8>> { - inner: I, - acc: Vec<u8>, - peeked: Option<u8>, +pub struct Parser<'a> { + inner: &'a [u8], + iter: Iter<'a, u8>, + // `acc` stores an index range for the current value + acc: (usize, usize), + peeked: Option<&'a u8>, depth: usize, } -impl<I: Iterator<Item = u8>> Iterator for Parser<I> { - type Item = u8; - #[inline] - fn next(&mut self) -> Option<Self::Item> { - self.inner.next() +use std::fmt; +impl<'a> fmt::Debug for Parser<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Parser\n\tinner: {}\n\tcurrent: {:?}\n\tpeeked: {:?}", + String::from_utf8_lossy(self.inner), + self.acc, + // String::from_utf8_lossy(&self.inner[self.acc.0..self.acc.1 - 1]), + self.peeked + ) } } - -fn insert_into_map(node: &mut Level, key: String, value: String) { - if let Level::Nested(ref mut map) = *node { - match map.entry(key) { - Entry::Occupied(mut o) => { - // Throw away old result; map is now invalid anyway. - let _ = o.insert(Level::Invalid("Multiple values for one key")); - }, - Entry::Vacant(vm) => { - // Map is empty, result is None - let _ = vm.insert(Level::Flat(value)); - }, +impl<'a> Iterator for Parser<'a> { + type Item = &'a u8; + #[inline] + fn next(&mut self) -> Option<Self::Item> { + match self.peeked.take() { + Some(v) => Some(v), + None => { + self.acc.1 += 1; + self.iter.next() + } } - } else { - // To reach here, node is either an Nested or nothing. - let mut map = BTreeMap::default(); - let _ = map.insert(key, Level::Flat(value)); - *node = Level::Nested(map); } } -fn insert_into_ord_seq(node: &mut Level, key: usize, value: String) { - if let Level::OrderedSeq(ref mut map) = *node { - match map.entry(key) { - Entry::Occupied(mut o) => { - // Throw away old result; map is now invalid anyway. - let _ = o.insert(Level::Invalid("Multiple values for one key")); - }, - Entry::Vacant(vm) => { - // Map is empty, result is None - let _ = vm.insert(Level::Flat(value)); - }, + +/// Replace b'+' with b' ' +/// Copied from `form_urlencoded` +fn replace_plus<'a>(input: Cow<'a, str>) -> Cow<'a, str> { + match input.as_bytes().iter().position(|&b| b == b'+') { + None => input, + Some(first_position) => { + let mut replaced = input.as_bytes().to_owned(); + replaced[first_position] = b' '; + for byte in &mut replaced[first_position + 1..] { + if *byte == b'+' { + *byte = b' '; + } + } + Cow::Owned(String::from_utf8(replaced).expect("replacing '+' with ' ' cannot panic")) } - } else { - // To reach here, node is either an OrderedSeq or nothing. - let mut map = BTreeMap::default(); - let _ = map.insert(key, Level::Flat(value)); - *node = Level::OrderedSeq(map); } } -impl<I: Iterator<Item = u8>> Parser<I> { - pub fn new(inner: I, acc: Vec<u8>, peeked: Option<u8>, depth: usize) -> Self { +impl<'a> Parser<'a> { + pub fn new(encoded: &'a [u8], depth: usize) -> Self { Parser { - inner, acc, peeked, depth + inner: encoded, + iter: encoded.iter(), + acc: (0, 0), + peeked: None, + depth: depth, } } - pub fn as_deserializer(&mut self) -> QsDeserializer { + fn clear_acc(&mut self) { + self.acc.0 = self.acc.1; + } + + fn decode_acc(&mut self) -> Result<Cow<'a, str>> { + let res: Cow<'a, str> = percent_encoding::percent_decode(&self.inner[self.acc.0..self.acc.1 - 1]).decode_utf8()?; + let res: Result<Cow<'a, str>> = Ok(replace_plus(res)); + println!("({}, {})", self.acc.0, self.acc.1); + self.clear_acc(); + println!("Decoded: {:?}", res); + res.map_err(Error::from) + } + + pub fn as_deserializer(&mut self) -> QsDeserializer<'a> { let map = BTreeMap::default(); let mut root = Level::Nested(map); while let Ok(x) = self.parse(&mut root) { @@ -176,10 +238,9 @@ impl<I: Iterator<Item = u8>> Parser<I> { #[inline] fn peek(&mut self) -> Option<<Self as Iterator>::Item> { - if !self.acc.is_empty() { + if self.peeked.is_some() { self.peeked - } else if let Some(x) = self.inner.next() { - self.acc.push(x); + } else if let Some(x) = self.next() { self.peeked = Some(x); Some(x) } else { @@ -190,95 +251,81 @@ impl<I: Iterator<Item = u8>> Parser<I> { fn parse_key(&mut self, end_on: u8, consume: bool) - -> Result<String> { + -> Result<Cow<'a, str>> { loop { if let Some(x) = self.next() { - match x { - x if x == end_on => { - let res = String::from_utf8(self.acc.split_off(0)); - self.acc.clear(); - + match *x { + c if c == end_on => { // Add this character back to the buffer for peek. if !consume { - self.acc.push(x); self.peeked = Some(x); } - return res.map_err(Error::from); + + return self.decode_acc().map_err(Error::from); }, b'=' => { - // Allow the '=' byte when parsing keys within [] - if end_on == b']' { - self.acc.push(b'='); - } else { - let res = String::from_utf8(self.acc.split_off(0)); - self.acc.clear(); - + // Allow the '=' byte only when parsing keys within [] + if end_on != b']' { + let res = self.decode_acc(); // Add this character back to the buffer for peek. - self.acc.push(b'='); - self.peeked = Some(b'='); + self.peeked = Some(x); return res.map_err(Error::from); } }, - b' ' => { - self.acc.push(b' '); - }, + b'&' => { - let res = String::from_utf8(self.acc.split_off(0)); - self.acc.clear(); - self.acc.push(b'&'); - self.peeked = Some(b'&'); + let res = self.decode_acc(); + // let res = String::from_utf8(self.acc.split_off(0)); + // self.acc.clear(); + // self.acc.push(b'&'); + // self.acc. + self.peeked = Some(&b'&'); return res.map_err(Error::from); }, - x @ 0x20...0x7e => { - self.acc.push(x); - }, + // x @ 0x20...0x7e | x @ ' ' => { + // self.acc.push(x); + // }, _ => { - return Err(de::Error::custom("unexpected character \ - in query string.")); + // do nothing, keep adding to key + // return Err(de::Error::custom("unexpected character \ + // in query string.")); }, } } else { - let res = String::from_utf8(self.acc.split_off(0)); - self.acc.clear(); + let res = self.decode_acc(); + // self.acc.clear(); return res.map_err(Error::from); } } } fn parse_map_value(&mut self, - key: String, - node: &mut Level) + key: Cow<'a, str>, + node: &mut Level<'a>) -> Result<()> { - if let Some(x) = self.peek() { - match x { + let res = if let Some(x) = self.peek() { + match *x { b'=' => { - self.acc.clear(); - for b in self.inner.by_ref().take_while(|b| b != &b'&') { - if b == b'+' { - self.acc.push(b' '); - } else { - self.acc.push(b); - } - } - let value = String::from_utf8(self.acc.split_off(0)); - let value = value.map_err(Error::from)?; + self.clear_acc(); + for _ in self.take_while(|b| *b != &b'&') {} + let value: Cow<'a, str> = self.decode_acc()?; // Reached the end of the key string - insert_into_map(node, key, value); + node.insert_map_value(key, value); Ok(()) }, b'&' => { - insert_into_map(node, key, "".to_string()); + node.insert_map_value(key, Cow::Borrowed("")); Ok(()) }, b'[' => { - if let Level::Invalid(_) = *node { + if let Level::Uninitialised = *node { *node = Level::Nested(BTreeMap::default()); } if let Level::Nested(ref mut map) = *node { self.depth -= 1; let _ = self.parse(map.entry(key) - .or_insert(Level::Invalid("uninitialised")))?; + .or_insert(Level::Uninitialised))?; Ok(()) } else { Err(de::Error::custom(format!("tried to insert a \ @@ -291,24 +338,20 @@ impl<I: Iterator<Item = u8>> Parser<I> { }, } } else { - insert_into_map(node, key, "".to_string()); + node.insert_map_value(key, Cow::Borrowed("")); Ok(()) - } + }; + self.depth +=1; + res } - fn parse_seq_value(&mut self, node: &mut Level) -> Result<()> { - match tu!(self.peek()) { + fn parse_seq_value(&mut self, node: &mut Level<'a>) -> Result<()> { + let res = match tu!(self.peek()) { b'=' => { - self.acc.clear(); - for b in self.inner.by_ref().take_while(|b| b != &b'&') { - if b == b'+' { - self.acc.push(b' '); - } else { - self.acc.push(b); - } - } - let value = String::from_utf8(self.acc.split_off(0)); - let value = value.map_err(Error::from)?; + self.clear_acc(); + // Iterate through until finding '&' character. + for _ in self.take_while(|b| *b != &b'&') {} + let value = self.decode_acc()?; // Reached the end of the key string if let Level::Sequence(ref mut seq) = *node { seq.push(Level::Flat(value)); @@ -323,39 +366,35 @@ impl<I: Iterator<Item = u8>> Parser<I> { Err(de::Error::custom("non-indexed sequence of structs not \ supported")) }, - } + }; + self.depth += 1; + res } - fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level) -> Result<()> { - if let Some(x) = self.peek() { - match x { + fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> { + let res = if let Some(x) = self.peek() { + match *x { b'=' => { - self.acc.clear(); - for b in self.inner.by_ref().take_while(|b| b != &b'&') { - if b == b'+' { - self.acc.push(b' '); - } else { - self.acc.push(b); - } - } - let value = String::from_utf8(self.acc.split_off(0)); - let value = value.map_err(Error::from)?; + self.clear_acc(); + // Iterate through until finding '&' character. + for _ in self.take_while(|b| *b != &b'&') {} + let value = self.decode_acc()?; // Reached the end of the key string - insert_into_ord_seq(node, key, value); + node.insert_ord_seq_value(key, value); Ok(()) }, b'&' => { - insert_into_ord_seq(node, key, "".to_string()); + node.insert_ord_seq_value(key, Cow::Borrowed("")); Ok(()) }, b'[' => { - if let Level::Invalid(_) = *node { + if let Level::Uninitialised = *node { *node = Level::OrderedSeq(BTreeMap::default()); } if let Level::OrderedSeq(ref mut map) = *node { self.depth -= 1; let _ = self.parse(map.entry(key) - .or_insert(Level::Invalid("uninitialised")))?; + .or_insert(Level::Uninitialised))?; Ok(()) } else { Err(de::Error::custom(format!("tried to insert a \ @@ -368,25 +407,30 @@ impl<I: Iterator<Item = u8>> Parser<I> { }, } } else { - insert_into_ord_seq(node, key, "".to_string()); + node.insert_ord_seq_value(key, Cow::Borrowed("")); Ok(()) - } + }; + self.depth += 1; + res } - fn parse(&mut self, node: &mut Level) -> Result<bool> { + fn parse(&mut self, node: &mut Level<'a>) -> Result<bool> { // First character determines parsing type if self.depth == 0 { + // Hit the maximum depth level, so parse everything as a key let key = self.parse_key(b'\x00', true)?; self.parse_map_value(key, node)?; - self.depth += 1; + // self.depth += 1; return Ok(true); } - match self.peek() { + // println!("Beginning new parse\n{:?}", self); + match self.next() { Some(x) => { - match x { + match *x { b'[' => { - self.acc.clear(); + self.clear_acc(); + println!("Parsing nested key: \n{:?}", self); match tu!(self.peek()) { // key is of the form "[...", not really allowed. b'[' => { @@ -395,9 +439,11 @@ impl<I: Iterator<Item = u8>> Parser<I> { }, // key is simply "[]", so treat as a seq. b']' => { - self.acc.clear(); + // throw away the bracket + let _ = self.next(); + self.clear_acc(); self.parse_seq_value(node)?; - self.depth += 1; + // self.depth += 1; Ok(true) }, @@ -406,14 +452,14 @@ impl<I: Iterator<Item = u8>> Parser<I> { let key = self.parse_key(b']', true)?; let key = usize::from_str_radix(&key, 10).map_err(Error::from)?; self.parse_ord_seq_value(key, node)?; - self.depth += 1; + // self.depth += 1; Ok(true) } // Key is "[a..." so parse up to the closing "]" 0x20...0x2f | 0x3a...0x5a | 0x5c | 0x5e...0x7e => { let key = self.parse_key(b']', true)?; self.parse_map_value(key, node)?; - self.depth += 1; + // self.depth += 1; Ok(true) }, c => { @@ -425,15 +471,16 @@ impl<I: Iterator<Item = u8>> Parser<I> { // of the form "abc" or "abc[...]" // We do actually allow integer keys here since they cannot // be confused with sequences - 0x20...0x5a | 0x5c...0x7e => { - let key = self.parse_key(b'[', false)?; + _ => { + println!("Parsing root key: \n{:?}", self); + let key = { + self.parse_key(b'[', false)? + }; + println!("Parsing map value: \n{:?}", self); self.parse_map_value(key, node)?; - self.depth += 1; + // self.depth += 1; Ok(true) }, - c => { - Err(de::Error::custom(format!("unexpected character: {}", c))) - } } }, // Ran out of characters to parse |