diff options
author | Sam Scott <sam.scott89@gmail.com> | 2017-11-09 16:15:24 +0000 |
---|---|---|
committer | Sam Scott <sam.scott89@gmail.com> | 2017-11-09 16:37:53 +0000 |
commit | 32c685bf7472ebbc8f10b9d9e57f29f2d7501566 (patch) | |
tree | 00e7e887c50e34ed9476313fa979a9614b4767c7 /src/de | |
parent | 1ee313aa2998feed7d1721e0df8db0e7004fb35d (diff) |
Implement `strict` option feature.
In non-strict mode, the deserializer permits percent-encoded brackets in keys
and will generally tolerate parsing errors where possible.
For example, non-strict mode allows `a%5B[0%5D=1` to decode as `a: vec![1]`.
Default behaviour is strict.
Diffstat (limited to 'src/de')
-rw-r--r-- | src/de/mod.rs | 48 | ||||
-rw-r--r-- | src/de/parse.rs | 336 |
2 files changed, 234 insertions, 150 deletions
diff --git a/src/de/mod.rs b/src/de/mod.rs index 066270a..c28c459 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -52,23 +52,32 @@ use std::collections::btree_map::{BTreeMap, Entry, IntoIter}; /// To override the default serialization parameters, first construct a new /// Config. /// +/// The `strict` parameter controls whether the deserializer will tolerate +/// encoded brackets as part of the key. For example, serializing the field +/// `a = vec![12]` might give `a[0]=12`. In strict mode, the only string accepted +/// will be this string, whereas in non-strict mode, this can also be deserialized +/// from `a%5B0%5D=12`. Strict mode is more accurate for cases where it a field +/// may contain square brackets. +/// In non-strict mode, the deserializer will generally tolerate unexpected +/// characters. +/// /// A `max_depth` of 0 implies no nesting: the result will be a flat map. /// This is mostly useful when the maximum nested depth is known beforehand, /// to prevent denial of service attacks by providing incredibly deeply nested /// inputs. /// -/// The default value for `max_depth` is 5. +/// The default value for `max_depth` is 5, and the default mode is `strict=true`. /// /// ``` /// use serde_qs::Config; /// use std::collections::HashMap; /// -/// let config = Config::with_max_depth(0); +/// let config = Config::new(0, true); /// let map: HashMap<String, String> = config.deserialize_str("a[b][c]=1") /// .unwrap(); /// assert_eq!(map.get("a[b][c]").unwrap(), "1"); /// -/// let config = Config::with_max_depth(10); +/// let config = Config::new(10, true); /// let map: HashMap<String, HashMap<String, HashMap<String, String>>> = /// config.deserialize_str("a[b][c]=1").unwrap(); /// assert_eq!(map.get("a").unwrap().get("b").unwrap().get("c").unwrap(), "1"); @@ -78,24 +87,27 @@ pub struct Config { /// Specifies the maximum depth key that `serde_qs` will attempt to /// deserialize. Default is 5. 
max_depth: usize, + /// Strict deserializing mode will not tolerate encoded brackets. + strict: bool, } impl Default for Config { fn default() -> Self { - Config { max_depth: 5 } + Self::new(5, true) } } impl Config { - /// Construct a new `Config` with the specified maximum depth of nesting. - pub fn with_max_depth(depth: usize) -> Config { - Config { - max_depth: depth + /// Create a new `Config` with the specified `max_depth` and `strict` mode. + pub fn new(max_depth: usize, strict: bool) -> Self { + Self { + max_depth, + strict } } /// Get maximum depth parameter. - pub fn max_depth(&self) -> usize { + fn max_depth(&self) -> usize { self.max_depth } } @@ -104,10 +116,20 @@ impl Config { /// Deserializes a querystring from a `&[u8]` using this `Config`. pub fn deserialize_bytes<'de, T: de::Deserialize<'de>>(&self, input: &'de [u8]) - -> Result<T> { + -> Result<T> + { T::deserialize(QsDeserializer::with_config(self, input)?) } + // pub fn deserialize_bytes_sloppy<T: de::DeserializeOwned>(&self, input: &[u8]) + // -> Result<T> + // { + // let buf = String::from_utf8(input.to_vec())?; + // let buf = buf.replace("%5B", "[").replace("%5D", "]").into_bytes(); + // let deser = QsDeserializer::with_config(self, &buf)?; + // T::deserialize(deser) + // } + /// Deserializes a querystring from a `&str` using this `Config`. pub fn deserialize_str<'de, T: de::Deserialize<'de>>(&self, input: &'de str) @@ -178,7 +200,7 @@ pub fn from_str<'de, T: de::Deserialize<'de>>(input: &'de str) -> Result<T> { /// A deserializer for the querystring format. /// /// Supported top-level outputs are structs and maps. -pub struct QsDeserializer<'a> { +pub(crate) struct QsDeserializer<'a> { iter: IntoIter<Cow<'a, str>, Level<'a>>, value: Option<Level<'a>>, } @@ -202,8 +224,8 @@ impl<'a> QsDeserializer<'a> { } /// Returns a new `QsDeserializer<'a>`. 
- pub fn with_config(config: &Config, input: &'a [u8]) -> Result<Self> { - parse::Parser::new(input, config.max_depth()).as_deserializer() + fn with_config(config: &Config, input: &'a [u8]) -> Result<Self> { + parse::Parser::new(input, config.max_depth(), config.strict).as_deserializer() } } diff --git a/src/de/parse.rs b/src/de/parse.rs index 225d030..3b39c46 100644 --- a/src/de/parse.rs +++ b/src/de/parse.rs @@ -9,7 +9,6 @@ use std::str; use super::*; - macro_rules! tu { ($x:expr) => ( match $x { @@ -96,20 +95,59 @@ impl<'a> Level<'a> { pub struct Parser<'a> { inner: &'a [u8], iter: Iter<'a, u8>, + index: usize, acc: (usize, usize), peeked: Option<&'a u8>, depth: usize, // stores the current depth, for use in bounded-depth parsing + strict: bool, } impl<'a> Iterator for Parser<'a> { type Item = &'a u8; #[inline] fn next(&mut self) -> Option<Self::Item> { - match self.peeked.take() { - Some(v) => Some(v), - None => { - self.acc.1 += 1; - self.iter.next() + if self.strict { + match self.peeked.take() { + Some(v) => Some(v), + None => { + self.index += 1; + self.acc.1 += 1; + self.iter.next() + } + } + } else { + // in non-strict mode, we will happily decode any bracket + match self.peeked.take() { + Some(v) => Some(v), + None => { + self.index += 1; + self.acc.1 += 1; + match self.iter.next() { + Some(v) if v == &b'%' => { + match &self.iter.as_slice()[..2] { + b"5B" => { + // skip the next two characters + let _ = self.iter.next(); + let _ = self.iter.next(); + self.index += 2; + Some(&b'[') + }, + b"5D" => { + // skip the next two characters + let _ = self.iter.next(); + let _ = self.iter.next(); + self.index += 2; + Some(&b']') + }, + _ => { + Some(v) + } + } + } + Some(v) => Some(v), + None => None, + } + } } } } @@ -148,19 +186,21 @@ fn replace_plus(input: Cow<str>) -> Cow<str> { } impl<'a> Parser<'a> { - pub fn new(encoded: &'a [u8], depth: usize) -> Self { + pub fn new(encoded: &'a [u8], depth: usize, strict: bool) -> Self { Parser { - inner: encoded, + 
inner: encoded, iter: encoded.iter(), acc: (0, 0), + index: 0, peeked: None, - depth: depth, + depth, + strict, } } /// Resets the accumulator range by setting `(start, end)` to `(end, end)`. fn clear_acc(&mut self) { - self.acc.0 = self.acc.1; + self.acc = (self.index, self.index); } /// Extracts a string from the internal byte slice from the range tracked by @@ -176,7 +216,7 @@ impl<'a> Parser<'a> { /// In some ways the main way to use a `Parser`, this runs the parsing step /// and outputs a simple `Deserializer` over the parsed map. - pub fn as_deserializer(&mut self) -> Result<QsDeserializer<'a>> { + pub(crate) fn as_deserializer(&mut self) -> Result<QsDeserializer<'a>> { let map = BTreeMap::default(); let mut root = Level::Nested(map); @@ -210,38 +250,48 @@ impl<'a> Parser<'a> { Some(x) => { match *x { b'[' => { - self.clear_acc(); - // Only peek at the next value to determine the key type. - match tu!(self.peek()) { - // key is of the form "[...", not really allowed. - b'[' => { - Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.acc)) - }, - // key is simply "[]", so treat as a seq. - b']' => { - // throw away the bracket - let _ = self.next(); - self.clear_acc(); - self.parse_seq_value(node)?; - Ok(true) - }, - // First character is an integer, attempt to parse it as an integer key - b'0'...b'9' => { - let key = self.parse_key(b']', true)?; - let key = usize::from_str_radix(&key, 10).map_err(Error::from)?; - self.parse_ord_seq_value(key, node)?; - Ok(true) + loop { + self.clear_acc(); + // Only peek at the next value to determine the key type. + match tu!(self.peek()) { + // key is of the form "[...", not really allowed. + b'[' => { + // If we're in strict mode, error, otherwise just ignore it. + if self.strict { + return Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.index)) + } else { + let _ = self.next(); + } + }, + // key is simply "[]", so treat as a seq. 
+ b']' => { + // throw away the bracket + let _ = self.next(); + self.clear_acc(); + self.parse_seq_value(node)?; + return Ok(true); + }, + // First character is an integer, attempt to parse it as an integer key + b'0'...b'9' => { + let key = self.parse_key(b']', true)?; + let key = usize::from_str_radix(&key, 10).map_err(Error::from)?; + self.parse_ord_seq_value(key, node)?; + return Ok(true); + } + // Key is "[a..." so parse up to the closing "]" + 0x20...0x2f | 0x3a...0x5a | 0x5c | 0x5e...0x7e => { + let key = self.parse_key(b']', true)?; + self.parse_map_value(key, node)?; + return Ok(true); + }, + c => { + if self.strict { + return Err(super::Error::parse_err(&format!("unexpected character: {}", String::from_utf8_lossy(&[c])), self.index)) + } else { + let _ = self.next(); + } + }, } - // Key is "[a..." so parse up to the closing "]" - 0x20...0x2f | 0x3a...0x5a | 0x5c | 0x5e...0x7e => { - let key = self.parse_key(b']', true)?; - self.parse_map_value(key, node)?; - Ok(true) - }, - c => { - Err(super::Error::parse_err(&format!("unexpected character: {}", String::from_utf8_lossy(&[c])), self.acc)) - - }, } }, // This means the key should be a root key @@ -323,54 +373,60 @@ impl<'a> Parser<'a> { key: Cow<'a, str>, node: &mut Level<'a>) -> Result<()> { - let res = if let Some(x) = self.peek() { - match *x { - b'=' => { - // Key is finished, parse up until the '&' as the value - self.clear_acc(); - for _ in self.take_while(|b| *b != &b'&') {} - let value: Cow<'a, str> = self.collect_str()?; - node.insert_map_value(key, value); - Ok(()) - }, - b'&' => { - // No value - node.insert_map_value(key, Cow::Borrowed("")); - Ok(()) - }, - b'[' => { - // The key continues to another level of nested. - // Add a new unitialised level for this node and continue. 
- if let Level::Uninitialised = *node { - *node = Level::Nested(BTreeMap::default()); - } - if let Level::Nested(ref mut map) = *node { - // By parsing we drop down another level - self.depth -= 1; - // Either take the existing entry, or add a new - // unitialised level - // Use this new node to keep parsing - let _ = self.parse( - map.entry(key).or_insert(Level::Uninitialised) - )?; - Ok(()) - } else { - // We expected to parse into a map here. - Err(super::Error::parse_err(&format!("tried to insert a \ - new key into {:?}", - node), self.acc)) - } - }, - c => { - // Anything else is unexpected since we just finished - // parsing a key. - Err(super::Error::parse_err(format!("Unexpected character: '{}' found when parsing", String::from_utf8_lossy(&[c])), self.acc)) - }, + let res = loop { + if let Some(x) = self.peek() { + match *x { + b'=' => { + // Key is finished, parse up until the '&' as the value + self.clear_acc(); + for _ in self.take_while(|b| *b != &b'&') {} + let value: Cow<'a, str> = self.collect_str()?; + node.insert_map_value(key, value); + break Ok(()); + }, + b'&' => { + // No value + node.insert_map_value(key, Cow::Borrowed("")); + break Ok(()); + }, + b'[' => { + // The key continues to another level of nested. + // Add a new unitialised level for this node and continue. + if let Level::Uninitialised = *node { + *node = Level::Nested(BTreeMap::default()); + } + if let Level::Nested(ref mut map) = *node { + // By parsing we drop down another level + self.depth -= 1; + // Either take the existing entry, or add a new + // unitialised level + // Use this new node to keep parsing + let _ = self.parse( + map.entry(key).or_insert(Level::Uninitialised) + )?; + break Ok(()); + } else { + // We expected to parse into a map here. + break Err(super::Error::parse_err(&format!("tried to insert a \ + new key into {:?}", + node), self.index)); + } + }, + c => { + // Anything else is unexpected since we just finished + // parsing a key. 
+ if self.strict { + break Err(super::Error::parse_err(format!("Unexpected character: '{}' found when parsing", String::from_utf8_lossy(&[c])), self.index)) + } else { + let _ = self.next(); + } + }, + } + } else { + // The string has ended, so the value is empty. + node.insert_map_value(key, Cow::Borrowed("")); + break Ok(()); } - } else { - // The string has ended, so the value is empty. - node.insert_map_value(key, Cow::Borrowed("")); - Ok(()) }; // We have finished parsing this level, so go back up a level. self.depth +=1; @@ -382,54 +438,60 @@ impl<'a> Parser<'a> { /// Basically the same as the above, but we insert into `OrderedSeq` /// Can potentially be merged? fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> { - let res = if let Some(x) = self.peek() { - match *x { - b'=' => { - // Key is finished, parse up until the '&' as the value - self.clear_acc(); - for _ in self.take_while(|b| *b != &b'&') {} - let value = self.collect_str()?; - // Reached the end of the key string - node.insert_ord_seq_value(key, value); - Ok(()) - }, - b'&' => { - // No value - node.insert_ord_seq_value(key, Cow::Borrowed("")); - Ok(()) - }, - b'[' => { - // The key continues to another level of nested. - // Add a new unitialised level for this node and continue. - if let Level::Uninitialised = *node { - *node = Level::OrderedSeq(BTreeMap::default()); - } - if let Level::OrderedSeq(ref mut map) = *node { - // By parsing we drop down another level - self.depth -= 1; - let _ = self.parse( - // Either take the existing entry, or add a new - // unitialised level - // Use this new node to keep parsing - map.entry(key).or_insert(Level::Uninitialised))?; - Ok(()) - } else { - // We expected to parse into a seq here. - Err(super::Error::parse_err(&format!("tried to insert a \ - new key into {:?}", - node), self.acc)) - } - }, - _ => { - // Anything else is unexpected since we just finished - // parsing a key. 
- Err(super::Error::parse_err("Unexpected character found when parsing", self.acc)) - }, + let res = loop { + if let Some(x) = self.peek() { + match *x { + b'=' => { + // Key is finished, parse up until the '&' as the value + self.clear_acc(); + for _ in self.take_while(|b| *b != &b'&') {} + let value = self.collect_str()?; + // Reached the end of the key string + node.insert_ord_seq_value(key, value); + break Ok(()) + }, + b'&' => { + // No value + node.insert_ord_seq_value(key, Cow::Borrowed("")); + break Ok(()) + }, + b'[' => { + // The key continues to another level of nested. + // Add a new unitialised level for this node and continue. + if let Level::Uninitialised = *node { + *node = Level::OrderedSeq(BTreeMap::default()); + } + if let Level::OrderedSeq(ref mut map) = *node { + // By parsing we drop down another level + self.depth -= 1; + let _ = self.parse( + // Either take the existing entry, or add a new + // unitialised level + // Use this new node to keep parsing + map.entry(key).or_insert(Level::Uninitialised))?; + break Ok(()) + } else { + // We expected to parse into a seq here. + break Err(super::Error::parse_err(&format!("tried to insert a \ + new key into {:?}", + node), self.index)) + } + }, + c => { + // Anything else is unexpected since we just finished + // parsing a key. + if self.strict { + break Err(super::Error::parse_err(format!("Unexpected character: {:?} found when parsing", c), self.index)) + } else { + let _ = self.next(); + } + }, + } + } else { + // The string has ended, so the value is empty. + node.insert_ord_seq_value(key, Cow::Borrowed("")); + break Ok(()) } - } else { - // The string has ended, so the value is empty. - node.insert_ord_seq_value(key, Cow::Borrowed("")); - Ok(()) }; // We have finished parsing this level, so go back up a level. 
self.depth += 1; @@ -457,7 +519,7 @@ impl<'a> Parser<'a> { } _ => { Err(super::Error::parse_err("non-indexed sequence of structs not \ - supported", self.acc)) + supported", self.index)) }, }, None => { |