From 32c685bf7472ebbc8f10b9d9e57f29f2d7501566 Mon Sep 17 00:00:00 2001 From: Sam Scott Date: Thu, 9 Nov 2017 16:15:24 +0000 Subject: Implement `strict` option feature. Permits encoded brackets, and will generally tolerate parsing errors where possible. Permitting, for example, `a%5B[0%5D=1` to decode as `a: vec![1]`. Default behaviour is strict. --- src/de/mod.rs | 48 +++++-- src/de/parse.rs | 336 +++++++++++++++++++++++++++------------------- src/error.rs | 11 +- src/lib.rs | 2 +- tests/test_deserialize.rs | 96 +++++++++---- 5 files changed, 309 insertions(+), 184 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 066270a..c28c459 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -52,23 +52,32 @@ use std::collections::btree_map::{BTreeMap, Entry, IntoIter}; /// To override the default serialization parameters, first construct a new /// Config. /// +/// The `strict` parameter controls whether the deserializer will tolerate +/// encoded brackets as part of the key. For example, serializing the field +/// `a = vec![12]` might give `a[0]=12`. In strict mode, the only string accepted +/// will be this string, whereas in non-strict mode, this can also be deserialized +/// from `a%5B0%5D=12`. Strict mode is more accurate for cases where it a field +/// may contain square brackets. +/// In non-strict mode, the deserializer will generally tolerate unexpected +/// characters. +/// /// A `max_depth` of 0 implies no nesting: the result will be a flat map. /// This is mostly useful when the maximum nested depth is known beforehand, /// to prevent denial of service attacks by providing incredibly deeply nested /// inputs. /// -/// The default value for `max_depth` is 5. +/// The default value for `max_depth` is 5, and the default mode is `strict=true`. /// /// ``` /// use serde_qs::Config; /// use std::collections::HashMap; /// -/// let config = Config::with_max_depth(0); +/// let config = Config::new(0, true); /// let map: HashMap = config.deserialize_str("a[b][c]=1") /// .unwrap(); /// assert_eq!(map.get("a[b][c]").unwrap(), "1"); /// -/// let config = Config::with_max_depth(10); +/// let config = Config::new(10, true); /// let map: HashMap>> = /// config.deserialize_str("a[b][c]=1").unwrap(); /// assert_eq!(map.get("a").unwrap().get("b").unwrap().get("c").unwrap(), "1"); @@ -78,24 +87,27 @@ pub struct Config { /// Specifies the maximum depth key that `serde_qs` will attempt to /// deserialize. Default is 5. max_depth: usize, + /// Strict deserializing mode will not tolerate encoded brackets. + strict: bool, } impl Default for Config { fn default() -> Self { - Config { max_depth: 5 } + Self::new(5, true) } } impl Config { - /// Construct a new `Config` with the specified maximum depth of nesting. - pub fn with_max_depth(depth: usize) -> Config { - Config { - max_depth: depth + /// Create a new `Config` with the specified `max_depth` and `strict` mode. + pub fn new(max_depth: usize, strict: bool) -> Self { + Self { + max_depth, + strict } } /// Get maximum depth parameter. - pub fn max_depth(&self) -> usize { + fn max_depth(&self) -> usize { self.max_depth } } @@ -104,10 +116,20 @@ impl Config { /// Deserializes a querystring from a `&[u8]` using this `Config`. pub fn deserialize_bytes<'de, T: de::Deserialize<'de>>(&self, input: &'de [u8]) - -> Result { + -> Result + { T::deserialize(QsDeserializer::with_config(self, input)?) } + // pub fn deserialize_bytes_sloppy(&self, input: &[u8]) + // -> Result + // { + // let buf = String::from_utf8(input.to_vec())?; + // let buf = buf.replace("%5B", "[").replace("%5D", "]").into_bytes(); + // let deser = QsDeserializer::with_config(self, &buf)?; + // T::deserialize(deser) + // } + /// Deserializes a querystring from a `&str` using this `Config`. pub fn deserialize_str<'de, T: de::Deserialize<'de>>(&self, input: &'de str) @@ -178,7 +200,7 @@ pub fn from_str<'de, T: de::Deserialize<'de>>(input: &'de str) -> Result { /// A deserializer for the querystring format. /// /// Supported top-level outputs are structs and maps. -pub struct QsDeserializer<'a> { +pub(crate) struct QsDeserializer<'a> { iter: IntoIter, Level<'a>>, value: Option>, } @@ -202,8 +224,8 @@ impl<'a> QsDeserializer<'a> { } /// Returns a new `QsDeserializer<'a>`. - pub fn with_config(config: &Config, input: &'a [u8]) -> Result { - parse::Parser::new(input, config.max_depth()).as_deserializer() + fn with_config(config: &Config, input: &'a [u8]) -> Result { + parse::Parser::new(input, config.max_depth(), config.strict).as_deserializer() } } diff --git a/src/de/parse.rs b/src/de/parse.rs index 225d030..3b39c46 100644 --- a/src/de/parse.rs +++ b/src/de/parse.rs @@ -9,7 +9,6 @@ use std::str; use super::*; - macro_rules! tu { ($x:expr) => ( match $x { @@ -96,20 +95,59 @@ impl<'a> Level<'a> { pub struct Parser<'a> { inner: &'a [u8], iter: Iter<'a, u8>, + index: usize, acc: (usize, usize), peeked: Option<&'a u8>, depth: usize, // stores the current depth, for use in bounded-depth parsing + strict: bool, } impl<'a> Iterator for Parser<'a> { type Item = &'a u8; #[inline] fn next(&mut self) -> Option { - match self.peeked.take() { - Some(v) => Some(v), - None => { - self.acc.1 += 1; - self.iter.next() + if self.strict { + match self.peeked.take() { + Some(v) => Some(v), + None => { + self.index += 1; + self.acc.1 += 1; + self.iter.next() + } + } + } else { + // in non-strict mode, we will happily decode any bracket + match self.peeked.take() { + Some(v) => Some(v), + None => { + self.index += 1; + self.acc.1 += 1; + match self.iter.next() { + Some(v) if v == &b'%' => { + match &self.iter.as_slice()[..2] { + b"5B" => { + // skip the next two characters + let _ = self.iter.next(); + let _ = self.iter.next(); + self.index += 2; + Some(&b'[') + }, + b"5D" => { + // skip the next two characters + let _ = self.iter.next(); + let _ = self.iter.next(); + self.index += 2; + Some(&b']') + }, + _ => { + Some(v) + } + } + } + Some(v) => Some(v), + None => None, + } + } } } } @@ -148,19 +186,21 @@ fn replace_plus(input: Cow) -> Cow { } impl<'a> Parser<'a> { - pub fn new(encoded: &'a [u8], depth: usize) -> Self { + pub fn new(encoded: &'a [u8], depth: usize, strict: bool) -> Self { Parser { - inner: encoded, + inner: encoded, iter: encoded.iter(), acc: (0, 0), + index: 0, peeked: None, - depth: depth, + depth, + strict, } } /// Resets the accumulator range by setting `(start, end)` to `(end, end)`. fn clear_acc(&mut self) { - self.acc.0 = self.acc.1; + self.acc = (self.index, self.index); } /// Extracts a string from the internal byte slice from the range tracked by @@ -176,7 +216,7 @@ impl<'a> Parser<'a> { /// In some ways the main way to use a `Parser`, this runs the parsing step /// and outputs a simple `Deserializer` over the parsed map. - pub fn as_deserializer(&mut self) -> Result> { + pub(crate) fn as_deserializer(&mut self) -> Result> { let map = BTreeMap::default(); let mut root = Level::Nested(map); @@ -210,38 +250,48 @@ impl<'a> Parser<'a> { Some(x) => { match *x { b'[' => { - self.clear_acc(); - // Only peek at the next value to determine the key type. - match tu!(self.peek()) { - // key is of the form "[...", not really allowed. - b'[' => { - Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.acc)) - }, - // key is simply "[]", so treat as a seq. - b']' => { - // throw away the bracket - let _ = self.next(); - self.clear_acc(); - self.parse_seq_value(node)?; - Ok(true) - }, - // First character is an integer, attempt to parse it as an integer key - b'0'...b'9' => { - let key = self.parse_key(b']', true)?; - let key = usize::from_str_radix(&key, 10).map_err(Error::from)?; - self.parse_ord_seq_value(key, node)?; - Ok(true) + loop { + self.clear_acc(); + // Only peek at the next value to determine the key type. + match tu!(self.peek()) { + // key is of the form "[...", not really allowed. + b'[' => { + // If we're in strict mode, error, otherwise just ignore it. + if self.strict { + return Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.index)) + } else { + let _ = self.next(); + } + }, + // key is simply "[]", so treat as a seq. + b']' => { + // throw away the bracket + let _ = self.next(); + self.clear_acc(); + self.parse_seq_value(node)?; + return Ok(true); + }, + // First character is an integer, attempt to parse it as an integer key + b'0'...b'9' => { + let key = self.parse_key(b']', true)?; + let key = usize::from_str_radix(&key, 10).map_err(Error::from)?; + self.parse_ord_seq_value(key, node)?; + return Ok(true); + } + // Key is "[a..." so parse up to the closing "]" + 0x20...0x2f | 0x3a...0x5a | 0x5c | 0x5e...0x7e => { + let key = self.parse_key(b']', true)?; + self.parse_map_value(key, node)?; + return Ok(true); + }, + c => { + if self.strict { + return Err(super::Error::parse_err(&format!("unexpected character: {}", String::from_utf8_lossy(&[c])), self.index)) + } else { + let _ = self.next(); + } + }, } - // Key is "[a..." so parse up to the closing "]" - 0x20...0x2f | 0x3a...0x5a | 0x5c | 0x5e...0x7e => { - let key = self.parse_key(b']', true)?; - self.parse_map_value(key, node)?; - Ok(true) - }, - c => { - Err(super::Error::parse_err(&format!("unexpected character: {}", String::from_utf8_lossy(&[c])), self.acc)) - - }, } }, // This means the key should be a root key @@ -323,54 +373,60 @@ impl<'a> Parser<'a> { key: Cow<'a, str>, node: &mut Level<'a>) -> Result<()> { - let res = if let Some(x) = self.peek() { - match *x { - b'=' => { - // Key is finished, parse up until the '&' as the value - self.clear_acc(); - for _ in self.take_while(|b| *b != &b'&') {} - let value: Cow<'a, str> = self.collect_str()?; - node.insert_map_value(key, value); - Ok(()) - }, - b'&' => { - // No value - node.insert_map_value(key, Cow::Borrowed("")); - Ok(()) - }, - b'[' => { - // The key continues to another level of nested. - // Add a new unitialised level for this node and continue. - if let Level::Uninitialised = *node { - *node = Level::Nested(BTreeMap::default()); - } - if let Level::Nested(ref mut map) = *node { - // By parsing we drop down another level - self.depth -= 1; - // Either take the existing entry, or add a new - // unitialised level - // Use this new node to keep parsing - let _ = self.parse( - map.entry(key).or_insert(Level::Uninitialised) - )?; - Ok(()) - } else { - // We expected to parse into a map here. - Err(super::Error::parse_err(&format!("tried to insert a \ - new key into {:?}", - node), self.acc)) - } - }, - c => { - // Anything else is unexpected since we just finished - // parsing a key. - Err(super::Error::parse_err(format!("Unexpected character: '{}' found when parsing", String::from_utf8_lossy(&[c])), self.acc)) - }, + let res = loop { + if let Some(x) = self.peek() { + match *x { + b'=' => { + // Key is finished, parse up until the '&' as the value + self.clear_acc(); + for _ in self.take_while(|b| *b != &b'&') {} + let value: Cow<'a, str> = self.collect_str()?; + node.insert_map_value(key, value); + break Ok(()); + }, + b'&' => { + // No value + node.insert_map_value(key, Cow::Borrowed("")); + break Ok(()); + }, + b'[' => { + // The key continues to another level of nested. + // Add a new unitialised level for this node and continue. + if let Level::Uninitialised = *node { + *node = Level::Nested(BTreeMap::default()); + } + if let Level::Nested(ref mut map) = *node { + // By parsing we drop down another level + self.depth -= 1; + // Either take the existing entry, or add a new + // unitialised level + // Use this new node to keep parsing + let _ = self.parse( + map.entry(key).or_insert(Level::Uninitialised) + )?; + break Ok(()); + } else { + // We expected to parse into a map here. + break Err(super::Error::parse_err(&format!("tried to insert a \ + new key into {:?}", + node), self.index)); + } + }, + c => { + // Anything else is unexpected since we just finished + // parsing a key. + if self.strict { + break Err(super::Error::parse_err(format!("Unexpected character: '{}' found when parsing", String::from_utf8_lossy(&[c])), self.index)) + } else { + let _ = self.next(); + } + }, + } + } else { + // The string has ended, so the value is empty. + node.insert_map_value(key, Cow::Borrowed("")); + break Ok(()); } - } else { - // The string has ended, so the value is empty. - node.insert_map_value(key, Cow::Borrowed("")); - Ok(()) }; // We have finished parsing this level, so go back up a level. self.depth +=1; @@ -382,54 +438,60 @@ impl<'a> Parser<'a> { /// Basically the same as the above, but we insert into `OrderedSeq` /// Can potentially be merged? fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> { - let res = if let Some(x) = self.peek() { - match *x { - b'=' => { - // Key is finished, parse up until the '&' as the value - self.clear_acc(); - for _ in self.take_while(|b| *b != &b'&') {} - let value = self.collect_str()?; - // Reached the end of the key string - node.insert_ord_seq_value(key, value); - Ok(()) - }, - b'&' => { - // No value - node.insert_ord_seq_value(key, Cow::Borrowed("")); - Ok(()) - }, - b'[' => { - // The key continues to another level of nested. - // Add a new unitialised level for this node and continue. - if let Level::Uninitialised = *node { - *node = Level::OrderedSeq(BTreeMap::default()); - } - if let Level::OrderedSeq(ref mut map) = *node { - // By parsing we drop down another level - self.depth -= 1; - let _ = self.parse( - // Either take the existing entry, or add a new - // unitialised level - // Use this new node to keep parsing - map.entry(key).or_insert(Level::Uninitialised))?; - Ok(()) - } else { - // We expected to parse into a seq here. - Err(super::Error::parse_err(&format!("tried to insert a \ - new key into {:?}", - node), self.acc)) - } - }, - _ => { - // Anything else is unexpected since we just finished - // parsing a key. - Err(super::Error::parse_err("Unexpected character found when parsing", self.acc)) - }, + let res = loop { + if let Some(x) = self.peek() { + match *x { + b'=' => { + // Key is finished, parse up until the '&' as the value + self.clear_acc(); + for _ in self.take_while(|b| *b != &b'&') {} + let value = self.collect_str()?; + // Reached the end of the key string + node.insert_ord_seq_value(key, value); + break Ok(()) + }, + b'&' => { + // No value + node.insert_ord_seq_value(key, Cow::Borrowed("")); + break Ok(()) + }, + b'[' => { + // The key continues to another level of nested. + // Add a new unitialised level for this node and continue. + if let Level::Uninitialised = *node { + *node = Level::OrderedSeq(BTreeMap::default()); + } + if let Level::OrderedSeq(ref mut map) = *node { + // By parsing we drop down another level + self.depth -= 1; + let _ = self.parse( + // Either take the existing entry, or add a new + // unitialised level + // Use this new node to keep parsing + map.entry(key).or_insert(Level::Uninitialised))?; + break Ok(()) + } else { + // We expected to parse into a seq here. + break Err(super::Error::parse_err(&format!("tried to insert a \ + new key into {:?}", + node), self.index)) + } + }, + c => { + // Anything else is unexpected since we just finished + // parsing a key. + if self.strict { + break Err(super::Error::parse_err(format!("Unexpected character: {:?} found when parsing", c), self.index)) + } else { + let _ = self.next(); + } + }, + } + } else { + // The string has ended, so the value is empty. + node.insert_ord_seq_value(key, Cow::Borrowed("")); + break Ok(()) } - } else { - // The string has ended, so the value is empty. - node.insert_ord_seq_value(key, Cow::Borrowed("")); - Ok(()) }; // We have finished parsing this level, so go back up a level. self.depth += 1; @@ -457,7 +519,7 @@ impl<'a> Parser<'a> { } _ => { Err(super::Error::parse_err("non-indexed sequence of structs not \ - supported", self.acc)) + supported", self.index)) }, }, None => { diff --git a/src/error.rs b/src/error.rs index 44459c5..555bf5e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -9,10 +9,13 @@ use std::string; error_chain! { errors { - Custom(msg: String) - Parse(msg: String, pos: (usize, usize)) { + Custom(msg: String) { + description("miscellaneous failure") + display("failed with reason: {}", msg) + } + Parse(msg: String, pos: usize) { description("parsing failure") - display("parsing failed with error: '{}' at position: {:?}", msg, pos) + display("parsing failed with error: '{}' at position: {}", msg, pos) } Unsupported } @@ -35,7 +38,7 @@ impl Error { } /// Generate a parsing error message with position. - pub fn parse_err(msg: T, position: (usize, usize)) -> Self + pub fn parse_err(msg: T, position: usize) -> Self where T: Display { ErrorKind::Parse(msg.to_string(), position).into() } diff --git a/src/lib.rs b/src/lib.rs index 0d228c1..e6f5190 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -135,7 +135,7 @@ mod ser; pub use error::Error; #[doc(inline)] -pub use de::{QsDeserializer, from_bytes, from_str}; +pub use de::{from_bytes, from_str}; #[doc(inline)] pub use de::Config; #[doc(inline)] diff --git a/tests/test_deserialize.rs b/tests/test_deserialize.rs index cff219d..e0392f5 100644 --- a/tests/test_deserialize.rs +++ b/tests/test_deserialize.rs @@ -23,9 +23,11 @@ struct QueryParams { // qs::from_str. All types are inferred by the compiler. macro_rules! map_test { ($string:expr, $($mapvars:tt)*) => { - let expected_map = hash_to_map!(New $($mapvars)*); - let testmap: HashMap<_, _> = qs::from_str($string).unwrap(); - assert_eq!(expected_map, testmap); + for config in vec![qs::Config::new(5, true), qs::Config::new(5, false)] { + let expected_map = hash_to_map!(New $($mapvars)*); + let testmap: HashMap<_, _> = config.deserialize_str($string).unwrap(); + assert_eq!(expected_map, testmap); + } } } @@ -79,29 +81,31 @@ fn deserialize_struct() { user_ids: vec![1, 2, 3, 4], }; - // standard parameters - let rec_params: QueryParams = qs::from_str("\ - name=Acme&id=42&phone=12345&address[postcode]=12345&\ - address[city]=Carrot+City&user_ids[0]=1&user_ids[1]=2&\ - user_ids[2]=3&user_ids[3]=4") - .unwrap(); - assert_eq!(rec_params, params); - - // unindexed arrays - let rec_params: QueryParams = qs::from_str("\ - name=Acme&id=42&phone=12345&address[postcode]=12345&\ - address[city]=Carrot+City&user_ids[]=1&user_ids[]=2&\ - user_ids[]=3&user_ids[]=4") - .unwrap(); - assert_eq!(rec_params, params); - - // ordering doesn't matter - let rec_params: QueryParams = qs::from_str("\ - address[city]=Carrot+City&user_ids[]=1&user_ids[]=2&\ - name=Acme&id=42&phone=12345&address[postcode]=12345&\ - user_ids[]=3&user_ids[]=4") - .unwrap(); - assert_eq!(rec_params, params); + for config in vec![qs::Config::new(5, true), qs::Config::new(5, false)] { + // standard parameters + let rec_params: QueryParams = config.deserialize_str("\ + name=Acme&id=42&phone=12345&address[postcode]=12345&\ + address[city]=Carrot+City&user_ids[0]=1&user_ids[1]=2&\ + user_ids[2]=3&user_ids[3]=4") + .unwrap(); + assert_eq!(rec_params, params); + + // unindexed arrays + let rec_params: QueryParams = config.deserialize_str("\ + name=Acme&id=42&phone=12345&address[postcode]=12345&\ + address[city]=Carrot+City&user_ids[]=1&user_ids[]=2&\ + user_ids[]=3&user_ids[]=4") + .unwrap(); + assert_eq!(rec_params, params); + + // ordering doesn't matter + let rec_params: QueryParams = config.deserialize_str("\ + address[city]=Carrot+City&user_ids[]=1&user_ids[]=2&\ + name=Acme&id=42&phone=12345&address[postcode]=12345&\ + user_ids[]=3&user_ids[]=4") + .unwrap(); + assert_eq!(rec_params, params); + } } @@ -394,15 +398,49 @@ fn returns_errors() { vec: Vec } - let params: Result = qs::from_str("vec[[]=a&vec[]=2"); + let params: Result = qs::from_str("vec[[]=1&vec[]=2"); assert!(params.is_err()); println!("{}", params.unwrap_err()); - let params: Result = qs::from_str("vec[\x00[]=a&vec[]=2"); + let params: Result = qs::from_str("vec[\x00[]=1&vec[]=2"); assert!(params.is_err()); println!("{}", params.unwrap_err()); - let params: Result = qs::from_str("vec[0]=a&vec[0]=2"); + let params: Result = qs::from_str("vec[0]=1&vec[0]=2"); assert!(params.is_err()); println!("{}", params.unwrap_err()); } + + +#[test] +fn strict_mode() { + #[derive(Deserialize,Serialize,Debug, PartialEq)] + struct Test { + a: u8 + } + #[derive(Debug,Serialize,Deserialize,PartialEq)] + #[serde(deny_unknown_fields)] + struct Query { + vec: Vec + } + + let strict_config = qs::Config::default(); + + let params: Result = strict_config.deserialize_str("vec%5B0%5D%5Ba%5D=1&vec[1][a]=2"); + assert!(params.is_err()); + println!("{}", params.unwrap_err()); + + let loose_config = qs::Config::new(5, false); + + let params: Result = loose_config.deserialize_str("vec%5B0%5D%5Ba%5D=1&vec[1][a]=2"); + assert_eq!(params.unwrap(), Query { vec: vec![Test { a: 1 }, Test { a: 2 }] }); + + let params: Result = loose_config.deserialize_str("vec[%5B0%5D%5Ba%5D]=1&vec[1][a]=2"); + assert_eq!(params.unwrap(), Query { vec: vec![Test { a: 1 }, Test { a: 2 }] }); + + let params: Result = loose_config.deserialize_str("vec[%5B0%5D%5Ba%5D=1&vec[1][a]=2"); + assert_eq!(params.unwrap(), Query { vec: vec![Test { a: 1 }, Test { a: 2 }] }); + + let params: Result = loose_config.deserialize_str("vec%5B0%5D%5Ba%5D]=1&vec[1][a]=2"); + assert_eq!(params.unwrap(), Query { vec: vec![Test { a: 1 }, Test { a: 2 }] }); +} \ No newline at end of file -- cgit v1.2.3