summaryrefslogtreecommitdiff
path: root/src/de
diff options
context:
space:
mode:
authorSam Scott <sam.scott89@gmail.com>2017-11-09 16:15:24 +0000
committerSam Scott <sam.scott89@gmail.com>2017-11-09 16:37:53 +0000
commit32c685bf7472ebbc8f10b9d9e57f29f2d7501566 (patch)
tree00e7e887c50e34ed9476313fa979a9614b4767c7 /src/de
parent1ee313aa2998feed7d1721e0df8db0e7004fb35d (diff)
Implement `strict` option feature.
Permits encoded brackets, and will generally tolerate parsing errors where possible. Permitting, for example, `a%5B[0%5D=1` to decode as `a: vec![1]`. Default behaviour is strict.
Diffstat (limited to 'src/de')
-rw-r--r--src/de/mod.rs48
-rw-r--r--src/de/parse.rs336
2 files changed, 234 insertions, 150 deletions
diff --git a/src/de/mod.rs b/src/de/mod.rs
index 066270a..c28c459 100644
--- a/src/de/mod.rs
+++ b/src/de/mod.rs
@@ -52,23 +52,32 @@ use std::collections::btree_map::{BTreeMap, Entry, IntoIter};
/// To override the default serialization parameters, first construct a new
/// Config.
///
+/// The `strict` parameter controls whether the deserializer will tolerate
+/// encoded brackets as part of the key. For example, serializing the field
+/// `a = vec![12]` might give `a[0]=12`. In strict mode, the only string accepted
+/// will be this string, whereas in non-strict mode, this can also be deserialized
+/// from `a%5B0%5D=12`. Strict mode is more accurate for cases where it a field
+/// may contain square brackets.
+/// In non-strict mode, the deserializer will generally tolerate unexpected
+/// characters.
+///
/// A `max_depth` of 0 implies no nesting: the result will be a flat map.
/// This is mostly useful when the maximum nested depth is known beforehand,
/// to prevent denial of service attacks by providing incredibly deeply nested
/// inputs.
///
-/// The default value for `max_depth` is 5.
+/// The default value for `max_depth` is 5, and the default mode is `strict=true`.
///
/// ```
/// use serde_qs::Config;
/// use std::collections::HashMap;
///
-/// let config = Config::with_max_depth(0);
+/// let config = Config::new(0, true);
/// let map: HashMap<String, String> = config.deserialize_str("a[b][c]=1")
/// .unwrap();
/// assert_eq!(map.get("a[b][c]").unwrap(), "1");
///
-/// let config = Config::with_max_depth(10);
+/// let config = Config::new(10, true);
/// let map: HashMap<String, HashMap<String, HashMap<String, String>>> =
/// config.deserialize_str("a[b][c]=1").unwrap();
/// assert_eq!(map.get("a").unwrap().get("b").unwrap().get("c").unwrap(), "1");
@@ -78,24 +87,27 @@ pub struct Config {
/// Specifies the maximum depth key that `serde_qs` will attempt to
/// deserialize. Default is 5.
max_depth: usize,
+ /// Strict deserializing mode will not tolerate encoded brackets.
+ strict: bool,
}
impl Default for Config {
fn default() -> Self {
- Config { max_depth: 5 }
+ Self::new(5, true)
}
}
impl Config {
- /// Construct a new `Config` with the specified maximum depth of nesting.
- pub fn with_max_depth(depth: usize) -> Config {
- Config {
- max_depth: depth
+ /// Create a new `Config` with the specified `max_depth` and `strict` mode.
+ pub fn new(max_depth: usize, strict: bool) -> Self {
+ Self {
+ max_depth,
+ strict
}
}
/// Get maximum depth parameter.
- pub fn max_depth(&self) -> usize {
+ fn max_depth(&self) -> usize {
self.max_depth
}
}
@@ -104,10 +116,20 @@ impl Config {
/// Deserializes a querystring from a `&[u8]` using this `Config`.
pub fn deserialize_bytes<'de, T: de::Deserialize<'de>>(&self,
input: &'de [u8])
- -> Result<T> {
+ -> Result<T>
+ {
T::deserialize(QsDeserializer::with_config(self, input)?)
}
+ // pub fn deserialize_bytes_sloppy<T: de::DeserializeOwned>(&self, input: &[u8])
+ // -> Result<T>
+ // {
+ // let buf = String::from_utf8(input.to_vec())?;
+ // let buf = buf.replace("%5B", "[").replace("%5D", "]").into_bytes();
+ // let deser = QsDeserializer::with_config(self, &buf)?;
+ // T::deserialize(deser)
+ // }
+
/// Deserializes a querystring from a `&str` using this `Config`.
pub fn deserialize_str<'de, T: de::Deserialize<'de>>(&self,
input: &'de str)
@@ -178,7 +200,7 @@ pub fn from_str<'de, T: de::Deserialize<'de>>(input: &'de str) -> Result<T> {
/// A deserializer for the querystring format.
///
/// Supported top-level outputs are structs and maps.
-pub struct QsDeserializer<'a> {
+pub(crate) struct QsDeserializer<'a> {
iter: IntoIter<Cow<'a, str>, Level<'a>>,
value: Option<Level<'a>>,
}
@@ -202,8 +224,8 @@ impl<'a> QsDeserializer<'a> {
}
/// Returns a new `QsDeserializer<'a>`.
- pub fn with_config(config: &Config, input: &'a [u8]) -> Result<Self> {
- parse::Parser::new(input, config.max_depth()).as_deserializer()
+ fn with_config(config: &Config, input: &'a [u8]) -> Result<Self> {
+ parse::Parser::new(input, config.max_depth(), config.strict).as_deserializer()
}
}
diff --git a/src/de/parse.rs b/src/de/parse.rs
index 225d030..3b39c46 100644
--- a/src/de/parse.rs
+++ b/src/de/parse.rs
@@ -9,7 +9,6 @@ use std::str;
use super::*;
-
macro_rules! tu {
($x:expr) => (
match $x {
@@ -96,20 +95,59 @@ impl<'a> Level<'a> {
pub struct Parser<'a> {
inner: &'a [u8],
iter: Iter<'a, u8>,
+ index: usize,
acc: (usize, usize),
peeked: Option<&'a u8>,
depth: usize, // stores the current depth, for use in bounded-depth parsing
+ strict: bool,
}
impl<'a> Iterator for Parser<'a> {
type Item = &'a u8;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
- match self.peeked.take() {
- Some(v) => Some(v),
- None => {
- self.acc.1 += 1;
- self.iter.next()
+ if self.strict {
+ match self.peeked.take() {
+ Some(v) => Some(v),
+ None => {
+ self.index += 1;
+ self.acc.1 += 1;
+ self.iter.next()
+ }
+ }
+ } else {
+ // in non-strict mode, we will happily decode any bracket
+ match self.peeked.take() {
+ Some(v) => Some(v),
+ None => {
+ self.index += 1;
+ self.acc.1 += 1;
+ match self.iter.next() {
+ Some(v) if v == &b'%' => {
+ match &self.iter.as_slice()[..2] {
+ b"5B" => {
+ // skip the next two characters
+ let _ = self.iter.next();
+ let _ = self.iter.next();
+ self.index += 2;
+ Some(&b'[')
+ },
+ b"5D" => {
+ // skip the next two characters
+ let _ = self.iter.next();
+ let _ = self.iter.next();
+ self.index += 2;
+ Some(&b']')
+ },
+ _ => {
+ Some(v)
+ }
+ }
+ }
+ Some(v) => Some(v),
+ None => None,
+ }
+ }
}
}
}
@@ -148,19 +186,21 @@ fn replace_plus(input: Cow<str>) -> Cow<str> {
}
impl<'a> Parser<'a> {
- pub fn new(encoded: &'a [u8], depth: usize) -> Self {
+ pub fn new(encoded: &'a [u8], depth: usize, strict: bool) -> Self {
Parser {
- inner: encoded,
+ inner: encoded,
iter: encoded.iter(),
acc: (0, 0),
+ index: 0,
peeked: None,
- depth: depth,
+ depth,
+ strict,
}
}
/// Resets the accumulator range by setting `(start, end)` to `(end, end)`.
fn clear_acc(&mut self) {
- self.acc.0 = self.acc.1;
+ self.acc = (self.index, self.index);
}
/// Extracts a string from the internal byte slice from the range tracked by
@@ -176,7 +216,7 @@ impl<'a> Parser<'a> {
/// In some ways the main way to use a `Parser`, this runs the parsing step
/// and outputs a simple `Deserializer` over the parsed map.
- pub fn as_deserializer(&mut self) -> Result<QsDeserializer<'a>> {
+ pub(crate) fn as_deserializer(&mut self) -> Result<QsDeserializer<'a>> {
let map = BTreeMap::default();
let mut root = Level::Nested(map);
@@ -210,38 +250,48 @@ impl<'a> Parser<'a> {
Some(x) => {
match *x {
b'[' => {
- self.clear_acc();
- // Only peek at the next value to determine the key type.
- match tu!(self.peek()) {
- // key is of the form "[...", not really allowed.
- b'[' => {
- Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.acc))
- },
- // key is simply "[]", so treat as a seq.
- b']' => {
- // throw away the bracket
- let _ = self.next();
- self.clear_acc();
- self.parse_seq_value(node)?;
- Ok(true)
- },
- // First character is an integer, attempt to parse it as an integer key
- b'0'...b'9' => {
- let key = self.parse_key(b']', true)?;
- let key = usize::from_str_radix(&key, 10).map_err(Error::from)?;
- self.parse_ord_seq_value(key, node)?;
- Ok(true)
+ loop {
+ self.clear_acc();
+ // Only peek at the next value to determine the key type.
+ match tu!(self.peek()) {
+ // key is of the form "[...", not really allowed.
+ b'[' => {
+ // If we're in strict mode, error, otherwise just ignore it.
+ if self.strict {
+ return Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.index))
+ } else {
+ let _ = self.next();
+ }
+ },
+ // key is simply "[]", so treat as a seq.
+ b']' => {
+ // throw away the bracket
+ let _ = self.next();
+ self.clear_acc();
+ self.parse_seq_value(node)?;
+ return Ok(true);
+ },
+ // First character is an integer, attempt to parse it as an integer key
+ b'0'...b'9' => {
+ let key = self.parse_key(b']', true)?;
+ let key = usize::from_str_radix(&key, 10).map_err(Error::from)?;
+ self.parse_ord_seq_value(key, node)?;
+ return Ok(true);
+ }
+ // Key is "[a..." so parse up to the closing "]"
+ 0x20...0x2f | 0x3a...0x5a | 0x5c | 0x5e...0x7e => {
+ let key = self.parse_key(b']', true)?;
+ self.parse_map_value(key, node)?;
+ return Ok(true);
+ },
+ c => {
+ if self.strict {
+ return Err(super::Error::parse_err(&format!("unexpected character: {}", String::from_utf8_lossy(&[c])), self.index))
+ } else {
+ let _ = self.next();
+ }
+ },
}
- // Key is "[a..." so parse up to the closing "]"
- 0x20...0x2f | 0x3a...0x5a | 0x5c | 0x5e...0x7e => {
- let key = self.parse_key(b']', true)?;
- self.parse_map_value(key, node)?;
- Ok(true)
- },
- c => {
- Err(super::Error::parse_err(&format!("unexpected character: {}", String::from_utf8_lossy(&[c])), self.acc))
-
- },
}
},
// This means the key should be a root key
@@ -323,54 +373,60 @@ impl<'a> Parser<'a> {
key: Cow<'a, str>,
node: &mut Level<'a>)
-> Result<()> {
- let res = if let Some(x) = self.peek() {
- match *x {
- b'=' => {
- // Key is finished, parse up until the '&' as the value
- self.clear_acc();
- for _ in self.take_while(|b| *b != &b'&') {}
- let value: Cow<'a, str> = self.collect_str()?;
- node.insert_map_value(key, value);
- Ok(())
- },
- b'&' => {
- // No value
- node.insert_map_value(key, Cow::Borrowed(""));
- Ok(())
- },
- b'[' => {
- // The key continues to another level of nested.
- // Add a new unitialised level for this node and continue.
- if let Level::Uninitialised = *node {
- *node = Level::Nested(BTreeMap::default());
- }
- if let Level::Nested(ref mut map) = *node {
- // By parsing we drop down another level
- self.depth -= 1;
- // Either take the existing entry, or add a new
- // unitialised level
- // Use this new node to keep parsing
- let _ = self.parse(
- map.entry(key).or_insert(Level::Uninitialised)
- )?;
- Ok(())
- } else {
- // We expected to parse into a map here.
- Err(super::Error::parse_err(&format!("tried to insert a \
- new key into {:?}",
- node), self.acc))
- }
- },
- c => {
- // Anything else is unexpected since we just finished
- // parsing a key.
- Err(super::Error::parse_err(format!("Unexpected character: '{}' found when parsing", String::from_utf8_lossy(&[c])), self.acc))
- },
+ let res = loop {
+ if let Some(x) = self.peek() {
+ match *x {
+ b'=' => {
+ // Key is finished, parse up until the '&' as the value
+ self.clear_acc();
+ for _ in self.take_while(|b| *b != &b'&') {}
+ let value: Cow<'a, str> = self.collect_str()?;
+ node.insert_map_value(key, value);
+ break Ok(());
+ },
+ b'&' => {
+ // No value
+ node.insert_map_value(key, Cow::Borrowed(""));
+ break Ok(());
+ },
+ b'[' => {
+ // The key continues to another level of nested.
+ // Add a new unitialised level for this node and continue.
+ if let Level::Uninitialised = *node {
+ *node = Level::Nested(BTreeMap::default());
+ }
+ if let Level::Nested(ref mut map) = *node {
+ // By parsing we drop down another level
+ self.depth -= 1;
+ // Either take the existing entry, or add a new
+ // unitialised level
+ // Use this new node to keep parsing
+ let _ = self.parse(
+ map.entry(key).or_insert(Level::Uninitialised)
+ )?;
+ break Ok(());
+ } else {
+ // We expected to parse into a map here.
+ break Err(super::Error::parse_err(&format!("tried to insert a \
+ new key into {:?}",
+ node), self.index));
+ }
+ },
+ c => {
+ // Anything else is unexpected since we just finished
+ // parsing a key.
+ if self.strict {
+ break Err(super::Error::parse_err(format!("Unexpected character: '{}' found when parsing", String::from_utf8_lossy(&[c])), self.index))
+ } else {
+ let _ = self.next();
+ }
+ },
+ }
+ } else {
+ // The string has ended, so the value is empty.
+ node.insert_map_value(key, Cow::Borrowed(""));
+ break Ok(());
}
- } else {
- // The string has ended, so the value is empty.
- node.insert_map_value(key, Cow::Borrowed(""));
- Ok(())
};
// We have finished parsing this level, so go back up a level.
self.depth +=1;
@@ -382,54 +438,60 @@ impl<'a> Parser<'a> {
/// Basically the same as the above, but we insert into `OrderedSeq`
/// Can potentially be merged?
fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> {
- let res = if let Some(x) = self.peek() {
- match *x {
- b'=' => {
- // Key is finished, parse up until the '&' as the value
- self.clear_acc();
- for _ in self.take_while(|b| *b != &b'&') {}
- let value = self.collect_str()?;
- // Reached the end of the key string
- node.insert_ord_seq_value(key, value);
- Ok(())
- },
- b'&' => {
- // No value
- node.insert_ord_seq_value(key, Cow::Borrowed(""));
- Ok(())
- },
- b'[' => {
- // The key continues to another level of nested.
- // Add a new unitialised level for this node and continue.
- if let Level::Uninitialised = *node {
- *node = Level::OrderedSeq(BTreeMap::default());
- }
- if let Level::OrderedSeq(ref mut map) = *node {
- // By parsing we drop down another level
- self.depth -= 1;
- let _ = self.parse(
- // Either take the existing entry, or add a new
- // unitialised level
- // Use this new node to keep parsing
- map.entry(key).or_insert(Level::Uninitialised))?;
- Ok(())
- } else {
- // We expected to parse into a seq here.
- Err(super::Error::parse_err(&format!("tried to insert a \
- new key into {:?}",
- node), self.acc))
- }
- },
- _ => {
- // Anything else is unexpected since we just finished
- // parsing a key.
- Err(super::Error::parse_err("Unexpected character found when parsing", self.acc))
- },
+ let res = loop {
+ if let Some(x) = self.peek() {
+ match *x {
+ b'=' => {
+ // Key is finished, parse up until the '&' as the value
+ self.clear_acc();
+ for _ in self.take_while(|b| *b != &b'&') {}
+ let value = self.collect_str()?;
+ // Reached the end of the key string
+ node.insert_ord_seq_value(key, value);
+ break Ok(())
+ },
+ b'&' => {
+ // No value
+ node.insert_ord_seq_value(key, Cow::Borrowed(""));
+ break Ok(())
+ },
+ b'[' => {
+ // The key continues to another level of nested.
+ // Add a new unitialised level for this node and continue.
+ if let Level::Uninitialised = *node {
+ *node = Level::OrderedSeq(BTreeMap::default());
+ }
+ if let Level::OrderedSeq(ref mut map) = *node {
+ // By parsing we drop down another level
+ self.depth -= 1;
+ let _ = self.parse(
+ // Either take the existing entry, or add a new
+ // unitialised level
+ // Use this new node to keep parsing
+ map.entry(key).or_insert(Level::Uninitialised))?;
+ break Ok(())
+ } else {
+ // We expected to parse into a seq here.
+ break Err(super::Error::parse_err(&format!("tried to insert a \
+ new key into {:?}",
+ node), self.index))
+ }
+ },
+ c => {
+ // Anything else is unexpected since we just finished
+ // parsing a key.
+ if self.strict {
+ break Err(super::Error::parse_err(format!("Unexpected character: {:?} found when parsing", c), self.index))
+ } else {
+ let _ = self.next();
+ }
+ },
+ }
+ } else {
+ // The string has ended, so the value is empty.
+ node.insert_ord_seq_value(key, Cow::Borrowed(""));
+ break Ok(())
}
- } else {
- // The string has ended, so the value is empty.
- node.insert_ord_seq_value(key, Cow::Borrowed(""));
- Ok(())
};
// We have finished parsing this level, so go back up a level.
self.depth += 1;
@@ -457,7 +519,7 @@ impl<'a> Parser<'a> {
}
_ => {
Err(super::Error::parse_err("non-indexed sequence of structs not \
- supported", self.acc))
+ supported", self.index))
},
},
None => {