use crate::utils::{replace_space, QS_ENCODE_SET}; use super::*; use percent_encoding::percent_encode; use serde::de; use std::borrow::Cow; use std::iter::Iterator; use std::slice::Iter; use std::str; macro_rules! tu { ($x:expr) => { match $x { Some(x) => *x, None => return Err(de::Error::custom("query string ended before expected")), } }; } impl<'a> Level<'a> { /// If this `Level` value is indeed a map, then attempt to insert /// `value` for key `key`. /// Returns error if `self` is not a map, or already has an entry for that /// key. fn insert_map_value(&mut self, key: Cow<'a, str>, value: Cow<'a, str>) { if let Level::Nested(ref mut map) = *self { match map.entry(key) { Entry::Occupied(mut o) => { let key = o.key(); let error = if key.contains('[') { let newkey = percent_encode(key.as_bytes(), QS_ENCODE_SET) .map(replace_space) .collect::(); format!("Multiple values for one key: \"{}\"\nInvalid field contains an encoded bracket -- did you mean to use non-strict mode?\n https://docs.rs/serde_qs/latest/serde_qs/#strict-vs-non-strict-modes", newkey) } else { format!("Multiple values for one key: \"{}\"", key) }; // Throw away old result; map is now invalid anyway. let _ = o.insert(Level::Invalid(error)); } Entry::Vacant(vm) => { // Map is empty, result is None let _ = vm.insert(Level::Flat(value)); } } } else if let Level::Uninitialised = *self { let mut map = BTreeMap::default(); let _ = map.insert(key, Level::Flat(value)); *self = Level::Nested(map); } else { *self = Level::Invalid( "Attempted to insert map value into \ non-map structure" .to_string(), ); } } /// If this `Level` value is indeed a seq, then push a new value fn insert_ord_seq_value(&mut self, key: usize, value: Cow<'a, str>) { if let Level::OrderedSeq(ref mut map) = *self { match map.entry(key) { Entry::Occupied(mut o) => { // Throw away old result; map is now invalid anyway. let _ = o.insert(Level::Invalid("Multiple values for one key".to_string())); } Entry::Vacant(vm) => { // Map is empty, result is None let _ = vm.insert(Level::Flat(value)); } } } else if let Level::Uninitialised = *self { // To reach here, self is either an OrderedSeq or nothing. let mut map = BTreeMap::default(); let _ = map.insert(key, Level::Flat(value)); *self = Level::OrderedSeq(map); } else { *self = Level::Invalid( "Attempted to insert seq value into \ non-seq structure" .to_string(), ); } } /// If this `Level` value is indeed a seq, then attempt to insert /// `value` for key `key`. /// Returns error if `self` is not a seq, or already has an entry for that /// key. fn insert_seq_value(&mut self, value: Cow<'a, str>) { // Reached the end of the key string if let Level::Sequence(ref mut seq) = *self { seq.push(Level::Flat(value)); } else if let Level::Uninitialised = *self { let seq = vec![Level::Flat(value)]; *self = Level::Sequence(seq); } else { *self = Level::Invalid( "Attempted to insert seq value into \ non-seq structure" .to_string(), ); } } } /// The `Parser` struct is a stateful querystring parser. /// It iterates over a slice of bytes, with a range to track the current /// start/end points of a value. /// The parser additionally supports peeking values, which allows them to be /// re-used (precisely once, unlike with `Peekable` from `std::iter`). pub struct Parser<'a> { inner: &'a [u8], iter: Iter<'a, u8>, index: usize, acc: (usize, usize), peeked: Option<&'a u8>, depth: usize, // stores the current depth, for use in bounded-depth parsing strict: bool, state: ParsingState, } /// The parsing logic varies slightly based on whether it is a key or a value /// (determines how encoded brackets are parse in non-strict mode) /// This tracks the state. enum ParsingState { Init, Key, Value, } impl<'a> Iterator for Parser<'a> { type Item = &'a u8; #[inline] fn next(&mut self) -> Option { let preparse_brackets = match self.state { ParsingState::Value => false, _ => !self.strict, }; if preparse_brackets { // in non-strict mode, we will happily decode any bracket match self.peeked.take() { Some(v) => Some(v), None => { self.index += 1; self.acc.1 += 1; match self.iter.next() { Some(v) if v == &b'%' && self.iter.len() >= 2 => { match &self.iter.as_slice()[..2] { b"5B" => { // skip the next two characters let _ = self.iter.next(); let _ = self.iter.next(); self.index += 2; Some(&b'[') } b"5D" => { // skip the next two characters let _ = self.iter.next(); let _ = self.iter.next(); self.index += 2; Some(&b']') } _ => Some(v), } } Some(v) => Some(v), None => None, } } } } else { match self.peeked.take() { Some(v) => Some(v), None => { self.index += 1; self.acc.1 += 1; self.iter.next() } } } } } impl Parser<'_> { #[inline] fn peek(&mut self) -> Option<::Item> { if self.peeked.is_some() { self.peeked } else if let Some(x) = self.next() { self.peeked = Some(x); Some(x) } else { None } } } /// Replace b'+' with b' ' /// Copied from [`form_urlencoded`](https://github.com/servo/rust-url/blob/380be29859adb859e861c2d765897c22ec878e01/src/form_urlencoded.rs#L125). fn replace_plus(input: &[u8]) -> Cow<[u8]> { match input.iter().position(|&b| b == b'+') { None => Cow::Borrowed(input), Some(first_position) => { let mut replaced = input.to_owned(); replaced[first_position] = b' '; for byte in &mut replaced[first_position + 1..] { if *byte == b'+' { *byte = b' '; } } Cow::Owned(replaced) } } } impl<'a> Parser<'a> { pub fn new(encoded: &'a [u8], depth: usize, strict: bool) -> Self { Parser { inner: encoded, iter: encoded.iter(), acc: (0, 0), index: 0, peeked: None, depth, strict, state: ParsingState::Init, } } /// Resets the accumulator range by setting `(start, end)` to `(end, end)`. fn clear_acc(&mut self) { self.acc = (self.index, self.index); } /// Extracts a string from the internal byte slice from the range tracked by /// the parser. /// Avoids allocations when neither percent encoded, nor `'+'` values are /// present. fn collect_str(&mut self) -> Result> { let replaced = replace_plus(&self.inner[self.acc.0..self.acc.1 - 1]); let decoder = percent_encoding::percent_decode(&replaced); let maybe_decoded = if self.strict { decoder.decode_utf8()? } else { decoder.decode_utf8_lossy() }; let ret: Result> = match maybe_decoded { Cow::Borrowed(_) => { match replaced { Cow::Borrowed(_) => { // In this case, neither method made replacements, so we // reuse the original bytes let res = str::from_utf8(&self.inner[self.acc.0..self.acc.1 - 1])?; Ok(Cow::Borrowed(res)) } Cow::Owned(owned) => { let res = String::from_utf8(owned)?; Ok(Cow::Owned(res)) } } } Cow::Owned(owned) => Ok(Cow::Owned(owned)), }; self.clear_acc(); ret } /// In some ways the main way to use a `Parser`, this runs the parsing step /// and outputs a simple `Deserializer` over the parsed map. pub(crate) fn as_deserializer(&mut self) -> Result> { let map = BTreeMap::default(); let mut root = Level::Nested(map); // Parses all top level nodes into the `root` map. while self.parse(&mut root)? {} let iter = match root { Level::Nested(map) => map.into_iter(), _ => BTreeMap::default().into_iter(), }; Ok(QsDeserializer { iter, value: None }) } /// This is the top level parsing function. It checks the first character to /// decide the type of key (nested, sequence, etc.) and to call the /// approprate parsing function. /// /// Returns `Ok(false)` when there is no more string to parse. fn parse(&mut self, node: &mut Level<'a>) -> Result { // First character determines parsing type if self.depth == 0 { // Hit the maximum depth level, so parse everything as a key let key = self.parse_key(b'=', false)?; self.parse_map_value(key, node)?; return Ok(true); } match self.next() { Some(x) => { match *x { b'[' => { loop { self.clear_acc(); // Only peek at the next value to determine the key type. match tu!(self.peek()) { // key is of the form "[..=", not really allowed. b'[' => { // If we're in strict mode, error, otherwise just ignore it. if self.strict { return Err(super::Error::parse_err("found another opening bracket before the closed bracket", self.index)); } else { let _ = self.next(); } } // key is simply "[]", so treat as a seq. b']' => { // throw away the bracket let _ = self.next(); self.clear_acc(); self.parse_seq_value(node)?; return Ok(true); } // First character is an integer, attempt to parse it as an integer key b'0'..=b'9' => { let key = self.parse_key(b']', true)?; let key = key.parse().map_err(Error::from)?; self.parse_ord_seq_value(key, node)?; return Ok(true); } // Key is "[a..=" so parse up to the closing "]" 0x20..=0x2f | 0x3a..=0x5a | 0x5c | 0x5e..=0x7e => { let key = self.parse_key(b']', true)?; self.parse_map_value(key, node)?; return Ok(true); } c => { if self.strict { return Err(super::Error::parse_err( format!( "unexpected character: {}", String::from_utf8_lossy(&[c]) ), self.index, )); } else { let _ = self.next(); } } } } } // Skip empty byte sequences (e.g. leading `&`, trailing `&`, `&&`, ...) b'&' => { self.clear_acc(); Ok(true) } // This means the key should be a root key // of the form "abc" or "abc[..=]" // We do actually allow integer keys here since they cannot // be confused with sequences _ => { let key = { self.parse_key(b'[', false)? }; // Root keys are _always_ map values self.parse_map_value(key, node)?; Ok(true) } } } // Ran out of characters to parse None => Ok(false), } } /// The iterator is currently pointing at a key, so parse up until the /// `end_on` value. This will either be `'['` when the key is the root key, /// or `']'` when the key is a nested key. In the former case, `'='` will /// also finish the key parsing. /// /// The `consume` flag determines whether the end character should be /// returned to the buffer to be peeked. This is important when /// parsing keys like `abc[def][ghi]` since the `'['` character is /// needed to for the next iteration of `parse`. fn parse_key(&mut self, end_on: u8, consume: bool) -> Result> { self.state = ParsingState::Key; loop { if let Some(x) = self.next() { match *x { c if c == end_on => { // Add this character back to the buffer for peek. if !consume { self.peeked = Some(x); } return self.collect_str(); } b'=' => { // Allow the '=' byte only when parsing keys within [] if end_on != b']' { // Otherwise, we have reached the end of the key // Add this character back to the buffer for peek. self.peeked = Some(x); return self.collect_str(); } // otherwise do nothing, so '=' is accumulated } b'&' => { // important to keep the `&` character so we know the // key-value is of the form `key&..=` (i.e. no value) self.peeked = Some(&b'&'); return self.collect_str(); } _ => { // for any other character // do nothing, keep adding to key } } } else { // no more string to parse return self.collect_str(); } } } /// The `(key,value)` pair is determined to be corresponding to a map entry, /// so parse it as such. The first part of the `key` has been parsed. fn parse_map_value(&mut self, key: Cow<'a, str>, node: &mut Level<'a>) -> Result<()> { self.state = ParsingState::Key; let res = loop { if let Some(x) = self.peek() { match *x { b'=' => { // Key is finished, parse up until the '&' as the value self.clear_acc(); self.state = ParsingState::Value; for _ in self.take_while(|b| *b != &b'&') {} let value: Cow<'a, str> = self.collect_str()?; node.insert_map_value(key, value); break Ok(()); } b'&' => { // No value node.insert_map_value(key, Cow::Borrowed("")); break Ok(()); } b'[' => { // The key continues to another level of nested. // Add a new unitialised level for this node and continue. if let Level::Uninitialised = *node { *node = Level::Nested(BTreeMap::default()); } if let Level::Nested(ref mut map) = *node { // By parsing we drop down another level self.depth -= 1; // Either take the existing entry, or add a new // unitialised level // Use this new node to keep parsing let _ = self.parse(map.entry(key).or_insert(Level::Uninitialised))?; break Ok(()); } else { // We expected to parse into a map here. break Err(super::Error::parse_err( format!( "tried to insert a \ new key into {:?}", node ), self.index, )); } } c => { // Anything else is unexpected since we just finished // parsing a key. if self.strict { break Err(super::Error::parse_err( format!( "Unexpected character: '{}' found when parsing", String::from_utf8_lossy(&[c]) ), self.index, )); } else { let _ = self.next(); } } } } else { // The string has ended, so the value is empty. node.insert_map_value(key, Cow::Borrowed("")); break Ok(()); } }; // We have finished parsing this level, so go back up a level. self.depth += 1; res } /// The `(key,value)` pair is determined to be corresponding to an /// ordered sequence. /// Basically the same as the above, but we insert into `OrderedSeq` /// Can potentially be merged? fn parse_ord_seq_value(&mut self, key: usize, node: &mut Level<'a>) -> Result<()> { self.state = ParsingState::Key; let res = loop { if let Some(x) = self.peek() { match *x { b'=' => { // Key is finished, parse up until the '&' as the value self.clear_acc(); self.state = ParsingState::Value; for _ in self.take_while(|b| *b != &b'&') {} let value = self.collect_str()?; // Reached the end of the key string node.insert_ord_seq_value(key, value); break Ok(()); } b'&' => { // No value node.insert_ord_seq_value(key, Cow::Borrowed("")); break Ok(()); } b'[' => { // The key continues to another level of nested. // Add a new unitialised level for this node and continue. if let Level::Uninitialised = *node { *node = Level::OrderedSeq(BTreeMap::default()); } if let Level::OrderedSeq(ref mut map) = *node { // By parsing we drop down another level self.depth -= 1; let _ = self.parse( // Either take the existing entry, or add a new // unitialised level // Use this new node to keep parsing map.entry(key).or_insert(Level::Uninitialised), )?; break Ok(()); } else { // We expected to parse into a seq here. break Err(super::Error::parse_err( format!( "tried to insert a \ new key into {:?}", node ), self.index, )); } } c => { // Anything else is unexpected since we just finished // parsing a key. if self.strict { break Err(super::Error::parse_err( format!("Unexpected character: {:?} found when parsing", c), self.index, )); } else { let _ = self.next(); } } } } else { // The string has ended, so the value is empty. node.insert_ord_seq_value(key, Cow::Borrowed("")); break Ok(()); } }; // We have finished parsing this level, so go back up a level. self.depth += 1; res } /// The `(key,value)` pair is determined to be corresponding to an /// unordered sequence. /// This must be the final level of nesting, so assume we have a value fn parse_seq_value(&mut self, node: &mut Level<'a>) -> Result<()> { self.state = ParsingState::Key; let res = match self.peek() { Some(x) => { match *x { b'=' => { // Key is finished, parse up until the '&' as the value self.clear_acc(); self.state = ParsingState::Value; for _ in self.take_while(|b| *b != &b'&') {} let value = self.collect_str()?; node.insert_seq_value(value); Ok(()) } b'&' => { // key value is empty node.insert_seq_value(Cow::Borrowed("")); Ok(()) } _ => Err(super::Error::parse_err( "non-indexed sequence of \ structs not supported", self.index, )), } } None => { // The string has ended, so the value is empty. node.insert_seq_value(Cow::Borrowed("")); Ok(()) } }; // We have finished parsing this level, so go back up a level. self.depth += 1; res } }