From f26bdc9109abffc86ea78bb85cf3504a771d17ad Mon Sep 17 00:00:00 2001 From: Jeremiah Senkpiel Date: Sat, 9 Jul 2022 18:02:44 -0700 Subject: feat: non-strict mode replaces invalid UTF-8 (#62) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes non-strict mode percent-decode UTF-8 lossily. This means that invalid percent-encoded UTF-8 sequences, such as `%E9`, are decoded as the Unicode replacement character (�, `U+FFFD`). Refs: https://github.com/samscott89/serde_qs/issues/43 --- src/de/parse.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/de/parse.rs') diff --git a/src/de/parse.rs b/src/de/parse.rs index 5c85b12..fe14bb4 100644 --- a/src/de/parse.rs +++ b/src/de/parse.rs @@ -242,8 +242,16 @@ impl<'a> Parser<'a> { /// present. fn collect_str(&mut self) -> Result> { let replaced = replace_plus(&self.inner[self.acc.0..self.acc.1 - 1]); + let decoder = percent_encoding::percent_decode(&replaced); + + let maybe_decoded = if self.strict { + decoder.decode_utf8()? + } else { + decoder.decode_utf8_lossy() + }; + let ret: Result> = - match percent_encoding::percent_decode(&replaced).decode_utf8()? { + match maybe_decoded { Cow::Borrowed(_) => { match replaced { Cow::Borrowed(_) => { -- cgit v1.2.3