summary | refs | log | tree | commit | diff
path: root/src/de
diff options
context:
space:
mode:
author Jeremiah Senkpiel <fishrock123@rocketmail.com> 2022-07-09 18:02:44 -0700
committer GitHub <noreply@github.com> 2022-07-09 20:02:44 -0500
commit f26bdc9109abffc86ea78bb85cf3504a771d17ad (patch)
tree dbd543924da72c713578f8cb28e6facebcf04e5d /src/de
parent 3ddcd171ea66efa94821533299f48788a18fb33a (diff)
feat: non-strict mode replaces invalid UTF-8 (#62)
This change makes non-strict mode percent-decode UTF-8 lossily. This means that invalid percent-encoded UTF-8 sequences, such as `%E9`, are decoded as the Unicode replacement character (� `U+FFFD`). Refs: https://github.com/samscott89/serde_qs/issues/43
Diffstat (limited to 'src/de')
-rw-r--r-- src/de/parse.rs 10
1 file changed, 9 insertions, 1 deletion
diff --git a/src/de/parse.rs b/src/de/parse.rs
index 5c85b12..fe14bb4 100644
--- a/src/de/parse.rs
+++ b/src/de/parse.rs
@@ -242,8 +242,16 @@ impl<'a> Parser<'a> {
/// present.
fn collect_str(&mut self) -> Result<Cow<'a, str>> {
let replaced = replace_plus(&self.inner[self.acc.0..self.acc.1 - 1]);
+ let decoder = percent_encoding::percent_decode(&replaced);
+
+ let maybe_decoded = if self.strict {
+ decoder.decode_utf8()?
+ } else {
+ decoder.decode_utf8_lossy()
+ };
+
let ret: Result<Cow<'a, str>> =
- match percent_encoding::percent_decode(&replaced).decode_utf8()? {
+ match maybe_decoded {
Cow::Borrowed(_) => {
match replaced {
Cow::Borrowed(_) => {