From f26bdc9109abffc86ea78bb85cf3504a771d17ad Mon Sep 17 00:00:00 2001 From: Jeremiah Senkpiel Date: Sat, 9 Jul 2022 18:02:44 -0700 Subject: feat: non-strict mode replaces invalid UTF-8 (#62) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes non-strict mode percent-decode utf-8 lossily. This means that invalid UTF-8 percent encoded sequences, such as `%E9`, are decoded as the unicode replacement character (� `U+FFFD`). Refs: https://github.com/samscott89/serde_qs/issues/43 --- tests/test_deserialize.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tests') diff --git a/tests/test_deserialize.rs b/tests/test_deserialize.rs index 40cd43b..92e5f53 100644 --- a/tests/test_deserialize.rs +++ b/tests/test_deserialize.rs @@ -560,6 +560,19 @@ fn strict_mode() { .deserialize_str("vec%5B%5D=1&vec%5B%5D=2") .unwrap(); assert_eq!(params.vec, vec![1, 2]); + + #[derive(Debug, Serialize, Deserialize, PartialEq)] + struct StringQueryParam { + field: String, + } + + // Ensure strict mode produces an error for invalid UTF-8 percent encoded characters. + let invalid_utf8: Result<StringQueryParam, _> = strict_config.deserialize_str("field=%E9"); + assert!(invalid_utf8.is_err()); + + // Ensure loose mode invalid UTF-8 percent encoded characters become � U+FFFD. + let valid_utf8: StringQueryParam = loose_config.deserialize_str("field=%E9").unwrap(); + assert_eq!(valid_utf8.field, "�"); } #[test] -- cgit v1.2.3