// From d00c336a55d1dd047cdb778cb44ae441ea6b842e Mon Sep 17 00:00:00 2001
// From: Aaron Parecki
// Date: Thu, 2 Feb 2017 21:19:35 -0800
// Subject: clean up editor HTML when posting to Micropub endpoint (closes #67)
//
// Patch to lib/helpers.php (47 insertions): adds sanitize_editor_html() after
// correct_photo_rotation().

/**
 * Sanitize HTML produced by the editor before it is posted to the Micropub endpoint.
 *
 * The markup is run through HTMLPurifier with a whitelist of inline and block
 * elements, only `data:`, `http:` and `https:` URIs are kept, and all `class`
 * attributes are stripped. The purified markup is then post-processed to
 * restore carriage returns and to drop empty paragraphs.
 *
 * @param string $html Raw HTML from the editor.
 * @return string The sanitized HTML.
 */
function sanitize_editor_html($html) {
  $config = HTMLPurifier_Config::createDefault();

  // Disable the definition cache so no cache directory is needed.
  $config->set('Cache.DefinitionImpl', null);

  $config->set('HTML.AllowedElements', [
    'a',
    'abbr',
    'b',
    'code',
    'del',
    'em',
    'i',
    'img',
    'q',
    'strike',
    'strong',
    'blockquote',
    'pre',
    'p',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'ul',
    'li',
    'ol',
  ]);

  // Allow data: URIs in addition to http(s)
  $config->set('URI.AllowedSchemes', ['data' => true, 'http' => true, 'https' => true]);

  // Strip all classes from elements
  $config->set('Attr.AllowedClasses', '');

  $purifier = new HTMLPurifier($config);
  $sanitized = $purifier->purify($html);

  // Carriage returns can come back as numeric character references; turn them
  // into real CR characters. Both the hex and decimal spellings are handled.
  // NOTE(review): the exact reference emitted depends on the purifier's lexer/serializer.
  $sanitized = str_replace(['&#xD;', '&#13;'], "\r", $sanitized);

  // Remove empty paragraphs, including ones that contain only a line break
  // (any of the common <br> spellings).
  $sanitized = str_replace(
    ['<p><br /></p>', '<p><br/></p>', '<p><br></p>', '<p></p>'],
    '',
    $sanitized
  );

  return $sanitized;
}