![]() Server : Apache System : Linux server2.corals.io 4.18.0-348.2.1.el8_5.x86_64 #1 SMP Mon Nov 15 09:17:08 EST 2021 x86_64 User : corals ( 1002) PHP Version : 7.4.33 Disable Function : exec,passthru,shell_exec,system Directory : /home/corals/mautic.corals.io/app/bundles/EmailBundle/Helper/ |
<?php

namespace Mautic\EmailBundle\Helper;

/**
 * Converts an HTML document (typically an email body) into formatted plain text.
 *
 * The conversion runs in this order: blockquotes are turned into "> "-prefixed
 * PRE blocks, PRE blocks are protected from whitespace collapsing, the
 * search/replace tables below rewrite the remaining tags, leftover tags are
 * stripped, entities are decoded and finally the text is optionally word-wrapped.
 */
class PlainTextHelper
{
    public const ENCODING = 'UTF-8';

    /**
     * Contains the HTML content to convert.
     */
    protected string $html = '';

    /**
     * Contains the converted, formatted text.
     *
     * @var string
     */
    protected $text;

    /**
     * Maximum width of the formatted text, in columns.
     *
     * Set this value to 0 (or less) to ignore word wrapping
     * and not constrain text to a fixed-width column.
     *
     * NOTE(review): nothing in this class reads this property (the 'width'
     * option is used instead); it is kept so subclasses relying on it still work.
     *
     * @var int
     */
    protected $width = 70;

    /**
     * List of preg* regular expression patterns to search for,
     * used in conjunction with $replace.
     *
     * @var array
     *
     * @see $replace
     */
    protected $search = [
        "/\r/",                                           // Non-legal carriage return
        "/[\n\t]+/",                                      // Newlines and tabs
        '/<head[^>]*>.*?<\/head>/i',                      // <head>
        '/<script[^>]*>.*?<\/script>/i',                  // <script>s -- which strip_tags supposedly has problems with
        '/<style[^>]*>.*?<\/style>/i',                    // <style>s -- which strip_tags supposedly has problems with
        '/<p[^>]*>/i',                                    // <P>
        '/<br[^>]*>/i',                                   // <br>
        '/<i[^>]*>(.*?)<\/i>/i',                          // <i>
        '/<em[^>]*>(.*?)<\/em>/i',                        // <em>
        '/(<ul[^>]*>|<\/ul>)/i',                          // <ul> and </ul>
        '/(<ol[^>]*>|<\/ol>)/i',                          // <ol> and </ol>
        '/(<dl[^>]*>|<\/dl>)/i',                          // <dl> and </dl>
        '/<li[^>]*>(.*?)<\/li>/i',                        // <li> and </li>
        '/<dd[^>]*>(.*?)<\/dd>/i',                        // <dd> and </dd>
        '/<dt[^>]*>(.*?)<\/dt>/i',                        // <dt> and </dt>
        '/<li[^>]*>/i',                                   // <li>
        '/<hr[^>]*>/i',                                   // <hr>
        '/<div[^>]*>/i',                                  // <div>
        '/(<table[^>]*>|<\/table>)/i',                    // <table> and </table>
        '/(<tr[^>]*>|<\/tr>)/i',                          // <tr> and </tr>
        '/<td[^>]*>(.*?)<\/td>/i',                        // <td> and </td>
        '/<span class="_html2text_ignore">.+?<\/span>/i', // <span class="_html2text_ignore">...</span>
    ];

    /**
     * List of pattern replacements corresponding to patterns searched.
     *
     * @var array
     *
     * @see $search
     */
    protected $replace = [
        '',                              // Non-legal carriage return
        ' ',                             // Newlines and tabs
        '',                              // <head>
        '',                              // <script>s -- which strip_tags supposedly has problems with
        '',                              // <style>s -- which strip_tags supposedly has problems with
        "\n\n",                          // <P>
        "\n",                            // <br>
        '_\\1_',                         // <i>
        '_\\1_',                         // <em>
        "\n\n",                          // <ul> and </ul>
        "\n\n",                          // <ol> and </ol>
        "\n\n",                          // <dl> and </dl>
        "\t* \\1\n",                     // <li> and </li>
        " \\1\n",                        // <dd> and </dd>
        "\t* \\1",                       // <dt> and </dt>
        "\n\t* ",                        // <li>
        "\n-------------------------\n", // <hr>
        "<div>\n",                       // <div>
        "\n\n",                          // <table> and </table>
        "\n",                            // <tr> and </tr>
        "\t\t\\1\n",                     // <td> and </td>
        '',                              // <span class="_html2text_ignore">...</span>
    ];

    /**
     * List of preg* regular expression patterns to search for,
     * used in conjunction with $entReplace.
     *
     * The first two patterns target numeric references in the 0x80-0x9F range,
     * which carry win-1252 meanings that a UTF-8 entity decode does not produce,
     * so they are mapped explicitly before html_entity_decode() runs.
     *
     * @var array
     *
     * @see $entReplace
     */
    protected $entSearch = [
        '/&#153;/i',       // TM symbol in win-1252
        '/&#151;/i',       // m-dash in win-1252
        '/&(amp|#38);/i',  // Ampersand: see converter()
        '/[ ]{2,}/',       // Runs of spaces, post-handling
    ];

    /**
     * List of pattern replacements corresponding to patterns searched.
     *
     * @var array
     *
     * @see $entSearch
     */
    protected $entReplace = [
        '™',         // TM symbol
        '—',         // m-dash
        '|+|amp|+|', // Ampersand: see converter()
        ' ',         // Runs of spaces, post-handling
    ];

    /**
     * List of preg* regular expression patterns to search for
     * and replace using callback function.
     *
     * @var array
     */
    protected $callbackSearch = [
        '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i',            // h1 - h6
        '/<(b)( [^>]*)?>(.*?)<\/b>/i',                            // <b>
        '/<(strong)( [^>]*)?>(.*?)<\/strong>/i',                  // <strong>
        '/<(th)( [^>]*)?>(.*?)<\/th>/i',                          // <th> and </th>
        '/<(a) [^>]*href=("|\')([^"\']+)\2([^>]*)>(.*?)<\/a>/i',  // <a href="">
    ];

    /**
     * List of preg* regular expression patterns to search for in PRE body,
     * used in conjunction with $preReplace.
     *
     * @var array
     *
     * @see $preReplace
     */
    protected $preSearch = [
        "/\n/",
        "/\t/",
        '/ /',
        '/<pre[^>]*>/',
        '/<\/pre>/',
    ];

    /**
     * List of pattern replacements corresponding to patterns searched for PRE body.
     *
     * Whitespace is rewritten as &nbsp; entities so that it survives the later
     * "newlines and tabs" and "runs of spaces" passes in converter(); the
     * entities are decoded to non-breaking spaces by html_entity_decode().
     *
     * @var array
     *
     * @see $preSearch
     */
    protected $preReplace = [
        '<br>',
        '&nbsp;&nbsp;&nbsp;&nbsp;',
        '&nbsp;',
        '',
        '',
    ];

    /**
     * Temporary workspace used during PRE processing.
     *
     * @var string
     */
    protected $preContent = '';

    /**
     * Indicates whether content in the $html variable has been converted yet.
     *
     * @var bool
     *
     * @see $html, $text
     */
    protected $converted = false;

    /**
     * Contains URL addresses from links to be rendered in plain text.
     *
     * @var array
     *
     * @see buildlinkList()
     */
    protected $linkList = [];

    /**
     * Various configuration options (able to be set in the constructor).
     *
     * @var array<string, mixed>
     */
    protected array $options = [
        'do_links' => 'inline', // 'none'
        // 'inline' (show links inline)
        // 'nextline' (show links on the next line)
        // 'table' (if a table of link URLs should be listed after the text.

        'width' => 70, // Maximum width of the formatted text, in columns.
        // Set this value to 0 (or less) to ignore word wrapping
        // and not constrain text to a fixed-width column.

        'base_url'       => '',
        'preview_length' => 119, // Maximum length of the preview text
    ];

    /**
     * @param array<string, mixed> $options Set configuration options; merged over the defaults
     */
    public function __construct(array $options = [])
    {
        $this->options = array_merge($this->options, $options);
    }

    /**
     * Set the source HTML and invalidate any previously converted text.
     *
     * @param string $html HTML source content
     *
     * @return PlainTextHelper
     */
    public function setHtml($html)
    {
        $this->html      = $html;
        $this->converted = false;

        return $this;
    }

    /**
     * Returns the text, converted from HTML.
     *
     * The conversion is performed lazily and cached until setHtml() is called again.
     */
    public function getText(): string
    {
        if (!$this->converted) {
            $this->convert();
        }

        // preg_replace() returns null on a PCRE failure; never hand null to trim().
        return trim((string) $this->text);
    }

    /**
     * Returns the beginning of the converted text, limited to the
     * 'preview_length' option and followed by "..." when the text was cut.
     *
     * The length is measured in characters (not bytes) whenever mbstring is
     * available, so a multibyte UTF-8 character is never split in half.
     */
    public function getPreview(): string
    {
        $textContent = $this->getText();
        $limit       = (int) $this->options['preview_length'];

        if (function_exists('mb_substr')) {
            $preview   = mb_substr($textContent, 0, $limit, self::ENCODING);
            $truncated = mb_strlen($textContent, self::ENCODING) > $limit;
        } else {
            // Byte-wise fallback, identical to the historical behaviour.
            $preview   = substr($textContent, 0, $limit);
            $truncated = strlen($textContent) > $limit;
        }

        $preview = trim($preview);

        // If the text is longer than the preview length, append an ellipsis
        if ($truncated) {
            $preview .= '...';
        }

        return $preview;
    }

    /**
     * Runs the full conversion of $html into $text and, for the 'table' link
     * mode, appends the collected list of link URLs.
     */
    protected function convert()
    {
        $this->linkList = [];

        $text = trim(stripslashes($this->html));

        $this->converter($text);

        if ($this->linkList) {
            $text .= "\n\nLinks:\n------\n";
            foreach ($this->linkList as $i => $url) {
                $text .= '['.($i + 1).'] '.$url."\n";
            }
        }

        $this->text      = $text;
        $this->converted = true;
    }

    /**
     * Converts the given HTML fragment to plain text in place.
     *
     * Called for the whole document and, recursively, for each blockquote body.
     *
     * @param string $text HTML in, plain text out (passed by reference)
     */
    protected function converter(&$text)
    {
        $this->convertBlockquotes($text);
        $this->convertPre($text);
        $text = preg_replace($this->search, $this->replace, $text);
        $text = preg_replace_callback($this->callbackSearch, [$this, 'pregCallback'], $text);
        $text = strip_tags($text);
        $text = preg_replace($this->entSearch, $this->entReplace, $text);
        $text = html_entity_decode($text, ENT_QUOTES, self::ENCODING);

        // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
        $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);

        // Convert "|+|amp|+|" into "&", need to be done after handling of unknown entities
        // This properly handles situation of "&amp;quot;" in input string
        $text = str_replace('|+|amp|+|', '&', $text);

        // Normalise empty lines
        $text = preg_replace("/\n\s+\n/", "\n\n", $text);
        $text = preg_replace("/[\n]{3,}/", "\n\n", $text);

        // remove leading empty lines (can be produced by eg. P tag on the beginning)
        $text = ltrim($text, "\n");

        if ($this->options['width'] > 0) {
            $text = $this->linewrap($text, $this->options['width']);
        }
    }

    /**
     * Helper function called by preg_replace() on link replacement.
     *
     * Maintains an internal list of links to be displayed at the end of the
     * text, with numeric indices to the original point in the text they
     * appeared. Also makes an effort at identifying and handling absolute
     * and relative links.
     *
     * @param string      $link         URL of the link
     * @param string      $display      Part of the text to associate number with
     * @param string|null $linkOverride Link mode to use instead of the 'do_links' option
     *
     * @return string
     */
    protected function buildlinkList($link, $display, ?string $linkOverride = null)
    {
        $linkMethod = $linkOverride ?: $this->options['do_links'];

        if ('none' == $linkMethod) {
            return $display;
        }

        // Ignored link types
        if (preg_match('!^(javascript:|mailto:|#)!i', $link)) {
            return $display;
        }

        // Links with a scheme, or containing a {token} (raw or URL-encoded braces), are used verbatim
        if (preg_match('!^([a-z][a-z0-9.+-]+:)!i', $link) || preg_match('!({|%7B)(.*?)(}|%7D)!', $link)) {
            $url = $link;
        } else {
            $url = $this->options['base_url'];
            if (!str_starts_with($link, '/')) {
                $url .= '/';
            }
            $url .= $link;
        }

        if ('table' == $linkMethod) {
            // Strict search: URLs are strings and must not be matched as loosely-equal numerics
            if (false === ($index = array_search($url, $this->linkList, true))) {
                $index            = count($this->linkList);
                $this->linkList[] = $url;
            }

            return $display.' ['.($index + 1).']';
        } elseif ('nextline' == $linkMethod) {
            return $display."\n[".$url.']';
        }

        // link_method defaults to inline
        return $display.' ['.$url.']';
    }

    /**
     * Helper function for PRE body conversion.
     *
     * Each PRE element is replaced, one at a time, by a DIV whose line breaks
     * and whitespace have been rewritten using $preSearch / $preReplace.
     *
     * @param string $text HTML content (passed by reference)
     */
    protected function convertPre(&$text)
    {
        // get the content of PRE element
        while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
            $this->preContent = $matches[1];

            // Run our defined tags search-and-replace with callback
            $this->preContent = preg_replace_callback(
                $this->callbackSearch,
                [$this, 'pregCallback'],
                $this->preContent
            );

            // convert the content
            $this->preContent = sprintf(
                '<div><br>%s<br></div>',
                preg_replace($this->preSearch, $this->preReplace, $this->preContent)
            );

            // replace the content (use callback because content can contain $0 variable)
            $text = preg_replace_callback(
                '/<pre[^>]*>.*<\/pre>/ismU',
                [$this, 'pregPreCallback'],
                $text,
                1
            );

            // free memory
            $this->preContent = '';
        }
    }

    /**
     * Helper function for BLOCKQUOTE body conversion.
     *
     * Every top-level blockquote is converted on its own (nested ones are
     * handled by the recursive converter() call), prefixed with "> " citation
     * markers and wrapped in a PRE block.
     *
     * @param string $text HTML content (passed by reference)
     */
    protected function convertBlockquotes(&$text)
    {
        if (!preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
            return;
        }

        $start  = 0;
        $taglen = 0;
        $level  = 0;
        // Total number of bytes by which $text has shrunk so far. The match offsets
        // refer to the ORIGINAL string, so every position must be shifted by this.
        $diff = 0;

        foreach ($matches[0] as $m) {
            $isClosingTag = '<' == $m[0][0] && '/' == $m[0][1];

            if (!$isClosingTag) {
                if (0 == $level) {
                    $start  = $m[1];
                    $taglen = strlen($m[0]);
                }
                ++$level;
                continue;
            }

            --$level;
            if ($level < 0) {
                $level = 0; // malformed HTML: go to next blockquote
                continue;
            }
            if ($level > 0) {
                continue; // skip inner blockquote
            }

            $end = $m[1];
            $len = $end - $taglen - $start;

            // Get blockquote content
            $body = substr($text, $start + $taglen - $diff, $len);

            // Set text width (leave room for the "> " marker)
            $pWidth = $this->options['width'];
            if ($this->options['width'] > 0) {
                $this->options['width'] -= 2;
            }

            // Convert blockquote content
            $body = trim($body);
            $this->converter($body);

            // Add citation markers and create PRE block
            $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
            $body = '<pre>'.htmlspecialchars($body).'</pre>';

            // Re-set text width
            $this->options['width'] = $pWidth;

            // Replace content
            $text = substr($text, 0, $start - $diff)
                .$body
                .substr($text, $end + strlen($m[0]) - $diff);

            // Accumulate (not overwrite) the shift: earlier replacements still apply
            // to every later blockquote's original offsets.
            $diff += $len + $taglen + strlen($m[0]) - strlen($body);
            unset($body);
        }
    }

    /**
     * Callback function for preg_replace_callback use.
     *
     * Dispatches on the tag name captured by the $callbackSearch patterns.
     *
     * @param array $matches PREG matches
     *
     * @return string
     */
    protected function pregCallback($matches)
    {
        switch (strtolower($matches[1])) {
            case 'b':
            case 'strong':
                return $matches[3];
            case 'th':
                return $this->toupper("\t\t".$matches[3]."\n");
            case 'h':
                return $this->toupper("\n\n".$matches[3]."\n\n");
            case 'a':
                // override the link method
                $linkOverride = null;
                if (preg_match('/_html2text_link_(\w+)/', $matches[4], $linkOverrideMatch)) {
                    $linkOverride = $linkOverrideMatch[1];
                }
                // Remove spaces in URL (#1487805)
                $url = str_replace(' ', '', $matches[3]);

                return $this->buildlinkList($url, $matches[5], $linkOverride);
        }

        return '';
    }

    /**
     * Callback function for preg_replace_callback use in PRE content handler.
     *
     * Returns the prepared PRE replacement regardless of what was matched.
     *
     * @param array $matches PREG matches
     *
     * @return string
     */
    protected function pregPreCallback(/* @noinspection PhpUnusedParameterInspection */ $matches)
    {
        return $this->preContent;
    }

    /**
     * Strtoupper function with HTML tags and entities handling.
     *
     * @param string $str Text to convert
     *
     * @return string Converted text
     */
    private function toupper($str): string
    {
        // string can contain HTML tags
        $chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

        // convert toupper only the text between HTML tags
        foreach ($chunks as $i => $chunk) {
            if ('<' != $chunk[0]) {
                $chunks[$i] = $this->strtoupper($chunk);
            }
        }

        return implode('', $chunks);
    }

    /**
     * Strtoupper multibyte wrapper function with HTML entities handling.
     *
     * Entities are decoded before upper-casing and re-encoded afterwards so
     * that entity names themselves are not upper-cased.
     *
     * @param string $str Text to convert
     *
     * @return string Converted text
     */
    private function strtoupper($str): string
    {
        $str = html_entity_decode($str, ENT_COMPAT, self::ENCODING);

        if (function_exists('mb_strtoupper')) {
            $str = mb_strtoupper($str, self::ENCODING);
        } else {
            $str = strtoupper($str);
        }

        return htmlspecialchars($str, ENT_COMPAT, self::ENCODING);
    }

    /**
     * Word-wraps every line of the text separately; each resulting line is
     * trimmed and terminated with "\n".
     *
     * @param string     $text      Text to wrap
     * @param int        $width     Maximum line width, in columns
     * @param string     $breakline Line break inserted by wordwrap()
     * @param bool|false $cut       Whether words longer than $width are cut
     */
    private function linewrap($text, $width, $breakline = "\n", $cut = false): string
    {
        $wrapped = '';

        foreach (explode("\n", $text) as $line) {
            $wrapped .= trim(wordwrap(trim($line), $width, $breakline, $cut));
            $wrapped .= "\n";
        }

        return $wrapped;
    }
}