tags line by line, so
 * pasted Markdown like `# Heading` ends up as `<p># Heading</p>
` and never
 * gets rendered as a real heading. This normalizer detects that situation
 * and runs the extracted text through a Markdown renderer.
 *
 * If the HTML already contains structural elements produced by the editor
 * toolbar (headings, lists, blockquotes, code blocks, tables), the content
 * is left untouched so real rich-text edits are preserved.
 */
class RichTextMarkdownNormalizer
{
    /**
     * HTML tags that indicate the user already used the editor toolbar
     * for structure.
     *
     * @var list<string>
     */
    private const STRUCTURAL_TAGS = [
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
        'ul', 'ol',
        'blockquote',
        'pre',
        'table',
    ];

    /**
     * Regular expressions that indicate plain-text Markdown syntax.
     * Each one is multi-line anchored, so a single matching line is enough.
     *
     * @var list<string>
     */
    private const MARKDOWN_LINE_PATTERNS = [
        '/^\s{0,3}#{1,6}\s+\S/m',        // ATX headings
        '/^\s{0,3}[-*+]\s+\S/m',         // bullet list
        '/^\s{0,3}\d{1,9}[.)]\s+\S/m',   // ordered list
        '/^\s{0,3}>\s?/m',               // blockquote
        '/^\s{0,3}```/m',                // fenced code block
    ];

    /**
     * Render a raw Markdown string directly to HTML using the same
     * configuration as normalize().
     *
     * @param  string  $markdown  Markdown source.
     * @return string Rendered HTML with surrounding whitespace trimmed.
     */
    public function toHtml(string $markdown): string
    {
        return trim($this->renderer()->toHtml($markdown));
    }

    /**
     * Convert editor output that is really Markdown-in-disguise into HTML.
     *
     * - null / blank input is returned unchanged.
     * - Input without any HTML tag is rendered as Markdown.
     * - Input containing a structural tag is returned unchanged.
     * - Otherwise the text is extracted and, if it looks like Markdown,
     *   rendered; if not, the original HTML is returned.
     *
     * Whenever rendering yields an empty string the original input is
     * returned instead.
     *
     * @param  string|null  $html  Raw value coming from the editor.
     * @return string|null Normalized HTML, or the input unchanged.
     */
    public function normalize(?string $html): ?string
    {
        if ($html === null || trim($html) === '') {
            return $html;
        }

        // Pure plain text (no HTML tags at all): render directly as Markdown
        // so newline-separated paragraphs, headings, links, etc. become HTML.
        if (! $this->containsHtmlTags($html)) {
            return $this->renderOrKeep($html, $html);
        }

        // Already structured via the editor toolbar: leave untouched.
        if ($this->containsStructuralHtml($html)) {
            return $html;
        }

        // Paragraph-only HTML: if the inner text looks like Markdown
        // (pasted plain text wrapped in <p> by Tiptap), extract and render.
        $plainText = $this->extractPlainTextPreservingLineBreaks($html);

        return $this->looksLikeMarkdown($plainText)
            ? $this->renderOrKeep($plainText, $html)
            : $html;
    }

    /**
     * Render $markdown; fall back to $original when the result is empty.
     */
    private function renderOrKeep(string $markdown, string $original): string
    {
        $rendered = $this->toHtml($markdown);

        return $rendered === '' ? $original : $rendered;
    }

    /**
     * Whether the input contains anything shaped like an HTML tag,
     * closing tag, comment or doctype.
     */
    private function containsHtmlTags(string $input): bool
    {
        return preg_match('/<[a-z!\/][^>]*>/i', $input) === 1;
    }

    /**
     * Build a MarkdownRenderer from the `markdown` config array,
     * substituting a default for every key that is missing.
     */
    private function renderer(): MarkdownRenderer
    {
        $config = config('markdown');

        return new MarkdownRenderer(
            commonmarkOptions: $config['commonmark_options'] ?? [],
            highlightCode: $config['code_highlighting']['enabled'] ?? false,
            highlightTheme: $config['code_highlighting']['theme'] ?? 'github-light',
            cacheStoreName: $config['cache_store'] ?? null,
            renderAnchors: $config['add_anchors_to_headings'] ?? false,
            renderAnchorsAsLinks: $config['render_anchors_as_links'] ?? false,
            extensions: $config['extensions'] ?? [],
            blockRenderers: $config['block_renderers'] ?? [],
            inlineRenderers: $config['inline_renderers'] ?? [],
            inlineParsers: $config['inline_parsers'] ?? [],
            cacheDuration: $config['cache_duration'] ?? null,
        );
    }

    /**
     * Whether the HTML contains the opening of any structural tag
     * (case-insensitive substring match on `<tag`).
     */
    private function containsStructuralHtml(string $html): bool
    {
        foreach (self::STRUCTURAL_TAGS as $tag) {
            if (stripos($html, '<'.$tag) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Whether at least one line of the text matches a Markdown block pattern.
     */
    private function looksLikeMarkdown(string $text): bool
    {
        foreach (self::MARKDOWN_LINE_PATTERNS as $pattern) {
            if (preg_match($pattern, $text) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Turn paragraph-level HTML into Markdown-flavoured plain text.
     *
     * Each top-level node becomes one block; blocks are separated by a
     * blank line and empty blocks are dropped. If the markup cannot be
     * parsed, the tags are simply stripped and entities decoded.
     *
     * @param  string  $html  UTF-8 HTML fragment.
     * @return string Extracted text, trimmed.
     */
    private function extractPlainTextPreservingLineBreaks(string $html): string
    {
        $dom = new DOMDocument;

        $previous = libxml_use_internal_errors(true);

        try {
            $loaded = $dom->loadHTML(
                $this->wrapFragmentForDom($html),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
            );
        } finally {
            // Always restore the caller's libxml error handling.
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        // The first <div> in document order is the wrapper added below.
        $root = $loaded ? $dom->getElementsByTagName('div')->item(0) : null;

        if ($root === null) {
            return html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5);
        }

        $blocks = [];

        foreach ($root->childNodes as $child) {
            $isParagraph = $child->nodeType === XML_ELEMENT_NODE
                && strtolower($child->nodeName) === 'p';

            // Only the node's children are converted, so any other wrapper
            // contributes just its inner content (trimmed).
            $text = $this->nodeToMarkdown($child);
            $blocks[] = $isParagraph ? $text : trim($text);
        }

        $nonEmpty = array_filter($blocks, static fn ($block) => $block !== '');

        return trim(implode("\n\n", $nonEmpty));
    }

    /**
     * Wrap a fragment in a single <div> root and make it encoding-safe.
     *
     * loadHTML() does not assume UTF-8 for input that declares no charset,
     * so non-ASCII characters are turned into numeric entities first. When
     * mbstring is unavailable an XML encoding hint is prepended instead.
     */
    private function wrapFragmentForDom(string $html): string
    {
        if (function_exists('mb_encode_numericentity')) {
            $safe = mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

            return '<div>'.$safe.'</div>';
        }

        return '<?xml encoding="UTF-8"><div>'.$html.'</div>';
    }

    /**
     * Walk a DOM node and produce a Markdown-equivalent string for its
     * contents, preserving inline formatting (strong, em, strikethrough,
     * code, links, images) and converting <br> to newlines. Elements
     * without a Markdown equivalent contribute their children's output.
     */
    private function nodeToMarkdown(\DOMNode $node): string
    {
        $buffer = '';

        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_TEXT_NODE) {
                $buffer .= $child->textContent ?? '';

                continue;
            }

            if ($child->nodeType !== XML_ELEMENT_NODE) {
                continue;
            }

            /** @var \DOMElement $child */
            $buffer .= match (strtolower($child->nodeName)) {
                'br' => "\n",
                'strong', 'b' => $this->wrapInline($child, '**'),
                'em', 'i' => $this->wrapInline($child, '*'),
                's', 'del', 'strike' => $this->wrapInline($child, '~~'),
                'code' => $this->inlineCode($child->textContent ?? ''),
                'a' => $this->linkToMarkdown($child),
                'img' => $this->imageToMarkdown($child),
                default => $this->nodeToMarkdown($child),
            };
        }

        return $buffer;
    }

    /**
     * Surround an element's converted content with a Markdown delimiter;
     * empty content yields an empty string rather than bare delimiters.
     */
    private function wrapInline(\DOMElement $element, string $delimiter): string
    {
        $inner = $this->nodeToMarkdown($element);

        return $inner === '' ? '' : $delimiter.$inner.$delimiter;
    }

    /**
     * Build a Markdown code span whose backtick fence is one longer than the
     * longest backtick run inside the code, padding with a space when the
     * code itself starts or ends with a backtick.
     */
    private function inlineCode(string $code): string
    {
        if ($code === '') {
            return '';
        }

        $longestRun = 0;

        if (preg_match_all('/`+/', $code, $runs) > 0) {
            $longestRun = max(array_map(static fn (string $run): int => strlen($run), $runs[0]));
        }

        $fence = str_repeat('`', $longestRun + 1);
        $pad = str_starts_with($code, '`') || str_ends_with($code, '`') ? ' ' : '';

        return $fence.$pad.$code.$pad.$fence;
    }

    /**
     * Convert an <a> element: plain text when there is no href, a bare URL
     * when the label is blank or equals the href, otherwise `[text](href)`.
     */
    private function linkToMarkdown(\DOMElement $link): string
    {
        $text = $this->nodeToMarkdown($link);
        $href = $link->getAttribute('href');

        if ($href === '') {
            return $text;
        }

        if (trim($text) === '' || $text === $href) {
            return $href;
        }

        return '['.$text.']('.$href.')';
    }

    /**
     * Convert an <img> element to `![alt](src)`; images without a src
     * produce nothing.
     */
    private function imageToMarkdown(\DOMElement $image): string
    {
        $src = $image->getAttribute('src');

        if ($src === '') {
            return '';
        }

        return '!['.$image->getAttribute('alt').']('.$src.')';
    }
}