*/
private const MARKDOWN_LINE_PATTERNS = [
// Every pattern uses the /m flag, so ^ anchors at the start of each line,
// and tolerates up to three whitespace characters before the marker.
'/^\s{0,3}#{1,6}\s+\S/m', // ATX heading: one to six "#", whitespace, then text
'/^\s{0,3}[-*+]\s+\S/m', // bullet list item: "-", "*" or "+", whitespace, then text
'/^\s{0,3}\d{1,9}[.)]\s+\S/m', // ordered list item: 1-9 digits, "." or ")", whitespace, then text
'/^\s{0,3}>\s?/m', // blockquote: a line whose first marker is ">"
'/^\s{0,3}```/m', // fenced code block: a line starting with three backticks
];
/**
 * Convert a raw Markdown string straight to HTML.
 *
 * The conversion goes through the renderer built by renderer(), the same
 * one normalize() relies on, and surrounding whitespace is stripped from
 * the result.
 */
public function toHtml(string $markdown): string
{
    $html = $this->renderer()->toHtml($markdown);

    return trim($html);
}
/**
 * Turn editor output that is really Markdown into rendered HTML.
 *
 * - null or blank input is returned unchanged.
 * - Input without any HTML tag is rendered as Markdown.
 * - Input that contains a structural tag is returned unchanged.
 * - Any other HTML has its text extracted, and is rendered only when that
 *   text looks like Markdown (e.g. plain text pasted into the editor and
 *   wrapped in <p> by Tiptap).
 *
 * Whenever rendering produces an empty string, the original input is
 * returned instead.
 */
public function normalize(?string $html): ?string
{
    if ($html === null || trim($html) === '') {
        return $html;
    }

    if ($this->containsHtmlTags($html)) {
        // Content already structured through the editor toolbar stays as is.
        if ($this->containsStructuralHtml($html)) {
            return $html;
        }

        $markdown = $this->extractPlainTextPreservingLineBreaks($html);
        if (! $this->looksLikeMarkdown($markdown)) {
            return $html;
        }
    } else {
        // No tags at all: the whole input is treated as Markdown source.
        $markdown = $html;
    }

    $output = $this->toHtml($markdown);

    return $output !== '' ? $output : $html;
}
/**
 * Tell whether the input holds at least one HTML-like tag: a "<" directly
 * followed by a letter, "!" or "/", and closed by a later ">".
 */
private function containsHtmlTags(string $input): bool
{
    $matchCount = preg_match('/<[a-z!\/][^>]*>/i', $input);

    return $matchCount === 1;
}
/**
 * Build a MarkdownRenderer from the "markdown" configuration entry,
 * substituting a default for every option that is not set.
 */
private function renderer(): MarkdownRenderer
{
    $settings = config('markdown');
    $highlighting = $settings['code_highlighting'] ?? [];

    return new MarkdownRenderer(
        commonmarkOptions: $settings['commonmark_options'] ?? [],
        highlightCode: $highlighting['enabled'] ?? false,
        highlightTheme: $highlighting['theme'] ?? 'github-light',
        cacheStoreName: $settings['cache_store'] ?? null,
        renderAnchors: $settings['add_anchors_to_headings'] ?? false,
        renderAnchorsAsLinks: $settings['render_anchors_as_links'] ?? false,
        extensions: $settings['extensions'] ?? [],
        blockRenderers: $settings['block_renderers'] ?? [],
        inlineRenderers: $settings['inline_renderers'] ?? [],
        inlineParsers: $settings['inline_parsers'] ?? [],
        cacheDuration: $settings['cache_duration'] ?? null,
    );
}
/**
 * Report whether the HTML contains, case-insensitively, the opening of any
 * tag listed in STRUCTURAL_TAGS (a "<" immediately followed by the name).
 */
private function containsStructuralHtml(string $html): bool
{
    foreach (self::STRUCTURAL_TAGS as $tagName) {
        $position = stripos($html, '<'.$tagName);
        if ($position === false) {
            continue;
        }

        return true;
    }

    return false;
}
/**
 * Report whether at least one of MARKDOWN_LINE_PATTERNS matches the text.
 */
private function looksLikeMarkdown(string $text): bool
{
    $matched = false;

    foreach (self::MARKDOWN_LINE_PATTERNS as $regex) {
        if (preg_match($regex, $text) === 1) {
            $matched = true;
            break;
        }
    }

    return $matched;
}
/**
 * Convert paragraph-level HTML into Markdown-flavoured plain text.
 *
 * The fragment is parsed inside a wrapper <div>. Every direct child of the
 * wrapper becomes one block (inline markup is translated by
 * nodeToMarkdown()), and the non-empty blocks are joined by a blank line.
 *
 * @param  string  $html  HTML fragment to convert.
 * @return string Extracted text with surrounding whitespace trimmed.
 */
private function extractPlainTextPreservingLineBreaks(string $html): string
{
    $dom = new DOMDocument;
    $previous = libxml_use_internal_errors(true);

    // The wrapper <div> gives the fragment a single root that the lookup
    // below can find; without it the root is null for paragraph-only input
    // and the lossy strip_tags() fallback would always be taken.
    // The XML encoding declaration tells libxml to read the bytes as UTF-8
    // so that non-ASCII text is not mis-decoded.
    $dom->loadHTML(
        '<?xml encoding="UTF-8"><div>'.$html.'</div>',
        LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
    );

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $root = $dom->getElementsByTagName('div')->item(0);
    if ($root === null) {
        // Parsing produced no wrapper: fall back to stripping tags.
        return html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5);
    }

    $blocks = [];
    foreach ($root->childNodes as $child) {
        $text = $this->nodeToMarkdown($child);
        $isParagraph = $child->nodeType === XML_ELEMENT_NODE
            && strtolower($child->nodeName) === 'p';

        // Paragraph text is kept verbatim; any other node is trimmed.
        $blocks[] = $isParagraph ? $text : trim($text);
    }

    $nonEmpty = array_filter($blocks, static fn (string $block): bool => $block !== '');

    return trim(implode("\n\n", $nonEmpty));
}
/**
 * Walk a DOM node and produce a Markdown-equivalent string for its
 * contents, preserving inline formatting (strong, em, strikethrough, code,
 * links, images) and converting <br> to newlines.
 *
 * Only the children of $node are rendered; $node's own tag adds nothing.
 * Child nodes that are neither text nor elements are skipped, and elements
 * without a dedicated rule contribute just their rendered contents.
 *
 * @param  \DOMNode  $node  Node whose children are converted.
 * @return string Markdown text for the node's contents.
 */
private function nodeToMarkdown(\DOMNode $node): string
{
    // Tags that wrap their rendered contents in a symmetric Markdown marker.
    $markers = [
        'strong' => '**',
        'b' => '**',
        'em' => '*',
        'i' => '*',
        's' => '~~',
        'del' => '~~',
        'strike' => '~~',
    ];

    $buffer = '';

    foreach ($node->childNodes as $child) {
        if ($child->nodeType === XML_TEXT_NODE) {
            $buffer .= $child->textContent ?? '';

            continue;
        }

        if ($child->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }

        /** @var \DOMElement $child */
        $tag = strtolower($child->nodeName);

        if (isset($markers[$tag])) {
            $inner = $this->nodeToMarkdown($child);
            // An empty element emits nothing rather than a bare marker pair.
            if ($inner !== '') {
                $buffer .= $markers[$tag].$inner.$markers[$tag];
            }

            continue;
        }

        switch ($tag) {
            case 'br':
                $buffer .= "\n";
                break;

            case 'code':
                // Code content is taken verbatim, without nested formatting.
                $buffer .= '`'.($child->textContent ?? '').'`';
                break;

            case 'a':
                $text = $this->nodeToMarkdown($child);
                $href = $child->getAttribute('href');
                if ($href === '') {
                    // No target: keep only the link text.
                    $buffer .= $text;
                } elseif (trim($text) === '' || $text === $href) {
                    // Blank or self-describing link: emit the bare URL.
                    $buffer .= $href;
                } else {
                    $buffer .= '['.$text.']('.$href.')';
                }
                break;

            case 'img':
                $src = $child->getAttribute('src');
                $alt = $child->getAttribute('alt');
                // Emit Markdown image syntax; an image without a source is dropped.
                if ($src !== '') {
                    $buffer .= '!['.$alt.']('.$src.')';
                }
                break;

            default:
                // Unknown element: keep its contents, drop the tag itself.
                $buffer .= $this->nodeToMarkdown($child);
                break;
        }
    }

    return $buffer;
}
}