Files
Codewalkers/packages/web/src/lib/markdown-to-tiptap.ts
2026-02-07 00:33:12 +01:00

218 lines
6.2 KiB
TypeScript

/**
* Markdown to Tiptap JSON converter.
*
* Converts agent-produced markdown back into Tiptap JSON for page updates.
* Uses @tiptap/html's generateJSON to parse HTML into Tiptap nodes.
*/
import { generateJSON } from '@tiptap/html';
import StarterKit from '@tiptap/starter-kit';
import { Table, TableRow, TableCell, TableHeader } from '@tiptap/extension-table';
/**
 * Turn a markdown string into a Tiptap JSON document.
 *
 * The markdown is first rendered to HTML by `markdownToHtml`; that HTML is
 * then handed to `generateJSON` together with the StarterKit and table
 * extensions.
 *
 * @param markdown - Markdown source text.
 * @returns The value produced by `generateJSON` for the rendered HTML.
 */
export function markdownToTiptapJson(markdown: string): object {
  const extensions = [StarterKit, Table, TableRow, TableCell, TableHeader];
  return generateJSON(markdownToHtml(markdown), extensions);
}
/**
 * Simple markdown → HTML converter covering StarterKit nodes.
 *
 * Handles: headings, paragraphs, bold, italic, code, code blocks,
 * bullet lists, ordered lists, blockquotes, links, horizontal rules, tables.
 *
 * Works in two passes: fenced code blocks are first rewritten to
 * `<pre><code>` HTML so their contents are never read as markdown, then the
 * text is walked line by line to recognise block-level constructs. A line
 * that no block rule claims always becomes paragraph text, so input lines are
 * never silently discarded.
 *
 * @param md - Markdown source; CRLF line endings are normalised to LF.
 * @returns The generated HTML, with blocks concatenated without separators.
 */
function markdownToHtml(md: string): string {
  const hrPattern = /^---+$/;
  const headingPattern = /^(#{1,6})\s+(.+)$/;
  const bulletPattern = /^[-*]\s+/;
  const orderedPattern = /^\d+\.\s+/;

  // Normalize line endings
  let text = md.replace(/\r\n/g, '\n');

  // Code blocks (fenced). The opening fence must sit at the start of a line
  // and may carry any info string (`c++`, `objective-c`, `js title=x`); only
  // its first word is used as the language. Accepting only \w here would skip
  // such an opener and then pair its closing fence with the next code block.
  text = text.replace(/^```([^\n`]*)\n([\s\S]*?)```/gm, (_match, info: string, code: string) => {
    const escaped = escapeHtml(code.replace(/\n$/, ''));
    const lang = info.trim().split(/\s+/)[0] ?? '';
    // The language ends up inside an attribute value, so it must be escaped.
    const langAttr = lang ? ` class="language-${escapeHtml(lang)}"` : '';
    return `<pre><code${langAttr}>${escaped}</code></pre>`;
  });

  // Split into lines for block-level processing
  const lines = text.split('\n');
  const htmlLines: string[] = [];

  // True when lines[idx] is blank or would be claimed by one of the block
  // rules in the main loop. Mirrors those rules exactly so that a paragraph
  // ends precisely where another block can actually begin.
  const startsBlock = (idx: number): boolean => {
    const candidate = lines[idx];
    return (
      candidate.trim() === '' ||
      candidate.startsWith('<pre>') ||
      hrPattern.test(candidate.trim()) ||
      headingPattern.test(candidate) ||
      candidate.startsWith('> ') ||
      bulletPattern.test(candidate) ||
      orderedPattern.test(candidate) ||
      isTableStart(lines, idx)
    );
  };

  let i = 0;
  while (i < lines.length) {
    const line = lines[i];

    // Pre blocks were produced by the fenced-code pass: copy them through
    // verbatim. The scan stops at the last line if no closing tag exists, so
    // it never reads past the end of the input.
    if (line.startsWith('<pre>')) {
      let block = line;
      while (!lines[i].includes('</pre>') && i + 1 < lines.length) {
        i++;
        block += '\n' + lines[i];
      }
      htmlLines.push(block);
      i++;
      continue;
    }

    // Horizontal rule
    if (hrPattern.test(line.trim())) {
      htmlLines.push('<hr>');
      i++;
      continue;
    }

    // Headings
    const headingMatch = line.match(headingPattern);
    if (headingMatch) {
      const level = headingMatch[1].length;
      htmlLines.push(`<h${level}>${inlineMarkdown(headingMatch[2])}</h${level}>`);
      i++;
      continue;
    }

    // Blockquote: consecutive "> " lines collapse into one quoted paragraph
    if (line.startsWith('> ')) {
      const quoteLines: string[] = [];
      while (i < lines.length && lines[i].startsWith('> ')) {
        quoteLines.push(lines[i].slice(2));
        i++;
      }
      htmlLines.push(`<blockquote><p>${inlineMarkdown(quoteLines.join(' '))}</p></blockquote>`);
      continue;
    }

    // Unordered list
    if (bulletPattern.test(line)) {
      const items: string[] = [];
      while (i < lines.length && bulletPattern.test(lines[i])) {
        items.push(lines[i].replace(bulletPattern, ''));
        i++;
      }
      const lis = items.map((item) => `<li><p>${inlineMarkdown(item)}</p></li>`).join('');
      htmlLines.push(`<ul>${lis}</ul>`);
      continue;
    }

    // Ordered list
    if (orderedPattern.test(line)) {
      const items: string[] = [];
      while (i < lines.length && orderedPattern.test(lines[i])) {
        items.push(lines[i].replace(orderedPattern, ''));
        i++;
      }
      const lis = items.map((item) => `<li><p>${inlineMarkdown(item)}</p></li>`).join('');
      htmlLines.push(`<ol>${lis}</ol>`);
      continue;
    }

    // Table: header row followed by a separator row
    if (isTableStart(lines, i)) {
      const headerCells = parseTableRow(line);
      i += 2; // skip header + separator
      const bodyRows: string[][] = [];
      while (i < lines.length && lines[i].includes('|') && lines[i].trim() !== '') {
        bodyRows.push(parseTableRow(lines[i]));
        i++;
      }
      const ths = headerCells.map((c) => `<th>${inlineMarkdown(c)}</th>`).join('');
      const thead = `<thead><tr>${ths}</tr></thead>`;
      let tbody = '';
      if (bodyRows.length > 0) {
        const trs = bodyRows
          .map((row) => {
            const tds = row.map((c) => `<td>${inlineMarkdown(c)}</td>`).join('');
            return `<tr>${tds}</tr>`;
          })
          .join('');
        tbody = `<tbody>${trs}</tbody>`;
      }
      htmlLines.push(`<table>${thead}${tbody}</table>`);
      continue;
    }

    // Empty line
    if (line.trim() === '') {
      i++;
      continue;
    }

    // Paragraph. No block rule claimed the current line, so it always opens a
    // paragraph (this keeps text such as "#hashtag" or a stray fence instead
    // of dropping it); following lines are joined until a block starts.
    const paraLines: string[] = [line];
    i++;
    while (i < lines.length && !startsBlock(i)) {
      paraLines.push(lines[i]);
      i++;
    }
    htmlLines.push(`<p>${inlineMarkdown(paraLines.join(' '))}</p>`);
  }

  return htmlLines.join('');
}
/**
 * Check if lines[i] starts a markdown table.
 *
 * A table starts when lines[i] contains a pipe and lines[i + 1] is a
 * delimiter row: every cell consists of hyphens with an optional leading
 * and/or trailing colon (`---`, `:--`, `--:`, `:-:`), and the delimiter row
 * has the same number of cells as the header row. Requiring matching cell
 * counts keeps a paragraph line such as `a | b` directly above a `---`
 * horizontal rule from being mistaken for a table.
 *
 * @param lines - All lines of the document.
 * @param i - Index of the candidate header line.
 * @returns True when a table begins at index `i`; false otherwise, including
 *   when either line does not exist.
 */
function isTableStart(lines: string[], i: number): boolean {
  const header = lines[i];
  const delimiter = lines[i + 1];
  if (header === undefined || delimiter === undefined || !header.includes('|')) {
    return false;
  }

  // Split a row into trimmed cells, ignoring the optional outer pipes and
  // treating a backslash-escaped pipe as cell content rather than a separator.
  const splitCells = (row: string): string[] => {
    let inner = row.trim();
    if (inner.startsWith('|')) inner = inner.slice(1);
    if (inner.endsWith('|') && !inner.endsWith('\\|')) inner = inner.slice(0, -1);
    return inner.split(/(?<!\\)\|/).map((cell) => cell.trim());
  };

  const delimiterCells = splitCells(delimiter);
  return (
    delimiterCells.every((cell) => /^:?-+:?$/.test(cell)) &&
    delimiterCells.length === splitCells(header).length
  );
}
/**
 * Parse a markdown table row into its trimmed cell texts.
 *
 * One leading and one trailing border pipe are stripped, the remainder is
 * split on unescaped pipes, and each cell is trimmed. A backslash-escaped
 * pipe (`\|`) is kept as a literal `|` inside its cell instead of starting a
 * new cell, and a row ending in `\|` keeps that pipe as cell content.
 *
 * @param line - A single table row, with or without outer pipes.
 * @returns The cell contents in order; always at least one element.
 */
function parseTableRow(line: string): string[] {
  let row = line.trim();
  if (row.startsWith('|')) row = row.slice(1);
  // A trailing "\|" is an escaped pipe belonging to the last cell, not a border.
  if (row.endsWith('|') && !row.endsWith('\\|')) row = row.slice(0, -1);
  return row
    .split(/(?<!\\)\|/)
    .map((cell) => cell.replace(/\\\|/g, '|').trim());
}
/**
 * Process inline markdown: bold, italic, inline code, links.
 *
 * The text is HTML-escaped first. Inline code spans are then lifted out and
 * replaced by NUL-delimited placeholders so that the bold, italic and link
 * rules cannot rewrite their contents (`a*b*c` inside backticks stays
 * literal); the spans are put back once the other rules have run. Emphasis or
 * a link that wraps a code span still works because the placeholder is
 * ordinary text to those rules.
 *
 * @param text - Raw inline markdown (a single logical line).
 * @returns HTML with `<code>`, `<strong>`, `<em>` and `<a>` markup applied.
 */
function inlineMarkdown(text: string): string {
  const codeSpans: string[] = [];

  // NUL is reserved for the placeholders below, so drop any that arrive in
  // the input to rule out collisions.
  let result = escapeHtml(text).replace(/\u0000/g, '');

  // Inline code: stash each span so later rules leave its contents alone
  result = result.replace(/`([^`]+)`/g, (_match, code: string) => {
    codeSpans.push(`<code>${code}</code>`);
    return `\u0000${codeSpans.length - 1}\u0000`;
  });

  // Bold
  result = result.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>');
  // Italic
  result = result.replace(/\*(.+?)\*/g, '<em>$1</em>');
  // Links [text](url)
  result = result.replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>');

  // Put the stashed code spans back
  return result.replace(/\u0000(\d+)\u0000/g, (match, index: string) => codeSpans[Number(index)] ?? match);
}
/**
 * Escape the characters `&`, `<`, `>` and `"` as HTML entities.
 *
 * @param text - Arbitrary text.
 * @returns The text with those four characters replaced by `&amp;`, `&lt;`,
 *   `&gt;` and `&quot;`; every other character is left unchanged.
 */
function escapeHtml(text: string): string {
  return text.replace(/[&<>"]/g, (ch) => {
    switch (ch) {
      case '&':
        return '&amp;';
      case '<':
        return '&lt;';
      case '>':
        return '&gt;';
      default:
        return '&quot;';
    }
  });
}