diff --git a/src/mfm/from-html.ts b/src/mfm/from-html.ts index 4c8e2dbec..14279f338 100644 --- a/src/mfm/from-html.ts +++ b/src/mfm/from-html.ts @@ -5,7 +5,9 @@ import { URL } from 'url'; const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/; const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/; -export function fromHtml(html: string, hashtagNames?: string[]): string { +export function fromHtml(html: string, hashtagNames?: string[]): string | null { + if (html == null) return null; + const dom = parse5.parseFragment(html); let text = ''; @@ -19,6 +21,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { function getText(node: parse5.Node): string { if (treeAdapter.isTextNode(node)) return node.value; if (!treeAdapter.isElementNode(node)) return ''; + if (node.nodeName === 'br') return '\n'; if (node.childNodes) { return node.childNodes.map(n => getText(n)).join(''); @@ -27,6 +30,14 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { return ''; } + function appendChildren(childNodes: parse5.ChildNode[]): void { + if (childNodes) { + for (const n of childNodes) { + analyze(n); + } + } + } + function analyze(node: parse5.Node) { if (treeAdapter.isTextNode(node)) { text += node.value; @@ -42,6 +53,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { break; case 'a': + { const txt = getText(node); const rel = node.attrs.find(x => x.name === 'rel'); const href = node.attrs.find(x => x.name === 'href'); @@ -87,23 +99,111 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { text += generateLink(); } break; + } + + case 'h1': + { + text += '【'; + appendChildren(node.childNodes); + text += '】\n'; + break; + } + + case 'b': + case 'strong': + { + text += '**'; + appendChildren(node.childNodes); + text += '**'; + break; + } + + case 'small': + { + text += '<small>'; + appendChildren(node.childNodes); + text += '</small>'; + break; + } + + case 's': + case 'del': + { + text += '~~'; + appendChildren(node.childNodes); + text += '~~'; + break; + } + + case 'i': + case 'em': + { + text += '<i>'; + appendChildren(node.childNodes); + text += '</i>'; + break; + } + + // block code (<pre><code>) + case 'pre': { + if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') { + text += '```\n'; + text += getText(node.childNodes[0]); + text += '\n```\n'; + } else { + appendChildren(node.childNodes); + } + break; + } + + // inline code (<code>) + case 'code': { + text += '`'; + appendChildren(node.childNodes); + text += '`'; + break; + } + + case 'blockquote': { + const t = getText(node); + if (t) { + text += '> '; + text += t.split('\n').join(`\n> `); + } + break; + } case 'p': + case 'h2': + case 'h3': + case 'h4': + case 'h5': + case 'h6': + { text += '\n\n'; - if (node.childNodes) { - for (const n of node.childNodes) { - analyze(n); - } - } + appendChildren(node.childNodes); break; + } - default: - if (node.childNodes) { - for (const n of node.childNodes) { - analyze(n); - } - } + // other block elements + case 'div': + case 'header': + case 'footer': + case 'article': + case 'li': + case 'dt': + case 'dd': + { + text += '\n'; + appendChildren(node.childNodes); break; + } + + default: // includes inline elements + { + appendChildren(node.childNodes); + break; + } } } } diff --git a/test/mfm.ts b/test/mfm.ts index d9b98cdac..ecf886ad6 100644 --- a/test/mfm.ts +++ b/test/mfm.ts @@ -19,6 +19,30 @@ describe('toHtml', () => { }); describe('fromHtml', () => { + it('p', () => { + assert.deepStrictEqual(fromHtml('<p>a</p><p>b</p>'), 'a\n\nb'); + }); + + it('block element', () => { + assert.deepStrictEqual(fromHtml('<div>a</div><div>b</div>'), 'a\nb'); + }); + + it('inline element', () => { + assert.deepStrictEqual(fromHtml('<ul><li>a</li><li>b</li></ul>'), 'a\nb'); + }); + + it('block code', () => { + assert.deepStrictEqual(fromHtml('<pre><code>a\nb</code></pre>'), '```\na\nb\n```'); + }); + + it('inline code', () => { + assert.deepStrictEqual(fromHtml('<code>a</code>'), '`a`'); + }); + + it('quote', () => { + assert.deepStrictEqual(fromHtml('<blockquote>a\nb</blockquote>'), '> a\n> b'); + }); + it('br', () => { assert.deepStrictEqual(fromHtml('<p>abc<br><br/>d</p>'), 'abc\n\nd'); });