2019-02-01 04:41:13 -06:00
|
|
|
import { parseFragment, DefaultTreeDocumentFragment } from 'parse5';
|
2019-03-14 07:23:15 -05:00
|
|
|
import { urlRegex } from './prelude';
|
2018-06-20 11:21:57 -05:00
|
|
|
|
2019-01-30 01:56:27 -06:00
|
|
|
/**
 * Converts an HTML fragment into plain text with lightweight link markup.
 *
 * The fragment is parsed with parse5 and walked depth-first:
 * - text nodes are appended verbatim;
 * - `<br>` becomes a single newline;
 * - `<p>` contributes a blank line before its content;
 * - `<a>` is rendered as a hashtag, a bare/bracketed URL, a mention or a
 *   Markdown-style `[text](href)` link (see `analyzeAnchor`);
 * - any other element is transparent: only its children are rendered.
 *
 * @param html HTML fragment to convert.
 * @returns The rendered text with leading and trailing whitespace trimmed.
 */
export function fromHtml(html: string): string {
	const dom = parseFragment(html) as DefaultTreeDocumentFragment;

	// Output buffer shared by the nested helpers below.
	let text = '';

	for (const n of dom.childNodes) {
		analyze(n);
	}

	return text.trim();

	/** Returns the concatenated text content of `node` and all its descendants. */
	function getText(node: any): string {
		if (node.nodeName === '#text') return node.value;

		if (node.childNodes) {
			return node.childNodes.map((n: any) => getText(n)).join('');
		}

		return '';
	}

	/** Renders every child of `node` (if it has any) into the output buffer. */
	function analyzeChildren(node: any): void {
		if (node.childNodes) {
			for (const n of node.childNodes) {
				analyze(n);
			}
		}
	}

	/**
	 * Renders a mention whose visible text is `txt` and whose link target is
	 * `hrefValue`.
	 *
	 * When the text has the short `@user` form, the host part is omitted, so it
	 * is restored from the link target. If the target is not an absolute URL or
	 * carries no host name, the text is emitted unchanged instead of throwing or
	 * producing a dangling `@user@`.
	 */
	function renderMention(txt: string, hrefValue: string): string {
		const part = txt.split('@');

		if (part.length === 2) {
			let hostname = '';
			try {
				hostname = new URL(hrefValue).hostname;
			} catch {
				// Relative or malformed href: the host cannot be recovered.
			}
			return hostname ? `${txt}@${hostname}` : txt;
		}

		// `@user@host` already carries its host; any other shape is kept as-is
		// rather than being dropped from the output.
		return txt;
	}

	/** Renders an `<a>` element into the output buffer. */
	function analyzeAnchor(node: any): void {
		const txt = getText(node);
		const attrs: any[] = node.attrs ?? [];
		const rel = attrs.find((x: any) => x.name === 'rel');
		const href = attrs.find((x: any) => x.name === 'href');
		const _class = attrs.find((x: any) => x.name === 'class');
		const isHashtag = rel?.value?.match('tag') || _class?.value?.match('hashtag');

		// Hashtag / no href / link text equals the href
		if (isHashtag || !href || href.value === txt) {
			// Hashtags and text matching `urlRegex` are emitted bare;
			// anything else is wrapped in angle brackets.
			text += isHashtag || txt.match(urlRegex) ? txt : `<${txt}>`;
			return;
		}

		// Mention (links whose rel starts with "me " are not treated as mentions)
		if (txt.startsWith('@') && !rel?.value?.match(/^me /)) {
			text += renderMention(txt, href.value);
			return;
		}

		// Anything else
		text += `[${txt}](${href.value})`;
	}

	/** Renders a single node (and, where applicable, its subtree) into the output buffer. */
	function analyze(node: any): void {
		switch (node.nodeName) {
			case '#text':
				text += node.value;
				break;

			case 'br':
				text += '\n';
				break;

			case 'a':
				analyzeAnchor(node);
				break;

			case 'p':
				text += '\n\n';
				analyzeChildren(node);
				break;

			default:
				analyzeChildren(node);
				break;
		}
	}
}
|