From 0ff390ed80be8fa64c1ba1d6a7cbc671b0156931 Mon Sep 17 00:00:00 2001 From: syuilo Date: Fri, 16 Nov 2018 21:57:19 +0900 Subject: [PATCH] [MFM] Improve various parsing Resolve #2779 Resolve #3053 --- src/mfm/parse/elements/hashtag.ts | 4 +- src/mfm/parse/elements/mention.ts | 3 +- src/mfm/parse/elements/quote.ts | 4 +- src/mfm/parse/elements/title.ts | 4 +- src/mfm/parse/elements/url.ts | 3 +- src/mfm/parse/index.ts | 8 +-- test/mfm.ts | 99 ++++++++++++++++++++----------- 7 files changed, 79 insertions(+), 46 deletions(-) diff --git a/src/mfm/parse/elements/hashtag.ts b/src/mfm/parse/elements/hashtag.ts index 7005dbe09..df07de664 100644 --- a/src/mfm/parse/elements/hashtag.ts +++ b/src/mfm/parse/elements/hashtag.ts @@ -8,7 +8,9 @@ export type TextElementHashtag = { hashtag: string; }; -export default function(text: string, isBegin: boolean) { +export default function(text: string, before: string) { + const isBegin = before == ''; + if (!(/^\s#[^\s\.,!\?#]+/.test(text) || (isBegin && /^#[^\s\.,!\?#]+/.test(text)))) return null; const isHead = text.startsWith('#'); const hashtag = text.match(/^\s?#[^\s\.,!\?#]+/)[0]; diff --git a/src/mfm/parse/elements/mention.ts b/src/mfm/parse/elements/mention.ts index 832a97c62..7a609e5d3 100644 --- a/src/mfm/parse/elements/mention.ts +++ b/src/mfm/parse/elements/mention.ts @@ -12,9 +12,10 @@ export type TextElementMention = { host: string; }; -export default function(text: string) { +export default function(text: string, before: string) { const match = text.match(/^@[a-z0-9_]+(?:@[a-z0-9\.\-]+[a-z0-9])?/i); if (!match) return null; + if (/[a-zA-Z0-9]$/.test(before)) return null; const mention = match[0]; const { username, host } = parseAcct(mention.substr(1)); const canonical = host != null ? `@${username}@${toUnicode(host)}` : mention; diff --git a/src/mfm/parse/elements/quote.ts b/src/mfm/parse/elements/quote.ts index 5f8c9c7fc..969c1fb4a 100644 --- a/src/mfm/parse/elements/quote.ts +++ b/src/mfm/parse/elements/quote.ts @@ -8,7 +8,9 @@ export type TextElementQuote = { quote: string; }; -export default function(text: string, isBegin: boolean) { +export default function(text: string, before: string) { + const isBegin = before == ''; + const match = text.match(/^"([\s\S]+?)\n"/) || text.match(/^\n>([\s\S]+?)(\n\n|$)/) || (isBegin ? text.match(/^>([\s\S]+?)(\n\n|$)/) : null); diff --git a/src/mfm/parse/elements/title.ts b/src/mfm/parse/elements/title.ts index d67236aa7..a9922c8ac 100644 --- a/src/mfm/parse/elements/title.ts +++ b/src/mfm/parse/elements/title.ts @@ -8,7 +8,9 @@ export type TextElementTitle = { title: string; }; -export default function(text: string, isBegin: boolean) { +export default function(text: string, before: string) { + const isBegin = before == ''; + const match = isBegin ? text.match(/^(【|\[)(.+?)(】|])\n/) : text.match(/^\n(【|\[)(.+?)(】|])\n/); if (!match) return null; return { diff --git a/src/mfm/parse/elements/url.ts b/src/mfm/parse/elements/url.ts index 411f2ebfa..a16f67f2c 100644 --- a/src/mfm/parse/elements/url.ts +++ b/src/mfm/parse/elements/url.ts @@ -8,12 +8,13 @@ export type TextElementUrl = { url: string; }; -export default function(text: string) { +export default function(text: string, before: string) { const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/); if (!match) return null; let url = match[0]; if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.')); if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(',')); + if (url.endsWith(')') && before.endsWith('(')) url = url.substr(0, url.lastIndexOf(')')); return { type: 'url', content: url, diff --git a/src/mfm/parse/index.ts b/src/mfm/parse/index.ts index 46e66ac4c..7697bb6e3 100644 --- a/src/mfm/parse/index.ts +++ b/src/mfm/parse/index.ts @@ -52,7 +52,7 @@ export type TextElement = { type: 'text', content: string } | TextElementTitle | TextElementUrl | TextElementMotion; -export type TextElementProcessor = (text: string, isBegin: boolean) => TextElement | TextElement[]; +export type TextElementProcessor = (text: string, before: string) => TextElement | TextElement[]; export default (source: string): TextElement[] => { if (source == null || source == '') { @@ -68,12 +68,10 @@ export default (source: string): TextElement[] => { } } - let i = 0; - // パース while (source != '') { const parsed = elements.some(el => { - let _tokens = el(source, i == 0); + let _tokens = el(source, tokens.map(token => token.content).join('')); if (_tokens) { if (!Array.isArray(_tokens)) { _tokens = [_tokens]; @@ -91,8 +89,6 @@ export default (source: string): TextElement[] => { content: source[0] }); } - - i++; } const combineText = (es: TextElement[]): TextElement => diff --git a/test/mfm.ts b/test/mfm.ts index 27d0ffe66..9a562280d 100644 --- a/test/mfm.ts +++ b/test/mfm.ts @@ -82,7 +82,7 @@ describe('Text', () => { { type: 'text', content: ' お腹ペコい' } ], tokens); }); -/* + it('ignore', () => { const tokens = analyze('idolm@ster'); assert.deepEqual([ @@ -91,20 +91,19 @@ describe('Text', () => { const tokens2 = analyze('@a\n@b\n@c'); assert.deepEqual([ - { type: 'mention', content: '@a', username: 'a', host: null }, + { type: 'mention', content: '@a', canonical: '@a', username: 'a', host: null }, { type: 'text', content: '\n' }, - { type: 'mention', content: '@b', username: 'b', host: null }, + { type: 'mention', content: '@b', canonical: '@b', username: 'b', host: null }, { type: 'text', content: '\n' }, - { type: 'mention', content: '@c', username: 'c', host: null } + { type: 'mention', content: '@c', canonical: '@c', username: 'c', host: null } ], tokens2); const tokens3 = analyze('**x**@a'); assert.deepEqual([ { type: 'bold', content: '**x**', bold: 'x' }, - { type: 'mention', content: '@a', username: 'a', host: null } + { type: 'mention', content: '@a', canonical: '@a', username: 'a', host: null } ], tokens3); }); -*/ }); it('hashtag', () => { @@ -159,38 +158,68 @@ describe('Text', () => { ], tokens5); }); - it('url', () => { - const tokens1 = analyze('https://example.com'); - assert.deepEqual([{ - type: 'url', - content: 'https://example.com', - url: 'https://example.com' - }], tokens1); + describe('url', () => { + it('simple', () => { + const tokens = analyze('https://example.com'); + assert.deepEqual([{ + type: 'url', + content: 'https://example.com', + url: 'https://example.com' + }], tokens); + }); - const tokens2 = analyze('https://example.com.'); - assert.deepEqual([{ - type: 'url', - content: 'https://example.com', - url: 'https://example.com' - }, { - type: 'text', content: '.' - }], tokens2); + it('ignore trailing dot', () => { + const tokens = analyze('https://example.com.'); + assert.deepEqual([{ + type: 'url', + content: 'https://example.com', + url: 'https://example.com' + }, { + type: 'text', content: '.' + }], tokens); + }); - const tokens3 = analyze('https://example.com/foo?bar=a,b'); - assert.deepEqual([{ - type: 'url', - content: 'https://example.com/foo?bar=a,b', - url: 'https://example.com/foo?bar=a,b' - }], tokens3); + it('with comma', () => { + const tokens = analyze('https://example.com/foo?bar=a,b'); + assert.deepEqual([{ + type: 'url', + content: 'https://example.com/foo?bar=a,b', + url: 'https://example.com/foo?bar=a,b' + }], tokens); + }); - const tokens4 = analyze('https://example.com/foo, bar'); - assert.deepEqual([{ - type: 'url', - content: 'https://example.com/foo', - url: 'https://example.com/foo' - }, { - type: 'text', content: ', bar' - }], tokens4); + it('ignore trailing comma', () => { + const tokens = analyze('https://example.com/foo, bar'); + assert.deepEqual([{ + type: 'url', + content: 'https://example.com/foo', + url: 'https://example.com/foo' + }, { + type: 'text', content: ', bar' + }], tokens); + }); + + it('with brackets', () => { + const tokens = analyze('https://example.com/foo(bar)'); + assert.deepEqual([{ + type: 'url', + content: 'https://example.com/foo(bar)', + url: 'https://example.com/foo(bar)' + }], tokens); + }); + + it('ignore parent brackets', () => { + const tokens = analyze('(https://example.com/foo)'); + assert.deepEqual([{ + type: 'text', content: '(' + }, { + type: 'url', + content: 'https://example.com/foo', + url: 'https://example.com/foo' + }, { + type: 'text', content: ')' + }], tokens); + }); }); it('link', () => {