[MFM] Improve parsing of various elements

Resolve #2779
Resolve #3053
This commit is contained in:
syuilo 2018-11-16 21:57:19 +09:00
parent 5e877644ca
commit 5d8c97dbc7
7 changed files with 79 additions and 46 deletions

View File

@ -8,7 +8,9 @@ export type TextElementHashtag = {
hashtag: string;
};
export default function(text: string, isBegin: boolean) {
export default function(text: string, before: string) {
const isBegin = before == '';
if (!(/^\s#[^\s\.,!\?#]+/.test(text) || (isBegin && /^#[^\s\.,!\?#]+/.test(text)))) return null;
const isHead = text.startsWith('#');
const hashtag = text.match(/^\s?#[^\s\.,!\?#]+/)[0];

View File

@ -12,9 +12,10 @@ export type TextElementMention = {
host: string;
};
export default function(text: string) {
export default function(text: string, before: string) {
const match = text.match(/^@[a-z0-9_]+(?:@[a-z0-9\.\-]+[a-z0-9])?/i);
if (!match) return null;
if (/[a-zA-Z0-9]$/.test(before)) return null;
const mention = match[0];
const { username, host } = parseAcct(mention.substr(1));
const canonical = host != null ? `@${username}@${toUnicode(host)}` : mention;

View File

@ -8,7 +8,9 @@ export type TextElementQuote = {
quote: string;
};
export default function(text: string, isBegin: boolean) {
export default function(text: string, before: string) {
const isBegin = before == '';
const match = text.match(/^"([\s\S]+?)\n"/) || text.match(/^\n>([\s\S]+?)(\n\n|$)/) ||
(isBegin ? text.match(/^>([\s\S]+?)(\n\n|$)/) : null);

View File

@ -8,7 +8,9 @@ export type TextElementTitle = {
title: string;
};
export default function(text: string, isBegin: boolean) {
export default function(text: string, before: string) {
const isBegin = before == '';
const match = isBegin ? text.match(/^(【|\[)(.+?)(】|])\n/) : text.match(/^\n(【|\[)(.+?)(】|])\n/);
if (!match) return null;
return {

View File

@ -8,12 +8,13 @@ export type TextElementUrl = {
url: string;
};
export default function(text: string) {
export default function(text: string, before: string) {
const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/);
if (!match) return null;
let url = match[0];
if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.'));
if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(','));
if (url.endsWith(')') && before.endsWith('(')) url = url.substr(0, url.lastIndexOf(')'));
return {
type: 'url',
content: url,

View File

@ -52,7 +52,7 @@ export type TextElement = { type: 'text', content: string }
| TextElementTitle
| TextElementUrl
| TextElementMotion;
export type TextElementProcessor = (text: string, isBegin: boolean) => TextElement | TextElement[];
export type TextElementProcessor = (text: string, before: string) => TextElement | TextElement[];
export default (source: string): TextElement[] => {
if (source == null || source == '') {
@ -68,12 +68,10 @@ export default (source: string): TextElement[] => {
}
}
let i = 0;
// パース
while (source != '') {
const parsed = elements.some(el => {
let _tokens = el(source, i == 0);
let _tokens = el(source, tokens.map(token => token.content).join(''));
if (_tokens) {
if (!Array.isArray(_tokens)) {
_tokens = [_tokens];
@ -91,8 +89,6 @@ export default (source: string): TextElement[] => {
content: source[0]
});
}
i++;
}
const combineText = (es: TextElement[]): TextElement =>

View File

@ -82,7 +82,7 @@ describe('Text', () => {
{ type: 'text', content: ' お腹ペコい' }
], tokens);
});
/*
it('ignore', () => {
const tokens = analyze('idolm@ster');
assert.deepEqual([
@ -91,20 +91,19 @@ describe('Text', () => {
const tokens2 = analyze('@a\n@b\n@c');
assert.deepEqual([
{ type: 'mention', content: '@a', username: 'a', host: null },
{ type: 'mention', content: '@a', canonical: '@a', username: 'a', host: null },
{ type: 'text', content: '\n' },
{ type: 'mention', content: '@b', username: 'b', host: null },
{ type: 'mention', content: '@b', canonical: '@b', username: 'b', host: null },
{ type: 'text', content: '\n' },
{ type: 'mention', content: '@c', username: 'c', host: null }
{ type: 'mention', content: '@c', canonical: '@c', username: 'c', host: null }
], tokens2);
const tokens3 = analyze('**x**@a');
assert.deepEqual([
{ type: 'bold', content: '**x**', bold: 'x' },
{ type: 'mention', content: '@a', username: 'a', host: null }
{ type: 'mention', content: '@a', canonical: '@a', username: 'a', host: null }
], tokens3);
});
*/
});
it('hashtag', () => {
@ -159,38 +158,68 @@ describe('Text', () => {
], tokens5);
});
it('url', () => {
const tokens1 = analyze('https://example.com');
assert.deepEqual([{
type: 'url',
content: 'https://example.com',
url: 'https://example.com'
}], tokens1);
describe('url', () => {
// A bare URL must be analyzed into exactly one `url` token.
it('simple', () => {
	const expected = [
		{ type: 'url', content: 'https://example.com', url: 'https://example.com' }
	];
	assert.deepEqual(expected, analyze('https://example.com'));
});
const tokens2 = analyze('https://example.com.');
assert.deepEqual([{
type: 'url',
content: 'https://example.com',
url: 'https://example.com'
}, {
type: 'text', content: '.'
}], tokens2);
// A '.' directly after the URL is not part of the link: it is expected
// to come back as a separate text token following the url token.
it('ignore trailing dot', () => {
	const expected = [
		{ type: 'url', content: 'https://example.com', url: 'https://example.com' },
		{ type: 'text', content: '.' }
	];
	assert.deepEqual(expected, analyze('https://example.com.'));
});
const tokens3 = analyze('https://example.com/foo?bar=a,b');
assert.deepEqual([{
type: 'url',
content: 'https://example.com/foo?bar=a,b',
url: 'https://example.com/foo?bar=a,b'
}], tokens3);
// A comma in the middle of the URL (here inside the query string) must be
// kept as part of the url token.
it('with comma', () => {
	const link = 'https://example.com/foo?bar=a,b';
	const expected = [
		{ type: 'url', content: 'https://example.com/foo?bar=a,b', url: 'https://example.com/foo?bar=a,b' }
	];
	assert.deepEqual(expected, analyze(link));
});
const tokens4 = analyze('https://example.com/foo, bar');
assert.deepEqual([{
type: 'url',
content: 'https://example.com/foo',
url: 'https://example.com/foo'
}, {
type: 'text', content: ', bar'
}], tokens4);
// A comma that directly follows the URL is excluded from the link; it and
// the rest of the sentence are expected as one trailing text token.
it('ignore trailing comma', () => {
	const expected = [
		{ type: 'url', content: 'https://example.com/foo', url: 'https://example.com/foo' },
		{ type: 'text', content: ', bar' }
	];
	assert.deepEqual(expected, analyze('https://example.com/foo, bar'));
});
// Parentheses that appear inside the URL itself must stay in the url token
// when the URL is not preceded by an opening parenthesis.
it('with brackets', () => {
	const link = 'https://example.com/foo(bar)';
	const expected = [
		{ type: 'url', content: 'https://example.com/foo(bar)', url: 'https://example.com/foo(bar)' }
	];
	assert.deepEqual(expected, analyze(link));
});
// A URL wrapped in parentheses: the surrounding '(' and ')' are expected as
// plain text tokens, with only the inner address in the url token.
it('ignore parent brackets', () => {
	const expected = [
		{ type: 'text', content: '(' },
		{ type: 'url', content: 'https://example.com/foo', url: 'https://example.com/foo' },
		{ type: 'text', content: ')' }
	];
	assert.deepEqual(expected, analyze('(https://example.com/foo)'));
});
});
it('link', () => {