link matchLink.ts

100% Statements 20/20
92.86% Branches 13/14
100% Functions 3/3
100% Lines 17/17
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95 1x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
 
 
1x
1x
1x
1x
 
1x
 
 
 
 
 
 
 
26x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
88x
632x
632x
632x
632x
40x
 
 
 
 
 
 
 
 
48x
 
  import { getObjectKeys } from 'roosterjs-content-model-dom';
import type { LinkData } from 'roosterjs-content-model-types';
 
interface LinkMatchRule {
    match: RegExp;
    except?: RegExp;
    normalizeUrl?: (url: string) => string;
}
 
// http exclude matching regex
// invalid URL example (in particular on IE and Edge):
// - http://www.bing.com%00, %00 before ? (question mark) is considered invalid. IE/Edge throws invalid argument exception
// - http://www.bing.com%1, %1 is invalid
// - http://www.bing.com%g, %g is invalid (IE and Edge expects a two hex value after a %)
// - http://www.bing.com%, % as ending is invalid (IE and Edge expects a two hex value after a %)
// All above % cases if they're after ? (question mark) is then considered valid again
// Similar for @, it needs to be after / (forward slash), or ? (question mark). Otherwise IE/Edge will throw security exception
// - http://www.bing.com@name, @name before ? (question mark) is considered invalid
// - http://www.bing.com/@name, is valid sine it is after / (forward slash)
// - http://www.bing.com?@name, is also valid since it is after ? (question mark)
// The regex below is essentially a break down of:
// ^[^?]+%[^0-9a-f]+ => to exclude URL like www.bing.com%%
// ^[^?]+%[0-9a-f][^0-9a-f]+ => to exclude URL like www.bing.com%1
// ^[^?]+%00 => to exclude URL like www.bing.com%00
// ^[^?]+%$ => to exclude URL like www.bing.com%
// ^https?:\/\/[^?\/]+@ => to exclude URL like http://www.bing.com@name
// ^www\.[^?\/]+@ => to exclude URL like www.bing.com@name
// , => to exclude url like www.bing,,com
const httpExcludeRegEx = /^[^?]+%[^0-9a-f]+|^[^?]+%[0-9a-f][^0-9a-f]+|^[^?]+%00|^[^?]+%$|^https?:\/\/[^?\/]+@|^www\.[^?\/]+@/i;
 
// via https://tools.ietf.org/html/rfc1035 Page 7
const labelRegEx = '[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'; // We're using case insensitive regexps below so don't bother including A-Z
const domainNameRegEx = `(?:${labelRegEx}\\.)*${labelRegEx}`;
const domainPortRegEx = `${domainNameRegEx}(?:\\:[0-9]+)?`;
const domainPortWithUrlRegEx = `${domainPortRegEx}(?:[\\/\\?]\\S*)?`;
 
const linkMatchRules: Record<string, LinkMatchRule> = {
    http: {
        match: new RegExp(
            `^(?:microsoft-edge:)?http:\\/\\/${domainPortWithUrlRegEx}|www\\.${domainPortWithUrlRegEx}`,
            'i'
        ),
        except: httpExcludeRegEx,
        normalizeUrl: url =>
            new RegExp('^(?:microsoft-edge:)?http:\\/\\/', 'i').test(url) ? url : 'http://' + url,
    },
    https: {
        match: new RegExp(`^(?:microsoft-edge:)?https:\\/\\/${domainPortWithUrlRegEx}`, 'i'),
        except: httpExcludeRegEx,
    },
    mailto: { match: new RegExp('^mailto:\\S+@\\S+\\.\\S+', 'i') },
    notes: { match: new RegExp('^notes:\\/\\/\\S+', 'i') },
    file: { match: new RegExp('^file:\\/\\/\\/?\\S+', 'i') },
    unc: { match: new RegExp('^\\\\\\\\\\S+', 'i') },
    ftp: {
        match: new RegExp(
            `^ftp:\\/\\/${domainPortWithUrlRegEx}|ftp\\.${domainPortWithUrlRegEx}`,
            'i'
        ),
        normalizeUrl: url => (new RegExp('^ftp:\\/\\/', 'i').test(url) ? url : 'ftp://' + url),
    },
    news: { match: new RegExp(`^news:(\\/\\/)?${domainPortWithUrlRegEx}`, 'i') },
    telnet: { match: new RegExp(`^telnet:(\\/\\/)?${domainPortWithUrlRegEx}`, 'i') },
    gopher: { match: new RegExp(`^gopher:\\/\\/${domainPortWithUrlRegEx}`, 'i') },
    wais: { match: new RegExp(`^wais:(\\/\\/)?${domainPortWithUrlRegEx}`, 'i') },
};
 
/**
 * Try to match a given string with link match rules, return matched link
 * @param url Input url to match
 * @param option Link match option, exact or partial. If it is exact match, we need
 * to check the length of matched link and url
 * @param rules Optional link match rules, if not passed, only the default link match
 * rules will be applied
 * @returns The matched link data, or null if no match found.
 * The link data includes an original url and a normalized url
 */
export function matchLink(url: string): LinkData | null {
    Eif (url) {
        for (const schema of getObjectKeys(linkMatchRules)) {
            const rule = linkMatchRules[schema];
            const matches = url.match(rule.match);
            if (matches && matches[0] == url && (!rule.except || !rule.except.test(url))) {
                return {
                    scheme: schema,
                    originalUrl: url,
                    normalizedUrl: rule.normalizeUrl ? rule.normalizeUrl(url) : url,
                };
            }
        }
    }
 
    return null;
}