All files / roosterjs-content-model-markdown/lib/markdownToModel/utils splitParagraphSegments.ts

93.94% Statements 31/33
93.55% Branches 29/31
100% Functions 3/3
93.94% Lines 31/33

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88              1x                     1x 48x         48x             6x     42x 42x 42x             202x 202x 4x   198x             1x 203x 203x 203x   203x 48x 25x     48x 27x 24x   3x   21x 21x 18x   3x       48x     203x 171x     203x    
// Matches markdown links and images in a string.
// Group 1 (full link):  [text](url)       e.g. [Click here](https://example.com)
//   Group 2: link text  e.g. "Click here"
//   Group 3: link url   e.g. "https://example.com"
// Group 4 (full image): ![alt](url)       e.g. ![Logo](https://example.com/logo.png)
//   Group 5: alt text   e.g. "Logo"
//   Group 6: image url  e.g. "https://example.com/logo.png"
// The text/alt part must be non-empty and may not contain "[";
// the url part must be non-empty and may not contain ")".
// The "g" flag makes this regex stateful: exec() resumes from lastIndex, and this
// single module-level instance is shared by every call that uses it.
const linkRegex = /(\[([^\[]+)\]\(([^\)]+)\))|(\!\[([^\[]+)\]\(([^\)]+)\))/g;
 
/**
 * @internal
 * One piece of a markdown paragraph: a run of plain text, a link, or an image.
 */
interface MarkdownSegment {
    /** Which kind of piece this is */
    type: 'text' | 'link' | 'image';
    /** Plain text for 'text', the link text for 'link', the alt text for 'image' */
    text: string;
    /** Target of a 'link' or 'image'; the empty string for 'text' */
    url: string;
}
 
/**
 * Decide whether the target of a markdown link or image is acceptable as a URL.
 *
 * Accepted:
 * - strings starting with `data:` or `blob:`
 * - paths starting with `/`, `./` or `../`
 * - anything the `URL` constructor parses with an `http:` or `https:` protocol
 *
 * Rejected: the empty string, any other scheme (e.g. `javascript:`), and any string
 * the `URL` constructor cannot parse (which includes bare relative names like `a.png`).
 *
 * @param url The raw target captured from the markdown source
 * @returns true when the target is accepted, false otherwise
 */
const isValidUrl = (url: string): boolean => {
    if (!url) {
        return false;
    }

    // Accept common non-http schemes and relative paths
    if (
        url.startsWith('data:') ||
        url.startsWith('blob:') ||
        url.startsWith('/') ||
        url.startsWith('./') ||
        url.startsWith('../')
    ) {
        return true;
    }

    try {
        // new URL() throws on anything that is not an absolute URL
        const parsed = new URL(url);
        return parsed.protocol === 'http:' || parsed.protocol === 'https:';
    } catch (_) {
        return false;
    }
};
 
/**
 * Append plain text to a segment list, keeping adjacent text merged.
 *
 * When the final segment is already a 'text' segment the new text is concatenated
 * onto it in place; otherwise a new 'text' segment with an empty url is appended.
 *
 * @param result Segment list to mutate
 * @param text Plain text to append
 */
function pushText(result: MarkdownSegment[], text: string) {
    const tail = result[result.length - 1];

    // Nothing to merge into: the list is empty or ends with a link/image
    if (!tail || tail.type !== 'text') {
        result.push({ type: 'text', text, url: '' });
        return;
    }

    tail.text += text;
}
 
/**
 * @internal
 * Split one paragraph of markdown into an ordered list of text, link and image segments.
 *
 * Every `[text](url)` / `![alt](url)` occurrence whose url passes isValidUrl becomes a
 * 'link' / 'image' segment. An occurrence with a rejected url is kept verbatim as plain
 * text. All plain text, including the text between and after matches, goes through
 * pushText, so neighbouring text runs end up in a single 'text' segment.
 *
 * @param text The paragraph source
 * @returns The segments in source order; an empty array for an empty string
 */
export function splitParagraphSegments(text: string): MarkdownSegment[] {
    const result: MarkdownSegment[] = [];
    let lastIndex = 0;
    let match: RegExpExecArray | null = null;

    // linkRegex is a shared global regex; always start scanning from the beginning
    linkRegex.lastIndex = 0;

    while ((match = linkRegex.exec(text)) !== null) {
        // Plain text between the previous match (or the start) and this one
        if (match.index > lastIndex) {
            pushText(result, text.slice(lastIndex, match.index));
        }

        if (match[2] && match[3]) {
            // Link: groups 2 and 3 are the link text and url
            if (isValidUrl(match[3])) {
                result.push({ type: 'link', text: match[2], url: match[3] });
            } else {
                pushText(result, match[0]);
            }
        } else if (match[5] && match[6]) {
            // Image: groups 5 and 6 are the alt text and url
            if (isValidUrl(match[6])) {
                result.push({ type: 'image', text: match[5], url: match[6] });
            } else {
                pushText(result, match[0]);
            }
        }

        lastIndex = linkRegex.lastIndex;
    }

    // Trailing plain text after the last match
    if (lastIndex < text.length) {
        pushText(result, text.slice(lastIndex));
    }

    return result;
}