All files / roosterjs-content-model-markdown/lib/markdownToModel/utils parseInlineSegments.ts

97.65% Statements 83/85
98.28% Branches 57/58
100% Functions 10/10
97.65% Lines 83/85

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 2261x 1x                                                     1x   1x               1x     453x     238x 238x   238x 352x 276x 276x       238x 2226x       2226x 14x 14x 14x       2212x 2212x 11x 11x 11x 11x       2201x 2201x 21x 21x       21x 21x 21x       2180x 2180x 82x 82x 82x 82x     2098x 2098x     238x       2180x   2180x 6x     2174x 44x     2130x 34x     2096x                 84x   84x 41x       43x 43x   43x 2x     41x       42x         15x       82x   44x 44x   32x 32x   6x 6x               84x   44x   34x   6x                 276x   276x 26x     276x 16x     276x 3x     276x       35x       35x             2x     33x 33x 33x          
import { createImageSegment } from '../creators/createImageSegment';
import { createText } from 'roosterjs-content-model-dom';
 
import type {
    ContentModelLink,
    ContentModelSegment,
    ContentModelSegmentFormat,
    ContentModelText,
} from 'roosterjs-content-model-types';
 
/**
 * @internal
 * Tracks which inline formats are currently switched on while a markdown string is scanned.
 * parseInlineSegments flips these flags through toggleFormatting each time it accepts a
 * marker, and createFormattedSegment reads them to build the format of each text segment.
 */
interface FormattingState {
    // True while text is inside a "**" pair; emitted as fontWeight 'bold'.
    bold: boolean;
    // True while text is inside a "*" pair; emitted as italic.
    italic: boolean;
    // True while text is inside a "~~" pair; emitted as strikethrough.
    strikethrough: boolean;
}
 
/**
 * @internal
 * Describes a formatting marker recognised at a given position by parseMarkerAt.
 */
interface FormatMarker {
    // Which format the marker toggles.
    type: 'bold' | 'italic' | 'strikethrough';
    // Number of characters the marker occupies in the input (2 for "~~" and "**", 1 for "*");
    // the scanner advances by this amount once the marker is accepted.
    length: number;
}
 
// Matches a markdown link [text](url) anchored at the start of the input.
// Capture group 1 is the link text (one or more characters, none of them "[" or "]");
// capture group 2 is the URL (one or more characters up to the first ")").
const linkPattern = /^\[([^\[\]]+)\]\(([^\)]+)\)/;
// Matches a markdown image ![alt](url) anchored at the start of the input.
// Capture group 1 is the alt text and capture group 2 is the URL, with the same character
// restrictions as the link pattern; note that an empty alt text or empty URL does not match.
const imagePattern = /^!\[([^\[\]]+)\]\(([^\)]+)\)/;
 
/**
 * @internal
 * Parse a markdown inline string into Content Model segments. Supports bold/italic/
 * strikethrough markers, links, and images, and keeps formatting state active across
 * link boundaries (e.g. **[link](url)**).
 *
 * At each position the scanner tries, in this order: backslash escape, image, link,
 * formatting marker. Anything that matches none of them is accumulated as plain text.
 *
 * @param text The markdown inline text to scan
 * @param segments Output array; the segments produced are appended to it in place
 * @param state Formatting flags in effect when scanning starts. The object is mutated as
 * markers are toggled, and the same object is shared with the recursive call that parses
 * link text, so toggles made inside a link remain in effect after it.
 * @param link Optional link that is passed to every text segment created by this call
 */
export function parseInlineSegments(
    text: string,
    segments: ContentModelSegment[],
    state: FormattingState = { bold: false, italic: false, strikethrough: false },
    link?: ContentModelLink
) {
    let buffer = '';
    let i = 0;

    // Emit the pending plain text (if any) as one segment carrying the current format.
    const flushBuffer = () => {
        if (buffer.length > 0) {
            segments.push(createFormattedSegment(buffer, state, link));
            buffer = '';
        }
    };

    while (i < text.length) {
        const current = text[i];

        // Escaped character: a backslash followed by an ASCII punctuation character emits
        // that character literally (e.g. "\*" -> "*") and is never treated as a marker.
        if (current === '\\' && i + 1 < text.length && isEscapable(text[i + 1])) {
            buffer += text[i + 1];
            i += 2;
            continue;
        }

        // Both patterns are anchored and begin with a fixed character, so the tail of the
        // input is only sliced and matched when that character is actually present here.
        if (current === '!') {
            // Image: ![alt](url)
            const imgMatch = imagePattern.exec(text.substring(i));
            if (imgMatch && isValidUrl(imgMatch[2])) {
                flushBuffer();
                segments.push(createImageSegment(imgMatch[1], imgMatch[2]));
                i += imgMatch[0].length;
                continue;
            }
        } else if (current === '[') {
            // Link: [text](url) — keep outer formatting state active inside the link
            const linkMatch = linkPattern.exec(text.substring(i));
            if (linkMatch && isValidUrl(linkMatch[2])) {
                flushBuffer();
                const innerLink: ContentModelLink = {
                    dataset: {},
                    format: { href: linkMatch[2], underline: true },
                };
                parseInlineSegments(linkMatch[1], segments, state, innerLink);
                i += linkMatch[0].length;
                continue;
            }
        }

        // Formatting marker. A rejected image/link (no match, or a URL that fails
        // isValidUrl) falls through to here and ends up as ordinary text.
        const marker = parseMarkerAt(text, i);
        if (marker && shouldToggleFormatting(text, i, marker, state)) {
            flushBuffer();
            toggleFormatting(state, marker.type);
            i += marker.length;
            continue;
        }

        buffer += current;
        i++;
    }

    flushBuffer();
}
 
/**
 * Identify the formatting marker, if any, that begins at the given position.
 * Candidates are checked in a fixed order so that "**" wins over the single "*".
 * @param text The full inline text being scanned
 * @param index Position in text at which to look for a marker
 * @returns A new marker description, or null when no marker starts at index
 */
function parseMarkerAt(text: string, index: number): FormatMarker | null {
    const candidates: ReadonlyArray<readonly [string, 'bold' | 'italic' | 'strikethrough']> = [
        ['~~', 'strikethrough'],
        ['**', 'bold'],
        ['*', 'italic'],
    ];

    for (const [token, type] of candidates) {
        if (text.startsWith(token, index)) {
            return { type, length: token.length };
        }
    }

    return null;
}
 
/**
 * Decide whether a marker found at the given position should flip its format.
 * A marker for a format that is already on is always accepted (it closes the run).
 * Otherwise it is accepted as an opener only when a non-whitespace character follows it.
 * @param text The full inline text being scanned
 * @param index Position in text where the marker starts
 * @param marker The marker found at index
 * @param currentState Formatting flags currently in effect
 * @returns true when the marker should toggle its format, false to treat it as plain text
 */
function shouldToggleFormatting(
    text: string,
    index: number,
    marker: FormatMarker,
    currentState: FormattingState
): boolean {
    if (getCurrentFormatState(currentState, marker.type)) {
        return true;
    }

    // charAt yields '' when the marker is the last thing in the text.
    const following = text.charAt(index + marker.length);

    return following !== '' && !isWhitespace(following);
}
 
/**
 * Report whether the given string contains a whitespace character (regex class \s).
 */
function isWhitespace(char: string): boolean {
    return char.search(/\s/) !== -1;
}
 
/**
 * Report whether the given string contains an ASCII punctuation character, i.e. one of
 * the characters CommonMark allows to be escaped with a preceding backslash.
 */
function isEscapable(char: string): boolean {
    const asciiPunctuation = /[!"#$%&'()*+,\-./:;<=>?@[\\\]^_`{|}~]/;

    return asciiPunctuation.test(char);
}
 
/**
 * Flip one formatting flag on the given state object in place.
 * @param state Formatting flags to mutate
 * @param type Which flag to invert
 */
function toggleFormatting(state: FormattingState, type: 'bold' | 'italic' | 'strikethrough'): void {
    // The marker type names are exactly the flag names, so the type doubles as the key.
    state[type] = !state[type];
}
 
/**
 * Read one formatting flag from the given state.
 * @param state Formatting flags to read
 * @param type Which flag to return
 * @returns The current value of the requested flag
 */
function getCurrentFormatState(
    state: FormattingState,
    type: 'bold' | 'italic' | 'strikethrough'
): boolean {
    // The marker type names are exactly the flag names, so the type doubles as the key.
    return state[type];
}
 
/**
 * Build a text segment whose format reflects the formatting flags that are switched on.
 * Only active flags add a property; flags that are off leave the format object without
 * the corresponding key.
 * @param text Content of the segment
 * @param state Formatting flags in effect for this text
 * @param link Optional link passed through to createText
 * @returns The text segment returned by createText
 */
function createFormattedSegment(
    text: string,
    state: FormattingState,
    link?: ContentModelLink
): ContentModelText {
    const { bold, italic, strikethrough } = state;
    const segmentFormat: ContentModelSegmentFormat = {};

    if (bold) {
        segmentFormat.fontWeight = 'bold';
    }

    if (italic) {
        segmentFormat.italic = true;
    }

    if (strikethrough) {
        segmentFormat.strikethrough = true;
    }

    return createText(text, segmentFormat, link);
}
 
/**
 * Decide whether a URL taken from a markdown link or image is acceptable.
 * Accepted forms are:
 * - strings starting with "data:" or "blob:"
 * - relative references starting with "/", "./" or "../"
 * - absolute URLs that the URL constructor parses with an http: or https: protocol
 * @param url The URL text captured from the markdown source
 * @returns true when the URL is one of the accepted forms, false otherwise
 */
function isValidUrl(url: string): boolean {
    if (!url) {
        return false;
    }

    if (
        url.startsWith('data:') ||
        url.startsWith('blob:') ||
        url.startsWith('/') ||
        url.startsWith('./') ||
        url.startsWith('../')
    ) {
        return true;
    }

    try {
        const parsed = new URL(url);
        return parsed.protocol === 'http:' || parsed.protocol === 'https:';
    } catch (_) {
        // The URL constructor throws for strings it cannot parse as an absolute URL.
        return false;
    }
}