All files / roosterjs-content-model-dom/lib/modelApi/common normalizeSegment.ts

100% Statements 52/52
94.59% Branches 35/37
100% Functions 13/13
100% Lines 52/52

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 1731x 1x             1x 1x 1x 1x         1x 2297x   2297x 1997x     2297x 2297x               1x     106x   53x   53x 53x                                 1x 8x           2551x                       1x         2058x   193x 193x     193x 193x               73x 73x 73x     1537x 1537x 1537x   1537x 1537x   1537x 1503x   39x 39x             1503x   52x 52x               1537x   1537x                 2490x     1479x 170x   170x         4x 4x                       2490x   45x 45x        
import { hasSpacesOnly } from './hasSpacesOnly';
import { mutateSegment } from './mutate';
import type {
    ReadonlyContentModelParagraph,
    ReadonlyContentModelSegment,
    ReadonlyContentModelText,
} from 'roosterjs-content-model-types';
 
// Regular (breakable) space character, U+0020
const SPACE = '\u0020';
// Non-break space character (&nbsp;), U+00A0
const NONE_BREAK_SPACE = '\u00A0';
// Matches one or more regular spaces at the very beginning of a string
const LEADING_SPACE_REGEX = /^\u0020+/;
// Matches one or more regular spaces at the very end of a string
const TRAILING_SPACE_REGEX = /\u0020+$/;
 
/**
 * @internal
 * Normalize the spaces of every segment of a paragraph.
 * Each segment is fed through normalizeSegment with one shared context, then the text segments
 * still pending in the context are finalized: a trailing non-break space is turned back into a
 * regular space where allowed, and trailing spaces of the last text segment are removed.
 * @param paragraph The paragraph whose segments are normalized
 */
export function normalizeAllSegments(paragraph: ReadonlyContentModelParagraph) {
    const state = resetNormalizeSegmentContext();

    paragraph.segments.forEach(current => normalizeSegment(paragraph, current, state));

    // Read the state only after the loop: a Br segment resets the context along the way
    const { textSegments, lastTextSegment, lastInlineSegment } = state;

    normalizeTextSegments(paragraph, textSegments, lastInlineSegment);
    normalizeLastTextSegment(paragraph, lastTextSegment, lastInlineSegment);
}
 
/**
 * Normalize a given segment, make sure its spaces are correctly represented by space and non-break space
 * @param paragraph The paragraph passed together with the segment to the mutation helper
 * (presumably the paragraph that owns the segment)
 * @param segment The segment to normalize
 * @param ignoreTrailingSpaces Whether we should ignore the trailing space of the text segment.
 * When true, trailing spaces are collapsed into a single regular space; when false they are
 * collapsed into a single non-break space. @default false
 */
export function normalizeSingleSegment(
    paragraph: ReadonlyContentModelParagraph,
    segment: ReadonlyContentModelSegment,
    ignoreTrailingSpaces: boolean = false
) {
    const context = resetNormalizeSegmentContext();

    // A freshly reset context has ignoreTrailingSpaces = true, so apply the caller's choice explicitly
    context.ignoreTrailingSpaces = ignoreTrailingSpaces;
    normalizeSegment(paragraph, segment, context);
}
 
/**
 * @internal Export for test only
 * Running state used while normalizing the segments of a paragraph.
 * It is created and reset by resetNormalizeSegmentContext and updated by normalizeSegment.
 */
export interface NormalizeSegmentContext {
    // Text segments seen since the context was last reset
    textSegments: ReadonlyContentModelText[];
    // When true, leading spaces of the next text segment are removed; otherwise they become a single non-break space.
    // Starts as true, becomes false after an Entity/General/Image segment, and after a text segment
    // it reflects whether that segment's original text ended with a space
    ignoreLeadingSpaces: boolean;
    // When true, trailing spaces of a text segment collapse into one regular space; otherwise into one non-break space
    ignoreTrailingSpaces: boolean;
    // The most recent Text segment seen since the last reset, if any
    lastTextSegment: ReadonlyContentModelText | undefined;
    // The most recent Text, Entity, General or Image segment seen since the last reset, if any
    lastInlineSegment: ReadonlyContentModelSegment | undefined;
}
 
/**
 * @internal Export for test only
 * Create a new normalization context holding the initial state.
 * @returns A new context object populated by resetNormalizeSegmentContext
 */
export function createNormalizeSegmentContext(): NormalizeSegmentContext {
    const freshContext = resetNormalizeSegmentContext();

    return freshContext;
}
 
/**
 * Put a normalization context into its initial state.
 * @param context Optional existing context to reset in place. When omitted, a new object is created.
 * @returns The same object that was passed in (or the newly created one), with an empty text segment
 * list, both ignore flags set to true, and no last inline/text segment
 */
function resetNormalizeSegmentContext(
    context?: Partial<NormalizeSegmentContext>
): NormalizeSegmentContext {
    // Built on every call so that each reset gets its own textSegments array
    const initialState: NormalizeSegmentContext = {
        textSegments: [],
        ignoreLeadingSpaces: true,
        ignoreTrailingSpaces: true,
        lastInlineSegment: undefined,
        lastTextSegment: undefined,
    };
    const target = context ?? {};

    return Object.assign(target, initialState);
}
 
/**
 * @internal Export for test only
 * Feed one segment into the running normalization state.
 * - Text: the segment is recorded in the context and its leading/trailing spaces are collapsed (steps 1 and 2)
 * - Br: the line ends, so pending text segments are finalized (steps 3 and 4) and the context is reset
 * - Entity / General / Image: recorded as the last inline segment; leading spaces of a following text are no longer ignored
 * Segments of any other type are skipped and leave the context unchanged.
 * @param paragraph The paragraph passed along to the mutation and finalization helpers
 * @param segment The segment to process
 * @param context Normalization state shared across the segments being processed; updated in place
 */
export function normalizeSegment(
    paragraph: ReadonlyContentModelParagraph,
    segment: ReadonlyContentModelSegment,
    context: NormalizeSegmentContext
) {
    if (segment.segmentType === 'Text') {
        trackAndNormalizeText(paragraph, segment, context);
    } else if (segment.segmentType === 'Br') {
        const { textSegments, lastTextSegment, lastInlineSegment } = context;

        normalizeTextSegments(paragraph, textSegments, lastInlineSegment);
        normalizeLastTextSegment(paragraph, lastTextSegment, lastInlineSegment);

        // Line ends, reset all states
        resetNormalizeSegmentContext(context);
    } else if (
        segment.segmentType === 'Entity' ||
        segment.segmentType === 'General' ||
        segment.segmentType === 'Image'
    ) {
        // Here "inline segment" means a segment showing some content inline such as text, image, or other inline HTML elements.
        // BR ends the current line, so it is not treated as "inline" here.
        context.lastInlineSegment = segment;
        context.ignoreLeadingSpaces = false;
    }
}

/**
 * Record a text segment in the context and collapse its leading and trailing spaces.
 */
function trackAndNormalizeText(
    paragraph: ReadonlyContentModelParagraph,
    segment: ReadonlyContentModelText,
    context: NormalizeSegmentContext
) {
    context.textSegments.push(segment);
    context.lastInlineSegment = segment;
    context.lastTextSegment = segment;

    // Both flags are taken from the text as it was before any mutation below
    const originalText = segment.text;
    const startsWithSpace = originalText.startsWith(SPACE);
    const endsWithSpace = originalText.endsWith(SPACE);

    if (!hasSpacesOnly(originalText)) {
        if (startsWithSpace) {
            // 1. Multiple leading space => single &nbsp; or empty (depends on if previous segment ends with space)
            mutateSegment(paragraph, segment, mutableText => {
                const replacement = context.ignoreLeadingSpaces ? '' : NONE_BREAK_SPACE;

                mutableText.text = mutableText.text.replace(LEADING_SPACE_REGEX, replacement);
            });
        }

        if (endsWithSpace) {
            // 2. Multiple trailing space => single space (or single &nbsp; when trailing spaces are not ignored)
            mutateSegment(paragraph, segment, mutableText => {
                const replacement = context.ignoreTrailingSpaces ? SPACE : NONE_BREAK_SPACE;

                mutableText.text = mutableText.text.replace(TRAILING_SPACE_REGEX, replacement);
            });
        }
    }

    // The next text segment drops its leading spaces only if this one originally ended with a space
    context.ignoreLeadingSpaces = endsWithSpace;
}
 
/**
 * Step 3 of the normalization: for each given text segment that is not the last inline segment,
 * turn a trailing &nbsp; back into a regular space when the character before it is not a space,
 * so that the next segment can wrap.
 * @param paragraph The paragraph passed along to the mutation helper
 * @param segments The text segments to check
 * @param lastInlineSegment The last inline segment; it is skipped here because its trailing space is handled in step 4
 */
function normalizeTextSegments(
    paragraph: ReadonlyContentModelParagraph,
    segments: ReadonlyContentModelText[],
    lastInlineSegment: ReadonlyContentModelSegment | undefined
) {
    for (const candidate of segments) {
        if (candidate === lastInlineSegment) {
            continue;
        }

        const original = candidate.text;
        const endsWithNbsp = original.length > 1 && original.endsWith(NONE_BREAK_SPACE);

        if (endsWithNbsp && original.charAt(original.length - 2) !== SPACE) {
            mutateSegment(paragraph, candidate, mutableText => {
                // Swap the final &nbsp; for a regular space, based on the text captured above
                mutableText.text = original.slice(0, -1) + SPACE;
            });
        }
    }
}
 
/**
 * Step 4 of the normalization: when the last text segment is also the last inline segment and
 * it ends with a regular space, remove all of its trailing spaces.
 * @param paragraph The paragraph passed along to the mutation helper
 * @param segment The last text segment seen, if any
 * @param lastInlineSegment The last inline segment seen, if any
 */
function normalizeLastTextSegment(
    paragraph: ReadonlyContentModelParagraph,
    segment: ReadonlyContentModelText | undefined,
    lastInlineSegment: ReadonlyContentModelSegment | undefined
) {
    if (!segment || segment !== lastInlineSegment) {
        // Nothing to trim: no text segment, or something else comes after it inline
        return;
    }

    if (!segment.text.endsWith(SPACE)) {
        return;
    }

    mutateSegment(paragraph, segment, mutableText => {
        mutableText.text = mutableText.text.replace(TRAILING_SPACE_REGEX, '');
    });
}