All files / roosterjs-content-model-dom/lib/modelApi/common sanitizeInvisibleUnicode.ts

97.67% Statements 42/43
86.96% Branches 20/23
100% Functions 5/5
96.77% Lines 30/31

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 901x                             1x 164x       434x 434x         434x   510x 510x   384x     100x 256x 256x     42x     7x   7x     7x       1x         510x 2x     510x   265x 265x     1x 1x 1x           244x         1x       1x 2x 2x        
import { stripInvisibleUnicode } from '../../domUtils/stripInvisibleUnicode';
import type {
    ContentModelBlock,
    ContentModelBlockGroup,
    ContentModelDocument,
    ContentModelSegment,
} from 'roosterjs-content-model-types';
 
/**
 * Entry point: remove invisible Unicode characters (e.g. zero-width chars,
 * bidirectional marks, Unicode Tags) from a whole content model.
 *
 * Every top-level block of the document is visited; text segments, link hrefs
 * and the Text nodes underneath General elements are rewritten where they are,
 * so the caller's model object is modified rather than copied.
 * @param model The content model document to sanitize in-place
 */
export function sanitizeInvisibleUnicode(model: ContentModelDocument): void {
    for (const topLevelBlock of model.blocks) {
        sanitizeBlock(topLevelBlock);
    }
}
 
/**
 * Sanitize every direct child block of a block group.
 * Deeper nesting is reached through sanitizeBlock, which calls back into this
 * function for table cells and nested block groups.
 * @param group The block group whose blocks are sanitized in-place
 */
function sanitizeBlockGroup(group: ContentModelBlockGroup): void {
    const { blocks } = group;

    for (const child of blocks) {
        sanitizeBlock(child);
    }
}
 
/**
 * Sanitize a single block, dispatching on its blockType:
 * - Paragraph: each segment is passed to sanitizeSegment
 * - Table: every cell of every row is sanitized as a block group
 * - BlockGroup: child blocks are sanitized; a General group that carries an
 *   element also gets the Text nodes under that element sanitized
 * - Entity, Divider: left untouched
 * @param block The block to sanitize in-place
 */
function sanitizeBlock(block: ContentModelBlock): void {
    switch (block.blockType) {
        case 'Paragraph':
            for (const segment of block.segments) {
                sanitizeSegment(segment);
            }
            break;

        case 'Table':
            for (const row of block.rows) {
                for (const cell of row.cells) {
                    sanitizeBlockGroup(cell);
                }
            }
            break;

        case 'BlockGroup':
            sanitizeBlockGroup(block);

            // A General group wraps a DOM element in addition to its child blocks,
            // so the element's own text nodes need the same treatment.
            if (block.blockGroupType === 'General' && block.element) {
                sanitizeTextNodes(block.element);
            }
            break;

        case 'Entity':
        case 'Divider':
            // Nothing to sanitize for these block types.
            break;
    }
}
 
/**
 * Sanitize one segment in-place.
 * A non-empty link href is stripped first, whatever the segment type is.
 * After that, Text segments have their text stripped, and General segments have
 * both the Text nodes under their element and their child blocks sanitized.
 * Image, Entity, Br and SelectionMarker segments get no further processing.
 * @param segment The segment to sanitize in-place
 */
function sanitizeSegment(segment: ContentModelSegment): void {
    const link = segment.link;

    if (link != null && link.format.href) {
        link.format.href = stripInvisibleUnicode(link.format.href);
    }

    if (segment.segmentType === 'Text') {
        segment.text = stripInvisibleUnicode(segment.text);
    } else if (segment.segmentType === 'General') {
        sanitizeTextNodes(segment.element);
        sanitizeBlockGroup(segment);
    }
}
 
/**
 * Strip invisible Unicode characters from every Text node found by walking the
 * subtree rooted at the given element.
 * @param element Root element whose Text nodes are rewritten in-place
 */
function sanitizeTextNodes(element: HTMLElement): void {
    // SHOW_TEXT restricts the walker to Text nodes, which is what makes the
    // cast on nextNode() below valid.
    const walker = element.ownerDocument.createTreeWalker(element, NodeFilter.SHOW_TEXT);

    let node: Text | null;

    while ((node = walker.nextNode() as Text | null)) {
        // Empty text nodes have nothing to strip, so they are not written to.
        if (node.nodeValue) {
            node.nodeValue = stripInvisibleUnicode(node.nodeValue);
        }
    }
}