pipr.tools
Pipes
Clean Email Strip formatting from pasted email for clean plain text Clean AI Output Clean plain text from ChatGPT or AI output Decode & Format JWT Decode a JWT and pretty-print header and payload Word Frequency Count word frequency in text

strip-invisible Strip invisible Unicode (watermarks, fingerprints)

Aa Text
Input0 chars
Output0 chars

Flags

--mode select: clean | audit default: clean

Examples

Clean hidden characters from AI-generated text

Usage
"Hello​, ‌world‍! This text has hidden⁠ characters." | strip-invisible

Remove invisible formatting from text pasted from a CMS

Usage
"Product​ Name‌: Super Widget‍
Price⁠: $29.99" | strip-invisible

Strip hidden Unicode from copy-pasted Word document text

Usage
"Dear Customer,​
Your‌ order‍ has⁠ shipped." | strip-invisible
View source
/**
 * Strip invisible Unicode characters from text, or audit which ones are present.
 *
 * Clean mode (any `opts.mode` other than "audit"): every space-like character
 * matched by NORM is replaced with a plain ASCII space, then every character
 * matched by STRIP is removed.
 *
 * Audit mode (`opts.mode === "audit"`): nothing is modified. Returns a report
 * with one line per distinct hidden code point (most frequent first) showing
 * its U+ code, a name, its count and the action clean mode would take,
 * followed by a blank line and a summary line. Returns
 * "No hidden characters found." when nothing matches.
 *
 * @param {string} input - Text to clean or audit.
 * @param {{mode?: string}} [opts] - Options; `mode` is "clean" or "audit".
 * @returns {string} The cleaned text, or the audit report.
 */
(input, opts = {})=>{
                // Invisible / format characters that clean mode removes outright.
                const STRIP = /[\u00AD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u2069\u206A-\u206F\u2800\u3164\uFE00-\uFE0F\uFEFF\uFFF9-\uFFFB\uFFA0\u{E0000}-\u{E007F}\u{E0100}-\u{E01EF}\u{1D173}-\u{1D17A}\u{1BCA0}-\u{1BCA3}]/gu;
                // Space-like characters that clean mode turns into a plain ASCII space.
                const NORM = /[\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]/g;
                if (opts.mode === "audit") {
                    // Display names for individually listed code points; ranges are
                    // handled by name() below.
                    const names = {
                        0x00ad: "SOFT HYPHEN",
                        0x034f: "COMBINING GRAPHEME JOINER",
                        0x061c: "ARABIC LETTER MARK",
                        0x180e: "MONGOLIAN VOWEL SEPARATOR",
                        0x200b: "ZERO WIDTH SPACE",
                        0x200c: "ZERO WIDTH NON-JOINER",
                        0x200d: "ZERO WIDTH JOINER",
                        0x200e: "LEFT-TO-RIGHT MARK",
                        0x200f: "RIGHT-TO-LEFT MARK",
                        0x2060: "WORD JOINER",
                        0x2061: "FUNCTION APPLICATION",
                        0x2062: "INVISIBLE TIMES",
                        0x2063: "INVISIBLE SEPARATOR",
                        0x2064: "INVISIBLE PLUS",
                        0xfeff: "ZERO WIDTH NO-BREAK SPACE",
                        0x2800: "BRAILLE PATTERN BLANK",
                        0x3164: "HANGUL FILLER",
                        0x00a0: "NO-BREAK SPACE",
                        0x1680: "OGHAM SPACE MARK",
                        0x2000: "EN QUAD",
                        0x2001: "EM QUAD",
                        0x2002: "EN SPACE",
                        0x2003: "EM SPACE",
                        0x2004: "THREE-PER-EM SPACE",
                        0x2005: "FOUR-PER-EM SPACE",
                        0x2006: "SIX-PER-EM SPACE",
                        0x2007: "FIGURE SPACE",
                        0x2008: "PUNCTUATION SPACE",
                        0x2009: "THIN SPACE",
                        0x200a: "HAIR SPACE",
                        0x202f: "NARROW NO-BREAK SPACE",
                        0x205f: "MEDIUM MATHEMATICAL SPACE",
                        0x3000: "IDEOGRAPHIC SPACE"
                    };
                    // Resolve a code point to a display name, falling back to range
                    // labels and finally to the generic "INVISIBLE".
                    const name = (cp)=>{
                        if (names[cp]) return names[cp];
                        // U+FE00..U+FE0F are VARIATION SELECTOR-1..16.
                        if (cp >= 0xfe00 && cp <= 0xfe0f) return `VARIATION SELECTOR-${cp - 0xfdff}`;
                        // U+E0100..U+E01EF are VARIATION SELECTOR-17..256, so the
                        // offset is 0xE0100 - 17 = 0xE00EF.
                        if (cp >= 0xe0100 && cp <= 0xe01ef) return `VARIATION SELECTOR-${cp - 0xe00ef}`;
                        // Tag characters mirror ASCII 0x20..0x7E; show the mirrored char.
                        if (cp >= 0xe0020 && cp <= 0xe007e) {
                            const ch = String.fromCodePoint(cp - 0xe0000);
                            return ch.trim() ? `TAG '${ch}'` : "TAG SPACE";
                        }
                        if (cp >= 0xe0000 && cp <= 0xe007f) return "TAG CHARACTER";
                        if (cp >= 0x202a && cp <= 0x202e) return "BIDI CONTROL";
                        if (cp >= 0x2066 && cp <= 0x2069) return "BIDI ISOLATE";
                        if (cp >= 0x206a && cp <= 0x206f) return "DEPRECATED FORMAT CHAR";
                        return "INVISIBLE";
                    };
                    // Built from STRIP and NORM so the audit always reports exactly
                    // the characters clean mode would touch.
                    const ALL = new RegExp(`${STRIP.source}|${NORM.source}`, "gu");
                    // Non-global copy of NORM: test() on a /g regex is stateful
                    // through lastIndex and would give wrong answers on reuse.
                    const SPACE_LIKE = new RegExp(NORM.source);
                    // code point -> number of occurrences, in first-seen order.
                    const found = new Map();
                    for (const m of input.matchAll(ALL)){
                        const cp = m[0].codePointAt(0);
                        found.set(cp, (found.get(cp) || 0) + 1);
                    }
                    if (found.size === 0) return "No hidden characters found.";
                    const lines = [
                        ...found.entries()
                    ].sort((a, b)=>b[1] - a[1]).map(([cp, n])=>{
                        const hex = `U+${cp.toString(16).toUpperCase().padStart(4, "0")}`;
                        const act = SPACE_LIKE.test(String.fromCodePoint(cp)) ? "\u2192 space" : "strip";
                        return `${hex}  ${name(cp).padEnd(28)} \u00d7${String(n).padStart(3)}  (${act})`;
                    });
                    const total = [
                        ...found.values()
                    ].reduce((a, b)=>a + b, 0);
                    lines.push("", `${total} hidden char${total !== 1 ? "s" : ""} in ${found.size} type${found.size !== 1 ? "s" : ""}`);
                    return lines.join("\n");
                }
                // Normalise spaces first, then drop the invisible characters.
                return input.replace(NORM, " ").replace(STRIP, "");
            }

Also useful for