strip-invisible Strip invisible Unicode (watermarks, fingerprints)
Flags
--mode select: clean | audit default: clean
Examples
Clean hidden characters from AI-generated text
Usage
"Hello, world! This text has hidden characters." | strip-invisible Remove invisible formatting from text pasted from a CMS
Usage
"Product Name: Super Widget
Price: $29.99" | strip-invisible Strip hidden Unicode from copy-pasted Word document text
Usage
"Dear Customer,
Your order has shipped." | strip-invisible View source
/**
 * Strip invisible Unicode characters from text, or report which ones it contains.
 *
 * Clean mode (the default): each character matched by NORM (non-ASCII space
 * characters) is replaced with a plain space, then each character matched by
 * STRIP (zero-width, bidi, filler, variation-selector, tag, ... characters) is
 * removed.
 *
 * Audit mode (`opts.mode === "audit"`): the input is not modified. Returns a
 * text report with one line per distinct hidden code point (most frequent
 * first) showing its U+ hex value, a label, its count and the action clean
 * mode would take, followed by a blank line and a totals line. If nothing is
 * found, returns "No hidden characters found.".
 *
 * @param {string} input - Text to clean or audit.
 * @param {{mode?: string}} [opts] - Options; only `mode === "audit"` changes behavior.
 * @returns {string} The cleaned text, or the audit report in audit mode.
 */
(input, opts = {}) => {
  // Characters that are removed outright in clean mode.
  const STRIP = /[\u00AD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u2069\u206A-\u206F\u2800\u3164\uFE00-\uFE0F\uFEFF\uFFF9-\uFFFB\uFFA0\u{E0000}-\u{E007F}\u{E0100}-\u{E01EF}\u{1D173}-\u{1D17A}\u{1BCA0}-\u{1BCA3}]/gu;
  // Space-like characters that are normalized to a plain space in clean mode.
  const NORM = /[\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]/g;

  if (opts.mode === "audit") {
    // Labels for individually named code points; ranges are handled in name().
    const names = {
      0x00ad: "SOFT HYPHEN",
      0x034f: "COMBINING GRAPHEME JOINER",
      0x061c: "ARABIC LETTER MARK",
      0x180e: "MONGOLIAN VOWEL SEPARATOR",
      0x200b: "ZERO WIDTH SPACE",
      0x200c: "ZERO WIDTH NON-JOINER",
      0x200d: "ZERO WIDTH JOINER",
      0x200e: "LEFT-TO-RIGHT MARK",
      0x200f: "RIGHT-TO-LEFT MARK",
      0x2060: "WORD JOINER",
      0x2061: "FUNCTION APPLICATION",
      0x2062: "INVISIBLE TIMES",
      0x2063: "INVISIBLE SEPARATOR",
      0x2064: "INVISIBLE PLUS",
      0xfeff: "ZERO WIDTH NO-BREAK SPACE",
      0x2800: "BRAILLE PATTERN BLANK",
      0x3164: "HANGUL FILLER",
      0x00a0: "NO-BREAK SPACE",
      0x1680: "OGHAM SPACE MARK",
      0x2000: "EN QUAD",
      0x2001: "EM QUAD",
      0x2002: "EN SPACE",
      0x2003: "EM SPACE",
      0x2004: "THREE-PER-EM SPACE",
      0x2005: "FOUR-PER-EM SPACE",
      0x2006: "SIX-PER-EM SPACE",
      0x2007: "FIGURE SPACE",
      0x2008: "PUNCTUATION SPACE",
      0x2009: "THIN SPACE",
      0x200a: "HAIR SPACE",
      0x202f: "NARROW NO-BREAK SPACE",
      0x205f: "MEDIUM MATHEMATICAL SPACE",
      0x3000: "IDEOGRAPHIC SPACE"
    };

    // Resolve a display label for a code point; falls back to "INVISIBLE".
    const name = (cp) => {
      if (names[cp]) return names[cp];
      // U+FE00..U+FE0F are VARIATION SELECTOR-1..16.
      if (cp >= 0xfe00 && cp <= 0xfe0f) return `VARIATION SELECTOR-${cp - 0xfe00 + 1}`;
      // U+E0100..U+E01EF are VARIATION SELECTOR-17..256 (the supplement starts
      // at 17, so the offset is decimal 17, not hex 0x17).
      if (cp >= 0xe0100 && cp <= 0xe01ef) return `VARIATION SELECTOR-${cp - 0xe0100 + 17}`;
      if (cp >= 0xe0020 && cp <= 0xe007e) {
        // Tag characters in this range mirror ASCII 0x20..0x7E; show the mirrored char.
        const ch = String.fromCodePoint(cp - 0xe0000);
        return ch.trim() ? `TAG '${ch}'` : "TAG SPACE";
      }
      if (cp >= 0xe0000 && cp <= 0xe007f) return "TAG CHARACTER";
      if (cp >= 0x202a && cp <= 0x202e) return "BIDI CONTROL";
      if (cp >= 0x2066 && cp <= 0x2069) return "BIDI ISOLATE";
      if (cp >= 0x206a && cp <= 0x206f) return "DEPRECATED FORMAT CHAR";
      return "INVISIBLE";
    };

    // Union of STRIP and NORM; must be kept in sync with both when either changes.
    const ALL = /[\u00AD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u2069\u206A-\u206F\u2800\u3164\uFE00-\uFE0F\uFEFF\uFFF9-\uFFFB\uFFA0\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000\u{E0000}-\u{E007F}\u{E0100}-\u{E01EF}\u{1D173}-\u{1D17A}\u{1BCA0}-\u{1BCA3}]/gu;

    // Code points that clean mode turns into a space (same set as NORM:
    // the five singles plus U+2000..U+200A).
    const spaceSet = new Set([
      0x00a0,
      0x1680,
      0x202f,
      0x205f,
      0x3000,
      ...Array.from({ length: 11 }, (_, i) => 0x2000 + i)
    ]);

    // Count occurrences per code point, in order of first appearance.
    const found = new Map();
    for (const m of input.matchAll(ALL)) {
      const cp = m[0].codePointAt(0);
      found.set(cp, (found.get(cp) || 0) + 1);
    }
    if (found.size === 0) return "No hidden characters found.";

    // One report line per code point, most frequent first.
    const lines = [...found.entries()]
      .sort((a, b) => b[1] - a[1])
      .map(([cp, n]) => {
        const hex = `U+${cp.toString(16).toUpperCase().padStart(4, "0")}`;
        const act = spaceSet.has(cp) ? "\u2192 space" : "strip";
        return `${hex} ${name(cp).padEnd(28)} \u00d7${String(n).padStart(3)} (${act})`;
      });

    const total = [...found.values()].reduce((a, b) => a + b, 0);
    lines.push("", `${total} hidden char${total !== 1 ? "s" : ""} in ${found.size} type${found.size !== 1 ? "s" : ""}`);
    return lines.join("\n");
  }

  // Normalize exotic spaces first, then drop everything invisible.
  return input.replace(NORM, " ").replace(STRIP, "");
}