token-split — Visualize token boundaries from BPE tokenization
Flags
--encoding  select: cl100k_base | o200k_base (default: cl100k_base)
--format    select: brackets | ids | both (default: brackets)
Examples
See how a simple sentence gets tokenized
Usage
"Hello, world! This is a test." | token-split
Visualize token boundaries in JavaScript code
Usage
"const fibonacci = (n) => n <= 1 ? n : fibonacci(n - 1) + fib..." | token-split
Inspect BPE token splits for common English text
Usage
"The quick brown fox jumps over the lazy dog." | token-split
View source
/**
 * Tokenizes `input` and renders the resulting token sequence as text.
 *
 * @param {string} input - Text to tokenize. If it trims to an empty string the
 *   function returns early without loading the tokenizer module.
 * @param {{encoding?: string, format?: string}} [opts] - `encoding` falls back
 *   to "cl100k_base" and `format` to "brackets" when missing or falsy.
 * @returns {Promise<string>} "(empty input)" for blank input; the token ids
 *   joined by single spaces when `format` is "ids"; otherwise one bracketed
 *   group per token, concatenated with no separator: `[id:text]` for "both",
 *   `[text]` for any other format value.
 */
async (input, opts = {}) => {
  const trimmed = input.trim();
  if (!trimmed) {
    return "(empty input)";
  }

  // Load the tokenizer lazily. `__tla` is awaited before the module's exports
  // are used — presumably a top-level-await readiness promise emitted by the
  // bundler; confirm against the build setup.
  const tokenizerModule = await import('./tokenizer_qMtbZfTQ.mjs');
  await tokenizerModule.__tla;
  const { getEncoder } = tokenizerModule;

  const encodingName = opts.encoding || "cl100k_base";
  const outputFormat = opts.format || "brackets";

  const encoder = await getEncoder(encodingName);
  const tokenIds = Array.from(encoder.encode(input));

  if (outputFormat === "ids") {
    return tokenIds.join(" ");
  }

  // Non-fatal decoding: a token whose bytes are not valid UTF-8 on their own
  // is rendered with U+FFFD replacement characters instead of throwing.
  const utf8 = new TextDecoder("utf-8", {
    fatal: false,
  });

  // Choose the per-token renderer once; every format other than "both"
  // uses the plain bracket form.
  const renderToken =
    outputFormat === "both"
      ? (id, text) => `[${id}:${text}]`
      : (_id, text) => `[${text}]`;

  let rendered = "";
  for (const id of tokenIds) {
    // Decode each token by itself so its individual text is visible.
    const tokenBytes = encoder.decode(new Uint32Array([id]));
    rendered += renderToken(id, utf8.decode(tokenBytes));
  }
  return rendered;
}