#include #include "utils.h" bool skip_valid_ansi_csi_sgr(char*& str) { if (*str++ != '\x1B') return false; if (*str++ != '[') // CSI return false; for (char* c = str; *c; c++) { if (*c >= '0' && *c <= '9') continue; if (*c == ';' || *c == ':') continue; if (*c == 'm') // SGR break; return false; } return true; } void RemoveAsciiControlSequences(char* str, bool allow_color_codes) { for (char *pc = str, c = *pc; c = *pc; pc++) { // skip UTF-8 characters int bytesToSkip = 0; if ((c & 0xE0) == 0xC0) bytesToSkip = 1; // skip 2-byte UTF-8 sequence else if ((c & 0xF0) == 0xE0) bytesToSkip = 2; // skip 3-byte UTF-8 sequence else if ((c & 0xF8) == 0xF0) bytesToSkip = 3; // skip 4-byte UTF-8 sequence else if ((c & 0xFC) == 0xF8) bytesToSkip = 4; // skip 5-byte UTF-8 sequence else if ((c & 0xFE) == 0xFC) bytesToSkip = 5; // skip 6-byte UTF-8 sequence bool invalid = false; char* orgpc = pc; for (int i = 0; i < bytesToSkip; i++) { char next = pc[1]; // valid UTF-8 part if ((next & 0xC0) == 0x80) { pc++; continue; } // invalid UTF-8 part or encountered \0 invalid = true; break; } if (invalid) { // erase the whole "UTF-8" sequence for (char* x = orgpc; x <= pc; x++) if (*x != '\0') *x = ' '; else break; } if (bytesToSkip > 0) continue; // this byte was already handled as UTF-8 // an invalid control character or an UTF-8 part outside of UTF-8 sequence if ((iscntrl(c) && c != '\n' && c != '\r' && c != '\x1B') || (c & 0x80) != 0) { *pc = ' '; continue; } if (c == '\x1B') // separate handling for this escape sequence... if (allow_color_codes && skip_valid_ansi_csi_sgr(pc)) // ...which we allow for color codes... pc--; else // ...but remove it otherwise *pc = ' '; } }