aboutsummaryrefslogtreecommitdiff
path: root/primedev/util/utils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'primedev/util/utils.cpp')
-rw-r--r--primedev/util/utils.cpp82
1 files changed, 82 insertions, 0 deletions
diff --git a/primedev/util/utils.cpp b/primedev/util/utils.cpp
new file mode 100644
index 00000000..c3f90cfa
--- /dev/null
+++ b/primedev/util/utils.cpp
@@ -0,0 +1,82 @@
+#include <ctype.h>
+#include "utils.h"
+
+bool skip_valid_ansi_csi_sgr(char*& str)
+{
+ if (*str++ != '\x1B')
+ return false;
+ if (*str++ != '[') // CSI
+ return false;
+ for (char* c = str; *c; c++)
+ {
+ if (*c >= '0' && *c <= '9')
+ continue;
+ if (*c == ';' || *c == ':')
+ continue;
+ if (*c == 'm') // SGR
+ break;
+ return false;
+ }
+ return true;
+}
+
+void RemoveAsciiControlSequences(char* str, bool allow_color_codes)
+{
+ for (char *pc = str, c = *pc; c = *pc; pc++)
+ {
+ // skip UTF-8 characters
+ int bytesToSkip = 0;
+ if ((c & 0xE0) == 0xC0)
+ bytesToSkip = 1; // skip 2-byte UTF-8 sequence
+ else if ((c & 0xF0) == 0xE0)
+ bytesToSkip = 2; // skip 3-byte UTF-8 sequence
+ else if ((c & 0xF8) == 0xF0)
+ bytesToSkip = 3; // skip 4-byte UTF-8 sequence
+ else if ((c & 0xFC) == 0xF8)
+ bytesToSkip = 4; // skip 5-byte UTF-8 sequence
+ else if ((c & 0xFE) == 0xFC)
+ bytesToSkip = 5; // skip 6-byte UTF-8 sequence
+
+ bool invalid = false;
+ char* orgpc = pc;
+ for (int i = 0; i < bytesToSkip; i++)
+ {
+ char next = pc[1];
+
+ // valid UTF-8 part
+ if ((next & 0xC0) == 0x80)
+ {
+ pc++;
+ continue;
+ }
+
+ // invalid UTF-8 part or encountered \0
+ invalid = true;
+ break;
+ }
+ if (invalid)
+ {
+ // erase the whole "UTF-8" sequence
+ for (char* x = orgpc; x <= pc; x++)
+ if (*x != '\0')
+ *x = ' ';
+ else
+ break;
+ }
+ if (bytesToSkip > 0)
+ continue; // this byte was already handled as UTF-8
+
+ // an invalid control character or an UTF-8 part outside of UTF-8 sequence
+ if ((iscntrl(c) && c != '\n' && c != '\r' && c != '\x1B') || (c & 0x80) != 0)
+ {
+ *pc = ' ';
+ continue;
+ }
+
+ if (c == '\x1B') // separate handling for this escape sequence...
+ if (allow_color_codes && skip_valid_ansi_csi_sgr(pc)) // ...which we allow for color codes...
+ pc--;
+ else // ...but remove it otherwise
+ *pc = ' ';
+ }
+}