* vocab : prevent integer overflow during load
* Add static_cast and GGML_ABORT to reject token sizes at or beyond the int32_t limit
---------
Co-authored-by: Georgi Gerganov <redacted>
#include <set>
#include <unordered_map>
#include <cctype>
+#include <cinttypes>
//
// helpers
// copy piece chars to output text buffer
// skip up to 'lstrip' leading spaces before copying
auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
+ if (size >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+ GGML_ABORT("invalid token size: %zu exceeds int32_t limit", size);
+ }
+
for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
token++;
size--;