size_t pos = 0;
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
auto prev_stacks = grammar->stacks;
- grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+ llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
if (grammar->stacks.empty()) {
error_pos = pos;
error_msg = "Unexpected character '" + unicode_cpt_to_utf8(*it) + "'";
// be positioned at a character range (see `llama_grammar_advance_stack`), and
// produces the N possible stacks if the given char is accepted at those
// positions
-std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
+void llama_grammar_accept(
const std::vector<std::vector<llama_grammar_element>> & rules,
const std::vector<std::vector<const llama_grammar_element *>> & stacks,
- const uint32_t chr) {
+ const uint32_t chr,
+ std::vector<std::vector<const llama_grammar_element *>> & new_stacks) {
- std::vector<std::vector<const llama_grammar_element *>> new_stacks;
// The result is written into the caller-supplied `new_stacks` rather than into a
// local vector returned by value. Any previous contents of `new_stacks` are
// discarded here, before `stacks` is iterated.
// NOTE(review): because of that ordering, `new_stacks` must not refer to the same
// vector as `stacks` -- the loop below would then iterate an already-cleared vector.
+ new_stacks.clear();
for (const auto & stack : stacks) {
if (stack.empty()) {
llama_grammar_advance_stack(rules, new_stack, new_stacks);
}
}
-
// Nothing is returned any more; callers read the outcome from `new_stacks`.
- return new_stacks;
}
static std::vector<llama_grammar_candidate> llama_grammar_reject_candidates(
const std::vector<llama_grammar_candidate> & candidates) {
std::vector<llama_grammar_candidate> rejects;
+ rejects.reserve(candidates.size());
if (stack.empty()) {
for (const auto & tok : candidates) {
const llama_grammar_element * stack_pos = stack.back();
std::vector<llama_grammar_candidate> next_candidates;
+ next_candidates.reserve(candidates.size());
+
for (const auto & tok : candidates) {
if (*tok.code_points == 0) {
// reached end of full codepoints in token, reject iff it ended in a partial sequence
// Note terminating 0 in decoded string
const auto decoded = decode_utf8(piece, grammar->partial_utf8);
const auto & code_points = decoded.first;
+ std::vector<std::vector<const llama_grammar_element *>> tmp_new_stacks;
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
- grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+ llama_grammar_accept(grammar->rules, grammar->stacks, *it, tmp_new_stacks);
+ grammar->stacks = tmp_new_stacks;
}
grammar->partial_utf8 = decoded.second;
GGML_ASSERT(!grammar->stacks.empty());
struct llama_context * ctx
);
// Accepts `chr` against the given `stacks` using `rules`. In the new form the
// function returns nothing and reports its result through the `new_stacks`
// out-parameter instead of returning a vector by value.
// NOTE(review): pass a vector distinct from `stacks` as `new_stacks` -- confirm
// against the definition, which clears the output before reading the input.
-std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
+void llama_grammar_accept(
const std::vector<std::vector<llama_grammar_element>> & rules,
const std::vector<std::vector<const llama_grammar_element *>> & stacks,
- const uint32_t chr);
+ const uint32_t chr,
+ std::vector<std::vector<const llama_grammar_element *>> & new_stacks);
std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
const std::string & src,
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
auto prev_stacks = grammar->stacks;
- grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+ llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
assert(!grammar->stacks.empty());
}
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
++pos;
auto prev_stacks = grammar->stacks;
- grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+ llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
// Expect that each code point will not cause the grammar to fail
if (grammar->stacks.empty()) {
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
auto prev_stacks = grammar->stacks;
- grammar->stacks = llama_grammar_accept(grammar->rules, grammar->stacks, *it);
+ llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
if (grammar->stacks.empty()) {
parse_failed = true;
break;