i++;
} else if (c == '(') {
i++;
- if (i < length) {
- if (sub_pattern[i] == '?') {
+ if (i < length && sub_pattern[i] == '?') {
+ if (i + 1 < length && sub_pattern[i + 1] == ':') {
+ i += 2; // skip "?:" for non-capturing group, treat as regular group
+ } else {
+ // lookahead/lookbehind (?=, ?!, ?<=, ?<!) - not supported
_warnings.push_back("Unsupported pattern syntax");
+ // skip to matching ')' to avoid UB on empty seq
+ int depth = 1;
+ while (i < length && depth > 0) {
+ if (sub_pattern[i] == '\\' && i + 1 < length) {
+ i += 2; // skip escaped character
+ } else {
+ if (sub_pattern[i] == '(') depth++;
+ else if (sub_pattern[i] == ')') depth--;
+ i++;
+ }
+ }
+ continue;
}
}
seq.emplace_back("(" + to_rule(transform()) + ")", false);
} else if (c == ')') {
i++;
- if (start > 0 && sub_pattern[start - 1] != '(') {
+ if (start > 0 && sub_pattern[start - 1] != '(' && (start < 2 || sub_pattern[start - 2] != '?' || sub_pattern[start - 1] != ':')) {
_errors.push_back("Unbalanced parentheses");
}
return join_seq();
}
});
+ // C++-only tests: regex features (non-capturing groups) not yet supported in the JS/Python implementations, so only the C++ converter is exercised here.
+ {
+ fprintf(stderr, "#\n# Testing C++ only features\n#\n");
+ auto run = [](const TestCase & tc) { // runs one case: logs its name, converts the schema, checks grammar and status
+ fprintf(stderr, "- %s\n", tc.name.c_str());
+ try {
+ tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true)); // NOTE(review): meaning of the `true` argument is not visible here - confirm against the declaration
+ tc.verify_status(SUCCESS); // reached only when nothing above threw
+ } catch (const std::invalid_argument & ex) { // only invalid_argument is mapped to FAILURE; any other exception propagates
+ fprintf(stderr, "Error: %s\n", ex.what());
+ tc.verify_status(FAILURE);
+ }
+ };
+
+ run({
+ SUCCESS,
+ "regexp with non-capturing group", // expected grammar treats (?:...) like a plain parenthesized group
+ R"""({
+ "type": "string",
+ "pattern": "^(?:foo|bar)baz$"
+ })""",
+ R"""(
+ root ::= "\"" (("foo" | "bar") "baz") "\"" space
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""",
+ });
+
+ run({
+ SUCCESS,
+ "regexp with nested non-capturing groups", // nested (?:...) with + and ? quantifiers applied to the groups
+ R"""({
+ "type": "string",
+ "pattern": "^(?:(?:ab)+c)?d$"
+ })""",
+ R"""(
+ root ::= "\"" ((("ab")+ "c")? "d") "\"" space
+ space ::= | " " | "\n"{1,2} [ \t]{0,20}
+ )""",
+ });
+ }
+
if (getenv("LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR")) {
fprintf(stderr, "\033[33mWARNING: Skipping slow tests on emulator.\n\033[0m");
} else {