From 684a61e2765fc220c9725a2403d46f42be934fb8 Mon Sep 17 00:00:00 2001 From: leftibot Date: Fri, 17 Apr 2026 07:43:42 -0600 Subject: [PATCH] Fix #7: tokenizer treats `\\` before `"` as escaped terminator `next_token` set `in_escape = true` on every backslash instead of toggling it. After an even run of backslashes the flag stayed true, so the following `"` was consumed as an escaped quote and the tokenizer ran past the end of the literal. Toggling `in_escape` makes each backslash cancel its predecessor, matching what `process_string_escapes` already does in the second pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- include/cons_expr/cons_expr.hpp | 2 +- test/string_escape_tests.cpp | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/include/cons_expr/cons_expr.hpp b/include/cons_expr/cons_expr.hpp index 545624e..9fffcb9 100644 --- a/include/cons_expr/cons_expr.hpp +++ b/include/cons_expr/cons_expr.hpp @@ -358,7 +358,7 @@ template [[nodiscard]] constexpr Token next_token(s auto location = std::next(input.begin()); while (location != input.end()) { if (*location == chars::ch('\\')) { - in_escape = true; + in_escape = !in_escape; } else if (*location == chars::ch('"') && !in_escape) { ++location; break; diff --git a/test/string_escape_tests.cpp b/test/string_escape_tests.cpp index 7279b99..6ff9461 100644 --- a/test/string_escape_tests.cpp +++ b/test/string_escape_tests.cpp @@ -93,6 +93,41 @@ TEST_CASE("String escape edge cases", "[string][escape][edge]") STATIC_CHECK(evaluate_expected("\"\\n\"", "\n")); } +// Regression tests for issue #7: escaped backslash before the closing quote +// caused the tokenizer to stay in escape mode and eat the terminator. +TEST_CASE("Tokenizer backslash escape state", "[string][escape][tokenizer][regression][issue-7]") +{ + // Direct tokenizer test: `"a\\" "b"` must split into two string tokens. 
+ // With the bug, `\\` left the tokenizer in escape mode, so the first `"` + // after `\\` was consumed instead of terminating the literal. + constexpr auto split_two_literals = []() constexpr { + auto token = lefticus::next_token(std::string_view(R"("a\\" "b")")); + return token.parsed == std::string_view(R"("a\\")") + && token.remaining == std::string_view(R"("b")"); + }; + STATIC_CHECK(split_two_literals()); + + // End-to-end: evaluating the two-literal sequence should yield "b". + STATIC_CHECK(evaluate_expected(R"("a\\" "b")", "b")); + + // `"\\"` is a valid one-character string whose content is a single backslash. + STATIC_CHECK(evaluate_expected(R"("\\")", "\\")); + + // A valid string ending in `\\` followed by a newline must parse cleanly + // and unescape to `a\`. With the bug the tokenizer ran past the terminator, + // leaving a token that did not end in `"`, so the parser reported the + // literal as unterminated. + STATIC_CHECK(evaluate_expected("\"a\\\\\"\n", "a\\")); + + // A consecutive-escape terminator: `"\\\\"` is two escaped backslashes in the + // source string literal. Every pair must cancel so the final `"` closes. + constexpr auto double_escaped_backslash_pair = []() constexpr { + auto token = lefticus::next_token(std::string_view(R"("\\\\")")); + return token.parsed == std::string_view(R"("\\\\")") && token.remaining.empty(); + }; + STATIC_CHECK(double_escaped_backslash_pair()); +} + // Branch Coverage Enhancement Tests - Missing String Cases TEST_CASE("String escape error conditions for coverage", "[string][escape][coverage]")