diff --git a/include/cons_expr/cons_expr.hpp b/include/cons_expr/cons_expr.hpp index 545624e..9fffcb9 100644 --- a/include/cons_expr/cons_expr.hpp +++ b/include/cons_expr/cons_expr.hpp @@ -358,7 +358,7 @@ template [[nodiscard]] constexpr Token next_token(s auto location = std::next(input.begin()); while (location != input.end()) { if (*location == chars::ch('\\')) { - in_escape = true; + in_escape = !in_escape; } else if (*location == chars::ch('"') && !in_escape) { ++location; break; diff --git a/test/string_escape_tests.cpp b/test/string_escape_tests.cpp index 7279b99..6ff9461 100644 --- a/test/string_escape_tests.cpp +++ b/test/string_escape_tests.cpp @@ -93,6 +93,41 @@ TEST_CASE("String escape edge cases", "[string][escape][edge]") STATIC_CHECK(evaluate_expected("\"\\n\"", "\n")); } +// Regression tests for issue #7: escaped backslash before the closing quote +// caused the tokenizer to stay in escape mode and eat the terminator. +TEST_CASE("Tokenizer backslash escape state", "[string][escape][tokenizer][regression][issue-7]") +{ + // Direct tokenizer test: `"a\\" "b"` must split into two string tokens. + // With the bug, `\\` left the tokenizer in escape mode, so the first `"` + // after `\\` was consumed instead of terminating the literal. + constexpr auto split_two_literals = []() constexpr { + auto token = lefticus::next_token(std::string_view(R"("a\\" "b")")); + return token.parsed == std::string_view(R"("a\\")") + && token.remaining == std::string_view(R"("b")"); + }; + STATIC_CHECK(split_two_literals()); + + // End-to-end: evaluating the two-literal sequence should yield "b". + STATIC_CHECK(evaluate_expected(R"("a\\" "b")", "b")); + + // `"\\"` is a valid one-character string whose content is a single backslash. + STATIC_CHECK(evaluate_expected(R"("\\")", "\\")); + + // A valid string ending in `\\` followed by a newline must parse cleanly + // and unescape to `a\`. 
With the bug the tokenizer ran past the terminator, + // leaving a token that did not end in `"`, so the parser reported the + // literal as unterminated. + STATIC_CHECK(evaluate_expected("\"a\\\\\"\n", "a\\")); + + // A consecutive-escape terminator: `"\\\\"` holds two escaped backslashes + // (four `\` characters). Every pair must cancel so the final `"` closes. + constexpr auto double_escaped_backslash_pair = []() constexpr { + auto token = lefticus::next_token(std::string_view(R"("\\\\")")); + return token.parsed == std::string_view(R"("\\\\")") && token.remaining.empty(); + }; + STATIC_CHECK(double_escaped_backslash_pair()); +} + // Branch Coverage Enhancement Tests - Missing String Cases TEST_CASE("String escape error conditions for coverage", "[string][escape][coverage]")