Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions snapshots/heredoc_dedent_line_continuation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
@ ProgramNode (location: (1,0)-(1,6))
├── flags: ∅
├── locals: []
└── statements:
@ StatementsNode (location: (1,0)-(1,6))
├── flags: ∅
└── body: (length: 1)
└── @ InterpolatedStringNode (location: (1,0)-(1,6))
├── flags: newline
├── opening_loc: (1,0)-(1,6) = "<<~FOO"
├── parts: (length: 3)
│ ├── @ StringNode (location: (2,0)-(3,0))
│ │ ├── flags: static_literal, frozen
│ │ ├── opening_loc: ∅
│ │ ├── content_loc: (2,0)-(3,0) = " foo\\\n"
│ │ ├── closing_loc: ∅
│ │ └── unescaped: "foo"
│ ├── @ StringNode (location: (3,0)-(4,0))
│ │ ├── flags: static_literal, frozen
│ │ ├── opening_loc: ∅
│ │ ├── content_loc: (3,0)-(4,0) = " \\\n"
│ │ ├── closing_loc: ∅
│ │ └── unescaped: ""
│ └── @ StringNode (location: (4,0)-(5,0))
│ ├── flags: static_literal, frozen
│ ├── opening_loc: ∅
│ ├── content_loc: (4,0)-(5,0) = " bar\n"
│ ├── closing_loc: ∅
│ └── unescaped: "bar\n"
└── closing_loc: (5,0)-(6,0) = "FOO\n"
18 changes: 15 additions & 3 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -15821,6 +15821,19 @@ parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
string->length = dest_length;
}

/**
* If we end up trimming all of the whitespace from a node and it isn't
* part of a line continuation, then we'll drop it from the list entirely.
*/
static inline bool
heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
if (string_node->unescaped.length == 0) {
const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know you've checked this thoroughly, but I'm just uncomfortable with this because it seems like it could include other kinds of strings, but by virtue of the dedent algorithm we're cool.

Just to be super clear, are we saying that we want to include the string node if it matches [\s\t]*\\\r?\n, but otherwise not?

If that's the case, I think I'd prefer that we explicitly check that pattern here, without relying on pm_memchr.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it seems like it could include other kinds of strings, but by virtue of the dedent algorithm we're cool.

Hm, I wouldn't say that is the case, we only take this branch when the unescaped string is empty. That's only possible when the string indeed is dedenting whitespace only, anything else would result in it having some value.

Otherwise, your regexp is pretty much what I am checking for, with the assumption that just checking \ is good enough for the above reason. Otherwise, this is the implementation I came up with:

static inline bool
heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
    if (string_node->unescaped.length == 0) {
        const uint8_t *start = parser->start + PM_LOCATION_START(&string_node->content_loc);
        const uint8_t *end = parser->start + PM_LOCATION_END(&string_node->content_loc);

        while (start < end && pm_char_is_inline_whitespace(*start)) start++;
        if (start < end && *start == '\\') {
            start++;
            if (start < end && *start == '\r') {
                start++;
            }
            if (start < end && *start == '\n' && start + 1 == end) {
                // The string is whitespace only and ends with a line continuation.
                return false;
            }
        }
        // The string may be empty, or all whitespace was removed via dedenting.
        return true;
    }
    // The string has content. It may be whitespace not removed via dedenting or any other characters.
    return false;
}

Do you prefer this still?

}
return false;
}

/**
* Take a heredoc node that is indented by a ~ and trim the leading whitespace.
*/
Expand All @@ -15831,8 +15844,7 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
bool dedent_next = true;

// Iterate over all nodes, and trim whitespace accordingly. We're going to
// keep around two indices: a read and a write. If we end up trimming all of
// the whitespace from a node, then we'll drop it from the list entirely.
// keep around two indices: a read and a write.
size_t write_index = 0;

pm_node_t *node;
Expand All @@ -15851,7 +15863,7 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_w
parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
}

if (string_node->unescaped.length == 0) {
if (heredoc_dedent_discard_string_node(parser, string_node)) {
pm_node_destroy(parser, node);
} else {
nodes->nodes[write_index++] = node;
Expand Down
5 changes: 5 additions & 0 deletions test/prism/fixtures/heredoc_dedent_line_continuation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<<~FOO
foo\
\
bar
FOO
1 change: 1 addition & 0 deletions test/prism/ruby/ruby_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class RubyParserTest < TestCase
"alias.txt",
"dsym_str.txt",
"dos_endings.txt",
"heredoc_dedent_line_continuation.txt",
"heredoc_percent_q_newline_delimiter.txt",
"heredocs_with_fake_newlines.txt",
"heredocs_with_ignored_newlines.txt",
Expand Down