HS-gen: string-aware line-comment stripping (+1 test)

process_hs_val stripped `//…` line comments with a naïve regex,
which devoured `https://yyy.xxxxxx.com/…` inside a backtick template
— the 'properly interpolates values 2' fixture was landing with
the HS source truncated at `https:`.

New helper _strip_hs_line_comments walks char-by-char and only
strips `//` and `--` (the latter only at start of input or after whitespace)
when not inside `'…'`, `"…"`, or backticks; it respects `\`-escapes inside strings.

Suite hs-upstream-core/regressions: 11/16 → 12/16.
Smoke 0-195: 163/195 → 164/195.
This commit is contained in:
2026-04-24 09:42:19 +00:00
parent 094945d86a
commit cb37259d10
4 changed files with 51 additions and 9 deletions

View File

@@ -1177,6 +1177,45 @@ def parse_dev_body(body, elements, var_names):
# ── Test generation ───────────────────────────────────────────────
def _strip_hs_line_comments(s):
"""Strip `//…` and `--…` line comments outside HS string literals.
HS has three string delimiters: single quotes, double quotes, and
backticks (template strings). `https://…` inside a backtick must not
be treated as a comment.
"""
out = []
i = 0
n = len(s)
in_str = None # None | "'" | '"' | '`'
while i < n:
ch = s[i]
if in_str is None:
# Check for line-comment starters at depth 0.
if ch == '/' and i + 1 < n and s[i + 1] == '/':
# Skip to newline.
while i < n and s[i] != '\n':
i += 1
continue
if ch == '-' and i + 1 < n and s[i + 1] == '-' and (i == 0 or s[i - 1].isspace()):
while i < n and s[i] != '\n':
i += 1
continue
if ch in ("'", '"', '`'):
in_str = ch
out.append(ch)
i += 1
else:
if ch == '\\' and i + 1 < n:
out.append(ch); out.append(s[i + 1]); i += 2
continue
if ch == in_str:
in_str = None
out.append(ch)
i += 1
return ''.join(out)
def process_hs_val(hs_val):
"""Process a raw HS attribute value: collapse whitespace, insert 'then' separators."""
# Convert escaped newlines/tabs to real whitespace
@@ -1187,8 +1226,8 @@ def process_hs_val(hs_val):
hs_val = hs_val.replace('\x00QUOT\x00', '\\"')
# Strip line comments BEFORE newline collapse — once newlines become `then`,
# an unterminated `//` / ` --` comment would consume the rest of the input.
hs_val = re.sub(r'//[^\n]*', '', hs_val)
hs_val = re.sub(r'(^|\s)--[^\n]*', r'\1', hs_val)
# String-aware: `https://…` inside a backtick template must not be stripped.
hs_val = _strip_hs_line_comments(hs_val)
cmd_kws = r'(?:set|put|get|add|remove|toggle|hide|show|if|repeat|for|wait|send|trigger|log|call|take|throw|return|append|tell|go|halt|settle|increment|decrement|fetch|make|install|measure|empty|reset|swap|default|morph|render|scroll|focus|select|pick|beep!)'
hs_val = re.sub(r'\s{2,}(?=' + cmd_kws + r'\b)', ' then ', hs_val)
hs_val = re.sub(r'\s*[\n\r]\s*', ' then ', hs_val)