Skip to content

Commit

Permalink
syntax: implement support for ES6 template literals
Browse files Browse the repository at this point in the history
Implement support for ECMAScript 6 template literals which allow simple
interpolation of variable values into strings without resorting to
`sprintf()` or manual string concatenation.

Signed-off-by: Jo-Philipp Wich <jo@mein.io>
  • Loading branch information
jow- committed Apr 13, 2022
1 parent 23ddf91 commit e14b099
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 10 deletions.
23 changes: 23 additions & 0 deletions compiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ static void uc_compiler_compile_paren(uc_compiler_t *compiler);
static void uc_compiler_compile_call(uc_compiler_t *compiler);
static void uc_compiler_compile_post_inc(uc_compiler_t *compiler);
static void uc_compiler_compile_constant(uc_compiler_t *compiler);
static void uc_compiler_compile_template(uc_compiler_t *compiler);
static void uc_compiler_compile_comma(uc_compiler_t *compiler);
static void uc_compiler_compile_labelexpr(uc_compiler_t *compiler);
static void uc_compiler_compile_function(uc_compiler_t *compiler);
Expand Down Expand Up @@ -72,6 +73,7 @@ uc_compiler_parse_rules[TK_ERROR + 1] = {
[TK_NULL] = { uc_compiler_compile_constant, NULL, P_NONE },
[TK_THIS] = { uc_compiler_compile_constant, NULL, P_NONE },
[TK_REGEXP] = { uc_compiler_compile_constant, NULL, P_NONE },
[TK_TEMPLATE] = { uc_compiler_compile_template, NULL, P_NONE },
[TK_COMMA] = { NULL, uc_compiler_compile_comma, P_COMMA },
[TK_LABEL] = { uc_compiler_compile_labelexpr, NULL, P_NONE },
[TK_FUNC] = { uc_compiler_compile_function, NULL, P_NONE },
Expand Down Expand Up @@ -1483,6 +1485,27 @@ uc_compiler_compile_constant(uc_compiler_t *compiler)
}
}

/**
 * Compile a template literal.
 *
 * The template token most recently consumed by the parser is emitted as a
 * constant. Afterwards, for as long as the token stream continues with either
 * a further TK_TEMPLATE chunk or a TK_PLACEH placeholder start, the value of
 * that part is compiled and followed by an I_ADD instruction. A placeholder
 * consists of an expression which must be followed by a TK_RBRACE token.
 *
 * @param compiler  Compiler state whose parser has just consumed the first
 *                  template token.
 */
static void
uc_compiler_compile_template(uc_compiler_t *compiler)
{
	/* leading chunk of the literal */
	uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, compiler->parser->prev.uv);

	for (;;) {
		/* another literal text chunk follows */
		if (uc_compiler_parse_match(compiler, TK_TEMPLATE)) {
			uc_compiler_emit_constant(compiler, compiler->parser->prev.pos, compiler->parser->prev.uv);
			uc_compiler_emit_insn(compiler, 0, I_ADD);

			continue;
		}

		/* anything other than a placeholder start ends the template */
		if (!uc_compiler_parse_match(compiler, TK_PLACEH))
			return;

		/* placeholder: expression, add to the value so far, closing brace */
		uc_compiler_compile_expression(compiler);
		uc_compiler_emit_insn(compiler, 0, I_ADD);
		uc_compiler_parse_consume(compiler, TK_RBRACE);
	}
}

static void
uc_compiler_compile_comma(uc_compiler_t *compiler)
{
Expand Down
8 changes: 8 additions & 0 deletions include/ucode/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ typedef enum {
TK_ASOR,
TK_ASNULLISH,
TK_NULLISH,
TK_PLACEH,
TK_TEMPLATE,

TK_EOF,
TK_ERROR
Expand All @@ -129,6 +131,7 @@ typedef enum {
UC_LEX_BLOCK_COMMENT,
UC_LEX_IDENTIFY_TOKEN,
UC_LEX_PARSE_TOKEN,
UC_LEX_PLACEHOLDER,
UC_LEX_EOF
} uc_lex_state_t;

Expand All @@ -144,6 +147,7 @@ typedef struct {
uc_source_t *source;
uint8_t eof:1;
uint8_t is_escape:1;
uint8_t is_placeholder:1;
uint8_t no_regexp:1;
uint8_t no_keyword:1;
size_t buflen;
Expand All @@ -168,6 +172,10 @@ typedef struct {
STATEMENTS = '%',
COMMENT = '#'
} block;
struct {
size_t count;
size_t *entries;
} templates;
} uc_lexer_t;


Expand Down
5 changes: 5 additions & 0 deletions include/ucode/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@
/* Yield a pointer to the last entry of the vector, i.e. the address of
 * entries[count - 1]. Callers must ensure that count is non-zero before
 * using this macro. Note that `vec` is expanded twice. */
#define uc_vector_last(vec) \
(&((vec)->entries[(vec)->count - 1]))

/* Append `val` to the vector: uc_vector_grow() is invoked first (presumably
 * to make room for one more entry - confirm against its definition), then the
 * value is stored in the slot at index `count` and `count` is incremented.
 * Note that `vec` is expanded several times, so it should be free of side
 * effects. */
#define uc_vector_push(vec, val) do { \
	uc_vector_grow(vec); \
	(vec)->entries[(vec)->count] = (val); \
	(vec)->count += 1; \
} while(0)


/* "failsafe" utility functions */

Expand Down
81 changes: 71 additions & 10 deletions lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ static const struct token tokens[] = {
{ TK_ARROW, { .pat = "=>" }, 2, NULL },
{ TK_NULLISH, { .pat = "??" }, 2, NULL },
{ TK_QDOT, { .pat = "?." }, 2, NULL },
{ TK_PLACEH, { .pat = "${" }, 2, NULL },
{ TK_ADD, { .pat = "+" }, 1, NULL },
{ TK_ASSIGN, { .pat = "=" }, 1, NULL },
{ TK_BAND, { .pat = "&" }, 1, NULL },
Expand Down Expand Up @@ -138,6 +139,9 @@ static const struct token tokens[] = {
{ TK_LABEL, { .pat = "az" }, 0, parse_label },
{ TK_LABEL, { .pat = "AZ" }, 0, parse_label },
{ TK_NUMBER, { .pat = "09" }, 0, parse_number },

/* NB: the TK_TEMPLATE entry must remain the last element of this table,
 * lex_step() retrieves it through tokens[ARRAY_SIZE(tokens) - 1] */
{ TK_TEMPLATE, { .pat = "`" }, 1, parse_string }
};

static const struct keyword reserved_words[] = {
Expand Down Expand Up @@ -313,6 +317,22 @@ parse_string(uc_lexer_t *lex)
return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated string"));

for (ptr = lex->bufstart; ptr < lex->bufend; ptr++) {
/* continuation of placeholder start */
if (lex->is_placeholder) {
if (*ptr == '{') {
buf_consume(lex, 1);
rv = lookbehind_to_text(lex, lex->lastoff, tok->type, NULL);

if (!rv)
rv = emit_op(lex, lex->lastoff, tok->type, ucv_string_new_length("", 0));

return rv;
}

lex->is_placeholder = false;
lookbehind_append(lex, "$", 1);
}

/* continuation of escape sequence */
if (lex->is_escape) {
if (lex->esclen == 0) {
Expand Down Expand Up @@ -486,10 +506,10 @@ parse_string(uc_lexer_t *lex)
lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart);
buf_consume(lex, (ptr + 1) - lex->bufstart);

rv = lookbehind_to_text(lex, lex->lastoff, TK_STRING, NULL);
rv = lookbehind_to_text(lex, lex->lastoff, tok->type, NULL);

if (!rv)
rv = emit_op(lex, lex->lastoff, TK_STRING, ucv_string_new_length("", 0));
rv = emit_op(lex, lex->lastoff, tok->type, ucv_string_new_length("", 0));

return rv;
}
Expand All @@ -500,6 +520,13 @@ parse_string(uc_lexer_t *lex)
lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart);
buf_consume(lex, (ptr - lex->bufstart) + 1);
}

/* potential placeholder start */
else if (q == '`' && *ptr == '$') {
lex->is_placeholder = true;
lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart);
buf_consume(lex, (ptr - lex->bufstart) + 1);
}
}

lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart);
Expand Down Expand Up @@ -721,7 +748,7 @@ lex_step(uc_lexer_t *lex, FILE *fp)
uint32_t masks[] = { 0, le32toh(0x000000ff), le32toh(0x0000ffff), le32toh(0x00ffffff), le32toh(0xffffffff) };
union { uint32_t n; char str[4]; } search;
const struct token *tok;
size_t rlen, rem;
size_t rlen, rem, *nest;
char *ptr, c;
uc_token_t *rv;
size_t i;
Expand Down Expand Up @@ -966,6 +993,26 @@ lex_step(uc_lexer_t *lex, FILE *fp)
lex->block = NONE;
}

/* track opening braces */
else if (tok->type == TK_LBRACE && lex->templates.count > 0) {
nest = uc_vector_last(&lex->templates);
(*nest)++;
}

/* check end of placeholder expression */
else if (tok->type == TK_RBRACE && lex->templates.count > 0) {
nest = uc_vector_last(&lex->templates);

if (*nest == 0) {
lex->templates.count--;
lex->state = UC_LEX_PARSE_TOKEN;
lex->tok = &tokens[ARRAY_SIZE(tokens) - 1]; /* NB: TK_TEMPLATE token spec */
}
else {
(*nest)--;
}
}

/* do not report statement tags to the parser */
if (tok->type != 0 && tok->type != TK_LSTM)
rv = emit_op(lex, lex->source->off,
Expand Down Expand Up @@ -1001,7 +1048,8 @@ lex_step(uc_lexer_t *lex, FILE *fp)

if (rv) {
memset(lex->esc, 0, sizeof(lex->esc));
lex->state = UC_LEX_IDENTIFY_TOKEN;
lex->state = lex->is_placeholder ? UC_LEX_PLACEHOLDER : UC_LEX_IDENTIFY_TOKEN;
lex->is_placeholder = false;
lex->tok = NULL;

if (rv == UC_LEX_CONTINUE_PARSING)
Expand All @@ -1013,6 +1061,14 @@ lex_step(uc_lexer_t *lex, FILE *fp)
break;


case UC_LEX_PLACEHOLDER:
lex->state = UC_LEX_IDENTIFY_TOKEN;

uc_vector_push(&lex->templates, 0);

return emit_op(lex, lex->source->off, TK_PLACEH, NULL);


case UC_LEX_EOF:
break;
}
Expand Down Expand Up @@ -1051,6 +1107,9 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source)

lex->lastoff = 0;

lex->templates.count = 0;
lex->templates.entries = NULL;

if (config && config->raw_mode) {
lex->state = UC_LEX_IDENTIFY_TOKEN;
lex->block = STATEMENTS;
Expand All @@ -1060,6 +1119,7 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source)
void
uc_lexer_free(uc_lexer_t *lex)
{
uc_vector_clear(&lex->templates);
uc_source_put(lex->source);

free(lex->lookbehind);
Expand Down Expand Up @@ -1095,12 +1155,13 @@ uc_tokenname(unsigned type)
size_t i;

switch (type) {
case 0: return "End of file";
case TK_STRING: return "String";
case TK_LABEL: return "Label";
case TK_NUMBER: return "Number";
case TK_DOUBLE: return "Double";
case TK_REGEXP: return "Regexp";
case 0: return "End of file";
case TK_TEMPLATE: return "Template";
case TK_STRING: return "String";
case TK_LABEL: return "Label";
case TK_NUMBER: return "Number";
case TK_DOUBLE: return "Double";
case TK_REGEXP: return "Regexp";
}

for (i = 0; i < ARRAY_SIZE(tokens); i++) {
Expand Down
102 changes: 102 additions & 0 deletions tests/custom/00_syntax/27_template_literals
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
The ucode language supports ES6 template literals for easy interpolation
of expression results into strings.


1. Simple template literals are equivalent to strings.

-- Testcase --
{{ `foo` === 'foo' }}
-- End --

-- Expect stdout --
true
-- End --


2. Template literals may embed expressions using `${...}` placeholder notation.

-- Testcase --
{%
let x = 2;
let y = 4;

print(`The result of ${x} * ${y} is ${x * y}\n`);
%}
-- End --

-- Expect stdout --
The result of 2 * 4 is 8
-- End --


3. Template literals may be nested.

-- Testcase --
{%
let isFoo = false;
let isBar = true;

print(`Foo is ${isFoo} and ${isBar ? `bar is ${isBar}` : `nothing else`}!\n`);
%}
-- End --

-- Expect stdout --
Foo is false and bar is true!
-- End --


4. Placeholder expression results are implicitly stringified.

-- Testcase --
{%
let o1 = { foo: true };
let o2 = proto({ color: "red" }, { tostring: function() { return `I am a ${this.color} object` } });

print(`The first object is ${o1} and the second says "${o2}".\n`);
%}
-- End --

-- Expect stdout --
The first object is { "foo": true } and the second says "I am a red object".
-- End --


5. Escaping either `$` or `{` prevents interpolation as a placeholder; a sole
`$` character bears no special meaning.

-- Testcase --
{%
printf("%.J\n", [
`foo \${bar} baz`,
`foo $\{bar} baz`,
`foo $bar baz`
]);
%}
-- End --

-- Expect stdout --
[
"foo ${bar} baz",
"foo ${bar} baz",
"foo $bar baz"
]
-- End --


6. Unterminated placeholder expressions are a syntax error.

-- Testcase --
{{
`foo ${ bar`
}}
-- End --

-- Expect stderr --
Syntax error: Unterminated string
In line 2, byte 13:

` `foo ${ bar``
Near here -----^


-- End --

0 comments on commit e14b099

Please sign in to comment.