feat: add new target-constrained jump instruction (vyperlang#3687)

This commit adds a new dynamic jump instruction which allows jumping to one of multiple, explicitly listed jump targets. It has been added in both the s-expr IR (where it is spelled "djump") and the venom IR (where it is spelled "djmp"). This removes the workarounds that we had to implement in the normalization pass and the CFG calculations.

---------

Co-authored-by: Charles Cooper <cooper.charles.m@gmail.com>
tserg · Dec 21, 2023 · 3116e88 · 3116e88
1 parent 9165926
commit 3116e88
Show file tree

Hide file tree

Showing 17 changed files with 158 additions and 127 deletions.
diff --git a/tests/unit/ast/test_pre_parser.py b/tests/unit/ast/test_pre_parser.py
@@ -184,6 +184,9 @@ def test_parse_pragmas(code, pre_parse_settings, compiler_data_settings, mock_ve
         # None is sentinel here meaning that nothing changed
         compiler_data_settings = pre_parse_settings
 
+    # cannot be set via pragma, don't check
+    compiler_data_settings.experimental_codegen = False
+
     assert compiler_data.settings == compiler_data_settings
 
 

diff --git a/tests/unit/compiler/venom/test_multi_entry_block.py b/tests/unit/compiler/venom/test_multi_entry_block.py
@@ -95,3 +95,44 @@ def test_multi_entry_block_2():
     assert cfg_in[0].label.value == "target", "Should contain target"
     assert cfg_in[1].label.value == "finish_split_global", "Should contain finish_split_global"
     assert cfg_in[2].label.value == "finish_split_block_1", "Should contain finish_split_block_1"
+
+
+def test_multi_entry_block_with_dynamic_jump():
+    ctx = IRFunction()
+
+    finish_label = IRLabel("finish")
+    target_label = IRLabel("target")
+    block_1_label = IRLabel("block_1", ctx)
+
+    bb = ctx.get_basic_block()
+    op = bb.append_instruction("store", 10)
+    acc = bb.append_instruction("add", op, op)
+    bb.append_instruction("djmp", acc, finish_label, block_1_label)
+
+    block_1 = IRBasicBlock(block_1_label, ctx)
+    ctx.append_basic_block(block_1)
+    acc = block_1.append_instruction("add", acc, op)
+    op = block_1.append_instruction("store", 10)
+    block_1.append_instruction("mstore", acc, op)
+    block_1.append_instruction("jnz", acc, finish_label, target_label)
+
+    target_bb = IRBasicBlock(target_label, ctx)
+    ctx.append_basic_block(target_bb)
+    target_bb.append_instruction("mul", acc, acc)
+    target_bb.append_instruction("jmp", finish_label)
+
+    finish_bb = IRBasicBlock(finish_label, ctx)
+    ctx.append_basic_block(finish_bb)
+    finish_bb.append_instruction("stop")
+
+    calculate_cfg(ctx)
+    assert not ctx.normalized, "CFG should not be normalized"
+
+    NormalizationPass.run_pass(ctx)
+    assert ctx.normalized, "CFG should be normalized"
+
+    finish_bb = ctx.get_basic_block(finish_label.value)
+    cfg_in = list(finish_bb.cfg_in.keys())
+    assert cfg_in[0].label.value == "target", "Should contain target"
+    assert cfg_in[1].label.value == "finish_split_global", "Should contain finish_split_global"
+    assert cfg_in[2].label.value == "finish_split_block_1", "Should contain finish_split_block_1"
diff --git a/vyper/cli/vyper_compile.py b/vyper/cli/vyper_compile.py
@@ -147,6 +147,7 @@ def _parse_args(argv):
         "--experimental-codegen",
         help="The compiler use the new IR codegen. This is an experimental feature.",
         action="store_true",
+        dest="experimental_codegen",
     )
 
     args = parser.parse_args(argv)
@@ -184,6 +185,9 @@ def _parse_args(argv):
     if args.evm_version:
         settings.evm_version = args.evm_version
 
+    if args.experimental_codegen:
+        settings.experimental_codegen = args.experimental_codegen
+
     if args.verbose:
         print(f"cli specified: `{settings}`", file=sys.stderr)
 
@@ -195,7 +199,6 @@ def _parse_args(argv):
         settings,
         args.storage_layout,
         args.no_bytecode_metadata,
-        args.experimental_codegen,
     )
 
     if args.output_path:
@@ -233,7 +236,6 @@ def compile_files(
     settings: Optional[Settings] = None,
     storage_layout_paths: list[str] = None,
     no_bytecode_metadata: bool = False,
-    experimental_codegen: bool = False,
 ) -> dict:
     paths = paths or []
 
@@ -287,7 +289,6 @@ def compile_files(
             storage_layout_override=storage_layout_override,
             show_gas_estimates=show_gas_estimates,
             no_bytecode_metadata=no_bytecode_metadata,
-            experimental_codegen=experimental_codegen,
         )
 
         ret[file_path] = output

diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py
@@ -892,6 +892,8 @@ def make_setter(left, right):
 _opt_level = OptimizationLevel.GAS
 
 
+# FIXME: this is to get around the fact that we don't have a
+# proper context object in the IR generation phase.
 @contextlib.contextmanager
 def anchor_opt_level(new_level: OptimizationLevel) -> Generator:
     """

diff --git a/vyper/codegen/module.py b/vyper/codegen/module.py
@@ -311,21 +311,23 @@ def _selector_section_sparse(external_functions, module_ctx):
 
         ret.append(["codecopy", dst, bucket_hdr_location, SZ_BUCKET_HEADER])
 
-        jumpdest = IRnode.from_list(["mload", 0])
-        # don't particularly like using `jump` here since it can cause
-        # issues for other backends, consider changing `goto` to allow
-        # dynamic jumps, or adding some kind of jumptable instruction
-        ret.append(["jump", jumpdest])
+        jump_targets = []
 
-        jumptable_data = ["data", "selector_buckets"]
         for i in range(n_buckets):
             if i in buckets:
                 bucket_label = f"selector_bucket_{i}"
-                jumptable_data.append(["symbol", bucket_label])
+                jump_targets.append(bucket_label)
             else:
                 # empty bucket
-                jumptable_data.append(["symbol", "fallback"])
+                jump_targets.append("fallback")
+
+        jumptable_data = ["data", "selector_buckets"]
+        jumptable_data.extend(["symbol", label] for label in jump_targets)
+
+        jumpdest = IRnode.from_list(["mload", 0])
 
+        jump_instr = IRnode.from_list(["djump", jumpdest, *jump_targets])
+        ret.append(jump_instr)
         ret.append(jumptable_data)
 
     for bucket_id, bucket in buckets.items():

diff --git a/vyper/compiler/__init__.py b/vyper/compiler/__init__.py
@@ -53,7 +53,6 @@ def compile_from_file_input(
     no_bytecode_metadata: bool = False,
     show_gas_estimates: bool = False,
     exc_handler: Optional[Callable] = None,
-    experimental_codegen: bool = False,
 ) -> dict:
     """
     Main entry point into the compiler.
@@ -107,7 +106,6 @@ def compile_from_file_input(
         storage_layout_override,
         show_gas_estimates,
         no_bytecode_metadata,
-        experimental_codegen,
     )
 
     ret = {}

diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py
@@ -89,6 +89,9 @@ def build_ir_runtime_output(compiler_data: CompilerData) -> IRnode:
 
 
 def _ir_to_dict(ir_node):
+    # Currently only supported with IRnode and not VenomIR
+    if not isinstance(ir_node, IRnode):
+        return
     args = ir_node.args
     if len(args) > 0 or ir_node.value == "seq":
         return {ir_node.value: [_ir_to_dict(x) for x in args]}

diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py
@@ -21,6 +21,26 @@
 DEFAULT_CONTRACT_PATH = PurePath("VyperContract.vy")
 
 
+def _merge_one(lhs, rhs, helpstr):
+    if lhs is not None and rhs is not None and lhs != rhs:
+        raise StructureException(
+            f"compiler settings indicate {helpstr} {lhs}, " f"but source pragma indicates {rhs}."
+        )
+    return lhs if rhs is None else rhs
+
+
+# TODO: does this belong as a method under Settings?
+def _merge_settings(cli: Settings, pragma: Settings):
+    ret = Settings()
+    ret.evm_version = _merge_one(cli.evm_version, pragma.evm_version, "evm version")
+    ret.optimize = _merge_one(cli.optimize, pragma.optimize, "optimize")
+    ret.experimental_codegen = _merge_one(
+        cli.experimental_codegen, pragma.experimental_codegen, "experimental codegen"
+    )
+
+    return ret
+
+
 class CompilerData:
     """
     Object for fetching and storing compiler data for a Vyper contract.
@@ -59,7 +79,6 @@ def __init__(
         storage_layout: StorageLayout = None,
         show_gas_estimates: bool = False,
         no_bytecode_metadata: bool = False,
-        experimental_codegen: bool = False,
     ) -> None:
         """
         Initialization method.
@@ -76,11 +95,9 @@ def __init__(
             Show gas estimates for abi and ir output modes
         no_bytecode_metadata: bool, optional
             Do not add metadata to bytecode. Defaults to False
-        experimental_codegen: bool, optional
-            Use experimental codegen. Defaults to False
         """
         # to force experimental codegen, uncomment:
-        # experimental_codegen = True
+        # settings.experimental_codegen = True
 
         if isinstance(file_input, str):
             file_input = FileInput(
@@ -93,7 +110,6 @@ def __init__(
         self.storage_layout_override = storage_layout
         self.show_gas_estimates = show_gas_estimates
         self.no_bytecode_metadata = no_bytecode_metadata
-        self.experimental_codegen = experimental_codegen
         self.settings = settings or Settings()
         self.input_bundle = input_bundle or FilesystemInputBundle([Path(".")])
 
@@ -120,32 +136,13 @@ def _generate_ast(self):
             resolved_path=str(self.file_input.resolved_path),
         )
 
-        # validate the compiler settings
-        # XXX: this is a bit ugly, clean up later
-        if settings.evm_version is not None:
-            if (
-                self.settings.evm_version is not None
-                and self.settings.evm_version != settings.evm_version
-            ):
-                raise StructureException(
-                    f"compiler settings indicate evm version {self.settings.evm_version}, "
-                    f"but source pragma indicates {settings.evm_version}."
-                )
-
-            self.settings.evm_version = settings.evm_version
-
-        if settings.optimize is not None:
-            if self.settings.optimize is not None and self.settings.optimize != settings.optimize:
-                raise StructureException(
-                    f"compiler options indicate optimization mode {self.settings.optimize}, "
-                    f"but source pragma indicates {settings.optimize}."
-                )
-            self.settings.optimize = settings.optimize
-
-        # ensure defaults
+        self.settings = _merge_settings(self.settings, settings)
         if self.settings.optimize is None:
             self.settings.optimize = OptimizationLevel.default()
 
+        if self.settings.experimental_codegen is None:
+            self.settings.experimental_codegen = False
+
         # note self.settings.compiler_version is erased here as it is
         # not used after pre-parsing
         return ast
@@ -184,8 +181,10 @@ def global_ctx(self) -> ModuleT:
     @cached_property
     def _ir_output(self):
         # fetch both deployment and runtime IR
-        nodes = generate_ir_nodes(self.global_ctx, self.settings.optimize)
-        if self.experimental_codegen:
+        nodes = generate_ir_nodes(
+            self.global_ctx, self.settings.optimize, self.settings.experimental_codegen
+        )
+        if self.settings.experimental_codegen:
             return [generate_ir(nodes[0]), generate_ir(nodes[1])]
         else:
             return nodes
@@ -211,7 +210,7 @@ def function_signatures(self) -> dict[str, ContractFunctionT]:
 
     @cached_property
     def assembly(self) -> list:
-        if self.experimental_codegen:
+        if self.settings.experimental_codegen:
             return generate_assembly_experimental(
                 self.ir_nodes, self.settings.optimize  # type: ignore
             )
@@ -220,7 +219,7 @@ def assembly(self) -> list:
 
     @cached_property
     def assembly_runtime(self) -> list:
-        if self.experimental_codegen:
+        if self.settings.experimental_codegen:
             return generate_assembly_experimental(
                 self.ir_runtime, self.settings.optimize  # type: ignore
             )
@@ -294,7 +293,9 @@ def generate_folded_ast(
     return vyper_module_folded, symbol_tables
 
 
-def generate_ir_nodes(global_ctx: ModuleT, optimize: OptimizationLevel) -> tuple[IRnode, IRnode]:
+def generate_ir_nodes(
+    global_ctx: ModuleT, optimize: OptimizationLevel, experimental_codegen: bool
+) -> tuple[IRnode, IRnode]:
     """
     Generate the intermediate representation (IR) from the contextualized AST.
 

diff --git a/vyper/compiler/settings.py b/vyper/compiler/settings.py
@@ -42,6 +42,7 @@ class Settings:
     compiler_version: Optional[str] = None
     optimize: Optional[OptimizationLevel] = None
     evm_version: Optional[str] = None
+    experimental_codegen: Optional[bool] = None
 
 
 _DEBUG = False

diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py
@@ -702,6 +702,13 @@ def _height_of(witharg):
             o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i))
         o.extend(["_sym_" + code.args[0].value, "JUMP"])
         return o
+    elif code.value == "djump":
+        o = []
+        # "djump" compiles to a raw EVM jump instruction
+        jump_target = code.args[0]
+        o.extend(_compile_to_assembly(jump_target, withargs, existing_labels, break_dest, height))
+        o.append("JUMP")
+        return o
     # push a literal symbol
     elif code.value == "symbol":
         return ["_sym_" + code.args[0].value]

diff --git a/vyper/utils.py b/vyper/utils.py
@@ -331,6 +331,7 @@ class SizeLimits:
     "with",
     "label",
     "goto",
+    "djump",  # "dynamic jump", i.e. constrained, multi-destination jump
     "~extcode",
     "~selfcode",
     "~calldata",

diff --git a/vyper/venom/analysis.py b/vyper/venom/analysis.py
@@ -40,15 +40,6 @@ def calculate_cfg(ctx: IRFunction) -> None:
     else:
         entry_block = ctx.basic_blocks[0]
 
-    # TODO: Special case for the jump table of selector buckets and fallback.
-    # this will be cleaner when we introduce an "indirect jump" instruction
-    # for the selector table (which includes all possible targets). it will
-    # also clean up the code for normalization because it will not have to
-    # handle this case specially.
-    for bb in ctx.basic_blocks:
-        if "selector_bucket_" in bb.label.value or bb.label.value == "fallback":
-            bb.add_cfg_in(entry_block)
-
     for bb in ctx.basic_blocks:
         assert len(bb.instructions) > 0, "Basic block should not be empty"
         last_inst = bb.instructions[-1]

diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py
@@ -4,7 +4,7 @@
 from vyper.utils import OrderedSet
 
 # instructions which can terminate a basic block
-BB_TERMINATORS = frozenset(["jmp", "jnz", "ret", "return", "revert", "deploy", "stop"])
+BB_TERMINATORS = frozenset(["jmp", "djmp", "jnz", "ret", "return", "revert", "deploy", "stop"])
 
 VOLATILE_INSTRUCTIONS = frozenset(
     [
@@ -50,12 +50,15 @@
         "invalid",
         "invoke",
         "jmp",
+        "djmp",
         "jnz",
         "log",
     ]
 )
 
-CFG_ALTERING_INSTRUCTIONS = frozenset(["jmp", "jnz", "call", "staticcall", "invoke", "deploy"])
+CFG_ALTERING_INSTRUCTIONS = frozenset(
+    ["jmp", "djmp", "jnz", "call", "staticcall", "invoke", "deploy"]
+)
 
 if TYPE_CHECKING:
     from vyper.venom.function import IRFunction
@@ -236,6 +239,16 @@ def replace_operands(self, replacements: dict) -> None:
             if operand in replacements:
                 self.operands[i] = replacements[operand]
 
+    def replace_label_operands(self, replacements: dict) -> None:
+        """
+        Update label operands with replacements.
+        replacements are represented using a dict: "key" is replaced by "value".
+        """
+        replacements = {k.value: v for k, v in replacements.items()}
+        for i, operand in enumerate(self.operands):
+            if isinstance(operand, IRLabel) and operand.value in replacements:
+                self.operands[i] = replacements[operand.value]
+
     def __repr__(self) -> str:
         s = ""
         if self.output: