diff --git a/tests/unit/ast/test_pre_parser.py b/tests/unit/ast/test_pre_parser.py
index 3d072674f6..682c13ca84 100644
--- a/tests/unit/ast/test_pre_parser.py
+++ b/tests/unit/ast/test_pre_parser.py
@@ -184,6 +184,9 @@ def test_parse_pragmas(code, pre_parse_settings, compiler_data_settings, mock_ve
         # None is sentinel here meaning that nothing changed
         compiler_data_settings = pre_parse_settings
 
+    # cannot be set via pragma, don't check
+    compiler_data_settings.experimental_codegen = False
+
     assert compiler_data.settings == compiler_data_settings
diff --git a/tests/unit/compiler/venom/test_multi_entry_block.py b/tests/unit/compiler/venom/test_multi_entry_block.py
index 6e7e6995d6..104697432b 100644
--- a/tests/unit/compiler/venom/test_multi_entry_block.py
+++ b/tests/unit/compiler/venom/test_multi_entry_block.py
@@ -95,3 +95,44 @@ def test_multi_entry_block_2():
     assert cfg_in[0].label.value == "target", "Should contain target"
     assert cfg_in[1].label.value == "finish_split_global", "Should contain finish_split_global"
     assert cfg_in[2].label.value == "finish_split_block_1", "Should contain finish_split_block_1"
+
+
+def test_multi_entry_block_with_dynamic_jump():
+    ctx = IRFunction()
+
+    finish_label = IRLabel("finish")
+    target_label = IRLabel("target")
+    block_1_label = IRLabel("block_1", ctx)
+
+    bb = ctx.get_basic_block()
+    op = bb.append_instruction("store", 10)
+    acc = bb.append_instruction("add", op, op)
+    bb.append_instruction("djmp", acc, finish_label, block_1_label)
+
+    block_1 = IRBasicBlock(block_1_label, ctx)
+    ctx.append_basic_block(block_1)
+    acc = block_1.append_instruction("add", acc, op)
+    op = block_1.append_instruction("store", 10)
+    block_1.append_instruction("mstore", acc, op)
+    block_1.append_instruction("jnz", acc, finish_label, target_label)
+
+    target_bb = IRBasicBlock(target_label, ctx)
+    ctx.append_basic_block(target_bb)
+    target_bb.append_instruction("mul", acc, acc)
+    target_bb.append_instruction("jmp", finish_label)
+
+    finish_bb = IRBasicBlock(finish_label, ctx)
+    ctx.append_basic_block(finish_bb)
+    finish_bb.append_instruction("stop")
+
+    calculate_cfg(ctx)
+    assert not ctx.normalized, "CFG should not be normalized"
+
+    NormalizationPass.run_pass(ctx)
+    assert ctx.normalized, "CFG should be normalized"
+
+    finish_bb = ctx.get_basic_block(finish_label.value)
+    cfg_in = list(finish_bb.cfg_in.keys())
+    assert cfg_in[0].label.value == "target", "Should contain target"
+    assert cfg_in[1].label.value == "finish_split_global", "Should contain finish_split_global"
+    assert cfg_in[2].label.value == "finish_split_block_1", "Should contain finish_split_block_1"
diff --git a/vyper/cli/vyper_compile.py b/vyper/cli/vyper_compile.py
index 25f1180098..3063a289ab 100755
--- a/vyper/cli/vyper_compile.py
+++ b/vyper/cli/vyper_compile.py
@@ -147,6 +147,7 @@ def _parse_args(argv):
         "--experimental-codegen",
         help="The compiler use the new IR codegen. This is an experimental feature.",
         action="store_true",
+        dest="experimental_codegen",
     )
 
     args = parser.parse_args(argv)
@@ -184,6 +185,9 @@ def _parse_args(argv):
     if args.evm_version:
         settings.evm_version = args.evm_version
 
+    if args.experimental_codegen:
+        settings.experimental_codegen = args.experimental_codegen
+
     if args.verbose:
         print(f"cli specified: `{settings}`", file=sys.stderr)
 
@@ -195,7 +199,6 @@ def _parse_args(argv):
         settings,
         args.storage_layout,
         args.no_bytecode_metadata,
-        args.experimental_codegen,
     )
 
     if args.output_path:
@@ -233,7 +236,6 @@ def compile_files(
     settings: Optional[Settings] = None,
    storage_layout_paths: list[str] = None,
     no_bytecode_metadata: bool = False,
-    experimental_codegen: bool = False,
 ) -> dict:
     paths = paths or []
 
@@ -287,7 +289,6 @@ def compile_files(
             storage_layout_override=storage_layout_override,
             show_gas_estimates=show_gas_estimates,
             no_bytecode_metadata=no_bytecode_metadata,
-            experimental_codegen=experimental_codegen,
         )
 
         ret[file_path] = output
diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py
index e1d3ea12b4..503e0e2f3b 100644
--- a/vyper/codegen/core.py
+++ b/vyper/codegen/core.py
@@ -892,6 +892,8 @@ def make_setter(left, right):
 _opt_level = OptimizationLevel.GAS
 
 
+# FIXME: this is to get around the fact that we don't have a
+# proper context object in the IR generation phase.
 @contextlib.contextmanager
 def anchor_opt_level(new_level: OptimizationLevel) -> Generator:
     """
diff --git a/vyper/codegen/module.py b/vyper/codegen/module.py
index ef861e3953..98395a6a0c 100644
--- a/vyper/codegen/module.py
+++ b/vyper/codegen/module.py
@@ -311,21 +311,23 @@ def _selector_section_sparse(external_functions, module_ctx):
 
     ret.append(["codecopy", dst, bucket_hdr_location, SZ_BUCKET_HEADER])
 
-    jumpdest = IRnode.from_list(["mload", 0])
-    # don't particularly like using `jump` here since it can cause
-    # issues for other backends, consider changing `goto` to allow
-    # dynamic jumps, or adding some kind of jumptable instruction
-    ret.append(["jump", jumpdest])
+    jump_targets = []
 
-    jumptable_data = ["data", "selector_buckets"]
     for i in range(n_buckets):
         if i in buckets:
             bucket_label = f"selector_bucket_{i}"
-            jumptable_data.append(["symbol", bucket_label])
+            jump_targets.append(bucket_label)
         else:
             # empty bucket
-            jumptable_data.append(["symbol", "fallback"])
+            jump_targets.append("fallback")
+
+    jumptable_data = ["data", "selector_buckets"]
+    jumptable_data.extend(["symbol", label] for label in jump_targets)
+
+    jumpdest = IRnode.from_list(["mload", 0])
+    jump_instr = IRnode.from_list(["djump", jumpdest, *jump_targets])
+    ret.append(jump_instr)
 
     ret.append(jumptable_data)
 
     for bucket_id, bucket in buckets.items():
diff --git a/vyper/compiler/__init__.py b/vyper/compiler/__init__.py
index 026c8369c5..c87814ba15 100644
--- a/vyper/compiler/__init__.py
+++ b/vyper/compiler/__init__.py
@@ -53,7 +53,6 @@ def compile_from_file_input(
     no_bytecode_metadata: bool = False,
     show_gas_estimates: bool = False,
     exc_handler: Optional[Callable] = None,
-    experimental_codegen: bool = False,
 ) -> dict:
     """
     Main entry point into the compiler.
@@ -107,7 +106,6 @@ def compile_from_file_input( storage_layout_override, show_gas_estimates, no_bytecode_metadata, - experimental_codegen, ) ret = {} diff --git a/vyper/compiler/output.py b/vyper/compiler/output.py index 6d1e7ef70f..dc2a43720e 100644 --- a/vyper/compiler/output.py +++ b/vyper/compiler/output.py @@ -89,6 +89,9 @@ def build_ir_runtime_output(compiler_data: CompilerData) -> IRnode: def _ir_to_dict(ir_node): + # Currently only supported with IRnode and not VenomIR + if not isinstance(ir_node, IRnode): + return args = ir_node.args if len(args) > 0 or ir_node.value == "seq": return {ir_node.value: [_ir_to_dict(x) for x in args]} diff --git a/vyper/compiler/phases.py b/vyper/compiler/phases.py index edffa9a85e..199bbbc3e5 100644 --- a/vyper/compiler/phases.py +++ b/vyper/compiler/phases.py @@ -21,6 +21,26 @@ DEFAULT_CONTRACT_PATH = PurePath("VyperContract.vy") +def _merge_one(lhs, rhs, helpstr): + if lhs is not None and rhs is not None and lhs != rhs: + raise StructureException( + f"compiler settings indicate {helpstr} {lhs}, " f"but source pragma indicates {rhs}." + ) + return lhs if rhs is None else rhs + + +# TODO: does this belong as a method under Settings? +def _merge_settings(cli: Settings, pragma: Settings): + ret = Settings() + ret.evm_version = _merge_one(cli.evm_version, pragma.evm_version, "evm version") + ret.optimize = _merge_one(cli.optimize, pragma.optimize, "optimize") + ret.experimental_codegen = _merge_one( + cli.experimental_codegen, pragma.experimental_codegen, "experimental codegen" + ) + + return ret + + class CompilerData: """ Object for fetching and storing compiler data for a Vyper contract. @@ -59,7 +79,6 @@ def __init__( storage_layout: StorageLayout = None, show_gas_estimates: bool = False, no_bytecode_metadata: bool = False, - experimental_codegen: bool = False, ) -> None: """ Initialization method. @@ -76,11 +95,9 @@ def __init__( Show gas estimates for abi and ir output modes no_bytecode_metadata: bool, optional Do not add metadata to bytecode. Defaults to False - experimental_codegen: bool, optional - Use experimental codegen. Defaults to False """ # to force experimental codegen, uncomment: - # experimental_codegen = True + # settings.experimental_codegen = True if isinstance(file_input, str): file_input = FileInput( @@ -93,7 +110,6 @@ def __init__( self.storage_layout_override = storage_layout self.show_gas_estimates = show_gas_estimates self.no_bytecode_metadata = no_bytecode_metadata - self.experimental_codegen = experimental_codegen self.settings = settings or Settings() self.input_bundle = input_bundle or FilesystemInputBundle([Path(".")]) @@ -120,32 +136,13 @@ def _generate_ast(self): resolved_path=str(self.file_input.resolved_path), ) - # validate the compiler settings - # XXX: this is a bit ugly, clean up later - if settings.evm_version is not None: - if ( - self.settings.evm_version is not None - and self.settings.evm_version != settings.evm_version - ): - raise StructureException( - f"compiler settings indicate evm version {self.settings.evm_version}, " - f"but source pragma indicates {settings.evm_version}." - ) - - self.settings.evm_version = settings.evm_version - - if settings.optimize is not None: - if self.settings.optimize is not None and self.settings.optimize != settings.optimize: - raise StructureException( - f"compiler options indicate optimization mode {self.settings.optimize}, " - f"but source pragma indicates {settings.optimize}." 
-                )
-            self.settings.optimize = settings.optimize
-
-        # ensure defaults
+        self.settings = _merge_settings(self.settings, settings)
         if self.settings.optimize is None:
             self.settings.optimize = OptimizationLevel.default()
 
+        if self.settings.experimental_codegen is None:
+            self.settings.experimental_codegen = False
+
         # note self.settings.compiler_version is erased here as it is
         # not used after pre-parsing
         return ast
@@ -184,8 +181,10 @@ def global_ctx(self) -> ModuleT:
     @cached_property
     def _ir_output(self):
         # fetch both deployment and runtime IR
-        nodes = generate_ir_nodes(self.global_ctx, self.settings.optimize)
-        if self.experimental_codegen:
+        nodes = generate_ir_nodes(
+            self.global_ctx, self.settings.optimize, self.settings.experimental_codegen
+        )
+        if self.settings.experimental_codegen:
             return [generate_ir(nodes[0]), generate_ir(nodes[1])]
         else:
             return nodes
@@ -211,7 +210,7 @@ def function_signatures(self) -> dict[str, ContractFunctionT]:
 
     @cached_property
     def assembly(self) -> list:
-        if self.experimental_codegen:
+        if self.settings.experimental_codegen:
             return generate_assembly_experimental(
                 self.ir_nodes, self.settings.optimize  # type: ignore
             )
@@ -220,7 +219,7 @@ def function_signatures(self) -> dict[str, ContractFunctionT]:
 
     @cached_property
     def assembly_runtime(self) -> list:
-        if self.experimental_codegen:
+        if self.settings.experimental_codegen:
             return generate_assembly_experimental(
                 self.ir_runtime, self.settings.optimize  # type: ignore
             )
@@ -294,7 +293,9 @@ def generate_folded_ast(
     return vyper_module_folded, symbol_tables
 
 
-def generate_ir_nodes(global_ctx: ModuleT, optimize: OptimizationLevel) -> tuple[IRnode, IRnode]:
+def generate_ir_nodes(
+    global_ctx: ModuleT, optimize: OptimizationLevel, experimental_codegen: bool
+) -> tuple[IRnode, IRnode]:
     """
     Generate the intermediate representation (IR) from the contextualized AST.
diff --git a/vyper/compiler/settings.py b/vyper/compiler/settings.py
index d2c88a8592..51c8d64e41 100644
--- a/vyper/compiler/settings.py
+++ b/vyper/compiler/settings.py
@@ -42,6 +42,7 @@ class Settings:
     compiler_version: Optional[str] = None
     optimize: Optional[OptimizationLevel] = None
     evm_version: Optional[str] = None
+    experimental_codegen: Optional[bool] = None
 
 
 _DEBUG = False
diff --git a/vyper/ir/compile_ir.py b/vyper/ir/compile_ir.py
index 1d3df8becb..8ce8c887f1 100644
--- a/vyper/ir/compile_ir.py
+++ b/vyper/ir/compile_ir.py
@@ -702,6 +702,13 @@ def _height_of(witharg):
             o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i))
         o.extend(["_sym_" + code.args[0].value, "JUMP"])
         return o
+    elif code.value == "djump":
+        o = []
+        # "djump" compiles to a raw EVM jump instruction
+        jump_target = code.args[0]
+        o.extend(_compile_to_assembly(jump_target, withargs, existing_labels, break_dest, height))
+        o.append("JUMP")
+        return o
     # push a literal symbol
     elif code.value == "symbol":
         return ["_sym_" + code.args[0].value]
diff --git a/vyper/utils.py b/vyper/utils.py
index 6816db9bae..a778a4e31b 100644
--- a/vyper/utils.py
+++ b/vyper/utils.py
@@ -331,6 +331,7 @@ class SizeLimits:
     "with",
     "label",
     "goto",
+    "djump",  # "dynamic jump", i.e. constrained, multi-destination jump
     "~extcode",
     "~selfcode",
     "~calldata",
diff --git a/vyper/venom/analysis.py b/vyper/venom/analysis.py
index 1a82ca85d0..6dfc3c3d7c 100644
--- a/vyper/venom/analysis.py
+++ b/vyper/venom/analysis.py
@@ -40,15 +40,6 @@ def calculate_cfg(ctx: IRFunction) -> None:
     else:
         entry_block = ctx.basic_blocks[0]
 
-    # TODO: Special case for the jump table of selector buckets and fallback.
-    # this will be cleaner when we introduce an "indirect jump" instruction
-    # for the selector table (which includes all possible targets). it will
-    # also clean up the code for normalization because it will not have to
-    # handle this case specially.
-    for bb in ctx.basic_blocks:
-        if "selector_bucket_" in bb.label.value or bb.label.value == "fallback":
-            bb.add_cfg_in(entry_block)
-
     for bb in ctx.basic_blocks:
         assert len(bb.instructions) > 0, "Basic block should not be empty"
         last_inst = bb.instructions[-1]
diff --git a/vyper/venom/basicblock.py b/vyper/venom/basicblock.py
index 6f1c1c8ab3..9afaa5e6fd 100644
--- a/vyper/venom/basicblock.py
+++ b/vyper/venom/basicblock.py
@@ -4,7 +4,7 @@
 from vyper.utils import OrderedSet
 
 # instructions which can terminate a basic block
-BB_TERMINATORS = frozenset(["jmp", "jnz", "ret", "return", "revert", "deploy", "stop"])
+BB_TERMINATORS = frozenset(["jmp", "djmp", "jnz", "ret", "return", "revert", "deploy", "stop"])
 
 VOLATILE_INSTRUCTIONS = frozenset(
     [
@@ -50,12 +50,15 @@
         "invalid",
         "invoke",
         "jmp",
+        "djmp",
         "jnz",
         "log",
     ]
 )
 
-CFG_ALTERING_INSTRUCTIONS = frozenset(["jmp", "jnz", "call", "staticcall", "invoke", "deploy"])
+CFG_ALTERING_INSTRUCTIONS = frozenset(
+    ["jmp", "djmp", "jnz", "call", "staticcall", "invoke", "deploy"]
+)
 
 if TYPE_CHECKING:
     from vyper.venom.function import IRFunction
@@ -236,6 +239,16 @@ def replace_operands(self, replacements: dict) -> None:
             if operand in replacements:
                 self.operands[i] = replacements[operand]
 
+    def replace_label_operands(self, replacements: dict) -> None:
+        """
+        Update label operands with replacements.
+        replacements are represented using a dict: "key" is replaced by "value".
+        """
+        replacements = {k.value: v for k, v in replacements.items()}
+        for i, operand in enumerate(self.operands):
+            if isinstance(operand, IRLabel) and operand.value in replacements:
+                self.operands[i] = replacements[operand.value]
+
     def __repr__(self) -> str:
         s = ""
         if self.output:
diff --git a/vyper/venom/function.py b/vyper/venom/function.py
index e16b2ad6e6..665fa0c6c2 100644
--- a/vyper/venom/function.py
+++ b/vyper/venom/function.py
@@ -125,17 +125,11 @@ def normalized(self) -> bool:
             # TODO: this check could be:
             # `if len(in_bb.cfg_out) > 1: return False`
             # but the cfg is currently not calculated "correctly" for
-            # certain special instructions (deploy instruction and
-            # selector table indirect jumps).
+            # the special deploy instruction.
             for in_bb in bb.cfg_in:
                 jump_inst = in_bb.instructions[-1]
-                if jump_inst.opcode != "jnz":
-                    continue
-                if jump_inst.opcode == "jmp" and isinstance(jump_inst.operands[0], IRLabel):
-                    continue
-
-                # The function is not normalized
-                return False
+                if jump_inst.opcode in ("jnz", "djmp"):
+                    return False
 
         # The function is normalized
         return True
diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py
index 0aaf6aba03..9f5c23df0b 100644
--- a/vyper/venom/ir_node_to_venom.py
+++ b/vyper/venom/ir_node_to_venom.py
@@ -166,7 +166,6 @@ def _handle_self_call(
         ret_args.append(return_buf.value)  # type: ignore
 
     bb = ctx.get_basic_block()
-
     do_ret = func_t.return_type is not None
     if do_ret:
         invoke_ret = bb.append_invoke_instruction(ret_args, returns=True)  # type: ignore
@@ -453,9 +452,11 @@ def _convert_ir_basicblock(ctx, ir, symbols, variables, allocated_variables):
         )  # body
     elif ir.value == "goto":
         _append_jmp(ctx, IRLabel(ir.args[0].value))
-    elif ir.value == "jump":
-        arg_1 = _convert_ir_basicblock(ctx, ir.args[0], symbols, variables, allocated_variables)
-        ctx.get_basic_block().append_instruction("jmp", arg_1)
+    elif ir.value == "djump":
+        args = [_convert_ir_basicblock(ctx, ir.args[0], symbols, variables, allocated_variables)]
+        for target in ir.args[1:]:
+            args.append(IRLabel(target.value))
+        ctx.get_basic_block().append_instruction("djmp", *args)
         _new_block(ctx)
     elif ir.value == "set":
         sym = ir.args[0]
diff --git a/vyper/venom/passes/normalization.py b/vyper/venom/passes/normalization.py
index 90dd60e881..43e8d47235 100644
--- a/vyper/venom/passes/normalization.py
+++ b/vyper/venom/passes/normalization.py
@@ -1,5 +1,5 @@
-from vyper.exceptions import CompilerPanic
-from vyper.venom.basicblock import IRBasicBlock, IRLabel, IRVariable
+from vyper.venom.analysis import calculate_cfg
+from vyper.venom.basicblock import IRBasicBlock, IRLabel
 from vyper.venom.function import IRFunction
 from vyper.venom.passes.base_pass import IRPass
 
@@ -19,72 +19,43 @@ def _split_basic_block(self, bb: IRBasicBlock) -> None:
             jump_inst = in_bb.instructions[-1]
             assert bb in in_bb.cfg_out
 
-            # Handle static and dynamic branching
-            if jump_inst.opcode == "jnz":
-                self._split_for_static_branch(bb, in_bb)
-            elif jump_inst.opcode == "jmp" and isinstance(jump_inst.operands[0], IRVariable):
-                self._split_for_dynamic_branch(bb, in_bb)
-            else:
-                continue
-
-            self.changes += 1
-
-    def _split_for_static_branch(self, bb: IRBasicBlock, in_bb: IRBasicBlock) -> None:
-        jump_inst = in_bb.instructions[-1]
-        for i, op in enumerate(jump_inst.operands):
-            if op == bb.label:
-                edge = i
+            # Handle branching
+            if jump_inst.opcode in ("jnz", "djmp"):
+                self._insert_split_basicblock(bb, in_bb)
+                self.changes += 1
                 break
-        else:
-            # none of the edges points to this bb
-            raise CompilerPanic("bad CFG")
-
-        assert edge in (1, 2)  # the arguments which can be labels
-
-        split_bb = self._insert_split_basicblock(bb, in_bb)
-
-        # Redirect the original conditional jump to the intermediary basic block
-        jump_inst.operands[edge] = split_bb.label
-
-    def _split_for_dynamic_branch(self, bb: IRBasicBlock, in_bb: IRBasicBlock) -> None:
-        split_bb = self._insert_split_basicblock(bb, in_bb)
-
-        # Update any affected labels in the data segment
-        # TODO: this DESTROYS the cfg! refactor so the translation of the
-        # selector table produces indirect jumps properly.
-        for inst in self.ctx.data_segment:
-            if inst.opcode == "db" and inst.operands[0] == bb.label:
-                inst.operands[0] = split_bb.label
 
     def _insert_split_basicblock(self, bb: IRBasicBlock, in_bb: IRBasicBlock) -> IRBasicBlock:
         # Create an intermediary basic block and append it
         source = in_bb.label.value
         target = bb.label.value
-        split_bb = IRBasicBlock(IRLabel(f"{target}_split_{source}"), self.ctx)
+
+        split_label = IRLabel(f"{target}_split_{source}")
+        in_terminal = in_bb.instructions[-1]
+        in_terminal.replace_label_operands({bb.label: split_label})
+
+        split_bb = IRBasicBlock(split_label, self.ctx)
         split_bb.append_instruction("jmp", bb.label)
         self.ctx.append_basic_block(split_bb)
 
-        # Rewire the CFG
-        # TODO: this is cursed code, it is necessary instead of just running
-        # calculate_cfg() because split_for_dynamic_branch destroys the CFG!
-        # ideally, remove this rewiring and just re-run calculate_cfg().
-        split_bb.add_cfg_in(in_bb)
-        split_bb.add_cfg_out(bb)
-        in_bb.remove_cfg_out(bb)
-        in_bb.add_cfg_out(split_bb)
-        bb.remove_cfg_in(in_bb)
-        bb.add_cfg_in(split_bb)
+        # Update the labels in the data segment
+        for inst in self.ctx.data_segment:
+            if inst.opcode == "db" and inst.operands[0] == bb.label:
+                inst.operands[0] = split_bb.label
+
         return split_bb
 
     def _run_pass(self, ctx: IRFunction) -> int:
         self.ctx = ctx
         self.changes = 0
 
+        # Split blocks that need splitting
         for bb in ctx.basic_blocks:
             if len(bb.cfg_in) > 1:
                 self._split_basic_block(bb)
 
-        # Sanity check
-        assert ctx.normalized, "Normalization pass failed"
+        # If we made changes, recalculate the cfg
+        if self.changes > 0:
+            calculate_cfg(ctx)
 
         return self.changes
diff --git a/vyper/venom/venom_to_assembly.py b/vyper/venom/venom_to_assembly.py
index 8760e9aa63..0c32c3b816 100644
--- a/vyper/venom/venom_to_assembly.py
+++ b/vyper/venom/venom_to_assembly.py
@@ -261,7 +261,7 @@ def _generate_evm_for_instruction(
 
         # Step 1: Apply instruction special stack manipulations
 
-        if opcode in ["jmp", "jnz", "invoke"]:
+        if opcode in ["jmp", "djmp", "jnz", "invoke"]:
             operands = inst.get_non_label_operands()
         elif opcode == "alloca":
             operands = inst.operands[1:2]
@@ -296,7 +296,7 @@ def _generate_evm_for_instruction(
             self._emit_input_operands(assembly, inst, operands, stack)
 
         # Step 3: Reorder stack
-        if opcode in ["jnz", "jmp"]:
+        if opcode in ["jnz", "djmp", "jmp"]:
             # prepare stack for jump into another basic block
             assert inst.parent and isinstance(inst.parent.cfg_out, OrderedSet)
             b = next(iter(inst.parent.cfg_out))
@@ -344,11 +344,12 @@ def _generate_evm_for_instruction(
             assembly.append("JUMP")
         elif opcode == "jmp":
-            if isinstance(inst.operands[0], IRLabel):
-                assembly.append(f"_sym_{inst.operands[0].value}")
-                assembly.append("JUMP")
-            else:
-                assembly.append("JUMP")
+            assert isinstance(inst.operands[0], IRLabel)
+            assembly.append(f"_sym_{inst.operands[0].value}")
+            assembly.append("JUMP")
+        elif opcode == "djmp":
+            assert isinstance(inst.operands[0], IRVariable)
+            assembly.append("JUMP")
         elif opcode == "gt":
             assembly.append("GT")
         elif opcode == "lt":