Skip to content

Commit

Permalink
feat: add new target-constrained jump instruction (vyperlang#3687)
Browse files Browse the repository at this point in the history
This commit adds a new target-constrained jump instruction, which jumps to
one of an explicit list of jump targets. It has been added to both the
s-expr IR (as "djump") and the venom IR (as "djmp"). This removes the
workarounds that we had to implement in the normalization pass and the CFG calculations.

---------

Co-authored-by: Charles Cooper <cooper.charles.m@gmail.com>
  • Loading branch information
harkal and charles-cooper authored Dec 21, 2023
1 parent 9165926 commit 3116e88
Show file tree
Hide file tree
Showing 17 changed files with 158 additions and 127 deletions.
3 changes: 3 additions & 0 deletions tests/unit/ast/test_pre_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,9 @@ def test_parse_pragmas(code, pre_parse_settings, compiler_data_settings, mock_ve
# None is sentinel here meaning that nothing changed
compiler_data_settings = pre_parse_settings

# cannot be set via pragma, don't check
compiler_data_settings.experimental_codegen = False

assert compiler_data.settings == compiler_data_settings


Expand Down
41 changes: 41 additions & 0 deletions tests/unit/compiler/venom/test_multi_entry_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,44 @@ def test_multi_entry_block_2():
assert cfg_in[0].label.value == "target", "Should contain target"
assert cfg_in[1].label.value == "finish_split_global", "Should contain finish_split_global"
assert cfg_in[2].label.value == "finish_split_block_1", "Should contain finish_split_block_1"


def test_multi_entry_block_with_dynamic_jump():
    """
    Check that normalization handles a multi-entry block reached via `djmp`.

    The `finish` block is named as a target by three different terminators:
    the entry block's `djmp`, block_1's `jnz` and target's `jmp`. The CFG is
    expected to be reported as not normalized before `NormalizationPass`
    runs and as normalized afterwards, with the first three predecessors of
    `finish` being `target`, `finish_split_global` and
    `finish_split_block_1`, in that order.
    """
    ctx = IRFunction()

    finish_label = IRLabel("finish")
    target_label = IRLabel("target")
    # NOTE(review): `ctx` is passed as IRLabel's second positional argument
    # here, unlike the two labels above -- confirm against IRLabel's signature.
    block_1_label = IRLabel("block_1", ctx)

    # entry block: terminated by a dynamic jump listing `finish` and `block_1`
    entry_bb = ctx.get_basic_block()
    ten = entry_bb.append_instruction("store", 10)
    total = entry_bb.append_instruction("add", ten, ten)
    entry_bb.append_instruction("djmp", total, finish_label, block_1_label)

    # block_1: terminated by a `jnz` naming `finish` and `target`
    block_1_bb = IRBasicBlock(block_1_label, ctx)
    ctx.append_basic_block(block_1_bb)
    total = block_1_bb.append_instruction("add", total, ten)
    ten = block_1_bb.append_instruction("store", 10)
    block_1_bb.append_instruction("mstore", total, ten)
    block_1_bb.append_instruction("jnz", total, finish_label, target_label)

    # target: terminated by an unconditional `jmp` to `finish`
    target_block = IRBasicBlock(target_label, ctx)
    ctx.append_basic_block(target_block)
    target_block.append_instruction("mul", total, total)
    target_block.append_instruction("jmp", finish_label)

    # finish: the multi-entry block under test
    finish_block = IRBasicBlock(finish_label, ctx)
    ctx.append_basic_block(finish_block)
    finish_block.append_instruction("stop")

    calculate_cfg(ctx)
    assert not ctx.normalized, "CFG should not be normalized"

    NormalizationPass.run_pass(ctx)
    assert ctx.normalized, "CFG should be normalized"

    # look the block up again through the context after the pass has run
    normalized_finish = ctx.get_basic_block(finish_label.value)
    predecessors = list(normalized_finish.cfg_in.keys())
    expected_names = ("target", "finish_split_global", "finish_split_block_1")
    for position, name in enumerate(expected_names):
        assert predecessors[position].label.value == name, f"Should contain {name}"
7 changes: 4 additions & 3 deletions vyper/cli/vyper_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def _parse_args(argv):
"--experimental-codegen",
help="The compiler use the new IR codegen. This is an experimental feature.",
action="store_true",
dest="experimental_codegen",
)

args = parser.parse_args(argv)
Expand Down Expand Up @@ -184,6 +185,9 @@ def _parse_args(argv):
if args.evm_version:
settings.evm_version = args.evm_version

if args.experimental_codegen:
settings.experimental_codegen = args.experimental_codegen

if args.verbose:
print(f"cli specified: `{settings}`", file=sys.stderr)

Expand All @@ -195,7 +199,6 @@ def _parse_args(argv):
settings,
args.storage_layout,
args.no_bytecode_metadata,
args.experimental_codegen,
)

if args.output_path:
Expand Down Expand Up @@ -233,7 +236,6 @@ def compile_files(
settings: Optional[Settings] = None,
storage_layout_paths: list[str] = None,
no_bytecode_metadata: bool = False,
experimental_codegen: bool = False,
) -> dict:
paths = paths or []

Expand Down Expand Up @@ -287,7 +289,6 @@ def compile_files(
storage_layout_override=storage_layout_override,
show_gas_estimates=show_gas_estimates,
no_bytecode_metadata=no_bytecode_metadata,
experimental_codegen=experimental_codegen,
)

ret[file_path] = output
Expand Down
2 changes: 2 additions & 0 deletions vyper/codegen/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,8 @@ def make_setter(left, right):
_opt_level = OptimizationLevel.GAS


# FIXME: this is to get around the fact that we don't have a
# proper context object in the IR generation phase.
@contextlib.contextmanager
def anchor_opt_level(new_level: OptimizationLevel) -> Generator:
"""
Expand Down
18 changes: 10 additions & 8 deletions vyper/codegen/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,21 +311,23 @@ def _selector_section_sparse(external_functions, module_ctx):

ret.append(["codecopy", dst, bucket_hdr_location, SZ_BUCKET_HEADER])

jumpdest = IRnode.from_list(["mload", 0])
# don't particularly like using `jump` here since it can cause
# issues for other backends, consider changing `goto` to allow
# dynamic jumps, or adding some kind of jumptable instruction
ret.append(["jump", jumpdest])
jump_targets = []

jumptable_data = ["data", "selector_buckets"]
for i in range(n_buckets):
if i in buckets:
bucket_label = f"selector_bucket_{i}"
jumptable_data.append(["symbol", bucket_label])
jump_targets.append(bucket_label)
else:
# empty bucket
jumptable_data.append(["symbol", "fallback"])
jump_targets.append("fallback")

jumptable_data = ["data", "selector_buckets"]
jumptable_data.extend(["symbol", label] for label in jump_targets)

jumpdest = IRnode.from_list(["mload", 0])

jump_instr = IRnode.from_list(["djump", jumpdest, *jump_targets])
ret.append(jump_instr)
ret.append(jumptable_data)

for bucket_id, bucket in buckets.items():
Expand Down
2 changes: 0 additions & 2 deletions vyper/compiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def compile_from_file_input(
no_bytecode_metadata: bool = False,
show_gas_estimates: bool = False,
exc_handler: Optional[Callable] = None,
experimental_codegen: bool = False,
) -> dict:
"""
Main entry point into the compiler.
Expand Down Expand Up @@ -107,7 +106,6 @@ def compile_from_file_input(
storage_layout_override,
show_gas_estimates,
no_bytecode_metadata,
experimental_codegen,
)

ret = {}
Expand Down
3 changes: 3 additions & 0 deletions vyper/compiler/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ def build_ir_runtime_output(compiler_data: CompilerData) -> IRnode:


def _ir_to_dict(ir_node):
# Currently only supported with IRnode and not VenomIR
if not isinstance(ir_node, IRnode):
return
args = ir_node.args
if len(args) > 0 or ir_node.value == "seq":
return {ir_node.value: [_ir_to_dict(x) for x in args]}
Expand Down
67 changes: 34 additions & 33 deletions vyper/compiler/phases.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,26 @@
DEFAULT_CONTRACT_PATH = PurePath("VyperContract.vy")


def _merge_one(lhs, rhs, helpstr):
if lhs is not None and rhs is not None and lhs != rhs:
raise StructureException(
f"compiler settings indicate {helpstr} {lhs}, " f"but source pragma indicates {rhs}."
)
return lhs if rhs is None else rhs


# TODO: does this belong as a method under Settings?
def _merge_settings(cli: Settings, pragma: Settings):
    """
    Build a new `Settings` object combining `cli` and `pragma`.

    The `evm_version`, `optimize` and `experimental_codegen` fields are
    each reconciled with `_merge_one`, which raises if both sources
    specify a value and the values differ. No other field is copied onto
    the returned object.
    """
    merged = Settings()

    # (attribute name, label used in the conflict error message)
    mergeable_fields = (
        ("evm_version", "evm version"),
        ("optimize", "optimize"),
        ("experimental_codegen", "experimental codegen"),
    )
    for attr_name, label in mergeable_fields:
        combined = _merge_one(getattr(cli, attr_name), getattr(pragma, attr_name), label)
        setattr(merged, attr_name, combined)

    return merged


class CompilerData:
"""
Object for fetching and storing compiler data for a Vyper contract.
Expand Down Expand Up @@ -59,7 +79,6 @@ def __init__(
storage_layout: StorageLayout = None,
show_gas_estimates: bool = False,
no_bytecode_metadata: bool = False,
experimental_codegen: bool = False,
) -> None:
"""
Initialization method.
Expand All @@ -76,11 +95,9 @@ def __init__(
Show gas estimates for abi and ir output modes
no_bytecode_metadata: bool, optional
Do not add metadata to bytecode. Defaults to False
experimental_codegen: bool, optional
Use experimental codegen. Defaults to False
"""
# to force experimental codegen, uncomment:
# experimental_codegen = True
# settings.experimental_codegen = True

if isinstance(file_input, str):
file_input = FileInput(
Expand All @@ -93,7 +110,6 @@ def __init__(
self.storage_layout_override = storage_layout
self.show_gas_estimates = show_gas_estimates
self.no_bytecode_metadata = no_bytecode_metadata
self.experimental_codegen = experimental_codegen
self.settings = settings or Settings()
self.input_bundle = input_bundle or FilesystemInputBundle([Path(".")])

Expand All @@ -120,32 +136,13 @@ def _generate_ast(self):
resolved_path=str(self.file_input.resolved_path),
)

# validate the compiler settings
# XXX: this is a bit ugly, clean up later
if settings.evm_version is not None:
if (
self.settings.evm_version is not None
and self.settings.evm_version != settings.evm_version
):
raise StructureException(
f"compiler settings indicate evm version {self.settings.evm_version}, "
f"but source pragma indicates {settings.evm_version}."
)

self.settings.evm_version = settings.evm_version

if settings.optimize is not None:
if self.settings.optimize is not None and self.settings.optimize != settings.optimize:
raise StructureException(
f"compiler options indicate optimization mode {self.settings.optimize}, "
f"but source pragma indicates {settings.optimize}."
)
self.settings.optimize = settings.optimize

# ensure defaults
self.settings = _merge_settings(self.settings, settings)
if self.settings.optimize is None:
self.settings.optimize = OptimizationLevel.default()

if self.settings.experimental_codegen is None:
self.settings.experimental_codegen = False

# note self.settings.compiler_version is erased here as it is
# not used after pre-parsing
return ast
Expand Down Expand Up @@ -184,8 +181,10 @@ def global_ctx(self) -> ModuleT:
@cached_property
def _ir_output(self):
# fetch both deployment and runtime IR
nodes = generate_ir_nodes(self.global_ctx, self.settings.optimize)
if self.experimental_codegen:
nodes = generate_ir_nodes(
self.global_ctx, self.settings.optimize, self.settings.experimental_codegen
)
if self.settings.experimental_codegen:
return [generate_ir(nodes[0]), generate_ir(nodes[1])]
else:
return nodes
Expand All @@ -211,7 +210,7 @@ def function_signatures(self) -> dict[str, ContractFunctionT]:

@cached_property
def assembly(self) -> list:
if self.experimental_codegen:
if self.settings.experimental_codegen:
return generate_assembly_experimental(
self.ir_nodes, self.settings.optimize # type: ignore
)
Expand All @@ -220,7 +219,7 @@ def assembly(self) -> list:

@cached_property
def assembly_runtime(self) -> list:
if self.experimental_codegen:
if self.settings.experimental_codegen:
return generate_assembly_experimental(
self.ir_runtime, self.settings.optimize # type: ignore
)
Expand Down Expand Up @@ -294,7 +293,9 @@ def generate_folded_ast(
return vyper_module_folded, symbol_tables


def generate_ir_nodes(global_ctx: ModuleT, optimize: OptimizationLevel) -> tuple[IRnode, IRnode]:
def generate_ir_nodes(
global_ctx: ModuleT, optimize: OptimizationLevel, experimental_codegen: bool
) -> tuple[IRnode, IRnode]:
"""
Generate the intermediate representation (IR) from the contextualized AST.

Expand Down
1 change: 1 addition & 0 deletions vyper/compiler/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class Settings:
compiler_version: Optional[str] = None
optimize: Optional[OptimizationLevel] = None
evm_version: Optional[str] = None
experimental_codegen: Optional[bool] = None


_DEBUG = False
Expand Down
7 changes: 7 additions & 0 deletions vyper/ir/compile_ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,13 @@ def _height_of(witharg):
o.extend(_compile_to_assembly(c, withargs, existing_labels, break_dest, height + i))
o.extend(["_sym_" + code.args[0].value, "JUMP"])
return o
elif code.value == "djump":
o = []
# "djump" compiles to a raw EVM jump instruction
jump_target = code.args[0]
o.extend(_compile_to_assembly(jump_target, withargs, existing_labels, break_dest, height))
o.append("JUMP")
return o
# push a literal symbol
elif code.value == "symbol":
return ["_sym_" + code.args[0].value]
Expand Down
1 change: 1 addition & 0 deletions vyper/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ class SizeLimits:
"with",
"label",
"goto",
"djump", # "dynamic jump", i.e. constrained, multi-destination jump
"~extcode",
"~selfcode",
"~calldata",
Expand Down
9 changes: 0 additions & 9 deletions vyper/venom/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,6 @@ def calculate_cfg(ctx: IRFunction) -> None:
else:
entry_block = ctx.basic_blocks[0]

# TODO: Special case for the jump table of selector buckets and fallback.
# this will be cleaner when we introduce an "indirect jump" instruction
# for the selector table (which includes all possible targets). it will
# also clean up the code for normalization because it will not have to
# handle this case specially.
for bb in ctx.basic_blocks:
if "selector_bucket_" in bb.label.value or bb.label.value == "fallback":
bb.add_cfg_in(entry_block)

for bb in ctx.basic_blocks:
assert len(bb.instructions) > 0, "Basic block should not be empty"
last_inst = bb.instructions[-1]
Expand Down
17 changes: 15 additions & 2 deletions vyper/venom/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from vyper.utils import OrderedSet

# instructions which can terminate a basic block
BB_TERMINATORS = frozenset(["jmp", "jnz", "ret", "return", "revert", "deploy", "stop"])
BB_TERMINATORS = frozenset(["jmp", "djmp", "jnz", "ret", "return", "revert", "deploy", "stop"])

VOLATILE_INSTRUCTIONS = frozenset(
[
Expand Down Expand Up @@ -50,12 +50,15 @@
"invalid",
"invoke",
"jmp",
"djmp",
"jnz",
"log",
]
)

CFG_ALTERING_INSTRUCTIONS = frozenset(["jmp", "jnz", "call", "staticcall", "invoke", "deploy"])
CFG_ALTERING_INSTRUCTIONS = frozenset(
["jmp", "djmp", "jnz", "call", "staticcall", "invoke", "deploy"]
)

if TYPE_CHECKING:
from vyper.venom.function import IRFunction
Expand Down Expand Up @@ -236,6 +239,16 @@ def replace_operands(self, replacements: dict) -> None:
if operand in replacements:
self.operands[i] = replacements[operand]

def replace_label_operands(self, replacements: dict) -> None:
    """
    Substitute label operands of this instruction in place.

    `replacements` maps an existing label (key) to the operand that
    should take its place (value). Matching is done on the key's `.value`
    rather than on the key object itself, and only operands that are
    `IRLabel` instances are considered.
    """
    # re-key the mapping by label value so lookups compare label values
    by_label_value = {old.value: new for old, new in replacements.items()}

    for idx, candidate in enumerate(self.operands):
        if not isinstance(candidate, IRLabel):
            continue
        if candidate.value in by_label_value:
            self.operands[idx] = by_label_value[candidate.value]

def __repr__(self) -> str:
s = ""
if self.output:
Expand Down
Loading

0 comments on commit 3116e88

Please sign in to comment.