# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 # For details: https://github.com/coveragepy/coveragepy/blob/main/NOTICE.txt """Bytecode analysis for coverage.py""" from __future__ import annotations import collections import dis from types import CodeType from typing import Iterable, Mapping, Optional from coverage.types import TArc, TLineNo, TOffset def code_objects(code: CodeType) -> Iterable[CodeType]: """Iterate over all the code objects in `code`.""" stack = [code] while stack: # We're going to return the code object on the stack, but first # push its children for later returning. code = stack.pop() for c in code.co_consts: if isinstance(c, CodeType): stack.append(c) yield code def op_set(*op_names: str) -> set[int]: """Make a set of opcodes from instruction names. The names might not exist in this version of Python, skip those if not. """ ops = {op for name in op_names if (op := dis.opmap.get(name))} assert ops, f"At least one opcode must exist: {op_names}" return ops # Opcodes that are unconditional jumps elsewhere. ALWAYS_JUMPS = op_set( "JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT", "JUMP_FORWARD", ) # Opcodes that exit from a function. RETURNS = op_set( "RETURN_VALUE", "RETURN_GENERATOR", ) # Opcodes that do nothing. NOPS = op_set( "NOP", "NOT_TAKEN", ) class InstructionWalker: """Utility to step through trails of instructions. We have two reasons to need sequences of instructions from a code object: First, in strict sequence to visit all the instructions in the object. This is `walk(follow_jumps=False)`. Second, we want to follow jumps to understand how execution will flow: `walk(follow_jumps=True)`. """ def __init__(self, code: CodeType) -> None: self.code = code self.insts: dict[TOffset, dis.Instruction] = {} inst = None for inst in dis.get_instructions(code): self.insts[inst.offset] = inst assert inst is not None self.max_offset = inst.offset def walk( self, *, start_at: TOffset = 0, follow_jumps: bool = True ) -> Iterable[dis.Instruction]: """ Yield instructions starting from `start_at`. Follow unconditional jumps if `follow_jumps` is true. """ seen = set() offset = start_at while offset < self.max_offset + 1: if offset in seen: break seen.add(offset) if inst := self.insts.get(offset): yield inst if follow_jumps and inst.opcode in ALWAYS_JUMPS: offset = inst.jump_target continue offset += 2 TBranchTrailsOneSource = dict[Optional[TArc], set[TOffset]] TBranchTrails = dict[TOffset, TBranchTrailsOneSource] def branch_trails( code: CodeType, multiline_map: Mapping[TLineNo, TLineNo], ) -> TBranchTrails: """ Calculate branch trails for `code`. `multiline_map` maps line numbers to the first line number of a multi-line statement. Instructions can have a jump_target, where they might jump to next. Some instructions with a jump_target are unconditional jumps (ALWAYS_JUMPS), so they aren't interesting to us, since they aren't the start of a branch possibility. Instructions that might or might not jump somewhere else are branch possibilities. For each of those, we track a trail of instructions. These are lists of instruction offsets, the next instructions that can execute. We follow the trail until we get to a new source line. That gives us the arc from the original instruction's line to the new source line. """ the_trails: TBranchTrails = collections.defaultdict(lambda: collections.defaultdict(set)) iwalker = InstructionWalker(code) for inst in iwalker.walk(follow_jumps=False): if not inst.jump_target: # We only care about instructions with jump targets. continue if inst.opcode in ALWAYS_JUMPS: # We don't care about unconditional jumps. continue from_line = inst.line_number if from_line is None: continue from_line = multiline_map.get(from_line, from_line) def add_one_branch_trail( trails: TBranchTrailsOneSource, start_at: TOffset, ) -> None: # pylint: disable=cell-var-from-loop inst_offsets: set[TOffset] = set() to_line = None for inst2 in iwalker.walk(start_at=start_at, follow_jumps=True): inst_offsets.add(inst2.offset) l2 = inst2.line_number if l2 is not None: l2 = multiline_map.get(l2, l2) if l2 and l2 != from_line: to_line = l2 break elif inst2.jump_target and (inst2.opcode not in ALWAYS_JUMPS): break elif inst2.opcode in RETURNS: to_line = -code.co_firstlineno break if to_line is not None: trails[(from_line, to_line)].update(inst_offsets) else: trails[None] = set() # Calculate two trails: one from the next instruction, and one from the # jump_target instruction. trails: TBranchTrailsOneSource = collections.defaultdict(set) add_one_branch_trail(trails, start_at=inst.offset + 2) add_one_branch_trail(trails, start_at=inst.jump_target) the_trails[inst.offset] = trails # Sometimes we get BRANCH_RIGHT or BRANCH_LEFT events from instructions # other than the original jump possibility instruction. Register each # trail under all of their offsets so we can pick up in the middle of a # trail if need be. for arc, offsets in trails.items(): for offset in offsets: the_trails[offset][arc].update(offsets) return the_trails def always_jumps(code: CodeType) -> dict[TOffset, TOffset]: """Make a map of unconditional bytecodes jumping to others. Only include bytecodes that do no work and go to another bytecode. """ jumps = {} iwalker = InstructionWalker(code) for inst in iwalker.walk(follow_jumps=False): if inst.opcode in ALWAYS_JUMPS: jumps[inst.offset] = inst.jump_target elif inst.opcode in NOPS: jumps[inst.offset] = inst.offset + 2 return jumps