[PATCH v2 03/13] verification/rvgen: Implement state and transition parser based on Lark

From: Nam Cao

Date: Thu May 28 2026 - 04:28:41 EST


The DOT parsing scripts directly parse the raw text and they are quite
fragile. If the input dot files' formats are slightly changed (for
instance, by breaking long some lines which is allowed by the DOT
language), the scripts would fail.

Prepare to move away from the raw text processing, implement parsers based
on Lark which parse states, transitions and constraints.

The parse results are not used yet. The existing scripts will be converted
one by one to them, and the raw text processing will eventually be removed.

Signed-off-by: Nam Cao <namcao@xxxxxxxxxxxxx>
---
v2:
- fixup guard grammar [Gabriele]
- fixup inconsistent types in a list [Wander]
- compiling the parsers only once to avoid overhead [Sashiko]
- fix up the signature of State.__init__() [Sashiko]
- gracefully handle node statement without label [Sashiko]
- lark parse exception handling
---
tools/verification/rvgen/rvgen/automata.py | 216 +++++++++++++++++++++
1 file changed, 216 insertions(+)

diff --git a/tools/verification/rvgen/rvgen/automata.py b/tools/verification/rvgen/rvgen/automata.py
index 8649d982383d..b86275e7bf28 100644
--- a/tools/verification/rvgen/rvgen/automata.py
+++ b/tools/verification/rvgen/rvgen/automata.py
@@ -198,6 +198,164 @@ class ParseTree:
self.node_attrs = attributes_parser.node_attrs
self.edge_attrs = attributes_parser.edge_attrs

+class ConstraintCondition:
+ def __init__(self, env: str, op: str, val: str, unit=None):
+ self.env = env
+ self.op = op
+ self.val = val
+ self.unit = unit
+ if unit is None:
+ # try to infer unit from constants or parameters
+ val_for_unit = val.lower().replace("()", "")
+ if val_for_unit.endswith("_ns"):
+ self.unit = "ns"
+ if val_for_unit.endswith("_jiffies"):
+ self.unit = "j"
+
+class ConstraintRule:
+ grammar = r'''
+ rule: condition (OP condition)*
+
+ OP: "&&" | "||"
+
+ condition: ENV CMP_OP VAL UNIT?
+
+ ENV: CNAME
+
+ CMP_OP: "==" | "<=" | "<" | ">=" | ">"
+
+ VAL: /[0-9]+/
+ | /[A-Z_]+\(\)/
+ | /[A-Z_]+/
+ | /[a-z_]+\(\)/
+ | /[a-z_]+/
+
+ UNIT: "ns" | "us" | "ms" | "s"
+ '''
+
+ def __init__(self, c: ConstraintCondition):
+ '''
+ A list of pairs of
+ - the condition (e.g. is_constr_dl == 1)
+ - the logical operator ("||" or "&&") combining this
+ condition with the next one if it exists, otherwise None
+
+ TODO: Perhaps use an abstract syntax tree instead, because
+ this representation cannot capture precedence
+ '''
+ self.rules = [[c, None]]
+
+ def chain(self, op: str, c: ConstraintCondition):
+ self.rules[-1][1] = op
+ self.rules.append([c, None])
+
+class ConstraintReset:
+ def __init__(self, env):
+ self.env = env
+
+class StateLabelParser:
+ grammar = r'''
+ label: CNAME ("\\n" condition)?
+
+ %import common.CNAME
+ %import common.WS
+ %ignore WS
+ ''' + ConstraintRule.grammar
+
+ parser = lark.Lark(grammar, parser='lalr', start="label")
+
+ def __init__(self, label: str):
+ try:
+ tree = self.parser.parse(label)
+ except lark.exceptions.UnexpectedInput as exc:
+ raise(AutomataError(f"Unrecognised state \"{label}\"\n{exc}"))
+
+ self.state = tree.children[0]
+ self.constraint = None
+
+ if len(tree.children) == 2:
+ self.constraint = ConstraintCondition(*tree.children[1].children)
+ if self.constraint.op not in ("<", "<="):
+ raise AutomataError("State constraints must be clock expirations like"
+ f" clk<N ({label})")
+
+class EventLabelParser:
+ grammar = r'''
+ events: event ("\\n" event)*
+
+ event: name (";" guard)?
+
+ guard: reset
+ | rule
+ | rule ";" reset
+ | reset ";" rule
+
+ name: CNAME
+
+ reset: "reset" "(" ENV ")"
+
+ %import common.CNAME
+ %import common.WS
+ %ignore WS
+ ''' + ConstraintRule.grammar
+
+ parser = lark.Lark(grammar, parser='lalr', start="events")
+
+ class GetEvents(lark.visitors.Transformer):
+ def guard(self, args):
+ reset = None
+ rule = None
+ for arg in args:
+ if arg.data == "reset":
+ reset = ConstraintReset(arg.children[0])
+ elif arg.data == "rule":
+ conditions = arg.children
+ rule = ConstraintRule(conditions[0])
+ for i in range(1, len(conditions), 2):
+ rule.chain(conditions[i], conditions[i + 1])
+ return reset, rule
+
+ def OP(self, args):
+ return args
+
+ def condition(self, args):
+ return ConstraintCondition(*args)
+
+ def event(self, args):
+ assert(len(args) <= 2)
+ name = args[0]
+ rule, reset = None, None
+ if len(args) == 2:
+ reset, rule = args[1]
+ return name, reset, rule
+
+ def events(self, args):
+ return args
+
+ def name(self, args):
+ return args[0]
+
+ def __init__(self, label: str):
+ try:
+ tree = self.parser.parse(label)
+ self.events = self.GetEvents().transform(tree)
+ except lark.exceptions.UnexpectedInput as exc:
+ raise(AutomataError(f"Unrecognised event \"{label}\"\n{exc}"))
+
+class Transition:
+ def __init__(self, src: str, dst: str, event: str,
+ reset: ConstraintReset, rule: ConstraintRule):
+ self.src = src
+ self.dst = dst
+ self.event = event
+ self.rule = rule
+ self.reset = reset
+
+class State:
+ def __init__(self, name: str, inv: ConstraintCondition):
+ self.name = name
+ self.inv = inv
+
class _ConstraintKey:
"""Base class for constraint keys."""

@@ -252,6 +410,8 @@ class Automata:
self.name = model_name or self.__get_model_name()
self.__dot_lines = self.__open_dot()
self.__parse_tree = ParseTree(file_path)
+ self.transitions = self.__parse_transitions()
+ self._states, self._initial_state, self._final_states = self.__parse_states()
self.states, self.initial_state, self.final_states = self.__get_state_variables()
self.env_types = {}
self.env_stored = set()
@@ -327,6 +487,62 @@ class Automata:

return cursor

+ def __parse_transitions(self):
+ transitions = []
+
+ for edge in self.__parse_tree.edges:
+ attr = self.__parse_tree.edge_attrs.get(edge)
+ if not attr:
+ continue
+
+ label = attr.get("label")
+
+ src, dst = edge
+
+ parser = EventLabelParser(label)
+ for event, reset, rule in parser.events:
+ transitions.append(Transition(src, dst, event, reset, rule))
+
+ transitions.sort(key=lambda t : (t.src, t.event))
+ return transitions
+
+ def __parse_states(self):
+ initial_state = ""
+ states = []
+ final_states = []
+
+ for node in self.__parse_tree.nodes:
+ attr = self.__parse_tree.node_attrs[node]
+ label = attr.get("label")
+
+ if node.startswith(Automata.init_marker):
+ initial_state = node[len(Automata.init_marker):]
+
+ if not label:
+ continue
+
+ parser = StateLabelParser(label)
+ state = State(parser.state, parser.constraint)
+
+ states.append(state)
+
+ shape = attr.get("shape")
+ if shape in ("doublecircle", "ellipse"):
+ final_states.append(state)
+
+
+ initial_state = next((s for s in states if s.name == initial_state), None)
+ if not initial_state:
+ raise AutomataError("The automaton doesn't have an initial state")
+
+ if not final_states:
+ final_states.append(initial_state)
+
+ states.remove(initial_state)
+ states.sort(key=lambda s : s.name)
+ states.insert(0, initial_state)
+ return states, initial_state, final_states
+
def __get_state_variables(self) -> tuple[list[str], str, list[str]]:
# wait for node declaration
states = []
--
2.47.3