Skip to content

Commit

Permalink
feat: more robust RT sampling.
Browse files Browse the repository at this point in the history
Signed-off-by: Matteo Manica <drugilsberg@gmail.com>
  • Loading branch information
drugilsberg committed Nov 16, 2024
1 parent bd955fd commit c4b152c
Showing 1 changed file with 8 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ def compile_regression_result(
e.g., '<qed>0.727' or '<logp>6.65<scscore>3.82'.
"""
properties = []
supported_properties_set = set(self.properties)
for inp, pred in zip(input_ids, prediction):
in_tokens = self.tokenizer.decode(
inp, clean_up_tokenization_spaces=False
Expand All @@ -455,6 +456,13 @@ def compile_regression_result(
pred, clean_up_tokenization_spaces=False
).split(" ")
joined = self.tokenizer.get_sample_prediction(out_tokens, in_tokens)
# NOTE: clean up unknown properties, i.e. drop tokens of the form "<...>"
# that are not among the supported properties.
joined = [
token
for token in joined
if token in supported_properties_set
or not (token.startswith("<") and token.endswith(">"))
]
_, gen_prop = self.tokenizer.aggregate_tokens(joined, label_mode=False)
properties.append(
"".join(
Expand Down

0 comments on commit c4b152c

Please sign in to comment.