Advanced
Custom Reward Functions
Build domain-specific reward functions for specialized training.
Basic Custom Reward
def math_reward(completion, answer):
    """Custom reward for math problems.

    Passes ``completion`` to ``eval_math`` and compares the value it returns
    with ``answer`` using ``==``.

    Args:
        completion: Model output, handed unchanged to ``eval_math``.
        answer: Reference value the evaluated result is compared against.

    Returns:
        ``1.0`` when the evaluated result equals ``answer``; ``0.0`` when it
        does not, or when evaluation or comparison raises an ``Exception``.
    """
    try:
        result = eval_math(completion)
        return 1.0 if result == answer else 0.0
    except Exception:
        # Best-effort scoring: a completion that cannot be evaluated earns no
        # reward. Only ``Exception`` is caught (not a bare ``except:``) so
        # KeyboardInterrupt and SystemExit still stop the run instead of
        # being silently scored as 0.0.
        return 0.0
config = ModelConfig(
reward_fn=math_reward,
)
Custom Reward Model
from thinkrl.rewards import BaseRewardModel
class CodeRewardModel(BaseRewardModel):
    """Reward model that scores a completion by its linter findings.

    Each finding reported by the linter lowers the score by 0.1, starting
    from 1.0.
    """

    def __init__(self):
        """Initialise the base reward model and load the linter once."""
        super().__init__()
        self.linter = load_linter()

    def score(self, prompt, completion):
        """Return ``1.0`` minus ``0.1`` per linter finding in ``completion``.

        Args:
            prompt: Accepted as part of the signature; not used in the
                computation.
            completion: Passed to ``self.linter.check``; only the length of
                what that call returns is used.

        Returns:
            ``1.0 - 0.1 * len(findings)``. No lower bound is applied, so
            more than ten findings produce a negative score.
        """
        findings = self.linter.check(completion)
        penalty = 0.1 * len(findings)
        return 1.0 - penalty