ThinkRL
GitHub
Advanced

Custom Reward Functions

Build domain-specific reward functions for specialized training.

Basic Custom Reward

def math_reward(completion, answer):
    """Score a math completion against the expected answer.

    Args:
        completion: Model output passed to ``eval_math`` for evaluation.
        answer: Expected result; compared with the evaluated value via ``==``.

    Returns:
        1.0 when the evaluated completion equals ``answer``; 0.0 when it
        differs or when evaluating/comparing raises an exception.
    """
    try:
        result = eval_math(completion)
        return 1.0 if result == answer else 0.0
    except Exception:
        # Best-effort scoring: any evaluation failure counts as a wrong
        # answer. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate, so the surrounding
        # run can still be interrupted.
        return 0.0

# Pass the custom reward function to the model configuration via `reward_fn`.
config = ModelConfig(reward_fn=math_reward)

Custom Reward Model

from thinkrl.rewards import BaseRewardModel

class CodeRewardModel(BaseRewardModel):
    """Reward model that scores a completion by its linter error count."""

    def __init__(self, penalty_per_error=0.1):
        """Load the linter and store the per-error penalty.

        Args:
            penalty_per_error: Amount subtracted from the score for each
                error the linter reports. Defaults to 0.1.
        """
        super().__init__()
        self.linter = load_linter()
        self.penalty_per_error = penalty_per_error

    def score(self, prompt, completion):
        """Return 1.0 minus the penalty for every linter error found.

        Args:
            prompt: Not used by this scorer; accepted as part of the
                ``score`` signature.
            completion: Text handed to ``self.linter.check``.

        Returns:
            ``1.0 - len(errors) * penalty_per_error``. The value is not
            clamped, so it becomes negative once the accumulated penalty
            exceeds 1.0.
        """
        errors = self.linter.check(completion)
        return 1.0 - (len(errors) * self.penalty_per_error)