gpo.zugaina.org

Search Portage & Overlays:

dev-python/trl-fpo

Train transformer language models with reinforcement learning.

Screenshots

  • trl-fpo-0.0.15
    ~amd64 ~x86
    benchmark deepspeed dev diffusers llm-judge peft quantization test python_targets_python3_11 python_targets_python3_12 python_targets_python3_13 python_targets_python3_14

    View      Download      Browse     License: Apache-2.0   
    Overlay: pypi

Runtime Dependencies

trl-fpo-0.0.15





dev-python/torch[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/transformers[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




<dev-python/numpy-2.0.0[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/accelerate[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/datasets[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/tyro[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/peft[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/wandb[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/deepspeed[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/stanza[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/nltk[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]




dev-python/scipy[python_targets_python3_11
      (-)
?
,python_targets_python3_12
      (-)
?
,python_targets_python3_13
      (-)
?
,python_targets_python3_14
      (-)
?
]

benchmark?
      ( dev-python/wandb[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

benchmark?
      ( dev-python/ghapi[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

benchmark?
      ( dev-python/openrlbenchmark[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

benchmark?
      ( dev-python/requests[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

benchmark?
      ( dev-python/deepspeed[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

deepspeed?
      ( dev-python/deepspeed[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/parameterized[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/peft[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/pytest[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/pytest-xdist[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/pytest-cov[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/scikit-learn[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/pillow[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/diffusers[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/deepspeed[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/wandb[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/ghapi[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/openrlbenchmark[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/requests[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/deepspeed[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/bitsandbytes[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/openai[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/huggingface-hub[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

dev?
      ( dev-python/llm-blender[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

diffusers?
      ( dev-python/diffusers[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

llm-judge?
      ( dev-python/openai[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

llm-judge?
      ( dev-python/huggingface-hub[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

llm-judge?
      ( dev-python/llm-blender[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

peft?
      ( dev-python/peft[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

quantization?
      ( dev-python/bitsandbytes[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

test?
      ( dev-python/parameterized[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

test?
      ( dev-python/peft[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

test?
      ( dev-python/pytest[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

test?
      ( dev-python/pytest-xdist[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

test?
      ( dev-python/pytest-cov[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

test?
      ( dev-python/scikit-learn[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

test?
      ( dev-python/pillow[python_targets_python3_11
            (-)
      ?
,python_targets_python3_12
            (-)
      ?
,python_targets_python3_13
            (-)
      ?
,python_targets_python3_14
            (-)
      ?
] )

python_targets_python3_11?
      ( dev-lang/python:3.11 )

python_targets_python3_12?
      ( dev-lang/python:3.12 )

python_targets_python3_13?
      ( dev-lang/python:3.13 )

python_targets_python3_14?
      ( dev-lang/python:3.14 )