dev-python/trl-fpo
Train transformer language models with reinforcement learning.
Runtime Dependencies
trl-fpo-0.0.15
dev-python/torch[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/transformers[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
<dev-python/numpy-2.0.0[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/accelerate[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/datasets[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/tyro[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/peft[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/wandb[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/deepspeed[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/stanza[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/nltk[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
dev-python/scipy[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
]
benchmark?
( dev-python/wandb[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
benchmark?
( dev-python/ghapi[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
benchmark?
( dev-python/openrlbenchmark[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
benchmark?
( dev-python/requests[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
benchmark?
( dev-python/deepspeed[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
deepspeed?
( dev-python/deepspeed[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/parameterized[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/peft[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/pytest[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/pytest-xdist[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/pytest-cov[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/scikit-learn[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/pillow[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/diffusers[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/deepspeed[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/wandb[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/ghapi[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/openrlbenchmark[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/requests[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/deepspeed[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/bitsandbytes[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/openai[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/huggingface-hub[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
dev?
( dev-python/llm-blender[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
diffusers?
( dev-python/diffusers[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
llm-judge?
( dev-python/openai[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
llm-judge?
( dev-python/huggingface-hub[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
llm-judge?
( dev-python/llm-blender[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
peft?
( dev-python/peft[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
quantization?
( dev-python/bitsandbytes[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
test?
( dev-python/parameterized[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
test?
( dev-python/peft[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
test?
( dev-python/pytest[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
test?
( dev-python/pytest-xdist[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
test?
( dev-python/pytest-cov[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
test?
( dev-python/scikit-learn[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
test?
( dev-python/pillow[python_targets_python3_11
(-)
?
,python_targets_python3_12
(-)
?
,python_targets_python3_13
(-)
?
,python_targets_python3_14
(-)
?
] )
python_targets_python3_11?
( dev-lang/python:3.11 )
python_targets_python3_12?
( dev-lang/python:3.12 )
python_targets_python3_13?
( dev-lang/python:3.13 )
python_targets_python3_14?
( dev-lang/python:3.14 )


View
Download
Browse