Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions xtuner/v1/train/rl_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1573,6 +1573,12 @@ def _sync_weights_from_train_workers(self) -> None:
self.logger.info("Rollout workers updated weights from train workers.")

def fit(self):
try:
self._fit()
finally:
self._exp_tracker.close()

def _fit(self):
self.logger.info("Start RL training")
if self._cur_step >= self._total_train_steps:
self.logger.info(f"Train steps {self._total_train_steps} reached, stop training")
Expand All @@ -1583,7 +1589,8 @@ def fit(self):
return

ray.get(
self.rollout_controller.validate_registered_workers_to_proxy.remote(), timeout=RL_TRAINER_RAY_GET_TIMEOUT
self.rollout_controller.validate_registered_workers_to_proxy.remote(),
timeout=RL_TRAINER_RAY_GET_TIMEOUT,
)

if self._enable_initial_evaluate and not self._debug_rollout:
Expand Down Expand Up @@ -1788,7 +1795,10 @@ def _resume_from_checkpoint(self, checkpoint_path: Path | str) -> None:

def fit(self):
# 对外同步 fit;内部用 async loop 组织 producer/consumer。
return asyncio_run(self._fit())
try:
return asyncio_run(self._fit())
finally:
self._exp_tracker.close()

async def _get_batch_or_raise_producer_failure(
self,
Expand Down
Loading