diff --git a/examples/smac/custom_vecinfo.py b/examples/smac/custom_vecinfo.py index 52a2b5b2..ba39f6e1 100644 --- a/examples/smac/custom_vecinfo.py +++ b/examples/smac/custom_vecinfo.py @@ -41,10 +41,10 @@ def statistics(self, buffer: Any) -> Dict[str, Any]: assert ( "game_state" in singe_env_info["final_info"].keys() ), "game_state must be in info" - assert singe_env_info["final_info"]["game_state"] in [ - "win", - "lose", - ], "game_state in the final_info must be win or lose" + # assert singe_env_info["final_info"]["game_state"] in [ + # "win", + # "lose", + # ], "game_state in the final_info must be win or lose" self.win_history.append( singe_env_info["final_info"]["game_state"] == "win" ) diff --git a/examples/smac/train_ppo.py b/examples/smac/train_ppo.py index 32f8acff..4c03d295 100644 --- a/examples/smac/train_ppo.py +++ b/examples/smac/train_ppo.py @@ -25,7 +25,8 @@ def train(): # create environment env_num = 8 env = make( - "2s_vs_1sc", + "3m", + # "2s_vs_1sc", env_num=env_num, asynchronous=True, cfg=cfg, diff --git a/examples/smacv2/custom_vecinfo.py b/examples/smacv2/custom_vecinfo.py index 6dd90d00..48fc210d 100644 --- a/examples/smacv2/custom_vecinfo.py +++ b/examples/smacv2/custom_vecinfo.py @@ -33,21 +33,21 @@ def __init__(self, *args, **kwargs): def statistics(self, buffer: Any) -> Dict[str, Any]: info_dict = super().statistics(buffer) - """for step_info in self.infos: + for step_info in self.infos: for singe_env_info in step_info: assert isinstance(singe_env_info, dict), "singe_env_info must be dict" if "final_info" in singe_env_info.keys(): assert ( "game_state" in singe_env_info["final_info"].keys() - ), "game_state must be in info" - assert singe_env_info["final_info"]["game_state"] in [ - "win", - "lose", - ], "game_state in the final_info must be win or lose" + ), "win_state must be in info" + # assert singe_env_info["final_info"]["game_state"] in [ + # "win", + # "lose", + # ], "win_state in the final_info must be win or lose" self.win_history.append( singe_env_info["final_info"]["game_state"] == "win" - )""" + ) if len(self.win_history) > 0: info_dict["win_rate"] = np.mean(self.win_history)