# NOTE(review): this patch was recovered from a copy in which all line breaks
# and indentation had been collapsed. Indentation and the placement of blank
# lines were reconstructed to agree with the hunk header line counts; verify
# against the repository before applying.
#
# NOTE(review): in change_target the whole angle (previous angle plus random
# delta) is multiplied by 3 * math.sin(time.time()), so the new target depends
# on wall-clock time -- confirm this is intended.
# NOTE(review): original_angle is added to np.radians(...), which assumes
# M.vector_angle returns radians -- confirm against the helper.
# NOTE(review): the last hunk removes the blank lines that separated the end
# of sprint.step from `class Train` -- confirm this is intended.
diff --git a/scripts/gyms/sprint.py b/scripts/gyms/sprint.py
index 2533255..06e9a20 100644
--- a/scripts/gyms/sprint.py
+++ b/scripts/gyms/sprint.py
@@ -280,12 +280,16 @@ class sprint(gym.Env):
         self.player.terminate()
 
     def change_target(self):
-        # 生成一个 -10 到 +10 度之间的随机角度
-        orientations = random.choice([-1, 1])
-        random_target = orientations * 10 # 单位是度
+        original_angle = M.vector_angle(self.walk_rel_target)
+        random_angle_delta = np.random.uniform(-10, 10)
+        new_angle = (original_angle + np.radians(random_angle_delta)) * 3 * math.sin(time.time())
 
 
-        self.walk_rel_target = (15, random_target)
+        new_walk_rel_target = np.array([
+            np.cos(new_angle) * self.walk_distance,
+            np.sin(new_angle) * self.walk_distance
+        ])
+        self.walk_rel_target = new_walk_rel_target
 
 
     def step(self, action):
@@ -294,7 +298,7 @@ class sprint(gym.Env):
         w = self.player.world
 
         current_time = time.time()
-        if current_time - self.last_target_update_time > 0.8:
+        if current_time - self.last_target_update_time > 0.6:
             self.change_target()
             self.last_target_update_time = current_time
 
@@ -342,19 +346,16 @@ class sprint(gym.Env):
         reward = robot_speed * (1 - direction_error / 10)
         if self.player.behavior.is_ready("Get_Up"):
             self.terminal = True
-
-        elif w.time_local_ms - self.reset_time > 10000 * 2:
+        elif w.time_local_ms - self.reset_time > 30000:
             self.terminal = True
         elif r.loc_torso_position[0] > 14.5:
             self.terminal = True
             reward += 500
         elif r.loc_torso_position[0] > 0:
-            reward += r.loc_torso_position[0] * 10
+            reward += 3 * r.loc_torso_position[0]
         else:
             self.terminal = False
         return obs, reward, self.terminal, {}
-
-
 class Train(Train_Base):
     def __init__(self, script) -> None:
         super().__init__(script)