diff --git a/scripts/gyms/Sprint.py b/scripts/gyms/Sprint.py index e09ef42..bbc70ae 100644 --- a/scripts/gyms/Sprint.py +++ b/scripts/gyms/Sprint.py @@ -53,7 +53,7 @@ class sprint(gym.Env): max_ankle_z = nao_specs[5] * 1.8 self.step_generator = Step_Generator(feet_y_dev, sample_time, max_ankle_z) self.DEFAULT_ARMS = np.array([-90, -90, 8, 8, 90, 90, 70, 70], np.float32) - + self.path_manager = self.player.path_manager self.walk_rel_orientation = None self.walk_rel_target = None self.walk_distance = None @@ -233,10 +233,9 @@ class sprint(gym.Env): self.reset_time = t self.target = np.array([3, 0]) - distance = np.linalg.norm(self.target - self.Gen_player_pos[:2]) - self.walk_rel_target = self.target - self.walk_distance = distance - self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) + self.walk_rel_target = self.path_manager.get_path_to_target(target=self.target)[0] + self.walk_distance = self.path_manager.get_path_to_target(target=self.target)[2] + self.walk_rel_orientation = self.path_manager.get_path_to_target(target=self.target)[1] for _ in range(25): self.player.scom.unofficial_beam(self.Gen_player_pos, 0) # beam player continuously (floating above ground) @@ -296,11 +295,14 @@ class sprint(gym.Env): w = self.player.world internal_dist = np.linalg.norm(self.internal_target) action_mult = 1 if internal_dist > 0.2 else (0.7 / 0.2) * internal_dist + 0.3 - self.walk_rel_target = M.rotate_2d_vec( - (self.target[0] - r.loc_head_position[0], self.target[1] - r.loc_head_position[1]), - -r.imu_torso_orientation) - self.walk_distance = np.linalg.norm(self.walk_rel_target) - self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.5 + # self.walk_rel_target = M.rotate_2d_vec( + # (self.target[0] - r.loc_head_position[0], self.target[1] - r.loc_head_position[1]), + # -r.imu_torso_orientation) + # self.walk_distance = np.linalg.norm(self.walk_rel_target) + # self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.5 + self.walk_rel_target = self.path_manager.get_path_to_target(target=self.target)[0] + self.walk_distance = self.path_manager.get_path_to_target(target=self.target)[2] + self.walk_rel_orientation = self.path_manager.get_path_to_target(target=self.target)[1] # exponential moving average self.act = 0.8 * self.act + 0.2 * action * action_mult * 0.7 @@ -332,10 +334,14 @@ class sprint(gym.Env): self.sync() self.step_counter += 1 obs = self.observe() - dirct = abs((self.target[1] - r.loc_head_position[1]) / (self.target[0] - r.loc_head_position[0])) - # direction_error = abs(r.imu_torso_orientation - M.vector_angle(self.target - r.loc_head_position[:2])) - reward = (r.loc_torso_velocity[0] * 0.1 + dirct * r.loc_torso_velocity[1] * 0.1 - + 0.1 * math.exp(-10*abs(self.walk_rel_orientation))) + unit_vector = (self.walk_rel_target - r.loc_head_position[:2]) / np.linalg.norm(self.walk_rel_target - r.loc_head_position[:2]) + if np.linalg.norm(r.loc_torso_velocity[:2]) != 0: + cos_theta = np.dot(unit_vector, r.loc_torso_velocity[:2]) / ( + np.linalg.norm(unit_vector) * np.linalg.norm(r.loc_torso_velocity[:2])) + else: + cos_theta = 0 + reward = np.linalg.norm(r.loc_torso_velocity[:2]) * cos_theta * 0.2 + if np.linalg.norm(self.target - r.loc_head_position[:2]) < 0.3: reward += 50 self.generate_random_target(self.target)