|
|
@ -53,7 +53,7 @@ class sprint(gym.Env):
|
|
|
|
max_ankle_z = nao_specs[5] * 1.8
|
|
|
|
max_ankle_z = nao_specs[5] * 1.8
|
|
|
|
self.step_generator = Step_Generator(feet_y_dev, sample_time, max_ankle_z)
|
|
|
|
self.step_generator = Step_Generator(feet_y_dev, sample_time, max_ankle_z)
|
|
|
|
self.DEFAULT_ARMS = np.array([-90, -90, 8, 8, 90, 90, 70, 70], np.float32)
|
|
|
|
self.DEFAULT_ARMS = np.array([-90, -90, 8, 8, 90, 90, 70, 70], np.float32)
|
|
|
|
|
|
|
|
self.path_manager = self.player.path_manager
|
|
|
|
self.walk_rel_orientation = None
|
|
|
|
self.walk_rel_orientation = None
|
|
|
|
self.walk_rel_target = None
|
|
|
|
self.walk_rel_target = None
|
|
|
|
self.walk_distance = None
|
|
|
|
self.walk_distance = None
|
|
|
@ -233,10 +233,9 @@ class sprint(gym.Env):
|
|
|
|
self.reset_time = t
|
|
|
|
self.reset_time = t
|
|
|
|
|
|
|
|
|
|
|
|
self.target = np.array([3, 0])
|
|
|
|
self.target = np.array([3, 0])
|
|
|
|
distance = np.linalg.norm(self.target - self.Gen_player_pos[:2])
|
|
|
|
self.walk_rel_target = self.path_manager.get_path_to_target(target=self.target)[0]
|
|
|
|
self.walk_rel_target = self.target
|
|
|
|
self.walk_distance = self.path_manager.get_path_to_target(target=self.target)[2]
|
|
|
|
self.walk_distance = distance
|
|
|
|
self.walk_rel_orientation = self.path_manager.get_path_to_target(target=self.target)[1]
|
|
|
|
self.walk_rel_orientation = M.vector_angle(self.walk_rel_target)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for _ in range(25):
|
|
|
|
for _ in range(25):
|
|
|
|
self.player.scom.unofficial_beam(self.Gen_player_pos, 0) # beam player continuously (floating above ground)
|
|
|
|
self.player.scom.unofficial_beam(self.Gen_player_pos, 0) # beam player continuously (floating above ground)
|
|
|
@ -296,11 +295,14 @@ class sprint(gym.Env):
|
|
|
|
w = self.player.world
|
|
|
|
w = self.player.world
|
|
|
|
internal_dist = np.linalg.norm(self.internal_target)
|
|
|
|
internal_dist = np.linalg.norm(self.internal_target)
|
|
|
|
action_mult = 1 if internal_dist > 0.2 else (0.7 / 0.2) * internal_dist + 0.3
|
|
|
|
action_mult = 1 if internal_dist > 0.2 else (0.7 / 0.2) * internal_dist + 0.3
|
|
|
|
self.walk_rel_target = M.rotate_2d_vec(
|
|
|
|
# self.walk_rel_target = M.rotate_2d_vec(
|
|
|
|
(self.target[0] - r.loc_head_position[0], self.target[1] - r.loc_head_position[1]),
|
|
|
|
# (self.target[0] - r.loc_head_position[0], self.target[1] - r.loc_head_position[1]),
|
|
|
|
-r.imu_torso_orientation)
|
|
|
|
# -r.imu_torso_orientation)
|
|
|
|
self.walk_distance = np.linalg.norm(self.walk_rel_target)
|
|
|
|
# self.walk_distance = np.linalg.norm(self.walk_rel_target)
|
|
|
|
self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.5
|
|
|
|
# self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.5
|
|
|
|
|
|
|
|
self.walk_rel_target = self.path_manager.get_path_to_target(target=self.target)[0]
|
|
|
|
|
|
|
|
self.walk_distance = self.path_manager.get_path_to_target(target=self.target)[2]
|
|
|
|
|
|
|
|
self.walk_rel_orientation = self.path_manager.get_path_to_target(target=self.target)[1]
|
|
|
|
# exponential moving average
|
|
|
|
# exponential moving average
|
|
|
|
self.act = 0.8 * self.act + 0.2 * action * action_mult * 0.7
|
|
|
|
self.act = 0.8 * self.act + 0.2 * action * action_mult * 0.7
|
|
|
|
|
|
|
|
|
|
|
@ -332,10 +334,14 @@ class sprint(gym.Env):
|
|
|
|
self.sync()
|
|
|
|
self.sync()
|
|
|
|
self.step_counter += 1
|
|
|
|
self.step_counter += 1
|
|
|
|
obs = self.observe()
|
|
|
|
obs = self.observe()
|
|
|
|
dirct = abs((self.target[1] - r.loc_head_position[1]) / (self.target[0] - r.loc_head_position[0]))
|
|
|
|
unit_vector = (self.walk_rel_target - r.loc_head_position[:2]) / np.linalg.norm(self.walk_rel_target - r.loc_head_position[:2])
|
|
|
|
# direction_error = abs(r.imu_torso_orientation - M.vector_angle(self.target - r.loc_head_position[:2]))
|
|
|
|
if np.linalg.norm(r.loc_torso_velocity[:2]) != 0:
|
|
|
|
reward = (r.loc_torso_velocity[0] * 0.1 + dirct * r.loc_torso_velocity[1] * 0.1
|
|
|
|
cos_theta = np.dot(unit_vector, r.loc_torso_velocity[:2]) / (
|
|
|
|
+ 0.1 * math.exp(-10*abs(self.walk_rel_orientation)))
|
|
|
|
np.linalg.norm(unit_vector) * np.linalg.norm(r.loc_torso_velocity[:2]))
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
cos_theta = 0
|
|
|
|
|
|
|
|
reward = np.linalg.norm(r.loc_torso_velocity[:2]) * cos_theta * 0.2
|
|
|
|
|
|
|
|
|
|
|
|
if np.linalg.norm(self.target - r.loc_head_position[:2]) < 0.3:
|
|
|
|
if np.linalg.norm(self.target - r.loc_head_position[:2]) < 0.3:
|
|
|
|
reward += 50
|
|
|
|
reward += 50
|
|
|
|
self.generate_random_target(self.target)
|
|
|
|
self.generate_random_target(self.target)
|
|
|
|