|
|
@ -44,8 +44,8 @@ class sprint(gym.Env):
|
|
|
|
|
|
|
|
|
|
|
|
# Step behavior defaults
|
|
|
|
# Step behavior defaults
|
|
|
|
self.STEP_DUR = 10
|
|
|
|
self.STEP_DUR = 10
|
|
|
|
self.STEP_Z_SPAN = 0.2
|
|
|
|
self.STEP_Z_SPAN = 0.4
|
|
|
|
self.STEP_Z_MAX = 0.7
|
|
|
|
self.STEP_Z_MAX = 0.8
|
|
|
|
nao_specs = self.ik.NAO_SPECS
|
|
|
|
nao_specs = self.ik.NAO_SPECS
|
|
|
|
self.leg_length = nao_specs[1] + nao_specs[3] # upper leg height + lower leg height
|
|
|
|
self.leg_length = nao_specs[1] + nao_specs[3] # upper leg height + lower leg height
|
|
|
|
feet_y_dev = nao_specs[0] * 2 # wider step
|
|
|
|
feet_y_dev = nao_specs[0] * 2 # wider step
|
|
|
@ -298,7 +298,7 @@ class sprint(gym.Env):
|
|
|
|
|
|
|
|
|
|
|
|
w = self.player.world
|
|
|
|
w = self.player.world
|
|
|
|
current_time = time.time()
|
|
|
|
current_time = time.time()
|
|
|
|
if current_time - self.last_target_update_time > 1:
|
|
|
|
if current_time - self.last_target_update_time > 2:
|
|
|
|
self.change_target()
|
|
|
|
self.change_target()
|
|
|
|
self.last_target_update_time = current_time
|
|
|
|
self.last_target_update_time = current_time
|
|
|
|
|
|
|
|
|
|
|
@ -310,7 +310,7 @@ class sprint(gym.Env):
|
|
|
|
self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.3
|
|
|
|
self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.3
|
|
|
|
|
|
|
|
|
|
|
|
# exponential moving average
|
|
|
|
# exponential moving average
|
|
|
|
self.act = 0.7 * self.act + 0.3 * action_mult * 0.7
|
|
|
|
self.act = 0.8 * self.act + 0.2 * 0.7
|
|
|
|
|
|
|
|
|
|
|
|
# execute Step behavior to extract the target positions of each leg (we will override these targets)
|
|
|
|
# execute Step behavior to extract the target positions of each leg (we will override these targets)
|
|
|
|
lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
|
|
|
|
lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
|
|
|
@ -343,16 +343,14 @@ class sprint(gym.Env):
|
|
|
|
robot_speed = r.loc_torso_velocity[0]
|
|
|
|
robot_speed = r.loc_torso_velocity[0]
|
|
|
|
direction_error = abs(self.walk_rel_orientation)
|
|
|
|
direction_error = abs(self.walk_rel_orientation)
|
|
|
|
direction_error = min(direction_error, 10)
|
|
|
|
direction_error = min(direction_error, 10)
|
|
|
|
reward = robot_speed * (1 - direction_error / 10)
|
|
|
|
reward = robot_speed * (1.5 - direction_error / 10)
|
|
|
|
if self.player.behavior.is_ready("Get_Up"):
|
|
|
|
if self.player.behavior.is_ready("Get_Up"):
|
|
|
|
self.terminal = True
|
|
|
|
self.terminal = True
|
|
|
|
elif w.time_local_ms - self.reset_time > 30000:
|
|
|
|
elif w.time_local_ms - self.reset_time > 50000:
|
|
|
|
self.terminal = True
|
|
|
|
self.terminal = True
|
|
|
|
elif r.loc_torso_position[0] > 14.5:
|
|
|
|
elif r.loc_torso_position[0] > 14.5:
|
|
|
|
self.terminal = True
|
|
|
|
self.terminal = True
|
|
|
|
reward += 500
|
|
|
|
reward += 500
|
|
|
|
elif r.loc_torso_position[0] > 0:
|
|
|
|
|
|
|
|
reward += 3 * r.loc_torso_position[0]
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
self.terminal = False
|
|
|
|
self.terminal = False
|
|
|
|
return obs, reward, self.terminal, {}
|
|
|
|
return obs, reward, self.terminal, {}
|
|
|
|