diff --git a/scripts/gyms/Sprint.py b/scripts/gyms/Sprint.py
index 54642b8..951befc 100644
--- a/scripts/gyms/Sprint.py
+++ b/scripts/gyms/Sprint.py
@@ -43,8 +43,8 @@ class sprint(gym.Env):
         self.ik = self.player.inv_kinematics
 
         # Step behavior defaults
-        self.STEP_DUR = 10
-        self.STEP_Z_SPAN = 0.2
+        self.STEP_DUR = 8
+        self.STEP_Z_SPAN = 0.3
         self.STEP_Z_MAX = 0.7
         nao_specs = self.ik.NAO_SPECS
         self.leg_length = nao_specs[1] + nao_specs[3] # upper leg height + lower leg height
@@ -56,7 +56,6 @@ class sprint(gym.Env):
 
         self.walk_rel_orientation = None
         self.walk_rel_target = None
-        self.walk_target = None
         self.walk_distance = None
         self.act = np.zeros(16, np.float32) # memory variable
 
@@ -146,7 +145,7 @@ class sprint(gym.Env):
         Create internal target with a smoother variation
         '''
 
-        MAX_LINEAR_DIST = 0.5
+        MAX_LINEAR_DIST = 1
         MAX_LINEAR_DIFF = 0.014 # max difference (meters) per step
         MAX_ROTATION_DIFF = 1.6 # max difference (degrees) per step
         MAX_ROTATION_DIST = 45
@@ -192,7 +191,6 @@ class sprint(gym.Env):
         self.obs[62] = internal_target_vel[0] / MAX_LINEAR_DIFF
         return self.obs
 
-
     def execute_ik(self, l_pos, l_rot, r_pos, r_rot):
         r = self.player.world.robot
         # Apply IK to each leg + Set joint targets
@@ -311,7 +309,7 @@ class sprint(gym.Env):
         self.walk_distance = np.linalg.norm(self.walk_target - r.loc_head_position[:2])
         self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.3
         # exponential moving average
-        self.act = 0.6 * self.act + 0.4 * action
+        self.act = 0.8 * self.act + 0.2 * action * 0.7
 
         # execute Step behavior to extract the target positions of each leg (we will override these targets)
         lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
@@ -341,16 +339,17 @@ class sprint(gym.Env):
         self.sync()
         self.step_counter += 1
         obs = self.observe()
+
         robot_speed = np.linalg.norm(r.loc_torso_velocity[:2])
         direction_error = abs(self.walk_rel_orientation)
         direction_error = min(direction_error, 10)
-        reward = robot_speed * (1 - direction_error / 10) * 0.02
+        reward = robot_speed * (1.5 - direction_error / 10) * 0.03
         if self.walk_distance < 0.3:
             reward += 10
 
         if self.player.behavior.is_ready("Get_Up"):
             self.terminal = True
-        elif w.time_local_ms - self.reset_time > 15000 * 2:
+        elif w.time_local_ms - self.reset_time > 45000 * 2:
             self.terminal = True
         else:
             self.terminal = False
diff --git a/scripts/gyms/sprint_demo.py b/scripts/gyms/sprint_demo.py
index 87a8676..5bc0911 100644
--- a/scripts/gyms/sprint_demo.py
+++ b/scripts/gyms/sprint_demo.py
@@ -45,7 +45,7 @@ class sprint(gym.Env):
         # Step behavior defaults
         self.STEP_DUR = 8
         self.STEP_Z_SPAN = 0.3
-        self.STEP_Z_MAX = 0.8
+        self.STEP_Z_MAX = 0.7
         nao_specs = self.ik.NAO_SPECS
         self.leg_length = nao_specs[1] + nao_specs[3] # upper leg height + lower leg height
         feet_y_dev = nao_specs[0] * 2 # wider step
@@ -310,7 +310,7 @@ class sprint(gym.Env):
         self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.3
 
         # exponential moving average
-        self.act = 0.8 * self.act + 0.2 * 0.7
+        self.act = 0.8 * self.act + 0.2 * action_mult * 0.7
 
         # execute Step behavior to extract the target positions of each leg (we will override these targets)
         lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
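
For reference, a minimal standalone sketch of the action smoothing and reward shaping that the Sprint.py hunks switch to. The helper names and example inputs below are hypothetical (they do not exist in the repo); the constants mirror the + lines above.

import numpy as np

def smooth_action(prev_act, action):
    # exponential moving average, with the 0.7 gain applied to the incoming action
    return 0.8 * prev_act + 0.2 * action * 0.7

def sprint_reward(robot_speed, walk_rel_orientation, walk_distance):
    # direction error is clipped to 10, as in the updated step()
    direction_error = min(abs(walk_rel_orientation), 10)
    reward = robot_speed * (1.5 - direction_error / 10) * 0.03
    if walk_distance < 0.3:
        reward += 10  # bonus for reaching the walk target
    return reward

print(smooth_action(np.zeros(16, np.float32), np.ones(16, np.float32))[0])   # ~0.14
print(sprint_reward(robot_speed=0.8, walk_rel_orientation=5.0, walk_distance=1.0))  # ~0.024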