diff --git a/scripts/gyms/dribble.py b/scripts/gyms/dribble.py
index 6e2bff2..c78d9a2 100644
--- a/scripts/gyms/dribble.py
+++ b/scripts/gyms/dribble.py
@@ -293,59 +293,9 @@ class dribble(gym.Env):
         arms[0:4] += a[12:16] * 4  # arms pitch+roll
         return l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms
 
-    def step(self, action):
-        r = (self.
-             player.world.robot)
-
-        w = self.player.world
-        d = w.draw
-        if w.ball_abs_pos[1] > 0:  #
-            dribble_target = (15, 5)
-        else:
-            dribble_target = (15, -5)
-
-        self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
-        # # exponential moving average
-        # self.act = 0.85 * self.act + 0.15 * action * 0.7 * 0.95 * self.dribble_speed
-        #
-        # # execute Step behavior to extract the target positions of each leg (we will override these targets)
-        # lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
-        #                                                               self.STEP_Z_SPAN,
-        #                                                               self.leg_length * self.STEP_Z_MAX)
-        #
-        # # Leg IK
-        # a = self.act
-        # l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
-        # r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
-        # l_foot_rot = a[6:9] * (2, 2, 3)
-        # r_foot_rot = a[9:12] * (2, 2, 3)
-        #
-        # # Limit leg yaw/pitch (and add bias)
-        # l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
-        # r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
-        #
-        # # Arms actions
-        # arms = np.copy(self.DEFAULT_ARMS)  # default arms pose
-        # arms[0:4] += a[12:16] * 4  # arms pitch+roll
-        l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms = self.execute(action)
-        action_p = np.concatenate((l_ankle_pos, r_ankle_pos))
-        action_r = np.concatenate((l_foot_rot, r_foot_rot, arms))
-        # Set target positions
-        self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)  # legs
-        r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)  # arms
-        self.sync()
-        self.step_counter += 1
-
-        # collect the observation
-        obs = self.observe()
-        angle_rad = np.radians(self.gym_last_internal_abs_ori)  # convert the angle to radians
-        unit_vector = np.array([np.cos(angle_rad), np.sin(angle_rad)])  # compute the unit vector
-        if np.linalg.norm(w.ball_cheat_abs_vel[:2]) != 0:
-            cos_theta = np.dot(unit_vector, w.ball_cheat_abs_vel[:2]) / (
-                    np.linalg.norm(unit_vector) * np.linalg.norm(w.ball_cheat_abs_vel[:2]))
-        else:
-            cos_theta = 0
+    def loss(self, obs, action_p, action_r):
+        r = self.player.world.robot
         with open(M.get_active_directory([
             "/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
             "/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
@@ -362,10 +312,48 @@ class dribble(gym.Env):
             action_r_tensor = torch.from_numpy(action_r)
             act_p_tensor = torch.from_numpy(act_p)
             act_r_tensor = torch.from_numpy(act_r)
-            loss_p = torch.exp(-torch.norm(action_p_tensor - act_p_tensor, p=2))
-            loss_r = torch.exp(-torch.norm(action_r_tensor - act_r_tensor, p=2))
+            loss_p = torch.exp(-40 * torch.norm(action_p_tensor - act_p_tensor, p=2))
+            loss_r = torch.exp(-0.1 * torch.norm(action_r_tensor - act_r_tensor, p=2))
+            loss = loss_p + loss_r
+            return loss
+
+    def step(self, action):
+        r = self.player.world.robot
+
+        w = self.player.world
+        d = w.draw
+        if w.ball_abs_pos[1] > 0:  # pick the dribble target on the same side of the field as the ball
+            dribble_target = (15, 5)
+        else:
+            dribble_target = (15, -5)
+
+        self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
+
+        l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms = self.execute(action)
+
+        action_p = np.concatenate((l_ankle_pos, r_ankle_pos))
+        action_r = np.concatenate((l_foot_rot, r_foot_rot, arms))
+
+        # Set target positions
+        self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)  # legs
+        r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)  # arms
+        self.sync()
+        self.step_counter += 1
+
+        # Collect the observation
+        obs = self.observe()
+        angle_rad = np.radians(self.gym_last_internal_abs_ori)  # convert the angle to radians
+        unit_vector = np.array([np.cos(angle_rad), np.sin(angle_rad)])  # unit vector along the internal absolute orientation
+        if np.linalg.norm(w.ball_cheat_abs_vel[:2]) != 0:
+            cos_theta = np.dot(unit_vector, w.ball_cheat_abs_vel[:2]) / (
+                    np.linalg.norm(unit_vector) * np.linalg.norm(w.ball_cheat_abs_vel[:2]))
+        else:
+            cos_theta = 0
+
+        loss = self.loss(obs, action_p, action_r)  # imitation terms w.r.t. the reference dribble behavior
+
         # Compute the reward
-        reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta + loss_p + loss_r
+        reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta  # ball speed weighted by alignment with the dribble direction
 
         if self.ball_dist_hip_center_2d < 0.115:
             reward = 0
@@ -387,7 +376,7 @@ class Train(Train_Base):
 
     def train(self, args):
         # --------------------------------------- Learning parameters
-        n_envs = min(16, os.cpu_count())
+        n_envs = min(12, os.cpu_count())
         n_steps_per_env = 1024  # RolloutBuffer is of size (n_steps_per_env * n_envs)
         minibatch_size = 64  # should be a factor of (n_steps_per_env * n_envs)
         total_steps = 50000000
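
A minimal standalone sketch of how the rescaled imitation terms in this patch behave; the helper name and the sample error magnitudes below are illustrative, only the two exp(-k * ||delta||_2) formulas come from the diff:

    # Illustrative only: loss_p tracks the reference ankle positions tightly (scale 40),
    # while loss_r is far more tolerant of foot-rotation/arm mismatch (scale 0.1).
    import numpy as np
    import torch

    def imitation_terms(action_p, act_p, action_r, act_r):
        loss_p = torch.exp(-40 * torch.norm(torch.from_numpy(action_p) - torch.from_numpy(act_p), p=2))
        loss_r = torch.exp(-0.1 * torch.norm(torch.from_numpy(action_r) - torch.from_numpy(act_r), p=2))
        return loss_p.item(), loss_r.item()

    # A total position error of ~1 cm keeps loss_p near exp(-0.4) ~= 0.68, while a
    # 10-unit rotation error keeps loss_r near exp(-1.0) ~= 0.37.
    print(imitation_terms(np.zeros(6), np.full(6, 0.004), np.zeros(16), np.full(16, 2.5)))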