From ae6989ecf878bb602cefb2f05bbd6bb102b3e4e9 Mon Sep 17 00:00:00 2001
From: MagDish <2717360869@qq.com>
Date: Wed, 25 Sep 2024 19:11:01 +0800
Subject: [PATCH] test

---
 scripts/gyms/dribble.py | 75 +++++++++++++++++++++++++++++++----------
 1 file changed, 58 insertions(+), 17 deletions(-)

diff --git a/scripts/gyms/dribble.py b/scripts/gyms/dribble.py
index 6299bcc..6e2bff2 100644
--- a/scripts/gyms/dribble.py
+++ b/scripts/gyms/dribble.py
@@ -259,20 +259,16 @@ class dribble(gym.Env):
     def close(self):
         Draw.clear_all()
         self.player.terminate()
+    def execute(self, action):
 
-    def step(self, action):
-
-        r = (self.
-             player.world.robot)
+        # Actions:
+        # 0,1,2    left ankle pos
+        # 3,4,5    right ankle pos
+        # 6,7,8    left foot rotation
+        # 9,10,11  right foot rotation
+        # 12,13    left/right arm pitch
+        # 14,15    left/right arm roll
 
-        w = self.player.world
-        d = w.draw
-        if w.ball_abs_pos[1] > 0:  #
-            dribble_target = (15, 5)
-        else:
-            dribble_target = (15, -5)
-
-        self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
 
         # exponential moving average
         self.act = 0.85 * self.act + 0.15 * action * 0.7 * 0.95 * self.dribble_speed
@@ -296,6 +292,44 @@ class dribble(gym.Env):
         arms = np.copy(self.DEFAULT_ARMS)  # default arms pose
         arms[0:4] += a[12:16] * 4  # arms pitch+roll
 
+        return l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms
+    def step(self, action):
+        r = (self.
+             player.world.robot)
+
+        w = self.player.world
+        d = w.draw
+        if w.ball_abs_pos[1] > 0:  #
+            dribble_target = (15, 5)
+        else:
+            dribble_target = (15, -5)
+
+        self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
+        # # exponential moving average
+        # self.act = 0.85 * self.act + 0.15 * action * 0.7 * 0.95 * self.dribble_speed
+        #
+        # # execute Step behavior to extract the target positions of each leg (we will override these targets)
+        # lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
+        #                                                               self.STEP_Z_SPAN,
+        #                                                               self.leg_length * self.STEP_Z_MAX)
+        #
+        # # Leg IK
+        # a = self.act
+        # l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
+        # r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
+        # l_foot_rot = a[6:9] * (2, 2, 3)
+        # r_foot_rot = a[9:12] * (2, 2, 3)
+        #
+        # # Limit leg yaw/pitch (and add bias)
+        # l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
+        # r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
+        #
+        # # Arms actions
+        # arms = np.copy(self.DEFAULT_ARMS)  # default arms pose
+        # arms[0:4] += a[12:16] * 4  # arms pitch+roll
+        l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms = self.execute(action)
+        action_p = np.concatenate((l_ankle_pos, r_ankle_pos))
+        action_r = np.concatenate((l_foot_rot, r_foot_rot, arms))
         # Set target positions
         self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)  # legs
         r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)  # arms
@@ -311,6 +345,7 @@ class dribble(gym.Env):
                                     np.linalg.norm(unit_vector) * np.linalg.norm(w.ball_cheat_abs_vel[:2]))
         else:
             cos_theta = 0
+
         with open(M.get_active_directory([
             "/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
             "/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
@@ -320,11 +355,17 @@ class dribble(gym.Env):
         ][r.type]), 'rb') as f:
             model = pickle.load(f)
         act = run_mlp(obs, model)
-        action_tensor = torch.from_numpy(action)
-        act_tensor = torch.from_numpy(act)
-        loss = torch.exp(-torch.norm(action_tensor - act_tensor, p=2))
+        a_l_ankle_pos, a_r_ankle_pos, a_l_foot_rot, a_r_foot_rot, a_arms = self.execute(act)
+        act_p = np.concatenate((a_l_ankle_pos, a_r_ankle_pos))
+        act_r = np.concatenate((a_l_foot_rot, a_r_foot_rot, a_arms))
+        action_p_tensor = torch.from_numpy(action_p)
+        action_r_tensor = torch.from_numpy(action_r)
+        act_p_tensor = torch.from_numpy(act_p)
+        act_r_tensor = torch.from_numpy(act_r)
+        loss_p = torch.exp(-torch.norm(action_p_tensor - act_p_tensor, p=2))
+        loss_r = torch.exp(-torch.norm(action_r_tensor - act_r_tensor, p=2))
         # compute the reward
-        reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta + loss
+        reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta + loss_p + loss_r
 
         if self.ball_dist_hip_center_2d < 0.115:
             reward = 0
@@ -346,7 +387,7 @@ class Train(Train_Base):
     def train(self, args):
 
         # --------------------------------------- Learning parameters
-        n_envs = min(1, os.cpu_count())
+        n_envs = min(16, os.cpu_count())
         n_steps_per_env = 1024  # RolloutBuffer is of size (n_steps_per_env * n_envs)
         minibatch_size = 64  # should be a factor of (n_steps_per_env * n_envs)
         total_steps = 50000000
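
Note (illustration, not part of the patch): the reshaped reward combines the ball's speed weighted by its alignment with the dribble target (the cos_theta term) with two imitation bonuses that compare the learner's leg-position and rotation/arm targets against those produced by the pretrained dribble model loaded from the pickle file. A minimal sketch of that computation, assuming NumPy inputs; the function name imitation_reward and the dummy values below are illustrative only, not repository code:

    # Illustrative sketch of the reward shaping above; not repository code.
    import numpy as np
    import torch

    def imitation_reward(ball_vel, cos_theta, action_p, action_r, act_p, act_r):
        # Imitation bonuses: equal to 1.0 when the learner's targets match the expert's,
        # decaying exponentially with the L2 distance between them.
        loss_p = float(torch.exp(-torch.norm(torch.from_numpy(action_p) - torch.from_numpy(act_p), p=2)))
        loss_r = float(torch.exp(-torch.norm(torch.from_numpy(action_r) - torch.from_numpy(act_r), p=2)))
        # Progress term: ball speed scaled by its alignment with the target direction.
        return float(np.linalg.norm(ball_vel)) * cos_theta + loss_p + loss_r

    # Dummy example: 6 ankle-position values and 14 rotation/arm values, matching the
    # concatenations in the patch (3+3 ankle positions, 3+3 foot rotations, 8 arm joints).
    r = imitation_reward(np.array([0.4, 0.1]), 0.9,
                         np.zeros(6), np.zeros(14),
                         np.full(6, 0.05), np.full(14, 0.05))
    print(round(r, 3))

Since exp(-x) lies in (0, 1] for x >= 0, each imitation bonus contributes at most 1 per step, so the ball-progress term still dominates once the ball moves quickly toward the target.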