From ae6989ecf878bb602cefb2f05bbd6bb102b3e4e9 Mon Sep 17 00:00:00 2001
From: MagDish <2717360869@qq.com>
Date: Wed, 25 Sep 2024 19:11:01 +0800
Subject: [PATCH] test

---
 scripts/gyms/dribble.py | 75 +++++++++++++++++++++++++++++++----------
 1 file changed, 58 insertions(+), 17 deletions(-)

diff --git a/scripts/gyms/dribble.py b/scripts/gyms/dribble.py
index 6299bcc..6e2bff2 100644
--- a/scripts/gyms/dribble.py
+++ b/scripts/gyms/dribble.py
@@ -259,20 +259,16 @@ class dribble(gym.Env):
     def close(self):
         Draw.clear_all()
         self.player.terminate()
+    def execute(self, action):
 
-    def step(self, action):
-
-        r = (self.
-             player.world.robot)
+        # Actions:
+        # 0,1,2    left ankle pos
+        # 3,4,5    right ankle pos
+        # 6,7,8    left foot rotation
+        # 9,10,11  right foot rotation
+        # 12,13    left/right arm pitch
+        # 14,15    left/right arm roll
 
-        w = self.player.world
-        d = w.draw
-        if w.ball_abs_pos[1] > 0:  #
-            dribble_target = (15, 5)
-        else:
-            dribble_target = (15, -5)
-
-        self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
 
         # exponential moving average
         self.act = 0.85 * self.act + 0.15 * action * 0.7 * 0.95 * self.dribble_speed
@@ -296,6 +292,44 @@ class dribble(gym.Env):
         arms = np.copy(self.DEFAULT_ARMS)  # default arms pose
         arms[0:4] += a[12:16] * 4  # arms pitch+roll
 
+        return l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms
+    def step(self, action):
+        r = (self.
+             player.world.robot)
+
+        w = self.player.world
+        d = w.draw
+        if w.ball_abs_pos[1] > 0:  #
+            dribble_target = (15, 5)
+        else:
+            dribble_target = (15, -5)
+
+        self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
+        # # exponential moving average
+        # self.act = 0.85 * self.act + 0.15 * action * 0.7 * 0.95 * self.dribble_speed
+        #
+        # # execute Step behavior to extract the target positions of each leg (we will override these targets)
+        # lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
+        #                                                               self.STEP_Z_SPAN,
+        #                                                               self.leg_length * self.STEP_Z_MAX)
+        #
+        # # Leg IK
+        # a = self.act
+        # l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
+        # r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
+        # l_foot_rot = a[6:9] * (2, 2, 3)
+        # r_foot_rot = a[9:12] * (2, 2, 3)
+        #
+        # # Limit leg yaw/pitch (and add bias)
+        # l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
+        # r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
+        #
+        # # Arms actions
+        # arms = np.copy(self.DEFAULT_ARMS)  # default arms pose
+        # arms[0:4] += a[12:16] * 4  # arms pitch+roll
+        l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms = self.execute(action)
+        action_p = np.concatenate((l_ankle_pos, r_ankle_pos))
+        action_r = np.concatenate((l_foot_rot, r_foot_rot, arms))
         # Set target positions
         self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)  # legs
         r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)  # arms
@@ -311,6 +345,7 @@ class dribble(gym.Env):
                                     np.linalg.norm(unit_vector) * np.linalg.norm(w.ball_cheat_abs_vel[:2]))
         else:
             cos_theta = 0
+
         with open(M.get_active_directory([
             "/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
             "/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
@@ -320,11 +355,17 @@ class dribble(gym.Env):
         ][r.type]), 'rb') as f:
             model = pickle.load(f)
         act = run_mlp(obs, model)
-        action_tensor = torch.from_numpy(action)
-        act_tensor = torch.from_numpy(act)
-        loss = torch.exp(-torch.norm(action_tensor - act_tensor, p=2))
+        a_l_ankle_pos, a_r_ankle_pos, a_l_foot_rot, a_r_foot_rot, a_arms = self.execute(act)
+        act_p = np.concatenate((a_l_ankle_pos, a_r_ankle_pos))
+        act_r = np.concatenate((a_l_foot_rot, a_r_foot_rot, a_arms))
+        action_p_tensor = torch.from_numpy(action_p)
+        action_r_tensor = torch.from_numpy(action_r)
+        act_p_tensor = torch.from_numpy(act_p)
+        act_r_tensor = torch.from_numpy(act_r)
+        loss_p = torch.exp(-torch.norm(action_p_tensor - act_p_tensor, p=2))
+        loss_r = torch.exp(-torch.norm(action_r_tensor - act_r_tensor, p=2))
         # compute the reward
-        reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta + loss
+        reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta + loss_p + loss_r
 
         if self.ball_dist_hip_center_2d < 0.115:
             reward = 0
@@ -346,7 +387,7 @@ class Train(Train_Base):
     def train(self, args):
 
         # --------------------------------------- Learning parameters
-        n_envs = min(1, os.cpu_count())
+        n_envs = min(16, os.cpu_count())
         n_steps_per_env = 1024  # RolloutBuffer is of size (n_steps_per_env * n_envs)
         minibatch_size = 64  # should be a factor of (n_steps_per_env * n_envs)
         total_steps = 50000000
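
Note (illustration, not part of the patch): the reshaped reward combines the ball's speed weighted by its alignment with the dribble target (the cos_theta term) with two imitation bonuses that compare the learner's leg-position and rotation/arm targets against those produced by the pretrained dribble model loaded from the pickle file. A minimal sketch of that computation, assuming NumPy inputs; the function name imitation_reward and the dummy values below are illustrative only, not repository code:

    # Illustrative sketch of the reward shaping above; not repository code.
    import numpy as np
    import torch

    def imitation_reward(ball_vel, cos_theta, action_p, action_r, act_p, act_r):
        # Imitation bonuses: equal to 1.0 when the learner's targets match the expert's,
        # decaying exponentially with the L2 distance between them.
        loss_p = float(torch.exp(-torch.norm(torch.from_numpy(action_p) - torch.from_numpy(act_p), p=2)))
        loss_r = float(torch.exp(-torch.norm(torch.from_numpy(action_r) - torch.from_numpy(act_r), p=2)))
        # Progress term: ball speed scaled by its alignment with the target direction.
        return float(np.linalg.norm(ball_vel)) * cos_theta + loss_p + loss_r

    # Dummy example: 6 ankle-position values and 14 rotation/arm values, matching the
    # concatenations in the patch (3+3 ankle positions, 3+3 foot rotations, 8 arm joints).
    r = imitation_reward(np.array([0.4, 0.1]), 0.9,
                         np.zeros(6), np.zeros(14),
                         np.full(6, 0.05), np.full(14, 0.05))
    print(round(r, 3))

Since exp(-x) lies in (0, 1] for x >= 0, each imitation bonus contributes at most 1 per step, so the ball-progress term still dominates once the ball moves quickly toward the target.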