commit 371e761a44 (branch: new)
parent ae6989ecf8
@@ -293,59 +293,9 @@ class dribble(gym.Env):
         arms[0:4] += a[12:16] * 4  # arms pitch+roll

         return l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms

-    def step(self, action):
-        r = (self.
-            player.world.robot)
-
-        w = self.player.world
-        d = w.draw
-        if w.ball_abs_pos[1] > 0:  #
-            dribble_target = (15, 5)
-        else:
-            dribble_target = (15, -5)
-
-        self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
-        # # exponential moving average
-        # self.act = 0.85 * self.act + 0.15 * action * 0.7 * 0.95 * self.dribble_speed
-        #
-        # # execute Step behavior to extract the target positions of each leg (we will override these targets)
-        # lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
-        #                                                               self.STEP_Z_SPAN,
-        #                                                               self.leg_length * self.STEP_Z_MAX)
-        #
-        # # Leg IK
-        # a = self.act
-        # l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
-        # r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
-        # l_foot_rot = a[6:9] * (2, 2, 3)
-        # r_foot_rot = a[9:12] * (2, 2, 3)
-        #
-        # # Limit leg yaw/pitch (and add bias)
-        # l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
-        # r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
-        #
-        # # Arms actions
-        # arms = np.copy(self.DEFAULT_ARMS)  # default arms pose
-        # arms[0:4] += a[12:16] * 4  # arms pitch+roll
-        l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms = self.execute(action)
-
-        action_p = np.concatenate((l_ankle_pos, r_ankle_pos))
-        action_r = np.concatenate((l_foot_rot, r_foot_rot, arms))
-        # Set target positions
-        self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)  # legs
-        r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)  # arms
-        self.sync()
-        self.step_counter += 1
-
-        # collect observations
-        obs = self.observe()
-        angle_rad = np.radians(self.gym_last_internal_abs_ori)  # convert the angle to radians
-        unit_vector = np.array([np.cos(angle_rad), np.sin(angle_rad)])  # compute the unit vector
-        if np.linalg.norm(w.ball_cheat_abs_vel[:2]) != 0:
-            cos_theta = np.dot(unit_vector, w.ball_cheat_abs_vel[:2]) / (
-                np.linalg.norm(unit_vector) * np.linalg.norm(w.ball_cheat_abs_vel[:2]))
-        else:
-            cos_theta = 0
-
+    def loss(self, obs, action_p, action_r):
+        r = self.player.world.robot
         with open(M.get_active_directory([
             "/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
             "/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
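
Note: the cos_theta used in step() (removed here and re-added after loss() below) is the cosine of the angle between the robot's heading and the ball's 2-D velocity, so the reward favors pushing the ball in the direction the robot is facing. A minimal NumPy sketch of that computation, with hypothetical names heading_deg and ball_vel_2d standing in for self.gym_last_internal_abs_ori and w.ball_cheat_abs_vel[:2]:

    import numpy as np

    def heading_alignment(heading_deg, ball_vel_2d):
        # unit vector of the robot's heading
        angle_rad = np.radians(heading_deg)
        unit_vector = np.array([np.cos(angle_rad), np.sin(angle_rad)])
        speed = np.linalg.norm(ball_vel_2d)
        if speed == 0:
            return 0.0  # same guard as the env: no direction when the ball is still
        # unit_vector already has norm 1, so dividing by speed fully normalizes
        return float(np.dot(unit_vector, ball_vel_2d) / speed)

    print(heading_alignment(45.0, np.array([1.0, 1.0])))  # ~1.0, perfectly aligned
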
@@ -362,10 +312,49 @@ class dribble(gym.Env):
         action_r_tensor = torch.from_numpy(action_r)
         act_p_tensor = torch.from_numpy(act_p)
         act_r_tensor = torch.from_numpy(act_r)
-        loss_p = torch.exp(-torch.norm(action_p_tensor - act_p_tensor, p=2))
-        loss_r = torch.exp(-torch.norm(action_r_tensor - act_r_tensor, p=2))
+        loss_p = torch.exp(-40*torch.norm(action_p_tensor - act_p_tensor, p=2))
+        loss_r = torch.exp(-0.1*torch.norm(action_r_tensor - act_r_tensor, p=2))
+        loss = loss_p + loss_r
+        return loss
+
+    def step(self, action):
+        r = (self.
+            player.world.robot)
+
+        w = self.player.world
+        d = w.draw
+        if w.ball_abs_pos[1] > 0:  #
+            dribble_target = (15, 5)
+        else:
+            dribble_target = (15, -5)
+
+        self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
+
+        l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms = self.execute(action)
+
+        action_p = np.concatenate((l_ankle_pos, r_ankle_pos))
+        action_r = np.concatenate((l_foot_rot, r_foot_rot, arms))
+
+        # Set target positions
+        self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)  # legs
+        r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)  # arms
+        self.sync()
+        self.step_counter += 1
+
+        # collect observations
+        obs = self.observe()
+        angle_rad = np.radians(self.gym_last_internal_abs_ori)  # convert the angle to radians
+        unit_vector = np.array([np.cos(angle_rad), np.sin(angle_rad)])  # compute the unit vector
+        if np.linalg.norm(w.ball_cheat_abs_vel[:2]) != 0:
+            cos_theta = np.dot(unit_vector, w.ball_cheat_abs_vel[:2]) / (
+                np.linalg.norm(unit_vector) * np.linalg.norm(w.ball_cheat_abs_vel[:2]))
+        else:
+            cos_theta = 0
+
+        loss = self.loss(obs, action_p, action_r)
+
         # compute reward
-        reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta + loss_p + loss_r
+        reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta

         if self.ball_dist_hip_center_2d < 0.115:
             reward = 0
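
Note: the two tracking terms rescaled in this hunk have the form exp(-k * ||a - a_ref||_2): each equals 1 when the policy reproduces the reference action recovered from the loaded .pkl and decays toward 0 as it deviates, so a larger k makes the term more selective. A self-contained sketch under that reading (the function name is illustrative; action_* come from the policy being trained, act_* from the reference policy):

    import numpy as np
    import torch

    def imitation_terms(action_p, action_r, act_p, act_r):
        action_p_tensor = torch.from_numpy(action_p)
        action_r_tensor = torch.from_numpy(action_r)
        act_p_tensor = torch.from_numpy(act_p)
        act_r_tensor = torch.from_numpy(act_r)
        # k=40 makes the ankle-position term far stricter than the
        # foot-rotation/arm term with k=0.1, as in this commit
        loss_p = torch.exp(-40 * torch.norm(action_p_tensor - act_p_tensor, p=2))
        loss_r = torch.exp(-0.1 * torch.norm(action_r_tensor - act_r_tensor, p=2))
        return loss_p + loss_r

    p = np.zeros(6)                          # identical position actions
    r1, r2 = np.zeros(10), np.full(10, 0.1)  # rotations differ by 0.1 per joint
    print(imitation_terms(p, r1, p, r2))     # ~1.969 = 1.0 + exp(-0.1*sqrt(0.1))
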
@@ -387,7 +376,7 @@ class Train(Train_Base):
     def train(self, args):

         # --------------------------------------- Learning parameters
-        n_envs = min(16, os.cpu_count())
+        n_envs = min(12, os.cpu_count())
         n_steps_per_env = 1024  # RolloutBuffer is of size (n_steps_per_env * n_envs)
         minibatch_size = 64  # should be a factor of (n_steps_per_env * n_envs)
         total_steps = 50000000
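
Note: the sizing comments follow the usual on-policy rollout bookkeeping (the RolloutBuffer wording suggests stable-baselines3 PPO, though the diff does not show the learner itself): the buffer collects n_steps_per_env * n_envs transitions per update, and minibatch_size must divide that total evenly. A quick check with the committed values:

    import os

    n_envs = min(12, os.cpu_count())  # 12 on a machine with at least 12 cores
    n_steps_per_env = 1024
    minibatch_size = 64

    buffer_size = n_steps_per_env * n_envs    # 12288 transitions with n_envs == 12
    assert buffer_size % minibatch_size == 0  # 64 divides 12288 -> 192 minibatches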