jxr 2025-04-09 21:13:39 +08:00
parent 0a1f594d19
commit 30df6ef4a0
20 changed files with 395 additions and 6 deletions

.idea/FCPCodebase.iml (generated)

@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Train" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="FCPCodebase" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyDocumentationSettings">

.idea/misc.xml (generated)

@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="fcp_env" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Train" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="FCPCodebase" project-jdk-type="Python SDK" />
 </project>

agent/Agent.py

@@ -32,6 +32,8 @@ class Agent(Base_Agent):
         distance_to_final_target = np.linalg.norm(target_2d - self.loc_head_position[:2])
         return self.behavior.execute("Dribble", next_rel_ori,
                                      False) # Args: target, is_target_abs, ori, is_ori_abs, distance
+    def push(self, target_2d=(15, 0), avoid_obstacles=True):
+        self.behavior.execute("Push_RL")
     def beam(self, avoid_center_circle=False):
         r = self.world.robot

behaviors/Behavior.py

@@ -49,7 +49,9 @@ class Behavior():
         from behaviors.custom.Get_Up.Get_Up import Get_Up
         from behaviors.custom.Step.Step import Step
         from behaviors.custom.Walk.Walk import Walk
-        classes = [Basic_Kick,Dribble,Fall,Get_Up,Step,Walk]
+        from behaviors.custom.Push_RL.Push_RL import Push_RL
+        classes = [Basic_Kick,Dribble,Fall,Get_Up,Step,Walk,Push_RL]
         '''---- End of manual declarations ----'''

behaviors/custom/Push_RL/Env_HL.py (new file)

@@ -0,0 +1,79 @@
import math
from typing import List
import numpy as np
from math_ops.Math_Ops import Math_Ops as U
from behaviors.custom.Step.Step_Generator import Step_Generator
from world.commons.Other_Robot import Other_Robot
from world.World import World
from agent.Base_Agent import Base_Agent
class Env_HL:
COLS = 16
LINS = 5
def __init__(self, base_agent: Base_Agent):
self.world = base_agent.world
self.obs = np.zeros(163, np.float32)
self.output = 0
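    # The radar observation has 2 parts (teammates, opponents), each with
    # LINS radial rings x COLS angular columns = 160 cells; observe() appends
    # 3 scalar features for a total of 163 dims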
    def fill_radar(self, radar, team: List[Other_Robot], radar_part, RADIAL_START, RADIAL_MULT):
w = self.world
bp = w.ball_abs_pos[:2]
C = self.COLS
L = self.LINS
vec_b_goal = (15.5, 0) - bp
vec_b_goal_absdir = U.vector_angle(vec_b_goal)
dist_closest_player = 10
for t in team:
if w.time_local_ms - t.state_last_update > 500:
continue
vec_b_opp = t.state_abs_pos[:2] - bp
dist_b_opp = np.linalg.norm(vec_b_opp)
if dist_b_opp < dist_closest_player:
dist_closest_player = dist_b_opp
vec_b_opp_dir = U.normalize_deg(U.vector_angle(vec_b_opp) - vec_b_goal_absdir)
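            # split this player's angle between the two nearest of the 16
            # angular columns (bilinear weighting)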
(div, mod) = divmod(vec_b_opp_dir + 180, 360 / C)
zone = int(div) % C
prog = mod * C / 360
ang_column_weight_1 = (zone, 1 - prog)
ang_column_weight_2 = ((zone + 1) % C, prog)
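            # split the ball-player distance between the two nearest radial
            # rings on a log scale (ring i starts at RADIAL_START * RADIAL_MULT**i)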
zone = max(1, 1 + math.log((dist_b_opp + 1e-06) / RADIAL_START, RADIAL_MULT))
prog = zone % 1
zone = math.ceil(zone) - 1
if zone >= L:
continue
rad_line_weight_1 = None if zone == 0 else (zone - 1, 1 - prog)
rad_line_weight_2 = (zone, 1 if zone == 0 else prog)
if rad_line_weight_1 is not None:
radar[(radar_part, rad_line_weight_1[0], ang_column_weight_1[0])] += rad_line_weight_1[1] * \
ang_column_weight_1[1]
radar[(radar_part, rad_line_weight_1[0], ang_column_weight_2[0])] += rad_line_weight_1[1] * \
ang_column_weight_2[1]
radar[(radar_part, rad_line_weight_2[0], ang_column_weight_1[0])] += rad_line_weight_2[1] * \
ang_column_weight_1[1]
radar[(radar_part, rad_line_weight_2[0], ang_column_weight_2[0])] += rad_line_weight_2[1] * \
ang_column_weight_2[1]
return dist_closest_player
def observe(self, init=False):
if init:
self.output = 0
radar = np.zeros((2, self.LINS, self.COLS))
RADIAL_START = 0.3
RADIAL_MULT = 1.7
dist_closest_tm = self.fill_radar(radar, self.world.teammates, 0, RADIAL_START, RADIAL_MULT)
dist_closest_opp = self.fill_radar(radar, self.world.opponents, 1, RADIAL_START, RADIAL_MULT)
self.obs = np.append(radar.flatten(), (dist_closest_tm * 0.5, dist_closest_opp * 0.5, self.output / 40))
return self.obs
def execute(self, action):
vec_b_goal = (15.5, 0) - self.world.ball_abs_pos[:2]
vec_b_goal_absdir = U.vector_angle(vec_b_goal)
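        # action[0] in [-1, 1] maps to +/-60 deg relative to the ball->goal
        # direction; the smoothed output changes by at most 45 deg per HL step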
rel_direction = action[0] * 60
self.output += np.clip(U.normalize_deg(rel_direction - self.output), -45, 45)
abs_direction = U.normalize_deg(vec_b_goal_absdir + self.output)
return abs_direction
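
For intuition, the two-axis soft binning used by fill_radar can be reproduced standalone. The sketch below is a hypothetical helper written for illustration (not code from this commit); its defaults mirror the RADIAL_START/RADIAL_MULT values passed in observe():

import math

def soft_bin(angle_deg, dist, cols=16, lins=5, radial_start=0.3, radial_mult=1.7):
    # Mirrors fill_radar: returns (ring, column, weight) cells for one player
    div, mod = divmod(angle_deg + 180, 360 / cols)
    col_w = [(int(div) % cols, 1 - mod * cols / 360),
             ((int(div) + 1) % cols, mod * cols / 360)]
    zone = max(1, 1 + math.log((dist + 1e-6) / radial_start, radial_mult))
    prog = zone % 1
    zone = math.ceil(zone) - 1
    if zone >= lins:
        return []  # beyond the outermost ring: ignored, as in the original
    lin_w = [(zone, 1 if zone == 0 else prog)]
    if zone > 0:
        lin_w.append((zone - 1, 1 - prog))
    return [(li, c, wl * wc) for li, wl in lin_w for c, wc in col_w]

# a player 1 m from the ball, 10 deg off the ball->goal line,
# spreads its unit mass over (up to) 4 radar cells
print(soft_bin(10, 1.0))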

behaviors/custom/Push_RL/Env_LL.py (new file)

@@ -0,0 +1,140 @@
import math
import numpy as np
from math_ops.Math_Ops import Math_Ops as U
from behaviors.custom.Step.Step_Generator import Step_Generator
from agent.Base_Agent import Base_Agent
class Env_LL:
    def __init__(self, base_agent: Base_Agent, step_width):
self.world = base_agent.world
self.obs = np.zeros(78, np.float32)
self.STEP_DUR = 8
self.STEP_Z_SPAN = 0.02
self.STEP_Z_MAX = 0.7
r = self.world.robot
nao_specs = base_agent.inv_kinematics.NAO_SPECS
self.leg_length = nao_specs[1] + nao_specs[3]
feet_y_dev = nao_specs[0] * step_width
sample_time = r.STEPTIME
max_ankle_z = nao_specs[5]
self.step_generator = Step_Generator(feet_y_dev, sample_time, max_ankle_z)
self.inv_kinematics = base_agent.inv_kinematics
self.DEFAULT_ARMS = np.array([
-90, -90, 8, 8, 90, 90, 70, 70], np.float32)
self.HL_abs_direction = None
self.push_speed = 1
self.step_counter = 0
self.act = np.zeros(16, np.float32)
self.values_l = None
self.values_r = None
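    # 78-dim observation: step counter, torso state (height, velocity, roll,
    # pitch, gyro, acc), foot force sensors, 20 joint positions and speeds,
    # step-generator phase, ball position/velocity relative to the hip center,
    # the HL target direction, and the closest opponent's position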
def observe(self, init=False):
w = self.world
r = self.world.robot
if init:
self.step_counter = 0
self.act = np.zeros(16, np.float32)
        # Fill the observation vector
self.obs[0] = min(self.step_counter, 96) / 100
self.obs[1] = r.loc_head_z * 3
self.obs[2] = r.loc_head_z_vel / 2
self.obs[3] = r.imu_torso_roll / 15
self.obs[4] = r.imu_torso_pitch / 15
self.obs[5:8] = r.gyro / 100
self.obs[8:11] = r.acc / 10
self.obs[11:17] = r.frp.get('lf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01, 0.01)
self.obs[17:23] = r.frp.get('rf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01, 0.01)
self.obs[23:43] = r.joints_position[2:22] / 100
self.obs[43:63] = r.joints_speed[2:22] / 6.1395
if init:
self.obs[63] = 1
self.obs[64] = 1
self.obs[65] = 0
self.obs[66] = 0
else:
self.obs[63] = self.step_generator.external_progress
self.obs[64] = float(self.step_generator.state_is_left_active)
self.obs[65] = float(not self.step_generator.state_is_left_active)
self.obs[66] = math.sin((self.step_generator.state_current_ts / self.step_generator.ts_per_step) * math.pi)
ball_rel_hip_center = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)
ball_dist_hip_center = np.linalg.norm(ball_rel_hip_center)
if init:
self.obs[67:70] = (0, 0, 0)
elif w.ball_is_visible:
self.obs[67:70] = (ball_rel_hip_center - self.obs[70:73]) * 10
self.obs[70:73] = ball_rel_hip_center
self.obs[73] = ball_dist_hip_center * 2
rel_HL_target = U.normalize_deg(self.HL_abs_direction - r.imu_torso_orientation)
self.obs[74] = U.deg_cos(rel_HL_target)
self.obs[75] = U.deg_sin(rel_HL_target)
self.obs[76] = 2
self.obs[77] = 2
        # Find the closest opponent
opps_dist = [o.state_horizontal_dist for o in w.opponents]
if opps_dist:
closest_opp_idx = np.argmin(opps_dist)
o = w.opponents[closest_opp_idx]
if opps_dist[closest_opp_idx] < 1:
body_parts_rel_torso_2d_avg = np.zeros(2)
weight_sum = 0
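                # average the opponent's body parts relative to our torso,
                # weighted so that closer parts dominate (weight decays by a
                # factor of 1e6 per meter)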
for pos in o.body_parts_cart_rel_pos.values():
bp_rel_torso_2d = r.head_to_body_part_transform('torso', pos)[:2]
weight = math.pow(1e+06, -np.linalg.norm(bp_rel_torso_2d))
body_parts_rel_torso_2d_avg += weight * bp_rel_torso_2d
weight_sum += weight
if weight_sum > 0:
body_parts_rel_torso_2d_avg /= weight_sum
self.obs[76] = body_parts_rel_torso_2d_avg[0]
self.obs[77] = body_parts_rel_torso_2d_avg[1]
return self.obs
def execute_ik(self, l_pos, l_rot, r_pos, r_rot):
r = self.world.robot
(indices, self.values_l, error_codes) = self.inv_kinematics.leg(
l_pos, l_rot, True, dynamic_pose=False)
r.set_joints_target_position_direct(indices, self.values_l, harmonize=False)
(indices, self.values_r, error_codes) = self.inv_kinematics.leg(
r_pos, r_rot, False, dynamic_pose=False)
r.set_joints_target_position_direct(indices, self.values_r, harmonize=False)
def execute(self, action):
r = self.world.robot
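        # exponential moving average (80% previous / 20% new) smooths the
        # policy's actions; push_speed scales them down during wind-down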
self.act = 0.8 * self.act + 0.2 * action * 0.9 * self.push_speed
(lfy, lfz, rfy, rfz) = self.step_generator.get_target_positions(
self.step_counter == 0, self.STEP_DUR, self.STEP_Z_SPAN,
self.leg_length * self.STEP_Z_MAX)
a = self.act
l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
l_foot_rot = a[6:9] * (2, 2, 3)
r_foot_rot = a[9:12] * (2, 2, 3)
l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
arms = np.copy(self.DEFAULT_ARMS)
arms[0:4] += a[12:16] * 4
self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)
r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)
self.step_counter += 1

behaviors/custom/Push_RL/Push_RL.py (new file)

@@ -0,0 +1,164 @@
import pickle
from behaviors.custom.Push_RL.Env_LL import Env_LL
from behaviors.custom.Push_RL.Env_HL import Env_HL
from math_ops.Neural_Network import run_mlp
from math_ops.Math_Ops import Math_Ops as U
import numpy as np
from agent.Base_Agent import Base_Agent
class Push_RL:
def __init__(self, base_agent: Base_Agent):
self.world = base_agent.world
self.description = 'RL push'
self.auto_head = True
self.env_LL = Env_LL(base_agent, 0.9 if self.world.robot.type == 3 else 1.2)
self.env_HL = Env_HL(base_agent)
self.phase = 0
self.counter = 0
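        # phase 0: walk to a point behind the ball; phase 1: RL push active;
        # phase > 1: wind down and finish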
self.behavior = base_agent.behavior
self.path_manager = base_agent.path_manager
self.inv_kinematics = base_agent.inv_kinematics
        # Model loading (this part may have been lost during decompilation)
with open(U.get_active_directory([
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl"
][self.world.robot.type]), 'rb') as f:
self.model_LL = pickle.load(f)
with open(U.get_active_directory([
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl"
][self.world.robot.type]), 'rb') as f:
self.model_HL = pickle.load(f)
def execute(self, reset, stop=False):
''' Just push the ball autonomously, no target is required '''
w = self.world
r = self.world.robot
bp = w.ball_abs_pos[:2]
me = r.loc_head_position[:2]
step_gen = self.behavior.get_custom_behavior_object('Walk').env.step_generator
reset_push = False
if reset:
self.phase = 0
b_rel = w.ball_rel_torso_cart_pos
            # If we were just dribbling and the ball is already centered in
            # front of the torso, skip the approach and start pushing at once
            if self.behavior.previous_behavior == 'Dribble' and b_rel[0] < 0.25 and abs(b_rel[1]) < 0.07:
                self.phase = 1
                reset_push = True
if self.phase == 0:
goal_target = (15.1, np.clip(bp[1], -0.7, 0.7))
goal_ori = U.vector_angle(goal_target - bp)
vec_me_ball_ori = U.vector_angle(bp - me)
rel_curr_angle = U.normalize_deg(vec_me_ball_ori - goal_ori)
abs_targ_angle = goal_ori + np.clip(rel_curr_angle, -60, 60)
if bp[1] > 9:
abs_targ_angle = np.clip(abs_targ_angle, -160, -20)
elif bp[1] < -9:
abs_targ_angle = np.clip(abs_targ_angle, 20, 160)
if bp[0] > 14:
if bp[1] > 1.1:
abs_targ_angle = np.clip(abs_targ_angle, -140, -100)
elif bp[1] < -1.1:
abs_targ_angle = np.clip(abs_targ_angle, 100, 140)
else:
abs_targ_angle = goal_ori
ball_dist = np.linalg.norm(bp - me)
ori = None if ball_dist > 0.8 else abs_targ_angle
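            # approach a point 0.19 m behind the ball (x_dev=-0.19); only
            # constrain the torso orientation once within 0.8 m of the ball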
(next_pos, next_ori, dist_to_final_target) = self.path_manager.get_path_to_ball(
x_ori=abs_targ_angle, x_dev=-0.19, torso_ori=ori)
b_rel = w.ball_rel_torso_cart_pos
ang_diff = abs(U.normalize_deg(abs_targ_angle - r.imu_torso_orientation))
            # Check whether we can advance to phase 1
if b_rel[0] >= 0.25 and abs(b_rel[1]) < 0.05 and step_gen.state_is_left_active and step_gen.switch and \
w.time_local_ms - w.ball_abs_pos_last_update < 300 and ang_diff < 10:
reset_push = True
self.phase += 1
self.counter = 0
else:
dist = max(0.13, dist_to_final_target)
reset_walk = reset or self.behavior.previous_behavior != 'Walk'
self.behavior.execute_sub_behavior('Walk', reset_walk, next_pos, True,
next_ori if dist_to_final_target < 1 else None, True, dist)
if stop:
self.phase = 0 # Reset phase on forced stop
return True
if self.phase == 1:
            # Check whether the ball is leaving the field
leaving_field = False
if (bp[1] > 9 and r.imu_torso_orientation > 0) or \
(bp[1] < -9 and r.imu_torso_orientation < 0) or \
(bp[0] > 14 and abs(bp[1]) > 1.1):
leaving_field = abs(r.imu_torso_orientation) < 90
ball_hip = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)[:2]
dist_ball_our_goal = np.linalg.norm(bp - (-15, 0))
dist_us_our_goal = np.linalg.norm(me - (-15, 0))
            # Check whether the ball was lost
lost = abs(ball_hip[1]) > 0.2
ball_unseen = w.time_local_ms - w.ball_last_seen >= 400
ball_far = np.linalg.norm(ball_hip) > 0.3
terminal = leaving_field or (dist_ball_our_goal + 0.2 < dist_us_our_goal) or \
(not ball_unseen and ball_far and lost)
if stop or terminal:
self.phase += 1
elif self.counter % 25 == 0:
obs = self.env_HL.observe(reset_push)
action = run_mlp(obs, self.model_HL)
self.env_LL.HL_abs_direction = self.env_HL.execute(action)
self.counter += 1
self.env_LL.push_speed = 1
obs = self.env_LL.observe(reset_push)
action = run_mlp(obs, self.model_LL)
self.env_LL.execute(action)
            # Draw debug info
d = w.draw
if d.enabled:
vec = U.vector_from_angle(self.env_LL.HL_abs_direction)
d.line(me, me + vec, 4, d.Color.red, 'opp_vec')
return False
if self.phase > 1:
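            # wind-down: decay push_speed over up to 50 steps while holding
            # the current heading, then finish once slowed down or timed out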
WIND_DOWN_STEPS = 50
self.env_LL.push_speed = 1 - self.phase / WIND_DOWN_STEPS
self.env_LL.HL_abs_direction = r.imu_torso_orientation
obs = self.env_LL.observe(reset_push)
action = run_mlp(obs, self.model_LL)
self.env_LL.execute(action)
self.phase += 1
if self.phase >= WIND_DOWN_STEPS - 5 or np.linalg.norm(r.get_head_abs_vel(4)) < 0.15:
self.phase = 0
return True
return False
def is_ready(self):
''' Returns True if Push Behavior is ready to start under current game/robot conditions '''
return True


@@ -94,7 +94,9 @@ class Path_Manager():
         def get_hard_radius(t):
             if t.unum in priority_unums:
-                return 1.0 # extra distance for priority roles
+                if len(priority_unums) > 2:
+                    return 1.5
+                return None
             else:
                 return t.state_ground_area[1]+0.2
@@ -113,8 +115,8 @@ class Path_Manager():
             soft_radius = 0.6
             hard_radius = lambda o : 0.2
         elif mode == Path_Manager.MODE_DRIBBLE:
-            soft_radius = 2.3
-            hard_radius = lambda o : o.state_ground_area[1]+0.9
+            soft_radius = 1.6
+            hard_radius = lambda o : o.state_ground_area[1]+0.3
         else:
             soft_radius = 1.0
             hard_radius = lambda o : o.state_ground_area[1]+0.2