# Source exported 2025-04-09 21:13:39 +08:00 — 164 lines, 7.2 KiB, Python

import pickle
from behaviors.custom.Push_RL.Env_LL import Env_LL
from behaviors.custom.Push_RL.Env_HL import Env_HL
from math_ops.Neural_Network import run_mlp
from math_ops.Math_Ops import Math_Ops as U
import numpy as np
from agent.Base_Agent import Base_Agent
class Push_RL:
def __init__(self, base_agent: Base_Agent):
self.world = base_agent.world
self.description = 'RL push'
self.auto_head = True
self.env_LL = Env_LL(base_agent, 0.9 if self.world.robot.type == 3 else 1.2)
self.env_HL = Env_HL(base_agent)
self.phase = 0
self.counter = 0
self.behavior = base_agent.behavior
self.path_manager = base_agent.path_manager
self.inv_kinematics = base_agent.inv_kinematics
# 模型加载(这部分可能在反编译中丢失)
with open(U.get_active_directory([
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl"
][self.world.robot.type]), 'rb') as f:
self.model_LL = pickle.load(f)
with open(U.get_active_directory([
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl"
][self.world.robot.type]), 'rb') as f:
self.model_HL = pickle.load(f)
    def execute(self, reset: bool, stop: bool = False) -> bool:
        ''' Just push the ball autonomously, no target is required '''
        # NOTE(review): this body was recovered from indentation-less (decompiled)
        # source; the nesting below is the only syntactically valid parse, but it
        # should be confirmed against the original file where possible.
        # Returns True when the behavior has finished, False while still running.
        w = self.world
        r = self.world.robot
        bp = w.ball_abs_pos[:2]       # ball absolute position (x, y)
        me = r.loc_head_position[:2]  # robot head absolute position (x, y)
        step_gen = self.behavior.get_custom_behavior_object('Walk').env.step_generator
        reset_push = False
        if reset:
            self.phase = 0
            b_rel = w.ball_rel_torso_cart_pos
            if self.behavior.previous_behavior == 'Dribble':
                if b_rel[0] < 0.25:
                    pass
                else:
                    self.phase = 0
            elif abs(b_rel[1]) < 0.07:
                # Ball already laterally centered on the torso: skip straight to pushing
                self.phase = 1
                reset_push = True
        if self.phase == 0:
            # ---- Phase 0: walk to the ball while aligning with a push direction ----
            # Aim point just past the opponent goal line, clamped to the goal mouth
            goal_target = (15.1, np.clip(bp[1], -0.7, 0.7))
            goal_ori = U.vector_angle(goal_target - bp)
            vec_me_ball_ori = U.vector_angle(bp - me)
            rel_curr_angle = U.normalize_deg(vec_me_ball_ori - goal_ori)
            # Approach direction limited to ±60° around the ball-to-goal direction
            abs_targ_angle = goal_ori + np.clip(rel_curr_angle, -60, 60)
            # Near the side lines, clamp the push direction back toward the field
            if bp[1] > 9:
                abs_targ_angle = np.clip(abs_targ_angle, -160, -20)
            elif bp[1] < -9:
                abs_targ_angle = np.clip(abs_targ_angle, 20, 160)
            if bp[0] > 14:
                # Near the opponent end line: outside the goal width, angle back
                # toward the goal mouth; inside it, aim straight at the goal
                if bp[1] > 1.1:
                    abs_targ_angle = np.clip(abs_targ_angle, -140, -100)
                elif bp[1] < -1.1:
                    abs_targ_angle = np.clip(abs_targ_angle, 100, 140)
                else:
                    abs_targ_angle = goal_ori
            ball_dist = np.linalg.norm(bp - me)
            # Only lock the torso orientation once we are within 0.8 m of the ball
            ori = None if ball_dist > 0.8 else abs_targ_angle
            (next_pos, next_ori, dist_to_final_target) = self.path_manager.get_path_to_ball(
                x_ori=abs_targ_angle, x_dev=-0.19, torso_ori=ori)
            b_rel = w.ball_rel_torso_cart_pos
            ang_diff = abs(U.normalize_deg(abs_targ_angle - r.imu_torso_orientation))
            # Check whether phase 1 can be entered: ball in front and centered,
            # step generator left-active at a switch point, ball position updated
            # within the last 300 ms, and orientation error under 10°
            if b_rel[0] >= 0.25 and abs(b_rel[1]) < 0.05 and step_gen.state_is_left_active and step_gen.switch and \
                    w.time_local_ms - w.ball_abs_pos_last_update < 300 and ang_diff < 10:
                reset_push = True
                self.phase += 1
                self.counter = 0
            else:
                dist = max(0.13, dist_to_final_target)
                reset_walk = reset or self.behavior.previous_behavior != 'Walk'
                self.behavior.execute_sub_behavior('Walk', reset_walk, next_pos, True,
                                                  next_ori if dist_to_final_target < 1 else None, True, dist)
                if stop:
                    self.phase = 0  # Reset phase on forced stop
                    return True
        if self.phase == 1:
            # ---- Phase 1: RL push (HL policy picks direction, LL policy steps) ----
            # Check whether pushing on would carry the ball out of the field
            leaving_field = False
            if (bp[1] > 9 and r.imu_torso_orientation > 0) or \
                    (bp[1] < -9 and r.imu_torso_orientation < 0) or \
                    (bp[0] > 14 and abs(bp[1]) > 1.1):
                leaving_field = abs(r.imu_torso_orientation) < 90
            ball_hip = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)[:2]
            dist_ball_our_goal = np.linalg.norm(bp - (-15, 0))
            dist_us_our_goal = np.linalg.norm(me - (-15, 0))
            # Check whether the ball was lost (laterally off the hip, recently seen, far)
            lost = abs(ball_hip[1]) > 0.2
            ball_unseen = w.time_local_ms - w.ball_last_seen >= 400
            ball_far = np.linalg.norm(ball_hip) > 0.3
            # Terminal also when the ball is clearly closer to our goal than we are
            terminal = leaving_field or (dist_ball_our_goal + 0.2 < dist_us_our_goal) or \
                (not ball_unseen and ball_far and lost)
            if stop or terminal:
                self.phase += 1  # advance to the wind-down phase (takes effect next call)
            elif self.counter % 25 == 0:
                # Re-query the high-level policy every 25 steps for a push direction
                obs = self.env_HL.observe(reset_push)
                action = run_mlp(obs, self.model_HL)
                self.env_LL.HL_abs_direction = self.env_HL.execute(action)
            self.counter += 1
            # Low-level policy performs one push step at full speed
            self.env_LL.push_speed = 1
            obs = self.env_LL.observe(reset_push)
            action = run_mlp(obs, self.model_LL)
            self.env_LL.execute(action)
            # Draw debug info: the current high-level push direction
            d = w.draw
            if d.enabled:
                vec = U.vector_from_angle(self.env_LL.HL_abs_direction)
                d.line(me, me + vec, 4, d.Color.red, 'opp_vec')
            return False
        if self.phase > 1:
            # ---- Wind-down: decay push speed, finish once nearly stopped ----
            WIND_DOWN_STEPS = 50
            self.env_LL.push_speed = 1 - self.phase / WIND_DOWN_STEPS
            self.env_LL.HL_abs_direction = r.imu_torso_orientation
            obs = self.env_LL.observe(reset_push)
            action = run_mlp(obs, self.model_LL)
            self.env_LL.execute(action)
            self.phase += 1
            # Done when the wind-down window elapses or head velocity drops below 0.15
            if self.phase >= WIND_DOWN_STEPS - 5 or np.linalg.norm(r.get_head_abs_vel(4)) < 0.15:
                self.phase = 0
                return True
        return False
def is_ready(self):
''' Returns True if Push Behavior is ready to start under current game/robot conditions '''
return True