# Source exported 2025-04-09 21:13:39 +08:00 — 164 lines, 7.2 KiB, Python

import pickle
from behaviors.custom.Push_RL.Env_LL import Env_LL
from behaviors.custom.Push_RL.Env_HL import Env_HL
from math_ops.Neural_Network import run_mlp
from math_ops.Math_Ops import Math_Ops as U
import numpy as np
from agent.Base_Agent import Base_Agent
class Push_RL:
def __init__(self, base_agent: Base_Agent):
self.world = base_agent.world
self.description = 'RL push'
self.auto_head = True
self.env_LL = Env_LL(base_agent, 0.9 if self.world.robot.type == 3 else 1.2)
self.env_HL = Env_HL(base_agent)
self.phase = 0
self.counter = 0
self.behavior = base_agent.behavior
self.path_manager = base_agent.path_manager
self.inv_kinematics = base_agent.inv_kinematics
# 模型加载(这部分可能在反编译中丢失)
with open(U.get_active_directory([
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl"
][self.world.robot.type]), 'rb') as f:
self.model_LL = pickle.load(f)
with open(U.get_active_directory([
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl"
][self.world.robot.type]), 'rb') as f:
self.model_HL = pickle.load(f)
    def execute(self, reset: bool, stop: bool = False) -> bool:
        ''' Just push the ball autonomously, no target is required '''
        # NOTE(review): this body was recovered from indentation-less (decompiled)
        # source; the nesting below is the only syntactically valid parse, but it
        # should be confirmed against the original file where possible.
        # Returns True when the behavior has finished, False while still running.
        w = self.world
        r = self.world.robot
        bp = w.ball_abs_pos[:2]       # ball absolute position (x, y)
        me = r.loc_head_position[:2]  # robot head absolute position (x, y)
        step_gen = self.behavior.get_custom_behavior_object('Walk').env.step_generator
        reset_push = False
        if reset:
            self.phase = 0
            b_rel = w.ball_rel_torso_cart_pos
            if self.behavior.previous_behavior == 'Dribble':
                if b_rel[0] < 0.25:
                    pass
                else:
                    self.phase = 0
            elif abs(b_rel[1]) < 0.07:
                # Ball already laterally centered on the torso: skip straight to pushing
                self.phase = 1
                reset_push = True
        if self.phase == 0:
            # ---- Phase 0: walk to the ball while aligning with a push direction ----
            # Aim point just past the opponent goal line, clamped to the goal mouth
            goal_target = (15.1, np.clip(bp[1], -0.7, 0.7))
            goal_ori = U.vector_angle(goal_target - bp)
            vec_me_ball_ori = U.vector_angle(bp - me)
            rel_curr_angle = U.normalize_deg(vec_me_ball_ori - goal_ori)
            # Approach direction limited to ±60° around the ball-to-goal direction
            abs_targ_angle = goal_ori + np.clip(rel_curr_angle, -60, 60)
            # Near the side lines, clamp the push direction back toward the field
            if bp[1] > 9:
                abs_targ_angle = np.clip(abs_targ_angle, -160, -20)
            elif bp[1] < -9:
                abs_targ_angle = np.clip(abs_targ_angle, 20, 160)
            if bp[0] > 14:
                # Near the opponent end line: outside the goal width, angle back
                # toward the goal mouth; inside it, aim straight at the goal
                if bp[1] > 1.1:
                    abs_targ_angle = np.clip(abs_targ_angle, -140, -100)
                elif bp[1] < -1.1:
                    abs_targ_angle = np.clip(abs_targ_angle, 100, 140)
                else:
                    abs_targ_angle = goal_ori
            ball_dist = np.linalg.norm(bp - me)
            # Only lock the torso orientation once we are within 0.8 m of the ball
            ori = None if ball_dist > 0.8 else abs_targ_angle
            (next_pos, next_ori, dist_to_final_target) = self.path_manager.get_path_to_ball(
                x_ori=abs_targ_angle, x_dev=-0.19, torso_ori=ori)
            b_rel = w.ball_rel_torso_cart_pos
            ang_diff = abs(U.normalize_deg(abs_targ_angle - r.imu_torso_orientation))
            # Check whether phase 1 can be entered: ball in front and centered,
            # step generator left-active at a switch point, ball position updated
            # within the last 300 ms, and orientation error under 10°
            if b_rel[0] >= 0.25 and abs(b_rel[1]) < 0.05 and step_gen.state_is_left_active and step_gen.switch and \
                    w.time_local_ms - w.ball_abs_pos_last_update < 300 and ang_diff < 10:
                reset_push = True
                self.phase += 1
                self.counter = 0
            else:
                dist = max(0.13, dist_to_final_target)
                reset_walk = reset or self.behavior.previous_behavior != 'Walk'
                self.behavior.execute_sub_behavior('Walk', reset_walk, next_pos, True,
                                                  next_ori if dist_to_final_target < 1 else None, True, dist)
                if stop:
                    self.phase = 0  # Reset phase on forced stop
                    return True
        if self.phase == 1:
            # ---- Phase 1: RL push (HL policy picks direction, LL policy steps) ----
            # Check whether pushing on would carry the ball out of the field
            leaving_field = False
            if (bp[1] > 9 and r.imu_torso_orientation > 0) or \
                    (bp[1] < -9 and r.imu_torso_orientation < 0) or \
                    (bp[0] > 14 and abs(bp[1]) > 1.1):
                leaving_field = abs(r.imu_torso_orientation) < 90
            ball_hip = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)[:2]
            dist_ball_our_goal = np.linalg.norm(bp - (-15, 0))
            dist_us_our_goal = np.linalg.norm(me - (-15, 0))
            # Check whether the ball was lost (laterally off the hip, recently seen, far)
            lost = abs(ball_hip[1]) > 0.2
            ball_unseen = w.time_local_ms - w.ball_last_seen >= 400
            ball_far = np.linalg.norm(ball_hip) > 0.3
            # Terminal also when the ball is clearly closer to our goal than we are
            terminal = leaving_field or (dist_ball_our_goal + 0.2 < dist_us_our_goal) or \
                (not ball_unseen and ball_far and lost)
            if stop or terminal:
                self.phase += 1  # advance to the wind-down phase (takes effect next call)
            elif self.counter % 25 == 0:
                # Re-query the high-level policy every 25 steps for a push direction
                obs = self.env_HL.observe(reset_push)
                action = run_mlp(obs, self.model_HL)
                self.env_LL.HL_abs_direction = self.env_HL.execute(action)
            self.counter += 1
            # Low-level policy performs one push step at full speed
            self.env_LL.push_speed = 1
            obs = self.env_LL.observe(reset_push)
            action = run_mlp(obs, self.model_LL)
            self.env_LL.execute(action)
            # Draw debug info: the current high-level push direction
            d = w.draw
            if d.enabled:
                vec = U.vector_from_angle(self.env_LL.HL_abs_direction)
                d.line(me, me + vec, 4, d.Color.red, 'opp_vec')
            return False
        if self.phase > 1:
            # ---- Wind-down: decay push speed, finish once nearly stopped ----
            WIND_DOWN_STEPS = 50
            self.env_LL.push_speed = 1 - self.phase / WIND_DOWN_STEPS
            self.env_LL.HL_abs_direction = r.imu_torso_orientation
            obs = self.env_LL.observe(reset_push)
            action = run_mlp(obs, self.model_LL)
            self.env_LL.execute(action)
            self.phase += 1
            # Done when the wind-down window elapses or head velocity drops below 0.15
            if self.phase >= WIND_DOWN_STEPS - 5 or np.linalg.norm(r.get_head_abs_vel(4)) < 0.15:
                self.phase = 0
                return True
        return False
def is_ready(self):
''' Returns True if Push Behavior is ready to start under current game/robot conditions '''
return True