import pickle
import numpy as np
from agent.Base_Agent import Base_Agent
from behaviors.custom.Push_RL.Env_LL import Env_LL
from behaviors.custom.Push_RL.Env_HL import Env_HL
from math_ops.Neural_Network import run_mlp
from math_ops.Math_Ops import Math_Ops as U


class Push_RL:

    def __init__(self, base_agent: Base_Agent):
        self.world = base_agent.world
        self.description = 'RL push'
        self.auto_head = True
        self.env_LL = Env_LL(base_agent, 0.9 if self.world.robot.type == 3 else 1.2)
        self.env_HL = Env_HL(base_agent)
        self.phase = 0
        self.counter = 0
        self.behavior = base_agent.behavior
        self.path_manager = base_agent.path_manager
        self.inv_kinematics = base_agent.inv_kinematics

        # Model loading (this part may have been lost in the decompilation).
        # One path per robot type; all types currently share the same models.
        with open(U.get_active_directory([
            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl"
        ][self.world.robot.type]), 'rb') as f:
            self.model_LL = pickle.load(f)  # low-level policy: executes the push

        with open(U.get_active_directory([
            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl"
        ][self.world.robot.type]), 'rb') as f:
            self.model_HL = pickle.load(f)  # high-level policy: selects the push direction

    def execute(self, reset, stop=False):
        ''' Push the ball autonomously; no target is required '''
        w = self.world
        r = self.world.robot
        bp = w.ball_abs_pos[:2]        # ball absolute position (2D)
        me = r.loc_head_position[:2]   # our head position (2D)
        step_gen = self.behavior.get_custom_behavior_object('Walk').env.step_generator
        reset_push = False

        if reset:
            self.phase = 0
            b_rel = w.ball_rel_torso_cart_pos
            if self.behavior.previous_behavior == 'Dribble':
                # No-op branch (phase is already 0); likely a decompilation artifact
                if b_rel[0] < 0.25:
                    pass
                else:
                    self.phase = 0
            elif abs(b_rel[1]) < 0.07:
                # Ball is already laterally aligned with the torso: push immediately
                self.phase = 1
                reset_push = True

        if self.phase == 0:  # Phase 0: approach the ball and align with the push direction
            # Aim at a point on the goal line, clamped to the goal's width
            goal_target = (15.1, np.clip(bp[1], -0.7, 0.7))
            goal_ori = U.vector_angle(goal_target - bp)

            # Approach at most 60 deg away from the goal direction, biased toward our side of the ball
            vec_me_ball_ori = U.vector_angle(bp - me)
            rel_curr_angle = U.normalize_deg(vec_me_ball_ori - goal_ori)
            abs_targ_angle = goal_ori + np.clip(rel_curr_angle, -60, 60)

            # Near the side lines, push back into the field
            if bp[1] > 9:
                abs_targ_angle = np.clip(abs_targ_angle, -160, -20)
            elif bp[1] < -9:
                abs_targ_angle = np.clip(abs_targ_angle, 20, 160)

            # Near the opponent's end line but outside the goal, push back across the goal front
            if bp[0] > 14:
                if bp[1] > 1.1:
                    abs_targ_angle = np.clip(abs_targ_angle, -140, -100)
                elif bp[1] < -1.1:
                    abs_targ_angle = np.clip(abs_targ_angle, 100, 140)
                else:
                    abs_targ_angle = goal_ori

            # Only constrain the torso orientation when close to the ball
            ball_dist = np.linalg.norm(bp - me)
            ori = None if ball_dist > 0.8 else abs_targ_angle
            next_pos, next_ori, dist_to_final_target = self.path_manager.get_path_to_ball(
                x_ori=abs_targ_angle, x_dev=-0.19, torso_ori=ori)

            b_rel = w.ball_rel_torso_cart_pos
            ang_diff = abs(U.normalize_deg(abs_targ_angle - r.imu_torso_orientation))

            # Check whether we can advance to phase 1
            if (b_rel[0] >= 0.25 and abs(b_rel[1]) < 0.05 and step_gen.state_is_left_active
                    and step_gen.switch and w.time_local_ms - w.ball_abs_pos_last_update < 300
                    and ang_diff < 10):
                reset_push = True
                self.phase += 1
                self.counter = 0
            else:
                dist = max(0.13, dist_to_final_target)
                reset_walk = reset or self.behavior.previous_behavior != 'Walk'
                self.behavior.execute_sub_behavior('Walk', reset_walk, next_pos, True,
                                                   next_ori if dist_to_final_target < 1 else None, True, dist)

            if stop:
                self.phase = 0  # reset phase on forced stop
                return True

        if self.phase == 1:  # Phase 1: push the ball using the RL policies
            # Check whether the ball is about to leave the field
            leaving_field = False
            if ((bp[1] > 9 and r.imu_torso_orientation > 0)
                    or (bp[1] < -9 and r.imu_torso_orientation < 0)
                    or (bp[0] > 14 and abs(bp[1]) > 1.1)):
                leaving_field = abs(r.imu_torso_orientation) < 90

            ball_hip = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)[:2]
            dist_ball_our_goal = np.linalg.norm(bp - (-15, 0))
            dist_us_our_goal = np.linalg.norm(me - (-15, 0))

            # Check whether we lost the ball
            lost = abs(ball_hip[1]) > 0.2
            ball_unseen = w.time_local_ms - w.ball_last_seen >= 400
            ball_far = np.linalg.norm(ball_hip) > 0.3
            terminal = (leaving_field
                        or dist_ball_our_goal + 0.2 < dist_us_our_goal
                        or (not ball_unseen and ball_far and lost))

            if stop or terminal:
                self.phase += 1  # start winding down
            elif self.counter % 25 == 0:
                # Every 25 steps, the high-level policy picks a new absolute push direction
                obs = self.env_HL.observe(reset_push)
                action = run_mlp(obs, self.model_HL)
                self.env_LL.HL_abs_direction = self.env_HL.execute(action)

            # The low-level policy runs every step
            self.counter += 1
            self.env_LL.push_speed = 1
            obs = self.env_LL.observe(reset_push)
            action = run_mlp(obs, self.model_LL)
            self.env_LL.execute(action)

            # Draw debug information
            d = w.draw
            if d.enabled:
                vec = U.vector_from_angle(self.env_LL.HL_abs_direction)
                d.line(me, me + vec, 4, d.Color.red, 'opp_vec')
            return False

        if self.phase > 1:  # Phase >1: wind down until the robot is stable, then finish
            WIND_DOWN_STEPS = 50
            # Gradually reduce the push speed to zero while keeping the current heading
            self.env_LL.push_speed = 1 - self.phase / WIND_DOWN_STEPS
            self.env_LL.HL_abs_direction = r.imu_torso_orientation
            obs = self.env_LL.observe(reset_push)
            action = run_mlp(obs, self.model_LL)
            self.env_LL.execute(action)
            self.phase += 1
            if self.phase >= WIND_DOWN_STEPS - 5 or np.linalg.norm(r.get_head_abs_vel(4)) < 0.15:
                self.phase = 0
                return True

        return False

    def is_ready(self):
        ''' Returns True if the Push behavior is ready to start under the current game/robot conditions '''
        return True
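
# Usage sketch (illustrative, not part of the original module): how an agent's
# think loop might drive this behavior's phase machine. `base_agent` and the
# per-cycle sim step are assumptions about the surrounding codebase, shown here
# only to clarify the reset/stop/return-value contract of execute().
#
#   push = Push_RL(base_agent)
#   finished = push.execute(reset=True)       # first call resets the FSM to phase 0
#   while not finished:
#       ...                                   # agent commits joint targets / sim advances one cycle
#       finished = push.execute(reset=False)  # pass stop=True to request a graceful wind-down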