diff --git a/.idea/FCPCodebase.iml b/.idea/FCPCodebase.iml
index b512808..3b6709a 100644
--- a/.idea/FCPCodebase.iml
+++ b/.idea/FCPCodebase.iml
@@ -2,7 +2,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 8a59b20..a33389a 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/agent/Agent.py b/agent/Agent.py
index 4232022..afbefa1 100644
--- a/agent/Agent.py
+++ b/agent/Agent.py
@@ -32,6 +32,8 @@ class Agent(Base_Agent):
         distance_to_final_target = np.linalg.norm(target_2d - self.loc_head_position[:2])
         return self.behavior.execute("Dribble", next_rel_ori, False) # Args: target, is_target_abs, ori, is_ori_abs, distance
 
+    def push(self, target_2d=(15, 0), avoid_obstacles=True):
+        self.behavior.execute("Push_RL")
 
     def beam(self, avoid_center_circle=False):
         r = self.world.robot
diff --git a/agent/__pycache__/Agent.cpython-311.pyc b/agent/__pycache__/Agent.cpython-311.pyc
index 793995d..20e2c34 100644
Binary files a/agent/__pycache__/Agent.cpython-311.pyc and b/agent/__pycache__/Agent.cpython-311.pyc differ
diff --git a/behaviors/Behavior.py b/behaviors/Behavior.py
index acbb4be..424786b 100644
--- a/behaviors/Behavior.py
+++ b/behaviors/Behavior.py
@@ -49,7 +49,9 @@ class Behavior():
         from behaviors.custom.Get_Up.Get_Up import Get_Up
         from behaviors.custom.Step.Step import Step
         from behaviors.custom.Walk.Walk import Walk
-        classes = [Basic_Kick,Dribble,Fall,Get_Up,Step,Walk]
+        from behaviors.custom.Push_RL.Push_RL import Push_RL
+
+        classes = [Basic_Kick,Dribble,Fall,Get_Up,Step,Walk,Push_RL]
 
         '''---- End of manual declarations ----'''
diff --git a/behaviors/__pycache__/Behavior.cpython-311.pyc b/behaviors/__pycache__/Behavior.cpython-311.pyc
index aad0700..8273fe6 100644
Binary files a/behaviors/__pycache__/Behavior.cpython-311.pyc and b/behaviors/__pycache__/Behavior.cpython-311.pyc differ
diff --git a/behaviors/custom/Dribble/__pycache__/Dribble.cpython-311.pyc b/behaviors/custom/Dribble/__pycache__/Dribble.cpython-311.pyc
index f15a3d4..702e387 100644
Binary files a/behaviors/custom/Dribble/__pycache__/Dribble.cpython-311.pyc and b/behaviors/custom/Dribble/__pycache__/Dribble.cpython-311.pyc differ
diff --git a/behaviors/custom/Dribble/__pycache__/Env.cpython-311.pyc b/behaviors/custom/Dribble/__pycache__/Env.cpython-311.pyc
index ad59742..441312e 100644
Binary files a/behaviors/custom/Dribble/__pycache__/Env.cpython-311.pyc and b/behaviors/custom/Dribble/__pycache__/Env.cpython-311.pyc differ
diff --git a/behaviors/custom/Push_RL/Env_HL.py b/behaviors/custom/Push_RL/Env_HL.py
new file mode 100644
index 0000000..d59d711
--- /dev/null
+++ b/behaviors/custom/Push_RL/Env_HL.py
@@ -0,0 +1,79 @@
+import math
+from typing import List
+import numpy as np
+from math_ops.Math_Ops import Math_Ops as U
+from behaviors.custom.Step.Step_Generator import Step_Generator
+from world.commons.Other_Robot import Other_Robot
+from world.World import World
+from agent.Base_Agent import Base_Agent
+
+
+class Env_HL:
+    COLS = 16
+    LINS = 5
+
+    def __init__(self, base_agent: Base_Agent):
+        self.world = base_agent.world
+        self.obs = np.zeros(163, np.float32)
+
+        self.output = 0
+
+    def fill_radar(self, radar, team: List[Other_Robot], radar_part, RADIAL_START, RADIAL_MULT):
+        w = self.world
+        bp = w.ball_abs_pos[:2]
+        C = self.COLS
+        L = self.LINS
+        vec_b_goal = (15.5, 0) - bp
+        vec_b_goal_absdir = U.vector_angle(vec_b_goal)
+        dist_closest_player = 10
+        for t in team:
+            if w.time_local_ms - t.state_last_update > 500:
+                continue
+            vec_b_opp = t.state_abs_pos[:2] - bp
+            dist_b_opp = np.linalg.norm(vec_b_opp)
+            if dist_b_opp < dist_closest_player:
+                dist_closest_player = dist_b_opp
+            vec_b_opp_dir = U.normalize_deg(U.vector_angle(vec_b_opp) - vec_b_goal_absdir)
+            (div, mod) = divmod(vec_b_opp_dir + 180, 360 / C)
+            zone = int(div) % C
+            prog = mod * C / 360
+            ang_column_weight_1 = (zone, 1 - prog)
+            ang_column_weight_2 = ((zone + 1) % C, prog)
+            zone = max(1, 1 + math.log((dist_b_opp + 1e-06) / RADIAL_START, RADIAL_MULT))
+            prog = zone % 1
+            zone = math.ceil(zone) - 1
+            if zone >= L:
+                continue
+            rad_line_weight_1 = None if zone == 0 else (zone - 1, 1 - prog)
+            rad_line_weight_2 = (zone, 1 if zone == 0 else prog)
+            if rad_line_weight_1 is not None:
+                radar[(radar_part, rad_line_weight_1[0], ang_column_weight_1[0])] += rad_line_weight_1[1] * ang_column_weight_1[1]
+                radar[(radar_part, rad_line_weight_1[0], ang_column_weight_2[0])] += rad_line_weight_1[1] * ang_column_weight_2[1]
+            radar[(radar_part, rad_line_weight_2[0], ang_column_weight_1[0])] += rad_line_weight_2[1] * ang_column_weight_1[1]
+            radar[(radar_part, rad_line_weight_2[0], ang_column_weight_2[0])] += rad_line_weight_2[1] * ang_column_weight_2[1]
+        return dist_closest_player
+
+    def observe(self, init=False):
+        if init:
+            self.output = 0
+        radar = np.zeros((2, self.LINS, self.COLS))
+        RADIAL_START = 0.3
+        RADIAL_MULT = 1.7
+        dist_closest_tm = self.fill_radar(radar, self.world.teammates, 0, RADIAL_START, RADIAL_MULT)
+        dist_closest_opp = self.fill_radar(radar, self.world.opponents, 1, RADIAL_START, RADIAL_MULT)
+        self.obs = np.append(radar.flatten(), (dist_closest_tm * 0.5, dist_closest_opp * 0.5, self.output / 40))
+        return self.obs
+
+    def execute(self, action):
+        vec_b_goal = (15.5, 0) - self.world.ball_abs_pos[:2]
+        vec_b_goal_absdir = U.vector_angle(vec_b_goal)
+        rel_direction = action[0] * 60
+        self.output += np.clip(U.normalize_deg(rel_direction - self.output), -45, 45)
+        abs_direction = U.normalize_deg(vec_b_goal_absdir + self.output)
+        return abs_direction
diff --git a/behaviors/custom/Push_RL/Env_LL.py b/behaviors/custom/Push_RL/Env_LL.py
new file mode 100644
index 0000000..eda2550
--- /dev/null
+++ b/behaviors/custom/Push_RL/Env_LL.py
@@ -0,0 +1,140 @@
+import math
+import numpy as np
+from math_ops.Math_Ops import Math_Ops as U
+from behaviors.custom.Step.Step_Generator import Step_Generator
+from agent.Base_Agent import Base_Agent
+
+
+class Env_LL:
+
+    def __init__(self, base_agent: Base_Agent, step_width=None):
+        self.world = base_agent.world
+        self.obs = np.zeros(78, np.float32)
+        self.STEP_DUR = 8
+        self.STEP_Z_SPAN = 0.02
+        self.STEP_Z_MAX = 0.7
+
+        r = self.world.robot
+        nao_specs = base_agent.inv_kinematics.NAO_SPECS
+        self.leg_length = nao_specs[1] + nao_specs[3]
+        feet_y_dev = nao_specs[0] * step_width
+        sample_time = r.STEPTIME
+        max_ankle_z = nao_specs[5]
+
+        self.step_generator = Step_Generator(feet_y_dev, sample_time, max_ankle_z)
+        self.inv_kinematics = base_agent.inv_kinematics
+        self.DEFAULT_ARMS = np.array([-90, -90, 8, 8, 90, 90, 70, 70], np.float32)
+        self.HL_abs_direction = None
+        self.push_speed = 1
+        self.step_counter = 0
+        self.act = np.zeros(16, np.float32)
+        self.values_l = None
+        self.values_r = None
+
+    def observe(self, init=False):
+        w = self.world
+        r = self.world.robot
+
+        if init:
+            self.step_counter = 0
+            self.act = np.zeros(16, np.float32)
+
+        # Fill the observation vector
+        self.obs[0] = min(self.step_counter, 96) / 100
+        self.obs[1] = r.loc_head_z * 3
+        self.obs[2] = r.loc_head_z_vel / 2
+        self.obs[3] = r.imu_torso_roll / 15
+        self.obs[4] = r.imu_torso_pitch / 15
+        self.obs[5:8] = r.gyro / 100
+        self.obs[8:11] = r.acc / 10
+        self.obs[11:17] = r.frp.get('lf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01, 0.01)
+        self.obs[17:23] = r.frp.get('rf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01, 0.01)
+        self.obs[23:43] = r.joints_position[2:22] / 100
+        self.obs[43:63] = r.joints_speed[2:22] / 6.1395
+
+        if init:
+            self.obs[63] = 1
+            self.obs[64] = 1
+            self.obs[65] = 0
+            self.obs[66] = 0
+        else:
+            self.obs[63] = self.step_generator.external_progress
+            self.obs[64] = float(self.step_generator.state_is_left_active)
+            self.obs[65] = float(not self.step_generator.state_is_left_active)
+            self.obs[66] = math.sin((self.step_generator.state_current_ts / self.step_generator.ts_per_step) * math.pi)
+
+        ball_rel_hip_center = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)
+        ball_dist_hip_center = np.linalg.norm(ball_rel_hip_center)
+
+        if init:
+            self.obs[67:70] = (0, 0, 0)
+        elif w.ball_is_visible:
+            self.obs[67:70] = (ball_rel_hip_center - self.obs[70:73]) * 10
+
+        self.obs[70:73] = ball_rel_hip_center
+        self.obs[73] = ball_dist_hip_center * 2
+
+        rel_HL_target = U.normalize_deg(self.HL_abs_direction - r.imu_torso_orientation)
+        self.obs[74] = U.deg_cos(rel_HL_target)
+        self.obs[75] = U.deg_sin(rel_HL_target)
+        self.obs[76] = 2
+        self.obs[77] = 2
+
+        # Find the closest opponent
+        opps_dist = [o.state_horizontal_dist for o in w.opponents]
+        if opps_dist:
+            closest_opp_idx = np.argmin(opps_dist)
+            o = w.opponents[closest_opp_idx]
+
+            if opps_dist[closest_opp_idx] < 1:
+                body_parts_rel_torso_2d_avg = np.zeros(2)
+                weight_sum = 0
+
+                for pos in o.body_parts_cart_rel_pos.values():
+                    bp_rel_torso_2d = r.head_to_body_part_transform('torso', pos)[:2]
+                    weight = math.pow(1e+06, -np.linalg.norm(bp_rel_torso_2d))
+                    body_parts_rel_torso_2d_avg += weight * bp_rel_torso_2d
+                    weight_sum += weight
+
+                if weight_sum > 0:
+                    body_parts_rel_torso_2d_avg /= weight_sum
+                    self.obs[76] = body_parts_rel_torso_2d_avg[0]
+                    self.obs[77] = body_parts_rel_torso_2d_avg[1]
+
+        return self.obs
+
+    def execute_ik(self, l_pos, l_rot, r_pos, r_rot):
+        r = self.world.robot
+        (indices, self.values_l, error_codes) = self.inv_kinematics.leg(l_pos, l_rot, True, dynamic_pose=False)
+        r.set_joints_target_position_direct(indices, self.values_l, harmonize=False)
+
+        (indices, self.values_r, error_codes) = self.inv_kinematics.leg(r_pos, r_rot, False, dynamic_pose=False)
+        r.set_joints_target_position_direct(indices, self.values_r, harmonize=False)
+
+    def execute(self, action):
+        r = self.world.robot
+        self.act = 0.8 * self.act + 0.2 * action * 0.9 * self.push_speed
+
+        (lfy, lfz, rfy, rfz) = self.step_generator.get_target_positions(
+            self.step_counter == 0, self.STEP_DUR, self.STEP_Z_SPAN, self.leg_length * self.STEP_Z_MAX)
+
+        a = self.act
+        l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
+        r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
+
+        l_foot_rot = a[6:9] * (2, 2, 3)
+        r_foot_rot = a[9:12] * (2, 2, 3)
+        l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
+        r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
+
+        arms = np.copy(self.DEFAULT_ARMS)
+        arms[0:4] += a[12:16] * 4
+
+        self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)
+        r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)
+
+        self.step_counter += 1
diff --git a/behaviors/custom/Push_RL/Push_RL.py b/behaviors/custom/Push_RL/Push_RL.py
new file mode 100644
index 0000000..77c7b56
--- /dev/null
+++ b/behaviors/custom/Push_RL/Push_RL.py
@@ -0,0 +1,164 @@
+import pickle
+from behaviors.custom.Push_RL.Env_LL import Env_LL
+from behaviors.custom.Push_RL.Env_HL import Env_HL
+from math_ops.Neural_Network import run_mlp
+from math_ops.Math_Ops import Math_Ops as U
+import numpy as np
+from agent.Base_Agent import Base_Agent
+
+
+class Push_RL:
+
+    def __init__(self, base_agent: Base_Agent):
+        self.world = base_agent.world
+        self.description = 'RL push'
+        self.auto_head = True
+        self.env_LL = Env_LL(base_agent, 0.9 if self.world.robot.type == 3 else 1.2)
+        self.env_HL = Env_HL(base_agent)
+        self.phase = 0
+        self.counter = 0
+        self.behavior = base_agent.behavior
+        self.path_manager = base_agent.path_manager
+        self.inv_kinematics = base_agent.inv_kinematics
+
+        # Model loading (this part may have been lost during decompilation)
+        with open(U.get_active_directory([
+            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
+            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
+            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
+            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
+            "/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl"
+        ][self.world.robot.type]), 'rb') as f:
+            self.model_LL = pickle.load(f)
+        with open(U.get_active_directory([
+            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
+            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
+            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
+            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
+            "/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl"
+        ][self.world.robot.type]), 'rb') as f:
+            self.model_HL = pickle.load(f)
+
+    def execute(self, reset, stop=False):
+        ''' Just push the ball autonomously, no target is required '''
+        w = self.world
+        r = self.world.robot
+        bp = w.ball_abs_pos[:2]
+        me = r.loc_head_position[:2]
+        step_gen = self.behavior.get_custom_behavior_object('Walk').env.step_generator
+        reset_push = False
+
+        if reset:
+            self.phase = 0
+            b_rel = w.ball_rel_torso_cart_pos
+            if self.behavior.previous_behavior == 'Dribble':
+                if b_rel[0] < 0.25:
+                    pass
+                else:
+                    self.phase = 0
+            elif abs(b_rel[1]) < 0.07:
+                self.phase = 1
+                reset_push = True
+
+        if self.phase == 0:
+            goal_target = (15.1, np.clip(bp[1], -0.7, 0.7))
+            goal_ori = U.vector_angle(goal_target - bp)
+            vec_me_ball_ori = U.vector_angle(bp - me)
+            rel_curr_angle = U.normalize_deg(vec_me_ball_ori - goal_ori)
+            abs_targ_angle = goal_ori + np.clip(rel_curr_angle, -60, 60)
+
+            if bp[1] > 9:
+                abs_targ_angle = np.clip(abs_targ_angle, -160, -20)
+            elif bp[1] < -9:
+                abs_targ_angle = np.clip(abs_targ_angle, 20, 160)
+
+            if bp[0] > 14:
+                if bp[1] > 1.1:
+                    abs_targ_angle = np.clip(abs_targ_angle, -140, -100)
+                elif bp[1] < -1.1:
+                    abs_targ_angle = np.clip(abs_targ_angle, 100, 140)
+                else:
+                    abs_targ_angle = goal_ori
+
+            ball_dist = np.linalg.norm(bp - me)
+            ori = None if ball_dist > 0.8 else abs_targ_angle
+            (next_pos, next_ori, dist_to_final_target) = self.path_manager.get_path_to_ball(
+                x_ori=abs_targ_angle, x_dev=-0.19, torso_ori=ori)
+
+            b_rel = w.ball_rel_torso_cart_pos
+            ang_diff = abs(U.normalize_deg(abs_targ_angle - r.imu_torso_orientation))
+
+            # Check whether we can advance to phase 1
+            if b_rel[0] >= 0.25 and abs(b_rel[1]) < 0.05 and step_gen.state_is_left_active and step_gen.switch and \
+               w.time_local_ms - w.ball_abs_pos_last_update < 300 and ang_diff < 10:
+                reset_push = True
+                self.phase += 1
+                self.counter = 0
+            else:
+                dist = max(0.13, dist_to_final_target)
+                reset_walk = reset or self.behavior.previous_behavior != 'Walk'
+                self.behavior.execute_sub_behavior('Walk', reset_walk, next_pos, True,
+                                                   next_ori if dist_to_final_target < 1 else None, True, dist)
+
+            if stop:
+                self.phase = 0  # Reset phase on forced stop
+                return True
+
+        if self.phase == 1:
+            # Check whether the robot is about to leave the field
+            leaving_field = False
+            if (bp[1] > 9 and r.imu_torso_orientation > 0) or \
+               (bp[1] < -9 and r.imu_torso_orientation < 0) or \
+               (bp[0] > 14 and abs(bp[1]) > 1.1):
+                leaving_field = abs(r.imu_torso_orientation) < 90
+
+            ball_hip = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)[:2]
+            dist_ball_our_goal = np.linalg.norm(bp - (-15, 0))
+            dist_us_our_goal = np.linalg.norm(me - (-15, 0))
+
+            # Check whether the ball was lost
+            lost = abs(ball_hip[1]) > 0.2
+            ball_unseen = w.time_local_ms - w.ball_last_seen >= 400
+            ball_far = np.linalg.norm(ball_hip) > 0.3
+
+            terminal = leaving_field or (dist_ball_our_goal + 0.2 < dist_us_our_goal) or \
+                       (not ball_unseen and ball_far and lost)
+
+            if stop or terminal:
+                self.phase += 1
+            elif self.counter % 25 == 0:
+                obs = self.env_HL.observe(reset_push)
+                action = run_mlp(obs, self.model_HL)
+                self.env_LL.HL_abs_direction = self.env_HL.execute(action)
+
+            self.counter += 1
+            self.env_LL.push_speed = 1
+            obs = self.env_LL.observe(reset_push)
+            action = run_mlp(obs, self.model_LL)
+            self.env_LL.execute(action)
+
+            # Draw debug information
+            d = w.draw
+            if d.enabled:
+                vec = U.vector_from_angle(self.env_LL.HL_abs_direction)
+                d.line(me, me + vec, 4, d.Color.red, 'opp_vec')
+
+            return False
+
+        if self.phase > 1:
+            WIND_DOWN_STEPS = 50
+            self.env_LL.push_speed = 1 - self.phase / WIND_DOWN_STEPS
+            self.env_LL.HL_abs_direction = r.imu_torso_orientation
+            obs = self.env_LL.observe(reset_push)
+            action = run_mlp(obs, self.model_LL)
+            self.env_LL.execute(action)
+            self.phase += 1
+
+            if self.phase >= WIND_DOWN_STEPS - 5 or np.linalg.norm(r.get_head_abs_vel(4)) < 0.15:
+                self.phase = 0
+                return True
+
+        return False
+
+    def is_ready(self):
+        ''' Returns True if Push Behavior is ready to start under current game/robot conditions '''
+        return True
\ No newline at end of file
diff --git a/behaviors/custom/Push_RL/__pycache__/Env_HL.cpython-311.pyc b/behaviors/custom/Push_RL/__pycache__/Env_HL.cpython-311.pyc
new file mode 100644
index 0000000..b09924d
Binary files /dev/null and b/behaviors/custom/Push_RL/__pycache__/Env_HL.cpython-311.pyc differ
diff --git a/behaviors/custom/Push_RL/__pycache__/Env_LL.cpython-311.pyc b/behaviors/custom/Push_RL/__pycache__/Env_LL.cpython-311.pyc
new file mode 100644
index 0000000..9322b4a
Binary files /dev/null and b/behaviors/custom/Push_RL/__pycache__/Env_LL.cpython-311.pyc differ
diff --git a/behaviors/custom/Push_RL/__pycache__/Push_RL.cpython-311.pyc b/behaviors/custom/Push_RL/__pycache__/Push_RL.cpython-311.pyc
new file mode 100644
index 0000000..363e946
Binary files /dev/null and b/behaviors/custom/Push_RL/__pycache__/Push_RL.cpython-311.pyc differ
diff --git a/behaviors/custom/Push_RL/push_HL_R1_X3_901120_steps.pkl b/behaviors/custom/Push_RL/push_HL_R1_X3_901120_steps.pkl
new file mode 100644
index 0000000..1982335
Binary files /dev/null and b/behaviors/custom/Push_RL/push_HL_R1_X3_901120_steps.pkl differ
diff --git a/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl b/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl
new file mode 100644
index 0000000..117516d
Binary files /dev/null and b/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl differ
diff --git a/behaviors/custom/Push_RL/push_LL_R1_X3_22937600_steps.pkl b/behaviors/custom/Push_RL/push_LL_R1_X3_22937600_steps.pkl
new file mode 100644
index 0000000..d07f039
Binary files /dev/null and b/behaviors/custom/Push_RL/push_LL_R1_X3_22937600_steps.pkl differ
diff --git a/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl b/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl
new file mode 100644
index 0000000..c664e80
Binary files /dev/null and b/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl differ
diff --git a/world/commons/Path_Manager.py b/world/commons/Path_Manager.py
index 7cc3443..a1ea590 100644
--- a/world/commons/Path_Manager.py
+++ b/world/commons/Path_Manager.py
@@ -94,7 +94,9 @@ class Path_Manager():
 
         def get_hard_radius(t):
             if t.unum in priority_unums:
-                return 1.0 # extra distance for priority roles
+                if len(priority_unums) > 2:
+                    return 1.5
+                return None
             else:
                 return t.state_ground_area[1]+0.2
 
@@ -113,8 +115,8 @@
             soft_radius = 0.6
             hard_radius = lambda o : 0.2
         elif mode == Path_Manager.MODE_DRIBBLE:
-            soft_radius = 2.3
-            hard_radius = lambda o : o.state_ground_area[1]+0.9
+            soft_radius = 1.6
+            hard_radius = lambda o : o.state_ground_area[1]+0.3
         else:
             soft_radius = 1.0
             hard_radius = lambda o : o.state_ground_area[1]+0.2
diff --git a/world/commons/__pycache__/Path_Manager.cpython-311.pyc b/world/commons/__pycache__/Path_Manager.cpython-311.pyc
index d5462d7..ffb705e 100644
Binary files a/world/commons/__pycache__/Path_Manager.cpython-311.pyc and b/world/commons/__pycache__/Path_Manager.cpython-311.pyc differ
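Note on the radar encoding introduced in Env_HL: fill_radar spreads each recently-seen teammate/opponent over a 5x16 polar grid centred on the ball and aligned with the ball-to-goal direction, splitting every robot across the two nearest angular columns and the two nearest log-spaced radial lines with bilinear weights. The snippet below is a minimal standalone sketch of that binning step only, for reference; the helper name soft_bin and the demo values are hypothetical, while COLS, LINS, RADIAL_START and RADIAL_MULT match the constants used in Env_HL.observe.

    import math
    import numpy as np

    COLS, LINS = 16, 5                     # angular columns, radial lines
    RADIAL_START, RADIAL_MULT = 0.3, 1.7   # constants from Env_HL.observe

    def soft_bin(radar, rel_angle_deg, dist):
        ''' Spread one robot over up to 2x2 radar cells with bilinear weights.
            rel_angle_deg: robot direction w.r.t. the ball->goal axis, dist: distance to the ball. '''
        # angular split between the two nearest columns
        div, mod = divmod(rel_angle_deg + 180, 360 / COLS)
        col = int(div) % COLS
        col_prog = mod * COLS / 360
        cols = [(col, 1 - col_prog), ((col + 1) % COLS, col_prog)]

        # radial split between the two nearest log-spaced lines
        lin = max(1, 1 + math.log((dist + 1e-6) / RADIAL_START, RADIAL_MULT))
        lin_prog = lin % 1
        lin = math.ceil(lin) - 1
        if lin >= LINS:
            return                          # too far from the ball: not encoded
        lins = [(lin, 1 if lin == 0 else lin_prog)]
        if lin > 0:
            lins.append((lin - 1, 1 - lin_prog))

        for l, wl in lins:
            for c, wc in cols:
                radar[l, c] += wl * wc

    radar = np.zeros((LINS, COLS))
    soft_bin(radar, rel_angle_deg=10, dist=0.5)   # one opponent, 0.5 m from the ball
    print(radar.sum())                            # weights of a single robot sum to ~1.0

In Env_HL.observe the same weights are written into a (2, 5, 16) array (teammates in part 0, opponents in part 1), which is flattened and concatenated with the two closest-player distances and the running output direction to form the 163-dimensional high-level observation.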