Compare commits
No commits in common. "test_dribble" and "main" have entirely different histories.
test_dribb
...
main
2
.idea/FCPCodebase.iml
generated
2
.idea/FCPCodebase.iml
generated
@ -2,7 +2,7 @@
|
|||||||
<module type="PYTHON_MODULE" version="4">
|
<module type="PYTHON_MODULE" version="4">
|
||||||
<component name="NewModuleRootManager">
|
<component name="NewModuleRootManager">
|
||||||
<content url="file://$MODULE_DIR$" />
|
<content url="file://$MODULE_DIR$" />
|
||||||
<orderEntry type="jdk" jdkName="FCPCodebase" jdkType="Python SDK" />
|
<orderEntry type="jdk" jdkName="FCP" jdkType="Python SDK" />
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
</component>
|
</component>
|
||||||
<component name="PyDocumentationSettings">
|
<component name="PyDocumentationSettings">
|
||||||
|
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
@ -3,5 +3,5 @@
|
|||||||
<component name="Black">
|
<component name="Black">
|
||||||
<option name="sdkName" value="fcp_env" />
|
<option name="sdkName" value="fcp_env" />
|
||||||
</component>
|
</component>
|
||||||
<component name="ProjectRootManager" version="2" project-jdk-name="FCPCodebase" project-jdk-type="Python SDK" />
|
<component name="ProjectRootManager" version="2" project-jdk-name="FCP" project-jdk-type="Python SDK" />
|
||||||
</project>
|
</project>
|
@ -32,8 +32,6 @@ class Agent(Base_Agent):
|
|||||||
distance_to_final_target = np.linalg.norm(target_2d - self.loc_head_position[:2])
|
distance_to_final_target = np.linalg.norm(target_2d - self.loc_head_position[:2])
|
||||||
return self.behavior.execute("Dribble", next_rel_ori,
|
return self.behavior.execute("Dribble", next_rel_ori,
|
||||||
False) # Args: target, is_target_abs, ori, is_ori_abs, distance
|
False) # Args: target, is_target_abs, ori, is_ori_abs, distance
|
||||||
def push(self, target_2d=(15, 0), avoid_obstacles=True):
|
|
||||||
self.behavior.execute("Push_RL")
|
|
||||||
|
|
||||||
def beam(self, avoid_center_circle=False):
|
def beam(self, avoid_center_circle=False):
|
||||||
r = self.world.robot
|
r = self.world.robot
|
||||||
|
Binary file not shown.
@ -49,9 +49,7 @@ class Behavior():
|
|||||||
from behaviors.custom.Get_Up.Get_Up import Get_Up
|
from behaviors.custom.Get_Up.Get_Up import Get_Up
|
||||||
from behaviors.custom.Step.Step import Step
|
from behaviors.custom.Step.Step import Step
|
||||||
from behaviors.custom.Walk.Walk import Walk
|
from behaviors.custom.Walk.Walk import Walk
|
||||||
from behaviors.custom.Push_RL.Push_RL import Push_RL
|
classes = [Basic_Kick,Dribble,Fall,Get_Up,Step,Walk]
|
||||||
|
|
||||||
classes = [Basic_Kick,Dribble,Fall,Get_Up,Step,Walk,Push_RL]
|
|
||||||
|
|
||||||
'''---- End of manual declarations ----'''
|
'''---- End of manual declarations ----'''
|
||||||
|
|
||||||
|
Binary file not shown.
@ -7,6 +7,7 @@ import pickle
|
|||||||
|
|
||||||
|
|
||||||
class Dribble():
|
class Dribble():
|
||||||
|
|
||||||
def __init__(self, base_agent : Base_Agent) -> None:
|
def __init__(self, base_agent : Base_Agent) -> None:
|
||||||
self.behavior = base_agent.behavior
|
self.behavior = base_agent.behavior
|
||||||
self.path_manager = base_agent.path_manager
|
self.path_manager = base_agent.path_manager
|
||||||
|
@ -178,4 +178,4 @@ class Env():
|
|||||||
self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot) # legs
|
self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot) # legs
|
||||||
r.set_joints_target_position_direct( slice(14,22), arms, harmonize=False ) # arms
|
r.set_joints_target_position_direct( slice(14,22), arms, harmonize=False ) # arms
|
||||||
|
|
||||||
self.step_counter += 1
|
self.step_counter += 1
|
Binary file not shown.
Binary file not shown.
@ -1,79 +0,0 @@
|
|||||||
import math
|
|
||||||
from typing import List
|
|
||||||
import numpy as np
|
|
||||||
from math_ops.Math_Ops import Math_Ops as U
|
|
||||||
from behaviors.custom.Step.Step_Generator import Step_Generator
|
|
||||||
from world.commons import Other_Robot
|
|
||||||
from world.World import World
|
|
||||||
from agent.Base_Agent import Base_Agent
|
|
||||||
|
|
||||||
|
|
||||||
class Env_HL:
|
|
||||||
COLS = 16
|
|
||||||
LINS = 5
|
|
||||||
|
|
||||||
def __init__(self, base_agent: Base_Agent):
|
|
||||||
self.world = base_agent.world
|
|
||||||
self.obs = np.zeros(163, np.float32)
|
|
||||||
|
|
||||||
self.output = 0
|
|
||||||
|
|
||||||
def fill_radar(self, radar, team=None, radar_part=None, RADIAL_START=None, RADIAL_MULT=None):
|
|
||||||
if RADIAL_MULT is None:
|
|
||||||
RADIAL_MULT = {'team': List[Other_Robot]}
|
|
||||||
w = self.world
|
|
||||||
bp = w.ball_abs_pos[:2]
|
|
||||||
C = self.COLS
|
|
||||||
L = self.LINS
|
|
||||||
vec_b_goal = (15.5, 0) - bp
|
|
||||||
vec_b_goal_absdir = U.vector_angle(vec_b_goal)
|
|
||||||
dist_closest_player = 10
|
|
||||||
for t in team:
|
|
||||||
if w.time_local_ms - t.state_last_update > 500:
|
|
||||||
continue
|
|
||||||
vec_b_opp = t.state_abs_pos[:2] - bp
|
|
||||||
dist_b_opp = np.linalg.norm(vec_b_opp)
|
|
||||||
if dist_b_opp < dist_closest_player:
|
|
||||||
dist_closest_player = dist_b_opp
|
|
||||||
vec_b_opp_dir = U.normalize_deg(U.vector_angle(vec_b_opp) - vec_b_goal_absdir)
|
|
||||||
(div, mod) = divmod(vec_b_opp_dir + 180, 360 / C)
|
|
||||||
zone = int(div) % C
|
|
||||||
prog = mod * C / 360
|
|
||||||
ang_column_weight_1 = (zone, 1 - prog)
|
|
||||||
ang_column_weight_2 = ((zone + 1) % C, prog)
|
|
||||||
zone = max(1, 1 + math.log((dist_b_opp + 1e-06) / RADIAL_START, RADIAL_MULT))
|
|
||||||
prog = zone % 1
|
|
||||||
zone = math.ceil(zone) - 1
|
|
||||||
if zone >= L:
|
|
||||||
continue
|
|
||||||
rad_line_weight_1 = None if zone == 0 else (zone - 1, 1 - prog)
|
|
||||||
rad_line_weight_2 = (zone, 1 if zone == 0 else prog)
|
|
||||||
if rad_line_weight_1 is not None:
|
|
||||||
radar[(radar_part, rad_line_weight_1[0], ang_column_weight_1[0])] += rad_line_weight_1[1] * \
|
|
||||||
ang_column_weight_1[1]
|
|
||||||
radar[(radar_part, rad_line_weight_1[0], ang_column_weight_2[0])] += rad_line_weight_1[1] * \
|
|
||||||
ang_column_weight_2[1]
|
|
||||||
radar[(radar_part, rad_line_weight_2[0], ang_column_weight_1[0])] += rad_line_weight_2[1] * \
|
|
||||||
ang_column_weight_1[1]
|
|
||||||
radar[(radar_part, rad_line_weight_2[0], ang_column_weight_2[0])] += rad_line_weight_2[1] * \
|
|
||||||
ang_column_weight_2[1]
|
|
||||||
return dist_closest_player
|
|
||||||
|
|
||||||
def observe(self, init=False):
|
|
||||||
if init:
|
|
||||||
self.output = 0
|
|
||||||
radar = np.zeros((2, self.LINS, self.COLS))
|
|
||||||
RADIAL_START = 0.3
|
|
||||||
RADIAL_MULT = 1.7
|
|
||||||
dist_closest_tm = self.fill_radar(radar, self.world.teammates, 0, RADIAL_START, RADIAL_MULT)
|
|
||||||
dist_closest_opp = self.fill_radar(radar, self.world.opponents, 1, RADIAL_START, RADIAL_MULT)
|
|
||||||
self.obs = np.append(radar.flatten(), (dist_closest_tm * 0.5, dist_closest_opp * 0.5, self.output / 40))
|
|
||||||
return self.obs
|
|
||||||
|
|
||||||
def execute(self, action):
|
|
||||||
vec_b_goal = (15.5, 0) - self.world.ball_abs_pos[:2]
|
|
||||||
vec_b_goal_absdir = U.vector_angle(vec_b_goal)
|
|
||||||
rel_direction = action[0] * 60
|
|
||||||
self.output += np.clip(U.normalize_deg(rel_direction - self.output), -45, 45)
|
|
||||||
abs_direction = U.normalize_deg(vec_b_goal_absdir + self.output)
|
|
||||||
return abs_direction
|
|
@ -1,140 +0,0 @@
|
|||||||
import math
|
|
||||||
import numpy as np
|
|
||||||
from math_ops.Math_Ops import Math_Ops as U
|
|
||||||
from behaviors.custom.Step.Step_Generator import Step_Generator
|
|
||||||
from agent.Base_Agent import Base_Agent
|
|
||||||
|
|
||||||
|
|
||||||
class Env_LL:
|
|
||||||
|
|
||||||
def __init__(self, base_agent: Base_Agent, step_width=None):
|
|
||||||
self.world = base_agent.world
|
|
||||||
self.obs = np.zeros(78, np.float32)
|
|
||||||
self.STEP_DUR = 8
|
|
||||||
self.STEP_Z_SPAN = 0.02
|
|
||||||
self.STEP_Z_MAX = 0.7
|
|
||||||
|
|
||||||
r = self.world.robot
|
|
||||||
nao_specs = base_agent.inv_kinematics.NAO_SPECS
|
|
||||||
self.leg_length = nao_specs[1] + nao_specs[3]
|
|
||||||
feet_y_dev = nao_specs[0] * step_width
|
|
||||||
sample_time = r.STEPTIME
|
|
||||||
max_ankle_z = nao_specs[5]
|
|
||||||
|
|
||||||
self.step_generator = Step_Generator(feet_y_dev, sample_time, max_ankle_z)
|
|
||||||
self.inv_kinematics = base_agent.inv_kinematics
|
|
||||||
self.DEFAULT_ARMS = np.array([
|
|
||||||
-90, -90, 8, 8, 90, 90, 70, 70], np.float32)
|
|
||||||
self.HL_abs_direction = None
|
|
||||||
self.push_speed = 1
|
|
||||||
self.step_counter = 0
|
|
||||||
self.act = np.zeros(16, np.float32)
|
|
||||||
self.values_l = None
|
|
||||||
self.values_r = None
|
|
||||||
|
|
||||||
def observe(self, init=False):
|
|
||||||
w = self.world
|
|
||||||
r = self.world.robot
|
|
||||||
|
|
||||||
if init:
|
|
||||||
self.step_counter = 0
|
|
||||||
self.act = np.zeros(16, np.float32)
|
|
||||||
|
|
||||||
# 填充观测向量
|
|
||||||
self.obs[0] = min(self.step_counter, 96) / 100
|
|
||||||
self.obs[1] = r.loc_head_z * 3
|
|
||||||
self.obs[2] = r.loc_head_z_vel / 2
|
|
||||||
self.obs[3] = r.imu_torso_roll / 15
|
|
||||||
self.obs[4] = r.imu_torso_pitch / 15
|
|
||||||
self.obs[5:8] = r.gyro / 100
|
|
||||||
self.obs[8:11] = r.acc / 10
|
|
||||||
self.obs[11:17] = r.frp.get('lf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01, 0.01)
|
|
||||||
self.obs[17:23] = r.frp.get('rf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01, 0.01)
|
|
||||||
self.obs[23:43] = r.joints_position[2:22] / 100
|
|
||||||
self.obs[43:63] = r.joints_speed[2:22] / 6.1395
|
|
||||||
|
|
||||||
if init:
|
|
||||||
self.obs[63] = 1
|
|
||||||
self.obs[64] = 1
|
|
||||||
self.obs[65] = 0
|
|
||||||
self.obs[66] = 0
|
|
||||||
else:
|
|
||||||
self.obs[63] = self.step_generator.external_progress
|
|
||||||
self.obs[64] = float(self.step_generator.state_is_left_active)
|
|
||||||
self.obs[65] = float(not self.step_generator.state_is_left_active)
|
|
||||||
self.obs[66] = math.sin((self.step_generator.state_current_ts / self.step_generator.ts_per_step) * math.pi)
|
|
||||||
|
|
||||||
ball_rel_hip_center = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)
|
|
||||||
ball_dist_hip_center = np.linalg.norm(ball_rel_hip_center)
|
|
||||||
|
|
||||||
if init:
|
|
||||||
self.obs[67:70] = (0, 0, 0)
|
|
||||||
elif w.ball_is_visible:
|
|
||||||
self.obs[67:70] = (ball_rel_hip_center - self.obs[70:73]) * 10
|
|
||||||
|
|
||||||
self.obs[70:73] = ball_rel_hip_center
|
|
||||||
self.obs[73] = ball_dist_hip_center * 2
|
|
||||||
|
|
||||||
rel_HL_target = U.normalize_deg(self.HL_abs_direction - r.imu_torso_orientation)
|
|
||||||
self.obs[74] = U.deg_cos(rel_HL_target)
|
|
||||||
self.obs[75] = U.deg_sin(rel_HL_target)
|
|
||||||
self.obs[76] = 2
|
|
||||||
self.obs[77] = 2
|
|
||||||
|
|
||||||
# 找到最近的对手
|
|
||||||
opps_dist = [o.state_horizontal_dist for o in w.opponents]
|
|
||||||
if opps_dist:
|
|
||||||
closest_opp_idx = np.argmin(opps_dist)
|
|
||||||
o = w.opponents[closest_opp_idx]
|
|
||||||
|
|
||||||
if opps_dist[closest_opp_idx] < 1:
|
|
||||||
body_parts_rel_torso_2d_avg = np.zeros(2)
|
|
||||||
weight_sum = 0
|
|
||||||
|
|
||||||
for pos in o.body_parts_cart_rel_pos.values():
|
|
||||||
bp_rel_torso_2d = r.head_to_body_part_transform('torso', pos)[:2]
|
|
||||||
weight = math.pow(1e+06, -np.linalg.norm(bp_rel_torso_2d))
|
|
||||||
body_parts_rel_torso_2d_avg += weight * bp_rel_torso_2d
|
|
||||||
weight_sum += weight
|
|
||||||
|
|
||||||
if weight_sum > 0:
|
|
||||||
body_parts_rel_torso_2d_avg /= weight_sum
|
|
||||||
self.obs[76] = body_parts_rel_torso_2d_avg[0]
|
|
||||||
self.obs[77] = body_parts_rel_torso_2d_avg[1]
|
|
||||||
|
|
||||||
return self.obs
|
|
||||||
|
|
||||||
def execute_ik(self, l_pos, l_rot, r_pos, r_rot):
|
|
||||||
r = self.world.robot
|
|
||||||
(indices, self.values_l, error_codes) = self.inv_kinematics.leg(
|
|
||||||
l_pos, l_rot, True, dynamic_pose=False)
|
|
||||||
r.set_joints_target_position_direct(indices, self.values_l, harmonize=False)
|
|
||||||
|
|
||||||
(indices, self.values_r, error_codes) = self.inv_kinematics.leg(
|
|
||||||
r_pos, r_rot, False, dynamic_pose=False)
|
|
||||||
r.set_joints_target_position_direct(indices, self.values_r, harmonize=False)
|
|
||||||
|
|
||||||
def execute(self, action):
|
|
||||||
r = self.world.robot
|
|
||||||
self.act = 0.8 * self.act + 0.2 * action * 0.9 * self.push_speed
|
|
||||||
|
|
||||||
(lfy, lfz, rfy, rfz) = self.step_generator.get_target_positions(
|
|
||||||
self.step_counter == 0, self.STEP_DUR, self.STEP_Z_SPAN,
|
|
||||||
self.leg_length * self.STEP_Z_MAX)
|
|
||||||
|
|
||||||
a = self.act
|
|
||||||
l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
|
|
||||||
r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
|
|
||||||
|
|
||||||
l_foot_rot = a[6:9] * (2, 2, 3)
|
|
||||||
r_foot_rot = a[9:12] * (2, 2, 3)
|
|
||||||
l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
|
|
||||||
r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
|
|
||||||
|
|
||||||
arms = np.copy(self.DEFAULT_ARMS)
|
|
||||||
arms[0:4] += a[12:16] * 4
|
|
||||||
|
|
||||||
self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot)
|
|
||||||
r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False)
|
|
||||||
|
|
||||||
self.step_counter += 1
|
|
@ -1,164 +0,0 @@
|
|||||||
import pickle
|
|
||||||
from behaviors.custom.Push_RL.Env_LL import Env_LL
|
|
||||||
from behaviors.custom.Push_RL.Env_HL import Env_HL
|
|
||||||
from math_ops.Neural_Network import run_mlp
|
|
||||||
from math_ops.Math_Ops import Math_Ops as U
|
|
||||||
import numpy as np
|
|
||||||
from agent.Base_Agent import Base_Agent
|
|
||||||
|
|
||||||
|
|
||||||
class Push_RL:
|
|
||||||
|
|
||||||
def __init__(self, base_agent: Base_Agent):
|
|
||||||
self.world = base_agent.world
|
|
||||||
self.description = 'RL push'
|
|
||||||
self.auto_head = True
|
|
||||||
self.env_LL = Env_LL(base_agent, 0.9 if self.world.robot.type == 3 else 1.2)
|
|
||||||
self.env_HL = Env_HL(base_agent)
|
|
||||||
self.phase = 0
|
|
||||||
self.counter = 0
|
|
||||||
self.behavior = base_agent.behavior
|
|
||||||
self.path_manager = base_agent.path_manager
|
|
||||||
self.inv_kinematics = base_agent.inv_kinematics
|
|
||||||
|
|
||||||
# 模型加载(这部分可能在反编译中丢失)
|
|
||||||
with open(U.get_active_directory([
|
|
||||||
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
|
|
||||||
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
|
|
||||||
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
|
|
||||||
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl",
|
|
||||||
"/behaviors/custom/Push_RL/push_LL_R1_X9_49152000_steps.pkl"
|
|
||||||
][self.world.robot.type]), 'rb') as f:
|
|
||||||
self.model_LL = pickle.load(f)
|
|
||||||
with open(U.get_active_directory([
|
|
||||||
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
|
|
||||||
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
|
|
||||||
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
|
|
||||||
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl",
|
|
||||||
"/behaviors/custom/Push_RL/push_HL_R1_X9_1966080_steps.pkl"
|
|
||||||
][self.world.robot.type]), 'rb') as f:
|
|
||||||
self.model_HL = pickle.load(f)
|
|
||||||
def execute(self, reset, stop=False):
|
|
||||||
''' Just push the ball autonomously, no target is required '''
|
|
||||||
w = self.world
|
|
||||||
r = self.world.robot
|
|
||||||
bp = w.ball_abs_pos[:2]
|
|
||||||
me = r.loc_head_position[:2]
|
|
||||||
step_gen = self.behavior.get_custom_behavior_object('Walk').env.step_generator
|
|
||||||
reset_push = False
|
|
||||||
|
|
||||||
if reset:
|
|
||||||
self.phase = 0
|
|
||||||
b_rel = w.ball_rel_torso_cart_pos
|
|
||||||
if self.behavior.previous_behavior == 'Dribble':
|
|
||||||
if b_rel[0] < 0.25:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
self.phase = 0
|
|
||||||
elif abs(b_rel[1]) < 0.07:
|
|
||||||
self.phase = 1
|
|
||||||
reset_push = True
|
|
||||||
|
|
||||||
if self.phase == 0:
|
|
||||||
goal_target = (15.1, np.clip(bp[1], -0.7, 0.7))
|
|
||||||
goal_ori = U.vector_angle(goal_target - bp)
|
|
||||||
vec_me_ball_ori = U.vector_angle(bp - me)
|
|
||||||
rel_curr_angle = U.normalize_deg(vec_me_ball_ori - goal_ori)
|
|
||||||
abs_targ_angle = goal_ori + np.clip(rel_curr_angle, -60, 60)
|
|
||||||
|
|
||||||
if bp[1] > 9:
|
|
||||||
abs_targ_angle = np.clip(abs_targ_angle, -160, -20)
|
|
||||||
elif bp[1] < -9:
|
|
||||||
abs_targ_angle = np.clip(abs_targ_angle, 20, 160)
|
|
||||||
|
|
||||||
if bp[0] > 14:
|
|
||||||
if bp[1] > 1.1:
|
|
||||||
abs_targ_angle = np.clip(abs_targ_angle, -140, -100)
|
|
||||||
elif bp[1] < -1.1:
|
|
||||||
abs_targ_angle = np.clip(abs_targ_angle, 100, 140)
|
|
||||||
else:
|
|
||||||
abs_targ_angle = goal_ori
|
|
||||||
|
|
||||||
ball_dist = np.linalg.norm(bp - me)
|
|
||||||
ori = None if ball_dist > 0.8 else abs_targ_angle
|
|
||||||
(next_pos, next_ori, dist_to_final_target) = self.path_manager.get_path_to_ball(
|
|
||||||
x_ori=abs_targ_angle, x_dev=-0.19, torso_ori=ori)
|
|
||||||
|
|
||||||
b_rel = w.ball_rel_torso_cart_pos
|
|
||||||
ang_diff = abs(U.normalize_deg(abs_targ_angle - r.imu_torso_orientation))
|
|
||||||
|
|
||||||
# 检查是否可以进入阶段1
|
|
||||||
if b_rel[0] >= 0.25 and abs(b_rel[1]) < 0.05 and step_gen.state_is_left_active and step_gen.switch and \
|
|
||||||
w.time_local_ms - w.ball_abs_pos_last_update < 300 and ang_diff < 10:
|
|
||||||
reset_push = True
|
|
||||||
self.phase += 1
|
|
||||||
self.counter = 0
|
|
||||||
else:
|
|
||||||
dist = max(0.13, dist_to_final_target)
|
|
||||||
reset_walk = reset or self.behavior.previous_behavior != 'Walk'
|
|
||||||
self.behavior.execute_sub_behavior('Walk', reset_walk, next_pos, True,
|
|
||||||
next_ori if dist_to_final_target < 1 else None, True, dist)
|
|
||||||
|
|
||||||
if stop:
|
|
||||||
self.phase = 0 # Reset phase on forced stop
|
|
||||||
return True
|
|
||||||
|
|
||||||
if self.phase == 1:
|
|
||||||
# 检查是否要离开场地
|
|
||||||
leaving_field = False
|
|
||||||
if (bp[1] > 9 and r.imu_torso_orientation > 0) or \
|
|
||||||
(bp[1] < -9 and r.imu_torso_orientation < 0) or \
|
|
||||||
(bp[0] > 14 and abs(bp[1]) > 1.1):
|
|
||||||
leaving_field = abs(r.imu_torso_orientation) < 90
|
|
||||||
|
|
||||||
ball_hip = self.inv_kinematics.torso_to_hip_transform(w.ball_rel_torso_cart_pos)[:2]
|
|
||||||
dist_ball_our_goal = np.linalg.norm(bp - (-15, 0))
|
|
||||||
dist_us_our_goal = np.linalg.norm(me - (-15, 0))
|
|
||||||
|
|
||||||
# 检查是否丢球
|
|
||||||
lost = abs(ball_hip[1]) > 0.2
|
|
||||||
ball_unseen = w.time_local_ms - w.ball_last_seen >= 400
|
|
||||||
ball_far = np.linalg.norm(ball_hip) > 0.3
|
|
||||||
|
|
||||||
terminal = leaving_field or (dist_ball_our_goal + 0.2 < dist_us_our_goal) or \
|
|
||||||
(not ball_unseen and ball_far and lost)
|
|
||||||
|
|
||||||
if stop or terminal:
|
|
||||||
self.phase += 1
|
|
||||||
elif self.counter % 25 == 0:
|
|
||||||
obs = self.env_HL.observe(reset_push)
|
|
||||||
action = run_mlp(obs, self.model_HL)
|
|
||||||
self.env_LL.HL_abs_direction = self.env_HL.execute(action)
|
|
||||||
|
|
||||||
self.counter += 1
|
|
||||||
self.env_LL.push_speed = 1
|
|
||||||
obs = self.env_LL.observe(reset_push)
|
|
||||||
action = run_mlp(obs, self.model_LL)
|
|
||||||
self.env_LL.execute(action)
|
|
||||||
|
|
||||||
# 绘制调试信息
|
|
||||||
d = w.draw
|
|
||||||
if d.enabled:
|
|
||||||
vec = U.vector_from_angle(self.env_LL.HL_abs_direction)
|
|
||||||
d.line(me, me + vec, 4, d.Color.red, 'opp_vec')
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
if self.phase > 1:
|
|
||||||
WIND_DOWN_STEPS = 50
|
|
||||||
self.env_LL.push_speed = 1 - self.phase / WIND_DOWN_STEPS
|
|
||||||
self.env_LL.HL_abs_direction = r.imu_torso_orientation
|
|
||||||
obs = self.env_LL.observe(reset_push)
|
|
||||||
action = run_mlp(obs, self.model_LL)
|
|
||||||
self.env_LL.execute(action)
|
|
||||||
self.phase += 1
|
|
||||||
|
|
||||||
if self.phase >= WIND_DOWN_STEPS - 5 or np.linalg.norm(r.get_head_abs_vel(4)) < 0.15:
|
|
||||||
self.phase = 0
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def is_ready(self):
|
|
||||||
''' Returns True if Push Behavior is ready to start under current game/robot conditions '''
|
|
||||||
return True
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,2 +0,0 @@
|
|||||||
{Fri 19:02:40 Step:2513} World_Parser.py: Received field line with NaNs [ 2.98 43.85 21.32 nan nan nan]
|
|
||||||
{Fri 19:03:14 Step:4213} World_Parser.py: Received field line with NaNs [ 3.38 35.5 20.11 nan nan nan]
|
|
@ -1,396 +0,0 @@
|
|||||||
import math
|
|
||||||
import random
|
|
||||||
|
|
||||||
from agent.Base_Agent import Base_Agent as Agent
|
|
||||||
from behaviors.custom.Step.Step import Step
|
|
||||||
from world.commons.Draw import Draw
|
|
||||||
from stable_baselines3 import PPO
|
|
||||||
from stable_baselines3.common.vec_env import SubprocVecEnv
|
|
||||||
from scripts.commons.Server import Server
|
|
||||||
from scripts.commons.Train_Base import Train_Base
|
|
||||||
from time import sleep
|
|
||||||
import os, gym
|
|
||||||
import numpy as np
|
|
||||||
from math_ops.Math_Ops import Math_Ops as U
|
|
||||||
from math_ops.Math_Ops import Math_Ops as M
|
|
||||||
from behaviors.custom.Step.Step_Generator import Step_Generator
|
|
||||||
|
|
||||||
'''
|
|
||||||
Objective:
|
|
||||||
Learn how to run forward using step primitive
|
|
||||||
----------
|
|
||||||
- class Basic_Run: implements an OpenAI custom gym
|
|
||||||
- class Train: implements algorithms to train a new model or test an existing model
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class dribble(gym.Env):
|
|
||||||
def __init__(self, ip, server_p, monitor_p, r_type, enable_draw) -> None:
|
|
||||||
self.abs_ori = 75
|
|
||||||
self.ball_dist_hip_center_2d = 0
|
|
||||||
self.ball_dist_hip_center = None
|
|
||||||
self.internal_rel_orientation = None
|
|
||||||
self.dribble_speed = 1
|
|
||||||
self.gym_last_internal_abs_ori = None
|
|
||||||
self.internal_target_vel = None
|
|
||||||
self.Gen_player_pos = None
|
|
||||||
self.internal_target = None
|
|
||||||
self.values_l = None
|
|
||||||
self.values_r = None
|
|
||||||
self.reset_time = None
|
|
||||||
self.behavior = None
|
|
||||||
self.robot_type = r_type
|
|
||||||
self.kick_ori = 0
|
|
||||||
self.terminal = False
|
|
||||||
# Args: Server IP, Agent Port, Monitor Port, Uniform No., Robot Type, Team Name, Enable Log, Enable Draw
|
|
||||||
self.player = Agent(ip, server_p, monitor_p, 1, self.robot_type, "Gym", True, True)
|
|
||||||
|
|
||||||
self.step_counter = 0 # to limit episode size
|
|
||||||
self.ik = self.player.inv_kinematics
|
|
||||||
self.dribble_rel_orientation = 0 # relative to imu_torso_orientation (in degrees)
|
|
||||||
# Step behavior defaults
|
|
||||||
self.STEP_DUR = 8
|
|
||||||
self.STEP_Z_SPAN = 0.02
|
|
||||||
self.STEP_Z_MAX = 0.70
|
|
||||||
nao_specs = self.ik.NAO_SPECS
|
|
||||||
self.leg_length = nao_specs[1] + nao_specs[3] # upper leg height + lower leg height
|
|
||||||
feet_y_dev = nao_specs[0] * 1.12 # wider step
|
|
||||||
sample_time = self.player.world.robot.STEPTIME
|
|
||||||
max_ankle_z = nao_specs[5]
|
|
||||||
self.step_generator = Step_Generator(feet_y_dev, sample_time, max_ankle_z)
|
|
||||||
self.DEFAULT_ARMS = np.array([-90, -90, 8, 8, 90, 90, 70, 70], np.float32)
|
|
||||||
|
|
||||||
self.act = np.zeros(16, np.float32) # memory variable
|
|
||||||
self.path_manager = self.player.path_manager
|
|
||||||
|
|
||||||
# State space
|
|
||||||
obs_size = 76
|
|
||||||
self.obs = np.zeros(obs_size, np.float32)
|
|
||||||
self.observation_space = gym.spaces.Box(low=np.full(obs_size, -np.inf, np.float32),
|
|
||||||
high=np.full(obs_size, np.inf, np.float32), dtype=np.float32)
|
|
||||||
|
|
||||||
self.ball_x_center = 0.21
|
|
||||||
self.ball_y_center = -0.045
|
|
||||||
# Action space
|
|
||||||
|
|
||||||
MAX = np.finfo(np.float32).max
|
|
||||||
self.no_of_actions = act_size = 16
|
|
||||||
self.action_space = gym.spaces.Box(low=np.full(act_size, -MAX, np.float32),
|
|
||||||
high=np.full(act_size, MAX, np.float32), dtype=np.float32)
|
|
||||||
|
|
||||||
# Place ball far away to keep landmarks in FoV (head follows ball while using Step behavior)
|
|
||||||
self.player.scom.unofficial_move_ball((14, 0, 0.042))
|
|
||||||
|
|
||||||
self.player.scom.unofficial_set_play_mode("PlayOn")
|
|
||||||
self.player.scom.unofficial_move_ball((0, 0, 0))
|
|
||||||
|
|
||||||
def observe(self, init=False, virtual_ball=False):
|
|
||||||
r = self.player.world.robot
|
|
||||||
w = self.player.world
|
|
||||||
|
|
||||||
if init: # reset variables
|
|
||||||
self.step_counter = 0
|
|
||||||
self.act = np.zeros(16, np.float32) # memory variable
|
|
||||||
|
|
||||||
# index observation naive normalization
|
|
||||||
self.obs[0] = min(self.step_counter, 12 * 8) / 100 # simple counter: 0,1,2,3...
|
|
||||||
self.obs[1] = r.loc_head_z * 3 # z coordinate (torso)
|
|
||||||
self.obs[2] = r.loc_head_z_vel / 2 # z velocity (torso)
|
|
||||||
self.obs[3] = r.imu_torso_roll / 15 # absolute torso roll in deg
|
|
||||||
self.obs[4] = r.imu_torso_pitch / 15 # absolute torso pitch in deg
|
|
||||||
self.obs[5:8] = r.gyro / 100 # gyroscope
|
|
||||||
self.obs[8:11] = r.acc / 10 # accelerometer
|
|
||||||
|
|
||||||
self.obs[11:17] = r.frp.get('lf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01,
|
|
||||||
0.01) # left foot: relative point of origin (p) and force vector (f) -> (px,py,pz,fx,fy,fz)*
|
|
||||||
self.obs[17:23] = r.frp.get('rf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01,
|
|
||||||
0.01) # right foot: relative point of origin (p) and force vector (f) -> (px,py,pz,fx,fy,fz)*
|
|
||||||
# *if foot is not touching the ground, then (px=0,py=0,pz=0,fx=0,fy=0,fz=0)
|
|
||||||
|
|
||||||
self.obs[23:43] = r.joints_position[2:22] / 100 # position of all joints except head & toes (for robot type 4)
|
|
||||||
self.obs[43:63] = r.joints_speed[2:22] / 6.1395 # speed of all joints except head & toes (for robot type 4)
|
|
||||||
|
|
||||||
'''
|
|
||||||
Expected observations for walking state:
|
|
||||||
Time step R 0 1 2 3 4 5 6 7 0
|
|
||||||
Progress 1 0 .14 .28 .43 .57 .71 .86 1 0
|
|
||||||
Left leg active T F F F F F F F F T
|
|
||||||
'''
|
|
||||||
|
|
||||||
if init: # the walking parameters refer to the last parameters in effect (after a reset, they are pointless)
|
|
||||||
self.obs[63] = 1 # step progress
|
|
||||||
self.obs[64] = 1 # 1 if left leg is active
|
|
||||||
self.obs[65] = 0 # 1 if right leg is active
|
|
||||||
self.obs[66] = 0
|
|
||||||
else:
|
|
||||||
self.obs[63] = self.step_generator.external_progress # step progress
|
|
||||||
self.obs[64] = float(self.step_generator.state_is_left_active) # 1 if left leg is active
|
|
||||||
self.obs[65] = float(not self.step_generator.state_is_left_active) # 1 if right leg is active
|
|
||||||
self.obs[66] = math.sin(self.step_generator.state_current_ts / self.step_generator.ts_per_step * math.pi)
|
|
||||||
#ball
|
|
||||||
ball_rel_hip_center = self.ik.torso_to_hip_transform(w.ball_rel_torso_cart_pos)
|
|
||||||
ball_dist_hip_center = np.linalg.norm(ball_rel_hip_center)
|
|
||||||
if init:
|
|
||||||
self.obs[67:70] = (0, 0, 0) # Initial velocity is 0
|
|
||||||
elif w.ball_is_visible:
|
|
||||||
self.obs[67:70] = (ball_rel_hip_center - self.obs[
|
|
||||||
70:73]) * 10 # Ball velocity, relative to ankle's midpoint
|
|
||||||
|
|
||||||
self.obs[70:73] = ball_rel_hip_center # Ball position, relative to hip
|
|
||||||
self.obs[73] = self.ball_dist_hip_center * 2
|
|
||||||
|
|
||||||
if virtual_ball: # simulate the ball between the robot's feet
|
|
||||||
self.obs[67:74] = (0, 0, 0, 0.05, 0, -0.175, 0.36)
|
|
||||||
|
|
||||||
'''
|
|
||||||
Create internal target with a smoother variation
|
|
||||||
'''
|
|
||||||
|
|
||||||
MAX_ROTATION_DIFF = 20 # max difference (degrees) per visual step
|
|
||||||
MAX_ROTATION_DIST = 80
|
|
||||||
|
|
||||||
if init:
|
|
||||||
self.internal_rel_orientation = 0
|
|
||||||
self.internal_target_vel = 0
|
|
||||||
self.gym_last_internal_abs_ori = r.imu_torso_orientation # for training purposes (reward)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------- compute internal target
|
|
||||||
|
|
||||||
if w.vision_is_up_to_date:
|
|
||||||
previous_internal_rel_orientation = np.copy(self.internal_rel_orientation)
|
|
||||||
|
|
||||||
internal_ori_diff = np.clip(M.normalize_deg(self.dribble_rel_orientation - self.internal_rel_orientation),
|
|
||||||
-MAX_ROTATION_DIFF, MAX_ROTATION_DIFF)
|
|
||||||
self.internal_rel_orientation = np.clip(M.normalize_deg(self.internal_rel_orientation + internal_ori_diff),
|
|
||||||
-MAX_ROTATION_DIST, MAX_ROTATION_DIST)
|
|
||||||
|
|
||||||
# Observations
|
|
||||||
self.internal_target_vel = self.internal_rel_orientation - previous_internal_rel_orientation
|
|
||||||
|
|
||||||
self.gym_last_internal_abs_ori = self.internal_rel_orientation + r.imu_torso_orientation
|
|
||||||
# ----------------------------------------------------------------- observations
|
|
||||||
|
|
||||||
self.obs[74] = self.internal_rel_orientation / MAX_ROTATION_DIST
|
|
||||||
self.obs[75] = self.internal_target_vel / MAX_ROTATION_DIFF
|
|
||||||
|
|
||||||
return self.obs
|
|
||||||
|
|
||||||
def execute_ik(self, l_pos, l_rot, r_pos, r_rot):
|
|
||||||
r = self.player.world.robot
|
|
||||||
# Apply IK to each leg + Set joint targets
|
|
||||||
|
|
||||||
# Left leg
|
|
||||||
indices, self.values_l, error_codes = self.ik.leg(l_pos, l_rot, True, dynamic_pose=False)
|
|
||||||
|
|
||||||
r.set_joints_target_position_direct(indices, self.values_l, harmonize=False)
|
|
||||||
|
|
||||||
# Right leg
|
|
||||||
indices, self.values_r, error_codes = self.ik.leg(r_pos, r_rot, False, dynamic_pose=False)
|
|
||||||
|
|
||||||
r.set_joints_target_position_direct(indices, self.values_r, harmonize=False)
|
|
||||||
|
|
||||||
def sync(self):
|
|
||||||
''' Run a single simulation step '''
|
|
||||||
r = self.player.world.robot
|
|
||||||
self.player.scom.commit_and_send(r.get_command())
|
|
||||||
self.player.scom.receive()
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
# print("reset")
|
|
||||||
'''
|
|
||||||
Reset and stabilize the robot
|
|
||||||
Note: for some behaviors it would be better to reduce stabilization or add noise
|
|
||||||
'''
|
|
||||||
self.abs_ori = 45
|
|
||||||
self.player.scom.unofficial_set_play_mode("PlayOn")
|
|
||||||
|
|
||||||
Gen_ball_pos = [- 9, 0, 0]
|
|
||||||
self.Gen_player_pos = (Gen_ball_pos[0] - 1.5, Gen_ball_pos[1], 0.5)
|
|
||||||
self.player.scom.unofficial_move_ball((Gen_ball_pos[0], Gen_ball_pos[1], Gen_ball_pos[2]))
|
|
||||||
self.step_counter = 0
|
|
||||||
self.behavior = self.player.behavior
|
|
||||||
r = self.player.world.robot
|
|
||||||
w = self.player.world
|
|
||||||
t = w.time_local_ms
|
|
||||||
self.reset_time = t
|
|
||||||
self.b_rel = w.ball_rel_torso_cart_pos[:2]
|
|
||||||
self.path_manager = self.player.path_manager
|
|
||||||
|
|
||||||
for _ in range(25):
|
|
||||||
self.player.scom.unofficial_beam(self.Gen_player_pos, 0) # beam player continuously (floating above ground)
|
|
||||||
self.player.behavior.execute("Zero_Bent_Knees")
|
|
||||||
self.sync()
|
|
||||||
|
|
||||||
# beam player to ground
|
|
||||||
self.player.scom.unofficial_beam(self.Gen_player_pos, 0)
|
|
||||||
r.joints_target_speed[
|
|
||||||
0] = 0.01 # move head to trigger physics update (rcssserver3d bug when no joint is moving)
|
|
||||||
self.sync()
|
|
||||||
|
|
||||||
# stabilize on ground
|
|
||||||
for _ in range(7):
|
|
||||||
self.player.behavior.execute("Zero_Bent_Knees")
|
|
||||||
self.sync()
|
|
||||||
# walk to ball
|
|
||||||
while True:
|
|
||||||
self.b_rel = w.ball_rel_torso_cart_pos[:2]
|
|
||||||
if self.player.behavior.is_ready("Get_Up"):
|
|
||||||
self.player.behavior.execute_to_completion("Get_Up")
|
|
||||||
if 0.26 > self.b_rel[0] > 0.18 and abs(self.b_rel[1]) < 0.04 and w.ball_is_visible:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
if self.player.behavior.is_ready("Get_Up"):
|
|
||||||
self.player.behavior.execute_to_completion("Get_Up")
|
|
||||||
|
|
||||||
reset_walk = self.behavior.previous_behavior != "Walk" # reset walk if it wasn't the previous behavior
|
|
||||||
rel_target = self.b_rel + (-0.23, 0) # relative target is a circle (center: ball, radius:0.23m)
|
|
||||||
rel_ori = M.vector_angle(self.b_rel) # relative orientation of ball, NOT the target!
|
|
||||||
dist = max(0.08, np.linalg.norm(rel_target) * 0.7) # slow approach
|
|
||||||
self.behavior.execute_sub_behavior("Walk", reset_walk, rel_target, False, rel_ori, False,
|
|
||||||
dist) # target, is_target_abs, ori, is_ori_abs, distance
|
|
||||||
|
|
||||||
self.sync()
|
|
||||||
|
|
||||||
self.act = np.zeros(self.no_of_actions, np.float32)
|
|
||||||
return self.observe(True)
|
|
||||||
|
|
||||||
def render(self, mode='human', close=False):
|
|
||||||
return
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
Draw.clear_all()
|
|
||||||
self.player.terminate()
|
|
||||||
|
|
||||||
def step(self, action):
|
|
||||||
|
|
||||||
r = (self.
|
|
||||||
player.world.robot)
|
|
||||||
|
|
||||||
w = self.player.world
|
|
||||||
d = w.draw
|
|
||||||
if w.ball_abs_pos[1] > 0: #
|
|
||||||
dribble_target = (15, 5)
|
|
||||||
else:
|
|
||||||
dribble_target = (15, -5)
|
|
||||||
|
|
||||||
self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
|
|
||||||
# exponential moving average
|
|
||||||
self.act = 0.85 * self.act + 0.15 * action * 0.7 * 0.95 * self.dribble_speed
|
|
||||||
|
|
||||||
# execute Step behavior to extract the target positions of each leg (we will override these targets)
|
|
||||||
lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
|
|
||||||
self.STEP_Z_SPAN,
|
|
||||||
self.leg_length * self.STEP_Z_MAX)
|
|
||||||
|
|
||||||
# Leg IK
|
|
||||||
a = self.act
|
|
||||||
l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
|
|
||||||
r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
|
|
||||||
l_foot_rot = a[6:9] * (2, 2, 3)
|
|
||||||
r_foot_rot = a[9:12] * (2, 2, 3)
|
|
||||||
|
|
||||||
# Limit leg yaw/pitch (and add bias)
|
|
||||||
l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
|
|
||||||
r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
|
|
||||||
|
|
||||||
# Arms actions
|
|
||||||
arms = np.copy(self.DEFAULT_ARMS) # default arms pose
|
|
||||||
arms[0:4] += a[12:16] * 4 # arms pitch+roll
|
|
||||||
|
|
||||||
# Set target positions
|
|
||||||
self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot) # legs
|
|
||||||
r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False) # arms
|
|
||||||
self.sync()
|
|
||||||
self.step_counter += 1
|
|
||||||
|
|
||||||
# 收集观测
|
|
||||||
obs = self.observe()
|
|
||||||
angle_rad = np.radians(self.gym_last_internal_abs_ori) # 将角度转换为弧度
|
|
||||||
unit_vector = np.array([np.cos(angle_rad), np.sin(angle_rad)]) # 计算单位向量
|
|
||||||
if np.linalg.norm(w.ball_cheat_abs_vel[:2]) != 0:
|
|
||||||
cos_theta = np.dot(unit_vector, w.ball_cheat_abs_vel[:2]) / (
|
|
||||||
np.linalg.norm(unit_vector) * np.linalg.norm(w.ball_cheat_abs_vel[:2]))
|
|
||||||
else:
|
|
||||||
cos_theta = 0
|
|
||||||
# 计算奖励
|
|
||||||
reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta
|
|
||||||
|
|
||||||
if self.ball_dist_hip_center_2d < 0.115:
|
|
||||||
reward = 0
|
|
||||||
# 判断终止
|
|
||||||
if self.player.behavior.is_ready("Get_Up"):
|
|
||||||
self.terminal = True
|
|
||||||
elif w.time_local_ms - self.reset_time > 7500 * 3 or np.linalg.norm(
|
|
||||||
w.ball_cheat_abs_pos[:2] - r.loc_head_position[:2]) > 0.45 or not w.is_ball_abs_pos_from_vision:
|
|
||||||
self.terminal = True
|
|
||||||
else:
|
|
||||||
self.terminal = False
|
|
||||||
return obs, reward, self.terminal, {}
|
|
||||||
|
|
||||||
|
|
||||||
class Train(Train_Base):
|
|
||||||
def __init__(self, script) -> None:
|
|
||||||
super().__init__(script)
|
|
||||||
|
|
||||||
def train(self, args):
|
|
||||||
|
|
||||||
# --------------------------------------- Learning parameters
|
|
||||||
n_envs = min(1, os.cpu_count())
|
|
||||||
n_steps_per_env = 1024 # RolloutBuffer is of size (n_steps_per_env * n_envs)
|
|
||||||
minibatch_size = 64 # should be a factor of (n_steps_per_env * n_envs)
|
|
||||||
total_steps = 50000000
|
|
||||||
learning_rate = 3e-4
|
|
||||||
folder_name = f'Sprint_R{self.robot_type}'
|
|
||||||
model_path = f'./scripts/gyms/logs/{folder_name}/'
|
|
||||||
|
|
||||||
# print("Model path:", model_path)
|
|
||||||
|
|
||||||
# --------------------------------------- Run algorithm
|
|
||||||
def init_env(i_env):
|
|
||||||
def thunk():
|
|
||||||
return dribble(self.ip, self.server_p + i_env, self.monitor_p_1000 + i_env, self.robot_type, False)
|
|
||||||
|
|
||||||
return thunk
|
|
||||||
|
|
||||||
servers = Server(self.server_p, self.monitor_p_1000, n_envs + 1) # include 1 extra server for testing
|
|
||||||
|
|
||||||
env = SubprocVecEnv([init_env(i) for i in range(n_envs)])
|
|
||||||
eval_env = SubprocVecEnv([init_env(n_envs)])
|
|
||||||
|
|
||||||
try:
|
|
||||||
if "model_file" in args: # retrain
|
|
||||||
model = PPO.load(args["model_file"], env=env, device="cuda", n_envs=n_envs, n_steps=n_steps_per_env,
|
|
||||||
batch_size=minibatch_size, learning_rate=learning_rate)
|
|
||||||
else: # train new model
|
|
||||||
model = PPO("MlpPolicy", env=env, verbose=1, n_steps=n_steps_per_env, batch_size=minibatch_size,
|
|
||||||
learning_rate=learning_rate, device="cuda")
|
|
||||||
|
|
||||||
model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,
|
|
||||||
eval_freq=n_steps_per_env * 20, save_freq=n_steps_per_env * 20,
|
|
||||||
backup_env_file=__file__)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
sleep(1) # wait for child processes
|
|
||||||
("\nctrl+c pressed, aborting...\n")
|
|
||||||
servers.kill()
|
|
||||||
return
|
|
||||||
|
|
||||||
env.close()
|
|
||||||
eval_env.close()
|
|
||||||
servers.kill()
|
|
||||||
|
|
||||||
def test(self, args):
|
|
||||||
|
|
||||||
# Uses different server and monitor ports
|
|
||||||
server = Server(self.server_p - 1, self.monitor_p, 1)
|
|
||||||
env = dribble(self.ip, self.server_p - 1, self.monitor_p, self.robot_type, True)
|
|
||||||
model = PPO.load(args["model_file"], env=env)
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.export_model(args["model_file"], args["model_file"] + ".pkl",
|
|
||||||
False) # Export to pkl to create custom behavior
|
|
||||||
self.test_model(model, env, log_path=args["folder_dir"], model_path=args["folder_dir"])
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print()
|
|
||||||
|
|
||||||
env.close()
|
|
||||||
server.kill()
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,403 +0,0 @@
|
|||||||
import math
|
|
||||||
import random
|
|
||||||
import pickle
|
|
||||||
from agent.Base_Agent import Base_Agent as Agent
|
|
||||||
from behaviors.custom.Step.Step import Step
|
|
||||||
from world.commons.Draw import Draw
|
|
||||||
from stable_baselines3 import PPO
|
|
||||||
from stable_baselines3.common.vec_env import SubprocVecEnv
|
|
||||||
from scripts.commons.Server import Server
|
|
||||||
from scripts.commons.Train_Base import Train_Base
|
|
||||||
from time import sleep
|
|
||||||
import os, gym
|
|
||||||
import numpy as np
|
|
||||||
from math_ops.Math_Ops import Math_Ops as U
|
|
||||||
from math_ops.Math_Ops import Math_Ops as M
|
|
||||||
from behaviors.custom.Step.Step_Generator import Step_Generator
|
|
||||||
from sb3_contrib import GAIL
|
|
||||||
|
|
||||||
'''
|
|
||||||
Objective:
|
|
||||||
Learn how to run forward using step primitive
|
|
||||||
----------
|
|
||||||
- class Basic_Run: implements an OpenAI custom gym
|
|
||||||
- class Train: implements algorithms to train a new model or test an existing model
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class dribble(gym.Env):
|
|
||||||
def __init__(self, ip, server_p, monitor_p, r_type, enable_draw) -> None:
|
|
||||||
self.expert_data = dict()
|
|
||||||
self.abs_ori = 75
|
|
||||||
self.ball_dist_hip_center_2d = 0
|
|
||||||
self.ball_dist_hip_center = None
|
|
||||||
self.internal_rel_orientation = None
|
|
||||||
self.dribble_speed = 1
|
|
||||||
self.gym_last_internal_abs_ori = None
|
|
||||||
self.internal_target_vel = None
|
|
||||||
self.Gen_player_pos = None
|
|
||||||
self.internal_target = None
|
|
||||||
self.values_l = None
|
|
||||||
self.values_r = None
|
|
||||||
self.reset_time = None
|
|
||||||
self.behavior = None
|
|
||||||
self.robot_type = r_type
|
|
||||||
self.kick_ori = 0
|
|
||||||
self.terminal = False
|
|
||||||
# Args: Server IP, Agent Port, Monitor Port, Uniform No., Robot Type, Team Name, Enable Log, Enable Draw
|
|
||||||
self.player = Agent(ip, server_p, monitor_p, 1, self.robot_type, "Gym", True, True)
|
|
||||||
|
|
||||||
self.step_counter = 0 # to limit episode size
|
|
||||||
self.ik = self.player.inv_kinematics
|
|
||||||
self.dribble_rel_orientation = 0 # relative to imu_torso_orientation (in degrees)
|
|
||||||
# Step behavior defaults
|
|
||||||
self.STEP_DUR = 8
|
|
||||||
self.STEP_Z_SPAN = 0.02
|
|
||||||
self.STEP_Z_MAX = 0.70
|
|
||||||
nao_specs = self.ik.NAO_SPECS
|
|
||||||
self.leg_length = nao_specs[1] + nao_specs[3] # upper leg height + lower leg height
|
|
||||||
feet_y_dev = nao_specs[0] * 1.12 # wider step
|
|
||||||
sample_time = self.player.world.robot.STEPTIME
|
|
||||||
max_ankle_z = nao_specs[5]
|
|
||||||
self.step_generator = Step_Generator(feet_y_dev, sample_time, max_ankle_z)
|
|
||||||
self.DEFAULT_ARMS = np.array([-90, -90, 8, 8, 90, 90, 70, 70], np.float32)
|
|
||||||
|
|
||||||
self.act = np.zeros(16, np.float32) # memory variable
|
|
||||||
self.path_manager = self.player.path_manager
|
|
||||||
|
|
||||||
# State space
|
|
||||||
obs_size = 76
|
|
||||||
self.obs = np.zeros(obs_size, np.float32)
|
|
||||||
self.observation_space = gym.spaces.Box(low=np.full(obs_size, -np.inf, np.float32),
|
|
||||||
high=np.full(obs_size, np.inf, np.float32), dtype=np.float32)
|
|
||||||
|
|
||||||
self.ball_x_center = 0.21
|
|
||||||
self.ball_y_center = -0.045
|
|
||||||
# Action space
|
|
||||||
|
|
||||||
MAX = np.finfo(np.float32).max
|
|
||||||
self.no_of_actions = act_size = 16
|
|
||||||
self.action_space = gym.spaces.Box(low=np.full(act_size, -MAX, np.float32),
|
|
||||||
high=np.full(act_size, MAX, np.float32), dtype=np.float32)
|
|
||||||
|
|
||||||
# Place ball far away to keep landmarks in FoV (head follows ball while using Step behavior)
|
|
||||||
self.player.scom.unofficial_move_ball((14, 0, 0.042))
|
|
||||||
|
|
||||||
self.player.scom.unofficial_set_play_mode("PlayOn")
|
|
||||||
self.player.scom.unofficial_move_ball((0, 0, 0))
|
|
||||||
|
|
||||||
def observe(self, init=False, virtual_ball=False):
|
|
||||||
r = self.player.world.robot
|
|
||||||
w = self.player.world
|
|
||||||
|
|
||||||
if init: # reset variables
|
|
||||||
self.step_counter = 0
|
|
||||||
self.act = np.zeros(16, np.float32) # memory variable
|
|
||||||
|
|
||||||
# index observation naive normalization
|
|
||||||
self.obs[0] = min(self.step_counter, 12 * 8) / 100 # simple counter: 0,1,2,3...
|
|
||||||
self.obs[1] = r.loc_head_z * 3 # z coordinate (torso)
|
|
||||||
self.obs[2] = r.loc_head_z_vel / 2 # z velocity (torso)
|
|
||||||
self.obs[3] = r.imu_torso_roll / 15 # absolute torso roll in deg
|
|
||||||
self.obs[4] = r.imu_torso_pitch / 15 # absolute torso pitch in deg
|
|
||||||
self.obs[5:8] = r.gyro / 100 # gyroscope
|
|
||||||
self.obs[8:11] = r.acc / 10 # accelerometer
|
|
||||||
|
|
||||||
self.obs[11:17] = r.frp.get('lf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01,
|
|
||||||
0.01) # left foot: relative point of origin (p) and force vector (f) -> (px,py,pz,fx,fy,fz)*
|
|
||||||
self.obs[17:23] = r.frp.get('rf', np.zeros(6)) * (10, 10, 10, 0.01, 0.01,
|
|
||||||
0.01) # right foot: relative point of origin (p) and force vector (f) -> (px,py,pz,fx,fy,fz)*
|
|
||||||
# *if foot is not touching the ground, then (px=0,py=0,pz=0,fx=0,fy=0,fz=0)
|
|
||||||
|
|
||||||
self.obs[23:43] = r.joints_position[2:22] / 100 # position of all joints except head & toes (for robot type 4)
|
|
||||||
self.obs[43:63] = r.joints_speed[2:22] / 6.1395 # speed of all joints except head & toes (for robot type 4)
|
|
||||||
|
|
||||||
if init: # the walking parameters refer to the last parameters in effect (after a reset, they are pointless)
|
|
||||||
self.obs[63] = 1 # step progress
|
|
||||||
self.obs[64] = 1 # 1 if left leg is active
|
|
||||||
self.obs[65] = 0 # 1 if right leg is active
|
|
||||||
self.obs[66] = 0
|
|
||||||
else:
|
|
||||||
self.obs[63] = self.step_generator.external_progress # step progress
|
|
||||||
self.obs[64] = float(self.step_generator.state_is_left_active) # 1 if left leg is active
|
|
||||||
self.obs[65] = float(not self.step_generator.state_is_left_active) # 1 if right leg is active
|
|
||||||
self.obs[66] = math.sin(self.step_generator.state_current_ts / self.step_generator.ts_per_step * math.pi)
|
|
||||||
|
|
||||||
# Ball
|
|
||||||
ball_rel_hip_center = self.ik.torso_to_hip_transform(w.ball_rel_torso_cart_pos)
|
|
||||||
self.ball_dist_hip_center = np.linalg.norm(ball_rel_hip_center)
|
|
||||||
ball_rel_torso_xy = w.ball_rel_torso_cart_pos[:2] # 取X和Y分量
|
|
||||||
self.ball_dist_hip_center_2d = np.linalg.norm(ball_rel_torso_xy)
|
|
||||||
|
|
||||||
if init:
|
|
||||||
self.obs[67:70] = (0, 0, 0) # Initial velocity is 0
|
|
||||||
elif w.ball_is_visible:
|
|
||||||
self.obs[67:70] = (ball_rel_hip_center - self.obs[
|
|
||||||
70:73]) * 10 # Ball velocity, relative to ankle's midpoint
|
|
||||||
|
|
||||||
self.obs[70:73] = ball_rel_hip_center # Ball position, relative to hip
|
|
||||||
self.obs[73] = self.ball_dist_hip_center * 2
|
|
||||||
|
|
||||||
if virtual_ball: # simulate the ball between the robot's feet
|
|
||||||
self.obs[67:74] = (0, 0, 0, 0.05, 0, -0.175, 0.36)
|
|
||||||
|
|
||||||
'''
|
|
||||||
Create internal target with a smoother variation
|
|
||||||
'''
|
|
||||||
|
|
||||||
MAX_ROTATION_DIFF = 20 # max difference (degrees) per visual step
|
|
||||||
MAX_ROTATION_DIST = 80
|
|
||||||
|
|
||||||
if init:
|
|
||||||
self.internal_rel_orientation = 0
|
|
||||||
self.internal_target_vel = 0
|
|
||||||
self.gym_last_internal_abs_ori = r.imu_torso_orientation # for training purposes (reward)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------- compute internal target
|
|
||||||
|
|
||||||
if w.vision_is_up_to_date:
|
|
||||||
previous_internal_rel_orientation = np.copy(self.internal_rel_orientation)
|
|
||||||
|
|
||||||
internal_ori_diff = np.clip(M.normalize_deg(self.dribble_rel_orientation - self.internal_rel_orientation),
|
|
||||||
-MAX_ROTATION_DIFF, MAX_ROTATION_DIFF)
|
|
||||||
self.internal_rel_orientation = np.clip(M.normalize_deg(self.internal_rel_orientation + internal_ori_diff),
|
|
||||||
-MAX_ROTATION_DIST, MAX_ROTATION_DIST)
|
|
||||||
|
|
||||||
# Observations
|
|
||||||
self.internal_target_vel = self.internal_rel_orientation - previous_internal_rel_orientation
|
|
||||||
|
|
||||||
self.gym_last_internal_abs_ori = self.internal_rel_orientation + r.imu_torso_orientation
|
|
||||||
# ----------------------------------------------------------------- observations
|
|
||||||
|
|
||||||
self.obs[74] = self.internal_rel_orientation / MAX_ROTATION_DIST
|
|
||||||
self.obs[75] = self.internal_target_vel / MAX_ROTATION_DIFF
|
|
||||||
|
|
||||||
return self.obs
|
|
||||||
|
|
||||||
def sync(self):
|
|
||||||
''' Run a single simulation step '''
|
|
||||||
r = self.player.world.robot
|
|
||||||
self.player.scom.commit_and_send(r.get_command())
|
|
||||||
self.player.scom.receive()
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
# print("reset")
|
|
||||||
'''
|
|
||||||
Reset and stabilize the robot
|
|
||||||
Note: for some behaviors it would be better to reduce stabilization or add noise
|
|
||||||
'''
|
|
||||||
self.abs_ori = 45
|
|
||||||
self.player.scom.unofficial_set_play_mode("PlayOn")
|
|
||||||
|
|
||||||
Gen_ball_pos = [- 9, 0, 0]
|
|
||||||
self.Gen_player_pos = (Gen_ball_pos[0] - 1.5, Gen_ball_pos[1], 0.5)
|
|
||||||
self.player.scom.unofficial_move_ball((Gen_ball_pos[0], Gen_ball_pos[1], Gen_ball_pos[2]))
|
|
||||||
self.step_counter = 0
|
|
||||||
self.behavior = self.player.behavior
|
|
||||||
r = self.player.world.robot
|
|
||||||
w = self.player.world
|
|
||||||
t = w.time_local_ms
|
|
||||||
self.reset_time = t
|
|
||||||
self.b_rel = w.ball_rel_torso_cart_pos[:2]
|
|
||||||
self.path_manager = self.player.path_manager
|
|
||||||
|
|
||||||
for _ in range(25):
|
|
||||||
self.player.scom.unofficial_beam(self.Gen_player_pos, 0) # beam player continuously (floating above ground)
|
|
||||||
self.player.behavior.execute("Zero_Bent_Knees")
|
|
||||||
self.sync()
|
|
||||||
|
|
||||||
# beam player to ground
|
|
||||||
self.player.scom.unofficial_beam(self.Gen_player_pos, 0)
|
|
||||||
r.joints_target_speed[
|
|
||||||
0] = 0.01 # move head to trigger physics update (rcssserver3d bug when no joint is moving)
|
|
||||||
self.sync()
|
|
||||||
|
|
||||||
# stabilize on ground
|
|
||||||
for _ in range(7):
|
|
||||||
self.player.behavior.execute("Zero_Bent_Knees")
|
|
||||||
self.sync()
|
|
||||||
# walk to ball
|
|
||||||
while True:
|
|
||||||
self.b_rel = w.ball_rel_torso_cart_pos[:2]
|
|
||||||
if self.player.behavior.is_ready("Get_Up"):
|
|
||||||
self.player.behavior.execute_to_completion("Get_Up")
|
|
||||||
if 0.26 > self.b_rel[0] > 0.18 and abs(self.b_rel[1]) < 0.04 and w.ball_is_visible:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
if self.player.behavior.is_ready("Get_Up"):
|
|
||||||
self.player.behavior.execute_to_completion("Get_Up")
|
|
||||||
|
|
||||||
reset_walk = self.behavior.previous_behavior != "Walk" # reset walk if it wasn't the previous behavior
|
|
||||||
rel_target = self.b_rel + (-0.23, 0) # relative target is a circle (center: ball, radius:0.23m)
|
|
||||||
rel_ori = M.vector_angle(self.b_rel) # relative orientation of ball, NOT the target!
|
|
||||||
dist = max(0.08, np.linalg.norm(rel_target) * 0.7) # slow approach
|
|
||||||
self.behavior.execute_sub_behavior("Walk", reset_walk, rel_target, False, rel_ori, False,
|
|
||||||
dist) # target, is_target_abs, ori, is_ori_abs, distance
|
|
||||||
|
|
||||||
self.sync()
|
|
||||||
|
|
||||||
self.act = np.zeros(self.no_of_actions, np.float32)
|
|
||||||
return self.observe(True)
|
|
||||||
|
|
||||||
def render(self, mode='human', close=False):
|
|
||||||
return
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
Draw.clear_all()
|
|
||||||
self.player.terminate()
|
|
||||||
|
|
||||||
def run_mlp(self, obs, weights, activation_function="tanh"):
|
|
||||||
'''
|
|
||||||
Run multilayer perceptron using numpy
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
obs : ndarray
|
|
||||||
float32 array with neural network inputs
|
|
||||||
weights : list
|
|
||||||
list of MLP layers of type (bias, kernel)
|
|
||||||
activation_function : str
|
|
||||||
activation function for hidden layers
|
|
||||||
set to "none" to disable
|
|
||||||
'''
|
|
||||||
|
|
||||||
obs = obs.astype(np.float32, copy=False)
|
|
||||||
out = obs
|
|
||||||
|
|
||||||
for w in weights[:-1]: # for each hidden layer
|
|
||||||
out = np.matmul(w[1], out) + w[0]
|
|
||||||
if activation_function == "tanh":
|
|
||||||
np.tanh(out, out=out)
|
|
||||||
elif activation_function != "none":
|
|
||||||
raise NotImplementedError
|
|
||||||
return np.matmul(weights[-1][1], out) + weights[-1][0] # final layer
|
|
||||||
|
|
||||||
def step(self, action):
|
|
||||||
|
|
||||||
r = (self.
|
|
||||||
player.world.robot)
|
|
||||||
|
|
||||||
w = self.player.world
|
|
||||||
d = w.draw
|
|
||||||
if w.ball_abs_pos[1] > 0: #
|
|
||||||
dribble_target = (15, 5)
|
|
||||||
else:
|
|
||||||
dribble_target = (15, -5)
|
|
||||||
with open(M.get_active_directory([
|
|
||||||
"/home/her-darling/APOLLO_PRO/TEST_Dribble/scripts/utils/gail/dribble_long_R1_00_178M.pkl",
|
|
||||||
"/home/her-darling/APOLLO_PRO/TEST_Dribble/scripts/utils/gail/dribble_long_R1_00_178M.pkl",
|
|
||||||
"/home/her-darling/APOLLO_PRO/TEST_Dribble/scripts/utils/gail/dribble_long_R1_00_178M.pkl",
|
|
||||||
"/home/her-darling/APOLLO_PRO/TEST_Dribble/scripts/utils/gail/dribble_long_R1_00_178M.pkl",
|
|
||||||
"/home/her-darling/APOLLO_PRO/TEST_Dribble/scripts/utils/gail/dribble_long_R1_00_178M.pkl"
|
|
||||||
][r.type]), 'rb') as f:
|
|
||||||
model = pickle.load(f)
|
|
||||||
# 收集观测
|
|
||||||
obs = self.observe()
|
|
||||||
action = self.run_mlp(obs, model)
|
|
||||||
self.sync()
|
|
||||||
self.step_counter += 1
|
|
||||||
angle_rad = np.radians(self.gym_last_internal_abs_ori) # 将角度转换为弧度
|
|
||||||
unit_vector = np.array([np.cos(angle_rad), np.sin(angle_rad)]) # 计算单位向量
|
|
||||||
if np.linalg.norm(w.ball_cheat_abs_vel[:2]) != 0:
|
|
||||||
cos_theta = np.dot(unit_vector, w.ball_cheat_abs_vel[:2]) / (
|
|
||||||
np.linalg.norm(unit_vector) * np.linalg.norm(w.ball_cheat_abs_vel[:2]))
|
|
||||||
else:
|
|
||||||
cos_theta = 0
|
|
||||||
#获取专家数据
|
|
||||||
self.expert_data = {
|
|
||||||
"actions": action,
|
|
||||||
"obs": obs
|
|
||||||
}
|
|
||||||
# 计算奖励
|
|
||||||
reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta
|
|
||||||
|
|
||||||
if self.ball_dist_hip_center_2d < 0.115:
|
|
||||||
reward = 0
|
|
||||||
# 判断终止
|
|
||||||
if self.player.behavior.is_ready("Get_Up"):
|
|
||||||
self.terminal = True
|
|
||||||
elif w.time_local_ms - self.reset_time > 7500 * 3 or np.linalg.norm(
|
|
||||||
w.ball_cheat_abs_pos[:2] - r.loc_head_position[:2]) > 0.45 or not w.is_ball_abs_pos_from_vision:
|
|
||||||
self.terminal = True
|
|
||||||
else:
|
|
||||||
self.terminal = False
|
|
||||||
return obs, reward, self.terminal, {}
|
|
||||||
|
|
||||||
|
|
||||||
class Train(Train_Base):
|
|
||||||
def __init__(self, script) -> None:
|
|
||||||
super().__init__(script)
|
|
||||||
|
|
||||||
def train(self, args):
|
|
||||||
|
|
||||||
# --------------------------------------- Learning parameters
|
|
||||||
n_envs = min(1, os.cpu_count())
|
|
||||||
n_steps_per_env = 1024 # RolloutBuffer is of size (n_steps_per_env * n_envs)
|
|
||||||
minibatch_size = 64 # should be a factor of (n_steps_per_env * n_envs)
|
|
||||||
total_steps = 50000000
|
|
||||||
learning_rate = 3e-4
|
|
||||||
folder_name = f'Sprint_R{self.robot_type}'
|
|
||||||
model_path = f'./scripts/gyms/logs/{folder_name}/'
|
|
||||||
expert_data = dribble.__init__().expert_data
|
|
||||||
|
|
||||||
# print("Model path:", model_path)
|
|
||||||
|
|
||||||
# --------------------------------------- Run algorithm
|
|
||||||
def init_env(i_env):
|
|
||||||
def thunk():
|
|
||||||
return dribble(self.ip, self.server_p + i_env, self.monitor_p_1000 + i_env, self.robot_type, False)
|
|
||||||
|
|
||||||
return thunk
|
|
||||||
|
|
||||||
servers = Server(self.server_p, self.monitor_p_1000, n_envs + 1) # include 1 extra server for testing
|
|
||||||
|
|
||||||
env = SubprocVecEnv([init_env(i) for i in range(n_envs)])
|
|
||||||
eval_env = SubprocVecEnv([init_env(n_envs)])
|
|
||||||
|
|
||||||
try:
|
|
||||||
if "model_file" in args: # retrain
|
|
||||||
model = GAIL.load(args["model_file"], env=env, device="cuda", n_envs=n_envs, n_steps=n_steps_per_env,
|
|
||||||
batch_size=minibatch_size, learning_rate=learning_rate)
|
|
||||||
else: # train new model
|
|
||||||
model = GAIL("MlpPolicy", env=env, expert_dataset=expert_data, verbose=1, n_steps=n_steps_per_env, batch_size=minibatch_size,
|
|
||||||
learning_rate=learning_rate, device="cuda")
|
|
||||||
|
|
||||||
model_path = self.learn_model(model, total_steps, model_path, eval_env=eval_env,
|
|
||||||
eval_freq=n_steps_per_env * 20, save_freq=n_steps_per_env * 20,
|
|
||||||
backup_env_file=__file__)
|
|
||||||
|
|
||||||
# model = GAIL(
|
|
||||||
# "MlpPolicy",
|
|
||||||
# env,
|
|
||||||
# expert_dataset=expert_data, # 提供专家数据
|
|
||||||
# verbose=1,
|
|
||||||
# n_steps=2048, # 每次更新的步数
|
|
||||||
# batch_size=64, # 批量大小
|
|
||||||
# learning_rate=3e-4
|
|
||||||
# )
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
sleep(1) # wait for child processes
|
|
||||||
("\nctrl+c pressed, aborting...\n")
|
|
||||||
servers.kill()
|
|
||||||
return
|
|
||||||
|
|
||||||
env.close()
|
|
||||||
eval_env.close()
|
|
||||||
servers.kill()
|
|
||||||
|
|
||||||
def test(self, args):
|
|
||||||
|
|
||||||
# Uses different server and monitor ports
|
|
||||||
server = Server(self.server_p - 1, self.monitor_p, 1)
|
|
||||||
env = dribble(self.ip, self.server_p - 1, self.monitor_p, self.robot_type, True)
|
|
||||||
model = GAIL.load(args["model_file"], env=env)
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.export_model(args["model_file"], args["model_file"] + ".pkl",
|
|
||||||
False) # Export to pkl to create custom behavior
|
|
||||||
self.test_model(model, env, log_path=args["folder_dir"], model_path=args["folder_dir"])
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print()
|
|
||||||
|
|
||||||
env.close()
|
|
||||||
server.kill()
|
|
@ -94,9 +94,7 @@ class Path_Manager():
|
|||||||
|
|
||||||
def get_hard_radius(t):
|
def get_hard_radius(t):
|
||||||
if t.unum in priority_unums:
|
if t.unum in priority_unums:
|
||||||
if len(priority_unums) > 2:
|
return 1.0 # extra distance for priority roles
|
||||||
return 1.5
|
|
||||||
return None
|
|
||||||
else:
|
else:
|
||||||
return t.state_ground_area[1]+0.2
|
return t.state_ground_area[1]+0.2
|
||||||
|
|
||||||
@ -115,8 +113,8 @@ class Path_Manager():
|
|||||||
soft_radius = 0.6
|
soft_radius = 0.6
|
||||||
hard_radius = lambda o : 0.2
|
hard_radius = lambda o : 0.2
|
||||||
elif mode == Path_Manager.MODE_DRIBBLE:
|
elif mode == Path_Manager.MODE_DRIBBLE:
|
||||||
soft_radius = 1.6
|
soft_radius = 2.3
|
||||||
hard_radius = lambda o : o.state_ground_area[1]+0.3
|
hard_radius = lambda o : o.state_ground_area[1]+0.9
|
||||||
else:
|
else:
|
||||||
soft_radius = 1.0
|
soft_radius = 1.0
|
||||||
hard_radius = lambda o : o.state_ground_area[1]+0.2
|
hard_radius = lambda o : o.state_ground_area[1]+0.2
|
||||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user