|
|
|
@ -293,6 +293,30 @@ class dribble(gym.Env):
|
|
|
|
|
arms[0:4] += a[12:16] * 4 # arms pitch+roll
|
|
|
|
|
|
|
|
|
|
return l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms
|
|
|
|
|
|
|
|
|
|
def loss(self, obs, action_p, action_r):
|
|
|
|
|
r = self.player.world.robot
|
|
|
|
|
with open(M.get_active_directory([
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl"
|
|
|
|
|
][r.type]), 'rb') as f:
|
|
|
|
|
model = pickle.load(f)
|
|
|
|
|
act = run_mlp(obs, model)
|
|
|
|
|
a_l_ankle_pos, a_r_ankle_pos, a_l_foot_rot, a_r_foot_rot, a_arms = self.execute(act)
|
|
|
|
|
act_p = np.concatenate((a_l_ankle_pos, a_r_ankle_pos))
|
|
|
|
|
act_r = np.concatenate((a_l_foot_rot, a_r_foot_rot, a_arms))
|
|
|
|
|
action_p_tensor = torch.from_numpy(action_p)
|
|
|
|
|
action_r_tensor = torch.from_numpy(action_r)
|
|
|
|
|
act_p_tensor = torch.from_numpy(act_p)
|
|
|
|
|
act_r_tensor = torch.from_numpy(act_r)
|
|
|
|
|
loss_p = torch.exp(-40*torch.norm(action_p_tensor - act_p_tensor, p=2))
|
|
|
|
|
loss_r = torch.exp(-0.1*torch.norm(action_r_tensor - act_r_tensor, p=2))
|
|
|
|
|
loss = loss_p + loss_r
|
|
|
|
|
return loss
|
|
|
|
|
|
|
|
|
|
def step(self, action):
|
|
|
|
|
r = (self.
|
|
|
|
|
player.world.robot)
|
|
|
|
@ -305,31 +329,12 @@ class dribble(gym.Env):
|
|
|
|
|
dribble_target = (15, -5)
|
|
|
|
|
|
|
|
|
|
self.dribble_rel_orientation = self.path_manager.get_dribble_path(optional_2d_target=dribble_target)[1]
|
|
|
|
|
# # exponential moving average
|
|
|
|
|
# self.act = 0.85 * self.act + 0.15 * action * 0.7 * 0.95 * self.dribble_speed
|
|
|
|
|
#
|
|
|
|
|
# # execute Step behavior to extract the target positions of each leg (we will override these targets)
|
|
|
|
|
# lfy, lfz, rfy, rfz = self.step_generator.get_target_positions(self.step_counter == 0, self.STEP_DUR,
|
|
|
|
|
# self.STEP_Z_SPAN,
|
|
|
|
|
# self.leg_length * self.STEP_Z_MAX)
|
|
|
|
|
#
|
|
|
|
|
# # Leg IK
|
|
|
|
|
# a = self.act
|
|
|
|
|
# l_ankle_pos = (a[0] * 0.025 - 0.01, a[1] * 0.01 + lfy, a[2] * 0.01 + lfz)
|
|
|
|
|
# r_ankle_pos = (a[3] * 0.025 - 0.01, a[4] * 0.01 + rfy, a[5] * 0.01 + rfz)
|
|
|
|
|
# l_foot_rot = a[6:9] * (2, 2, 3)
|
|
|
|
|
# r_foot_rot = a[9:12] * (2, 2, 3)
|
|
|
|
|
#
|
|
|
|
|
# # Limit leg yaw/pitch (and add bias)
|
|
|
|
|
# l_foot_rot[2] = max(0, l_foot_rot[2] + 18.3)
|
|
|
|
|
# r_foot_rot[2] = min(0, r_foot_rot[2] - 18.3)
|
|
|
|
|
#
|
|
|
|
|
# # Arms actions
|
|
|
|
|
# arms = np.copy(self.DEFAULT_ARMS) # default arms pose
|
|
|
|
|
# arms[0:4] += a[12:16] * 4 # arms pitch+roll
|
|
|
|
|
|
|
|
|
|
l_ankle_pos, r_ankle_pos, l_foot_rot, r_foot_rot, arms = self.execute(action)
|
|
|
|
|
|
|
|
|
|
action_p = np.concatenate((l_ankle_pos, r_ankle_pos))
|
|
|
|
|
action_r = np.concatenate((l_foot_rot, r_foot_rot, arms))
|
|
|
|
|
|
|
|
|
|
# Set target positions
|
|
|
|
|
self.execute_ik(l_ankle_pos, l_foot_rot, r_ankle_pos, r_foot_rot) # legs
|
|
|
|
|
r.set_joints_target_position_direct(slice(14, 22), arms, harmonize=False) # arms
|
|
|
|
@ -346,26 +351,10 @@ class dribble(gym.Env):
|
|
|
|
|
else:
|
|
|
|
|
cos_theta = 0
|
|
|
|
|
|
|
|
|
|
with open(M.get_active_directory([
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl",
|
|
|
|
|
"/behaviors/custom/Dribble/dribble_long_R1_00_178M.pkl"
|
|
|
|
|
][r.type]), 'rb') as f:
|
|
|
|
|
model = pickle.load(f)
|
|
|
|
|
act = run_mlp(obs, model)
|
|
|
|
|
a_l_ankle_pos, a_r_ankle_pos, a_l_foot_rot, a_r_foot_rot, a_arms = self.execute(act)
|
|
|
|
|
act_p = np.concatenate((a_l_ankle_pos, a_r_ankle_pos))
|
|
|
|
|
act_r = np.concatenate((a_l_foot_rot, a_r_foot_rot, a_arms))
|
|
|
|
|
action_p_tensor = torch.from_numpy(action_p)
|
|
|
|
|
action_r_tensor = torch.from_numpy(action_r)
|
|
|
|
|
act_p_tensor = torch.from_numpy(act_p)
|
|
|
|
|
act_r_tensor = torch.from_numpy(act_r)
|
|
|
|
|
loss_p = torch.exp(-torch.norm(action_p_tensor - act_p_tensor, p=2))
|
|
|
|
|
loss_r = torch.exp(-torch.norm(action_r_tensor - act_r_tensor, p=2))
|
|
|
|
|
loss = self.loss(obs, action_p, action_r)
|
|
|
|
|
|
|
|
|
|
# 计算奖励
|
|
|
|
|
reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta + loss_p + loss_r
|
|
|
|
|
reward = np.linalg.norm(w.ball_cheat_abs_vel) * cos_theta
|
|
|
|
|
|
|
|
|
|
if self.ball_dist_hip_center_2d < 0.115:
|
|
|
|
|
reward = 0
|
|
|
|
@ -387,7 +376,7 @@ class Train(Train_Base):
|
|
|
|
|
def train(self, args):
|
|
|
|
|
|
|
|
|
|
# --------------------------------------- Learning parameters
|
|
|
|
|
n_envs = min(16, os.cpu_count())
|
|
|
|
|
n_envs = min(12, os.cpu_count())
|
|
|
|
|
n_steps_per_env = 1024 # RolloutBuffer is of size (n_steps_per_env * n_envs)
|
|
|
|
|
minibatch_size = 64 # should be a factor of (n_steps_per_env * n_envs)
|
|
|
|
|
total_steps = 50000000
|
|
|
|
|