|
|
|
@ -283,15 +283,16 @@ class sprint(gym.Env):
|
|
|
|
|
self.player.terminate()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_random_target(self, position, x_range=(-13, 13), y_range=(-8, 8),
                           distance=10.0, max_attempts=1000):
    """Pick a new walk target at a fixed distance from ``position``.

    A direction is drawn uniformly from [0, 2*pi) and the candidate target
    is placed ``distance`` away from ``position`` along that direction.
    Candidates outside the rectangle ``x_range`` x ``y_range`` are rejected
    and redrawn. The accepted point is stored in ``self.walk_target`` as a
    2-element NumPy array.

    Args:
        position: Current 2D position (x, y); only the first two components
            are used.
        x_range: Inclusive (min, max) bounds allowed for the target's x.
        y_range: Inclusive (min, max) bounds allowed for the target's y.
        distance: Distance between ``position`` and the new target
            (same units as ``position`` -- presumably metres; confirm).
        max_attempts: Upper bound on the number of random draws. If no
            candidate lands inside the bounds (e.g. ``position`` is far
            outside the rectangle), the last candidate is clipped into the
            rectangle so this method can never loop forever.
    """
    origin = np.asarray(position, dtype=float)[:2]
    lower = np.array([x_range[0], y_range[0]], dtype=float)
    upper = np.array([x_range[1], y_range[1]], dtype=float)

    for _ in range(max(1, max_attempts)):
        angle = np.random.uniform(0, 2 * np.pi)
        # Offset must be ADDED to the current position; multiplying the
        # position by the offset collapses the target onto the origin
        # whenever the position is near (0, 0).
        candidate = origin + distance * np.array([np.cos(angle), np.sin(angle)])
        if np.all(candidate >= lower) and np.all(candidate <= upper):
            break
    else:
        # No valid direction found: fall back to the nearest in-bounds point.
        candidate = np.clip(candidate, lower, upper)

    self.walk_target = candidate
|
|
|
|
|
|
|
|
|
|
def step(self, action):
|
|
|
|
|
|
|
|
|
|
r = (self.
|
|
|
|
@ -303,7 +304,7 @@ class sprint(gym.Env):
|
|
|
|
|
self.walk_rel_target = M.rotate_2d_vec(
|
|
|
|
|
(self.walk_target[0] - r.loc_head_position[0], self.walk_target[1] - r.loc_head_position[1]), -r.imu_torso_orientation)
|
|
|
|
|
self.walk_distance = np.linalg.norm(self.walk_target - r.loc_head_position[:2])
|
|
|
|
|
if self.walk_distance <= 0.5:
|
|
|
|
|
if self.walk_distance <= 0.3:
|
|
|
|
|
self.generate_random_target(r.loc_head_position[:2])
|
|
|
|
|
self.walk_rel_target = M.rotate_2d_vec(
|
|
|
|
|
(self.walk_target[0] - r.loc_head_position[0], self.walk_target[1] - r.loc_head_position[1]),
|
|
|
|
@ -344,12 +345,14 @@ class sprint(gym.Env):
|
|
|
|
|
robot_speed = np.linalg.norm(r.loc_torso_velocity[:2])
|
|
|
|
|
direction_error = abs(self.walk_rel_orientation)
|
|
|
|
|
direction_error = min(direction_error, 10)
|
|
|
|
|
reward = robot_speed**2 * (1 - direction_error / 10) * 0.2
|
|
|
|
|
if self.walk_distance < 0.5:
|
|
|
|
|
reward = robot_speed * (1 - direction_error / 10) * 0.02
|
|
|
|
|
if self.walk_distance < 0.3:
|
|
|
|
|
reward += 10
|
|
|
|
|
|
|
|
|
|
if self.player.behavior.is_ready("Get_Up"):
|
|
|
|
|
self.terminal = True
|
|
|
|
|
elif w.time_local_ms - self.reset_time > 15000 * 2:
|
|
|
|
|
self.terminal = True
|
|
|
|
|
else:
|
|
|
|
|
self.terminal = False
|
|
|
|
|
return obs, reward, self.terminal, {}
|
|
|
|
|