new generate_random_target

2024-11-19 20:27:21 +08:00 · 2024-11-19 20:27:21 +08:00 · 4569c0a6ed
commit 4569c0a6ed
parent 57b1496ff6
1 changed file with 12 additions and 9 deletions
--- a/scripts/gyms/Sprint.py
+++ b/scripts/gyms/Sprint.py
@@ -283,15 +283,16 @@ class sprint(gym.Env):
        self.player.terminate()


-    def generate_random_target(self, position, x_range=(-15, 15), y_range=(-10, 10)):
+    def generate_random_target(self, position):
        while True:
-            x = np.random.uniform(x_range[0], x_range[1])
-            y = np.random.uniform(y_range[0], y_range[1])
-
-            if np.linalg.norm(np.array([x, y]) - position) >= 10:
+            angle = np.random.uniform(0, 2 * np.pi)
+            X = position[0] * 10 * np.cos(angle)
+            Y = position[1] * 10 * np.sin(angle)
+            if -13 <= X <= 13 and -8 <= Y <= 8:
                break

-        self.walk_target = np.array([x, y])
+        self.walk_target = np.array([X, Y])
+
    def step(self, action):

        r = (self.
@@ -303,7 +304,7 @@ class sprint(gym.Env):
        self.walk_rel_target = M.rotate_2d_vec(
            (self.walk_target[0] - r.loc_head_position[0], self.walk_target[1] - r.loc_head_position[1]), -r.imu_torso_orientation)
        self.walk_distance = np.linalg.norm(self.walk_target - r.loc_head_position[:2])
-        if self.walk_distance <= 0.5:
+        if self.walk_distance <= 0.3:
            self.generate_random_target(r.loc_head_position[:2])
            self.walk_rel_target = M.rotate_2d_vec(
                (self.walk_target[0] - r.loc_head_position[0], self.walk_target[1] - r.loc_head_position[1]),
@@ -344,12 +345,14 @@ class sprint(gym.Env):
        robot_speed = np.linalg.norm(r.loc_torso_velocity[:2])
        direction_error = abs(self.walk_rel_orientation)
        direction_error = min(direction_error, 10)
-        reward = robot_speed**2 * (1 - direction_error / 10) * 0.2
-        if self.walk_distance < 0.5:
+        reward = robot_speed * (1 - direction_error / 10) * 0.02
+        if self.walk_distance < 0.3:
            reward += 10

        if self.player.behavior.is_ready("Get_Up"):
            self.terminal = True
+        elif w.time_local_ms - self.reset_time > 15000 * 2:
+            self.terminal = True
        else:
            self.terminal = False
        return obs, reward, self.terminal, {}