|
|
|
@ -11,7 +11,6 @@ from scripts.commons.Train_Base import Train_Base
|
|
|
|
|
from time import sleep
|
|
|
|
|
import os, gym
|
|
|
|
|
import numpy as np
|
|
|
|
|
from math_ops.Math_Ops import Math_Ops as U
|
|
|
|
|
from math_ops.Math_Ops import Math_Ops as M
|
|
|
|
|
from behaviors.custom.Step.Step_Generator import Step_Generator
|
|
|
|
|
|
|
|
|
@ -233,9 +232,9 @@ class sprint(gym.Env):
|
|
|
|
|
# -- fragment: episode-reset logic (enclosing method/def not visible in this chunk) --
t = w.time_local_ms
|
|
|
|
|
self.reset_time = t
|
|
|
|
|
|
|
|
|
|
# NOTE(review): the random target generated here is immediately overwritten by the
# fixed target (3, 0) below — looks like merge/diff residue; confirm which is intended.
self.generate_random_target(self.Gen_player_pos)
|
|
|
|
|
self.target = np.array([3, 0])
|
|
|
|
|
distance = np.linalg.norm(self.target - self.Gen_player_pos[:2])
|
|
|
|
|
self.walk_rel_target = self.target - self.Gen_player_pos[:2]
|
|
|
|
|
# NOTE(review): overwrites the relative vector computed just above with the
# absolute target position — likely a leftover duplicate from a patch; verify.
self.walk_rel_target = self.target
|
|
|
|
|
self.walk_distance = distance
|
|
|
|
|
# heading toward the target, per M.vector_angle's angle convention
self.walk_rel_orientation = M.vector_angle(self.walk_rel_target)
|
|
|
|
|
|
|
|
|
@ -298,17 +297,10 @@ class sprint(gym.Env):
|
|
|
|
|
# -- fragment: per-step target tracking (enclosing method not visible in this chunk) --
internal_dist = np.linalg.norm(self.internal_target)
|
|
|
|
|
# taper action magnitude linearly once the internal target is closer than 0.2
# (evaluates to 1.0 at internal_dist == 0.2, down to 0.3 at 0 — continuous at the switch)
action_mult = 1 if internal_dist > 0.2 else (0.7 / 0.2) * internal_dist + 0.3
|
|
|
|
|
# target vector rotated into the robot's torso frame
self.walk_rel_target = M.rotate_2d_vec(
|
|
|
|
|
(self.target[0] - r.loc_head_position[0], self.target[1] - r.loc_head_position[1]), -r.imu_torso_orientation)
|
|
|
|
|
# NOTE(review): the two lines below repeat the argument list of the call above —
# as written this is not valid Python; looks like both sides of a diff were kept. Verify.
(self.target[0] - r.loc_head_position[0], self.target[1] - r.loc_head_position[1]),
|
|
|
|
|
-r.imu_torso_orientation)
|
|
|
|
|
self.walk_distance = np.linalg.norm(self.walk_rel_target)
|
|
|
|
|
# damped heading command (factor 0.3)
self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.3
|
|
|
|
|
# re-sample a new target once within 0.5 of the current one
if self.walk_distance <= 0.5:
|
|
|
|
|
self.generate_random_target(r.loc_head_position)
|
|
|
|
|
self.walk_rel_target = M.rotate_2d_vec(
|
|
|
|
|
(self.target[0] - r.loc_head_position[0], self.target[1] - r.loc_head_position[1]),
|
|
|
|
|
-r.imu_torso_orientation)
|
|
|
|
|
self.walk_distance = np.linalg.norm(self.walk_rel_target)
|
|
|
|
|
self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.3
|
|
|
|
|
|
|
|
|
|
# NOTE(review): overwrites the 0.3-damped heading set above with a 0.5 factor —
# indentation is lost in this chunk, so it is unclear which assignment is live; confirm.
self.walk_rel_orientation = M.vector_angle(self.walk_rel_target) * 0.5
|
|
|
|
|
# exponential moving average
|
|
|
|
|
# EMA smoothing of the policy action; the extra 0.7 further damps the new sample
self.act = 0.8 * self.act + 0.2 * action * action_mult * 0.7
|
|
|
|
|
|
|
|
|
@ -340,12 +332,14 @@ class sprint(gym.Env):
|
|
|
|
|
# -- fragment: step()/reward logic (enclosing method not visible in this chunk) --
self.sync()
|
|
|
|
|
self.step_counter += 1
|
|
|
|
|
obs = self.observe()
|
|
|
|
|
# angle of the torso velocity in the torso frame, damped by 0.3
velocity_rel_orientation = M.vector_angle(M.rotate_2d_vec((r.loc_torso_velocity[0], r.loc_torso_velocity[1]), r.imu_torso_orientation)) * 0.3
|
|
|
|
|
# heading error, capped at 10
direction_error = min(abs(velocity_rel_orientation - self.walk_rel_orientation), 10)
|
|
|
|
|
robot_speed = np.linalg.norm(r.loc_torso_velocity[:2])
|
|
|
|
|
# NOTE(review): this reward (and the +10 bonus below) is unconditionally overwritten
# by the second reward formula further down — looks like diff residue; confirm which is live.
reward = robot_speed * (1.5 - direction_error / 10)
|
|
|
|
|
if self.walk_distance < 0.5:
|
|
|
|
|
reward += 10
|
|
|
|
|
# slope |dy/dx| toward the target.
# NOTE(review): divides by the x-distance to the target — raises ZeroDivisionError
# when the robot is at the target's x coordinate; verify an upstream guard exists.
dirct = abs((self.target[1] - r.loc_head_position[1]) / (self.target[0] - r.loc_head_position[0]))
|
|
|
|
|
# direction_error = abs(r.imu_torso_orientation - M.vector_angle(self.target - r.loc_head_position[:2]))
|
|
|
|
|
# NOTE(review): uses `math.exp`, but no `import math` is visible in this chunk's
# import block — confirm it is imported elsewhere in the file.
reward = (r.loc_torso_velocity[0] * 0.1 + dirct * r.loc_torso_velocity[1] * 0.1
|
|
|
|
|
+ 0.1 * math.exp(-10*abs(self.walk_rel_orientation)))
|
|
|
|
|
# large bonus + fresh target once within 0.3 of the current target
if np.linalg.norm(self.target - r.loc_head_position[:2]) < 0.3:
|
|
|
|
|
reward += 50
|
|
|
|
|
self.generate_random_target(self.target)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# episode terminates when the robot has fallen (Get_Up behavior becomes ready)
if self.player.behavior.is_ready("Get_Up"):
|
|
|
|
|
self.terminal = True
|
|
|
|
@ -355,7 +349,6 @@ class sprint(gym.Env):
|
|
|
|
|
self.terminal = False
|
|
|
|
|
# classic gym (pre-0.26) 4-tuple step API: (obs, reward, done, info)
return obs, reward, self.terminal, {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Train(Train_Base):
|
|
|
|
|
def __init__(self, script) -> None:
|
|
|
|
|
super().__init__(script)
|
|
|
|
|