2025-11-19 08:08:22 -05:00
commit eaaa5519bd
256 changed files with 46657 additions and 0 deletions

307
scripts/commons/Script.py Normal file

@@ -0,0 +1,307 @@
from os import path, listdir, getcwd, cpu_count
from os.path import join, realpath, dirname, isfile, isdir, getmtime
from scripts.commons.UI import UI
import __main__
import argparse,json,sys
import pickle
import subprocess
class Script():
ROOT_DIR = path.dirname(path.dirname(realpath( join(getcwd(), dirname(__file__))) )) # project root directory
def __init__(self, cpp_builder_unum=0) -> None:
'''
Arguments specification
-----------------------
- To add new arguments, edit the information below
- After changing information below, the config.json file must be manually deleted
- In other modules, these arguments can be accessed by their 1-letter ID
'''
# list of arguments: 1-letter ID, Description, Hardcoded default
self.options = {'i': ('Server Hostname/IP', 'localhost'),
'p': ('Agent Port', '3100'),
'm': ('Monitor Port', '3200'),
't': ('Team Name', 'FCPortugal'),
'u': ('Uniform Number', '1'),
'r': ('Robot Type', '1'),
'P': ('Penalty Shootout', '0'),
'F': ('magmaFatProxy', '0'),
'D': ('Debug Mode', '1')}
# list of arguments: 1-letter ID, data type, choices
self.op_types = {'i': (str, None),
'p': (int, None),
'm': (int, None),
't': (str, None),
'u': (int, range(1,12)),
'r': (int, [0,1,2,3,4]),
'P': (int, [0,1]),
'F': (int, [0,1]),
'D': (int, [0,1])}
'''
End of arguments specification
'''
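# note: other modules holding this Script instance read these values through the
# 1-letter IDs above, e.g. script.args.u (uniform number) or script.args.t (team name)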
self.read_or_create_config()
#advance help text position
formatter = lambda prog: argparse.HelpFormatter(prog,max_help_position=52)
parser = argparse.ArgumentParser(formatter_class=formatter)
o = self.options
t = self.op_types
for id in self.options: # shorter metavar for aesthetic reasons
parser.add_argument(f"-{id}", help=f"{o[id][0]:30}[{o[id][1]:20}]", type=t[id][0], nargs='?', default=o[id][1], metavar='X', choices=t[id][1])
self.args = parser.parse_args()
if getattr(sys, 'frozen', False): # disable debug mode when running from binary
self.args.D = 0
self.players = [] # list of created players
Script.build_cpp_modules(exit_on_build = (cpp_builder_unum != 0 and cpp_builder_unum != self.args.u))
if self.args.D:
try:
print(f"\nNOTE: for help run \"python {__main__.__file__} -h\"")
except:
pass
columns = [[],[],[]]
for key, value in vars(self.args).items():
columns[0].append(o[key][0])
columns[1].append(o[key][1])
columns[2].append(value)
UI.print_table(columns, ["Argument","Default at /config.json","Active"], alignment=["<","^","^"])
def read_or_create_config(self) -> None:
if not path.isfile('config.json'): # save hardcoded default values if file does not exist
with open("config.json", "w") as f:
json.dump(self.options, f, indent=4)
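# the saved config.json maps each 1-letter ID to [description, default], e.g.
# {"i": ["Server Hostname/IP", "localhost"], "p": ["Agent Port", "3100"], ...}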
else: # load user-defined values (that can be overwritten by command-line arguments)
if path.getsize("config.json") == 0: # wait for possible write operation when launching multiple agents
from time import sleep
sleep(1)
if path.getsize("config.json") == 0: # abort after 1 second
print("Aborting: 'config.json' is empty. Manually verify and delete if still empty.")
exit()
with open("config.json", "r") as f:
self.options = json.loads(f.read())
@staticmethod
def build_cpp_modules(special_environment_prefix=[], exit_on_build=False):
'''
Build C++ modules in folder /cpp using Pybind11
Parameters
----------
special_environment_prefix : `list`
command prefix to run a given command in the desired environment
useful to compile C++ modules for different python interpreter versions (other than default version)
Conda Env. example: ['conda', 'run', '-n', 'myEnv']
If [] the default python interpreter is used as compilation target
exit_on_build : bool
exit if there is something to build (so that only 1 player per team builds C++ modules)
'''
cpp_path = Script.ROOT_DIR + "/cpp/"
exclusions = ["__pycache__"]
cpp_modules = [d for d in listdir(cpp_path) if isdir(join(cpp_path, d)) and d not in exclusions]
if not cpp_modules: return #no modules to build
python_cmd = f"python{sys.version_info.major}.{sys.version_info.minor}" # "python3" can select the wrong version, this prevents that
def init():
print("--------------------------\nC++ modules:",cpp_modules)
try:
process = subprocess.Popen(special_environment_prefix+[python_cmd, "-m", "pybind11", "--includes"], stdout=subprocess.PIPE)
(includes, err) = process.communicate()
process.wait()
except:
print(f"Error while executing child program: '{python_cmd} -m pybind11 --includes'")
exit()
includes = includes.decode().rstrip() # strip trailing newlines (and other whitespace chars)
print("Using Pybind11 includes: '",includes,"'",sep="")
return includes
nproc = str(cpu_count())
zero_modules = True
for module in cpp_modules:
module_path = join(cpp_path, module)
# skip module if there is no Makefile (typical distribution case)
if not isfile(join(module_path, "Makefile")):
continue
# skip module in certain conditions
if isfile(join(module_path, module+".so")) and isfile(join(module_path, module+".c_info")):
with open(join(module_path, module+".c_info"), 'rb') as f:
info = pickle.load(f)
if info == python_cmd:
code_mod_time = max(getmtime(join(module_path, f)) for f in listdir(module_path) if f.endswith(".cpp") or f.endswith(".h"))
bin_mod_time = getmtime(join(module_path, module+".so"))
if bin_mod_time + 30 > code_mod_time: # favor not building with a margin of 30s (scenario: we unzip the fcpy project, including the binaries, the modification times are all similar)
continue
# init: print stuff & get Pybind11 includes
if zero_modules:
if exit_on_build:
print("There are C++ modules to build. This player is not allowed to build. Aborting.")
exit()
zero_modules = False
includes = init()
# build module
print(f'{f"Building: {module}... ":40}',end='',flush=True)
process = subprocess.Popen(['make', '-j'+nproc, 'PYBIND_INCLUDES='+includes], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=module_path)
(output, err) = process.communicate()
exit_code = process.wait()
if exit_code == 0:
print("success!")
with open(join(module_path, module+".c_info"),"wb") as f: # save python version
pickle.dump(python_cmd, f, protocol=4) # protocol 4 is backward compatible with Python 3.4
else:
print("Aborting! Building errors:")
print(output.decode(), err.decode())
exit()
if not zero_modules:
print("All modules were built successfully!\n--------------------------")
def batch_create(self, agent_cls, args_per_player):
''' Creates a batch of agents '''
for a in args_per_player:
self.players.append( agent_cls(*a) )
def batch_execute_agent(self, index : slice = slice(None)):
'''
Executes agent normally (including commit & send)
Parameters
----------
index : slice
subset of agents
(e.g. index=slice(1,2) will select the second agent)
(e.g. index=slice(1,3) will select the second and third agents)
by default, all agents are selected
'''
for p in self.players[index]:
p.think_and_send()
def batch_execute_behavior(self, behavior, index : slice = slice(None)):
'''
Executes behavior
Parameters
----------
behavior : str
name of behavior to execute
index : slice
subset of agents
(e.g. index=slice(1,2) will select the second agent)
(e.g. index=slice(1,3) will select the second and third agents)
by default, all agents are selected
'''
for p in self.players[index]:
p.behavior.execute(behavior)
def batch_commit_and_send(self, index : slice = slice(None)):
'''
Commits & sends data to server
Parameters
----------
index : slice
subset of agents
(e.g. index=slice(1,2) will select the second agent)
(e.g. index=slice(1,3) will select the second and third agents)
by default, all agents are selected
'''
for p in self.players[index]:
p.scom.commit_and_send( p.world.robot.get_command() )
def batch_receive(self, index : slice = slice(None), update=True):
'''
Waits for server messages
Parameters
----------
index : slice
subset of agents
(e.g. index=slice(1,2) will select the second agent)
(e.g. index=slice(1,3) will select the second and third agents)
by default, all agents are selected
update : bool
update world state based on information received from server
if False, the agent becomes unaware of itself and its surroundings
which is useful for reducing cpu resources for dummy agents in demonstrations
'''
for p in self.players[index]:
p.scom.receive(update)
def batch_commit_beam(self, pos2d_and_rotation, index : slice = slice(None)):
'''
Beam all players to a 2D position with a given rotation
Parameters
----------
pos2d_and_rotation : `list`
iterable of 2D positions and rotations e.g. [(0,0,45),(-5,0,90)]
index : slice
subset of agents
(e.g. index=slice(1,2) will select the second agent)
(e.g. index=slice(1,3) will select the second and third agents)
by default, all agents are selected
'''
for p, pos_rot in zip(self.players[index], pos2d_and_rotation):
p.scom.commit_beam(pos_rot[0:2],pos_rot[2])
def batch_unofficial_beam(self, pos3d_and_rotation, index : slice = slice(None)):
'''
Beam all players to a 3D position with a given rotation
Parameters
----------
pos3d_and_rotation : `list`
iterable of 3D positions and rotations e.g. [(0,0,0.5,45),(-5,0,0.5,90)]
index : slice
subset of agents
(e.g. index=slice(1,2) will select the second agent)
(e.g. index=slice(1,3) will select the second and third agents)
by default, all agents are selected
'''
for p, pos_rot in zip(self.players[index], pos3d_and_rotation):
p.scom.unofficial_beam(pos_rot[0:3],pos_rot[3])
def batch_terminate(self, index : slice = slice(None)):
'''
Close all sockets connected to the agent port
For scripts where the agent lives until the application ends, this is not needed
Parameters
----------
index : slice
subset of agents
(e.g. index=slice(1,2) will select the second agent)
(e.g. index=slice(1,3) will select the second and third agents)
by default, all agents are selected
'''
for p in self.players[index]:
p.terminate()
del self.players[index] # delete selection
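A minimal usage sketch of the batch API above (illustrative only; the agent class, its import path and its constructor signature are placeholders, not shown here):

from scripts.commons.Script import Script
from agent.Agent import Agent                      # hypothetical agent class and import path

script = Script()
a = script.args
# create 3 players (the constructor argument order shown here is an assumption)
script.batch_create(Agent, [(a.i, a.p, a.m, u, a.r, a.t) for u in range(1, 4)])
script.batch_unofficial_beam([(-3, 0, 0.5, 0), (-2, 1, 0.5, 45), (-2, -1, 0.5, -45)])
for _ in range(100):                               # run 100 simulation steps
    script.batch_execute_agent()                   # think, commit & send for every player
    script.batch_receive()                         # wait for server replies and update world states
script.batch_terminate()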

60
scripts/commons/Server.py Normal file

@@ -0,0 +1,60 @@
import subprocess
class Server():
def __init__(self, first_server_p, first_monitor_p, n_servers) -> None:
try:
import psutil
self.check_running_servers(psutil, first_server_p, first_monitor_p, n_servers)
except ModuleNotFoundError:
print("Info: Cannot check if the server is already running, because the psutil module was not found")
self.first_server_p = first_server_p
self.n_servers = n_servers
self.rcss_processes = []
# makes it easier to kill test servers without affecting train servers
cmd = "simspark" if n_servers == 1 else "rcssserver3d"
for i in range(n_servers):
self.rcss_processes.append(
subprocess.Popen((f"{cmd} --agent-port {first_server_p+i} --server-port {first_monitor_p+i}").split(),
stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, start_new_session=True)
)
def check_running_servers(self, psutil, first_server_p, first_monitor_p, n_servers):
''' Check if any server is running on chosen ports '''
found = False
p_list = [p for p in psutil.process_iter() if p.cmdline() and p.name() in ["rcssserver3d","simspark"]]
range1 = (first_server_p, first_server_p + n_servers)
range2 = (first_monitor_p,first_monitor_p + n_servers)
bad_processes = []
for p in p_list:
# currently ignores the remaining default port when only one of the two ports is specified (uncommon scenario)
ports = [int(arg) for arg in p.cmdline()[1:] if arg.isdigit()]
if len(ports) == 0:
ports = [3100,3200] # default server ports (changing this is unlikely)
conflicts = [str(port) for port in ports if (
(range1[0] <= port < range1[1]) or (range2[0] <= port < range2[1]) )]
if len(conflicts)>0:
if not found:
print("\nThere are already servers running on the same port(s)!")
found = True
bad_processes.append(p)
print(f"Port(s) {','.join(conflicts)} already in use by \"{' '.join(p.cmdline())}\" (PID:{p.pid})")
if found:
print()
while True:
inp = input("Enter 'kill' to kill these processes or ctrl+c to abort. ")
if inp == "kill":
for p in bad_processes:
p.kill()
return
def kill(self):
for p in self.rcss_processes:
p.kill()
print(f"Killed {self.n_servers} rcssserver3d processes starting at {self.first_server_p}")

494
scripts/commons/Train_Base.py Normal file

@@ -0,0 +1,494 @@
from datetime import datetime, timedelta
from itertools import count
from os import listdir
from os.path import isdir, join, isfile
from scripts.commons.UI import UI
from shutil import copy
from stable_baselines3 import PPO
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback, CallbackList, BaseCallback
from typing import Callable
from world.World import World
from xml.dom import minidom
import numpy as np
import os, time, math, csv, select, sys
import pickle
import xml.etree.ElementTree as ET
class Train_Base():
def __init__(self, script) -> None:
'''
When training with multiple environments (multiprocessing):
The server port is incremented as follows:
self.server_p, self.server_p+1, self.server_p+2, ...
We add +1000 to the initial monitor port, so that we can have more than 100 environments:
self.monitor_p+1000, self.monitor_p+1001, self.monitor_p+1002, ...
When testing we use self.server_p and self.monitor_p
'''
args = script.args
self.script = script
self.ip = args.i
self.server_p = args.p # (initial) server port
self.monitor_p = args.m # monitor port when testing
self.monitor_p_1000 = args.m + 1000 # initial monitor port when training
self.robot_type = args.r
self.team = args.t
self.uniform = args.u
self.cf_last_time = 0
self.cf_delay = 0
self.cf_target_period = World.STEPTIME # target simulation speed while testing (default: real-time)
@staticmethod
def prompt_user_for_model():
gyms_logs_path = "./scripts/gyms/logs/"
folders = [f for f in listdir(gyms_logs_path) if isdir(join(gyms_logs_path, f))]
folders.sort(key=lambda f: os.path.getmtime(join(gyms_logs_path, f)), reverse=True) # sort by modification date
while True:
try:
folder_name = UI.print_list(folders,prompt="Choose folder (ctrl+c to return): ")[1]
except KeyboardInterrupt:
print()
return None # ctrl+c
folder_dir = os.path.join(gyms_logs_path, folder_name)
models = [m[:-4] for m in listdir(folder_dir) if isfile(join(folder_dir, m)) and m.endswith(".zip")]
if not models:
print("The chosen folder does not contain any .zip file!")
continue
models.sort(key=lambda m: os.path.getmtime(join(folder_dir, m+".zip")), reverse=True) # sort by modification date
try:
model_name = UI.print_list(models,prompt="Choose model (ctrl+c to return): ")[1]
break
except KeyboardInterrupt:
print()
return {"folder_dir":folder_dir, "folder_name":folder_name, "model_file":os.path.join(folder_dir, model_name+".zip")}
def control_fps(self, read_input = False):
''' Add delay to control simulation speed '''
if read_input:
speed = input()
if speed == '':
self.cf_target_period = 0
print(f"Changed simulation speed to MAX")
else:
if speed == '0':
inp = input("Paused. Set new speed or '' to use previous speed:")
if inp != '':
speed = inp
try:
speed = int(speed)
assert speed >= 0
self.cf_target_period = World.STEPTIME * 100 / speed
print(f"Changed simulation speed to {speed}%")
except:
print("""Train_Base.py:
Error: To control the simulation speed, enter a non-negative integer.
To disable this control module, use test_model(..., enable_FPS_control=False) in your gyms environment.""")
now = time.time()
period = now - self.cf_last_time
self.cf_last_time = now
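# accumulate the gap between the target and the measured step period (scaled by 0.9 to
# smooth the correction) and sleep that long; a negative backlog is reset to zero below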
self.cf_delay += (self.cf_target_period - period)*0.9
if self.cf_delay > 0:
time.sleep(self.cf_delay)
else:
self.cf_delay = 0
def test_model(self, model:BaseAlgorithm, env, log_path:str=None, model_path:str=None, max_episodes=0, enable_FPS_control=True, verbose=1):
'''
Test model and log results
Parameters
----------
model : BaseAlgorithm
Trained model
env : Env
Gym-like environment
log_path : str
Folder where statistics file is saved, default is `None` (no file is saved)
model_path : str
Folder where it reads evaluations.npz to plot it and create evaluations.csv, default is `None` (no plot, no csv)
max_episodes : int
Run tests for this number of episodes
Default is 0 (run until user aborts)
verbose : int
0 - no output (except if enable_FPS_control=True)
1 - print episode statistics
'''
if model_path is not None:
assert os.path.isdir(model_path), f"{model_path} is not a valid path"
self.display_evaluations(model_path)
if log_path is not None:
assert os.path.isdir(log_path), f"{log_path} is not a valid path"
# If file already exists, don't overwrite
if os.path.isfile(log_path + "/test.csv"):
for i in range(1000):
p = f"{log_path}/test_{i:03}.csv"
if not os.path.isfile(p):
log_path = p
break
else:
log_path += "/test.csv"
with open(log_path, 'w') as f:
f.write("reward,ep. length,rew. cumulative avg., ep. len. cumulative avg.\n")
print("Train statistics are saved to:", log_path)
if enable_FPS_control: # control simulation speed (using non blocking user input)
print("\nThe simulation speed can be changed by sending a non-negative integer\n"
"(e.g. '50' sets speed to 50%, '0' pauses the simulation, '' sets speed to MAX)\n")
ep_reward = 0
ep_length = 0
rewards_sum = 0
reward_min = math.inf
reward_max = -math.inf
ep_lengths_sum = 0
ep_no = 0
obs = env.reset()
while True:
action, _states = model.predict(obs, deterministic=True)
obs, reward, done, info = env.step(action)
ep_reward += reward
ep_length += 1
if enable_FPS_control: # control simulation speed (using non blocking user input)
self.control_fps(select.select([sys.stdin], [], [], 0)[0])
if done:
obs = env.reset()
rewards_sum += ep_reward
ep_lengths_sum += ep_length
reward_max = max(ep_reward, reward_max)
reward_min = min(ep_reward, reward_min)
ep_no += 1
avg_ep_lengths = ep_lengths_sum/ep_no
avg_rewards = rewards_sum/ep_no
if verbose > 0:
print( f"\rEpisode: {ep_no:<3} Ep.Length: {ep_length:<4.0f} Reward: {ep_reward:<6.2f} \n",
end=f"--AVERAGE-- Ep.Length: {avg_ep_lengths:<4.0f} Reward: {avg_rewards:<6.2f} (Min: {reward_min:<6.2f} Max: {reward_max:<6.2f})", flush=True)
if log_path is not None:
with open(log_path, 'a') as f:
writer = csv.writer(f)
writer.writerow([ep_reward, ep_length, avg_rewards, avg_ep_lengths])
if ep_no == max_episodes:
return
ep_reward = 0
ep_length = 0
def learn_model(self, model:BaseAlgorithm, total_steps:int, path:str, eval_env=None, eval_freq=None, eval_eps=5, save_freq=None, backup_env_file=None, export_name=None):
'''
Learn Model for a specific number of time steps
Parameters
----------
model : BaseAlgorithm
Model to train
total_steps : int
The total number of samples (env steps) to train on
path : str
Path where the trained model is saved
If the path already exists, an incrementing number suffix is added
eval_env : Env
Environment to periodically test the model
Default is None (no periodical evaluation)
eval_freq : int
Evaluate the agent every X steps
Default is None (no periodical evaluation)
eval_eps : int
Evaluate the agent for X episodes (both eval_env and eval_freq must be defined)
Default is 5
save_freq : int
Saves model at every X steps
Default is None (no periodical checkpoint)
backup_env_file : str
Generates backup of environment file in model's folder
Default is None (no backup)
export_name : str
If export_name and save_freq are defined, a model is exported every X steps
Default is None (no export)
Returns
-------
model_path : str
Directory where model was actually saved (considering incremental suffix)
Notes
-----
If `eval_env` and `eval_freq` were specified:
- The policy will be evaluated in `eval_env` every `eval_freq` steps
- Evaluation results will be saved in `path` and shown at the end of training
- Every time the results improve, the model is saved
'''
start = time.time()
start_date = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
# If path already exists, add suffix to avoid overwriting
if os.path.isdir(path):
for i in count():
p = path.rstrip("/")+f'_{i:03}/'
if not os.path.isdir(p):
path = p
break
os.makedirs(path)
# Backup environment file
if backup_env_file is not None:
backup_file = os.path.join(path, os.path.basename(backup_env_file))
copy(backup_env_file, backup_file)
evaluate = bool(eval_env is not None and eval_freq is not None)
# Create evaluation callback
eval_callback = None if not evaluate else EvalCallback(eval_env, n_eval_episodes=eval_eps, eval_freq=eval_freq, log_path=path,
best_model_save_path=path, deterministic=True, render=False)
# Create custom callback to display evaluations
custom_callback = None if not evaluate else Cyclic_Callback(eval_freq, lambda:self.display_evaluations(path,True))
# Create checkpoint callback
checkpoint_callback = None if save_freq is None else CheckpointCallback(save_freq=save_freq, save_path=path, name_prefix="model", verbose=1)
# Create custom callback to export checkpoint models
export_callback = None if save_freq is None or export_name is None else Export_Callback(save_freq, path, export_name)
callbacks = CallbackList([c for c in [eval_callback, custom_callback, checkpoint_callback, export_callback] if c is not None])
model.learn( total_timesteps=total_steps, callback=callbacks )
model.save( os.path.join(path, "last_model") )
# Display evaluations if they exist
if evaluate:
self.display_evaluations(path)
# Display timestamps + Model path
end_date = datetime.now().strftime('%d/%m/%Y %H:%M:%S')
duration = timedelta(seconds=int(time.time()-start))
print(f"Train start: {start_date}")
print(f"Train end: {end_date}")
print(f"Train duration: {duration}")
print(f"Model path: {path}")
# Append timestamps to backup environment file
if backup_env_file is not None:
with open(backup_file, 'a') as f:
f.write(f"\n# Train start: {start_date}\n")
f.write( f"# Train end: {end_date}\n")
f.write( f"# Train duration: {duration}")
return path
def display_evaluations(self, path, save_csv=False):
eval_npz = os.path.join(path, "evaluations.npz")
if not os.path.isfile(eval_npz):
return
console_width = 80
console_height = 18
symb_x = "\u2022"
symb_o = "\u007c"
symb_xo = "\u237f"
with np.load(eval_npz) as data:
time_steps = data["timesteps"]
results_raw = np.mean(data["results"],axis=1)
ep_lengths_raw = np.mean(data["ep_lengths"],axis=1)
sample_no = len(results_raw)
xvals = np.linspace(0, sample_no-1, 80)
results = np.interp(xvals, range(sample_no), results_raw)
ep_lengths = np.interp(xvals, range(sample_no), ep_lengths_raw)
results_limits = np.min(results), np.max(results)
ep_lengths_limits = np.min(ep_lengths), np.max(ep_lengths)
results_discrete = np.digitize(results, np.linspace(results_limits[0]-1e-5, results_limits[1]+1e-5, console_height+1))-1
ep_lengths_discrete = np.digitize(ep_lengths, np.linspace(0, ep_lengths_limits[1]+1e-5, console_height+1))-1
matrix = np.zeros((console_height, console_width, 2), int)
matrix[results_discrete[0] ][0][0] = 1 # draw 1st column
matrix[ep_lengths_discrete[0]][0][1] = 1 # draw 1st column
rng = [[results_discrete[0], results_discrete[0]], [ep_lengths_discrete[0], ep_lengths_discrete[0]]]
# Create continuous line for both plots
for k in range(2):
for i in range(1,console_width):
x = [results_discrete, ep_lengths_discrete][k][i]
if x > rng[k][1]:
rng[k] = [rng[k][1]+1, x]
elif x < rng[k][0]:
rng[k] = [x, rng[k][0]-1]
else:
rng[k] = [x,x]
for j in range(rng[k][0],rng[k][1]+1):
matrix[j][i][k] = 1
print(f'{"-"*console_width}')
for l in reversed(range(console_height)):
for c in range(console_width):
if np.all(matrix[l][c] == 0): print(end=" ")
elif np.all(matrix[l][c] == 1): print(end=symb_xo)
elif matrix[l][c][0] == 1: print(end=symb_x)
else: print(end=symb_o)
print()
print(f'{"-"*console_width}')
print(f"({symb_x})-reward min:{results_limits[0]:11.2f} max:{results_limits[1]:11.2f}")
print(f"({symb_o})-ep. length min:{ep_lengths_limits[0]:11.0f} max:{ep_lengths_limits[1]:11.0f} {time_steps[-1]/1000:15.0f}k steps")
print(f'{"-"*console_width}')
# save CSV
if save_csv:
eval_csv = os.path.join(path, "evaluations.csv")
with open(eval_csv, 'a+') as f:
writer = csv.writer(f)
if sample_no == 1:
writer.writerow(["time_steps", "reward ep.", "length"])
writer.writerow([time_steps[-1],results_raw[-1],ep_lengths_raw[-1]])
def generate_slot_behavior(self, path, slots, auto_head:bool, XML_name):
'''
Function that generates the XML file for the optimized slot behavior, overwriting previous files
'''
file = os.path.join( path, XML_name )
# create the file structure
auto_head = '1' if auto_head else '0'
EL_behavior = ET.Element('behavior',{'description':'Add description to XML file', "auto_head":auto_head})
for i,s in enumerate(slots):
EL_slot = ET.SubElement(EL_behavior, 'slot', {'name':str(i), 'delta':str(s[0]/1000)})
for j in s[1]: # go through all joint indices
ET.SubElement(EL_slot, 'move', {'id':str(j), 'angle':str(s[2][j])})
# create XML file
xml_rough = ET.tostring( EL_behavior, 'utf-8' )
xml_pretty = minidom.parseString(xml_rough).toprettyxml(indent=" ")
with open(file, "w") as x:
x.write(xml_pretty)
print(file, "was created!")
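# resulting XML sketch (illustrative values):
# <behavior description="Add description to XML file" auto_head="1">
#   <slot name="0" delta="0.08">
#     <move id="2" angle="30.0"/>
#     ...
#   </slot>
#   ...
# </behavior>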
@staticmethod
def linear_schedule(initial_value: float) -> Callable[[float], float]:
'''
Linear learning rate schedule
Parameters
----------
initial_value : float
Initial learning rate
Returns
-------
schedule : Callable[[float], float]
schedule that computes current learning rate depending on remaining progress
'''
def func(progress_remaining: float) -> float:
'''
Compute learning rate according to current progress
Parameters
----------
progress_remaining : float
Progress will decrease from 1 (beginning) to 0
Returns
-------
learning_rate : float
Learning rate according to current progress
'''
return progress_remaining * initial_value
return func
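# usage sketch (illustrative): PPO("MlpPolicy", env, learning_rate=Train_Base.linear_schedule(3e-4))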
@staticmethod
def export_model(input_file, output_file, add_sufix=True):
'''
Export model weights to binary file
Parameters
----------
input_file : str
Input file, compatible with algorithm
output_file : str
Output file, including directory
add_sufix : bool
If true, a suffix is appended to the file name: output_file + "_{index}.pkl"
'''
# If file already exists, don't overwrite
if add_sufix:
for i in count():
f = f"{output_file}_{i:03}.pkl"
if not os.path.isfile(f):
output_file = f
break
model = PPO.load(input_file)
weights = model.policy.state_dict() # dictionary containing network layers
w = lambda name : weights[name].detach().cpu().numpy() # extract weights from policy
var_list = []
for i in count(0,2): # add hidden layers (step=2 because that's how SB3 works)
if f"mlp_extractor.policy_net.{i}.bias" not in weights:
break
var_list.append([w(f"mlp_extractor.policy_net.{i}.bias"), w(f"mlp_extractor.policy_net.{i}.weight"), "tanh"])
var_list.append( [w("action_net.bias"), w("action_net.weight"), "none"] ) # add final layer
with open(output_file,"wb") as f:
pickle.dump(var_list, f, protocol=4) # protocol 4 is backward compatible with Python 3.4
class Cyclic_Callback(BaseCallback):
''' Stable baselines custom callback '''
def __init__(self, freq, function):
super(Cyclic_Callback, self).__init__(1)
self.freq = freq
self.function = function
def _on_step(self) -> bool:
if self.n_calls % self.freq == 0:
self.function()
return True # If the callback returns False, training is aborted early
class Export_Callback(BaseCallback):
''' Stable baselines custom callback '''
def __init__(self, freq, load_path, export_name):
super(Export_Callback, self).__init__(1)
self.freq = freq
self.load_path = load_path
self.export_name = export_name
def _on_step(self) -> bool:
if self.n_calls % self.freq == 0:
path = os.path.join(self.load_path, f"model_{self.num_timesteps}_steps.zip")
Train_Base.export_model(path, f"./scripts/gyms/export/{self.export_name}")
return True # If the callback returns False, training is aborted early
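A minimal sketch of how Train_Base is typically driven (illustrative; make_env, the log path and all hyperparameters are placeholders, and the import path assumes this file is scripts/commons/Train_Base.py):

import os
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from scripts.commons.Train_Base import Train_Base    # assumed module path for the class above

class Train(Train_Base):
    def train(self, make_env):   # make_env: callable building one gym-like env (in practice, one server port per copy)
        train_env = SubprocVecEnv([make_env for _ in range(4)])
        eval_env = make_env()
        model = PPO("MlpPolicy", train_env, learning_rate=Train_Base.linear_schedule(3e-4))
        path = self.learn_model(model, total_steps=500_000, path="./scripts/gyms/logs/MyGym/",
                                eval_env=eval_env, eval_freq=10_000, save_freq=50_000,
                                backup_env_file=__file__, export_name="mygym")
        self.test_model(PPO.load(os.path.join(path, "best_model")), eval_env,
                        log_path=path, model_path=path)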

302
scripts/commons/UI.py Normal file

@@ -0,0 +1,302 @@
from itertools import zip_longest
from math import inf
import math
import numpy as np
import shutil
class UI():
console_width = 80
console_height = 24
@staticmethod
def read_particle(prompt, str_options, dtype=str, interval=[-inf,inf]):
'''
Read an input value from the user, either of a given dtype or from the str_options list
Parameters
----------
prompt : `str`
prompt to show user before reading input
str_options : `list`
list of str options (in addition to dtype if dtype is not str)
dtype : `class`
if dtype is str, then user must choose a value from str_options, otherwise it can also send a dtype value
interval : `list`
[>=min,<max] interval for numeric dtypes
Returns
-------
choice : `int` or dtype
index of str_options (int) or value (dtype)
is_str_option : `bool`
True if `choice` is an index from str_options
'''
# Check if user has no choice
if dtype is str and len(str_options) == 1:
print(prompt, str_options[0], sep="")
return 0, True
elif dtype is int and interval[0] == interval[1]-1:
print(prompt, interval[0], sep="")
return interval[0], False
while True:
inp = input(prompt)
if inp in str_options:
return str_options.index(inp), True
if dtype is not str:
try:
inp = dtype(inp)
if inp >= interval[0] and inp < interval[1]:
return inp, False
except:
pass
print("Error: illegal input! Options:", str_options, f" or {dtype}" if dtype != str else "")
@staticmethod
def read_int(prompt, min, max):
'''
Read int from user in a given interval
:param prompt: prompt to show user before reading input
:param min: minimum input (inclusive)
:param max: maximum input (exclusive)
:return: choice
'''
while True:
inp = input(prompt)
try:
inp = int(inp)
assert inp >= min and inp < max
return inp
except:
print(f"Error: illegal input! Choose number between {min} and {max-1}")
@staticmethod
def print_table(data, titles=None, alignment=None, cols_width=None, cols_per_title=None, margins=None, numbering=None, prompt=None):
'''
Print table
Parameters
----------
data : `list`
list of columns, where each column is a list of items
titles : `list`
list of titles for each column, default is `None` (no titles)
alignment : `list`
list of alignments per column (excluding titles), default is `None` (left alignment for all cols)
cols_width : `list`
list of widths per column, default is `None` (fit to content)
Positive values indicate a fixed column width
Zero indicates that the column will fit its content
cols_per_title : `list`
maximum number of subcolumns per title, default is `None` (1 subcolumn per title)
margins : `list`
number of added leading and trailing spaces per column, default is `None` (margin=2 for all columns)
numbering : `list`
list of booleans, one per column, indicating whether to number the items in that column
prompt : `str`
the prompt string, if given, is printed after the table before reading input
Returns
-------
index : `int`
returns global index of selected item (relative to table)
col_index : `int`
returns local index of selected item (relative to column)
column : `int`
returns number of column of selected item (starts at 0)
* if `numbering` or `prompt` are `None`, `None` is returned
Example
-------
titles = ["Name","Age"]
data = [["John","Graciete"], [30,50]]
alignment = ["<","^"] # 1st column is left-aligned, 2nd is centered
cols_width = [10,5] # 1st column's width=10, 2nd column's width=5
margins = [3,3]
numbering = [True,False] # prints: [0-John,1-Graciete][30,50]
prompt = "Choose a person:"
'''
#--------------------------------------------- parameters
cols_no = len(data)
if alignment is None:
alignment = ["<"]*cols_no
if cols_width is None:
cols_width = [0]*cols_no
if numbering is None:
numbering = [False]*cols_no
any_numbering = False
else:
any_numbering = True
if margins is None:
margins = [2]*cols_no
# Fit column to content + margin, if required
subcol = [] # subcolumn length and widths
for i in range(cols_no):
subcol.append([[],[]])
if cols_width[i] == 0:
numbering_width = 4 if numbering[i] else 0
if cols_per_title is None or cols_per_title[i] < 2:
cols_width[i] = max([len(str(item))+numbering_width for item in data[i]]) + margins[i]*2
else:
subcol[i][0] = math.ceil(len(data[i])/cols_per_title[i]) # subcolumn maximum length
cols_per_title[i] = math.ceil(len(data[i])/subcol[i][0]) # reduce number of columns as needed
cols_width[i] = margins[i]*(1+cols_per_title[i]) - (1 if numbering[i] else 0) # remove one if numbering, same as when printing
for j in range(cols_per_title[i]):
subcol_data_width = max([len(str(item))+numbering_width for item in data[i][j*subcol[i][0]:j*subcol[i][0]+subcol[i][0]]])
cols_width[i] += subcol_data_width # add subcolumn data width to column width
subcol[i][1].append(subcol_data_width) # save subcolumn data width
if titles is not None: # expand to accommodate titles if needed
cols_width[i] = max(cols_width[i], len(titles[i]) + margins[i]*2 )
if any_numbering:
no_of_items=0
cumulative_item_per_col=[0] # useful for getting the local index
for i in range(cols_no):
assert type(data[i]) == list, "In function 'print_table', 'data' must be a list of lists!"
if numbering[i]:
data[i] = [f"{n+no_of_items:3}-{d}" for n,d in enumerate(data[i])]
no_of_items+=len(data[i])
cumulative_item_per_col.append(no_of_items)
table_width = sum(cols_width)+cols_no-1
#--------------------------------------------- col titles
print(f'{"="*table_width}')
if titles is not None:
for i in range(cols_no):
print(f'{titles[i]:^{cols_width[i]}}', end='|' if i < cols_no - 1 else '')
print()
for i in range(cols_no):
print(f'{"-"*cols_width[i]}', end='+' if i < cols_no - 1 else '')
print()
#--------------------------------------------- merge subcolumns
if cols_per_title is not None:
for i,col in enumerate(data):
if cols_per_title[i] < 2:
continue
for k in range(subcol[i][0]): # create merged items
col[k] = (" "*margins[i]).join( f'{col[item]:{alignment[i]}{subcol[i][1][subcol_idx]}}'
for subcol_idx, item in enumerate(range(k,len(col),subcol[i][0])) )
del col[subcol[i][0]:] # delete repeated items
#--------------------------------------------- col items
for line in zip_longest(*data):
for i,item in enumerate(line):
l_margin = margins[i]-1 if numbering[i] else margins[i] # adjust margins when there are numbered options
item = "" if item is None else f'{" "*l_margin}{item}{" "*margins[i]}' # add margins
print(f'{item:{alignment[i]}{cols_width[i]}}', end='')
if i < cols_no - 1:
print(end='|')
print(end="\n")
print(f'{"="*table_width}')
#--------------------------------------------- prompt
if prompt is None:
return None
if not any_numbering:
print(prompt)
return None
index = UI.read_int(prompt, 0, no_of_items)
for i,n in enumerate(cumulative_item_per_col):
if index < n:
return index, index-cumulative_item_per_col[i-1], i-1
raise ValueError('Failed to catch illegal input')
@staticmethod
def print_list(data, numbering=True, prompt=None, divider=" | ", alignment="<", min_per_col=6):
'''
Print list - prints list, using as many columns as possible
Parameters
----------
data : `list`
list of items
numbering : `bool`
assigns number to each option
prompt : `str`
the prompt string, if given, is printed after the table before reading input
divider : `str`
string that divides columns
alignment : `str`
f-string style alignment ( '<', '>', '^' )
min_per_col : int
minimum number of items per column (avoids splitting the list into columns with fewer items than this)
Returns
-------
item : `int`, item
returns tuple with global index of selected item and the item object,
or `None` (if `numbering` or `prompt` are `None`)
'''
WIDTH = shutil.get_terminal_size()[0]
data_size = len(data)
items = []
items_len = []
#--------------------------------------------- Add numbers, margins and divider
for i in range(data_size):
number = f"{i}-" if numbering else ""
items.append( f"{divider}{number}{data[i]}" )
items_len.append( len(items[-1]) )
max_cols = np.clip((WIDTH+len(divider)) // min(items_len),1,math.ceil(data_size/max(min_per_col,1))) # width + len(divider) because it is not needed in last col
#--------------------------------------------- Check maximum number of columns, considering content width (min:1)
for i in range(max_cols,0,-1):
cols_width = []
cols_items = []
table_width = 0
a,b = divmod(data_size,i)
for col in range(i):
start = a*col + min(b,col)
end = start+a+(1 if col<b else 0)
cols_items.append( items[start:end] )
col_width = max(items_len[start:end])
cols_width.append( col_width )
table_width += col_width
if table_width <= WIDTH+len(divider):
break
table_width -= len(divider)
#--------------------------------------------- Print columns
print("="*table_width)
for row in range(math.ceil(data_size / i)):
for col in range(i):
content = cols_items[col][row] if len(cols_items[col]) > row else divider # print divider when there are no items
if col == 0:
l = len(divider)
print(end=f"{content[l:]:{alignment}{cols_width[col]-l}}") # remove divider from 1st col
else:
print(end=f"{content :{alignment}{cols_width[col] }}")
print()
print("="*table_width)
#--------------------------------------------- Prompt
if prompt is None:
return None
if numbering is None:
return None
else:
idx = UI.read_int( prompt, 0, data_size )
return idx, data[idx]
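A short usage sketch for the two printing helpers above (illustrative data):

from scripts.commons.UI import UI

# multi-column list with numbered options; returns (global index, chosen item)
idx, robot = UI.print_list(["NAO", "NAO_Toe", "NAO_Hetero3"], prompt="Choose robot: ")

# two-column table with numbered items; returns (global index, index within column, column)
choice = UI.print_table([["Kick", "Walk"], ["Get Up"]], titles=["Skills", "Behaviors"],
                        numbering=[True, True], prompt="Choose option: ")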

4 binary files not shown.