Dribble/scripts/commons/Train_Base.py

from datetime import datetime, timedelta
from itertools import count
from os import listdir
from os.path import isdir, join, isfile
from scripts.commons.UI import UI
from shutil import copy
from stable_baselines3 import PPO
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback, CallbackList, BaseCallback
from typing import Callable
from world.World import World
from xml.dom import minidom
import numpy as np
import os, time, math, csv, select, sys
import pickle
import xml.etree.ElementTree as ET


class Train_Base():
    def __init__(self, script) -> None:
        '''
        Keep a reference to `script` and copy the settings found in `script.args`

        Port convention
        ---------------
        Training with multiple environments (multiprocessing):
            server ports:  self.server_p, self.server_p+1, self.server_p+2, ...
            monitor ports: self.monitor_p+1000, self.monitor_p+1001, self.monitor_p+1002, ...
            (the +1000 offset is what allows more than 100 environments)
        Testing:
            self.server_p and self.monitor_p are used
        '''

        self.script = script
        args = script.args

        # State used by control_fps() to regulate the simulation speed
        self.cf_target_period = World.STEPTIME # target simulation speed while testing (default: real-time)
        self.cf_last_time = 0
        self.cf_delay = 0

        # Server connection
        self.ip = args.i
        self.server_p = args.p              # (initial) server port
        self.monitor_p = args.m             # monitor port when testing
        self.monitor_p_1000 = args.m + 1000 # initial monitor port when training

        # Agent settings
        self.robot_type = args.r
        self.team = args.t
        self.uniform = args.u

    @staticmethod
    def prompt_user_for_model():
        '''
        Let the user pick a saved model (.zip file) from ./scripts/gyms/logs/

        A folder is chosen first, then a model inside that folder.
        Both lists are presented newest first (sorted by modification date).
        Pressing ctrl+c while choosing a model goes back to the folder list.

        Returns
        -------
        model_info : dict or None
            None if ctrl+c was pressed while choosing the folder, otherwise a
            dictionary with the keys "folder_dir", "folder_name" and "model_file"
        '''

        logs_root = "./scripts/gyms/logs/"
        mtime = os.path.getmtime

        folders = [entry for entry in listdir(logs_root) if isdir(join(logs_root, entry))]
        folders = sorted(folders, key=lambda entry: mtime(join(logs_root, entry)), reverse=True) # newest first

        while True:
            try:
                folder_name = UI.print_list(folders,prompt="Choose folder (ctrl+c to return): ")[1]
            except KeyboardInterrupt:
                print()
                return None # ctrl+c

            folder_dir = join(logs_root, folder_name)

            # Model names are listed without the ".zip" extension
            models = []
            for file_name in listdir(folder_dir):
                if file_name.endswith(".zip") and isfile(join(folder_dir, file_name)):
                    models.append(file_name[:-4])

            if not models:
                print("The chosen folder does not contain any .zip file!")
                continue

            models.sort(key=lambda name: mtime(join(folder_dir, name+".zip")), reverse=True) # newest first

            try:
                model_name = UI.print_list(models,prompt="Choose model (ctrl+c to return): ")[1]
            except KeyboardInterrupt:
                print()
                continue # back to the folder list

            return {"folder_dir":folder_dir, "folder_name":folder_name, "model_file":join(folder_dir, model_name+".zip")}


    def control_fps(self, read_input = False):
        ''' Add delay to control simulation speed '''

        if read_input:
            speed = input()
            if speed == '':
                self.cf_target_period = 0
                print(f"Changed simulation speed to MAX")
            else:
                if speed == '0':
                    inp = input("Paused. Set new speed or '' to use previous speed:")
                    if inp != '':
                        speed = inp   

                try:
                    speed = int(speed)
                    assert speed >= 0
                    self.cf_target_period = World.STEPTIME * 100 / speed
                    print(f"Changed simulation speed to {speed}%")
                except:
                    print("""Train_Base.py: 
    Error: To control the simulation speed, enter a non-negative integer.
    To disable this control module, use test_model(..., enable_FPS_control=False) in your gym environment.""")

        now = time.time()
        period = now - self.cf_last_time
        self.cf_last_time = now
        self.cf_delay += (self.cf_target_period - period)*0.9
        if self.cf_delay > 0:
            time.sleep(self.cf_delay)
        else:
            self.cf_delay = 0


    def test_model(self, model:BaseAlgorithm, env, log_path:str=None, model_path:str=None, max_episodes=0, enable_FPS_control=True, verbose=1):
        '''
        Test model and log results

        Parameters
        ----------
        model : BaseAlgorithm
            Trained model 
        env : Env
            Gym-like environment
        log_path : str
            Folder where statistics file is saved, default is `None` (no file is saved)
        model_path : str
            Folder where it reads evaluations.npz to plot it and create evaluations.csv, default is `None` (no plot, no csv)
        max_episodes : int
            Run tests for this number of episodes
            Default is 0 (run until user aborts)
        verbose : int
            0 - no output (except if enable_FPS_control=True)
            1 - print episode statistics
        '''

        if model_path is not None:
            assert os.path.isdir(model_path), f"{model_path} is not a valid path"
            self.display_evaluations(model_path)

        if log_path is not None:
            assert os.path.isdir(log_path), f"{log_path} is not a valid path"

            # If file already exists, don't overwrite
            if os.path.isfile(log_path + "/test.csv"):
                for i in range(1000):
                    p = f"{log_path}/test_{i:03}.csv"
                    if not os.path.isfile(p):
                        log_path = p
                        break
            else:
                log_path += "/test.csv"
            
            with open(log_path, 'w') as f:
                f.write("reward,ep. length,rew. cumulative avg., ep. len. cumulative avg.\n")
            print("Train statistics are saved to:", log_path)

        if enable_FPS_control: # control simulation speed (using non blocking user input)
            print("\nThe simulation speed can be changed by sending a non-negative integer\n"
                  "(e.g. '50' sets speed to 50%, '0' pauses the simulation, '' sets speed to MAX)\n")

        ep_reward = 0
        ep_length = 0
        rewards_sum = 0
        reward_min = math.inf
        reward_max = -math.inf
        ep_lengths_sum = 0
        ep_no = 0

        obs = env.reset()
        while True:
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            ep_reward += reward
            ep_length += 1

            if enable_FPS_control: # control simulation speed (using non blocking user input)
                self.control_fps(select.select([sys.stdin], [], [], 0)[0]) 

            if done:
                obs = env.reset()
                rewards_sum += ep_reward
                ep_lengths_sum += ep_length
                reward_max = max(ep_reward, reward_max)
                reward_min = min(ep_reward, reward_min)
                ep_no += 1
                avg_ep_lengths = ep_lengths_sum/ep_no
                avg_rewards = rewards_sum/ep_no

                if verbose > 0:
                    print(  f"\rEpisode: {ep_no:<3}  Ep.Length: {ep_length:<4.0f}  Reward: {ep_reward:<6.2f}                                                             \n",
                        end=f"--AVERAGE--   Ep.Length: {avg_ep_lengths:<4.0f}  Reward: {avg_rewards:<6.2f}  (Min: {reward_min:<6.2f}  Max: {reward_max:<6.2f})", flush=True)
                
                if log_path is not None:
                    with open(log_path, 'a') as f:
                        writer = csv.writer(f)
                        writer.writerow([ep_reward, ep_length, avg_rewards, avg_ep_lengths])
                
                if ep_no == max_episodes:
                    return

                ep_reward = 0
                ep_length = 0

    def learn_model(self, model:BaseAlgorithm, total_steps:int, path:str, eval_env=None, eval_freq=None, eval_eps=5, save_freq=None, backup_env_file=None, export_name=None):
        '''
        Train a model for a given number of time steps and save it

        Parameters
        ----------
        model : BaseAlgorithm
            Model to train
        total_steps : int
            The total number of samples (env steps) to train on
        path : str
            Folder where the trained model is saved
            If the folder already exists, an incrementing number suffix is added
        eval_env : Env
            Environment used to periodically test the model
            Default is None (no periodical evaluation)
        eval_freq : int
            Evaluate the agent every X steps
            Default is None (no periodical evaluation)
        eval_eps : int
            Number of episodes per evaluation (both eval_env and eval_freq must be defined)
            Default is 5
        save_freq : int
            Save a checkpoint of the model every X steps
            Default is None (no periodical checkpoint)
        backup_env_file : str
            File that is copied to the model's folder (training timestamps are appended to the copy)
            Default is None (no backup)
        export_name : str
            If export_name and save_freq are defined, a model is exported every X steps
            Default is None (no export)

        Returns
        -------
        model_path : str
            Directory where model was actually saved (considering incremental suffix)

        Notes
        -----
        If `eval_env` and `eval_freq` were specified:
            - The policy will be evaluated in `eval_env` every `eval_freq` steps
            - Evaluation results will be saved in `path` and shown at the end of training
            - Every time the results improve, the model is saved
        '''

        t_begin = time.time()
        begin_stamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

        # Never overwrite a previous run: take the first free "<path>_NNN/" folder
        if os.path.isdir(path):
            base = path.rstrip("/")
            index = 0
            while os.path.isdir(f'{base}_{index:03}/'):
                index += 1
            path = f'{base}_{index:03}/'
        os.makedirs(path)

        # Backup environment file
        has_backup = backup_env_file is not None
        if has_backup:
            backup_file = os.path.join(path, os.path.basename(backup_env_file))
            copy(backup_env_file, backup_file)

        evaluate = eval_env is not None and eval_freq is not None

        # The list order is the order in which the callbacks are handed to CallbackList
        active_callbacks = []
        if evaluate:
            # Periodic evaluation + console plot / csv of the evaluations
            active_callbacks.append(EvalCallback(eval_env, n_eval_episodes=eval_eps, eval_freq=eval_freq, log_path=path,
                                                 best_model_save_path=path, deterministic=True, render=False))
            active_callbacks.append(Cyclic_Callback(eval_freq, lambda:self.display_evaluations(path,True)))
        if save_freq is not None:
            # Periodic checkpoint, optionally followed by the export of that checkpoint
            active_callbacks.append(CheckpointCallback(save_freq=save_freq, save_path=path, name_prefix="model", verbose=1))
            if export_name is not None:
                active_callbacks.append(Export_Callback(save_freq, path, export_name))

        model.learn( total_timesteps=total_steps, callback=CallbackList(active_callbacks) )
        model.save( os.path.join(path, "last_model") )

        # Display evaluations if they exist
        if evaluate:
            self.display_evaluations(path)

        # Display timestamps + Model path
        end_stamp = datetime.now().strftime('%d/%m/%Y %H:%M:%S')
        elapsed = timedelta(seconds=int(time.time()-t_begin))
        print(f"Train start:     {begin_stamp}")
        print(f"Train end:       {end_stamp}")
        print(f"Train duration:  {elapsed}")
        print(f"Model path:      {path}")
        
        # Append timestamps to backup environment file
        if has_backup:
            footer = ( f"\n# Train start:    {begin_stamp}\n"
                       f"# Train end:      {end_stamp}\n"
                       f"# Train duration: {elapsed}" )
            with open(backup_file, 'a') as f:
                f.write(footer)

        return path

    def display_evaluations(self, path, save_csv=False):

        eval_npz = os.path.join(path, "evaluations.npz")

        if not os.path.isfile(eval_npz):
            return

        console_width = 80
        console_height = 18
        symb_x = "\u2022"
        symb_o = "\u007c"
        symb_xo = "\u237f"

        with np.load(eval_npz) as data:
            time_steps = data["timesteps"]
            results_raw = np.mean(data["results"],axis=1)
            ep_lengths_raw = np.mean(data["ep_lengths"],axis=1)
        sample_no = len(results_raw)

        xvals = np.linspace(0, sample_no-1, 80)
        results    = np.interp(xvals, range(sample_no), results_raw)
        ep_lengths = np.interp(xvals, range(sample_no), ep_lengths_raw)

        results_limits    = np.min(results),    np.max(results)
        ep_lengths_limits = np.min(ep_lengths), np.max(ep_lengths)

        results_discrete    = np.digitize(results,    np.linspace(results_limits[0]-1e-5, results_limits[1]+1e-5,    console_height+1))-1
        ep_lengths_discrete = np.digitize(ep_lengths, np.linspace(0,                      ep_lengths_limits[1]+1e-5, console_height+1))-1

        matrix = np.zeros((console_height, console_width, 2), int)
        matrix[results_discrete[0]   ][0][0] = 1    # draw 1st column
        matrix[ep_lengths_discrete[0]][0][1] = 1    # draw 1st column
        rng = [[results_discrete[0], results_discrete[0]], [ep_lengths_discrete[0], ep_lengths_discrete[0]]]

        # Create continuous line for both plots
        for k in range(2):
            for i in range(1,console_width):
                x = [results_discrete, ep_lengths_discrete][k][i]
                if x > rng[k][1]:
                    rng[k] = [rng[k][1]+1, x]
                elif x < rng[k][0]:
                    rng[k] = [x, rng[k][0]-1]
                else:
                    rng[k] = [x,x]
                for j in range(rng[k][0],rng[k][1]+1):
                    matrix[j][i][k] = 1

        print(f'{"-"*console_width}')
        for l in reversed(range(console_height)):
            for c in range(console_width):
                if   np.all(matrix[l][c] == 0): print(end=" ")
                elif np.all(matrix[l][c] == 1): print(end=symb_xo)
                elif matrix[l][c][0] == 1:      print(end=symb_x)
                else:                           print(end=symb_o)
            print()
        print(f'{"-"*console_width}')
        print(f"({symb_x})-reward          min:{results_limits[0]:11.2f}    max:{results_limits[1]:11.2f}")
        print(f"({symb_o})-ep. length      min:{ep_lengths_limits[0]:11.0f}    max:{ep_lengths_limits[1]:11.0f}    {time_steps[-1]/1000:15.0f}k steps")
        print(f'{"-"*console_width}')

        # save CSV
        if save_csv:
            eval_csv = os.path.join(path, "evaluations.csv")
            with open(eval_csv, 'a+') as f:
                writer = csv.writer(f)
                if sample_no == 1:
                    writer.writerow(["time_steps", "reward ep.", "length"])
                writer.writerow([time_steps[-1],results_raw[-1],ep_lengths_raw[-1]])


    def generate_slot_behavior(self, path, slots, auto_head:bool, XML_name):
        '''
        Function that generates the XML file for the optimized slot behavior, overwriting previous files
        '''

        file = os.path.join( path, XML_name )

        # create the file structure
        auto_head = '1' if auto_head else '0'
        EL_behavior = ET.Element('behavior',{'description':'Add description to XML file', "auto_head":auto_head})

        for i,s in enumerate(slots):
            EL_slot = ET.SubElement(EL_behavior, 'slot', {'name':str(i), 'delta':str(s[0]/1000)})
            for j in s[1]: # go through all joint indices
                ET.SubElement(EL_slot, 'move', {'id':str(j), 'angle':str(s[2][j])})

        # create XML file
        xml_rough = ET.tostring( EL_behavior, 'utf-8' )
        xml_pretty = minidom.parseString(xml_rough).toprettyxml(indent="  ")
        with open(file, "w") as x:
            x.write(xml_pretty)
        
        print(file, "was created!")

    @staticmethod
    def linear_schedule(initial_value: float) -> Callable[[float], float]:
        '''
        Linear learning rate schedule

        Parameters
        ----------
        initial_value : float
            Initial learning rate
        
        Returns
        -------
        schedule : Callable[[float], float]
            schedule that computes current learning rate depending on remaining progress
        '''
        def func(progress_remaining: float) -> float:
            '''
            Compute learning rate according to current progress

            Parameters
            ----------
            progress_remaining : float
                Progress will decrease from 1 (beginning) to 0
            
            Returns
            -------
            learning_rate : float
                Learning rate according to current progress
            '''
            return progress_remaining * initial_value

        return func

    @staticmethod
    def export_model(input_file, output_file, add_sufix=True):
        '''
        Export the policy network weights of a PPO model to a pickle file

        The pickled object is a list with one entry per layer:
            [bias, weight, activation name]
        where the activation name is "tanh" for the hidden layers and "none" for the final layer

        Parameters
        ----------
        input_file : str
            Input file, loaded with PPO.load
        output_file : str
            Output file, including directory
        add_sufix : bool
            If true, a suffix is appended to the file name: output_file + "_{index}.pkl"
            (the first index that does not exist yet is used, so no file is overwritten)
        '''

        if add_sufix:
            index = 0
            while os.path.isfile(f"{output_file}_{index:03}.pkl"):
                index += 1
            output_file = f"{output_file}_{index:03}.pkl"
        
        state = PPO.load(input_file).policy.state_dict() # dictionary containing network layers

        def as_numpy(name):
            ''' Extract one tensor of the policy as a numpy array '''
            return state[name].detach().cpu().numpy()

        layers = []

        # Hidden layers: indices advance by 2 (original note: "that's how SB3 works")
        layer_no = 0
        while f"mlp_extractor.policy_net.{layer_no}.bias" in state:
            prefix = f"mlp_extractor.policy_net.{layer_no}"
            layers.append([as_numpy(f"{prefix}.bias"), as_numpy(f"{prefix}.weight"), "tanh"])
            layer_no += 2

        # Final layer
        layers.append([as_numpy("action_net.bias"), as_numpy("action_net.weight"), "none"])
        
        with open(output_file,"wb") as f:
            pickle.dump(layers, f, protocol=4) # protocol 4 is backward compatible with Python 3.4


class Cyclic_Callback(BaseCallback):
    ''' Stable baselines custom callback: calls `function` every `freq` calls of the callback '''
    def __init__(self, freq, function):
        super().__init__(1) # 1 is passed as the first positional argument of BaseCallback (presumably verbose - TODO confirm)
        self.function = function
        self.freq = freq

    def _on_step(self) -> bool:
        is_due = self.n_calls % self.freq == 0
        if is_due:
            self.function()
        # If the callback returns False, training is aborted early
        return True

class Export_Callback(BaseCallback):
    ''' Stable baselines custom callback: exports the "model_{num_timesteps}_steps.zip" file found in `load_path` every `freq` calls '''
    def __init__(self, freq, load_path, export_name):
        super().__init__(1) # 1 is passed as the first positional argument of BaseCallback (presumably verbose - TODO confirm)
        self.export_name = export_name
        self.load_path = load_path
        self.freq = freq

    def _on_step(self) -> bool:
        # If the callback returns False, training is aborted early (this one always returns True)
        if self.n_calls % self.freq != 0:
            return True

        checkpoint = os.path.join(self.load_path, f"model_{self.num_timesteps}_steps.zip")
        Train_Base.export_model(checkpoint, f"./scripts/gyms/export/{self.export_name}")
        return True
first commit 7 months ago			`from datetime import datetime, timedelta`
			`from itertools import count`
			`from os import listdir`
			`from os.path import isdir, join, isfile`
			`from scripts.commons.UI import UI`
			`from shutil import copy`
			`from stable_baselines3 import PPO`
			`from stable_baselines3.common.base_class import BaseAlgorithm`
			`from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback, CallbackList, BaseCallback`
			`from typing import Callable`
			`from world.World import World`
			`from xml.dom import minidom`
			`import numpy as np`
			`import os, time, math, csv, select, sys`
			`import pickle`
			`import xml.etree.ElementTree as ET`


			`class Train_Base():`
			`def __init__(self, script) -> None:`
			`'''`
			`When training with multiple environments (multiprocessing):`
			`The server port is incremented as follows:`
			`self.server_p, self.server_p+1, self.server_p+2, ...`
			`We add +1000 to the initial monitor port, so than we can have more than 100 environments:`
			`self.monitor_p+1000, self.monitor_p+1001, self.monitor_p+1002, ...`
			`When testing we use self.server_p and self.monitor_p`
			`'''`

			`args = script.args`
			`self.script = script`
			`self.ip = args.i`
			`self.server_p = args.p # (initial) server port`
			`self.monitor_p = args.m # monitor port when testing`
			`self.monitor_p_1000 = args.m + 1000 # initial monitor port when training`
			`self.robot_type = args.r`
			`self.team = args.t`
			`self.uniform = args.u`
			`self.cf_last_time = 0`
			`self.cf_delay = 0`
			`self.cf_target_period = World.STEPTIME # target simulation speed while testing (default: real-time)`

			`@staticmethod`
			`def prompt_user_for_model():`

			`gyms_logs_path = "./scripts/gyms/logs/"`
			`folders = [f for f in listdir(gyms_logs_path) if isdir(join(gyms_logs_path, f))]`
			`folders.sort(key=lambda f: os.path.getmtime(join(gyms_logs_path, f)), reverse=True) # sort by modification date`

			`while True:`
			`try:`
			`folder_name = UI.print_list(folders,prompt="Choose folder (ctrl+c to return): ")[1]`
			`except KeyboardInterrupt:`
			`print()`
			`return None # ctrl+c`

			`folder_dir = os.path.join(gyms_logs_path, folder_name)`
			`models = [m[:-4] for m in listdir(folder_dir) if isfile(join(folder_dir, m)) and m.endswith(".zip")]`

			`if not models:`
			`print("The chosen folder does not contain any .zip file!")`
			`continue`

			`models.sort(key=lambda m: os.path.getmtime(join(folder_dir, m+".zip")), reverse=True) # sort by modification date`

			`try:`
			`model_name = UI.print_list(models,prompt="Choose model (ctrl+c to return): ")[1]`
			`break`
			`except KeyboardInterrupt:`
			`print()`

			`return {"folder_dir":folder_dir, "folder_name":folder_name, "model_file":os.path.join(folder_dir, model_name+".zip")}`


			`def control_fps(self, read_input = False):`
			`''' Add delay to control simulation speed '''`

			`if read_input:`
			`speed = input()`
			`if speed == '':`
			`self.cf_target_period = 0`
			`print(f"Changed simulation speed to MAX")`
			`else:`
			`if speed == '0':`
			`inp = input("Paused. Set new speed or '' to use previous speed:")`
			`if inp != '':`
			`speed = inp`

			`try:`
			`speed = int(speed)`
			`assert speed >= 0`
			`self.cf_target_period = World.STEPTIME * 100 / speed`
			`print(f"Changed simulation speed to {speed}%")`
			`except:`
			`print("""Train_Base.py:`
			`Error: To control the simulation speed, enter a non-negative integer.`
			`To disable this control module, use test_model(..., enable_FPS_control=False) in your gym environment.""")`

			`now = time.time()`
			`period = now - self.cf_last_time`
			`self.cf_last_time = now`
			`self.cf_delay += (self.cf_target_period - period)*0.9`
			`if self.cf_delay > 0:`
			`time.sleep(self.cf_delay)`
			`else:`
			`self.cf_delay = 0`


			`def test_model(self, model:BaseAlgorithm, env, log_path:str=None, model_path:str=None, max_episodes=0, enable_FPS_control=True, verbose=1):`
			`'''`
			`Test model and log results`

			`Parameters`
			`----------`
			`model : BaseAlgorithm`
			`Trained model`
			`env : Env`
			`Gym-like environment`
			`log_path : str`
			Folder where statistics file is saved, default is `None` (no file is saved)
			`model_path : str`
			Folder where it reads evaluations.npz to plot it and create evaluations.csv, default is `None` (no plot, no csv)
			`max_episodes : int`
			`Run tests for this number of episodes`
			`Default is 0 (run until user aborts)`
			`verbose : int`
			`0 - no output (except if enable_FPS_control=True)`
			`1 - print episode statistics`
			`'''`

			`if model_path is not None:`
			`assert os.path.isdir(model_path), f"{model_path} is not a valid path"`
			`self.display_evaluations(model_path)`

			`if log_path is not None:`
			`assert os.path.isdir(log_path), f"{log_path} is not a valid path"`

			`# If file already exists, don't overwrite`
			`if os.path.isfile(log_path + "/test.csv"):`
			`for i in range(1000):`
			`p = f"{log_path}/test_{i:03}.csv"`
			`if not os.path.isfile(p):`
			`log_path = p`
			`break`
			`else:`
			`log_path += "/test.csv"`

			`with open(log_path, 'w') as f:`
			`f.write("reward,ep. length,rew. cumulative avg., ep. len. cumulative avg.\n")`
			`print("Train statistics are saved to:", log_path)`

			`if enable_FPS_control: # control simulation speed (using non blocking user input)`
			`print("\nThe simulation speed can be changed by sending a non-negative integer\n"`
			`"(e.g. '50' sets speed to 50%, '0' pauses the simulation, '' sets speed to MAX)\n")`

			`ep_reward = 0`
			`ep_length = 0`
			`rewards_sum = 0`
			`reward_min = math.inf`
			`reward_max = -math.inf`
			`ep_lengths_sum = 0`
			`ep_no = 0`

			`obs = env.reset()`
			`while True:`
			`action, _states = model.predict(obs, deterministic=True)`
			`obs, reward, done, info = env.step(action)`
			`ep_reward += reward`
			`ep_length += 1`

			`if enable_FPS_control: # control simulation speed (using non blocking user input)`
			`self.control_fps(select.select([sys.stdin], [], [], 0)[0])`

			`if done:`
			`obs = env.reset()`
			`rewards_sum += ep_reward`
			`ep_lengths_sum += ep_length`
			`reward_max = max(ep_reward, reward_max)`
			`reward_min = min(ep_reward, reward_min)`
			`ep_no += 1`
			`avg_ep_lengths = ep_lengths_sum/ep_no`
			`avg_rewards = rewards_sum/ep_no`

			`if verbose > 0:`
			`print( f"\rEpisode: {ep_no:<3} Ep.Length: {ep_length:<4.0f} Reward: {ep_reward:<6.2f} \n",`
			`end=f"--AVERAGE-- Ep.Length: {avg_ep_lengths:<4.0f} Reward: {avg_rewards:<6.2f} (Min: {reward_min:<6.2f} Max: {reward_max:<6.2f})", flush=True)`

			`if log_path is not None:`
			`with open(log_path, 'a') as f:`
			`writer = csv.writer(f)`
			`writer.writerow([ep_reward, ep_length, avg_rewards, avg_ep_lengths])`

			`if ep_no == max_episodes:`
			`return`

			`ep_reward = 0`
			`ep_length = 0`

			`def learn_model(self, model:BaseAlgorithm, total_steps:int, path:str, eval_env=None, eval_freq=None, eval_eps=5, save_freq=None, backup_env_file=None, export_name=None):`
			`'''`
			`Learn Model for a specific number of time steps`

			`Parameters`
			`----------`
			`model : BaseAlgorithm`
			`Model to train`
			`total_steps : int`
			`The total number of samples (env steps) to train on`
			`path : str`
			`Path where the trained model is saved`
			`If the path already exists, an incrementing number suffix is added`
			`eval_env : Env`
			`Environment to periodically test the model`
			`Default is None (no periodical evaluation)`
			`eval_freq : int`
			`Evaluate the agent every X steps`
			`Default is None (no periodical evaluation)`
			`eval_eps : int`
			`Evaluate the agent for X episodes (both eval_env and eval_freq must be defined)`
			`Default is 5`
			`save_freq : int`
			`Saves model at every X steps`
			`Default is None (no periodical checkpoint)`
			`backup_gym_file : str`
			`Generates backup of environment file in model's folder`
			`Default is None (no backup)`
			`export_name : str`
			`If export_name and save_freq are defined, a model is exported every X steps`
			`Default is None (no export)`

			`Returns`
			`-------`
			`model_path : str`
			`Directory where model was actually saved (considering incremental suffix)`

			`Notes`
			`-----`
			If `eval_env` and `eval_freq` were specified:
			- The policy will be evaluated in `eval_env` every `eval_freq` steps
			- Evaluation results will be saved in `path` and shown at the end of training
			`- Every time the results improve, the model is saved`
			`'''`

			`start = time.time()`
			`start_date = datetime.now().strftime("%d/%m/%Y %H:%M:%S")`

			`# If path already exists, add suffix to avoid overwriting`
			`if os.path.isdir(path):`
			`for i in count():`
			`p = path.rstrip("/")+f'_{i:03}/'`
			`if not os.path.isdir(p):`
			`path = p`
			`break`
			`os.makedirs(path)`

			`# Backup environment file`
			`if backup_env_file is not None:`
			`backup_file = os.path.join(path, os.path.basename(backup_env_file))`
			`copy(backup_env_file, backup_file)`

			`evaluate = bool(eval_env is not None and eval_freq is not None)`

			`# Create evaluation callback`
			`eval_callback = None if not evaluate else EvalCallback(eval_env, n_eval_episodes=eval_eps, eval_freq=eval_freq, log_path=path,`
			`best_model_save_path=path, deterministic=True, render=False)`

			`# Create custom callback to display evaluations`
			`custom_callback = None if not evaluate else Cyclic_Callback(eval_freq, lambda:self.display_evaluations(path,True))`

			`# Create checkpoint callback`
			`checkpoint_callback = None if save_freq is None else CheckpointCallback(save_freq=save_freq, save_path=path, name_prefix="model", verbose=1)`

			`# Create custom callback to export checkpoint models`
			`export_callback = None if save_freq is None or export_name is None else Export_Callback(save_freq, path, export_name)`

			`callbacks = CallbackList([c for c in [eval_callback, custom_callback, checkpoint_callback, export_callback] if c is not None])`

			`model.learn( total_timesteps=total_steps, callback=callbacks )`
			`model.save( os.path.join(path, "last_model") )`

			`# Display evaluations if they exist`
			`if evaluate:`
			`self.display_evaluations(path)`

			`# Display timestamps + Model path`
			`end_date = datetime.now().strftime('%d/%m/%Y %H:%M:%S')`
			`duration = timedelta(seconds=int(time.time()-start))`
			`print(f"Train start: {start_date}")`
			`print(f"Train end: {end_date}")`
			`print(f"Train duration: {duration}")`
			`print(f"Model path: {path}")`

			`# Append timestamps to backup environment file`
			`if backup_env_file is not None:`
			`with open(backup_file, 'a') as f:`
			`f.write(f"\n# Train start: {start_date}\n")`
			`f.write( f"# Train end: {end_date}\n")`
			`f.write( f"# Train duration: {duration}")`

			`return path`

			def display_evaluations(self, path, save_csv=False):
			    '''
			    Print a console plot of the evaluation history stored in <path>/evaluations.npz

			    Two curves share the same chart: the reward and the episode length of each
			    evaluation, both averaged over axis 1 of the "results" and "ep_lengths" arrays.
			    The history is resampled to one value per console column.
			    Nothing is printed or written if the file is missing or holds no evaluations.

			    Parameters
			    ----------
			    path : str
			        Folder that contains evaluations.npz (evaluations.csv is written to the same folder)
			    save_csv : bool
			        If true, the latest evaluation is appended to <path>/evaluations.csv
			        (the header row is written when the history holds exactly one evaluation)
			    '''

			    eval_npz = os.path.join(path, "evaluations.npz")

			    if not os.path.isfile(eval_npz):
			        return

			    console_width = 80
			    console_height = 18
			    symb_x = "\u2022"
			    symb_o = "\u007c"
			    symb_xo = "\u237f"

			    with np.load(eval_npz) as data:
			        time_steps = data["timesteps"]
			        results_raw = np.mean(data["results"],axis=1)
			        ep_lengths_raw = np.mean(data["ep_lengths"],axis=1)
			    sample_no = len(results_raw)

			    if sample_no == 0:
			        return # no evaluation yet: nothing to interpolate, plot or export

			    # Resample both series to exactly one value per console column
			    xvals = np.linspace(0, sample_no-1, console_width)
			    results = np.interp(xvals, range(sample_no), results_raw)
			    ep_lengths = np.interp(xvals, range(sample_no), ep_lengths_raw)

			    results_limits = np.min(results), np.max(results)
			    ep_lengths_limits = np.min(ep_lengths), np.max(ep_lengths)

			    # Map each value to a row index (the small margin keeps min/max inside the outer bins)
			    # Rewards are scaled between their min and max, episode lengths between 0 and their max
			    results_discrete = np.digitize(results, np.linspace(results_limits[0]-1e-5, results_limits[1]+1e-5, console_height+1))-1
			    ep_lengths_discrete = np.digitize(ep_lengths, np.linspace(0, ep_lengths_limits[1]+1e-5, console_height+1))-1
			    discrete = (results_discrete, ep_lengths_discrete)

			    # matrix[row][column][k] == 1 if curve k (0: reward, 1: ep. length) occupies that cell
			    matrix = np.zeros((console_height, console_width, 2), int)
			    matrix[results_discrete[0]][0][0] = 1 # draw 1st column
			    matrix[ep_lengths_discrete[0]][0][1] = 1 # draw 1st column

			    # rng[k] = [lowest, highest] row filled in the previous column for curve k
			    rng = [[results_discrete[0], results_discrete[0]], [ep_lengths_discrete[0], ep_lengths_discrete[0]]]

			    # Create continuous line for both plots
			    for k in range(2):
			        for i in range(1,console_width):
			            x = discrete[k][i]
			            if x > rng[k][1]:
			                rng[k] = [rng[k][1]+1, x] # jump up: fill the rows above the previous segment
			            elif x < rng[k][0]:
			                rng[k] = [x, rng[k][0]-1] # jump down: fill the rows below the previous segment
			            else:
			                rng[k] = [x,x]
			            for j in range(rng[k][0],rng[k][1]+1):
			                matrix[j][i][k] = 1

			    # One symbol per (reward, ep. length) cell state
			    glyphs = {(0, 0): " ", (1, 1): symb_xo, (1, 0): symb_x, (0, 1): symb_o}

			    print(f'{"-"*console_width}')
			    for row in reversed(matrix.tolist()): # highest row is printed first
			        print("".join(glyphs[tuple(cell)] for cell in row))
			    print(f'{"-"*console_width}')
			    print(f"({symb_x})-reward min:{results_limits[0]:11.2f} max:{results_limits[1]:11.2f}")
			    print(f"({symb_o})-ep. length min:{ep_lengths_limits[0]:11.0f} max:{ep_lengths_limits[1]:11.0f} {time_steps[-1]/1000:15.0f}k steps")
			    print(f'{"-"*console_width}')

			    # save CSV
			    if save_csv:
			        eval_csv = os.path.join(path, "evaluations.csv")
			        # newline='' lets the csv module control line endings (avoids blank rows on some platforms)
			        with open(eval_csv, 'a+', newline='') as f:
			            writer = csv.writer(f)
			            if sample_no == 1:
			                writer.writerow(["time_steps", "reward ep.", "length"])
			            writer.writerow([time_steps[-1],results_raw[-1],ep_lengths_raw[-1]])


			def generate_slot_behavior(self, path, slots, auto_head:bool, XML_name):
			    '''
			    Write a slot behavior to an XML file, replacing any previous file with the same name

			    Parameters
			    ----------
			    path : str
			        Destination folder
			    slots : iterable
			        One entry per slot, read as: [0] value divided by 1000 to obtain the 'delta' attribute,
			        [1] iterable of joint indices, [2] container of angles indexed by joint index
			    auto_head : bool
			        Stored in the 'auto_head' attribute as '1' (true) or '0' (false)
			    XML_name : str
			        Name of the generated file
			    '''

			    target = os.path.join(path, XML_name)

			    # root element: <behavior description=... auto_head=...>
			    root_attrs = {'description':'Add description to XML file', "auto_head": '1' if auto_head else '0'}
			    root = ET.Element('behavior', root_attrs)

			    # one <slot> per entry, holding one <move> per joint index
			    for slot_idx, slot in enumerate(slots):
			        slot_attrs = {'name': str(slot_idx), 'delta': str(slot[0]/1000)}
			        slot_node = ET.SubElement(root, 'slot', slot_attrs)
			        for joint in slot[1]:
			            move_attrs = {'id': str(joint), 'angle': str(slot[2][joint])}
			            ET.SubElement(slot_node, 'move', move_attrs)

			    # serialize, then re-parse with minidom to obtain an indented document
			    raw_xml = ET.tostring(root, 'utf-8')
			    pretty_xml = minidom.parseString(raw_xml).toprettyxml(indent=" ")

			    with open(target, "w") as out:
			        out.write(pretty_xml)

			    print(target, "was created!")

			@staticmethod
			def linear_schedule(initial_value: float) -> Callable[[float], float]:
			    '''
			    Create a learning rate schedule that scales linearly with the remaining progress

			    Parameters
			    ----------
			    initial_value : float
			        Learning rate returned when the remaining progress is 1

			    Returns
			    -------
			    schedule : Callable[[float], float]
			        function that maps the remaining progress to the current learning rate
			    '''
			    def scaled_rate(progress_remaining: float) -> float:
			        '''
			        Learning rate for the given remaining progress

			        Parameters
			        ----------
			        progress_remaining : float
			            Remaining progress, going from 1 (beginning) down to 0

			        Returns
			        -------
			        learning_rate : float
			            initial learning rate multiplied by the remaining progress
			        '''
			        learning_rate = progress_remaining * initial_value
			        return learning_rate

			    return scaled_rate

			@staticmethod
			def export_model(input_file, output_file, add_sufix=True):
			    '''
			    Dump the policy network of a saved PPO model to a pickle file

			    The pickled object is a list with one entry per layer: [bias, weight, activation name],
			    where hidden layers are tagged "tanh" and the final action layer is tagged "none".

			    Parameters
			    ----------
			    input_file : str
			        Model file to load with PPO.load
			    output_file : str
			        Output file, including directory
			    add_sufix : bool
			        If true, the file is written to output_file + "_{index}.pkl",
			        using the first index for which no file exists yet
			    '''

			    # Pick the first free indexed name, so that existing exports are never overwritten
			    if add_sufix:
			        base_name = output_file
			        output_file = next(
			            candidate
			            for candidate in (f"{base_name}_{idx:03}.pkl" for idx in count())
			            if not os.path.isfile(candidate)
			        )

			    policy_state = PPO.load(input_file).policy.state_dict() # dictionary containing network layers

			    def to_numpy(key):
			        ''' Extract one entry of the policy state as a numpy array '''
			        return policy_state[key].detach().cpu().numpy()

			    # Hidden layers: the index advances by 2 (step=2 because that's how SB3 works)
			    layers = []
			    layer_idx = 0
			    while f"mlp_extractor.policy_net.{layer_idx}.bias" in policy_state:
			        bias = to_numpy(f"mlp_extractor.policy_net.{layer_idx}.bias")
			        weight = to_numpy(f"mlp_extractor.policy_net.{layer_idx}.weight")
			        layers.append([bias, weight, "tanh"])
			        layer_idx += 2

			    # Final layer
			    layers.append([to_numpy("action_net.bias"), to_numpy("action_net.weight"), "none"])

			    with open(output_file, "wb") as out:
			        pickle.dump(layers, out, protocol=4) # protocol 4 is backward compatible with Python 3.4



			class Cyclic_Callback(BaseCallback):
			    '''
			    Stable baselines custom callback

			    Invokes a user function (no arguments) every time the number of calls to
			    this callback is a multiple of 'freq'
			    '''
			    def __init__(self, freq, function):
			        super().__init__(verbose=1)
			        self.freq, self.function = freq, function

			    def _on_step(self) -> bool:
			        is_due = self.n_calls % self.freq == 0
			        if is_due:
			            self.function()
			        # If the callback returns False, training is aborted early
			        return True

			class Export_Callback(BaseCallback):
			    '''
			    Stable baselines custom callback

			    Every time the number of calls to this callback is a multiple of 'freq', the file
			    "model_{num_timesteps}_steps.zip" located in 'load_path' is exported through
			    Train_Base.export_model to "./scripts/gyms/export/{export_name}"
			    '''
			    def __init__(self, freq, load_path, export_name):
			        super().__init__(verbose=1)
			        self.freq, self.load_path, self.export_name = freq, load_path, export_name

			    def _on_step(self) -> bool:
			        if self.n_calls % self.freq != 0:
			            return True # not an export step (returning False would abort training early)

			        # NOTE(review): presumably this file was saved by the checkpoint callback - confirm
			        checkpoint = os.path.join(self.load_path, f"model_{self.num_timesteps}_steps.zip")
			        Train_Base.export_model(checkpoint, f"./scripts/gyms/export/{self.export_name}")
			        return True # If the callback returns False, training is aborted early