GradientStudy.py
# coding: utf-8
import numpy as np
import matplotlib.pyplot as plt
import argparse
from progress.bar import Bar
from PIL import Image
from PIL import ImageDraw
import gym
from stable_baselines3 import SAC
from stable_baselines3.common.evaluation import evaluate_policy
import colorTest
from vector_util import *
from slowBar import SlowBar
from savedGradient import SavedGradient
# To test (~5 minutes computing time)
# python3 GradientStudy.py --env Pendulum-v0 --directory Models/Pendulum --min_iter 500 --max_iter 10000 --step_iter 500 --eval_maxiter 5
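# Overview: this script loads a series of SAC checkpoints saved at consecutive training
# iterations, computes the direction taken in parameter space between each pair of
# consecutive checkpoints, evaluates the policy's fitness at regularly spaced points along
# that direction (on both sides of the current model), and stacks the resulting score
# lines into a single image via SavedGradient.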
if __name__ == "__main__":
print("Parsing arguments")
parser = argparse.ArgumentParser()
# Model parameters
parser.add_argument('--env', default='Swimmer-v2', type=str)
parser.add_argument('--policy', default = 'MlpPolicy', type=str) # Policy of the model
parser.add_argument('--tau', default=0.005, type=float) # the soft update coefficient (“Polyak update”, between 0 and 1)
parser.add_argument('--gamma', default=1, type=float) # the discount fmodel
parser.add_argument('--learning_rate', default=0.0003, type=float) #learning rate for adam optimizer, the same learning rate will be used
# for all networks (Q-Values, model and Value function) it can be a function
# of the current progress remaining (from 1 to 0)
    # Tools parameters
    parser.add_argument('--minalpha', default=0.0, type=float) # start value for alpha, good value : 0.0
    parser.add_argument('--maxalpha', default=10, type=float) # end value for alpha, good value : 100 for a large view, 10 around the model
    parser.add_argument('--stepalpha', default=0.25, type=float) # step for alpha in the loop, good value : precise 0.5 or 1, less precise 2 or 3
    parser.add_argument('--eval_maxiter', default=5, type=int) # number of episodes used for each evaluation
    # Drawing parameters
    parser.add_argument('--pixelWidth', default=20, type=int) # width of each pixel
    parser.add_argument('--pixelHeight', default=10, type=int) # height of each pixel
    parser.add_argument('--maxValue', default=360, type=int) # max score value for the colormap used (depends on the benchmark used)
    parser.add_argument('--line_height', default=3, type=int) # height, in number of pixels, of each result line
    # Dot product parameters
    parser.add_argument('--dotWidth', default=150, type=int) # max width of the dot product (added on the side)
    parser.add_argument('--dotText', default=True, type=bool) # True to show the value of the dot product
    parser.add_argument('--xMargin', default=10, type=int) # x margin for the side panel
    # File management
    parser.add_argument('--directory', default="Models", type=str) # name of the directory containing the models to load
    parser.add_argument('--basename', default="rl_model_", type=str) # file prefix for the loaded models
    parser.add_argument('--min_iter', default=1, type=int) # iteration (file suffix) of the first model
    parser.add_argument('--max_iter', default=10, type=int) # iteration (file suffix) of the last model
    parser.add_argument('--step_iter', default=1, type=int) # iteration step between two consecutive models
    # Output parameters
    parser.add_argument('--saveFile', default=True, type=bool) # True to save the gradient study as a SavedGradient
    parser.add_argument('--saveImage', default=True, type=bool) # True to save the image of the gradient study
    parser.add_argument('--directoryFile', default="SavedGradient", type=str) # name of the directory where the SavedGradient is saved
    parser.add_argument('--directoryImage', default="Gradient_output", type=str) # name of the output directory that will contain the image
    args = parser.parse_args()

    # Creating environment and initialising model and parameters
    print("Creating environment\n")
    env = gym.make(args.env)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = int(env.action_space.high[0])

    # Instantiating the model
    model = SAC(args.policy, args.env,
                learning_rate=args.learning_rate,
                tau=args.tau,
                gamma=args.gamma)
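    # Note: this freshly constructed SAC model is only used to infer the size of the
    # flattened policy parameter vector (num_params); the weights actually analysed
    # below are loaded from the checkpoint files with SAC.load().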
    theta0 = model.policy.parameters_to_vector()
    num_params = len(theta0)
    print('\n')

    # Names of the model files to analyse consecutively:
    filename_list = [args.basename+str(i)+'_steps' for i in range(args.min_iter,
                                                                   args.max_iter,
                                                                   args.step_iter)]
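    # For illustration (assuming checkpoints follow the SB3 CheckpointCallback naming
    # "<basename><timesteps>_steps", as in the example command at the top of the file):
    # with --min_iter 500 --max_iter 10000 --step_iter 500 this yields
    # ["rl_model_500_steps", "rl_model_1000_steps", ..., "rl_model_9500_steps"]
    # (note that range() excludes max_iter itself).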
    # Compute the fitness along these directions:
    previous_theta = None # Stores the parameter vector of the previous model
    newGradient = SavedGradient(directions=[], results=[], red_markers=[], green_markers=[],
                                nbLines=args.line_height, pixelWidth=args.pixelWidth, pixelHeight=args.pixelHeight, maxValue=args.maxValue,
                                dotText=args.dotText, dotWidth=args.dotWidth, xMargin=args.xMargin, yMargin=int(args.pixelHeight/2)) # Storing the new SavedGradient
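    # As used here, the SavedGradient container accumulates one entry per checkpoint:
    # a direction vector, a line of fitness scores, and a red/green marker pair locating
    # the previous and current model on that line; the remaining arguments only control
    # how computeImage() renders the final picture.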
    for indice_file in range(len(filename_list)):
        # Change which model to load
        filename = filename_list[indice_file]

        # Load the model
        print("\nSTARTING : "+str(filename))
        model = SAC.load("{}/{}".format(args.directory, filename))

        # Get the new parameters
        theta0 = model.policy.parameters_to_vector()
        base_vect = theta0 if previous_theta is None else theta0 - previous_theta
        previous_theta = theta0
        print("Loaded parameters from file")

        # Evaluate the model: mean, std
        print("Evaluating the model...")
        init_score, score_std = evaluate_policy(model, env, n_eval_episodes=args.eval_maxiter, warn=False)
        print("Model initial fitness : ", (init_score, score_std))

        # Study the geometry around the model
        print("Starting study around the model...")
        # Norm of the step between the previous and the current model
        length_dist = euclidienne(base_vect, np.zeros(np.shape(base_vect)))
        # Direction taken by the model (normalized)
        d = np.zeros(np.shape(base_vect)) if length_dist == 0 else base_vect / length_dist
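        # d is the unit vector pointing from the previous checkpoint towards the current
        # one (for the first checkpoint, previous_theta is None, so d simply points from
        # the origin of parameter space to theta0).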
        newGradient.directions.append(d)

        # New parameters following the direction
        theta_plus, theta_minus = getPointsDirection(theta0, num_params, args.minalpha, args.maxalpha, args.stepalpha, d)
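        # getPointsDirection comes from vector_util; as used here it is expected to return
        # two lists of parameter vectors, theta0 + alpha*d and theta0 - alpha*d, for alpha
        # ranging from minalpha to maxalpha with step stepalpha.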
        # Evaluate the model using the new parameters
        scores_plus, scores_minus = [], []
        with SlowBar("Evaluating along its direction", max=len(theta_plus)) as bar:
            for param_i in range(len(theta_plus)):
                # Go forward in the direction
                model.policy.load_from_vector(theta_plus[param_i])
                # Get the new performance
                scores_plus.append(evaluate_policy(model, env, n_eval_episodes=args.eval_maxiter, warn=False)[0])
                # Go backward in the direction
                model.policy.load_from_vector(theta_minus[param_i])
                # Get the new performance
                scores_minus.append(evaluate_policy(model, env, n_eval_episodes=args.eval_maxiter, warn=False)[0])
                bar.next()

        # Inverting scores for a symmetrical Vignette (theta_minus going left, theta_plus going right)
        scores_minus = scores_minus[::-1]
        line = scores_minus + [init_score] + scores_plus
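        # Resulting layout of `line`: the leftmost score corresponds (approximately, up to
        # stepalpha) to theta0 - maxalpha*d, the central entry is the fitness of theta0
        # itself (init_score), and the rightmost score corresponds to theta0 + maxalpha*d.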
        # Adding the results
        last_params_marker = int(length_dist/args.stepalpha)
        # Mark two consecutive positions on the line
        marker_actor = int((len(line)-1)/2)
        marker_last = max(marker_actor-last_params_marker, 0)
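        # marker_actor is the central pixel of the line (the current model); the previous
        # model lies at distance length_dist along -d, i.e. last_params_marker pixels to
        # the left of the centre, clamped to 0 if it falls outside the drawn range.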
        # A list of the markers, the previous model shown in red and the current one in green
        newGradient.red_markers.append(marker_last)
        newGradient.green_markers.append(marker_actor)
        # Putting it all together
        newGradient.results.append(line)

    try:
        # Assembling the image, saving it if asked
        newGradient.computeImage(saveImage=args.saveImage, filename=args.basename+'_gradient', directory=args.directoryImage)
    except Exception as e:
        # If the image cannot be assembled, report the error and save the raw gradient data
        # in a temporary directory so the computed results are not lost
        print("Image computation failed: ", e)
        newGradient.saveGradient(args.basename, args.directoryFile+'/temp')

    # Saving the SavedGradient if asked
    if args.saveFile:
        newGradient.saveGradient(args.basename, args.directoryFile)

    env.close()