Commit 5a21cc68 authored 1 year ago by Andri Joos

Merge branch '19-refactor-start'

Parents: ccd73d55, c140a8ba

Showing 3 changed files with 13 additions and 61 deletions:

envs/StartEnvironment.py   (1 addition, 11 deletions)
train/start_training.py    (9 additions, 45 deletions)
train/utils.py             (3 additions, 5 deletions)
envs/StartEnvironment.py  +1 −11

@@ -63,21 +63,11 @@ class StartEnvironment(BaseEnvironment):
         geo_point = state.gps_location
         height_delta = geo_point.altitude - self._desired_height
         normalized_height_delta = self._normalize_height_delta(height_delta)
         # acceleration = self._client.getImuData().linear_acceleration
         velocity = state.kinematics_estimated.linear_velocity
         return np.array([normalized_height_delta, velocity.z_val], dtype=np.float64)

     def _calculate_reward(self, obs: NDArray) -> np.float32:
-        # if abs(obs[0]) < 0.2:
-        #     return np.float32(1)
-        # else:
-        #     return np.float32(0)
-        abs_relative_height = abs(obs[0])
-        reward = -abs_relative_height
-        # if abs_relative_height < 0.5:
-        #     reward += 1
-        return np.float32(reward)
+        return np.float32(abs(obs[0]))

-    def _normalize_height_delta(self, height: float) -> float:
-        return height
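The net effect of this hunk is that the reward is now simply the absolute value of the first observation component (the normalized height delta), and the pass-through _normalize_height_delta helper is dropped. Below is a minimal standalone sketch of the post-merge reward logic; the free function name and the sample observation values are invented here purely for illustration and are not part of the repository.

import numpy as np

def calculate_reward(obs: np.ndarray) -> np.float32:
    # Mirrors StartEnvironment._calculate_reward after this commit:
    # the reward is the magnitude of the normalized height delta.
    return np.float32(abs(obs[0]))

# Hypothetical observation: [normalized height delta, vertical velocity]
obs = np.array([-0.3, 1.2], dtype=np.float64)
print(calculate_reward(obs))  # 0.3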
train/start_training.py  +9 −45

-from envs import StartEnvironment, TestEnvironment
+from envs import StartEnvironment
 import os
 import train.utils as utils
 import numpy as np
@@ -9,36 +9,21 @@ from tf_agents.policies.random_py_policy import RandomPyPolicy
 SIM_IP = "192.168.8.195"

-# Use "num_iterations = 1e6" for better results (2 hrs)
-# 1e5 is just so this doesn't take too long (1 hr)
 num_episodes = 10000
 num_steps_per_episode = 2000
-initial_collect_steps = 2000 # @param {type:"integer"}
-collect_steps_per_iteration = 1 # @param {type:"integer"}
-replay_buffer_capacity = 100000 # @param {type:"integer"}
-replay_buffer_server = "localhost:40000"
+replay_buffer_capacity = 100000

-batch_size = 1024 # @param {type:"integer"}
+batch_size = 1024

-critic_learning_rate = 3e-4 # @param {type:"number"}
-actor_learning_rate = 3e-4 # @param {type:"number"}
-alpha_learning_rate = 3e-4 # @param {type:"number"}
-target_update_tau = 0.005 # @param {type:"number"}
-target_update_period = 1 # @param {type:"number"}
-gamma = 0.9 # @param {type:"number"}
-reward_scale_factor = 1.0 # @param {type:"number"}
+critic_learning_rate = 3e-4
+actor_learning_rate = 3e-4
+alpha_learning_rate = 3e-4
+gamma = 0.9

 actor_fc_layer_params = (16, 16)
 critic_joint_fc_layer_params = actor_fc_layer_params

-log_interval = 5000 # @param {type:"integer"}
-num_eval_episodes = 20 # @param {type:"integer"}
-eval_interval = 5000 # @param {type:"integer"}
-policy_save_interval = 1000 # @param {type:"integer"}

 tempdir = "out/"
 is_initial = utils.is_initial(tempdir)
@@ -47,19 +32,6 @@ utils.set_gpu()
 train_env = StartEnvironment(ip=SIM_IP, desired_height=160, dynamic_start=True)
 eval_env = StartEnvironment(ip=SIM_IP, desired_height=180, dynamic_start=False)

 train_tf_env = TFPyEnvironment(train_env)

-# py_env = TestEnvironment(ip=SIM_IP)
-# env = TFPyEnvironment(py_env)
-# env.reset()
-# action = np.array([0,0.5,0.5,0.5], dtype=np.float32)
-# init_action = np.array([0.5, 0.5, 0.5, 0])
-# for i in range(35):
-#     py_env.step(init_action)
-# while True:
-#     t = py_env.step(action)
-#     m = 0

 observation_spec = train_tf_env.observation_spec()
 action_spec = train_tf_env.action_spec()
@@ -67,9 +39,9 @@ time_step_spec = train_tf_env.time_step_spec()
 train_step = train_utils.create_train_step()

-tf_agent = utils.create_agent(action_spec, observation_spec, time_step_spec, train_step, actor_fc_layer_params, critic_joint_fc_layer_params, alpha_learning_rate=alpha_learning_rate, actor_learning_rate=actor_learning_rate, critic_learning_rate=critic_learning_rate, gamma=gamma, reward_scale_factor=reward_scale_factor)
+tf_agent = utils.create_agent(action_spec, observation_spec, time_step_spec, train_step, actor_fc_layer_params, critic_joint_fc_layer_params, alpha_learning_rate=alpha_learning_rate, actor_learning_rate=actor_learning_rate, critic_learning_rate=critic_learning_rate, gamma=gamma)

-reverb_replay, rb_observer = utils.create_replay_buffer(tf_agent.collect_data_spec, replay_buffer_server, replay_buffer_capacity)
+reverb_replay, rb_observer = utils.create_replay_buffer(tf_agent.collect_data_spec, replay_buffer_capacity)

 collect_policy = utils.create_policy(tf_agent.collect_policy)
 eval_policy = utils.create_policy(tf_agent.policy)
@@ -87,15 +59,8 @@ random_actor, collect_actor, eval_actor = utils.create_actors(
 checkpoint_dir = os.path.join(tempdir, 'checkpoint')
 train_checkpointer, policy_saver = utils.create_checkpoint(tempdir, tf_agent, reverb_replay, train_step)
 # reverb_replay = train_checkpointer.manager.checkpoint.replay_buffer # must be done manually, gets somehow not restored

 agent_learner = utils.create_learner(tf_agent, train_step, tempdir, num_steps_per_episode, reverb_replay, batch_size)

-# eval_env.reset()
-# eval_actor.run()
-# env.reset()
-# eval_actor.run()

 utils.train_loop(
     agent_learner,
     collect_actor,
@@ -108,6 +73,5 @@ utils.train_loop(
     train_step,
     num_episodes,
     num_steps_per_episode,
     epsilon=0.2,
     is_initial=is_initial)
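Pieced together from the added and unchanged lines above, the hyperparameter block at the top of train/start_training.py reads roughly as follows after the merge (imports and the trailing utils.is_initial call omitted; blank-line placement is assumed, since the rendered diff does not preserve it):

SIM_IP = "192.168.8.195"

num_episodes = 10000
num_steps_per_episode = 2000
replay_buffer_capacity = 100000
batch_size = 1024

critic_learning_rate = 3e-4
actor_learning_rate = 3e-4
alpha_learning_rate = 3e-4
gamma = 0.9

actor_fc_layer_params = (16, 16)
critic_joint_fc_layer_params = actor_fc_layer_params

tempdir = "out/"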
train/utils.py  +3 −5

@@ -42,7 +42,7 @@ def create_agent(action_spec: Any,
                  target_update_tau: Float = 0.005,
                  target_update_period: Int = 1,
                  gamma: Float = 0.99,
-                 reward_scale_factor: Float = 0.8,
+                 reward_scale_factor: Float = 1.0,
                  ) -> SacAgent:
     critic_net = CriticNetwork((observation_spec, action_spec),
@@ -74,7 +74,7 @@ def create_agent(action_spec: Any,
         reward_scale_factor=reward_scale_factor,
         train_step_counter=train_step)

-def create_replay_buffer(collect_data_spec: NestedTensorSpec, server_address: str, replay_buffer_capacity: int) -> Tuple[ReverbReplayBuffer, ReverbAddTrajectoryObserver]:
+def create_replay_buffer(collect_data_spec: NestedTensorSpec, replay_buffer_capacity: int) -> Tuple[ReverbReplayBuffer, ReverbAddTrajectoryObserver]:
     table_name = 'uniform_table'
     table = reverb.Table(table_name,
@@ -185,14 +185,11 @@ def train_loop(learner: Learner,
                train_steps_per_episode: int,
                initial_steps: int = 1000,
                initial_learning_iterations: int = 1,
                episode_learning_iterations: int = 10,
                epsilon: float = None,
                eval_interval: int = 5,
                is_initial: bool = True,
                outdir: str = "out",
                ):
     policy_dir = os.path.join(outdir, "policy")
     policy_saver.save(policy_dir)

     for _ in range(initial_steps):
         if is_initial:
             random_actor.run()
@@ -211,6 +208,7 @@ def train_loop(learner: Learner,
             learner.run(iterations=1)
             checkpointer.save(train_step)
             policy_saver.save(policy_dir)
         if i % eval_interval == 0:
             print("######## Evaluation episode {} ########".format(i))
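The body of the refactored create_replay_buffer is truncated in this view after the reverb.Table call. For reference, a local-server variant in the style of the standard TF-Agents SAC tutorial could look like the sketch below; the sequence_length of 2, the selector and rate-limiter choices, and the decision to create the reverb.Server inside the function are assumptions, not details taken from this repository.

import reverb
from typing import Tuple
from tf_agents.replay_buffers.reverb_replay_buffer import ReverbReplayBuffer
from tf_agents.replay_buffers.reverb_utils import ReverbAddTrajectoryObserver

def create_replay_buffer(collect_data_spec,
                         replay_buffer_capacity: int
                         ) -> Tuple[ReverbReplayBuffer, ReverbAddTrajectoryObserver]:
    table_name = 'uniform_table'
    table = reverb.Table(
        table_name,
        max_size=replay_buffer_capacity,
        sampler=reverb.selectors.Uniform(),
        remover=reverb.selectors.Fifo(),
        rate_limiter=reverb.rate_limiters.MinSize(1))

    # Assumption: with server_address gone, the table is hosted by an
    # in-process Reverb server instead of an external one.
    reverb_server = reverb.Server([table])

    # The replay buffer keeps a handle to the local server it samples from.
    reverb_replay = ReverbReplayBuffer(
        collect_data_spec,
        sequence_length=2,  # assumed; matches the TF-Agents SAC tutorial
        table_name=table_name,
        local_server=reverb_server)

    rb_observer = ReverbAddTrajectoryObserver(
        reverb_replay.py_client,
        table_name,
        sequence_length=2,
        stride_length=1)

    return reverb_replay, rb_observer

Read this way, dropping the server_address parameter would mean the Reverb tables live inside the training process itself rather than on the external server previously configured at localhost:40000.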