Attachment 'start_training_wall.py'
Download 1 #!/usr/bin/env python
2
3 import gym
4 import numpy
5 import time
6 import qlearn
7 from gym import wrappers
8 # ROS packages required
9 import rospy
10 import rospkg
11 # import our training environment
12 import my_turtlebot2_wall
13
14
15 if __name__ == '__main__':
16
17 rospy.init_node('example_turtlebot2_maze_qlearn', anonymous=True, log_level=rospy.WARN)
18
19 # Create the Gym environment
20 env = gym.make('MyTurtleBot2Wall-v0')
21 rospy.loginfo("Gym environment done")
22
23 # Set the logging system
24 rospack = rospkg.RosPack()
25 pkg_path = rospack.get_path('my_turtlebot2_training')
26 outdir = pkg_path + '/training_results'
27 env = wrappers.Monitor(env, outdir, force=True)
28 rospy.loginfo("Monitor Wrapper started")
29
30 last_time_steps = numpy.ndarray(0)
31
32 # Loads parameters from the ROS param server
33 # Parameters are stored in a yaml file inside the config directory
34 # They are loaded at runtime by the launch file
35 Alpha = rospy.get_param("/turtlebot2/alpha")
36 Epsilon = rospy.get_param("/turtlebot2/epsilon")
37 Gamma = rospy.get_param("/turtlebot2/gamma")
38 epsilon_discount = rospy.get_param("/turtlebot2/epsilon_discount")
39 nepisodes = rospy.get_param("/turtlebot2/nepisodes")
40 nsteps = rospy.get_param("/turtlebot2/nsteps")
41
42 running_step = rospy.get_param("/turtlebot2/running_step")
43
44 # Initialises the algorithm that we are going to use for learning
45 qlearn = qlearn.QLearn(actions=range(env.action_space.n),
46 alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
47 initial_epsilon = qlearn.epsilon
48
49 start_time = time.time()
50 highest_reward = 0
51
52 # Starts the main training loop: the one about the episodes to do
53 for x in range(nepisodes):
54 rospy.logdebug("############### WALL START EPISODE=>" + str(x))
55
56 cumulated_reward = 0
57 done = False
58 if qlearn.epsilon > 0.05:
59 qlearn.epsilon *= epsilon_discount
60
61 # Initialize the environment and get first state of the robot
62 observation = env.reset()
63 state = ''.join(map(str, observation))
64
65 # Show on screen the actual situation of the robot
66 # env.render()
67 # for each episode, we test the robot for nsteps
68 for i in range(nsteps):
69 rospy.logwarn("############### Start Step=>" + str(i))
70 # Pick an action based on the current state
71 action = qlearn.chooseAction(state)
72 rospy.logwarn("Next action is:%d", action)
73 # Execute the action in the environment and get feedback
74 observation, reward, done, info = env.step(action)
75
76 rospy.logwarn(str(observation) + " " + str(reward))
77 cumulated_reward += reward
78 if highest_reward < cumulated_reward:
79 highest_reward = cumulated_reward
80
81 nextState = ''.join(map(str, observation))
82
83 # Make the algorithm learn based on the results
84 rospy.logwarn("# state we were=>" + str(state))
85 rospy.logwarn("# action that we took=>" + str(action))
86 rospy.logwarn("# reward that action gave=>" + str(reward))
87 rospy.logwarn("# episode cumulated_reward=>" + str(cumulated_reward))
88 rospy.logwarn("# State in which we will start next step=>" + str(nextState))
89 qlearn.learn(state, action, reward, nextState)
90
91 if not (done):
92 rospy.logwarn("NOT DONE")
93 state = nextState
94 else:
95 rospy.logwarn("DONE")
96 last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
97 break
98 rospy.logwarn("############### END Step=>" + str(i))
99 #raw_input("Next Step...PRESS KEY")
100 # rospy.sleep(2.0)
101 m, s = divmod(int(time.time() - start_time), 60)
102 h, m = divmod(m, 60)
103 rospy.logerr(("EP: " + str(x + 1) + " - [alpha: " + str(round(qlearn.alpha, 2)) + " - gamma: " + str(
104 round(qlearn.gamma, 2)) + " - epsilon: " + str(round(qlearn.epsilon, 2)) + "] - Reward: " + str(
105 cumulated_reward) + " Time: %d:%02d:%02d" % (h, m, s)))
106
107 rospy.loginfo(("\n|" + str(nepisodes) + "|" + str(qlearn.alpha) + "|" + str(qlearn.gamma) + "|" + str(
108 initial_epsilon) + "*" + str(epsilon_discount) + "|" + str(highest_reward) + "| PICTURE |"))
109
110 l = last_time_steps.tolist()
111 l.sort()
112
113 # print("Parameters: a="+str)
114 rospy.loginfo("Overall score: {:0.2f}".format(last_time_steps.mean()))
115 rospy.loginfo("Best 100 score: {:0.2f}".format(reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:])))
116
117 env.close()
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.