Attachment 'my_start_qlearning_wall_v2.py'
Download 1 #!/usr/bin/env python
2
3 import gym
4 import numpy
5 import time
6 import qlearn
7 from gym import wrappers
8 # ROS packages required
9 import rospy
10 import rospkg
11 from openai_ros.openai_ros_common import StartOpenAI_ROS_Environment
12
13
14 if __name__ == '__main__':
15
16 rospy.init_node('example_turtlebot2_maze_qlearn',
17 anonymous=True, log_level=rospy.WARN)
18
19 # Init OpenAI_ROS ENV
20 task_and_robot_environment_name = rospy.get_param(
21 '/turtlebot2/task_and_robot_environment_name')
22 env = StartOpenAI_ROS_Environment(
23 task_and_robot_environment_name)
24 # Create the Gym environment
25 rospy.loginfo("Gym environment done")
26 rospy.loginfo("Starting Learning")
27
28 # Set the logging system
29 rospack = rospkg.RosPack()
30 pkg_path = rospack.get_path('turtle2_openai_ros_example')
31 outdir = pkg_path + '/training_results'
32 env = wrappers.Monitor(env, outdir, force=True)
33 rospy.loginfo("Monitor Wrapper started")
34
35 last_time_steps = numpy.ndarray(0)
36
37 # Loads parameters from the ROS param server
38 # Parameters are stored in a yaml file inside the config directory
39 # They are loaded at runtime by the launch file
40 Alpha = rospy.get_param("/turtlebot2/alpha")
41 Epsilon = rospy.get_param("/turtlebot2/epsilon")
42 Gamma = rospy.get_param("/turtlebot2/gamma")
43 epsilon_discount = rospy.get_param("/turtlebot2/epsilon_discount")
44 nepisodes = rospy.get_param("/turtlebot2/nepisodes")
45 nsteps = rospy.get_param("/turtlebot2/nsteps")
46
47 running_step = rospy.get_param("/turtlebot2/running_step")
48
49 # Initialises the algorithm that we are going to use for learning
50 qlearn = qlearn.QLearn(actions=range(env.action_space.n),
51 alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
52 initial_epsilon = qlearn.epsilon
53
54 start_time = time.time()
55 highest_reward = 0
56
57 # Starts the main training loop: the one about the episodes to do
58 for x in range(nepisodes):
59 rospy.logdebug("############### WALL START EPISODE=>" + str(x))
60
61 cumulated_reward = 0
62 done = False
63 if qlearn.epsilon > 0.05:
64 qlearn.epsilon *= epsilon_discount
65
66 # Initialize the environment and get first state of the robot
67 observation = env.reset()
68 state = ''.join(map(str, observation))
69
70 # Show on screen the actual situation of the robot
71 # env.render()
72 # for each episode, we test the robot for nsteps
73 for i in range(nsteps):
74 rospy.logwarn("############### Start Step=>" + str(i))
75 # Pick an action based on the current state
76 action = qlearn.chooseAction(state)
77 rospy.logwarn("Next action is:%d", action)
78 # Execute the action in the environment and get feedback
79 observation, reward, done, info = env.step(action)
80
81 rospy.logwarn(str(observation) + " " + str(reward))
82 cumulated_reward += reward
83 if highest_reward < cumulated_reward:
84 highest_reward = cumulated_reward
85
86 nextState = ''.join(map(str, observation))
87
88 # Make the algorithm learn based on the results
89 rospy.logwarn("# state we were=>" + str(state))
90 rospy.logwarn("# action that we took=>" + str(action))
91 rospy.logwarn("# reward that action gave=>" + str(reward))
92 rospy.logwarn("# episode cumulated_reward=>" +
93 str(cumulated_reward))
94 rospy.logwarn(
95 "# State in which we will start next step=>" + str(nextState))
96 qlearn.learn(state, action, reward, nextState)
97
98 if not (done):
99 rospy.logwarn("NOT DONE")
100 state = nextState
101 else:
102 rospy.logwarn("DONE")
103 last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
104 break
105 rospy.logwarn("############### END Step=>" + str(i))
106 #raw_input("Next Step...PRESS KEY")
107 # rospy.sleep(2.0)
108 m, s = divmod(int(time.time() - start_time), 60)
109 h, m = divmod(m, 60)
110 rospy.logerr(("EP: " + str(x + 1) + " - [alpha: " + str(round(qlearn.alpha, 2)) + " - gamma: " + str(
111 round(qlearn.gamma, 2)) + " - epsilon: " + str(round(qlearn.epsilon, 2)) + "] - Reward: " + str(
112 cumulated_reward) + " Time: %d:%02d:%02d" % (h, m, s)))
113
114 rospy.loginfo(("\n|" + str(nepisodes) + "|" + str(qlearn.alpha) + "|" + str(qlearn.gamma) + "|" + str(
115 initial_epsilon) + "*" + str(epsilon_discount) + "|" + str(highest_reward) + "| PICTURE |"))
116
117 l = last_time_steps.tolist()
118 l.sort()
119
120 # print("Parameters: a="+str)
121 rospy.loginfo("Overall score: {:0.2f}".format(last_time_steps.mean()))
122 rospy.loginfo("Best 100 score: {:0.2f}".format(
123 reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:])))
124
125 env.close()
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.