attachment:my_start_qlearning_wall_v2.py of openai_ros/TurtleBot2 with openai_ros

Attachment 'my_start_qlearning_wall_v2.py'

#!/usr/bin/env python

import gym
import numpy
import time
import qlearn
from gym import wrappers
# ROS packages required
import rospy
import rospkg
from openai_ros.openai_ros_common import StartOpenAI_ROS_Environment


if __name__ == '__main__':

    rospy.init_node('example_turtlebot2_maze_qlearn',
                    anonymous=True, log_level=rospy.WARN)

    # Init OpenAI_ROS ENV
    task_and_robot_environment_name = rospy.get_param(
        '/turtlebot2/task_and_robot_environment_name')
    env = StartOpenAI_ROS_Environment(
        task_and_robot_environment_name)
    # Create the Gym environment
    rospy.loginfo("Gym environment done")
    rospy.loginfo("Starting Learning")

    # Set the logging system
    rospack = rospkg.RosPack()
    pkg_path = rospack.get_path('turtle2_openai_ros_example')
    outdir = pkg_path + '/training_results'
    env = wrappers.Monitor(env, outdir, force=True)
    rospy.loginfo("Monitor Wrapper started")

    last_time_steps = numpy.ndarray(0)

    # Loads parameters from the ROS param server
    # Parameters are stored in a yaml file inside the config directory
    # They are loaded at runtime by the launch file
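    # A matching config, cf. the attached turtlebot2_openai_qlearn_params_wall_v2.yaml,
    # might look like this (the values below are illustrative placeholders,
    # not necessarily the shipped ones):
    #   turtlebot2:
    #     task_and_robot_environment_name: 'TurtleBot2Wall-v0'
    #     alpha: 0.1
    #     gamma: 0.7
    #     epsilon: 0.9
    #     epsilon_discount: 0.999
    #     nepisodes: 500
    #     nsteps: 10000
    #     running_step: 0.04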
    Alpha = rospy.get_param("/turtlebot2/alpha")
    Epsilon = rospy.get_param("/turtlebot2/epsilon")
    Gamma = rospy.get_param("/turtlebot2/gamma")
    epsilon_discount = rospy.get_param("/turtlebot2/epsilon_discount")
    nepisodes = rospy.get_param("/turtlebot2/nepisodes")
    nsteps = rospy.get_param("/turtlebot2/nsteps")

    # Loaded for completeness; this script does not use it any further
    running_step = rospy.get_param("/turtlebot2/running_step")

    # Initialises the algorithm that we are going to use for learning
    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                           alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
    initial_epsilon = qlearn.epsilon
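    # QLearn (see the attached qlearn.py, sketched after this listing) is
    # expected to implement the standard tabular update
    #   Q(s, a) <- Q(s, a) + Alpha * (reward + Gamma * max_a' Q(s', a') - Q(s, a))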

    start_time = time.time()
    highest_reward = 0

    # Start the main training loop over the episodes
    for x in range(nepisodes):
        rospy.logdebug("############### WALL START EPISODE=>" + str(x))

        cumulated_reward = 0
        done = False
        # Decay the exploration rate until it reaches its floor of 0.05
        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount

        # Initialize the environment and get first state of the robot
        observation = env.reset()
        # Flatten the observation into a string that serves as the Q-table key
        state = ''.join(map(str, observation))

        # Show on screen the current situation of the robot
        # env.render()
        # For each episode, we run the robot for at most nsteps
        for i in range(nsteps):
            rospy.logwarn("############### Start Step=>" + str(i))
            # Pick an action based on the current state
            action = qlearn.chooseAction(state)
            rospy.logwarn("Next action is:%d", action)
            # Execute the action in the environment and get feedback
            observation, reward, done, info = env.step(action)

            rospy.logwarn(str(observation) + " " + str(reward))
            cumulated_reward += reward
            if highest_reward < cumulated_reward:
                highest_reward = cumulated_reward

            nextState = ''.join(map(str, observation))

            # Make the algorithm learn based on the results
            rospy.logwarn("# state we were=>" + str(state))
            rospy.logwarn("# action that we took=>" + str(action))
            rospy.logwarn("# reward that action gave=>" + str(reward))
            rospy.logwarn("# episode cumulated_reward=>" +
                          str(cumulated_reward))
            rospy.logwarn(
                "# State in which we will start next step=>" + str(nextState))
            qlearn.learn(state, action, reward, nextState)

            if not done:
                rospy.logwarn("NOT DONE")
                state = nextState
            else:
                rospy.logwarn("DONE")
                last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
                break
            rospy.logwarn("############### END Step=>" + str(i))
            #raw_input("Next Step...PRESS KEY")
            # rospy.sleep(2.0)
        m, s = divmod(int(time.time() - start_time), 60)
        h, m = divmod(m, 60)
        rospy.logerr(("EP: " + str(x + 1) + " - [alpha: " + str(round(qlearn.alpha, 2)) + " - gamma: " + str(
            round(qlearn.gamma, 2)) + " - epsilon: " + str(round(qlearn.epsilon, 2)) + "] - Reward: " + str(
            cumulated_reward) + "     Time: %d:%02d:%02d" % (h, m, s)))

    rospy.loginfo(("\n|" + str(nepisodes) + "|" + str(qlearn.alpha) + "|" + str(qlearn.gamma) + "|" + str(
        initial_epsilon) + "*" + str(epsilon_discount) + "|" + str(highest_reward) + "| PICTURE |"))

    l = last_time_steps.tolist()
    l.sort()

    # print("Parameters: a="+str)
    rospy.loginfo("Overall score: {:0.2f}".format(last_time_steps.mean()))
    # sum() replaces the Python 2-only reduce() builtin, so this also runs
    # under Python 3
    rospy.loginfo("Best 100 score: {:0.2f}".format(
        sum(l[-100:]) / float(len(l[-100:]))))

    env.close()
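
The qlearn module imported at the top is the attached qlearn.py. For reference, here is a minimal sketch of the interface this script relies on (QLearn(actions, alpha, gamma, epsilon), chooseAction(), learn(), and the alpha/gamma/epsilon attributes), assuming a plain epsilon-greedy tabular Q-learner; the attached file is the authoritative implementation:

import random


class QLearn:
    def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9):
        self.q = {}             # Q-table mapping (state, action) -> value
        self.actions = actions  # iterable of action indices
        self.epsilon = epsilon  # exploration rate
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor

    def getQ(self, state, action):
        # Unseen (state, action) pairs default to 0.0
        return self.q.get((state, action), 0.0)

    def chooseAction(self, state):
        # Epsilon-greedy: explore with probability epsilon, otherwise take
        # the best known action, breaking ties at random
        if random.random() < self.epsilon:
            return random.choice(list(self.actions))
        q_values = [self.getQ(state, a) for a in self.actions]
        max_q = max(q_values)
        best = [a for a, q in zip(self.actions, q_values) if q == max_q]
        return random.choice(best)

    def learn(self, state1, action1, reward, state2):
        # Tabular Q-learning update:
        #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        max_q_new = max([self.getQ(state2, a) for a in self.actions])
        old_q = self.getQ(state1, action1)
        self.q[(state1, action1)] = old_q + \
            self.alpha * (reward + self.gamma * max_q_new - old_q)

With this interface, the states passed to chooseAction() and learn() are the flattened observation strings built in the main loop, and the actions are the integer indices of the environment's discrete action space. As the comments in the script note, the /turtlebot2/* parameters are loaded onto the ROS parameter server by the launch file (see the attached start_training.launch) before the node starts.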

Attached Files

To refer to attachments on a page, use attachment:filename, as shown in the list of files below. Do NOT use a direct download URL, since it is subject to change and can break easily.
  • (2018-08-01 18:38:25, 99.9 KB) [[attachment:gym_computers1.png]]
  • (2018-08-01 18:38:01, 45.6 KB) [[attachment:gym_computers2.png]]
  • (2018-08-01 18:38:11, 77.6 KB) [[attachment:gym_computers3.png]]
  • (2018-08-01 18:13:37, 5413.9 KB) [[attachment:maze_training.gif]]
  • (2019-04-11 15:12:41, 4.8 KB) [[attachment:my_start_qlearning_wall_v2.py]]
  • (2018-08-01 18:02:54, 1.6 KB) [[attachment:my_turtlebot2_maze_params.yaml]]
  • (2018-08-02 10:26:36, 12.5 KB) [[attachment:my_turtlebot2_wall.py]]
  • (2018-08-01 18:24:41, 1.6 KB) [[attachment:my_turtlebot2_wall_params.yaml]]
  • (2019-10-10 16:43:26, 2.1 KB) [[attachment:qlearn.py]]
  • (2018-08-01 18:16:23, 28.8 KB) [[attachment:reward1.png]]
  • (2018-08-01 18:55:23, 50.0 KB) [[attachment:ria_logo.png]]
  • (2018-08-02 10:18:02, 4.5 KB) [[attachment:start_qlearning.py]]
  • (2018-08-01 18:10:03, 0.4 KB) [[attachment:start_training.launch]]
  • (2018-08-02 10:24:21, 4.5 KB) [[attachment:start_training.py]]
  • (2018-08-01 18:26:18, 4.5 KB) [[attachment:start_training_wall.py]]
  • (2018-08-01 17:32:53, 339.6 KB) [[attachment:turtlebot2_maze.png]]
  • (2019-04-11 15:12:53, 0.4 KB) [[attachment:turtlebot2_openai_qlearn_params_wall_v2.yaml]]
  • (2018-08-02 10:28:28, 20436.3 KB) [[attachment:turtlebot2_tut_maze.gif]]
  • (2018-08-01 18:20:02, 124.3 KB) [[attachment:turtlebot2_wall.png]]
  • (2018-08-02 10:27:55, 400.6 KB) [[attachment:turtlebot_maze_v2.png]]
  • (2018-08-01 18:34:36, 3992.2 KB) [[attachment:wall_training.gif]]