attachment:start_training.py of openai_ros/TurtleBot2 with openai_ros

   1 #!/usr/bin/env python
   2 
   3 import gym
   4 import numpy
   5 import time
   6 import qlearn
   7 from gym import wrappers
   8 # ROS packages required
   9 import rospy
  10 import rospkg
  11 # import our training environment
  12 from openai_ros.task_envs.turtlebot2 import turtlebot2_maze
  13 
  14 
  15 if __name__ == '__main__':
  16 
  17     rospy.init_node('turtlebot2_maze_qlearn', anonymous=True, log_level=rospy.WARN)
  18 
  19     # Create the Gym environment
  20     env = gym.make('TurtleBot2Maze-v0')
  21     rospy.loginfo("Gym environment done")
  22 
  23     # Set the logging system
  24     rospack = rospkg.RosPack()
  25     pkg_path = rospack.get_path('my_turtlebot2_training')
  26     outdir = pkg_path + '/training_results'
  27     env = wrappers.Monitor(env, outdir, force=True)
  28     rospy.loginfo("Monitor Wrapper started")
  29 
  30     last_time_steps = numpy.ndarray(0)
  31 
  32     # Loads parameters from the ROS param server
  33     # Parameters are stored in a yaml file inside the config directory
  34     # They are loaded at runtime by the launch file
  35     Alpha = rospy.get_param("/turtlebot2/alpha")
  36     Epsilon = rospy.get_param("/turtlebot2/epsilon")
  37     Gamma = rospy.get_param("/turtlebot2/gamma")
  38     epsilon_discount = rospy.get_param("/turtlebot2/epsilon_discount")
  39     nepisodes = rospy.get_param("/turtlebot2/nepisodes")
  40     nsteps = rospy.get_param("/turtlebot2/nsteps")
  41 
  42     running_step = rospy.get_param("/turtlebot2/running_step")
  43 
  44     # Initialises the algorithm that we are going to use for learning
  45     qlearn = qlearn.QLearn(actions=range(env.action_space.n),
  46                            alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
  47     initial_epsilon = qlearn.epsilon
  48 
  49     start_time = time.time()
  50     highest_reward = 0
  51 
  52     # Starts the main training loop: the one about the episodes to do
  53     for x in range(nepisodes):
  54         rospy.logdebug("############### START EPISODE=>" + str(x))
  55 
  56         cumulated_reward = 0
  57         done = False
  58         if qlearn.epsilon > 0.05:
  59             qlearn.epsilon *= epsilon_discount
  60 
  61         # Initialize the environment and get first state of the robot
  62         observation = env.reset()
  63         state = ''.join(map(str, observation))
  64 
  65         # Show on screen the actual situation of the robot
  66         # env.render()
  67         # for each episode, we test the robot for nsteps
  68         for i in range(nsteps):
  69             rospy.logwarn("############### Start Step=>" + str(i))
  70             # Pick an action based on the current state
  71             action = qlearn.chooseAction(state)
  72             rospy.logwarn("Next action is:%d", action)
  73             # Execute the action in the environment and get feedback
  74             observation, reward, done, info = env.step(action)
  75 
  76             rospy.logwarn(str(observation) + " " + str(reward))
  77             cumulated_reward += reward
  78             if highest_reward < cumulated_reward:
  79                 highest_reward = cumulated_reward
  80 
  81             nextState = ''.join(map(str, observation))
  82 
  83             # Make the algorithm learn based on the results
  84             rospy.logwarn("# state we were=>" + str(state))
  85             rospy.logwarn("# action that we took=>" + str(action))
  86             rospy.logwarn("# reward that action gave=>" + str(reward))
  87             rospy.logwarn("# episode cumulated_reward=>" + str(cumulated_reward))
  88             rospy.logwarn("# State in which we will start next step=>" + str(nextState))
  89             qlearn.learn(state, action, reward, nextState)
  90 
  91             if not (done):
  92                 rospy.logwarn("NOT DONE")
  93                 state = nextState
  94             else:
  95                 rospy.logwarn("DONE")
  96                 last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
  97                 break
  98             rospy.logwarn("############### END Step=>" + str(i))
  99             #raw_input("Next Step...PRESS KEY")
 100             # rospy.sleep(2.0)
 101         m, s = divmod(int(time.time() - start_time), 60)
 102         h, m = divmod(m, 60)
 103         rospy.logerr(("EP: " + str(x + 1) + " - [alpha: " + str(round(qlearn.alpha, 2)) + " - gamma: " + str(
 104             round(qlearn.gamma, 2)) + " - epsilon: " + str(round(qlearn.epsilon, 2)) + "] - Reward: " + str(
 105             cumulated_reward) + "     Time: %d:%02d:%02d" % (h, m, s)))
 106 
 107     rospy.loginfo(("\n|" + str(nepisodes) + "|" + str(qlearn.alpha) + "|" + str(qlearn.gamma) + "|" + str(
 108         initial_epsilon) + "*" + str(epsilon_discount) + "|" + str(highest_reward) + "| PICTURE |"))
 109 
 110     l = last_time_steps.tolist()
 111     l.sort()
 112 
 113     # print("Parameters: a="+str)
 114     rospy.loginfo("Overall score: {:0.2f}".format(last_time_steps.mean()))
 115     rospy.loginfo("Best 100 score: {:0.2f}".format(reduce(lambda x, y: x + y, l[-100:]) / len(l[-100:])))
 116 
 117     env.close()
ROS 2 Documentation

Wiki

Page

User

Attachment 'start_training.py'

Attached Files