attachment:qlearn.py of openai_ros/TurtleBot2 with openai_ros

   1 '''
   2 Q-learning approach for different RL problems
   3 as part of the basic series on reinforcement learning @
   4 https://github.com/vmayoral/basic_reinforcement_learning
   5  
   6 Inspired by https://gym.openai.com/evaluations/eval_kWknKOkPQ7izrixdhriurA
   7  
   8         @author: Victor Mayoral Vilches <victor@erlerobotics.com>
   9 '''
  10 import random
  11 
  12 class QLearn:
  13     def __init__(self, actions, epsilon, alpha, gamma):
  14         self.q = {}
  15         self.epsilon = epsilon  # exploration constant
  16         self.alpha = alpha      # discount constant
  17         self.gamma = gamma      # discount factor
  18         self.actions = actions
  19 
  20     def getQ(self, state, action):
  21         return self.q.get((state, action), 0.0)
  22 
  23     def learnQ(self, state, action, reward, value):
  24         '''
  25         Q-learning:
  26             Q(s, a) += alpha * (reward(s,a) + max(Q(s') - Q(s,a))            
  27         '''
  28         oldv = self.q.get((state, action), None)
  29         if oldv is None:
  30             self.q[(state, action)] = reward
  31         else:
  32             self.q[(state, action)] = oldv + self.alpha * (value - oldv)
  33 
  34     def chooseAction(self, state, return_q=False):
  35         q = [self.getQ(state, a) for a in self.actions]
  36         maxQ = max(q)
  37 
  38         if random.random() < self.epsilon:
  39             minQ = min(q); mag = max(abs(minQ), abs(maxQ))
  40             # add random values to all the actions, recalculate maxQ
  41             q = [q[i] + random.random() * mag - .5 * mag for i in range(len(self.actions))] 
  42             maxQ = max(q)
  43 
  44         count = q.count(maxQ)
  45         # In case there're several state-action max values 
  46         # we select a random one among them
  47         if count > 1:
  48             best = [i for i in range(len(self.actions)) if q[i] == maxQ]
  49             i = random.choice(best)
  50         else:
  51             i = q.index(maxQ)
  52 
  53         action = self.actions[i]        
  54         if return_q: # if they want it, give it!
  55             return action, q
  56         return action
  57 
  58     def learn(self, state1, action1, reward, state2):
  59         maxqnew = max([self.getQ(state2, a) for a in self.actions])
  60         self.learnQ(state1, action1, reward, reward + self.gamma*maxqnew)
ROS 2 Documentation

Wiki

Page

User

Attachment 'qlearn.py'

Attached Files