Artificial Intelligence - هوش مصنوعی - نمايش پست تنها - Q-Learning

**Astaraki** · ۰۵-۳۱-۱۳۸۹, ۰۶:۵۶ بعد از ظهر

نقل قول:

نوشته اصلي بوسيله sara.jalali

salam! man be sheddat be yek piadesazi az Q learning niaz daram ke bebinam tavabe ash ro chetor bayad nevesht.age komak konin kheili mamnoon misham

اينجا رو هم مطالعه کنيد

کد متلب:

كد:

function q=ReinforcementLearning(R,gamma)
% REINFORCEMENTLEARNING  Tabular Q-learning on a deterministic state graph.
%
%   q = ReinforcementLearning            uses the built-in 6-state example.
%   q = ReinforcementLearning(R)         uses reward matrix R, gamma = 0.80.
%   q = ReinforcementLearning(R,gamma)   also sets the discount factor.
%
%   R      square immediate-reward matrix; row = current state,
%          column = next state. Entries >= 0 are allowed moves, anything
%          else (e.g. -Inf) means there is no door between the two rooms.
%   gamma  discount factor applied to the best Q value of the next state.
%   q      learned Q matrix, rescaled so that its largest entry is 100
%          (returned unscaled when no entry is positive).
%
%   Side effects: clears the command window, changes the display format,
%   and prints the episode number at which convergence was detected.
clc;
format short
format compact

if nargin<1 || isempty(R)
   % default example: six rooms, room 6 is the goal (reward 100)
   R=[-inf,-inf,-inf,-inf,   0, -inf;
      -inf,-inf,-inf,   0,-inf, 100;
      -inf,-inf,-inf,   0,-inf, -inf;
      -inf,   0,   0,-inf,   0, -inf;
         0,-inf,-inf,   0,-inf, 100;
      -inf,   0,-inf,-inf,   0, 100];
end
if nargin<2 || isempty(gamma)
   gamma=0.80;         % discount factor
end
if size(R,1)~=size(R,2)
   % the chosen action is used as the next state, so R must be square
   error('ReinforcementLearning:badR','R must be a square matrix.');
end

numStates=size(R,1);
q=zeros(size(R));      % initialize Q as zero
q1=ones(size(R))*inf;  % previous Q snapshot; Inf forces a first "large" deviation
count=0;               % consecutive iterations with (almost) unchanged Q
for episode=0:50000
   % random initial state
   state=randi(numStates);

   % allowed actions from this state
   actions=find(R(state,:)>=0);
   if ~isempty(actions)
      % a dead-end state has nothing to update; skip it instead of
      % indexing into an empty action list
      next=actions(randi(numel(actions)));   % pick one action uniformly
      qMax=max(q,[],2);                      % best known value of every state
      q(state,next)=R(state,next)+gamma*qMax(next);
   end

   % break if convergence: small deviation on q for 1000 consecutive
   if sum(abs(q1(:)-q(:)))<0.0001 && any(q(:)>0)
      if count>1000,
         episode        % report last episode
         break          % for
      else
         count=count+1; % set counter if deviation of q is small
      end
   else
      q1=q;
      count=0; % reset counter when deviation of q from previous q is large
   end
end
%normalize q
g=max(max(q));
if g>0,
   q=100*q/g;
end
 
function y=RandomPermutation(A)
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % return random permutation of array A
 % unlike randperm(n), which only permutes the integers 1:n,
 % RandomPermutation rearranges the elements of A randomly.
 % The result has the same size and class as A and contains exactly
 % the same elements (no element is duplicated or dropped).
 % This function is useful for MonteCarlo Simulation,
 %  Bootstrap sampling, game, etc.
 %
 %
 % example: A = [ 2, 1, 5, 3]
 % RandomPermutation(A) may produce [ 1, 5, 3, 2] or [ 5, 3, 2, 1]
 %
 % example:
 % A=magic(3)
 % RandomPermutation(A)
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   key=randperm(numel(A));   % random destination slot for every element
   y=A;                      % start from A so size and class are kept
   y(key)=A(:);              % element k of A moves to linear position key(k)