Artificial Intelligence - هوش مصنوعی

Artificial Intelligence - هوش مصنوعی (http://artificial.ir/intelligence/)

- يادگيري تقويتي(Reinforcement Learning) (http://artificial.ir/intelligence/forum96.html)

- - Q-Learning (http://artificial.ir/intelligence/thread1075.html)

Q-Learning
l

* معرفی
* الگوریتم یادگیری
* مثالی از یک عامل
* مثالی از برج هانوی
* اثبات همگرایی
* یادگیری Q برای MDP غیر قطعی

salam! man be sheddat be yek piadesazi az Q learning niaz daram ke bebinam tavabe ash ro chetor bayad nevesht.age komak konin kheili mamnoon misham

این برنامه پیاده‌سازیِ روش Q-Learning هست که برای یافتن یک هدف در مکان نامعلوم به وسیله‌ی یک agent نوشته شده.
در این برنامه ابتدا به وسیله‌ی این الگوریتم و چندین بار حرکت agent در مسیر، هزینه‌هایی برای این مسیر پیدا میشه و در پایان هم بهترین مسیر تا هدف رو انتخاب می‌کنیم (در این برنامه از الگوریتم Dijkstra برای این کار استفاده شده).
به این نکته هم توجه داشته باشین که بهترین مسیر در این روش، مسیری است که بزرگترین هزینه رو در هر مرحله انتخاب کنه.

كد:

//Q-Learning

#include <cstdlib>
#include <ctime>
#include <iostream>
#include <limits>
#include <string>
#include <vector>

using namespace std;



const int SIZE=6;

//*********************

/**
 * Per-state bookkeeping record used by dijkstra() and max_distance().
 *
 * Every member has a default value so that a freshly declared record (or an
 * array of them) never holds indeterminate data before it is filled in.
 */
struct node
{
       int destination = 0;     // index of the state this record describes
       int s = 0;               // visited flag: 0 = still open, 1 = already finalised
       float distance = 0.0f;   // accumulated weight from the start state to this state
       std::string p;           // route so far, one digit character per state index
};

//*********************

// Returns an index into the given node array. Judging by the surviving tail of
// its definition, it is the index of the entry with the largest `distance`
// among those whose `s` flag is still 0 -- TODO confirm once the full body is restored.
int max_distance(node []);

// dijkstra(start state index, goal state index, 6x6 weight matrix).
void dijkstra ( int ,int, float [][6] );

//*********************



/**
 * Q-learning agent for a fixed world of 6 states.
 *
 * The constructor receives the start and goal state indices and stores them
 * in start_state / goal_state; it also sets the factor Y to 0.8.
 * Only the declaration lives here; the member functions are defined
 * out of line further down in the file.
 */
class Q_Learning
{
public:
    Q_Learning(int s, int g);   // s = start state index, g = goal state index

    void displayR();            // prints the R ("roads") matrix
    void displayQ();            // presumably prints the Q matrix -- verify against its definition
    void routing();

private:
    float Q_cal(int, int);
    float find_max(int);

    float Q[6][6];              // learned Q values
    float R[6][6];              // rewards: -1 = no way, 0 = way, 100 = direct way (legend printed by displayR)
    int   start_state;
    int   goal_state;
    float Y;                    // set to 0.8 by the constructor

    // TODO (translated from the original note): the start state also has to be handled here later.
};

//*********************

// Constructor: stores the start state `s` and goal state `g` and sets Y to 0.8.
//
// NOTE(review): this copy of the source was damaged when the forum page was
// converted to text -- everything between a '<' and the next '>' was lost or
// shuffled. The two long `for` lines below are therefore NOT valid C++; they are
// the fused remains of several loops and of other member functions.
// The surviving fragments suggest that R was filled with -1, selected pairs
// were then set to 0, the random generator was seeded with srand(time(0)) and
// displayR() was called -- TODO restore from the original program and confirm.
// The trailing call dijkstra(start_state, goal_state, temp) does not match the
// `float temp` declared here, so it presumably belongs to the end of another
// member function (routing()?) -- TODO confirm.
Q_Learning :: Q_Learning(int s , int g)

{

           int i,j,k,l,state,action;

           float temp;

           vector states;

           Y=0.8;

           start_state=s;

           goal_state=g;

           for ( i=0 ; i< if * vector<1200 (k="0" episode displayR(); R namayeshe srand(time(0)); R[i][goal_state]="=" 0 R[5][5]="0;" R[4][5]="0;R[5][1]=0;R[5][4]=0;" R[3][4]="0;R[4][0]=0;R[4][3]=0;" R[2][3]="0;R[3][1]=0;R[3][2]=0;" R[0][4]="0;R[1][3]=0;R[1][5]=0;" R[i][j]="-1;" j++ i++> states;

      for (i=0 ; i< if && max="Q[b][" ) :: Q_Learning float ********************* j++) j::min(); // MIN_FLOAT

         }

     dijkstra ( start_state ,goal_state,  temp );

}

//*********************



// displayR: clears the console, prints the legend for the reward values
// (-1 = no way, 0 = way, 100 = direct way) and then starts printing the
// "Roads matrix" with a loop over the 6 states.
//
// NOTE(review): this copy of the source was damaged when the forum page was
// converted to text. The `cout<<<"` line below is the fused remains of the rest
// of displayR(), of main() and of the beginning of another function, so it is
// not valid C++ and the end of displayR() is missing.
// The lines after it (from `for (i=0 ; i max && a[i].s == 0)` down to
// `return max_index;`) look like the tail of max_distance(): they remember the
// largest a[i].distance among entries whose a[i].s is 0 and return its index.
// The left-hand side of that comparison is lost -- TODO restore and confirm.
void Q_Learning :: displayR()//display the values of R

{

     int i,j;

     char item[6]={'A','B','C','D','E','F'};

     system("cls");

     cout<<"\n -1 --> No way\n 0 --> Is way\n 100 --> Direct way\n";

     cout<<"\nRoads matrix = :\n\n  ";

     for (i=0 ; i<6 ; i++)

         cout<<<" int { void } ?; main() while char ; cin :: Q_Learning ********************* j++) j<::min();

    for (i=0 ; i max && a[i].s == 0)

        {

           max = a[i].distance;

           max_index = i;

        }

    }

    return max_index;

}

//*********************

void dijkstra ( int v ,int g, float P[6][6] )

{

     int u,i,j,k;

     float NP[6][6];

     char ch[6]={'A','B','C','D','E','F'};

     node state[6];

     

     for (i=0 ; i< SIZE ; i++)

         for (j=0 ; j< SIZE ; j++)

         {

             NP[i][j]=P[i][j];

             if (NP[i][j] == 0 && i!=j)

                NP[i][j]=std::numeric_limits::min(); // MIN_FLOAT

         }

     

     for (i=0 ; i< SIZE ; i++)

     {

         state[i].destination=i;

         state[i].s=0;

         state[i].distance=NP[v][i];

         state[i].p = v+'0';

     }

     state[v].s=1;

     

     

     for (j=1 ; j< SIZE ; j++)

     {

         u=max_distance(state);

         state[u].s=1;

         state[u].p += u+'0';

         

         for (k=0 ; k< SIZE ; k++)

             if (state[k].s == 0)

             {

                if ( NP[u][k] != std::numeric_limits::min() )

                   if ( state[k].distance < state[u].distance + NP[u][k] )

                   {

                        state[k].distance = state[u].distance + NP[u][k];

                        state[k].p = state[u].p;

                   }

             }

             

     }

     /*for (i=0 ;i< SIZE ; i++)

     {

         cout<<"From "<<<" (start state) to ";

         cout<<<'\t';

         //cout<<<'\t';

         cout<<"Distance: ";

         cout<<(int)state[i].distance<<"\t\t";



         cout<<"Rout:";

         for (j=0 ; j<<' '<<< cout<

نقل قول:

نوشته اصلي بوسيله sara.jalali (پست 8541)

salam! man be sheddat be yek piadesazi az Q learning niaz daram ke bebinam tavabe ash ro chetor bayad nevesht.age komak konin kheili mamnoon misham

اينجا رو هم مطالعه کنيد:15:

کد متلب:

كد:

function q=ReinforcementLearning
% REINFORCEMENTLEARNING  Tabular Q-learning on a fixed 6-state example.
%   q = ReinforcementLearning returns the 6x6 Q matrix (row = current
%   state, column = action, i.e. the state moved to), rescaled so that its
%   largest entry equals 100.
%
%   The two inputs of the method are hard-coded below: the immediate reward
%   matrix R (-Inf = no door between the two rooms) and the factor gamma.
%
%   Side effects: clears the command window, changes the display format and
%   prints the iteration number at which the convergence test stopped the loop.
clc;
format short
format compact

% immediate reward matrix; row and column = states; -Inf = no door between room
R=[-inf,-inf,-inf,-inf,   0, -inf;
   -inf,-inf,-inf,   0,-inf, 100;
   -inf,-inf,-inf,   0,-inf, -inf;
   -inf,   0,   0,-inf,   0, -inf;
      0,-inf,-inf,   0,-inf, 100;
   -inf,   0,-inf,-inf,   0, 100];

gamma=0.80;            % learning parameter
q=zeros(size(R));      % initialize Q as zero
q1=ones(size(R))*inf;  % initialize previous Q as big number
count=0;               % number of consecutive iterations with (almost) unchanged q

for episode=0:50000
   % random initial state (every iteration performs a single update step)
   y=randperm(size(R,1));
   state=y(1);

   % select any action from this state
   x=find(R(state,:)>=0);        % possible actions of this state
   if ~isempty(x)
      % A state without any possible action is skipped instead of
      % indexing into an empty action list.
      x1=RandomPermutation(x);   % randomize the possible actions
      x1=x1(1);                  % select an action
      qMax=max(q,[],2);          % best known value of every state
      q(state,x1)=R(state,x1)+gamma*qMax(x1);
   end

   % break if convergence: small deviation on q for 1000 consecutive iterations
   if sum(sum(abs(q1-q)))<0.0001 && any(q(:)>0)
      if count>1000
         episode        % report last episode (intentionally without semicolon)
         break
      else
         count=count+1; % deviation of q is still small
      end
   else
      q1=q;
      count=0;          % deviation of q from the previous q is large: start over
   end
end

% normalize q so that the largest entry is 100
g=max(max(q));
if g>0
   q=100*q/g;
end

 

function y=RandomPermutation(A)
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 % Return a random permutation of the elements of array A.
 % Unlike randperm(n), which only permutes the integers 1:n,
 % RandomPermutation rearranges the members of A itself.
 % The result has the same size and the same class as A, so it also works
 % for integer, char, logical and cell arrays and for N-D arrays.
 % This function is useful for Monte Carlo simulation,
 % bootstrap sampling, games, etc.
 %
 % example: A = [ 2, 1, 5, 3]
 % RandomPermutation(A) may produce [ 1, 5, 3, 2] or [ 5, 3, 2, 1]
 %
 % example:
 % A=magic(3)
 % RandomPermutation(A)
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   % Index A with a random ordering of its linear indices. The elements are
   % never concatenated with the random keys, so the keys cannot be cast to
   % A's class (and saturate or fail).
   order=randperm(numel(A));
   y=reshape(A(order),size(A));

سلام! ممنون از راهنمایی‌هاتون. راستش من به دو مقاله احتیاج دارم که برای کارم خیلی مفید هستند، ولی از طریق google نمی‌شه pdf اونها رو گرفت. عنوان این دو مقاله رو در زیر گذاشتم:
1)“Routing with
compression in wireless sensor networks: A Q-learning approach,”
2)“Q-probabilistic routing in wireless sensor networks,”