-
Notifications
You must be signed in to change notification settings - Fork 24
/
main_0.m
208 lines (190 loc) · 11.4 KB
/
main_0.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
%%
% Authors: Kien Vu
% Date Update on 2017 Feb 14
% MATLAB version: 9.1 (R2016b)
% OS: Windows 7 amd64 version 6.1
% Java version: 1.7.0_60
% Yalmip lastest version
% Name Status Version Location
% ------------------------------------------------------------
% Mosek selected,default 7.1.0.12 {cvx}\mosek\w64
% SDPT3 4.0 {cvx}\sdpt3
% SeDuMi 1.34 {cvx}\sedumi
%%
clc;
% clear all;
% close all;
rng('shuffle'); % seed the generator from the current time so rand/randi/randn differ between runs
random_seed = rng; % capture the shuffled generator state (saved into the results .mat below, so a run can be reproduced)
rng(random_seed); % re-apply the captured state; a no-op for this run, kept so the state in 'random_seed' is exactly what the simulation uses
% Load simulation parameters; 'parameter' is a project script that defines the
% globals declared below (Iterations, N_Flow, N_SubF, N_BSs, N_Actions, alpha1, alpha2).
parameter;
global Iterations % total number of time slots simulated (set by 'parameter', incremented below)
global N_Flow % number of flows (players in the learning game; code below assumes 2)
global N_SubF % number of subflows per flow; each flow is assumed to be divided equally
global N_BSs % number of small-cell BSs (queues are sized 1 + N_BSs: MBS plus SCs)
global N_Actions % number of actions: each user chooses two paths among 4, giving the action set
global alpha1 % fraction of data user 1 assigns to subflow 1 (temporarily zeroed inside the loop)
global alpha2 % fraction of data user 1 assigns to subflow 2; also used to "restore" alpha1 below
% Extend the horizon by one slot; presumably to make room for the t-1 initial
% state used at iter == 2 -- TODO confirm against 'parameter'.
Iterations = Iterations + 1;
%% Learning parameters
% Per-(flow, action, slot) tensors driving the regret-based learning.
Learning_Utility_Estimate = zeros(N_Flow, N_Actions, Iterations); % estimated utility per action
Learning_Utility_Observe  = zeros(N_Flow, N_Actions, Iterations); % observed (fed-back) utility per action
Regret      = zeros(N_Flow, N_Actions, Iterations);               % regret of not having played each action
Probability = ones(N_Flow, N_Actions, Iterations) / N_Actions;    % action pmf, initialized uniform
BG_probability = ones(N_Flow, N_Actions, Iterations) / N_Actions; % Boltzmann-Gibbs distribution, initialized uniform
Utility         = zeros(N_Flow, Iterations);                      % realized utility of the played action
selected_action = zeros(N_Flow, Iterations);                      % index of the action played per slot
% Baseline: classical regret learning
class_Probability     = ones(N_Flow, N_Actions, Iterations) / N_Actions;
class_selected_action = zeros(N_Flow, Iterations);
% Learning-rate and initial settings
kappa = [2 5 10 50]; % Boltzmann temperature candidates (only kappa(1) is used below)
X = [1 2 3 4 5 6];   % action index alphabet sampled via get_x_from_pmf
%% Lyapunov Parameters
% Per-(subflow, BS, slot) tensors; BS dimension is 1 + N_BSs (MBS plus SCs).
network_queue  = zeros(N_SubF, 1 + N_BSs, Iterations);            % physical network queues
virtual_queue  = ones(N_SubF, 1 + N_BSs, Iterations) / Iterations;% virtual queues, small uniform start
serving_rate   = zeros(N_SubF, 1 + N_BSs, Iterations);            % allocated serving rates
remaining_data = zeros(N_SubF, 1 + N_BSs, Iterations);            % remaining (backlogged) data
incoming_data  = zeros(N_SubF, 1 + N_BSs, Iterations);            % incoming data per slot
auxiliary_var  = zeros(N_SubF, 1 + N_BSs, Iterations);            % auxiliary variables
delay_require  = zeros(N_SubF, 1 + N_BSs, Iterations);            % data required to meet the delay constraint
indicator_bs   = zeros(N_BSs + 1, N_SubF, Iterations);            % which BS is turned on
transmit_power = zeros(1 + N_BSs, N_SubF, Iterations);            % transmit power
% Optimization setup
beta1 = 4;       % delay bound: we guarantee up to 4
epsilon1 = 0.05; % probabilistic delay-constraint tolerance
%% Simulation starts
% Two-timescale operation: the learning part runs on a long-term period (a
% frame, every 10 slots), while rate allocation runs every time slot.
same_action = 0; % counts frames in which both flows chose conflicting actions (shared BS)
for iter = 2:Iterations
    % Learning is performed once per frame: at iter == 2 and then every 10th
    % slot. In between (else branch), the previous action is simply replayed.
    if iter == 2 || mod(iter-2,10) == 0
        if iter == 2 % at the beginning, assume the network is not congested at all
            network_queue(:,:,iter-1) = ones(N_SubF, 1 + N_BSs); % congestion level of each BS
        end
        % Each player samples its action from the probability distribution of
        % the previous state t-1.
        selected_action(1,iter) = get_x_from_pmf(X,Probability(1,:,iter-1));
        selected_action(2,iter) = get_x_from_pmf(X,Probability(2,:,iter-1));
        % Feedback: did the two players choose conflicting actions?
        % check_action == 1 means no conflict (full utility observed).
        if check_action(selected_action(1,iter), selected_action(2,iter)) == 1
            Learning_Utility_Observe(:,:,iter) = cal_gameutility( network_queue(:,:,iter-1)); % Congested_BSs(:,iter)
        else % punish both players with halved utility for playing the same (conflicting) action
            same_action = same_action + 1;
            Learning_Utility_Observe(:,:,iter) = 1/2 * cal_gameutility( network_queue(:,:,iter-1) );
        end
        % Baseline (classical learning) samples from its own distribution.
        class_selected_action(1,iter) = get_x_from_pmf(X,class_Probability(1,:,iter-1));
        class_selected_action(2,iter) = get_x_from_pmf(X,class_Probability(2,:,iter-1));
        for player = 1:N_Flow
            for action = 1:N_Actions
                % Utility-estimate update: only the action actually played moves
                % toward the observed utility, with step size (1+iter)^-0.5;
                % all other actions carry their previous estimate forward.
                if player == 1
                    if action == selected_action(1,iter)
                        Learning_Utility_Estimate(player,action,iter) = Learning_Utility_Estimate(player,action,iter-1) + 1/(1+iter).^(0.5) ...
                            *(Learning_Utility_Observe(player,action,iter) - Learning_Utility_Estimate(player,action,iter-1));
                        Utility(player,iter) = Learning_Utility_Estimate(player,action,iter);
                    else
                        Learning_Utility_Estimate(player,action,iter) = Learning_Utility_Estimate(player,action,iter-1);
                    end
                else
                    if action == selected_action(2,iter)
                        Learning_Utility_Estimate(player,action,iter) = Learning_Utility_Estimate(player,action,iter-1) + 1/(1+iter).^(0.5) ...
                            *(Learning_Utility_Observe(player,action,iter) - Learning_Utility_Estimate(player,action,iter-1));
                        Utility(player,iter) = Learning_Utility_Estimate(player,action,iter);
                    else
                        Learning_Utility_Estimate(player,action,iter) = Learning_Utility_Estimate(player,action,iter-1);
                    end
                end
                % Regret update, clipped at 0, with a slower step size
                % (1+iter)^-0.55 than the utility estimate above.
                Regret(player,action,iter) = max(0,Regret(player,action,iter-1) + 1/(1+iter).^(0.55) ...
                    *(Learning_Utility_Observe(player,action,iter) - Learning_Utility_Estimate(player,action,iter) - Regret(player,action,iter-1)));
            end
            for action = 1:N_Actions
                % Boltzmann-Gibbs distribution over positive regrets, using
                % temperature kappa(1).
                BG_probability(player,action,iter) = exp(1/kappa(1)*max(Regret(player,action,iter), 0))/ ...
                    sum( exp(1/kappa(1)*max(Regret(player,:,iter), 0)) ); % do I need to extract the current action
                % Probability tracks BG_probability with the slowest step size
                % (1+iter)^-0.6 (three-timescale: 0.5 < 0.55 < 0.6 exponents).
                Probability(player,action,iter) = Probability(player,action,iter-1) + 1/(1+iter).^(0.6) ...
                    *(BG_probability(player,action,iter) - Probability(player,action,iter-1));
            end
            % End of update rules; convergence is reached when the per-action
            % probability distribution stops changing.
        end
        % Lyapunov part starts here at the beginning of a frame.
        % Auxiliary variable selection, masked by the routing indicator of the
        % actions just selected.
        auxiliary_var(:,:,iter) = auxiliary_var_selection(virtual_queue(:,:,iter));
        indicator_BSs = routingtable (selected_action(:,iter));
        auxiliary_var(:,:,iter) = auxiliary_var(:,:,iter).*indicator_BSs';
        % Remaining traffic = previous backlog minus what was served, plus the
        % previous slot's arrivals; delay_require is the simplified
        % probabilistic-delay constraint, active only on BSs the route uses.
        for nbs = 1:N_BSs+1
            for nsf = 1:N_SubF
                remaining_data(nsf,nbs,iter) = max(remaining_data(nsf,nbs,iter) - serving_rate(nsf,nbs,iter-1), 0) + incoming_data(nsf,nbs,iter-1); % outcommingdata X
                delay_require(nsf,nbs,iter) = indicator_BSs(nbs, nsf) * max(0, alpha1*(0 - beta1*epsilon1) + remaining_data(nsf,nbs,iter) ); % simplified constraint: alpha1*(0 - beta1*epsilon1) == -alpha1*beta1*epsilon1
            end
        end
        % Rate allocation for this frame.
        [serving_rate(:,:,iter), transmit_power(:,:,iter) ] = rate_allocation_simple(virtual_queue(:,:,iter), ...
            selected_action(:,iter), delay_require(:,:,iter) );
        incoming_data(:,:,iter) = incoming_traffic(selected_action(:,iter), transmit_power(:,:,iter));
        % Network queue update. NOTE: writes slice iter+1; on the final
        % iteration this auto-grows the arrays to Iterations+1 slices.
        network_queue(:,:,iter + 1) = network_queue_update(selected_action(:,iter),...
            network_queue(:,:,iter), serving_rate(:,:,iter), incoming_data(:,:,iter)' );
        % Virtual queue update.
        virtual_queue(:,:,iter + 1) = virtual_queue_update(virtual_queue(:,:,iter),...
            auxiliary_var(:,:,iter), serving_rate(:,:,iter));
    %% Update when mod(iter-2,10) is not equal to 0 (within a frame)
    else % slots 2..10 of a frame: replay the same action and carry learning state forward
        for player = 1:N_Flow
            for action = 1:N_Actions
                selected_action(1,iter) = selected_action(1,iter-1);
                selected_action(2,iter) = selected_action(2,iter-1);
                Probability(player,action,iter) = Probability(player,action,iter-1);
                BG_probability(player,action,iter) = BG_probability(player,action,iter-1);
                Regret(player,action,iter) = Regret(player,action,iter-1);
                Learning_Utility_Estimate(player,action,iter) = Learning_Utility_Estimate(player,action,iter-1);
                Utility(player,iter) = Utility(player, iter-1);
                Learning_Utility_Observe(player,action,iter) = Learning_Utility_Observe(player,action,iter-1);
            end
        end
        % During the short TTI (1 ms) there is no new incoming data at the MBS,
        % while the SCs try to push all flow data.
        auxiliary_var(:,:,iter) = auxiliary_var_selection(virtual_queue(:,:,iter));
        indicator_BSs = routingtable (selected_action(:,iter));
        auxiliary_var(:,:,iter) = auxiliary_var(:,:,iter).*indicator_BSs';
        % Remaining traffic and delay requirement, as in the frame-start branch.
        for nbs = 1:N_BSs+1
            for nsf = 1:N_SubF
                remaining_data(nsf,nbs,iter) = max(remaining_data(nsf,nbs,iter) - serving_rate(nsf,nbs,iter-1), 0) + incoming_data(nsf,nbs,iter-1); % outcommingdata X
                delay_require(nsf,nbs,iter) = indicator_BSs(nbs, nsf) * max(0, -alpha1*beta1*epsilon1 + remaining_data(nsf,nbs,iter) ); % simplified constraint
            end
        end
        % Rate allocation: reuse the frame's rates and powers within the frame.
        serving_rate(:,:,iter) = serving_rate(:,:,iter-1);
        transmit_power(:,:,iter)= transmit_power(:,:,iter-1) ; % possible transmit power
        % Determine any remaining traffic in the queue at the previous time slot.
        incoming_data(:,:,iter) = incoming_traffic_0(selected_action(:,iter), transmit_power(:,:,iter), network_queue(:,:,iter-1)); % network_queue(:,:,iter-1)
        % Network queue update with arrivals suppressed for this slot.
        alpha1 = 0; % no data arrives at this point
        network_queue(:,:,iter + 1) = network_queue_update(selected_action(:,iter),...
            network_queue(:,:,iter), serving_rate(:,:,iter), incoming_data(:,:,iter)' );
        % Virtual queue update.
        virtual_queue(:,:,iter + 1) = virtual_queue_update(virtual_queue(:,:,iter),...
            auxiliary_var(:,:,iter), serving_rate(:,:,iter));
        % NOTE(review): "restore" assigns alpha2 to alpha1, which only recovers
        % the original value if alpha1 == alpha2 in 'parameter' -- TODO confirm.
        alpha1 = alpha2 ; % restore
    end
    %% baselines
end
% Report how often both flows chose paths sharing a common BS. same_action
% counts conflicting frames; the *9 scaling presumably converts frames to the
% slots on which the conflicting action was replayed -- TODO confirm. The
% percentage is parenthesized for clarity (same value as the original
% 100*same_action/Iterations*9, since * and / associate left to right).
fprintf('\nNumber of times both players choosing the path with common BS is %d ~ %f%%\n', same_action*9, 100*(same_action*9)/Iterations)
fprintf('\nFinished\n');
% Save the entire workspace under a unique name: random tag plus date, so
% repeated or parallel runs do not overwrite each other's results.
serie = rand;
file = strcat('multihop','No_Iterations_',num2str(Iterations),'_Arrival_',num2str(alpha1),'_','rand',num2str(serie),'_','On',num2str(date),'.mat');
% Save directly into Results/ via fullfile: the previous cd/save/cd sequence
% left the working directory changed to Results if save() raised an error.
save(fullfile('Results', file));
plotdata;