development
Ugo Finnendahl 5 years ago
parent a85cf3a47f
commit c717501c24
  1. 9
      css/src/style.scss
  2. 2
      css/style.min.css
  3. 36
      index.html
  4. 24
      js/rl.js
  5. 221
      js/view.js

@ -4,6 +4,7 @@
} }
body{ body{
// background-color: ; // background-color: ;
font-family: sans-serif;
} }
#container{ #container{
height: 100vh; height: 100vh;
@ -30,3 +31,11 @@ nav{
width: 20vw; width: 20vw;
height: 10vw; height: 10vw;
} }
// Panel holding the hyper-parameter sliders: absolutely positioned near the
// top-left corner (7vh from the top, 2vw from the left), 20vw wide and 10vw tall.
.sliders{
position: absolute;
top: 7vh;
left: 2vw;
width: 20vw;
height: 10vw;
}

2
css/style.min.css vendored

@ -1 +1 @@
*{margin:0;padding:0}#container{height:100vh;position:relative}#canvas{height:100%}nav{position:absolute;top:10px;left:10px}.absolute{position:absolute;top:0;left:0}.plot{position:absolute;top:2vh;right:2vw;width:20vw;height:10vw} *{margin:0;padding:0}body{font-family:sans-serif}#container{height:100vh;position:relative}#canvas{height:100%}nav{position:absolute;top:10px;left:10px}.absolute{position:absolute;top:0;left:0}.plot{position:absolute;top:2vh;right:2vw;width:20vw;height:10vw}.sliders{position:absolute;top:7vh;left:2vw;width:20vw;height:10vw}

@ -8,6 +8,13 @@
<script src="https://cdnjs.cloudflare.com/ajax/libs/gsap/1.20.3/TweenMax.min.js"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/gsap/1.20.3/TweenMax.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.7.1/Chart.min.js"></script> <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/2.7.1/Chart.min.js"></script>
<script src="https://unpkg.com/vue-chartjs@3.4.2/dist/vue-chartjs.js"></script> <script src="https://unpkg.com/vue-chartjs@3.4.2/dist/vue-chartjs.js"></script>
<script src="https://cdn.jsdelivr.net/npm/vue-slider-component@3.0.41/dist/vue-slider-component.umd.min.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/vue-slider-component@3.0.41/theme/default.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.css" integrity="sha384-zB1R0rpPzHqg7Kpt0Aljp8JPLqbXI3bhnPWROx27a9N0Ll6ZP/+DiW/UqRcLbRjq" crossorigin="anonymous">
<!-- The loading of KaTeX is deferred to speed up page rendering -->
<script src="https://cdn.jsdelivr.net/npm/katex@0.11.1/dist/katex.min.js" integrity="sha384-y23I5Q6l+B6vatafAwxRu/0oK/79VlbSz7Q9aiSZUvyWYIYsd+qj+o24G5ZU2zJz" crossorigin="anonymous"></script>
<title>RL exhibit - prototype</title> <title>RL exhibit - prototype</title>
<link rel="stylesheet" href="css/style.min.css"> <link rel="stylesheet" href="css/style.min.css">
@ -28,24 +35,41 @@
<v-line v-for="x in maze.width+1" :config="get_grid_line_config(x-1)"></v-line> <v-line v-for="x in maze.width+1" :config="get_grid_line_config(x-1)"></v-line>
</v-group> --> </v-group> -->
<v-rect v-for="(t_type, idx) in maze.map.flat()" :config="get_tile_config(idx, t_type)" :key="idx" ></v-rect> <v-rect v-for="(t_type, idx) in maze.map.flat()" :config="get_tile_config(idx, t_type)" :key="idx" ></v-rect>
<v-group v-for="(action,idx) in q_table" :config="get_field_config(idx)">
<v-shape v-for="(value, key) in action" :config="get_triangle_config(value, key)"></v-shape>
<v-text v-for="i in 4" :config="get_q_text_config(action,i)"></v-text>
</v-group>
<v-regular-polygon :config="agent_config"></v-regular-polygon> <v-regular-polygon :config="agent_config"></v-regular-polygon>
</v-group> </v-group>
</v-layer> </v-layer>
</v-stage> </v-stage>
<line-chart css-classes="plot" :chart-data="datacollection" :options="{responsive: true, maintainAspectRatio: false,scales: {xAxes: [{ticks:{maxTicksLimit:11}}]},legend: {display: false}}"></line-chart> <line-chart css-classes="plot" :chart-data="datacollection" :options="plot_options"></line-chart>
<div class="sliders">
<h1>Learning Rate {{learning_rate}}</h1>
<vue-slider v-model="learning_rate" :drag-on-click="true" v-bind="slider_config"></vue-slider>
<h1>Discount Factor {{discount_factor}}</h1>
<vue-slider v-model="discount_factor" :drag-on-click="true" v-bind="slider_config"></vue-slider>
<h1>Epsilon {{epsilon}}</h1>
<vue-slider v-model="epsilon" :drag-on-click="true" v-bind="slider_config"></vue-slider>
<h1>Current Score</h1>
<h2>{{score}}</h2>
<div id="test" style="position: absolute;bottom: 10vh;width:90vw"></div>
</div>
</div> </div>
<nav> <nav>
<button class="button" onclick="machine.run(1)">run 100 episodes!</button> <button class="button" onclick="machine.run(1)">run 1 episode!</button>
<button class="button" onclick="machine.run(100)">run 100 episodes!</button>
<button class="button" onclick="machine.auto_step();">auto step!</button> <button class="button" onclick="machine.auto_step();">auto step!</button>
<button class="button" onclick="machine.greedy_step();">greedy step!</button> <button class="button" onclick="machine.greedy_step();">greedy step!</button>
<button class="button" onclick="machine.reset_machine()">reset machine</button>
</nav> </nav>
<script> <script>
var map = [ var map = [
[0, 0, 4, 8, 0, 0, 0, 0], [0, 0, 1, 8, 0, 0, 0, 0],
[0, 0, 4, 4, 4, 4, 0, 0], [0, 0, 1, 1, 1, 1, 0, 0],
[4, 0, 0, 0, 0, 4, 0, 4], [1, 0, 0, 0, 0, 1, 0, 1],
[0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0],
[2, 0, 1, 0, 4, 0, 0, 4] [2, 0, 1, 0, 1, 0, 0, 1]
]; ];
</script> </script>
<script src="js/rl.js"></script> <script src="js/rl.js"></script>

@ -8,25 +8,29 @@ class RL_machine {
learning_rate, learning_rate,
discount_factor, discount_factor,
epsilon=0) { epsilon=0) {
this.q_table = actions_per_state.map((c) => c.reduce((o,n) => {o[n]=0; return o},{})); this.actions_per_state = actions_per_state;
this.transactions = transactions; this.transactions = transactions;
this.rewards = rewards; this.rewards = rewards;
this.lr = learning_rate; this.lr = learning_rate;
this.df = discount_factor; this.df = discount_factor;
this.state = start_state;
this.start_state = start_state; this.start_state = start_state;
this.end_score = end_score; this.end_score = end_score;
this.end_states = end_states; this.end_states = end_states;
this.episode = 0;
this.epsilon = epsilon; this.epsilon = epsilon;
this.score = 0; this.q_table = this.actions_per_state.map((c) => c.reduce((o,n) => {o[n]=0; return o},{}));
this.running = false; this.reset_machine();
this.score_history = [];
} }
reset_machine(){ reset_machine(){
this.q_table = this.q_table.map((c) => c.map((a) => a.fill(0))); for (var q in this.q_table){
for (var key in this.q_table[q]){
this.q_table[q][key] = 0;
}
}
this.episode = 0; this.episode = 0;
this.state = this.start_state; this.state = this.start_state;
this.score = 0;
this.running = false;
this.score_history = [];
} }
new_episode(){ new_episode(){
// add_new_episode_callback // add_new_episode_callback
@ -127,6 +131,10 @@ class Maze {
for (let idy=0; idy<this.map.length; idy++){ for (let idy=0; idy<this.map.length; idy++){
for (let idx=0; idx<this.map[0].length; idx++){ for (let idx=0; idx<this.map[0].length; idx++){
var action = []; var action = [];
if (this.map[idy][idx] == tile.wall){
actions.push(action);
continue;
}
if (idy != 0){ if (idy != 0){
if(this.map[idy-1][idx] != tile.wall){ if(this.map[idy-1][idx] != tile.wall){
action.push(dir.UP); action.push(dir.UP);
@ -177,7 +185,7 @@ class Maze {
} }
} }
const reward = {[tile.regular]:-1,[tile.dangerous]:-1000,[tile.end]:1000,[tile.start]:-1}; const reward = {[tile.regular]:-1,[tile.dangerous]:-100,[tile.end]:1000,[tile.start]:-1};
var maze = new Maze(map, reward); var maze = new Maze(map, reward);
var learning_rate = 0.75; var learning_rate = 0.75;

@ -25,6 +25,8 @@ Vue.component('line-chart', {
}) })
var palette = ['#00429d', '#06449d', '#0d469d', '#12489d', '#164a9d', '#1a4c9c', '#1d4e9c', '#20509c', '#23529c', '#26549c', '#28569b', '#2b589b', '#2d5a9b', '#305c9b', '#325e9a', '#34609a', '#37629a', '#39649a', '#3b6699', '#3d6899', '#3f6999', '#416b98', '#436d98', '#456f97', '#477197', '#497397', '#4b7596', '#4d7796', '#4f7995', '#517b95', '#537d94', '#557f94', '#578193', '#598392', '#5b8592', '#5d8791', '#5f8991', '#618a90', '#638c8f', '#658e8f', '#67908e', '#69928d', '#6b948d', '#6d968c', '#6f988b', '#719a8b', '#739c8a', '#759e89', '#77a088', '#79a287', '#7ba386', '#7da586', '#80a785', '#82a984', '#84ab83', '#86ad82', '#88af81', '#8ab180', '#8cb37f', '#8fb57e', '#91b77d', '#93b87c', '#95ba7a', '#97bc79', '#9abe78', '#9cc077', '#9ec276', '#a1c474', '#a3c673', '#a5c872', '#a8c970', '#aacb6f', '#accd6e', '#afcf6c', '#b1d16b', '#b4d369', '#b6d568', '#b8d766', '#bbd864', '#bdda63', '#c0dc61', '#c2de5f', '#c5e05d', '#c8e25b', '#cae459', '#cde657', '#cfe755', '#d2e953', '#d5eb50', '#d7ed4e', '#daef4b', '#ddf049', '#dff246', '#e2f443', '#e5f640', '#e8f83c', '#ebfa39', '#edfb35', '#f0fd31', '#f3ff2c']
Array.prototype.simpleSMA = function(N) { Array.prototype.simpleSMA = function(N) {
return this.map( return this.map(
function(el, index, _arr) { function(el, index, _arr) {
@ -55,16 +57,32 @@ return this.map(
app = new Vue({ app = new Vue({
el: '#app', el: '#app',
components: {
VueSlider: window['vue-slider-component']
},
data: { data: {
width: 0, width: 0,
height: 0, height: 0,
q_table: machine.q_table, q_table: machine.q_table,
maze: maze, maze: maze,
state: {x:0,y:0}, state: {
x: 0,
y: 0
},
state_tween: new TimelineLite(), state_tween: new TimelineLite(),
score: machine.score, score: machine.score,
score_history: machine.score_history, score_history: machine.score_history,
labels: [], labels: [],
learning_rate: machine.lr,
discount_factor: machine.df,
epsilon: machine.epsilon,
slider_config: {
min: 0,
max: 1,
duration: 0,
interval: 0.01,
tooltip: 'none'
}
}, },
created() { created() {
// Resize handler // Resize handler
@ -75,8 +93,13 @@ app = new Vue({
var $this = this; var $this = this;
this.state = this.s2p(s); this.state = this.s2p(s);
Object.defineProperty(machine, 'state', { Object.defineProperty(machine, 'state', {
get: function() { return this._state }, get: function() {
set: function(ne) { this._state=ne; $this.handleState(this._state); } return this._state
},
set: function(ne) {
this._state = ne;
$this.handleState(this._state);
}
}); });
machine.state = s; machine.state = s;
// Score wrapper // Score wrapper
@ -84,8 +107,13 @@ app = new Vue({
var $this = this; var $this = this;
this.score = s; this.score = s;
Object.defineProperty(machine, 'score', { Object.defineProperty(machine, 'score', {
get: function() { return this._score }, get: function() {
set: function(ne) { this._score=ne; $this.score=ne} return this._score
},
set: function(ne) {
this._score = ne;
$this.score = ne
}
}); });
machine.score = s; machine.score = s;
// Score history wrapper // Score history wrapper
@ -93,8 +121,13 @@ app = new Vue({
var $this = this; var $this = this;
this.score_history = s; this.score_history = s;
Object.defineProperty(machine, 'score_history', { Object.defineProperty(machine, 'score_history', {
get: function() { return this._score_history }, get: function() {
set: function(ne) { this._score_history=ne; $this.score_history=ne} return this._score_history
},
set: function(ne) {
this._score_history = ne;
$this.score_history = ne
}
}); });
machine.score_history = s; machine.score_history = s;
}, },
@ -105,26 +138,44 @@ app = new Vue({
datacollection: function() { datacollection: function() {
return { return {
labels: Array.from(Array(this.score_history.length).keys()), labels: Array.from(Array(this.score_history.length).keys()),
datasets: [ datasets: [{
{
label: 'Data One', label: 'Data One',
backgroundColor: 'rgb(0,0,0,0)', backgroundColor: 'rgb(0,0,0,0)',
data: this.score_history.simpleSMA(Math.round(50)), data: this.score_history,//.simpleSMA(Math.round(50)),
fill: false, fill: false,
borderColor: 'rgb(255, 159, 64)', borderColor: 'rgb(255, 159, 64)',
pointRadius: 1, pointRadius: 1,
}, },
{ // {
label: 'Data One', // label: 'Data One',
backgroundColor: 'rgb(0,0,0,0)', // backgroundColor: 'rgb(0,0,0,0)',
data: this.score_history.max(), // data: this.score_history.max(),
fill: false, // fill: false,
borderColor: 'rgb(64, 159, 255)', // borderColor: 'rgb(64, 159, 255)',
pointRadius: 1, // pointRadius: 1,
}, // },
] ]
} }
}, },
plot_options: function() {
var $this = this;
return {
responsive: true,
maintainAspectRatio: false,
scales: {
xAxes: [{
// type: 'linear',
ticks: {
maxTicksLimit: 8,
maxRotation: 0,
}
}]
},
legend: {
display: false
}
}
},
stage_config: function() { stage_config: function() {
return { return {
width: this.width, width: this.width,
@ -182,6 +233,20 @@ app = new Vue({
strokeW: function() { strokeW: function() {
return this.base_size / 50; return this.base_size / 50;
}, },
extreme_q_values: function(){
var max = -10*30;
var min = 10*30;
for (field in this.q_table) {
for (key in this.q_table[field]){
if (this.q_table[field][key]<min){
min = this.q_table[field][key];
} else if (this.q_table[field][key]>max){
max = this.q_table[field][key];
}
}
}
return {min:min,max:max};
}
}, },
methods: { methods: {
s2p: function(state) { s2p: function(state) {
@ -199,7 +264,10 @@ app = new Vue({
}, },
handleState: function(s) { handleState: function(s) {
if (!machine.running) { if (!machine.running) {
this.state_tween.to(this.state, 0.2, { x: this.s2p(s).x, y: this.s2p(s).y }); this.state_tween.to(this.state, 0.2, {
x: this.s2p(s).x,
y: this.s2p(s).y
});
} else { } else {
this.state = this.s2p(s); this.state = this.s2p(s);
} }
@ -234,13 +302,107 @@ app = new Vue({
} }
return false; return false;
}, },
get_field_config: function(state) {
var pos = this.s2p(state);
return {
x: this.base_size * pos.x+this.base_size/2,
y: this.base_size * pos.y+this.base_size/2,
}
},
get_q_text_config: function (val, i) {
var off, key;
switch (i) {
case 1:
off = {
align: "center",
verticalAlign: "top",
};
key = dir.UP;
break;
case 2:
off = {
align: "right",
verticalAlign: "middle",
};
key = dir.RIGHT;
break;
case 3:
off = {
align: "center",
verticalAlign: "bottom",
};
key = dir.DOWN;
break;
case 4:
off = {
align: "left",
verticalAlign: "middle",
};
key = dir.LEFT;
break;
}
if (val[key] === undefined) {
return {}
}
return {
fontSize: this.base_size/7,
fontFamily: 'Calibri',
fill: 'black',
text: +val[key].toFixed(2)+'',
width: this.base_size-5,
height: this.base_size-5,
...off,
offset: {
x: (this.base_size-5)/2,
y: (this.base_size-5)/2,
}
}
},
get_triangle_config: function(value, d) {
var rot = 0;
switch (d) {
case dir.UP:
rot = -90;
break;
case dir.RIGHT:
rot = 0;
break;
case dir.DOWN:
rot = 90;
break;
case dir.LEFT:
rot = 180;
break;
}
var $this = this;
var norma_value = (value-this.extreme_q_values.min)/((this.extreme_q_values.max-this.extreme_q_values.min)||1);
return {
sceneFunc: function(context, shape) {
context.beginPath();
context.moveTo(0, 0);
context.lineTo($this.base_size / 2, $this.base_size / 2);
context.lineTo($this.base_size / 2, -$this.base_size / 2);
context.lineTo(0, 0);
context.closePath();
// (!) Konva specific method, it is very important
context.fillStrokeShape(shape);
},
fill: palette[Math.round(norma_value*99)],
stroke: 'black',
strokeWidth: 0,
rotation: rot,
}
},
get_tile_config: function(i, t_type, local = false) { get_tile_config: function(i, t_type, local = false) {
var pos = this.s2p(i); var pos = this.s2p(i);
var over = {}; var over = {};
// not in plus // not in plus
if (local) { if (local) {
if (!this.in_plus(this.s2p(i),{x:Math.round(this.state.x),y:Math.round(this.state.y)})) { if (!this.in_plus(this.s2p(i), {
x: Math.round(this.state.x),
y: Math.round(this.state.y)
})) {
over = { over = {
opacity: 0, opacity: 0,
fill: "#eee" fill: "#eee"
@ -299,4 +461,23 @@ app = new Vue({
} }
} }
}, },
watch: {
learning_rate: function(new_val) {
machine.lr = new_val;
render_latex();
},
discount_factor: function(new_val) {
machine.df = new_val;
render_latex();
},
epsilon: function(new_val) {
machine.epsilon = new_val;
}
}
}) })
// Renders the Q-learning update rule, with the machine's current learning rate
// and discount factor filled in, into the element with id "test" via KaTeX.
function render_latex() {
  // (1-lr) * Q[state, action] + lr * (reward + gamma * np.max(Q[new_state, :]))
  const keep = (1 - machine.lr).toFixed(2);
  const lr = machine.lr.toFixed(2);
  const df = machine.df.toFixed(2);
  // The closing parenthesis of "(reward + ...)" was missing in the displayed formula.
  const formula = `Q(s,a)\\leftarrow${keep}Q(s,a)+${lr}(reward + ${df} * \\max_a(Q(s', a)))`;
  katex.render(formula, document.getElementById('test'), {displayMode: true});
}
render_latex();

Loading…
Cancel
Save