{"id":1572,"date":"2023-08-16T17:37:52","date_gmt":"2023-08-16T17:37:52","guid":{"rendered":"https:\/\/www.gptmain.news\/?p=1572"},"modified":"2023-08-16T17:37:52","modified_gmt":"2023-08-16T17:37:52","slug":"q-%d1%86%d0%b5%d0%bb%d0%b8-%d0%b4%d0%b2%d0%be%d0%b9%d0%bd%d0%be%d0%b9-dqn-%d0%b8-%d0%b4%d1%83%d1%8d%d0%bb%d1%8c%d0%bd%d1%8b%d0%b9-dqn-gptmain-news","status":"publish","type":"post","link":"https:\/\/gptmain.news\/?p=1572","title":{"rendered":"Q-\u0446\u0435\u043b\u0438, \u0434\u0432\u043e\u0439\u043d\u043e\u0439 DQN \u0438 \u0434\u0443\u044d\u043b\u044c\u043d\u044b\u0439 DQN\n | GPTMain News"},"content":{"rendered":"<div id=\"\">\n<p>\u0418 \u0441\u043d\u043e\u0432\u0430 \u0437\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435,<\/p>\n<p>\u0421\u0435\u0433\u043e\u0434\u043d\u044f\u0448\u043d\u044f\u044f \u0442\u0435\u043c\u0430\u2026 \u043d\u0443, \u0442\u0430\u043a\u0430\u044f \u0436\u0435, \u043a\u0430\u043a \u0438 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0430\u044f.  Q Learning \u0438 Deep Q Networks.  \u0412 \u043f\u0440\u043e\u0448\u043b\u044b\u0439 \u0440\u0430\u0437 \u043c\u044b \u043e\u0431\u044a\u044f\u0441\u043d\u0438\u043b\u0438, \u0447\u0442\u043e \u0442\u0430\u043a\u043e\u0435 Q Learning \u0438 \u043a\u0430\u043a \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0443\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u0435 \u0411\u0435\u043b\u043b\u043c\u0430\u043d\u0430 \u0434\u043b\u044f \u043d\u0430\u0445\u043e\u0436\u0434\u0435\u043d\u0438\u044f Q-\u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0439 \u0438, \u043a\u0430\u043a \u0441\u043b\u0435\u0434\u0441\u0442\u0432\u0438\u0435, \u043e\u043f\u0442\u0438\u043c\u0430\u043b\u044c\u043d\u043e\u0439 \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0438.  \u041f\u043e\u0437\u0436\u0435 \u043c\u044b \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u0438\u043b\u0438 Deep Q Networks \u0438 \u0442\u043e, \u043a\u0430\u043a \u0432\u043c\u0435\u0441\u0442\u043e \u0442\u043e\u0433\u043e, \u0447\u0442\u043e\u0431\u044b \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0442\u044c \u0432\u0441\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q-\u0442\u0430\u0431\u043b\u0438\u0446\u044b, \u043c\u044b \u043f\u043e\u0437\u0432\u043e\u043b\u0438\u043b\u0438 Deep Neural Network \u043d\u0430\u0443\u0447\u0438\u0442\u044c\u0441\u044f \u0438\u0445 \u0430\u043f\u043f\u0440\u043e\u043a\u0441\u0438\u043c\u0438\u0440\u043e\u0432\u0430\u0442\u044c.<\/p>\n<p>\u0421\u0435\u0442\u0438 Deep Q \u043f\u0440\u0438\u043d\u0438\u043c\u0430\u044e\u0442 \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0432\u0445\u043e\u0434\u043d\u044b\u0445 \u0434\u0430\u043d\u043d\u044b\u0445 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 \u0441\u0440\u0435\u0434\u044b \u0438 \u0432\u044b\u0432\u043e\u0434\u044f\u0442 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0433\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f.  \u041c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u043e\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u0435\u0442, \u043a\u0430\u043a\u043e\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u0432\u044b\u043f\u043e\u043b\u043d\u0438\u0442 \u0430\u0433\u0435\u043d\u0442.  \u041e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0430\u0433\u0435\u043d\u0442\u043e\u0432 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442 \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u043f\u043e\u0442\u0435\u0440\u0438 <strong>\u041e\u0448\u0438\u0431\u043a\u0430 \u0422\u0414<\/strong>, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u0441\u043e\u0431\u043e\u0439 \u0440\u0430\u0437\u043d\u0438\u0446\u0443 \u043c\u0435\u0436\u0434\u0443 \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u043e \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u044b\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c \u0434\u043b\u044f \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u0438 \u0442\u0435\u043a\u0443\u0449\u0438\u043c \u043f\u0440\u043e\u0433\u043d\u043e\u0437\u043e\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q (\u043a\u0430\u043a \u043f\u0440\u0435\u0434\u043b\u0430\u0433\u0430\u0435\u0442 \u0443\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u0435 \u0411\u0435\u043b\u043b\u043c\u0430\u043d\u0430).  \u0412 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u0435 \u043d\u0430\u043c \u0443\u0434\u0430\u0435\u0442\u0441\u044f \u0430\u043f\u043f\u0440\u043e\u043a\u0441\u0438\u043c\u0438\u0440\u043e\u0432\u0430\u0442\u044c Q-\u0442\u0430\u0431\u043b\u0438\u0446\u044b \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u043e\u0439 \u0441\u0435\u0442\u0438.<\/p>\n<p>\u0412\u0441\u0435 \u0438\u0434\u0435\u0442 \u043d\u043e\u0440\u043c\u0430\u043b\u044c\u043d\u043e.  \u041d\u043e, \u043a\u043e\u043d\u0435\u0447\u043d\u043e, \u0432\u043e\u0437\u043d\u0438\u043a\u0430\u0435\u0442 \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u043f\u0440\u043e\u0431\u043b\u0435\u043c.  \u041f\u0440\u043e\u0441\u0442\u043e \u0442\u0430\u043a \u043f\u0440\u043e\u0434\u0432\u0438\u0433\u0430\u044e\u0442\u0441\u044f \u043d\u0430\u0443\u0447\u043d\u044b\u0435 \u0438\u0441\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u043d\u0438\u044f.  \u0418, \u043a\u043e\u043d\u0435\u0447\u043d\u043e \u0436\u0435, \u043c\u044b \u043f\u0440\u0438\u0434\u0443\u043c\u0430\u043b\u0438 \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u043e\u0442\u043b\u0438\u0447\u043d\u044b\u0445 \u0440\u0435\u0448\u0435\u043d\u0438\u0439.<\/p>\n<h2 id=\"moving-q-targets\">\u041f\u0435\u0440\u0435\u043c\u0435\u0449\u0435\u043d\u0438\u0435 Q-\u0446\u0435\u043b\u0435\u0439<\/h2>\n<p>\u041f\u0435\u0440\u0432\u0430\u044f \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0430 \u0437\u0430\u043a\u043b\u044e\u0447\u0430\u0435\u0442\u0441\u044f \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e \u043d\u0430\u0437\u044b\u0432\u0430\u0435\u0442\u0441\u044f \u0434\u0432\u0438\u0436\u0443\u0449\u0438\u043c\u0438\u0441\u044f Q-\u043c\u0438\u0448\u0435\u043d\u044f\u043c\u0438.  \u041a\u0430\u043a \u043c\u044b \u0432\u0438\u0434\u0435\u043b\u0438, \u043f\u0435\u0440\u0432\u044b\u043c \u043a\u043e\u043c\u043f\u043e\u043d\u0435\u043d\u0442\u043e\u043c \u043e\u0448\u0438\u0431\u043a\u0438 TD (TD \u043e\u0437\u043d\u0430\u0447\u0430\u0435\u0442 Temporal Difference) \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f Q-Target, \u0438 \u043e\u043d \u0440\u0430\u0441\u0441\u0447\u0438\u0442\u044b\u0432\u0430\u0435\u0442\u0441\u044f \u043a\u0430\u043a \u043d\u0435\u043c\u0435\u0434\u043b\u0435\u043d\u043d\u043e\u0435 \u0432\u043e\u0437\u043d\u0430\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u0435 \u043f\u043b\u044e\u0441 \u0434\u0438\u0441\u043a\u043e\u043d\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u043e\u0435 \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u043e\u0435 Q-\u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u0434\u043b\u044f \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f.  \u041a\u043e\u0433\u0434\u0430 \u043c\u044b \u043e\u0431\u0443\u0447\u0430\u0435\u043c \u043d\u0430\u0448\u0435\u0433\u043e \u0430\u0433\u0435\u043d\u0442\u0430, \u043c\u044b \u043e\u0431\u043d\u043e\u0432\u043b\u044f\u0435\u043c \u0432\u0435\u0441\u0430 \u0432 \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0438\u0438 \u0441 \u043e\u0448\u0438\u0431\u043a\u043e\u0439 TD.  \u041d\u043e \u043e\u0434\u043d\u0438 \u0438 \u0442\u0435 \u0436\u0435 \u0432\u0435\u0441\u0430 \u043f\u0440\u0438\u043c\u0435\u043d\u044f\u044e\u0442\u0441\u044f \u043a\u0430\u043a \u043a \u0446\u0435\u043b\u0435\u0432\u043e\u043c\u0443, \u0442\u0430\u043a \u0438 \u043a \u043f\u0440\u043e\u0433\u043d\u043e\u0437\u0438\u0440\u0443\u0435\u043c\u043e\u043c\u0443 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044e.  \u0412\u044b \u0432\u0438\u0434\u0438\u0442\u0435 \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0443?<\/p>\n<div class=\"math\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi mathvariant=\"normal\">\u0394<\/mi><mi>\u0436<\/mi><mo>&#8220;=&#8221;<\/mo><mi>\u03b1<\/mi><mrow><mo fence=\"true\">[<\/mo><mrow><mo fence=\"true\">(<\/mo><mi>R<\/mi><mo>+<\/mo><mi>\u03b3<\/mi><munder><mo><mi>max<\/mi><mo>\u2061<\/mo><\/mo><mi>a<\/mi><\/munder><mover accent=\"true\"><mi>Q<\/mi><mo>^<\/mo><\/mover><mrow><mo fence=\"true\">(<\/mo><msup><mi>s<\/mi><mo mathvariant=\"normal\" lspace=\"0em\" rspace=\"0em\">\u2032<\/mo><\/msup><mo separator=\"true\">,<\/mo><mi>a<\/mi><mo separator=\"true\">,<\/mo><mi>w<\/mi><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><mo>\u2212<\/mo><mover accent=\"true\"><mi>Q<\/mi><mo>^<\/mo><\/mover><mo stretchy=\"false\">(<\/mo><mi>s<\/mi><mo separator=\"true\">,<\/mo><mi>a<\/mi><mo separator=\"true\">,<\/mo><mi>w<\/mi><mo stretchy=\"false\">)<\/mo><mo fence=\"true\">]<\/mo><\/mrow><msub><mi mathvariant=\"normal\">\u2207<\/mi><mi>\u0436<\/mi><\/msub><mover accent=\"true\"><mi>\u0412\u043e\u043f\u0440\u043e\u0441<\/mi><mo>^<\/mo><\/mover><mo stretchy=\"false\">(<\/mo><mi>\u0441<\/mi><mo separator=\"true\">,<\/mo><mi>\u0430<\/mi><mo separator=\"true\">,<\/mo><mi>\u0436<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><annotation encoding=\"application\/x-tex\">\\\u0414\u0435\u043b\u044c\u0442\u0430 \u0448=\\\u0430\u043b\u044c\u0444\u0430\\\u0432\u043b\u0435\u0432\u043e[\\left(R+\\gamma \\max _{a} \\hat{Q}\\left(s^{\\prime}, a, w\\right)\\right)-\\hat{Q}(s, a, w)\\right] \\nabla_{w} \\hat{Q}(s, a, w)<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.68333em;vertical-align:0em\"\/><span class=\"mord\">\u0394<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.02691em\">\u0436<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mrel\">&#8220;=&#8221;<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1.8499999999999999em;vertical-align:-0.7em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.0037em\">\u03b1<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\"><span class=\"delimsizing size2\">[<\/span><\/span><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\"><span class=\"delimsizing size2\">(<\/span><\/span><span class=\"mord mathnormal\" style=\"margin-right:0.00773em\">R<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mbin\">+<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.05556em\">\u03b3<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.43056em\"><span style=\"top:-2.4em;margin-left:0em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><span style=\"top:-3em\"><span class=\"pstrut\" style=\"height:3em\"\/><span><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7em\"><span\/><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9467699999999999em\"><span style=\"top:-3em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><\/span><\/span><span style=\"top:-3.25233em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"accent-body\" style=\"left:-0.16666em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.19444em\"><span\/><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.801892em\"><span style=\"top:-3.113em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mtight\">\u2032<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\">a<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.02691em\">w<\/span><span class=\"mclose delimcenter\" style=\"top:0em\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em\"><span class=\"delimsizing size2\">)<\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9467699999999999em\"><span style=\"top:-3em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><\/span><\/span><span style=\"top:-3.25233em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"accent-body\" style=\"left:-0.16666em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.19444em\"><span\/><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\">a<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.02691em\">w<\/span><span class=\"mclose\">)<\/span><span class=\"mclose delimcenter\" style=\"top:0em\"><span class=\"delimsizing size2\">]<\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord\"><span class=\"mord\">\u2207<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.151392em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.02691em\">\u0436<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mord accent\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9467699999999999em\"><span style=\"top:-3em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">\u0412\u043e\u043f\u0440\u043e\u0441<\/span><\/span><\/span><span style=\"top:-3.25233em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"accent-body\" style=\"left:-0.16666em\"><span class=\"mord\">^<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.19444em\"><span\/><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u0441<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\">\u0430<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.02691em\">\u0436<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/div>\n<p>\u041c\u044b \u043f\u0435\u0440\u0435\u043c\u0435\u0449\u0430\u0435\u043c \u0432\u044b\u0432\u043e\u0434 \u0431\u043b\u0438\u0436\u0435 \u043a \u0446\u0435\u043b\u0438, \u043d\u043e \u043c\u044b \u0442\u0430\u043a\u0436\u0435 \u043f\u0435\u0440\u0435\u043c\u0435\u0449\u0430\u0435\u043c \u0446\u0435\u043b\u044c.  \u0418\u0442\u0430\u043a, \u043c\u044b \u0432 \u043a\u043e\u043d\u0435\u0447\u043d\u043e\u043c \u0438\u0442\u043e\u0433\u0435 \u043f\u0440\u0435\u0441\u043b\u0435\u0434\u0443\u0435\u043c \u0446\u0435\u043b\u044c, \u0438 \u043c\u044b \u043f\u043e\u043b\u0443\u0447\u0430\u0435\u043c \u0441\u0438\u043b\u044c\u043d\u043e \u043a\u043e\u043b\u0435\u0431\u043b\u044e\u0449\u0438\u0439\u0441\u044f \u0442\u0440\u0435\u043d\u0438\u0440\u043e\u0432\u043e\u0447\u043d\u044b\u0439 \u043f\u0440\u043e\u0446\u0435\u0441\u0441.  \u0411\u044b\u043b\u043e \u0431\u044b \u043d\u0435\u043f\u043b\u043e\u0445\u043e \u043e\u0441\u0442\u0430\u0432\u0438\u0442\u044c \u0446\u0435\u043b\u044c \u0444\u0438\u043a\u0441\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u043e\u0439, \u043f\u043e\u043a\u0430 \u043c\u044b \u043e\u0431\u0443\u0447\u0430\u0435\u043c \u0441\u0435\u0442\u044c.  \u0427\u0442\u043e \u0436, DeepMind \u0441\u0434\u0435\u043b\u0430\u043b \u0438\u043c\u0435\u043d\u043d\u043e \u044d\u0442\u043e.<\/p>\n<p>\u0412\u043c\u0435\u0441\u0442\u043e \u0442\u043e\u0433\u043e, \u0447\u0442\u043e\u0431\u044b \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u043e\u0434\u043d\u0443 \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u0443\u044e \u0441\u0435\u0442\u044c, \u043e\u043d \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442 \u0434\u0432\u0435.  \u0414\u0430, \u0432\u044b \u043d\u0435 \u043e\u0441\u043b\u044b\u0448\u0430\u043b\u0438\u0441\u044c!  (\u0432\u0440\u043e\u0434\u0435 \u0431\u044b \u043e\u0434\u043d\u043e\u0433\u043e \u0443\u0436\u0435 \u043d\u0435 \u0445\u0432\u0430\u0442\u0438\u043b\u043e).<\/p>\n<p>\u041e\u0434\u043d\u0430 \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u043e\u0441\u043d\u043e\u0432\u043d\u043e\u0439 \u0441\u0435\u0442\u0438 Deep Q, \u0430 \u0432\u0442\u043e\u0440\u0430\u044f (\u043d\u0430\u0437\u044b\u0432\u0430\u0435\u0442\u0441\u044f <strong>\u0426\u0435\u043b\u0435\u0432\u0430\u044f \u0441\u0435\u0442\u044c<\/strong>) \u0434\u043b\u044f \u0438\u0441\u043a\u043b\u044e\u0447\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0433\u043e \u0438 \u043f\u0435\u0440\u0438\u043e\u0434\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e \u043e\u0431\u043d\u043e\u0432\u043b\u0435\u043d\u0438\u044f \u0432\u0435\u0441\u043e\u0432 \u0446\u0435\u043b\u0438.  \u042d\u0442\u0430 \u0442\u0435\u0445\u043d\u0438\u043a\u0430 \u043d\u0430\u0437\u044b\u0432\u0430\u0435\u0442\u0441\u044f <strong>\u0424\u0438\u043a\u0441\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0435 Q-\u0446\u0435\u043b\u0438.<\/strong> \u041d\u0430 \u0441\u0430\u043c\u043e\u043c \u0434\u0435\u043b\u0435 \u0432\u0435\u0441\u0430 \u0444\u0438\u043a\u0441\u0438\u0440\u043e\u0432\u0430\u043d\u044b \u0434\u043b\u044f \u0431\u043e\u043b\u044c\u0448\u0435\u0439 \u0447\u0430\u0441\u0442\u0438 \u0442\u0440\u0435\u043d\u0438\u0440\u043e\u0432\u043a\u0438 \u0438 \u043e\u0431\u043d\u043e\u0432\u043b\u044f\u044e\u0442\u0441\u044f \u0442\u043e\u043b\u044c\u043a\u043e \u0432\u0440\u0435\u043c\u044f \u043e\u0442 \u0432\u0440\u0435\u043c\u0435\u043d\u0438.<\/p>\n<pre class=\"prism-code language-python\" style=\"color:#F8F8F2;background-color:#282A36\"><p><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">class<\/span><span class=\"token plain\"> <\/span><span class=\"token class-name\">DQNAgent<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">__init__<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> action_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">_build_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">target_model <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">_build_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">update_target_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">update_target_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">target_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">set_weights<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">get_weights<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><\/p><\/pre>\n<h2 id=\"maximization-bias\">\u041f\u0440\u0435\u0434\u0432\u0437\u044f\u0442\u043e\u0441\u0442\u044c \u043c\u0430\u043a\u0441\u0438\u043c\u0438\u0437\u0430\u0446\u0438\u0438<\/h2>\n<p>\u041f\u0440\u0435\u0434\u0432\u0437\u044f\u0442\u043e\u0441\u0442\u044c \u043c\u0430\u043a\u0441\u0438\u043c\u0438\u0437\u0430\u0446\u0438\u0438 \u2014 \u044d\u0442\u043e \u0441\u043a\u043b\u043e\u043d\u043d\u043e\u0441\u0442\u044c Deep Q Networks \u043f\u0435\u0440\u0435\u043e\u0446\u0435\u043d\u0438\u0432\u0430\u0442\u044c \u0444\u0443\u043d\u043a\u0446\u0438\u0438 \u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438 \u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f-\u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438 (Q).  \u041f\u043e\u0447\u0435\u043c\u0443 \u044d\u0442\u043e \u043f\u0440\u043e\u0438\u0441\u0445\u043e\u0434\u0438\u0442?  \u041f\u043e\u0434\u0443\u043c\u0430\u0439\u0442\u0435, \u0447\u0442\u043e \u0435\u0441\u043b\u0438 \u043f\u043e \u043a\u0430\u043a\u043e\u0439-\u0442\u043e \u043f\u0440\u0438\u0447\u0438\u043d\u0435 \u0441\u0435\u0442\u044c \u0437\u0430\u0432\u044b\u0448\u0430\u0435\u0442 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q \u0434\u043b\u044f \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f, \u044d\u0442\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u0431\u0443\u0434\u0435\u0442 \u0432\u044b\u0431\u0440\u0430\u043d\u043e \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u0434\u043b\u044f \u043f\u0435\u0440\u0435\u0445\u043e\u0434\u0430 \u043a \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u043c\u0443 \u0448\u0430\u0433\u0443, \u0438 \u0442\u0430\u043a\u043e\u0435 \u0436\u0435 \u0437\u0430\u0432\u044b\u0448\u0435\u043d\u043d\u043e\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u0431\u0443\u0434\u0435\u0442 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c\u0441\u044f \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f.  \u0414\u0440\u0443\u0433\u0438\u043c\u0438 \u0441\u043b\u043e\u0432\u0430\u043c\u0438, \u043d\u0435\u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e \u043e\u0446\u0435\u043d\u0438\u0442\u044c, \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0442\u0435\u043b\u044c\u043d\u043e \u043b\u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u0441 \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u044b\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u043b\u0443\u0447\u0448\u0438\u043c.  \u041a\u0430\u043a \u044d\u0442\u043e \u0440\u0435\u0448\u0438\u0442\u044c?  \u041e\u0442\u0432\u0435\u0442 \u043e\u0447\u0435\u043d\u044c \u0438\u043d\u0442\u0435\u0440\u0435\u0441\u043d\u044b\u0439 \u043c\u0435\u0442\u043e\u0434 \u0438 \u043d\u0430\u0437\u044b\u0432\u0430\u0435\u0442\u0441\u044f:<\/p>\n<h2 id=\"double-deep-q-network\">\u0414\u0432\u043e\u0439\u043d\u0430\u044f \u0433\u043b\u0443\u0431\u043e\u043a\u0430\u044f \u0441\u0435\u0442\u044c Q<\/h2>\n<p>\u0427\u0442\u043e\u0431\u044b \u0443\u0441\u0442\u0440\u0430\u043d\u0438\u0442\u044c \u043f\u0440\u0435\u0434\u0432\u0437\u044f\u0442\u043e\u0441\u0442\u044c \u043c\u0430\u043a\u0441\u0438\u043c\u0438\u0437\u0430\u0446\u0438\u0438, \u043c\u044b \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0434\u0432\u0435 \u0441\u0435\u0442\u0438 Deep Q.<\/p>\n<ul>\n<li>\n<p>\u0421 \u043e\u0434\u043d\u043e\u0439 \u0441\u0442\u043e\u0440\u043e\u043d\u044b, DQN \u043e\u0442\u0432\u0435\u0447\u0430\u0435\u0442 \u0437\u0430 <strong>\u0432\u044b\u0431\u043e\u0440<\/strong> \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0433\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f (\u0441 \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u044b\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c) \u043a\u0430\u043a \u0432\u0441\u0435\u0433\u0434\u0430.<\/p>\n<\/li>\n<li>\n<p>\u0421 \u0434\u0440\u0443\u0433\u043e\u0439 \u0441\u0442\u043e\u0440\u043e\u043d\u044b, \u0446\u0435\u043b\u0435\u0432\u0430\u044f \u0441\u0435\u0442\u044c \u043e\u0442\u0432\u0435\u0447\u0430\u0435\u0442 \u0437\u0430 <strong>\u043e\u0446\u0435\u043d\u043a\u0430<\/strong><br \/>\n\u044d\u0442\u043e\u0433\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f.<\/p>\n<\/li>\n<\/ul>\n<p>\u0425\u0438\u0442\u0440\u043e\u0441\u0442\u044c \u0437\u0430\u043a\u043b\u044e\u0447\u0430\u0435\u0442\u0441\u044f \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e \u0446\u0435\u043b\u0435\u0432\u043e\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u0430\u0432\u0442\u043e\u043c\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438 \u0441\u043e\u0437\u0434\u0430\u0435\u0442\u0441\u044f \u043d\u0435 \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u044b\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c Q, \u0430 \u0446\u0435\u043b\u0435\u0432\u043e\u0439 \u0441\u0435\u0442\u044c\u044e.  \u0414\u0440\u0443\u0433\u0438\u043c\u0438 \u0441\u043b\u043e\u0432\u0430\u043c\u0438, \u043c\u044b \u0432\u044b\u0437\u044b\u0432\u0430\u0435\u043c \u0446\u0435\u043b\u0435\u0432\u0443\u044e \u0441\u0435\u0442\u044c \u0434\u043b\u044f \u0432\u044b\u0447\u0438\u0441\u043b\u0435\u043d\u0438\u044f \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q \u0434\u043b\u044f \u0432\u044b\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u044f \u044d\u0442\u043e\u0433\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u0432 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u043c \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0438.  \u0418 \u043a\u0430\u043a \u043f\u043e\u0431\u043e\u0447\u043d\u044b\u0439 \u044d\u0444\u0444\u0435\u043a\u0442, \u043c\u044b \u0442\u0430\u043a\u0436\u0435 \u0440\u0435\u0448\u0430\u0435\u043c \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0443 \u0441 \u0434\u0432\u0438\u0436\u0443\u0449\u0435\u0439\u0441\u044f \u043c\u0438\u0448\u0435\u043d\u044c\u044e.  \u0410\u043a\u043a\u0443\u0440\u0430\u0442\u043d\u043e, \u0432\u0435\u0440\u043d\u043e?  \u0414\u0432\u0435 \u043f\u0442\u0438\u0446\u044b \u0441 \u043e\u0434\u043d\u0438\u043c \u043a\u0430\u043c\u043d\u0435\u043c.  \u041e\u0442\u0434\u0435\u043b\u0438\u0432 \u0432\u044b\u0431\u043e\u0440 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u043e\u0442 \u0433\u0435\u043d\u0435\u0440\u0430\u0446\u0438\u0438 \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q, \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u0441\u0443\u0449\u0435\u0441\u0442\u0432\u0435\u043d\u043d\u043e \u0443\u043c\u0435\u043d\u044c\u0448\u0438\u0442\u044c \u043f\u0435\u0440\u0435\u043e\u0446\u0435\u043d\u043a\u0443 \u0438 \u0442\u0440\u0435\u043d\u0438\u0440\u043e\u0432\u0430\u0442\u044c\u0441\u044f \u0431\u044b\u0441\u0442\u0440\u0435\u0435 \u0438 \u043d\u0430\u0434\u0435\u0436\u043d\u0435\u0435.<\/p>\n<pre class=\"prism-code language-python\" style=\"color:#F8F8F2;background-color:#282A36\"><p><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">train_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">if<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">len<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">memory<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">&lt;<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">train_start<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">return<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        batch_size <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">min<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">len<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">memory<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        mini_batch <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> random<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">sample<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">memory<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        update_input <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">zeros<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        update_target <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">zeros<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> done <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">for<\/span><span class=\"token plain\"> i <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">in<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">range<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            update_input<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> mini_batch<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">append<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">mini_batch<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">append<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">mini_batch<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">2<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            update_target<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> mini_batch<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">3<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            done<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">append<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">mini_batch<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">4<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        target <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">predict<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">update_input<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        target_next <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">predict<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">update_target<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"> <\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        target_val <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">target_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">predict<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">update_target<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"> <\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">for<\/span><span class=\"token plain\"> i <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">in<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">range<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">if<\/span><span class=\"token plain\"> done<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                target<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">else<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                a <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">argmax<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">target_next<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                target<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">+<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">discount_factor <\/span><span class=\"token operator\">*<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">target_val<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">i<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">a<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">fit<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">update_input<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> target<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> batch_size<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\">epochs<\/span><span class=\"token operator\">=<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> verbose<\/span><span class=\"token operator\">=<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><\/p><\/pre>\n<p>\u0414\u0443\u043c\u0430\u0435\u0448\u044c, \u044d\u0442\u043e \u0432\u0441\u0435?  \u0418\u0437\u0432\u0438\u043d\u0438\u0442\u0435, \u0447\u0442\u043e \u043f\u043e\u0434\u0432\u0435\u043b \u0432\u0430\u0441.  \u041c\u044b \u0441\u043e\u0431\u0438\u0440\u0430\u0435\u043c\u0441\u044f \u043f\u043e\u0439\u0442\u0438 \u0435\u0449\u0435 \u0434\u0430\u043b\u044c\u0448\u0435.  \u0427\u0442\u043e \u0442\u0435\u043f\u0435\u0440\u044c?  \u0421\u043e\u0431\u0438\u0440\u0430\u0435\u0442\u0435\u0441\u044c \u043b\u0438 \u0432\u044b \u0434\u043e\u0431\u0430\u0432\u0438\u0442\u044c \u0442\u0440\u0435\u0442\u044c\u044e \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u0443\u044e \u0441\u0435\u0442\u044c?  \u0425\u0430-\u0445\u0430!!<\/p>\n<p>\u041d\u0443 \u0442\u0438\u043f\u0430.  \u041a\u0442\u043e \u0441\u043c\u0435\u0435\u0442\u0441\u044f \u0441\u0435\u0439\u0447\u0430\u0441?<\/p>\n<div class=\"inline-newsletter-form\">\n<p><h2>\u0417\u0430\u0433\u0440\u0443\u0437\u0438\u0442\u0435 \u043d\u0430\u0448\u0443 \u0431\u0435\u0441\u043f\u043b\u0430\u0442\u043d\u0443\u044e \u044d\u043b\u0435\u043a\u0442\u0440\u043e\u043d\u043d\u0443\u044e \u043a\u043d\u0438\u0433\u0443 \u043f\u043e \u0433\u043b\u0443\u0431\u043e\u043a\u043e\u043c\u0443 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044e \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c<\/h2>\n<h4>\u041c\u044b \u043e\u0431\u044a\u0435\u0434\u0438\u043d\u0438\u043b\u0438 \u0432\u0441\u0435 \u0441\u0442\u0430\u0442\u044c\u0438 \u043f\u043e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044e \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c \u0432 \u043e\u0434\u0438\u043d PDF-\u0444\u0430\u0439\u043b.  \u041f\u043e\u044d\u0442\u043e\u043c\u0443, \u0435\u0441\u043b\u0438 \u0443 \u0432\u0430\u0441 \u043d\u0435\u0442 \u0432\u0440\u0435\u043c\u0435\u043d\u0438 \u0447\u0438\u0442\u0430\u0442\u044c \u0432\u0441\u044e \u0441\u0442\u0430\u0442\u044c\u044e \u0438\u043b\u0438 \u0432\u044b \u0445\u043e\u0442\u0438\u0442\u0435, \u0447\u0442\u043e\u0431\u044b pdf-\u0432\u0435\u0440\u0441\u0438\u044f \u0447\u0438\u0442\u0430\u043b\u0430\u0441\u044c \u0432 \u0430\u0432\u0442\u043e\u043d\u043e\u043c\u043d\u043e\u043c \u0440\u0435\u0436\u0438\u043c\u0435, \u043d\u0430\u0436\u043c\u0438\u0442\u0435 \u043a\u043d\u043e\u043f\u043a\u0443 \u043d\u0438\u0436\u0435.<\/h4>\n<\/p>\n<div id=\"mauticform_wrapper__drlebook\" class=\"mauticform-wrapper\">\n<div class=\"form-privacy-text--dark\">\n<p>* \u041c\u044b \u0441\u0442\u0440\u0435\u043c\u0438\u043c\u0441\u044f \u043a \u0432\u0430\u0448\u0435\u0439 \u043a\u043e\u043d\u0444\u0438\u0434\u0435\u043d\u0446\u0438\u0430\u043b\u044c\u043d\u043e\u0441\u0442\u0438.  AI Summer \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442 \u043f\u0440\u0435\u0434\u043e\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u043d\u0443\u044e \u0432\u0430\u043c\u0438 \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u044e \u0434\u043b\u044f \u043e\u0442\u043f\u0440\u0430\u0432\u043a\u0438 \u0432\u0430\u043c \u043d\u0430\u0448\u0435\u0433\u043e \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u043e\u043d\u043d\u043e\u0433\u043e \u0431\u044e\u043b\u043b\u0435\u0442\u0435\u043d\u044f \u0438 \u0441\u0432\u044f\u0437\u0438 \u0441 \u0432\u0430\u043c\u0438 \u043f\u043e \u043f\u043e\u0432\u043e\u0434\u0443 \u043d\u0430\u0448\u0438\u0445 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u043e\u0432.  \u0412\u044b \u043c\u043e\u0436\u0435\u0442\u0435 \u043e\u0442\u043a\u0430\u0437\u0430\u0442\u044c\u0441\u044f \u043e\u0442 \u043f\u043e\u0434\u043f\u0438\u0441\u043a\u0438 \u043d\u0430 \u044d\u0442\u0438 \u0441\u043e\u043e\u0431\u0449\u0435\u043d\u0438\u044f \u0432 \u043b\u044e\u0431\u043e\u0435 \u0432\u0440\u0435\u043c\u044f.  \u0414\u043b\u044f \u043f\u043e\u043b\u0443\u0447\u0435\u043d\u0438\u044f \u0434\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0439 \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u0438 \u043e\u0437\u043d\u0430\u043a\u043e\u043c\u044c\u0442\u0435\u0441\u044c \u0441 \u043d\u0430\u0448\u0438\u043c<!-- --> \u041f\u043e\u043b\u0438\u0442\u0438\u043a\u0430 \u043a\u043e\u043d\u0444\u0438\u0434\u0435\u043d\u0446\u0438\u0430\u043b\u044c\u043d\u043e\u0441\u0442\u0438.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<h2 id=\"dueling-deep-q-networks\">\u0414\u0443\u044d\u043b\u0438 \u0441\u0435\u0442\u0435\u0439 Deep Q<\/h2>\n<p>\u0414\u0430\u0432\u0430\u0439\u0442\u0435 \u0441\u043d\u0430\u0447\u0430\u043b\u0430 \u043e\u0431\u043d\u043e\u0432\u0438\u043c \u043e\u0441\u043d\u043e\u0432\u0443 Q Learning.  \u0417\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0443\u044e\u0442 \u043c\u0435\u0442\u0440\u0438\u043a\u0435 \u0442\u043e\u0433\u043e, \u043d\u0430\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0445\u043e\u0440\u043e\u0448\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u0434\u043b\u044f \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u043d\u043e\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f, \u0432\u0435\u0440\u043d\u043e?  \u0412\u043e\u0442 \u043f\u043e\u0447\u0435\u043c\u0443 \u044d\u0442\u043e \u0444\u0443\u043d\u043a\u0446\u0438\u044f \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f-\u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438.  \u041c\u0435\u0442\u0440\u0438\u043a\u0430 \u2014 \u044d\u0442\u043e \u043d\u0435 \u0447\u0442\u043e \u0438\u043d\u043e\u0435, \u043a\u0430\u043a \u043e\u0436\u0438\u0434\u0430\u0435\u043c\u0430\u044f \u043e\u0442\u0434\u0430\u0447\u0430 \u044d\u0442\u043e\u0433\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u043e\u0442 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f.  \u0424\u0430\u043a\u0442\u0438\u0447\u0435\u0441\u043a\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q \u043c\u043e\u0436\u043d\u043e \u0440\u0430\u0437\u0431\u0438\u0442\u044c \u043d\u0430 \u0434\u0432\u0435 \u0447\u0430\u0441\u0442\u0438: \u0444\u0443\u043d\u043a\u0446\u0438\u044e \u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f V(s) \u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u043f\u0440\u0435\u0438\u043c\u0443\u0449\u0435\u0441\u0442\u0432\u0430 A(s, a).  \u0418 \u0434\u0430, \u043c\u044b \u043f\u0440\u043e\u0441\u0442\u043e \u0432\u0432\u043e\u0434\u0438\u043c \u0435\u0449\u0435 \u043e\u0434\u043d\u0443 \u0444\u0443\u043d\u043a\u0446\u0438\u044e:<\/p>\n<div class=\"math\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi>\u0412\u043e\u043f\u0440\u043e\u0441<\/mi><mo stretchy=\"false\">(<\/mo><mi>\u0441<\/mi><mo separator=\"true\">,<\/mo><mi>\u0430<\/mi><mo stretchy=\"false\">)<\/mo><mo>&#8220;=&#8221;<\/mo><mi>\u0412<\/mi><mo stretchy=\"false\">(<\/mo><mi>\u0441<\/mi><mo stretchy=\"false\">)<\/mo><mo>+<\/mo><mi>\u0410<\/mi><mo stretchy=\"false\">(<\/mo><mi>\u0441<\/mi><mo separator=\"true\">,<\/mo><mi>\u0430<\/mi><mo stretchy=\"false\">)<\/mo><\/mrow><annotation encoding=\"application\/x-tex\">Q(s, \u0430)=V(s)+A(s, \u0430)<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord mathnormal\">\u0412\u043e\u043f\u0440\u043e\u0441<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u0441<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\">\u0430<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mrel\">&#8220;=&#8221;<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.22222em\">\u0412<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u0441<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mbin\">+<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord mathnormal\">\u0410<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u0441<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\">\u0430<\/span><span class=\"mclose\">)<\/span><\/span><\/span><\/span><\/span><\/div>\n<p>\u0424\u0443\u043d\u043a\u0446\u0438\u044f \u043f\u0440\u0435\u0438\u043c\u0443\u0449\u0435\u0441\u0442\u0432\u0430 \u0444\u0438\u043a\u0441\u0438\u0440\u0443\u0435\u0442, \u043d\u0430\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u043b\u0443\u0447\u0448\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u043f\u043e \u0441\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044e \u0441 \u0434\u0440\u0443\u0433\u0438\u043c\u0438 \u0432 \u0434\u0430\u043d\u043d\u043e\u043c \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0438, \u0432 \u0442\u043e \u0432\u0440\u0435\u043c\u044f \u043a\u0430\u043a, \u043a\u0430\u043a \u043c\u044b \u0437\u043d\u0430\u0435\u043c, \u0444\u0443\u043d\u043a\u0446\u0438\u044f \u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438 \u0444\u0438\u043a\u0441\u0438\u0440\u0443\u0435\u0442, \u043d\u0430\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0445\u043e\u0440\u043e\u0448\u043e \u0431\u044b\u0442\u044c \u0432 \u044d\u0442\u043e\u043c \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0438.  \u0412\u0441\u044f \u0438\u0434\u0435\u044f Dueling Q Networks \u043e\u0441\u043d\u043e\u0432\u0430\u043d\u0430 \u043d\u0430 <strong>\u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u0438\u0435 \u0444\u0443\u043d\u043a\u0446\u0438\u0438 Q \u0432 \u0432\u0438\u0434\u0435 \u0441\u0443\u043c\u043c\u044b \u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438 \u0438 \u0444\u0443\u043d\u043a\u0446\u0438\u0438 \u043f\u0440\u0435\u0438\u043c\u0443\u0449\u0435\u0441\u0442\u0432\u0430<\/strong>.  \u0423 \u043d\u0430\u0441 \u043f\u0440\u043e\u0441\u0442\u043e \u0435\u0441\u0442\u044c \u0434\u0432\u0435 \u0441\u0435\u0442\u0438 \u0434\u043b\u044f \u0438\u0437\u0443\u0447\u0435\u043d\u0438\u044f \u043a\u0430\u0436\u0434\u043e\u0439 \u0447\u0430\u0441\u0442\u0438 \u0441\u0443\u043c\u043c\u044b, \u0430 \u0437\u0430\u0442\u0435\u043c \u043c\u044b \u043e\u0431\u044a\u0435\u0434\u0438\u043d\u044f\u0435\u043c \u0438\u0445 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u044b.<\/p>\n<p><span class=\"gatsby-resp-image-wrapper\" style=\"position:relative;display:block;margin-left:auto;margin-right:auto;max-width:1042px\"><\/p>\n<p>    <span class=\"gatsby-resp-image-background-image\" style=\"padding-bottom:51%;position:relative;bottom:0;left:0;background-image:url('data:image\/jpeg;base64,\/9j\/2wBDABALDA4MChAODQ4SERATGCgaGBYWGDEjJR0oOjM9PDkzODdASFxOQERXRTc4UG1RV19iZ2hnPk1xeXBkeFxlZ2P\/2wBDARESEhgVGC8aGi9jQjhCY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2P\/wgARCAAKABQDASIAAhEBAxEB\/8QAGAAAAgMAAAAAAAAAAAAAAAAAAAIBAwX\/xAAUAQEAAAAAAAAAAAAAAAAAAAAA\/9oADAMBAAIQAxAAAAHamxBwD\/\/EABcQAAMBAAAAAAAAAAAAAAAAAAABEAL\/2gAIAQEAAQUCQrmf\/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP\/aAAgBAwEBPwE\/\/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP\/aAAgBAgEBPwE\/\/8QAFxAAAwEAAAAAAAAAAAAAAAAAARAgMf\/aAAgBAQAGPwLYK\/\/EABoQAAIDAQEAAAAAAAAAAAAAAAERABAhMbH\/2gAIAQEAAT8hIZswwk6HJ7V\/\/9oADAMBAAIAAwAAABDHz\/\/EABYRAAMAAAAAAAAAAAAAAAAAAAEQMf\/aAAgBAwEBPxARf\/\/EABQRAQAAAAAAAAAAAAAAAAAAABD\/2gAIAQIBAT8QP\/\/EABwQAQEBAAEFAAAAAAAAAAAAAAERAFEQITFx4f\/aAAgBAQABPxBT3Ti\/MdVtWPvU5MAEMQIDz6f\/2Q==');background-size:cover;display:block\"\/><br \/>\n  <img decoding=\"async\" class=\"gatsby-resp-image-image\" alt=\"\u0414\u0414\u041a\u041d\" title=\"\u0414\u0414\u041a\u041d\" src=\"https:\/\/theaisummer.com\/static\/b0f4c8c3f3a5158b5899aa52575eaea0\/95a07\/DDQN.jpg\" srcset=\"\/static\/b0f4c8c3f3a5158b5899aa52575eaea0\/f93b5\/DDQN.jpg 300w,\/static\/b0f4c8c3f3a5158b5899aa52575eaea0\/b4294\/DDQN.jpg 600w,\/static\/b0f4c8c3f3a5158b5899aa52575eaea0\/95a07\/DDQN.jpg 1042w\" sizes=\"(max-width: 1042px) 100vw, 1042px\" style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0\" loading=\"lazy\"\/><\/p>\n<p>    <\/span><br \/>\n<em>\u0414\u0443\u044d\u043b\u044c \u0441\u0435\u0442\u0435\u0432\u044b\u0445 \u0430\u0440\u0445\u0438\u0442\u0435\u043a\u0442\u0443\u0440 \u0434\u043b\u044f \u0433\u043b\u0443\u0431\u043e\u043a\u043e\u0433\u043e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c<\/em><\/p>\n<p>\u0417\u0430\u0440\u0430\u0431\u0430\u0442\u044b\u0432\u0430\u0435\u043c \u043b\u0438 \u043c\u044b \u044d\u0442\u0438\u043c \u0447\u0442\u043e-\u0442\u043e?  \u041a\u043e\u043d\u0435\u0447\u043d\u043e, \u043c\u044b \u0434\u0435\u043b\u0430\u0435\u043c.  \u0422\u0435\u043f\u0435\u0440\u044c \u0430\u0433\u0435\u043d\u0442\u044b \u043c\u043e\u0433\u0443\u0442 \u043e\u0446\u0435\u043d\u0438\u0432\u0430\u0442\u044c \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435, \u043d\u0435 \u0437\u0430\u0431\u043e\u0442\u044f\u0441\u044c \u043e \u043f\u043e\u0441\u043b\u0435\u0434\u0441\u0442\u0432\u0438\u044f\u0445 \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u0438\u0437 \u044d\u0442\u043e\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f.  \u042d\u0442\u043e \u043e\u0437\u043d\u0430\u0447\u0430\u0435\u0442, \u0447\u0442\u043e \u0444\u0443\u043d\u043a\u0446\u0438\u0438, \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u044e\u0449\u0438\u0435, \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u043b\u0438 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 \u0445\u043e\u0440\u043e\u0448\u0438\u043c \u0438\u043b\u0438 \u043d\u0435\u0442, \u043d\u0435 \u043e\u0431\u044f\u0437\u0430\u0442\u0435\u043b\u044c\u043d\u043e \u0441\u043e\u0432\u043f\u0430\u0434\u0430\u044e\u0442 \u0441 \u0444\u0443\u043d\u043a\u0446\u0438\u044f\u043c\u0438, \u043e\u0446\u0435\u043d\u0438\u0432\u0430\u044e\u0449\u0438\u043c\u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435.  \u0418, \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e, \u0435\u043c\u0443 \u0432\u043e\u043e\u0431\u0449\u0435 \u043d\u0435 \u043d\u0443\u0436\u043d\u043e \u0437\u0430\u0431\u043e\u0442\u0438\u0442\u044c\u0441\u044f \u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f\u0445.  \u041d\u0435\u0440\u0435\u0434\u043a\u0438 \u0441\u043b\u0443\u0447\u0430\u0438, \u043a\u043e\u0433\u0434\u0430 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u0438\u0437 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u043d\u0438\u043a\u0430\u043a \u043d\u0435 \u0432\u043b\u0438\u044f\u044e\u0442 \u043d\u0430 \u043e\u043a\u0440\u0443\u0436\u0430\u044e\u0449\u0443\u044e \u0441\u0440\u0435\u0434\u0443.  \u0422\u0430\u043a \u0437\u0430\u0447\u0435\u043c \u0438\u0445 \u0443\u0447\u0438\u0442\u044b\u0432\u0430\u0442\u044c?<\/p>\n<p>*<!-- --> \u041a\u0440\u0430\u0442\u043a\u043e\u0435 \u043f\u0440\u0438\u043c\u0435\u0447\u0430\u043d\u0438\u0435: \u0435\u0441\u043b\u0438 \u0432\u044b \u0432\u043d\u0438\u043c\u0430\u0442\u0435\u043b\u044c\u043d\u043e \u043f\u043e\u0441\u043c\u043e\u0442\u0440\u0438\u0442\u0435 \u043d\u0430 \u0438\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u0435, \u0432\u044b \u0443\u0432\u0438\u0434\u0438\u0442\u0435, \u0447\u0442\u043e \u0434\u043b\u044f \u043e\u0431\u044a\u0435\u0434\u0438\u043d\u0435\u043d\u0438\u044f \u0432\u044b\u0445\u043e\u0434\u043d\u044b\u0445 \u0434\u0430\u043d\u043d\u044b\u0445 \u0434\u0432\u0443\u0445 \u0441\u0435\u0442\u0435\u0439 \u043c\u044b \u043d\u0435 \u043f\u0440\u043e\u0441\u0442\u043e \u0434\u043e\u0431\u0430\u0432\u043b\u044f\u0435\u043c \u0438\u0445.  \u041f\u0440\u0438\u0447\u0438\u043d\u043e\u0439 \u044d\u0442\u043e\u0433\u043e \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0430 \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0446\u0438\u0438.  \u0415\u0441\u043b\u0438 \u0443 \u043d\u0430\u0441 \u0435\u0441\u0442\u044c Q, \u043c\u044b \u043d\u0435 \u043c\u043e\u0436\u0435\u043c \u043d\u0430\u0439\u0442\u0438 V \u0438 A. \u0422\u0430\u043a\u0438\u043c \u043e\u0431\u0440\u0430\u0437\u043e\u043c, \u043c\u044b \u043d\u0435 \u043c\u043e\u0436\u0435\u043c \u0432\u044b\u043f\u043e\u043b\u043d\u0438\u0442\u044c \u043e\u0431\u0440\u0430\u0442\u043d\u043e\u0435 \u0440\u0430\u0441\u043f\u0440\u043e\u0441\u0442\u0440\u0430\u043d\u0435\u043d\u0438\u0435.  \u0412\u043c\u0435\u0441\u0442\u043e \u044d\u0442\u043e\u0433\u043e \u043c\u044b \u043f\u0440\u0435\u0434\u043f\u043e\u0447\u0438\u0442\u0430\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0441\u0440\u0435\u0434\u043d\u0435\u0435 \u043f\u0440\u0435\u0438\u043c\u0443\u0449\u0435\u0441\u0442\u0432\u043e \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u0443\u0440\u043e\u0432\u043d\u044f (\u0432\u044b\u0447\u0438\u0442\u0430\u0435\u043c\u044b\u0439 \u0442\u0435\u0440\u043c\u0438\u043d).<\/p>\n<pre class=\"prism-code language-python\" style=\"color:#F8F8F2;background-color:#282A36\"><p><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">build_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">input<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Input<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">shape<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        shared <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Conv2D<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">32<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">8<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">8<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> strides<\/span><span class=\"token operator\">=<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">4<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">4<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> activation<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'relu'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">input<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        shared <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Conv2D<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">64<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">4<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">4<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> strides<\/span><span class=\"token operator\">=<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">2<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">2<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> activation<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'relu'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">shared<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        shared <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Conv2D<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">64<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">3<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">3<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> strides<\/span><span class=\"token operator\">=<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> activation<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'relu'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">shared<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        flatten <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Flatten<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">shared<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        advantage_fc <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Dense<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">512<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> activation<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'relu'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">flatten<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        advantage <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Dense<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">advantage_fc<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        advantage <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Lambda<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">lambda<\/span><span class=\"token plain\"> a<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"> a<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">-<\/span><span class=\"token plain\"> K<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">mean<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">a<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> keepdims<\/span><span class=\"token operator\">=<\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                           output_shape<\/span><span class=\"token operator\">=<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">advantage<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        value_fc <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Dense<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">512<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> activation<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'relu'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">flatten<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        value <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">  Dense<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">value_fc<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        value <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Lambda<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">lambda<\/span><span class=\"token plain\"> s<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"> K<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">expand_dims<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">s<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">-<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                       output_shape<\/span><span class=\"token operator\">=<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">value<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        q_value <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> merge<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">value<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> advantage<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> mode<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'sum'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        model <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">inputs<\/span><span class=\"token operator\">=<\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">input<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> outputs<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">q_value<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">summary<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">return<\/span><span class=\"token plain\"> model<\/span><\/p><\/pre>\n<p>\u0418 \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u0435, \u043d\u043e \u043d\u0435 \u043c\u0435\u043d\u0435\u0435 \u0432\u0430\u0436\u043d\u043e\u0435: \u0443 \u043d\u0430\u0441 \u0435\u0441\u0442\u044c \u0435\u0449\u0435 \u043e\u0434\u043d\u0430 \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0430 \u0434\u043b\u044f \u043e\u0431\u0441\u0443\u0436\u0434\u0435\u043d\u0438\u044f, \u0438 \u043e\u043d\u0430 \u0441\u0432\u044f\u0437\u0430\u043d\u0430 \u0441 \u043e\u043f\u0442\u0438\u043c\u0438\u0437\u0430\u0446\u0438\u0435\u0439 \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0434\u0435\u043d\u0438\u044f \u043e\u043f\u044b\u0442\u0430.<\/p>\n<h2 id=\"prioritized-experience-replay\">\u041f\u043e\u0432\u0442\u043e\u0440 \u043f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442\u043d\u043e\u0433\u043e \u043e\u043f\u044b\u0442\u0430<\/h2>\n<p>\u0412\u044b \u043f\u043e\u043c\u043d\u0438\u0442\u0435, \u0447\u0442\u043e \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0434\u0435\u043d\u0438\u0435 \u043e\u043f\u044b\u0442\u0430 \u2014 \u044d\u0442\u043e \u043a\u043e\u0433\u0434\u0430 \u043c\u044b \u0432\u0440\u0435\u043c\u044f \u043e\u0442 \u0432\u0440\u0435\u043c\u0435\u043d\u0438 \u043f\u0440\u043e\u0438\u0433\u0440\u044b\u0432\u0430\u0435\u043c \u0430\u0433\u0435\u043d\u0442\u0443 \u0441\u043b\u0443\u0447\u0430\u0439\u043d\u044b\u0435 \u043f\u0440\u043e\u0448\u043b\u044b\u0435 \u043f\u0435\u0440\u0435\u0436\u0438\u0432\u0430\u043d\u0438\u044f, \u0447\u0442\u043e\u0431\u044b \u043e\u043d \u043d\u0435 \u0437\u0430\u0431\u044b\u043b \u0438\u0445?  \u0415\u0441\u043b\u0438 \u0432\u044b \u044d\u0442\u043e\u0433\u043e \u043d\u0435 \u0441\u0434\u0435\u043b\u0430\u0435\u0442\u0435, \u0442\u0435\u043f\u0435\u0440\u044c \u0432\u044b \u044d\u0442\u043e \u0441\u0434\u0435\u043b\u0430\u0435\u0442\u0435.  \u041d\u043e \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u043f\u0435\u0440\u0435\u0436\u0438\u0432\u0430\u043d\u0438\u044f \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u0431\u043e\u043b\u0435\u0435 \u0437\u043d\u0430\u0447\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u043c\u0438, \u0447\u0435\u043c \u0434\u0440\u0443\u0433\u0438\u0435.  \u0412 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u0435 \u043c\u044b \u0434\u043e\u043b\u0436\u043d\u044b \u0440\u0430\u0441\u0441\u0442\u0430\u0432\u0438\u0442\u044c \u043f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442\u044b \u0434\u043b\u044f \u0438\u0445 \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0434\u0435\u043d\u0438\u044f.  \u0414\u043b\u044f \u044d\u0442\u043e\u0433\u043e \u0432\u043c\u0435\u0441\u0442\u043e \u0441\u043b\u0443\u0447\u0430\u0439\u043d\u043e\u0439 \u0432\u044b\u0431\u043e\u0440\u043a\u0438 (\u0438\u0437 \u0440\u0430\u0432\u043d\u043e\u043c\u0435\u0440\u043d\u043e\u0433\u043e \u0440\u0430\u0441\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u0438\u044f) \u043c\u044b \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0432\u044b\u0431\u043e\u0440\u043a\u0443 \u0441 \u043f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442\u043e\u043c.  \u0412 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u043f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442\u0430 \u043c\u044b \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u0435\u043c \u0432\u0435\u043b\u0438\u0447\u0438\u043d\u0443 \u043e\u0448\u0438\u0431\u043a\u0438 TD (\u043f\u043b\u044e\u0441 \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u0430\u044f \u043a\u043e\u043d\u0441\u0442\u0430\u043d\u0442\u0430, \u0447\u0442\u043e\u0431\u044b \u0438\u0437\u0431\u0435\u0436\u0430\u0442\u044c \u043d\u0443\u043b\u0435\u0432\u043e\u0439 \u0432\u0435\u0440\u043e\u044f\u0442\u043d\u043e\u0441\u0442\u0438 \u0432\u044b\u0431\u043e\u0440\u0430 \u043e\u043f\u044b\u0442\u0430).<\/p>\n<div class=\"math\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi>\u043f<\/mi><mo>&#8220;=&#8221;<\/mo><mi mathvariant=\"normal\">\u2223<\/mi><mi>\u0434\u0435\u043b\u044c\u0442\u0430<\/mi><mi mathvariant=\"normal\">\u2223<\/mi><mo>+<\/mo><mi>\u03b5<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">\u0440=|\\\u0434\u0435\u043b\u044c\u0442\u0430|+\\\u0432\u0430\u0440\u0435\u043f\u0441\u0438\u043b\u043e\u043d<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:0.625em;vertical-align:-0.19444em\"\/><span class=\"mord mathnormal\">\u043f<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mrel\">&#8220;=&#8221;<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord\">\u2223<\/span><span class=\"mord mathnormal\" style=\"margin-right:0.03785em\">\u0434\u0435\u043b\u044c\u0442\u0430<\/span><span class=\"mord\">\u2223<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mbin\">+<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:0.43056em;vertical-align:0em\"\/><span class=\"mord mathnormal\">\u03b5<\/span><\/span><\/span><\/span><\/span><\/div>\n<p>\u0426\u0435\u043d\u0442\u0440\u0430\u043b\u044c\u043d\u0430\u044f \u0438\u0434\u0435\u044f: <strong>\u0427\u0435\u043c \u0432\u044b\u0448\u0435 \u043e\u0448\u0438\u0431\u043a\u0430 \u043c\u0435\u0436\u0434\u0443 \u043f\u0440\u043e\u0433\u043d\u043e\u0437\u043e\u043c \u0438 \u0446\u0435\u043b\u044c\u044e, \u0442\u0435\u043c \u0430\u043a\u0442\u0443\u0430\u043b\u044c\u043d\u0435\u0435 \u0443\u0437\u043d\u0430\u0442\u044c \u043e\u0431 \u044d\u0442\u043e\u043c<\/strong>.<\/p>\n<p>\u0418 \u0447\u0442\u043e\u0431\u044b \u0433\u0430\u0440\u0430\u043d\u0442\u0438\u0440\u043e\u0432\u0430\u0442\u044c, \u0447\u0442\u043e \u043c\u044b \u043d\u0435 \u0432\u0441\u0435\u0433\u0434\u0430 \u0431\u0443\u0434\u0435\u043c \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u0442\u044c \u043e\u0434\u0438\u043d \u0438 \u0442\u043e\u0442 \u0436\u0435 \u043e\u043f\u044b\u0442, \u043c\u044b \u0434\u043e\u0431\u0430\u0432\u043b\u044f\u0435\u043c \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u0443\u044e \u0441\u0442\u043e\u0445\u0430\u0441\u0442\u0438\u0447\u043d\u043e\u0441\u0442\u044c, \u0438 \u0432\u0441\u0435 \u0433\u043e\u0442\u043e\u0432\u043e.  \u041a\u0440\u043e\u043c\u0435 \u0442\u043e\u0433\u043e, \u0434\u043b\u044f \u0441\u043d\u0438\u0436\u0435\u043d\u0438\u044f \u0441\u043b\u043e\u0436\u043d\u043e\u0441\u0442\u0438 \u043c\u044b \u0441\u043e\u0445\u0440\u0430\u043d\u044f\u0435\u043c \u043e\u043f\u044b\u0442 \u0432 \u0431\u0438\u043d\u0430\u0440\u043d\u043e\u043c \u0434\u0435\u0440\u0435\u0432\u0435 \u043f\u043e\u0434 \u043d\u0430\u0437\u0432\u0430\u043d\u0438\u0435\u043c SumTree.<\/p>\n<div class=\"math\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><mi>\u043f<\/mi><mo stretchy=\"false\">(<\/mo><mi>\u044f<\/mi><mo stretchy=\"false\">)<\/mo><mo>&#8220;=&#8221;<\/mo><mfrac><msubsup><mi>\u043f<\/mi><mi>\u044f<\/mi><mi>\u03b1<\/mi><\/msubsup><mrow><munder><mo>\u2211<\/mo><mi>\u043a<\/mi><\/munder><msubsup><mi>\u043f<\/mi><mi>\u043a<\/mi><mi>\u03b1<\/mi><\/msubsup><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">P (i) = \\ frac {p_ {i} ^ {\\ alpha}} {\\ sum_ {k} p_ {k} ^ {\\ alpha}}<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.13889em\">\u043f<\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">\u044f<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mrel\">&#8220;=&#8221;<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:2.3287em;vertical-align:-0.9873080000000001em\"\/><span class=\"mord\"><span class=\"mopen nulldelimiter\"\/><span class=\"mfrac\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:1.341392em\"><span style=\"top:-2.314em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"mord\"><span class=\"mop\"><span class=\"mop op-symbol small-op\" style=\"position:relative;top:-0.0000050000000000050004em\">\u2211<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.1863979999999999em\"><span style=\"top:-2.40029em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em\">\u043a<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.29971000000000003em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">\u043f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.6461919999999999em\"><span style=\"top:-2.398692em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03148em\">\u043a<\/span><\/span><\/span><\/span><span style=\"top:-3.0448em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.30130799999999996em\"><span\/><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span style=\"top:-3.23em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"frac-line\" style=\"border-bottom-width:0.04em\"\/><\/span><span style=\"top:-3.677em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathnormal\">\u043f<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.664392em\"><span style=\"top:-2.441336em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">\u044f<\/span><\/span><\/span><\/span><span style=\"top:-3.063em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.0037em\">\u03b1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.258664em\"><span\/><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.9873080000000001em\"><span\/><\/span><\/span><\/span><\/span><span class=\"mclose nulldelimiter\"\/><\/span><\/span><\/span><\/span><\/span><\/div>\n<pre class=\"prism-code language-python\" style=\"color:#F8F8F2;background-color:#282A36\"><p><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">from<\/span><span class=\"token plain\"> SumTree <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">import<\/span><span class=\"token plain\"> SumTree<\/span><\/p><p><span class=\"token plain\"\/><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">class<\/span><span class=\"token plain\"> <\/span><span class=\"token class-name\">PER<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\">  <\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    e <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">0.01<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    a <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">0.6<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">__init__<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> capacity<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">tree <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> SumTree<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">capacity<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">_getPriority<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> error<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">return<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">error <\/span><span class=\"token operator\">+<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">e<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">**<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">a<\/span><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">add<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> error<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> sample<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        p <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">_getPriority<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">error<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">tree<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">add<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">p<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> sample<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">sample<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> n<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        batch <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        segment <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">tree<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">total<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">\/<\/span><span class=\"token plain\"> n<\/span><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">for<\/span><span class=\"token plain\"> i <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">in<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">range<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">n<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            a <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> segment <\/span><span class=\"token operator\">*<\/span><span class=\"token plain\"> i<\/span><\/p><p><span class=\"token plain\">            b <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> segment <\/span><span class=\"token operator\">*<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">i <\/span><span class=\"token operator\">+<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            s <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> random<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">uniform<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">a<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> b<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">idx<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> p<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> data<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">tree<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">get<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">s<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            batch<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">append<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">idx<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> data<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">return<\/span><span class=\"token plain\"> batch<\/span><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">update<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> idx<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> error<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        p <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">_getPriority<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">error<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">tree<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">update<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">idx<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> p<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><\/p><\/pre>\n<p>\u042d\u0442\u043e \u0431\u044b\u043b\u043e \u043c\u043d\u043e\u0433\u043e.  \u041c\u043d\u043e\u0433\u043e \u043d\u043e\u0432\u043e\u0439 \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u0438, \u043c\u043d\u043e\u0433\u043e \u043d\u043e\u0432\u044b\u0445 \u0443\u043b\u0443\u0447\u0448\u0435\u043d\u0438\u0439.  \u041d\u043e \u0442\u043e\u043b\u044c\u043a\u043e \u043f\u043e\u0434\u0443\u043c\u0430\u0439\u0442\u0435, \u0447\u0442\u043e \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u043e\u0431\u044a\u0435\u0434\u0438\u043d\u0438\u0442\u044c \u0438\u0445 \u0432\u0441\u0435 \u0432\u043c\u0435\u0441\u0442\u0435.  \u0418 \u043c\u044b \u0434\u0435\u043b\u0430\u0435\u043c \u044d\u0442\u043e.<\/p>\n<p>\u042f \u043f\u043e\u043f\u044b\u0442\u0430\u043b\u0441\u044f \u0434\u0430\u0442\u044c \u043a\u0440\u0430\u0442\u043a\u0438\u0439 \u043e\u0431\u0437\u043e\u0440 \u0441\u0430\u043c\u044b\u0445 \u0432\u0430\u0436\u043d\u044b\u0445 \u043d\u0435\u0434\u0430\u0432\u043d\u0438\u0445 \u0443\u0441\u0438\u043b\u0438\u0439 \u0432 \u044d\u0442\u043e\u0439 \u043e\u0431\u043b\u0430\u0441\u0442\u0438, \u043e\u043f\u0438\u0440\u0430\u044f\u0441\u044c \u043d\u0430 \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0438\u043d\u0442\u0443\u0438\u0442\u0438\u0432\u043d\u044b\u0435 \u043c\u044b\u0441\u043b\u0438 \u0438 \u043d\u0435\u043c\u043d\u043e\u0433\u043e \u043c\u0430\u0442\u0435\u043c\u0430\u0442\u0438\u043a\u0438.  \u0412\u043e\u0442 \u043f\u043e\u0447\u0435\u043c\u0443 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c \u0442\u0430\u043a \u0432\u0430\u0436\u043d\u043e.  \u0421\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442 \u0442\u0430\u043a \u043c\u043d\u043e\u0433\u043e \u043f\u043e\u0442\u0435\u043d\u0446\u0438\u0430\u043b\u0430 \u0438 \u0442\u0430\u043a \u043c\u043d\u043e\u0433\u043e \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u0435\u0439 \u0434\u043b\u044f \u0443\u043b\u0443\u0447\u0448\u0435\u043d\u0438\u0439, \u0447\u0442\u043e \u0432\u044b \u043f\u0440\u043e\u0441\u0442\u043e \u043d\u0435 \u043c\u043e\u0436\u0435\u0442\u0435 \u0438\u0433\u043d\u043e\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u0442\u043e\u0442 \u0444\u0430\u043a\u0442, \u0447\u0442\u043e \u043e\u043d \u0441\u0442\u0430\u043d\u0435\u0442 \u043a\u0440\u0443\u043f\u043d\u044b\u043c \u0438\u0433\u0440\u043e\u043a\u043e\u043c \u0432 \u043e\u0431\u043b\u0430\u0441\u0442\u0438 \u0418\u0418 (\u0435\u0441\u043b\u0438 \u044d\u0442\u043e \u0443\u0436\u0435 \u043d\u0435 \u0442\u0430\u043a).  \u041d\u043e \u0438\u043c\u0435\u043d\u043d\u043e \u043f\u043e\u044d\u0442\u043e\u043c\u0443 \u0442\u0430\u043a \u0442\u0440\u0443\u0434\u043d\u043e \u0443\u0447\u0438\u0442\u044c\u0441\u044f \u0438 \u0438\u0434\u0442\u0438 \u0432 \u043d\u043e\u0433\u0443 \u0441 \u044d\u0442\u0438\u043c.<\/p>\n<p>\u0415\u0441\u043b\u0438 \u0432\u0430\u043c \u043d\u0443\u0436\u043d\u043e \u043f\u043e\u0433\u0440\u0443\u0437\u0438\u0442\u044c\u0441\u044f \u0433\u043b\u0443\u0431\u0436\u0435, \u044f \u043d\u0430\u0441\u0442\u043e\u044f\u0442\u0435\u043b\u044c\u043d\u043e \u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434\u0443\u044e \u043f\u0440\u043e\u0439\u0442\u0438 \u043a\u0443\u0440\u0441 Advanced AI: Deep Reinforcement Learning Course \u0432 Python \u043d\u0430 Udemy.<\/p>\n<p>\u0412 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0438\u0439 \u0440\u0430\u0437 \u043c\u044b \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u0438\u043c \u0433\u0440\u0430\u0434\u0438\u0435\u043d\u0442\u044b \u043f\u043e\u043b\u0438\u0442\u0438\u043a \u0438 \u0430\u043b\u0433\u043e\u0440\u0438\u0442\u043c REINFORCE.<\/p>\n<p>\u041d\u0430\u043c \u043e\u0441\u0442\u0430\u0435\u0442\u0441\u044f \u0442\u043e\u043b\u044c\u043a\u043e \u0443\u0447\u0438\u0442\u044c\u0441\u044f\u2026<\/p>\n<div class=\"dl-prod-book-inline-banner\">\n<div class=\"dl-prod-book-inline-banner__image gatsby-image-wrapper\" style=\"position:relative;overflow:hidden\"><img decoding=\"async\" aria-hidden=\"true\" src=\"data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAZCAYAAAAxFw7TAAAACXBIWXMAAAsTAAALEwEAmpwYAAAFzklEQVQ4y1WU2VOb1xnGzyfVbt1O6tST4AVjglc2s1sskgxC8GlBIAmQkMRugtlBbAJjwAKMDYbYYOyMSTJt46YzSV1PO2k7uWimnfauF532or3oTC\/7L3R6+ev7SaGeXjxzzrf9znOe9\/2OUtm1qAs3UVl2tOybmLJlvGBDy7Kina9JyZRSNWaRKVPGzCq0c5WYDJ21oE6Xp5VRhlI5TtR79WmdrkG9UykPqmQuOlONelc+OCUvn5KX3y5B\/UD0drG8Z9wrRZ0sFri8a4DPiNRlF2aRAQyPrxNff87U2gFtw6tU+keIjCWJTawzvvKY2Y0Dhha2GVnaQY\/FCQ3d5dbsfTFQgSlLzIh7pa75UBdFWW5qe1fpTuzROb\/PVX2Y0uA0ocltAiMbtE9sEZ3ZJTi6SfPQBs7uRapDs\/iH1sStwM7ZUedtAsz1425uIhDw4vU6OVdSj8ftoMxWj8\/bwPVqB5lFdgotdhx1VtwNVoJuK9fK7ehOGzeqqrHW1lNgcXDich1KK2zn4wUPP7zj4XfbLg4THkZ7mni55ObpjJsv111M9Xn40xOdtqAH3ethc9TFyzs6S+\/r7Md1\/rDVwMsFJyfzpB7HSjqo8wXJqGynJ9aC2+9nsLuZtrbmFLjR18xkvwe\/30NvxEul04tN99Ag4NsxF5FWF\/W6Tle7zndzddlySSeqMIa6HkXlRVD5HaIwKleUJ7oYxJwXSs8vNcv9AOpKi8xFl+X6klGDJimqV649AizrRSvrQZV2YyrtRCuJoRVHMZfIAvltnKrpZ2f2EYVTEr61H5PAtII2TPmtaHkBNKmBltuCKVfgRoFVxQCq\/JaoH1XWh7GAAVeFEU44pjmdPGQucR9HZBLL4h6ZvoS4bJXn4rgglFpU5QXTzgWuVOUQyjKIuvE+R3BzWT9V\/juci39IxsAqnzbO0FQywFnnAGUrh1wJJMWlRHO94w24oD0NVzVjqOpRVNUIpqrhFNTqWmR5ag9\/KMFaU4KvJp9i8cxhzumg0NZLT\/QhVyvku6IIWpFEcz3yBq7scZRtSvKZQKsZF\/AYbk+cj1pn+bN+l78PPqY3usj3yjo4W9xLpkDHJp7SPvYzcSTAUsm\/uEskxS2S4qraedTNWdEMJvu0wOP01Y\/yr+ga\/+zZpcEhkRRIPvkBsotDXL0Wxj\/5gqEP\/yKuelI7Mkv+ZqO4BlzV30E5FlB1CUx1afi9lkW+GNgkw3FbYEGOF4c5WRqmqCjM3PkAv50\/YPfVP8gqHybTcpu3JH\/zUWGVvopqWEY575KCO5fIk9yOX5B\/M8fD8Ypuvl8RJV968UVBN\/+Ww4CvP+WvyedU3JjkRPUI\/1dY5d1AuaXHXPdIwRu\/gVtkVeOczGkkp7yTr9sm4bND+M0hf5u7z4B1ireMzK2i6rFUUZVRVNX8AOXbRDUJ2LMucGkJgWvG3LnIaWlW50U3fzx4xn9+\/3O+jK4SqZrnlMSkORJoRv5G9rKAqpkQoH8L1fKQFPgI3iJjYBtTaI9vh5+SaRujtGaQYNMyDlkkuy2JCm+jWh+ljegr\/K8WKZgBaN5MSZO56UjGvcADLi39iNatz7jSucV3\/Gt8yye78Bm7WMakG7nPCUxc1o4bW06DlO9+esuNRpbiQJexRa7DmxzresC127uUDz\/h3S5x1pRMO7LLb2g1YIvfdIp0iQHSDJhrjY6t1+grP6XjyS9p2X2NY\/MLYge\/5uba57Q+ek388Cv6nv0Kz84rIvu\/IPrgc2pnDjmmL6XaLr1lo8qpSic5G9sls2+P4AeviG3\/BHviCRVy7NfM7BBc2Wf00QtCy7s4kp9gufdj3uvbJVt+Q5PkmgYmDKC0jCeZVuNKKuCs4cf0LD\/j4foMQ4tTDC8MMZ24JadOHwtLg1intzgzsifZiSPb7BtYastHMKNdjFECvzoubjY+YmL1OcH5PYpHdsjo2uKdzodcuLVNSXyf7DEBSlG0+oU3sLo5\/gubMZbW7M93hgAAAABJRU5ErkJggg==\" alt=\"\u041a\u043d\u0438\u0433\u0430 \u00ab\u0413\u043b\u0443\u0431\u043e\u043a\u043e\u0435 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0432 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0441\u0442\u0432\u0435\u00bb\" style=\"position:absolute;top:0;left:0;width:100%;height:100%;object-fit:contain;object-position:center;opacity:1;transition-delay:500ms\"\/><noscript><picture><source srcset=\"https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/69585\/deep-learning-book-cover.png 200w,&#10;https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/497c6\/deep-learning-book-cover.png 400w,&#10;https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/3c17d\/deep-learning-book-cover.png 720w\" sizes=\"(max-width: 720px) 100vw, 720px\"\/><img decoding=\"async\" loading=\"lazy\" sizes=\"(max-width: 720px) 100vw, 720px\" srcset=\"https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/69585\/deep-learning-book-cover.png 200w,&#10;https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/497c6\/deep-learning-book-cover.png 400w,&#10;https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/3c17d\/deep-learning-book-cover.png 720w\" src=\"https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/3c17d\/deep-learning-book-cover.png\" alt=\"\u041a\u043d\u0438\u0433\u0430 \u00ab\u0413\u043b\u0443\u0431\u043e\u043a\u043e\u0435 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0432 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0441\u0442\u0432\u0435\u00bb\" style=\"position:absolute;top:0;left:0;opacity:1;width:100%;height:100%;object-fit:cover;object-position:center\"\/><\/picture><\/noscript><\/div>\n<div class=\"dl-prod-book-inline-banner__text\">\n<h2>\u041a\u043d\u0438\u0433\u0430 \u00ab\u0413\u043b\u0443\u0431\u043e\u043a\u043e\u0435 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0432 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0441\u0442\u0432\u0435\u00bb \ud83d\udcd6<\/h2>\n<h4>\u0423\u0437\u043d\u0430\u0439\u0442\u0435, \u043a\u0430\u043a \u0441\u043e\u0437\u0434\u0430\u0432\u0430\u0442\u044c, \u043e\u0431\u0443\u0447\u0430\u0442\u044c, \u0440\u0430\u0437\u0432\u0435\u0440\u0442\u044b\u0432\u0430\u0442\u044c, \u043c\u0430\u0441\u0448\u0442\u0430\u0431\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u0438 \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0438\u0432\u0430\u0442\u044c \u043c\u043e\u0434\u0435\u043b\u0438 \u0433\u043b\u0443\u0431\u043e\u043a\u043e\u0433\u043e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f.  \u0418\u0437\u0443\u0447\u0438\u0442\u0435 \u0438\u043d\u0444\u0440\u0430\u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440\u0443 \u043c\u0430\u0448\u0438\u043d\u043d\u043e\u0433\u043e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f \u0438 MLOps \u043d\u0430 \u043f\u0440\u0430\u043a\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0445 \u043f\u0440\u0438\u043c\u0435\u0440\u0430\u0445.<\/h4>\n<p>\u0423\u0437\u043d\u0430\u0442\u044c \u0431\u043e\u043b\u044c\u0448\u0435<\/p><\/div>\n<\/div>\n<p><em class=\"affiliate-disclosure\">* \u0420\u0430\u0441\u043a\u0440\u044b\u0442\u0438\u0435 \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u0438: \u041e\u0431\u0440\u0430\u0442\u0438\u0442\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435, \u0447\u0442\u043e \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0438\u0437 \u043f\u0440\u0438\u0432\u0435\u0434\u0435\u043d\u043d\u044b\u0445 \u0432\u044b\u0448\u0435 \u0441\u0441\u044b\u043b\u043e\u043a \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u043f\u0430\u0440\u0442\u043d\u0435\u0440\u0441\u043a\u0438\u043c\u0438 \u0441\u0441\u044b\u043b\u043a\u0430\u043c\u0438, \u0438 \u043c\u044b \u0431\u0435\u0437 \u0434\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0445 \u0437\u0430\u0442\u0440\u0430\u0442 \u0434\u043b\u044f \u0432\u0430\u0441 \u043f\u043e\u043b\u0443\u0447\u0438\u043c \u043a\u043e\u043c\u0438\u0441\u0441\u0438\u044e, \u0435\u0441\u043b\u0438 \u0432\u044b \u0440\u0435\u0448\u0438\u0442\u0435 \u0441\u043e\u0432\u0435\u0440\u0448\u0438\u0442\u044c \u043f\u043e\u043a\u0443\u043f\u043a\u0443 \u043f\u043e\u0441\u043b\u0435 \u043f\u0435\u0440\u0435\u0445\u043e\u0434\u0430 \u043f\u043e \u0441\u0441\u044b\u043b\u043a\u0435.<\/em><\/p>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u0418 \u0441\u043d\u043e\u0432\u0430 \u0437\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435, \u0421\u0435\u0433\u043e\u0434\u043d\u044f\u0448\u043d\u044f\u044f \u0442\u0435\u043c\u0430\u2026 \u043d\u0443, \u0442\u0430\u043a\u0430\u044f \u0436\u0435, \u043a\u0430\u043a \u0438 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0430\u044f. Q Learning \u0438 Deep Q Networks. \u0412 \u043f\u0440\u043e\u0448\u043b\u044b\u0439 \u0440\u0430\u0437 \u043c\u044b \u043e\u0431\u044a\u044f\u0441\u043d\u0438\u043b\u0438, \u0447\u0442\u043e \u0442\u0430\u043a\u043e\u0435 Q Learning \u0438 \u043a\u0430\u043a \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0443\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u0435 \u0411\u0435\u043b\u043b\u043c\u0430\u043d\u0430 \u0434\u043b\u044f \u043d\u0430\u0445\u043e\u0436\u0434\u0435\u043d\u0438\u044f Q-\u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0439 \u0438, \u043a\u0430\u043a \u0441\u043b\u0435\u0434\u0441\u0442\u0432\u0438\u0435, \u043e\u043f\u0442\u0438\u043c\u0430\u043b\u044c\u043d\u043e\u0439 \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0438. \u041f\u043e\u0437\u0436\u0435 \u043c\u044b \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u0438\u043b\u0438 Deep Q Networks \u0438 \u0442\u043e, \u043a\u0430\u043a \u0432\u043c\u0435\u0441\u0442\u043e \u0442\u043e\u0433\u043e, \u0447\u0442\u043e\u0431\u044b \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0442\u044c \u0432\u0441\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q-\u0442\u0430\u0431\u043b\u0438\u0446\u044b, [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":1573,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":{"0":"post-1572","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","7":"category-ai-research-and-news"},"_links":{"self":[{"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/posts\/1572","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1572"}],"version-history":[{"count":0,"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/posts\/1572\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/media\/1573"}],"wp:attachment":[{"href":"https:\/\/gptmain.news\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1572"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1572"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1572"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}