{"id":1576,"date":"2023-08-16T18:05:08","date_gmt":"2023-08-16T18:05:08","guid":{"rendered":"https:\/\/www.gptmain.news\/?p=1576"},"modified":"2023-08-16T18:05:08","modified_gmt":"2023-08-16T18:05:08","slug":"%d0%b3%d0%bb%d1%83%d0%b1%d0%be%d0%ba%d0%be%d0%b5-%d0%be%d0%b1%d1%83%d1%87%d0%b5%d0%bd%d0%b8%d0%b5-q-%d0%b8-%d1%81%d0%b5%d1%82%d0%b8-deep-q-gptmain-news","status":"publish","type":"post","link":"https:\/\/gptmain.news\/?p=1576","title":{"rendered":"\u0413\u043b\u0443\u0431\u043e\u043a\u043e\u0435 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 Q \u0438 \u0441\u0435\u0442\u0438 Deep Q\n | GPTMain News"},"content":{"rendered":"<div id=\"\">\n<p>\u041f\u0443\u0442\u0435\u0448\u0435\u0441\u0442\u0432\u0438\u0435 \u043a \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044e \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c \u043f\u0440\u043e\u0434\u043e\u043b\u0436\u0430\u0435\u0442\u0441\u044f\u2026 \u041f\u0440\u0438\u0448\u043b\u043e \u0432\u0440\u0435\u043c\u044f \u043f\u0440\u043e\u0430\u043d\u0430\u043b\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u043f\u0435\u0447\u0430\u043b\u044c\u043d\u043e \u0438\u0437\u0432\u0435\u0441\u0442\u043d\u043e\u0435 Q-\u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0438 \u043f\u043e\u0441\u043c\u043e\u0442\u0440\u0435\u0442\u044c, \u043a\u0430\u043a \u043e\u043d\u043e \u0441\u0442\u0430\u043b\u043e \u043d\u043e\u0432\u044b\u043c \u0441\u0442\u0430\u043d\u0434\u0430\u0440\u0442\u043e\u043c \u0432 \u043e\u0431\u043b\u0430\u0441\u0442\u0438 \u0418\u0418 (\u0441 \u043d\u0435\u0431\u043e\u043b\u044c\u0448\u043e\u0439 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u044b\u0445 \u0441\u0435\u0442\u0435\u0439).<\/p>\n<p>\u041f\u0435\u0440\u0432\u043e-\u043d\u0430\u043f\u0435\u0440\u0432\u043e.  
\u0412 \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u043c \u043f\u043e\u0441\u0442\u0435 \u043c\u044b \u0443\u0432\u0438\u0434\u0435\u043b\u0438 \u043e\u0441\u043d\u043e\u0432\u043d\u0443\u044e \u043a\u043e\u043d\u0446\u0435\u043f\u0446\u0438\u044e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c \u0438 \u0441\u0444\u043e\u0440\u043c\u0443\u043b\u0438\u0440\u043e\u0432\u0430\u043b\u0438 \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0443, \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044f \u0430\u0433\u0435\u043d\u0442\u0430, \u0441\u0440\u0435\u0434\u0443, \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 (S), \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 (A) \u0438 \u0432\u043e\u0437\u043d\u0430\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u0435 (R).  \u041c\u044b \u0433\u043e\u0432\u043e\u0440\u0438\u043b\u0438 \u043e \u0442\u043e\u043c, \u043a\u0430\u043a \u0432\u0435\u0441\u044c \u043f\u0440\u043e\u0446\u0435\u0441\u0441 \u043c\u043e\u0436\u043d\u043e \u043e\u043f\u0438\u0441\u0430\u0442\u044c \u043a\u0430\u043a \u043c\u0430\u0440\u043a\u043e\u0432\u0441\u043a\u0438\u0439 \u043f\u0440\u043e\u0446\u0435\u0441\u0441 \u043f\u0440\u0438\u043d\u044f\u0442\u0438\u044f \u0440\u0435\u0448\u0435\u043d\u0438\u0439, \u0438 \u0432\u0432\u0435\u043b\u0438 \u0442\u0435\u0440\u043c\u0438\u043d\u044b \u00ab\u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0430\u00bb \u0438 \u00ab\u0446\u0435\u043d\u043d\u043e\u0441\u0442\u044c\u00bb.  
\u041d\u0430\u043a\u043e\u043d\u0435\u0446, \u0443 \u043d\u0430\u0441 \u0431\u044b\u043b \u043a\u0440\u0430\u0442\u043a\u0438\u0439 \u043e\u0431\u0449\u0438\u0439 \u043e\u0431\u0437\u043e\u0440 \u043e\u0441\u043d\u043e\u0432\u043d\u044b\u0445 \u043c\u0435\u0442\u043e\u0434\u043e\u0432.<\/p>\n<p>\u041f\u043e\u043c\u043d\u0438\u0442\u0435, \u0447\u0442\u043e \u0446\u0435\u043b\u044c \u0441\u043e\u0441\u0442\u043e\u0438\u0442 \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e\u0431\u044b \u043d\u0430\u0439\u0442\u0438 \u043e\u043f\u0442\u0438\u043c\u0430\u043b\u044c\u043d\u0443\u044e \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0443, \u0438 \u044d\u0442\u0430 \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0430 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u0441\u043e\u0431\u043e\u0439 \u0441\u043e\u043f\u043e\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u0438\u0435 \u043c\u0435\u0436\u0434\u0443 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435\u043c \u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f\u043c\u0438.  \u0418\u0442\u0430\u043a, \u043d\u0430\u043c \u043d\u0443\u0436\u043d\u043e \u043d\u0430\u0439\u0442\u0438, \u043a\u0430\u043a\u043e\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u043f\u0440\u0435\u0434\u043f\u0440\u0438\u043d\u044f\u0442\u044c, \u043f\u043e\u043a\u0430 \u043c\u044b \u043d\u0430\u0445\u043e\u0434\u0438\u043c\u0441\u044f \u0432 \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u043d\u043e\u043c \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0438, \u0447\u0442\u043e\u0431\u044b \u043c\u0430\u043a\u0441\u0438\u043c\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u043e\u0436\u0438\u0434\u0430\u0435\u043c\u043e\u0435 \u0432\u043e\u0437\u043d\u0430\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u0435.  
\u041e\u0434\u0438\u043d \u0438\u0437 \u0441\u043f\u043e\u0441\u043e\u0431\u043e\u0432 \u043d\u0430\u0439\u0442\u0438 \u043e\u043f\u0442\u0438\u043c\u0430\u043b\u044c\u043d\u0443\u044e \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0443 \u2014 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0444\u0443\u043d\u043a\u0446\u0438\u0438 \u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438 (\u0431\u0435\u0437\u043c\u043e\u0434\u0435\u043b\u044c\u043d\u044b\u0439 \u043c\u0435\u0442\u043e\u0434).<\/p>\n<p>\u0418 \u0432\u043e\u0442 \u043c\u044b \u0434\u043e\u0431\u0440\u0430\u043b\u0438\u0441\u044c \u0434\u043e \u043d\u043e\u0432\u043e\u0433\u043e \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b\u0430.  \u041d\u0430 \u0441\u0430\u043c\u043e\u043c \u0434\u0435\u043b\u0435 \u0441\u0435\u0433\u043e\u0434\u043d\u044f \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044e\u0442\u0441\u044f \u0434\u0432\u0435 \u0444\u0443\u043d\u043a\u0446\u0438\u0438 \u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438.  
\u0424\u0443\u043d\u043a\u0446\u0438\u044f \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f V(s) \u0438 \u0444\u0443\u043d\u043a\u0446\u0438\u044f \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f Q(s, a).<\/p>\n<ul>\n<li>\n<p>\u0424\u0443\u043d\u043a\u0446\u0438\u044f \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f: \u043e\u0436\u0438\u0434\u0430\u0435\u043c\u0430\u044f \u043e\u0442\u0434\u0430\u0447\u0430 \u043f\u0440\u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f\u0445 \u0438\u0437 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u0432 \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0438\u0438 \u0441 \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u043e\u0439.<\/p>\n<\/li>\n<li>\n<p>\u0424\u0443\u043d\u043a\u0446\u0438\u044f \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f: \u043e\u0436\u0438\u0434\u0430\u0435\u043c\u0430\u044f \u043e\u0442\u0434\u0430\u0447\u0430 \u0441 \u0443\u0447\u0435\u0442\u043e\u043c \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f.<\/p>\n<\/li>\n<\/ul>\n<p>\u0412 \u0447\u0435\u043c \u0440\u0430\u0437\u043d\u0438\u0446\u0430, \u0441\u043f\u0440\u043e\u0441\u0438\u0442\u0435 \u0432\u044b?  \u041f\u0435\u0440\u0432\u043e\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u2014 \u044d\u0442\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u043a\u043e\u043d\u043a\u0440\u0435\u0442\u043d\u043e\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f.  
\u0412\u0442\u043e\u0440\u043e\u0435 \u2014 \u044d\u0442\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u044d\u0442\u043e\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u043f\u043b\u044e\u0441 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0432\u0441\u0435\u0445 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u044b\u0445 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0439 \u0438\u0437 \u044d\u0442\u043e\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f.<\/p>\n<div class=\"math\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><msup><mi>Q<\/mi><mi>\u03c0<\/mi><\/msup><mo stretchy=\"false\">(<\/mo><mi>s<\/mi><mo separator=\"true\">,<\/mo><mi>a<\/mi><mo stretchy=\"false\">)<\/mo><mo>=<\/mo><msub><mi mathvariant=\"double-struck\">E<\/mi><mi>\u03c0<\/mi><\/msub><mrow><mo fence=\"true\">[<\/mo><msub><mi>R<\/mi><mi>t<\/mi><\/msub><mi mathvariant=\"normal\">\u2223<\/mi><msub><mi>s<\/mi><mi>t<\/mi><\/msub><mo>=<\/mo><mi>s<\/mi><mo separator=\"true\">,<\/mo><msub><mi>a<\/mi><mi>t<\/mi><\/msub><mo>=<\/mo><mi>a<\/mi><mo fence=\"true\">]<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">Q^{\\pi}(s, a)=\\mathbb{E}_{\\pi}\\left[R_{t} | s_{t}=s, a_{t}=a\\right]<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7143919999999999em\"><span style=\"top:-3.113em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 
mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em\">\u03c0<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/span><span class=\"mopen\">(<\/span><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\">a<\/span><span class=\"mclose\">)<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord\"><span class=\"mord\"><span class=\"mord mathbb\">E<\/span><\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.151392em\"><span style=\"top:-2.5500000000000003em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\" style=\"margin-right:0.03588em\">\u03c0<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\">[<\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.00773em\">R<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:-0.00773em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span 
class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mord\">\u2223<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mord mathnormal\">s<\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span 
class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mord mathnormal\">a<\/span><span class=\"mclose delimcenter\" style=\"top:0em\">]<\/span><\/span><\/span><\/span><\/span><\/span><\/div>\n<p>\u041a\u043e\u0433\u0434\u0430 \u0443 \u043d\u0430\u0441 \u0435\u0441\u0442\u044c \u0444\u0443\u043d\u043a\u0446\u0438\u044f \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f, \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q, \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u043f\u0440\u043e\u0441\u0442\u043e \u0432\u044b\u0431\u0440\u0430\u0442\u044c \u0432\u044b\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u0441 \u043d\u0430\u0438\u0432\u044b\u0441\u0448\u0438\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c \u0438\u0437 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f.  \u041d\u043e \u043a\u0430\u043a \u043d\u0430\u043c \u043d\u0430\u0439\u0442\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q?<\/p>\n<h2 id=\"what-is-q-learning\">\u0427\u0442\u043e \u0442\u0430\u043a\u043e\u0435 Q \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435?<\/h2>\n<p>\u0418\u0442\u0430\u043a, \u043c\u044b \u0431\u0443\u0434\u0435\u043c \u0443\u0437\u043d\u0430\u0432\u0430\u0442\u044c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q \u043c\u0435\u0442\u043e\u0434\u043e\u043c \u043f\u0440\u043e\u0431 \u0438 \u043e\u0448\u0438\u0431\u043e\u043a?  \u0422\u043e\u0447\u043d\u043e.  
\u041c\u044b \u0438\u043d\u0438\u0446\u0438\u0430\u043b\u0438\u0437\u0438\u0440\u0443\u0435\u043c Q, \u043c\u044b \u0432\u044b\u0431\u0438\u0440\u0430\u0435\u043c \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u0438 \u0432\u044b\u043f\u043e\u043b\u043d\u044f\u0435\u043c \u0435\u0433\u043e, \u043c\u044b \u043e\u0446\u0435\u043d\u0438\u0432\u0430\u0435\u043c \u0435\u0433\u043e, \u0438\u0437\u043c\u0435\u0440\u044f\u044f \u0432\u043e\u0437\u043d\u0430\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u0435, \u0438 \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0443\u044e\u0449\u0438\u043c \u043e\u0431\u0440\u0430\u0437\u043e\u043c \u043e\u0431\u043d\u043e\u0432\u043b\u044f\u0435\u043c Q.  \u041f\u043e\u043d\u0430\u0447\u0430\u043b\u0443 \u0441\u043b\u0443\u0447\u0430\u0439\u043d\u043e\u0441\u0442\u044c \u0431\u0443\u0434\u0435\u0442 \u0438\u0433\u0440\u0430\u0442\u044c \u043a\u043b\u044e\u0447\u0435\u0432\u0443\u044e \u0440\u043e\u043b\u044c, \u043d\u043e \u043f\u043e \u043c\u0435\u0440\u0435 \u0442\u043e\u0433\u043e, \u043a\u0430\u043a \u0430\u0433\u0435\u043d\u0442 \u0438\u0441\u0441\u043b\u0435\u0434\u0443\u0435\u0442 \u043e\u043a\u0440\u0443\u0436\u0430\u044e\u0449\u0443\u044e \u0441\u0440\u0435\u0434\u0443, \u0430\u043b\u0433\u043e\u0440\u0438\u0442\u043c \u043d\u0430\u0439\u0434\u0435\u0442 \u043b\u0443\u0447\u0448\u0435\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f.  
\u041c\u043e\u0436\u0435\u043c \u043b\u0438 \u043c\u044b \u043e\u043f\u0438\u0441\u0430\u0442\u044c \u044d\u0442\u043e \u043c\u0430\u0442\u0435\u043c\u0430\u0442\u0438\u0447\u0435\u0441\u043a\u0438?<\/p>\n<div class=\"math\"><span class=\"katex-display\"><span class=\"katex\"><span class=\"katex-mathml\"><math xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\" display=\"block\"><semantics><mrow><msub><mi>Q<\/mi><mrow><mi>t<\/mi><mo>+<\/mo><mn>1<\/mn><\/mrow><\/msub><mrow><mo fence=\"true\">(<\/mo><msub><mi>s<\/mi><mi>t<\/mi><\/msub><mo separator=\"true\">,<\/mo><msub><mi>a<\/mi><mi>t<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo>=<\/mo><msub><mi>Q<\/mi><mi>t<\/mi><\/msub><mrow><mo fence=\"true\">(<\/mo><msub><mi>s<\/mi><mi>t<\/mi><\/msub><mo separator=\"true\">,<\/mo><msub><mi>a<\/mi><mi>t<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo>+<\/mo><mi>\u03b1<\/mi><mrow><mo fence=\"true\">(<\/mo><msub><mi>r<\/mi><mrow><mi>t<\/mi><mo>+<\/mo><mn>1<\/mn><\/mrow><\/msub><mo>+<\/mo><mi>\u03b3<\/mi><munder><mo><mi>max<\/mi><mo>\u2061<\/mo><\/mo><mi>a<\/mi><\/munder><msub><mi>Q<\/mi><mi>t<\/mi><\/msub><mrow><mo fence=\"true\">(<\/mo><msub><mi>s<\/mi><mrow><mi>t<\/mi><mo>+<\/mo><mn>1<\/mn><\/mrow><\/msub><mo separator=\"true\">,<\/mo><mi>a<\/mi><mo fence=\"true\">)<\/mo><\/mrow><mo>\u2212<\/mo><msub><mi>Q<\/mi><mi>t<\/mi><\/msub><mrow><mo fence=\"true\">(<\/mo><msub><mi>s<\/mi><mi>t<\/mi><\/msub><mo separator=\"true\">,<\/mo><msub><mi>a<\/mi><mi>t<\/mi><\/msub><mo fence=\"true\">)<\/mo><\/mrow><mo fence=\"true\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">Q_{t+1}\\left(s_{t}, a_{t}\\right)=Q_{t}\\left(s_{t}, a_{t}\\right)+\\alpha\\left(r_{t+1}+\\gamma \\max _{a} 
Q_{t}\\left(s_{t+1}, a\\right)-Q_{t}\\left(s_{t}, a_{t}\\right)\\right)<\/annotation><\/semantics><\/math><\/span><span class=\"katex-html\" aria-hidden=\"true\"><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.301108em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">+<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.208331em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord\"><span class=\"mord 
mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em\">)<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><span class=\"mrel\">=<\/span><span class=\"mspace\" style=\"margin-right:0.2777777777777778em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1em;vertical-align:-0.25em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em\">)<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mbin\">+<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><\/span><span class=\"base\"><span class=\"strut\" style=\"height:1.8499999999999999em;vertical-align:-0.7em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.0037em\">\u03b1<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\"><span class=\"delimsizing size2\">(<\/span><\/span><span class=\"mord\"><span class=\"mord mathnormal\" style=\"margin-right:0.02778em\">r<\/span><span 
class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.301108em\"><span style=\"top:-2.5500000000000003em;margin-left:-0.02778em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">+<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.208331em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mbin\">+<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mord mathnormal\" style=\"margin-right:0.05556em\">\u03b3<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mop op-limits\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.43056em\"><span style=\"top:-2.4em;margin-left:0em\"><span class=\"pstrut\" style=\"height:3em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">a<\/span><\/span><\/span><\/span><span style=\"top:-3em\"><span class=\"pstrut\" style=\"height:3em\"\/><span><span class=\"mop\">max<\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.7em\"><span\/><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span 
style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.301108em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><span class=\"mbin mtight\">+<\/span><span class=\"mord mtight\">1<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.208331em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord mathnormal\">a<\/span><span class=\"mclose delimcenter\" style=\"top:0em\">)<\/span><\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mbin\">\u2212<\/span><span class=\"mspace\" style=\"margin-right:0.2222222222222222em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">Q<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span 
style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"minner\"><span class=\"mopen delimcenter\" style=\"top:0em\">(<\/span><span class=\"mord\"><span class=\"mord mathnormal\">s<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mpunct\">,<\/span><span class=\"mspace\" style=\"margin-right:0.16666666666666666em\"\/><span class=\"mord\"><span class=\"mord mathnormal\">a<\/span><span class=\"msupsub\"><span class=\"vlist-t vlist-t2\"><span class=\"vlist-r\"><span class=\"vlist\" style=\"height:0.2805559999999999em\"><span style=\"top:-2.5500000000000003em;margin-left:0em;margin-right:0.05em\"><span class=\"pstrut\" style=\"height:2.7em\"\/><span class=\"sizing reset-size6 size3 mtight\"><span class=\"mord mtight\"><span class=\"mord mathnormal mtight\">t<\/span><\/span><\/span><\/span><\/span><span class=\"vlist-s\">\u200b<\/span><\/span><span class=\"vlist-r\"><span class=\"vlist\" 
style=\"height:0.15em\"><span\/><\/span><\/span><\/span><\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em\">)<\/span><\/span><span class=\"mclose delimcenter\" style=\"top:0em\"><span class=\"delimsizing size2\">)<\/span><\/span><\/span><\/span><\/span><\/span><\/span><\/div>\n<p>\u0421\u043f\u0430\u0441\u0438\u0431\u043e, \u0420\u0438\u0447\u0430\u0440\u0434 \u042d. \u0411\u0435\u043b\u043b\u043c\u0430\u043d.  \u041f\u0440\u0438\u0432\u0435\u0434\u0435\u043d\u043d\u043e\u0435 \u0432\u044b\u0448\u0435 \u0443\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u0435 \u0438\u0437\u0432\u0435\u0441\u0442\u043d\u043e \u043a\u0430\u043a \u0443\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u0435 \u0411\u0435\u043b\u043b\u043c\u0430\u043d\u0430 \u0438 \u0438\u0433\u0440\u0430\u0435\u0442 \u043e\u0433\u0440\u043e\u043c\u043d\u0443\u044e \u0440\u043e\u043b\u044c \u0432 \u0441\u043e\u0432\u0440\u0435\u043c\u0435\u043d\u043d\u044b\u0445 \u0438\u0441\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u043d\u0438\u044f\u0445 RL.  
\u041d\u043e \u0447\u0442\u043e \u0432 \u043d\u0435\u043c \u0433\u043e\u0432\u043e\u0440\u0438\u0442\u0441\u044f?<\/p>\n<p>\u0417\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q, \u0442\u0430\u043a\u0436\u0435 \u0438\u0437\u0432\u0435\u0441\u0442\u043d\u043e\u0435 \u043a\u0430\u043a \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u0430\u044f \u0431\u0443\u0434\u0443\u0449\u0430\u044f \u043d\u0430\u0433\u0440\u0430\u0434\u0430 \u0437\u0430 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 \u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435, \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u0441\u043e\u0431\u043e\u0439 \u043d\u0435\u043c\u0435\u0434\u043b\u0435\u043d\u043d\u0443\u044e \u043d\u0430\u0433\u0440\u0430\u0434\u0443 \u043f\u043b\u044e\u0441 \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u0443\u044e \u0431\u0443\u0434\u0443\u0449\u0443\u044e \u043d\u0430\u0433\u0440\u0430\u0434\u0443 \u0437\u0430 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0435 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435.  \u0418 \u0435\u0441\u043b\u0438 \u043f\u043e\u0434\u0443\u043c\u0430\u0442\u044c, \u0442\u043e \u0432 \u044d\u0442\u043e\u043c \u0435\u0441\u0442\u044c \u0441\u043c\u044b\u0441\u043b.  
\u0413\u0430\u043c\u043c\u0430 (\u03b3) \u2014 \u044d\u0442\u043e \u0447\u0438\u0441\u043b\u043e \u0438\u0437 \u043e\u0442\u0440\u0435\u0437\u043a\u0430 <!-- -->[0,1]<!-- -->, \u0438 \u043e\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442\u0441\u044f \u0434\u043b\u044f \u0434\u0438\u0441\u043a\u043e\u043d\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f \u0432\u043e\u0437\u043d\u0430\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u044f \u043f\u043e \u043f\u0440\u043e\u0448\u0435\u0441\u0442\u0432\u0438\u0438 \u0432\u0440\u0435\u043c\u0435\u043d\u0438, \u0443\u0447\u0438\u0442\u044b\u0432\u0430\u044f \u043f\u0440\u0435\u0434\u043f\u043e\u043b\u043e\u0436\u0435\u043d\u0438\u0435, \u0447\u0442\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u0432 \u043d\u0430\u0447\u0430\u043b\u0435 \u0432\u0430\u0436\u043d\u0435\u0435, \u0447\u0435\u043c \u0432 \u043a\u043e\u043d\u0446\u0435 (\u043f\u0440\u0435\u0434\u043f\u043e\u043b\u043e\u0436\u0435\u043d\u0438\u0435, \u043a\u043e\u0442\u043e\u0440\u043e\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0430\u0435\u0442\u0441\u044f \u043c\u043d\u043e\u0433\u0438\u043c\u0438 \u043f\u0440\u0438\u043c\u0435\u0440\u0430\u043c\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f \u0432 \u0440\u0435\u0430\u043b\u044c\u043d\u043e\u0439 \u0436\u0438\u0437\u043d\u0438).  
\u0412 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u0435 \u043c\u044b \u043c\u043e\u0436\u0435\u043c <strong>\u043e\u0431\u043d\u043e\u0432\u0438\u0442\u044c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q \u0438\u0442\u0435\u0440\u0430\u0442\u0438\u0432\u043d\u043e<\/strong>.<\/p>\n<p>\u041e\u0441\u043d\u043e\u0432\u043d\u0430\u044f \u043a\u043e\u043d\u0446\u0435\u043f\u0446\u0438\u044f, \u043a\u043e\u0442\u043e\u0440\u0443\u044e \u043d\u0443\u0436\u043d\u043e \u043f\u043e\u043d\u044f\u0442\u044c \u0437\u0434\u0435\u0441\u044c, \u0437\u0430\u043a\u043b\u044e\u0447\u0430\u0435\u0442\u0441\u044f \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e \u0443\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u0435 \u0411\u0435\u043b\u043b\u043c\u0430\u043d\u0430 \u0441\u0432\u044f\u0437\u044b\u0432\u0430\u0435\u0442 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u0434\u0440\u0443\u0433 \u0441 \u0434\u0440\u0443\u0433\u043e\u043c \u0438, \u0442\u0430\u043a\u0438\u043c \u043e\u0431\u0440\u0430\u0437\u043e\u043c, \u0441\u0432\u044f\u0437\u044b\u0432\u0430\u0435\u0442 \u0444\u0443\u043d\u043a\u0446\u0438\u0438 \u0446\u0435\u043d\u043d\u043e\u0441\u0442\u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f.  
\u042d\u0442\u043e \u043f\u043e\u043c\u043e\u0433\u0430\u0435\u0442 \u043d\u0430\u043c \u043f\u0435\u0440\u0435\u0431\u0438\u0440\u0430\u0442\u044c \u0441\u0440\u0435\u0434\u0443 \u0438 \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0442\u044c \u043e\u043f\u0442\u0438\u043c\u0430\u043b\u044c\u043d\u044b\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f, \u043a\u043e\u0442\u043e\u0440\u044b\u0435, \u0432 \u0441\u0432\u043e\u044e \u043e\u0447\u0435\u0440\u0435\u0434\u044c, \u0434\u0430\u044e\u0442 \u043d\u0430\u043c \u043e\u043f\u0442\u0438\u043c\u0430\u043b\u044c\u043d\u0443\u044e \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0443.<\/p>\n<p>\u0412 \u0441\u0432\u043e\u0435\u0439 \u043f\u0440\u043e\u0441\u0442\u0435\u0439\u0448\u0435\u0439 \u0444\u043e\u0440\u043c\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u044e\u0442 \u0441\u043e\u0431\u043e\u0439 \u043c\u0430\u0442\u0440\u0438\u0446\u0443 \u0441 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f\u043c\u0438 \u0432 \u0432\u0438\u0434\u0435 \u0441\u0442\u0440\u043e\u043a \u0438 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f\u043c\u0438 \u0432 \u0432\u0438\u0434\u0435 \u0441\u0442\u043e\u043b\u0431\u0446\u043e\u0432.  \u041c\u044b \u0438\u043d\u0438\u0446\u0438\u0430\u043b\u0438\u0437\u0438\u0440\u0443\u0435\u043c Q-\u043c\u0430\u0442\u0440\u0438\u0446\u0443 \u0441\u043b\u0443\u0447\u0430\u0439\u043d\u044b\u043c \u043e\u0431\u0440\u0430\u0437\u043e\u043c, \u0430\u0433\u0435\u043d\u0442 \u043d\u0430\u0447\u0438\u043d\u0430\u0435\u0442 \u0432\u0437\u0430\u0438\u043c\u043e\u0434\u0435\u0439\u0441\u0442\u0432\u043e\u0432\u0430\u0442\u044c \u0441 \u043e\u043a\u0440\u0443\u0436\u0435\u043d\u0438\u0435\u043c \u0438 \u0438\u0437\u043c\u0435\u0440\u044f\u0435\u0442 \u0432\u043e\u0437\u043d\u0430\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u0435 \u0437\u0430 \u043a\u0430\u0436\u0434\u043e\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435.  
\u0417\u0430\u0442\u0435\u043c \u043e\u043d \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0435\u0442 \u043d\u0430\u0431\u043b\u044e\u0434\u0430\u0435\u043c\u044b\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q \u0438 \u043e\u0431\u043d\u043e\u0432\u043b\u044f\u0435\u0442 \u043c\u0430\u0442\u0440\u0438\u0446\u0443.<\/p>\n<pre class=\"prism-code language-python\" style=\"color:#F8F8F2;background-color:#282A36\"><p><span class=\"token plain\">env <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> gym<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">make<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'MountainCar-v0'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">Q <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">zeros<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">observation_space<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">n<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\">env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_space<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">n<\/span><span class=\"token 
punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\"\/><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">for<\/span><span class=\"token plain\"> i <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">in<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">range<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">episodes<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    s <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">reset<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    reward <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">0<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    goal_flag <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token boolean\">False<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">for<\/span><span class=\"token plain\"> j <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">in<\/span><span class=\"token plain\"> 
<\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">range<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">200<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        a <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">argmax<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">Q<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">s<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">+<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">random<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">randn<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\">env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_space<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">n<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 
242)\">)<\/span><span class=\"token operator\">*<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token operator\">\/<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">i<\/span><span class=\"token operator\">+<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        s_new<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\">r<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\">goal_flag<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\">_ <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">step<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">a<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        maxQ<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">max<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">Q<\/span><span 
class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">s_new<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        Q<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">s<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\">a<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">+=<\/span><span class=\"token plain\"> lr<\/span><span class=\"token operator\">*<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">r <\/span><span class=\"token operator\">+<\/span><span class=\"token plain\"> g<\/span><span class=\"token operator\">*<\/span><span class=\"token plain\">maxQ <\/span><span class=\"token operator\">-<\/span><span class=\"token plain\"> Q<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">s<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\">a<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        reward <\/span><span class=\"token operator\">+=<\/span><span class=\"token plain\"> r<\/span><\/p><p><span class=\"token plain\">        s <\/span><span class=\"token operator\">=<\/span><span class=\"token 
plain\"> s_new<\/span><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">if<\/span><span class=\"token plain\"> goal_flag <\/span><span class=\"token operator\">==<\/span><span class=\"token plain\"> <\/span><span class=\"token boolean\">True<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">break<\/span><\/p><\/pre>\n<h2 id=\"exploration-vs-exploitation\">\u0420\u0430\u0437\u0432\u0435\u0434\u043a\u0430 \u043f\u0440\u043e\u0442\u0438\u0432 \u044d\u043a\u0441\u043f\u043b\u0443\u0430\u0442\u0430\u0446\u0438\u0438<\/h2>\n<p>\u0410\u043b\u0433\u043e\u0440\u0438\u0442\u043c, \u043a\u0430\u043a \u043e\u043f\u0438\u0441\u0430\u043d\u043e \u0432\u044b\u0448\u0435, \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0436\u0430\u0434\u043d\u044b\u043c \u0430\u043b\u0433\u043e\u0440\u0438\u0442\u043c\u043e\u043c, \u0442\u0430\u043a \u043a\u0430\u043a \u043e\u043d \u0432\u0441\u0435\u0433\u0434\u0430 \u0432\u044b\u0431\u0438\u0440\u0430\u0435\u0442 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u0441 \u043b\u0443\u0447\u0448\u0438\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c.  \u041d\u043e \u0447\u0442\u043e, \u0435\u0441\u043b\u0438 \u043a\u0430\u043a\u043e\u0435-\u0442\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u0438\u043c\u0435\u0435\u0442 \u043e\u0447\u0435\u043d\u044c \u043c\u0430\u043b\u0435\u043d\u044c\u043a\u0443\u044e \u0432\u0435\u0440\u043e\u044f\u0442\u043d\u043e\u0441\u0442\u044c \u043f\u043e\u043b\u0443\u0447\u0438\u0442\u044c \u043e\u0447\u0435\u043d\u044c \u0431\u043e\u043b\u044c\u0448\u0443\u044e \u043d\u0430\u0433\u0440\u0430\u0434\u0443?  
\u0410\u0433\u0435\u043d\u0442 \u0442\u0443\u0434\u0430 \u043d\u0438\u043a\u043e\u0433\u0434\u0430 \u043d\u0435 \u043f\u043e\u043f\u0430\u0434\u0435\u0442.  \u042d\u0442\u043e \u0438\u0441\u043f\u0440\u0430\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0438\u0435\u043c \u0441\u043b\u0443\u0447\u0430\u0439\u043d\u043e\u0433\u043e \u0438\u0441\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u043d\u0438\u044f.  \u0412\u0440\u0435\u043c\u044f \u043e\u0442 \u0432\u0440\u0435\u043c\u0435\u043d\u0438 \u0430\u0433\u0435\u043d\u0442 \u0431\u0443\u0434\u0435\u0442 \u0432\u044b\u043f\u043e\u043b\u043d\u044f\u0442\u044c \u0441\u043b\u0443\u0447\u0430\u0439\u043d\u044b\u0439 \u0445\u043e\u0434, \u043d\u0435 \u0443\u0447\u0438\u0442\u044b\u0432\u0430\u044f \u043e\u043f\u0442\u0438\u043c\u0430\u043b\u044c\u043d\u0443\u044e \u043f\u043e\u043b\u0438\u0442\u0438\u043a\u0443.  \u041d\u043e \u043f\u043e\u0441\u043a\u043e\u043b\u044c\u043a\u0443 \u043c\u044b \u0445\u043e\u0442\u0438\u043c, \u0447\u0442\u043e\u0431\u044b \u0430\u043b\u0433\u043e\u0440\u0438\u0442\u043c \u0441\u0445\u043e\u0434\u0438\u043b\u0441\u044f \u0432 \u043a\u0430\u043a\u043e\u0439-\u0442\u043e \u043c\u043e\u043c\u0435\u043d\u0442, \u043c\u044b \u0441\u043d\u0438\u0436\u0430\u0435\u043c \u0432\u0435\u0440\u043e\u044f\u0442\u043d\u043e\u0441\u0442\u044c \u0441\u043b\u0443\u0447\u0430\u0439\u043d\u043e\u0433\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f \u043f\u043e \u0445\u043e\u0434\u0443 \u0438\u0433\u0440\u044b.<\/p>\n<h2 id=\"why-going-deep\">\u0417\u0430\u0447\u0435\u043c \u0443\u0433\u043b\u0443\u0431\u043b\u044f\u0442\u044c\u0441\u044f?<\/h2>\n<p>Q-\u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u2014 \u044d\u0442\u043e \u0445\u043e\u0440\u043e\u0448\u043e.  \u041d\u0438\u043a\u0442\u043e \u043d\u0435 \u043c\u043e\u0436\u0435\u0442 \u044d\u0442\u043e\u0433\u043e \u043e\u0442\u0440\u0438\u0446\u0430\u0442\u044c.  
\u041d\u043e \u0442\u043e\u0442 \u0444\u0430\u043a\u0442, \u0447\u0442\u043e \u043e\u043d\u043e \u043d\u0435\u044d\u0444\u0444\u0435\u043a\u0442\u0438\u0432\u043d\u043e \u0432 \u0431\u043e\u043b\u044c\u0448\u0438\u0445 \u043f\u0440\u043e\u0441\u0442\u0440\u0430\u043d\u0441\u0442\u0432\u0430\u0445 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0439, \u043e\u0441\u0442\u0430\u0435\u0442\u0441\u044f.  \u041f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u044c\u0442\u0435 \u0441\u0435\u0431\u0435 \u0438\u0433\u0440\u0443 \u0441 1000 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0439 \u0438 1000 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0439 \u0432 \u043a\u0430\u0436\u0434\u043e\u043c \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0438.  \u041d\u0430\u043c \u043f\u043e\u043d\u0430\u0434\u043e\u0431\u0438\u0442\u0441\u044f \u0442\u0430\u0431\u043b\u0438\u0446\u0430 \u0438\u0437 1 \u043c\u0438\u043b\u043b\u0438\u043e\u043d\u0430 \u044f\u0447\u0435\u0435\u043a.  \u0410 \u044d\u0442\u043e \u043e\u0447\u0435\u043d\u044c \u043c\u0430\u043b\u0435\u043d\u044c\u043a\u043e\u0435 \u043f\u0440\u043e\u0441\u0442\u0440\u0430\u043d\u0441\u0442\u0432\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0439 \u043f\u043e \u0441\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044e \u0441 \u0448\u0430\u0445\u043c\u0430\u0442\u0430\u043c\u0438 \u0438\u043b\u0438 \u0433\u043e.  
\u041a\u0440\u043e\u043c\u0435 \u0442\u043e\u0433\u043e, \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 Q \u043d\u0435\u043b\u044c\u0437\u044f \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0432 \u043d\u0435\u0438\u0437\u0432\u0435\u0441\u0442\u043d\u044b\u0445 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f\u0445, \u043f\u043e\u0442\u043e\u043c\u0443 \u0447\u0442\u043e \u043e\u043d\u043e \u043d\u0435 \u043c\u043e\u0436\u0435\u0442 \u0432\u044b\u0432\u0435\u0441\u0442\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 Q \u043d\u043e\u0432\u044b\u0445 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0439 \u0438\u0437 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0438\u0445.<\/p>\n<p>\u0427\u0442\u043e, \u0435\u0441\u043b\u0438 \u043c\u044b \u0430\u043f\u043f\u0440\u043e\u043a\u0441\u0438\u043c\u0438\u0440\u0443\u0435\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q, \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044f \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u0443\u044e \u043c\u043e\u0434\u0435\u043b\u044c \u043c\u0430\u0448\u0438\u043d\u043d\u043e\u0433\u043e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f?  \u0427\u0442\u043e, \u0435\u0441\u043b\u0438 \u043c\u044b \u0430\u043f\u043f\u0440\u043e\u043a\u0441\u0438\u043c\u0438\u0440\u0443\u0435\u043c \u0438\u0445 \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u044b\u0445 \u0441\u0435\u0442\u0435\u0439?  \u042d\u0442\u0430 \u043f\u0440\u043e\u0441\u0442\u0430\u044f \u0438\u0434\u0435\u044f (\u0438, \u043a\u043e\u043d\u0435\u0447\u043d\u043e \u0436\u0435, \u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446\u0438\u044f) \u0441\u0442\u0430\u043b\u0430 \u043f\u0440\u0438\u0447\u0438\u043d\u043e\u0439 \u043f\u0440\u0438\u043e\u0431\u0440\u0435\u0442\u0435\u043d\u0438\u044f DeepMind \u043a\u043e\u043c\u043f\u0430\u043d\u0438\u0435\u0439 Google \u0437\u0430 500 \u043c\u0438\u043b\u043b\u0438\u043e\u043d\u043e\u0432 \u0434\u043e\u043b\u043b\u0430\u0440\u043e\u0432.  
DeepMind \u043f\u0440\u0435\u0434\u043b\u043e\u0436\u0438\u043b\u0430 \u0430\u043b\u0433\u043e\u0440\u0438\u0442\u043c \u043f\u043e\u0434 \u043d\u0430\u0437\u0432\u0430\u043d\u0438\u0435\u043c Deep Q Learner \u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043b\u0430 \u0435\u0433\u043e, \u0447\u0442\u043e\u0431\u044b \u0438\u0433\u0440\u0430\u0442\u044c \u0432 \u0438\u0433\u0440\u044b Atari \u0441 \u0431\u0435\u0437\u0443\u043f\u0440\u0435\u0447\u043d\u044b\u043c \u043c\u0430\u0441\u0442\u0435\u0440\u0441\u0442\u0432\u043e\u043c.<\/p>\n<h2 id=\"deep-q-learning\">\u0413\u043b\u0443\u0431\u043e\u043a\u043e\u0435 Q-\u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435<\/h2>\n<p>\u0412 \u0433\u043b\u0443\u0431\u043e\u043a\u043e\u043c \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0438 Q \u043c\u044b \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u0443\u044e \u0441\u0435\u0442\u044c \u0434\u043b\u044f \u0430\u043f\u043f\u0440\u043e\u043a\u0441\u0438\u043c\u0430\u0446\u0438\u0438 \u0444\u0443\u043d\u043a\u0446\u0438\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q.  \u0421\u0435\u0442\u044c \u043f\u043e\u043b\u0443\u0447\u0430\u0435\u0442 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0432\u0445\u043e\u0434\u043d\u044b\u0445 \u0434\u0430\u043d\u043d\u044b\u0445 (\u0431\u0443\u0434\u044c \u0442\u043e \u043a\u0430\u0434\u0440 \u0442\u0435\u043a\u0443\u0449\u0435\u0433\u043e \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u0438\u043b\u0438 \u043e\u0434\u0438\u043d\u043e\u0447\u043d\u043e\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435) \u0438 \u0432\u044b\u0432\u043e\u0434\u0438\u0442 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q \u0434\u043b\u044f \u0432\u0441\u0435\u0445 \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u044b\u0445 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0439.  
\u041d\u0430\u0438\u0431\u043e\u043b\u044c\u0448\u0435\u0435 \u0438\u0437 \u0432\u044b\u0445\u043e\u0434\u043d\u044b\u0445 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0439 \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u0435\u0442 \u043d\u0430\u0448\u0435 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435.  \u041c\u044b \u0432\u0438\u0434\u0438\u043c, \u0447\u0442\u043e \u043c\u044b \u043d\u0435 \u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u044b \u043f\u043e\u043b\u043d\u043e\u0441\u0432\u044f\u0437\u043d\u044b\u043c\u0438 \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u044b\u043c\u0438 \u0441\u0435\u0442\u044f\u043c\u0438, \u043d\u043e \u043c\u044b \u043c\u043e\u0436\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0441\u0432\u0435\u0440\u0442\u043e\u0447\u043d\u044b\u0435, \u0440\u0435\u043a\u0443\u0440\u0440\u0435\u043d\u0442\u043d\u044b\u0435 \u0438 \u043b\u044e\u0431\u044b\u0435 \u0434\u0440\u0443\u0433\u0438\u0435 \u043c\u043e\u0434\u0435\u043b\u0438, \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0443\u044e\u0449\u0438\u0435 \u043d\u0430\u0448\u0438\u043c \u043f\u043e\u0442\u0440\u0435\u0431\u043d\u043e\u0441\u0442\u044f\u043c.<\/p>\n<p><span class=\"gatsby-resp-image-wrapper\" style=\"position:relative;display:block;margin-left:auto;margin-right:auto;max-width:800px\"><\/p>\n<p>    <span class=\"gatsby-resp-image-background-image\" 
style=\"padding-bottom:75.33333333333333%;position:relative;bottom:0;left:0;background-image:url('data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAPCAYAAADkmO9VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAECklEQVQ4y02TW0xbdRzHf+fQIcOpixlTWHQBkd6ADUSNvBnYiDEjC3Evy5JlxjezMFF3oefSC2A1XAdmQ+KezEayQqEtlAIr0E1KS1vEy5zJXnRiwOl0cwV6bj9\/hy3GJt\/8zvehn3x\/lz\/4JDFnUG7MaZErcy7L7+f4JUd24TlkTZwEZl6qNQuqZBHUDYuorlNNk0czLx+1CjI8fwbZihNHIPfAa8D3n4fg9VkAn8TdCcnuX+fkz38Zl51\/+ST+K0SA4mYEsyDXWkRNNvK4UWzDDapp8kjQo6+4NgEAAfGRbq+twGgsAtCBtqpetFddws6qHrS\/Tr4kqwxZiwvBKki1Rl6T33Brm4f6tM3qT7V1s6Dhriasv3lrpHo8Fr05s7QYm1tOLoQW53\/wz881ADbyItrcdnS0C\/ixs438MXAhU9KGYOGlGpOgbex3ag9fbdX+KXdoDywEfPEsNuD9L\/P90ejp6WTs5HRy4WQw\/vVHgWikFBSRn1DbPwupfb1B9ZPWiCJwrXAI4SVRB8pvWUREE8koIJqp6t4qKO829N6Fn26HILwUB0oJE\/F58EepZRQEwIEBwKEhwK4uQI6DWjHNEIyWohhNzesXTOceXDCdvX\/ReObvi+QHTLxS9cwphDbvj9lTiYWsyUQ0Kxi7YQjMRxgdyKDHw2AsxmB\/P4M2G3NiAOG5g+1Mfv0lGjxsJz1F2vVYOyynboF4OcXOJCMGAhoIaJhKLhhojgaQKJHM8\/9J99PH36P\/PfFYDPz\/97OSAW84xIRT8a12dV1LxbbaJiiAV7IxftnJjMtuZlQWGK\/EgdGm0B3KQLdYQbc3YRXUMcsjBUpFNZzXhNX3VieLQol4Gy2Fp5QCQQVKKoBX5sCn2MEnizCicDAi81Dm2GStogoWQamjs8GXOVoMLaVkq2q4+0N854+VwL7pVCI4t5y4Suk8s8uJIdq2B4YztoJRSSigZPnDkm2PN8M9u6cR2TJRojtUamo6tUxdN6YPdGnpg9348M0ODffS2WzeHS6cWIx3UJstlLCFErbqFUZVbm1Cc\/8+o\/Wtjmmu9KjKD+ovpYinl8Jnavc5daC2\/navtq4DK1s0zPsAD6\/eGds\/lVwMUrqr11JxD83SQ0CP3nJ5QHGVTymd5X7FUUG+EOqRNQs6UKoz8fR2qU39\/sxb3yrubsIjf\/4WKJtMLPoIdIXSDRJ4kOY5SEAbjCttEFb6IKA4t3ylO0NAmqEDq6wunLXYMULQG\/Q9XerEWF4zVq+tTJoo4RcE6iZoD9XzNMMeWgLHUEJmUmln\/IqdvXLvNN1JORh2FgGwhheYbblWNntHcVbOzgJ2W64ZDLnG7U8+vTe8lAB6w3D9+yWY+zYJke9SMPNNAv4FoHaQ9zZvzmgAAAAASUVORK5CYII=');background-size:cover;display:block\"\/><br \/>\n  <img decoding=\"async\" class=\"gatsby-resp-image-image\" alt=\"DQN\" title=\"DQN\" src=\"https:\/\/theaisummer.com\/static\/3dc0869097a026116c00c49bc0d04c79\/5a190\/DQN.png\" srcset=\"\/static\/3dc0869097a026116c00c49bc0d04c79\/5a46d\/DQN.png 300w,\/static\/3dc0869097a026116c00c49bc0d04c79\/0a47e\/DQN.png 
600w,\/static\/3dc0869097a026116c00c49bc0d04c79\/5a190\/DQN.png 800w\" sizes=\"(max-width: 800px) 100vw, 800px\" style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0\" loading=\"lazy\"\/><\/p>\n<p>    <\/span><br \/>\n<em>freecodecamp.org<\/em><\/p>\n<p>\u0414\u0443\u043c\u0430\u044e, \u043f\u043e\u0440\u0430 \u043f\u0440\u0438\u043c\u0435\u043d\u0438\u0442\u044c \u0432\u0441\u0435 \u044d\u0442\u043e \u043d\u0430 \u043f\u0440\u0430\u043a\u0442\u0438\u043a\u0435 \u0438 \u043d\u0430\u0443\u0447\u0438\u0442\u044c \u0430\u0433\u0435\u043d\u0442\u0430 \u0438\u0433\u0440\u0430\u0442\u044c \u0432 Mountain Car.  \u0426\u0435\u043b\u044c \u0441\u043e\u0441\u0442\u043e\u0438\u0442 \u0432 \u0442\u043e\u043c, \u0447\u0442\u043e\u0431\u044b \u0437\u0430\u0441\u0442\u0430\u0432\u0438\u0442\u044c \u043c\u0430\u0448\u0438\u043d\u0443 \u0435\u0445\u0430\u0442\u044c \u0432 \u0433\u043e\u0440\u0443.  \u0414\u0432\u0438\u0433\u0430\u0442\u0435\u043b\u044c \u0430\u0432\u0442\u043e\u043c\u043e\u0431\u0438\u043b\u044f \u043d\u0435\u0434\u043e\u0441\u0442\u0430\u0442\u043e\u0447\u043d\u043e \u0441\u0438\u043b\u0435\u043d, \u0447\u0442\u043e\u0431\u044b \u043f\u043e\u0434\u043d\u044f\u0442\u044c\u0441\u044f \u0432 \u0433\u043e\u0440\u0443 \u0437\u0430 \u043e\u0434\u0438\u043d \u043f\u0440\u043e\u0445\u043e\u0434.  
\u0422\u0430\u043a\u0438\u043c \u043e\u0431\u0440\u0430\u0437\u043e\u043c, \u0435\u0434\u0438\u043d\u0441\u0442\u0432\u0435\u043d\u043d\u044b\u0439 \u0441\u043f\u043e\u0441\u043e\u0431 \u0434\u043e\u0431\u0438\u0442\u044c\u0441\u044f \u0443\u0441\u043f\u0435\u0445\u0430 \u2014 \u044d\u0442\u043e \u0435\u0437\u0434\u0438\u0442\u044c \u0442\u0443\u0434\u0430-\u0441\u044e\u0434\u0430, \u0447\u0442\u043e\u0431\u044b \u043d\u0430\u0431\u0440\u0430\u0442\u044c \u043e\u0431\u043e\u0440\u043e\u0442\u044b.<\/p>\n<p><span class=\"gatsby-resp-image-wrapper\" style=\"position:relative;display:block;margin-left:auto;margin-right:auto;max-width:600px\"><\/p>\n<p>    <span class=\"gatsby-resp-image-background-image\" style=\"padding-bottom:66.66666666666666%;position:relative;bottom:0;left:0;background-image:url('data:image\/jpeg;base64,\/9j\/2wBDABALDA4MChAODQ4SERATGCgaGBYWGDEjJR0oOjM9PDkzODdASFxOQERXRTc4UG1RV19iZ2hnPk1xeXBkeFxlZ2P\/2wBDARESEhgVGC8aGi9jQjhCY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2NjY2P\/wgARCAANABQDASIAAhEBAxEB\/8QAFwABAAMAAAAAAAAAAAAAAAAAAAECBf\/EABQBAQAAAAAAAAAAAAAAAAAAAAD\/2gAMAwEAAhADEAAAAd9ALD\/\/xAAXEAADAQAAAAAAAAAAAAAAAAAAARAR\/9oACAEBAAEFApg7\/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP\/aAAgBAwEBPwE\/\/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP\/aAAgBAgEBPwE\/\/8QAFRABAQAAAAAAAAAAAAAAAAAAECH\/2gAIAQEABj8CY\/\/EABsQAAMAAgMAAAAAAAAAAAAAAAABERAhMWFx\/9oACAEBAAE\/IVsbPTKjqMXfOP\/aAAwDAQACAAMAAAAQkA\/\/xAAUEQEAAAAAAAAAAAAAAAAAAAAQ\/9oACAEDAQE\/ED\/\/xAAUEQEAAAAAAAAAAAAAAAAAAAAQ\/9oACAECAQE\/ED\/\/xAAbEAEBAAMAAwAAAAAAAAAAAAABEQAhMRBhkf\/aAAgBAQABPxBRchza8Mp209YQwR0pT5hYKsbZPH\/\/2Q==');background-size:cover;display:block\"\/><br \/>\n  <img decoding=\"async\" class=\"gatsby-resp-image-image\" alt=\"\u041a\u0430\u0440\u0442\u043f\u0443\u043b\" title=\"\u041a\u0430\u0440\u0442\u043f\u0443\u043b\" src=\"https:\/\/theaisummer.com\/static\/20a3f97467eec4613b0e1f7c6303afd1\/b4294\/Cartpool.jpg\" srcset=\"\/static\/20a3f97467eec4613b0e1f7c6303afd1\/f93b5\/Cartpool.jpg 
300w,\/static\/20a3f97467eec4613b0e1f7c6303afd1\/b4294\/Cartpool.jpg 600w\" sizes=\"(max-width: 600px) 100vw, 600px\" style=\"width:100%;height:100%;margin:0;vertical-align:middle;position:absolute;top:0;left:0\" loading=\"lazy\"\/><\/p>\n<p>    <\/span><\/p>\n<p>\u042f \u043e\u0431\u044a\u044f\u0441\u043d\u044e \u0431\u043e\u043b\u044c\u0448\u0435 \u043e Deep Q Networks \u0432\u043c\u0435\u0441\u0442\u0435 \u0441 \u043a\u043e\u0434\u043e\u043c.  \u0421\u043d\u0430\u0447\u0430\u043b\u0430 \u043c\u044b \u0434\u043e\u043b\u0436\u043d\u044b \u043f\u043e\u0441\u0442\u0440\u043e\u0438\u0442\u044c \u043d\u0430\u0448\u0435\u0433\u043e \u0430\u0433\u0435\u043d\u0442\u0430 \u043a\u0430\u043a \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u0443\u044e \u0441\u0435\u0442\u044c \u0441 3 \u043f\u043b\u043e\u0442\u043d\u044b\u043c\u0438 \u0441\u043b\u043e\u044f\u043c\u0438, \u0438 \u043c\u044b \u0441\u043e\u0431\u0438\u0440\u0430\u0435\u043c\u0441\u044f \u043e\u0431\u0443\u0447\u0438\u0442\u044c \u0435\u0433\u043e \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u043e\u043f\u0442\u0438\u043c\u0438\u0437\u0430\u0446\u0438\u0438 \u0410\u0434\u0430\u043c\u0430.<\/p>\n<pre class=\"prism-code language-python\" style=\"color:#F8F8F2;background-color:#282A36\"><p><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">class<\/span><span class=\"token plain\"> <\/span><span class=\"token class-name\">DQNAgent<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">__init__<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 
242)\">,<\/span><span class=\"token plain\"> state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> action_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">state_size <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> state_size<\/span><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_size <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> action_size<\/span><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">memory <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> deque<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">maxlen<\/span><span class=\"token operator\">=<\/span><span class=\"token number\">2000<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">gamma <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">0.95<\/span><span class=\"token plain\">    <\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token 
plain\">epsilon <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">1.0<\/span><span class=\"token plain\">  <\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">epsilon_min <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">0.01<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">epsilon_decay <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">0.995<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">learning_rate <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">0.001<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">_build_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" 
style=\"color:rgb(80, 250, 123)\">_build_model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        model <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> Sequential<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">add<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">Dense<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">24<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> input_dim<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> activation<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'relu'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span 
class=\"token plain\">add<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">Dense<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">24<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> activation<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'relu'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">add<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">Dense<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> activation<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'linear'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">compile<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token 
plain\">loss<\/span><span class=\"token operator\">=<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'mse'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                      optimizer<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">Adam<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">lr<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">learning_rate<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">return<\/span><span class=\"token plain\"> model<\/span><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">remember<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> next_state<\/span><span class=\"token punctuation\" 
style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> done<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">memory<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">append<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> next_state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> done<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">act<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 
242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">if<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">random<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">rand<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">&lt;=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">epsilon<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">return<\/span><span class=\"token plain\"> random<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">randrange<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        act_values <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model<\/span><span 
class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">predict<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">return<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">argmax<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">act_values<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><\/p><\/pre>\n<p>\u041a\u043b\u044e\u0447\u0435\u0432\u044b\u0435 \u043c\u043e\u043c\u0435\u043d\u0442\u044b:<\/p>\n<ul>\n<li>\n<p>\u0410\u0433\u0435\u043d\u0442 \u0445\u0440\u0430\u043d\u0438\u0442 \u0431\u0443\u0444\u0435\u0440 \u043f\u0430\u043c\u044f\u0442\u0438 \u0441\u043e \u0432\u0441\u0435\u043c \u043f\u0440\u043e\u0448\u043b\u044b\u043c \u043e\u043f\u044b\u0442\u043e\u043c.<\/p>\n<\/li>\n<li>\n<p>\u0415\u0433\u043e \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u043e\u043f\u0440\u0435\u0434\u0435\u043b\u044f\u0435\u0442\u0441\u044f \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u044b\u043c \u0432\u044b\u0445\u043e\u0434\u043e\u043c (\u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c \u0434\u043e\u0431\u0440\u043e\u0442\u043d\u043e\u0441\u0442\u0438) \u0441\u0435\u0442\u0438.<\/p>\n<\/li>\n<li>\n<p>\u0424\u0443\u043d\u043a\u0446\u0438\u044f 
\u043f\u043e\u0442\u0435\u0440\u044c \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u0441\u043e\u0431\u043e\u0439 \u0441\u0440\u0435\u0434\u043d\u0435\u043a\u0432\u0430\u0434\u0440\u0430\u0442\u0438\u0447\u043d\u0443\u044e \u043e\u0448\u0438\u0431\u043a\u0443 \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u0430\u043d\u043d\u043e\u0433\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q \u0438 \u0446\u0435\u043b\u0435\u0432\u043e\u0433\u043e \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q.<\/p>\n<\/li>\n<li>\n<p>\u0418\u0437 \u0443\u0440\u0430\u0432\u043d\u0435\u043d\u0438\u044f \u0411\u0435\u043b\u043b\u043c\u0430\u043d\u0430 \u043f\u043e\u043b\u0443\u0447\u0430\u0435\u043c, \u0447\u0442\u043e \u0446\u0435\u043b\u044c\u044e \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f R + \u03b3<!-- -->*<!-- -->max(Q).<\/p>\n<\/li>\n<li>\n<p>\u0420\u0430\u0437\u043d\u0438\u0446\u0430 \u043c\u0435\u0436\u0434\u0443 \u0446\u0435\u043b\u0435\u0432\u044b\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c \u0438 \u043f\u0440\u043e\u0433\u043d\u043e\u0437\u0438\u0440\u0443\u0435\u043c\u044b\u043c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435\u043c \u043d\u0430\u0437\u044b\u0432\u0430\u0435\u0442\u0441\u044f \u043e\u0448\u0438\u0431\u043a\u043e\u0439 \u0432\u0440\u0435\u043c\u0435\u043d\u043d\u043e\u0439 \u0440\u0430\u0437\u043d\u0438\u0446\u044b (\u043e\u0448\u0438\u0431\u043a\u043e\u0439 TD).<\/p>\n<\/li>\n<\/ul>\n<p>\u041f\u0440\u0435\u0436\u0434\u0435 \u0447\u0435\u043c \u043c\u044b \u043e\u0431\u0443\u0447\u0438\u043c \u043d\u0430\u0448 DQN, \u043d\u0430\u043c \u043d\u0443\u0436\u043d\u043e \u0440\u0435\u0448\u0438\u0442\u044c \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0443, \u043a\u043e\u0442\u043e\u0440\u0430\u044f \u0438\u0433\u0440\u0430\u0435\u0442 \u0436\u0438\u0437\u043d\u0435\u043d\u043d\u043e \u0432\u0430\u0436\u043d\u0443\u044e \u0440\u043e\u043b\u044c \u0432 \u0442\u043e\u043c, \u043a\u0430\u043a 
\u0430\u0433\u0435\u043d\u0442 \u0443\u0447\u0438\u0442\u0441\u044f \u043e\u0446\u0435\u043d\u0438\u0432\u0430\u0442\u044c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q, \u0430 \u0438\u043c\u0435\u043d\u043d\u043e:<\/p>\n<div class=\"inline-newsletter-form\">\n<p><h2>\u0417\u0430\u0433\u0440\u0443\u0437\u0438\u0442\u0435 \u043d\u0430\u0448\u0443 \u0431\u0435\u0441\u043f\u043b\u0430\u0442\u043d\u0443\u044e \u044d\u043b\u0435\u043a\u0442\u0440\u043e\u043d\u043d\u0443\u044e \u043a\u043d\u0438\u0433\u0443 \u043f\u043e \u0433\u043b\u0443\u0431\u043e\u043a\u043e\u043c\u0443 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044e \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c<\/h2>\n<h4>\u041c\u044b \u043e\u0431\u044a\u0435\u0434\u0438\u043d\u0438\u043b\u0438 \u0432\u0441\u0435 \u0441\u0442\u0430\u0442\u044c\u0438 \u043f\u043e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044e \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c \u0432 \u043e\u0434\u0438\u043d PDF-\u0444\u0430\u0439\u043b.  
\u041f\u043e\u044d\u0442\u043e\u043c\u0443, \u0435\u0441\u043b\u0438 \u0443 \u0432\u0430\u0441 \u043d\u0435\u0442 \u0432\u0440\u0435\u043c\u0435\u043d\u0438 \u0447\u0438\u0442\u0430\u0442\u044c \u0432\u0441\u044e \u0441\u0442\u0430\u0442\u044c\u044e \u0438\u043b\u0438 \u0432\u044b \u0445\u043e\u0442\u0438\u0442\u0435, \u0447\u0442\u043e\u0431\u044b pdf-\u0432\u0435\u0440\u0441\u0438\u044f \u0447\u0438\u0442\u0430\u043b\u0430\u0441\u044c \u0432 \u0430\u0432\u0442\u043e\u043d\u043e\u043c\u043d\u043e\u043c \u0440\u0435\u0436\u0438\u043c\u0435, \u043d\u0430\u0436\u043c\u0438\u0442\u0435 \u043a\u043d\u043e\u043f\u043a\u0443 \u043d\u0438\u0436\u0435.<\/h4>\n<\/p>\n<div id=\"mauticform_wrapper__drlebook\" class=\"mauticform-wrapper\">\n<div class=\"form-privacy-text--dark\">\n<p>* \u041c\u044b \u0437\u0430\u0431\u043e\u0442\u0438\u043c\u0441\u044f \u043e \u0432\u0430\u0448\u0435\u0439 \u043a\u043e\u043d\u0444\u0438\u0434\u0435\u043d\u0446\u0438\u0430\u043b\u044c\u043d\u043e\u0441\u0442\u0438.  AI Summer \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442 \u043f\u0440\u0435\u0434\u043e\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u043d\u0443\u044e \u0432\u0430\u043c\u0438 \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u044e \u0434\u043b\u044f \u043e\u0442\u043f\u0440\u0430\u0432\u043a\u0438 \u0432\u0430\u043c \u043d\u0430\u0448\u0435\u0433\u043e \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u043e\u043d\u043d\u043e\u0433\u043e \u0431\u044e\u043b\u043b\u0435\u0442\u0435\u043d\u044f \u0438 \u0441\u0432\u044f\u0437\u0438 \u0441 \u0432\u0430\u043c\u0438 \u043f\u043e \u043f\u043e\u0432\u043e\u0434\u0443 \u043d\u0430\u0448\u0438\u0445 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u043e\u0432.  
\u0412\u044b \u043c\u043e\u0436\u0435\u0442\u0435 \u043e\u0442\u043a\u0430\u0437\u0430\u0442\u044c\u0441\u044f \u043e\u0442 \u043f\u043e\u0434\u043f\u0438\u0441\u043a\u0438 \u043d\u0430 \u044d\u0442\u0438 \u0441\u043e\u043e\u0431\u0449\u0435\u043d\u0438\u044f \u0432 \u043b\u044e\u0431\u043e\u0435 \u0432\u0440\u0435\u043c\u044f.  \u0414\u043b\u044f \u043f\u043e\u043b\u0443\u0447\u0435\u043d\u0438\u044f \u0434\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0439 \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u0438 \u043e\u0437\u043d\u0430\u043a\u043e\u043c\u044c\u0442\u0435\u0441\u044c \u0441 \u043d\u0430\u0448\u0435\u0439<!-- --> \u041f\u043e\u043b\u0438\u0442\u0438\u043a\u043e\u0439 \u043a\u043e\u043d\u0444\u0438\u0434\u0435\u043d\u0446\u0438\u0430\u043b\u044c\u043d\u043e\u0441\u0442\u0438.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<h2 id=\"experience-replay\">\u041f\u043e\u0432\u0442\u043e\u0440 \u043e\u043f\u044b\u0442\u0430<\/h2>\n<p>\u0412\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0434\u0435\u043d\u0438\u0435 \u043e\u043f\u044b\u0442\u0430 \u2014 \u044d\u0442\u043e \u043a\u043e\u043d\u0446\u0435\u043f\u0446\u0438\u044f, \u0432 \u043a\u043e\u0442\u043e\u0440\u043e\u0439 \u043c\u044b \u043f\u043e\u043c\u043e\u0433\u0430\u0435\u043c \u0430\u0433\u0435\u043d\u0442\u0443 \u043f\u043e\u043c\u043d\u0438\u0442\u044c \u0438 \u043d\u0435 \u0437\u0430\u0431\u044b\u0432\u0430\u0442\u044c \u0441\u0432\u043e\u0438 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0438\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f, \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u044f \u0438\u0445.  
\u0412\u0440\u0435\u043c\u044f \u043e\u0442 \u0432\u0440\u0435\u043c\u0435\u043d\u0438 \u043c\u044b \u0432\u044b\u0431\u0438\u0440\u0430\u0435\u043c \u0441\u043b\u0443\u0447\u0430\u0439\u043d\u044b\u0439 \u043f\u0430\u043a\u0435\u0442 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0435\u0433\u043e \u043e\u043f\u044b\u0442\u0430 (\u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0445\u0440\u0430\u043d\u0438\u0442\u0441\u044f \u0432 \u0431\u0443\u0444\u0435\u0440\u0435) \u0438 \u043f\u043e\u0434\u0430\u0435\u043c \u0435\u0433\u043e \u0432 \u0441\u0435\u0442\u044c.  \u0422\u0430\u043a\u0438\u043c \u043e\u0431\u0440\u0430\u0437\u043e\u043c, \u0430\u0433\u0435\u043d\u0442 \u043f\u0435\u0440\u0435\u0436\u0438\u0432\u0430\u0435\u0442 \u0441\u0432\u043e\u0435 \u043f\u0440\u043e\u0448\u043b\u043e\u0435 \u0438 \u0443\u043b\u0443\u0447\u0448\u0430\u0435\u0442 \u0441\u0432\u043e\u044e \u043f\u0430\u043c\u044f\u0442\u044c.  \u0414\u0440\u0443\u0433\u0430\u044f \u043f\u0440\u0438\u0447\u0438\u043d\u0430 \u044d\u0442\u043e\u0439 \u0437\u0430\u0434\u0430\u0447\u0438 \u2014 \u0437\u0430\u0441\u0442\u0430\u0432\u0438\u0442\u044c \u0430\u0433\u0435\u043d\u0442\u0430 \u043e\u0441\u0432\u043e\u0431\u043e\u0434\u0438\u0442\u044c\u0441\u044f \u043e\u0442 \u043a\u043e\u043b\u0435\u0431\u0430\u043d\u0438\u0439, \u0432\u043e\u0437\u043d\u0438\u043a\u0430\u044e\u0449\u0438\u0445 \u0438\u0437-\u0437\u0430 \u0432\u044b\u0441\u043e\u043a\u043e\u0439 \u043a\u043e\u0440\u0440\u0435\u043b\u044f\u0446\u0438\u0438 \u043c\u0435\u0436\u0434\u0443 \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u043c\u0438 \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f\u043c\u0438 \u0438 \u043f\u0440\u0438\u0432\u043e\u0434\u044f\u0449\u0438\u0445 \u043a \u043e\u0434\u043d\u0438\u043c \u0438 \u0442\u0435\u043c \u0436\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f\u043c \u0441\u043d\u043e\u0432\u0430 \u0438 \u0441\u043d\u043e\u0432\u0430.<\/p>\n<pre class=\"prism-code language-python\" style=\"color:#F8F8F2;background-color:#282A36\"><p><span class=\"token 
keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">def<\/span><span class=\"token plain\"> <\/span><span class=\"token function\" style=\"color:rgb(80, 250, 123)\">replay<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        minibatch <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> random<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">sample<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">memory<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">for<\/span><span class=\"token plain\"> state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> next_state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span 
class=\"token plain\"> done <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">in<\/span><span class=\"token plain\"> minibatch<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            target <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> reward<\/span><\/p><p><span class=\"token plain\">            <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">if<\/span><span class=\"token plain\"> <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">not<\/span><span class=\"token plain\"> done<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                Q_next<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\">self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">predict<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">next_state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                target <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">reward <\/span><span class=\"token operator\">+<\/span><span class=\"token 
plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">gamma <\/span><span class=\"token operator\">*<\/span><span class=\"token plain\">np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">amax<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">Q_next<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            target_f <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">predict<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            target_f<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token plain\">action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> target<\/span><\/p><p><span class=\"token plain\">            self<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token 
plain\">model<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">fit<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> target_f<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> epochs<\/span><span class=\"token operator\">=<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> verbose<\/span><span class=\"token operator\">=<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><\/p><\/pre>\n<p>\u041d\u0430\u043a\u043e\u043d\u0435\u0446, \u043c\u044b \u0437\u0430\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u043c \u043d\u0430\u0448\u0435\u0433\u043e \u0430\u0433\u0435\u043d\u0442\u0430 \u0432\u0437\u0430\u0438\u043c\u043e\u0434\u0435\u0439\u0441\u0442\u0432\u043e\u0432\u0430\u0442\u044c \u0441 \u043e\u043a\u0440\u0443\u0436\u0430\u044e\u0449\u0435\u0439 \u0441\u0440\u0435\u0434\u043e\u0439 \u0438 \u043e\u0431\u0443\u0447\u0430\u0435\u043c \u0435\u0433\u043e \u043f\u0440\u0435\u0434\u0441\u043a\u0430\u0437\u044b\u0432\u0430\u0442\u044c \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f Q \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0433\u043e \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u044f.<\/p>\n<pre class=\"prism-code language-python\" style=\"color:#F8F8F2;background-color:#282A36\"><p><span class=\"token plain\">env <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> gym<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">make<\/span><span 
class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">'MountainCar-v0'<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">state_size <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">observation_space<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">shape<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">0<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">action_size <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">action_space<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">n<\/span><\/p><p><span class=\"token plain\">agent <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> DQNAgent<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> action_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">done <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token boolean\">False<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">batch_size 
<\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> <\/span><span class=\"token number\">32<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\"\/><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">for<\/span><span class=\"token plain\"> e <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">in<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">range<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">EPISODES<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    state <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">reset<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    state <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">reshape<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> 
state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">for<\/span><span class=\"token plain\"> time <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">in<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">range<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token number\">500<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        action <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> agent<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">act<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        next_state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> done<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> _ <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> env<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token 
plain\">step<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        reward <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> reward <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">if<\/span><span class=\"token plain\"> <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">not<\/span><span class=\"token plain\"> done <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">else<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">-<\/span><span class=\"token number\">10<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        next_state <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> np<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">reshape<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">next_state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">[<\/span><span class=\"token number\">1<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> state_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">]<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        agent<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token 
plain\">remember<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> action<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> reward<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> next_state<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> done<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        state <\/span><span class=\"token operator\">=<\/span><span class=\"token plain\"> next_state<\/span><\/p><p><span class=\"token plain\">        <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">if<\/span><span class=\"token plain\"> done<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">print<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token string\" style=\"color:rgb(255, 121, 198)\">\"episode: {}\/{}, score: {}, e: {:.2}\"<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">                  <\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">format<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">e<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> 
EPISODES<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> time<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">,<\/span><span class=\"token plain\"> agent<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">epsilon<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">            <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">break<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">    <\/span><span class=\"token keyword\" style=\"color:rgb(189, 147, 249);font-style:italic\">if<\/span><span class=\"token plain\"> <\/span><span class=\"token builtin\" style=\"color:rgb(189, 147, 249)\">len<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">agent<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">memory<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><span class=\"token plain\"> <\/span><span class=\"token operator\">&gt;<\/span><span class=\"token plain\"> batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">:<\/span><span class=\"token plain\"\/><\/p><p><span class=\"token plain\">        agent<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">.<\/span><span class=\"token plain\">replay<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">(<\/span><span class=\"token plain\">batch_size<\/span><span class=\"token punctuation\" style=\"color:rgb(248, 248, 242)\">)<\/span><\/p><\/pre>\n<p>\u041a\u0430\u043a \u0432\u044b 
\u043c\u043e\u0436\u0435\u0442\u0435 \u0432\u0438\u0434\u0435\u0442\u044c, \u044d\u0442\u043e \u0442\u043e\u0447\u043d\u043e \u0442\u0430\u043a\u043e\u0439 \u0436\u0435 \u043f\u0440\u043e\u0446\u0435\u0441\u0441, \u043a\u0430\u043a \u0432 \u043f\u0440\u0438\u043c\u0435\u0440\u0435 \u0441 Q-\u0442\u0430\u0431\u043b\u0438\u0446\u0435\u0439, \u0441 \u0442\u043e\u0439 \u0440\u0430\u0437\u043d\u0438\u0446\u0435\u0439, \u0447\u0442\u043e \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u0435 \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 \u0432\u044b\u0431\u0438\u0440\u0430\u0435\u0442\u0441\u044f \u043f\u043e \u043f\u0440\u043e\u0433\u043d\u043e\u0437\u0443 DQN, \u0430 \u043d\u0435 \u043f\u043e Q-\u0442\u0430\u0431\u043b\u0438\u0446\u0435.  \u0412 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u0435 \u0435\u0433\u043e \u043c\u043e\u0436\u043d\u043e \u043f\u0440\u0438\u043c\u0435\u043d\u044f\u0442\u044c \u043a <strong>\u043d\u0435\u0438\u0437\u0432\u0435\u0441\u0442\u043d\u044b\u043c<\/strong> \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u044f\u043c.  \u042d\u0442\u043e \u043c\u0430\u0433\u0438\u044f \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u044b\u0445 \u0441\u0435\u0442\u0435\u0439.<\/p>\n<p>\u0412\u044b \u0442\u043e\u043b\u044c\u043a\u043e \u0447\u0442\u043e \u0441\u043e\u0437\u0434\u0430\u043b\u0438 \u0430\u0433\u0435\u043d\u0442\u0430, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u0443\u0447\u0438\u0442\u0441\u044f \u0432\u043e\u0434\u0438\u0442\u044c \u043c\u0430\u0448\u0438\u043d\u0443 \u0432 \u0433\u043e\u0440\u0443.  \u041f\u043e\u0442\u0440\u044f\u0441\u0430\u044e\u0449\u0435.  
\u0418 \u0447\u0442\u043e \u0435\u0449\u0435 \u0431\u043e\u043b\u0435\u0435 \u0443\u0434\u0438\u0432\u0438\u0442\u0435\u043b\u044c\u043d\u043e, \u0442\u043e\u0442 \u0436\u0435 \u0441\u0430\u043c\u044b\u0439 \u043a\u043e\u0434 (\u044f \u0438\u043c\u0435\u044e \u0432 \u0432\u0438\u0434\u0443 \u043a\u043e\u043f\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u0435 \u0438 \u0432\u0441\u0442\u0430\u0432\u043a\u0443) \u043c\u043e\u0436\u043d\u043e \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0432\u043e \u043c\u043d\u043e\u0433\u0438\u0445 \u0434\u0440\u0443\u0433\u0438\u0445 \u0438\u0433\u0440\u0430\u0445, \u043e\u0442 Atari \u0438 Super Mario \u0434\u043e Doom (!!!)<\/p>\n<p>\u041f\u043e\u0442\u0440\u044f\u0441\u0430\u044e\u0449\u0435!<\/p>\n<p>\u0415\u0449\u0435 \u043e\u0434\u0438\u043d \u0440\u0430\u0437, \u043e\u0431\u0435\u0449\u0430\u044e.<\/p>\n<p>\u041f\u043e\u0442\u0440\u044f\u0441\u0430\u044e\u0449\u0435!<\/p>\n<p>\u0415\u0441\u043b\u0438 \u0432\u0430\u043c \u043d\u0443\u0436\u0435\u043d \u0434\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0439 \u043c\u0430\u0442\u0435\u0440\u0438\u0430\u043b, \u044f \u043d\u0430\u0441\u0442\u043e\u044f\u0442\u0435\u043b\u044c\u043d\u043e \u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434\u0443\u044e \u043a\u0443\u0440\u0441 Advanced AI: Deep Reinforcement Learning in Python \u043d\u0430 Udemy.  
\u041e\u043d \u043e\u0445\u0432\u0430\u0442\u044b\u0432\u0430\u0435\u0442 \u0432\u0441\u0435, \u0447\u0442\u043e \u0432\u0430\u043c \u043d\u0443\u0436\u043d\u043e \u0434\u043b\u044f Deep Q Learning, \u0438 \u043c\u043d\u043e\u0433\u043e\u0435 \u0434\u0440\u0443\u0433\u043e\u0435.<\/p>\n<p>\u0412 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0435\u043c \u044d\u043f\u0438\u0437\u043e\u0434\u0435 \u043c\u044b \u043e\u0441\u0442\u0430\u043d\u0435\u043c\u0441\u044f \u0432 \u043e\u0431\u043b\u0430\u0441\u0442\u0438 Deep Q Learning \u0438 \u043e\u0431\u0441\u0443\u0434\u0438\u043c \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0431\u043e\u043b\u0435\u0435 \u043f\u0440\u043e\u0434\u0432\u0438\u043d\u0443\u0442\u044b\u0435 \u043c\u0435\u0442\u043e\u0434\u044b, \u0442\u0430\u043a\u0438\u0435 \u043a\u0430\u043a \u0434\u0432\u043e\u0439\u043d\u044b\u0435 \u0441\u0435\u0442\u0438 DQN, \u0434\u0443\u044d\u043b\u044c\u043d\u044b\u0435 DQN \u0438 \u043f\u0440\u0438\u043e\u0440\u0438\u0442\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u043e\u0435 \u0432\u043e\u0441\u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0434\u0435\u043d\u0438\u0435 \u043e\u043f\u044b\u0442\u0430.<\/p>\n<p>\u0414\u043e \u0441\u043a\u043e\u0440\u043e\u0439 \u0432\u0441\u0442\u0440\u0435\u0447\u0438&#8230;<\/p>\n<div class=\"dl-prod-book-inline-banner\">\n<div class=\"dl-prod-book-inline-banner__image gatsby-image-wrapper\" style=\"position:relative;overflow:hidden\"><img decoding=\"async\" aria-hidden=\"true\" 
src=\"data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAZCAYAAAAxFw7TAAAACXBIWXMAAAsTAAALEwEAmpwYAAAFzklEQVQ4y1WU2VOb1xnGzyfVbt1O6tST4AVjglc2s1sskgxC8GlBIAmQkMRugtlBbAJjwAKMDYbYYOyMSTJt46YzSV1PO2k7uWimnfauF532or3oTC\/7L3R6+ev7SaGeXjxzzrf9znOe9\/2OUtm1qAs3UVl2tOybmLJlvGBDy7Kina9JyZRSNWaRKVPGzCq0c5WYDJ21oE6Xp5VRhlI5TtR79WmdrkG9UykPqmQuOlONelc+OCUvn5KX3y5B\/UD0drG8Z9wrRZ0sFri8a4DPiNRlF2aRAQyPrxNff87U2gFtw6tU+keIjCWJTawzvvKY2Y0Dhha2GVnaQY\/FCQ3d5dbsfTFQgSlLzIh7pa75UBdFWW5qe1fpTuzROb\/PVX2Y0uA0ocltAiMbtE9sEZ3ZJTi6SfPQBs7uRapDs\/iH1sStwM7ZUedtAsz1425uIhDw4vU6OVdSj8ftoMxWj8\/bwPVqB5lFdgotdhx1VtwNVoJuK9fK7ehOGzeqqrHW1lNgcXDich1KK2zn4wUPP7zj4XfbLg4THkZ7mni55ObpjJsv111M9Xn40xOdtqAH3ethc9TFyzs6S+\/r7Md1\/rDVwMsFJyfzpB7HSjqo8wXJqGynJ9aC2+9nsLuZtrbmFLjR18xkvwe\/30NvxEul04tN99Ag4NsxF5FWF\/W6Tle7zndzddlySSeqMIa6HkXlRVD5HaIwKleUJ7oYxJwXSs8vNcv9AOpKi8xFl+X6klGDJimqV649AizrRSvrQZV2YyrtRCuJoRVHMZfIAvltnKrpZ2f2EYVTEr61H5PAtII2TPmtaHkBNKmBltuCKVfgRoFVxQCq\/JaoH1XWh7GAAVeFEU44pjmdPGQucR9HZBLL4h6ZvoS4bJXn4rgglFpU5QXTzgWuVOUQyjKIuvE+R3BzWT9V\/juci39IxsAqnzbO0FQywFnnAGUrh1wJJMWlRHO94w24oD0NVzVjqOpRVNUIpqrhFNTqWmR5ag9\/KMFaU4KvJp9i8cxhzumg0NZLT\/QhVyvku6IIWpFEcz3yBq7scZRtSvKZQKsZF\/AYbk+cj1pn+bN+l78PPqY3usj3yjo4W9xLpkDHJp7SPvYzcSTAUsm\/uEskxS2S4qraedTNWdEMJvu0wOP01Y\/yr+ga\/+zZpcEhkRRIPvkBsotDXL0Wxj\/5gqEP\/yKuelI7Mkv+ZqO4BlzV30E5FlB1CUx1afi9lkW+GNgkw3FbYEGOF4c5WRqmqCjM3PkAv50\/YPfVP8gqHybTcpu3JH\/zUWGVvopqWEY575KCO5fIk9yOX5B\/M8fD8Ypuvl8RJV968UVBN\/+Ww4CvP+WvyedU3JjkRPUI\/1dY5d1AuaXHXPdIwRu\/gVtkVeOczGkkp7yTr9sm4bND+M0hf5u7z4B1ireMzK2i6rFUUZVRVNX8AOXbRDUJ2LMucGkJgWvG3LnIaWlW50U3fzx4xn9+\/3O+jK4SqZrnlMSkORJoRv5G9rKAqpkQoH8L1fKQFPgI3iJjYBtTaI9vh5+SaRujtGaQYNMyDlkkuy2JCm+jWh+ljegr\/K8WKZgBaN5MSZO56UjGvcADLi39iNatz7jSucV3\/Gt8yye78Bm7WMakG7nPCUxc1o4bW06DlO9+esuNRpbiQJexRa7DmxzresC127uUDz\/h3S5x1pRMO7LLb2g1YIvfdIp0iQHSDJhrjY6t1+grP6XjyS9p2X2NY\/MLYge\/5uba57Q+ek388Cv6nv0Kz84rIvu\/IPrgc2pnDjmmL6XaLr1lo8qpSic5G9sls2+P4AeviG3\/BHviCRVy7NfM7BBc2Wf00QtCy7s4kp9gufdj3uvbJVt+Q5PkmgYmDKC0jCeZVuNKKuCs
4cf0LD\/j4foMQ4tTDC8MMZ24JadOHwtLg1intzgzsifZiSPb7BtYastHMKNdjFECvzoubjY+YmL1OcH5PYpHdsjo2uKdzodcuLVNSXyf7DEBSlG0+oU3sLo5\/gubMZbW7M93hgAAAABJRU5ErkJggg==\" alt=\"\u041a\u043d\u0438\u0433\u0430 \u00ab\u0413\u043b\u0443\u0431\u043e\u043a\u043e\u0435 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0432 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0441\u0442\u0432\u0435\u00bb\" style=\"position:absolute;top:0;left:0;width:100%;height:100%;object-fit:contain;object-position:center;opacity:1;transition-delay:500ms\"\/><noscript><picture><source srcset=\"https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/69585\/deep-learning-book-cover.png 200w,&#10;https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/497c6\/deep-learning-book-cover.png 400w,&#10;https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/3c17d\/deep-learning-book-cover.png 720w\" sizes=\"(max-width: 720px) 100vw, 720px\"\/><img decoding=\"async\" loading=\"lazy\" sizes=\"(max-width: 720px) 100vw, 720px\" srcset=\"https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/69585\/deep-learning-book-cover.png 200w,&#10;https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/497c6\/deep-learning-book-cover.png 400w,&#10;https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/3c17d\/deep-learning-book-cover.png 720w\" src=\"https:\/\/theaisummer.com\/static\/502e7c498dd9d981ac44c1dcd10f9276\/3c17d\/deep-learning-book-cover.png\" alt=\"\u041a\u043d\u0438\u0433\u0430 \u00ab\u0413\u043b\u0443\u0431\u043e\u043a\u043e\u0435 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0432 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0441\u0442\u0432\u0435\u00bb\" style=\"position:absolute;top:0;left:0;opacity:1;width:100%;height:100%;object-fit:cover;object-position:center\"\/><\/picture><\/noscript><\/div>\n<div class=\"dl-prod-book-inline-banner__text\">\n<h2>\u041a\u043d\u0438\u0433\u0430 
\u00ab\u0413\u043b\u0443\u0431\u043e\u043a\u043e\u0435 \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0432 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0441\u0442\u0432\u0435\u00bb \ud83d\udcd6<\/h2>\n<h4>\u0423\u0437\u043d\u0430\u0439\u0442\u0435, \u043a\u0430\u043a \u0441\u043e\u0437\u0434\u0430\u0432\u0430\u0442\u044c, \u043e\u0431\u0443\u0447\u0430\u0442\u044c, \u0440\u0430\u0437\u0432\u0435\u0440\u0442\u044b\u0432\u0430\u0442\u044c, \u043c\u0430\u0441\u0448\u0442\u0430\u0431\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u0438 \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0438\u0432\u0430\u0442\u044c \u043c\u043e\u0434\u0435\u043b\u0438 \u0433\u043b\u0443\u0431\u043e\u043a\u043e\u0433\u043e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f.  \u0418\u0437\u0443\u0447\u0438\u0442\u0435 \u0438\u043d\u0444\u0440\u0430\u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440\u0443 \u043c\u0430\u0448\u0438\u043d\u043d\u043e\u0433\u043e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f \u0438 MLOps \u043d\u0430 \u043f\u0440\u0430\u043a\u0442\u0438\u0447\u0435\u0441\u043a\u0438\u0445 \u043f\u0440\u0438\u043c\u0435\u0440\u0430\u0445.<\/h4>\n<p>\u0423\u0437\u043d\u0430\u0442\u044c \u0431\u043e\u043b\u044c\u0448\u0435<\/p><\/div>\n<\/div>\n<p><em class=\"affiliate-disclosure\">* \u0420\u0430\u0441\u043a\u0440\u044b\u0442\u0438\u0435 \u0438\u043d\u0444\u043e\u0440\u043c\u0430\u0446\u0438\u0438: \u041e\u0431\u0440\u0430\u0442\u0438\u0442\u0435 \u0432\u043d\u0438\u043c\u0430\u043d\u0438\u0435, \u0447\u0442\u043e \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u044b\u0435 \u0438\u0437 \u043f\u0440\u0438\u0432\u0435\u0434\u0435\u043d\u043d\u044b\u0445 \u0432\u044b\u0448\u0435 \u0441\u0441\u044b\u043b\u043e\u043a \u043c\u043e\u0433\u0443\u0442 \u0431\u044b\u0442\u044c \u043f\u0430\u0440\u0442\u043d\u0435\u0440\u0441\u043a\u0438\u043c\u0438 \u0441\u0441\u044b\u043b\u043a\u0430\u043c\u0438, \u0438 \u043c\u044b \u0431\u0435\u0437 
\u0434\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0445 \u0437\u0430\u0442\u0440\u0430\u0442 \u0434\u043b\u044f \u0432\u0430\u0441 \u043f\u043e\u043b\u0443\u0447\u0438\u043c \u043a\u043e\u043c\u0438\u0441\u0441\u0438\u044e, \u0435\u0441\u043b\u0438 \u0432\u044b \u0440\u0435\u0448\u0438\u0442\u0435 \u0441\u043e\u0432\u0435\u0440\u0448\u0438\u0442\u044c \u043f\u043e\u043a\u0443\u043f\u043a\u0443 \u043f\u043e\u0441\u043b\u0435 \u043f\u0435\u0440\u0435\u0445\u043e\u0434\u0430 \u043f\u043e \u0441\u0441\u044b\u043b\u043a\u0435.<\/em><\/p>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u041f\u0443\u0442\u0435\u0448\u0435\u0441\u0442\u0432\u0438\u0435 \u043a \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044e \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c \u043f\u0440\u043e\u0434\u043e\u043b\u0436\u0430\u0435\u0442\u0441\u044f\u2026 \u041f\u0440\u0438\u0448\u043b\u043e \u0432\u0440\u0435\u043c\u044f \u043f\u0440\u043e\u0430\u043d\u0430\u043b\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u043f\u0435\u0447\u0430\u043b\u044c\u043d\u043e \u0438\u0437\u0432\u0435\u0441\u0442\u043d\u043e\u0435 Q-\u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0438 \u043f\u043e\u0441\u043c\u043e\u0442\u0440\u0435\u0442\u044c, \u043a\u0430\u043a \u043e\u043d\u043e \u0441\u0442\u0430\u043b\u043e \u043d\u043e\u0432\u044b\u043c \u0441\u0442\u0430\u043d\u0434\u0430\u0440\u0442\u043e\u043c \u0432 \u043e\u0431\u043b\u0430\u0441\u0442\u0438 \u0418\u0418 (\u0441 \u043d\u0435\u0431\u043e\u043b\u044c\u0448\u043e\u0439 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u043d\u0435\u0439\u0440\u043e\u043d\u043d\u044b\u0445 \u0441\u0435\u0442\u0435\u0439). \u041f\u0435\u0440\u0432\u043e-\u043d\u0430\u043f\u0435\u0440\u0432\u043e. 
\u0412 \u043f\u043e\u0441\u043b\u0435\u0434\u043d\u0435\u043c \u043f\u043e\u0441\u0442\u0435 \u043c\u044b \u0443\u0432\u0438\u0434\u0435\u043b\u0438 \u043e\u0441\u043d\u043e\u0432\u043d\u0443\u044e \u043a\u043e\u043d\u0446\u0435\u043f\u0446\u0438\u044e \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u044f \u0441 \u043f\u043e\u0434\u043a\u0440\u0435\u043f\u043b\u0435\u043d\u0438\u0435\u043c \u0438 \u0441\u0444\u043e\u0440\u043c\u0443\u043b\u0438\u0440\u043e\u0432\u0430\u043b\u0438 \u043f\u0440\u043e\u0431\u043b\u0435\u043c\u0443, \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u044f \u0430\u0433\u0435\u043d\u0442\u0430, \u0441\u0440\u0435\u0434\u0443, \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0435 (S), \u0434\u0435\u0439\u0441\u0442\u0432\u0438\u0435 (A) \u0438 \u0432\u043e\u0437\u043d\u0430\u0433\u0440\u0430\u0436\u0434\u0435\u043d\u0438\u0435 (R). \u041c\u044b \u0433\u043e\u0432\u043e\u0440\u0438\u043b\u0438 \u043e \u0442\u043e\u043c, [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":1577,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[1],"tags":[],"class_list":{"0":"post-1576","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","7":"category-ai-research-and-news"},"_links":{"self":[{"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/posts\/1576","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1576"}],"version-history":[{"count":0,"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/posts\/1576\/revisions"}],"wp:featuredmedia":[{"e
mbeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=\/wp\/v2\/media\/1577"}],"wp:attachment":[{"href":"https:\/\/gptmain.news\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1576"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1576"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gptmain.news\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1576"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}