[{"data":1,"prerenderedAt":376},["ShallowReactive",2],{"publication-2024\u002Fmobile-robot-sequential-decision-making-using-a-deep-reinforcement-learning-hype-en":3,"publication-members":64},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"_hidden":6,"authors":10,"authors_orcid":17,"year":24,"doi":25,"openalex_id":26,"venue":27,"abstract_screenshot":20,"keywords":28,"body":44,"_type":57,"_id":58,"_source":59,"_file":60,"_stem":61,"_extension":62,"locale":63},"\u002Fpublications\u002F2024\u002Fmobile-robot-sequential-decision-making-using-a-deep-reinforcement-learning-hype","2024",false,"","Mobile robot sequential decision making using a deep reinforcement learning hyper-heuristic approach","Sequential decision making is an important part of robotic problems that is receiving unprecedented attention from both academia and industry. Recently, Deep Reinforcement Learning (DRL) has shown its promising capabilities in decision making problems. However, traditional DRL algorithms directly operate in the space of low-level actions, when it is applied in the domain of robotics, it can easily result in an exponential growth of computational complexity and suffer from the “curse of dimensionality”, becoming less efficient as the dimensionality of the environment increases. To address this issue, a novel DRL hyper-heuristic approach is proposed in this paper. The proposed approach is tailored to align with a problem taken from a real-world competition by taking advantage of well-developed low-level heuristic actions in order to narrow the search space and speed up the convergence. This fundamental contribution is a significant step forward from earlier approaches that directly exploit the entire low-level action domain. A state augmentation scheme and a novel reward design are utilized to further improve the performance of the proposed method. Moreover, a Real-to-Sim based training framework is developed to reduce the cost of acquiring real-time data and improve the robustness of agent’s decision-making model. Numerous experimental results demonstrate our proposed method can achieve notable performance gains compared to both competitive DRL baselines and heuristic approaches of the same problem in both known environment and previously unseen scenarios.",[11,12,13,14,15,16],"Cui, Tianxiang","Yang, Xiaoying","Jia, Fuhua","Jin, J.S.","Ye, Yujian","Bai, Ruibin",[18,19,20,21,22,23],"0000-0002-0102-2581","0000-0002-4062-6724",null,"0000-0001-9839-4279","0000-0002-9278-9218","0000-0003-1722-568X",2024,"https:\u002F\u002Fdoi.org\u002F10.1016\u002Fj.eswa.2024.124959","W4401354861","Expert Systems with Applications",[29,30,31,32,33,34,35,36,37,38,39,40,41,42,43],"Reinforcement learning","Computer science","Artificial intelligence","Curse of dimensionality","Robustness (evolution)","Machine learning","Heuristic","Exploit","Robot","Robotics","Hyper-heuristic","Domain (mathematical analysis)","Mobile robot","Robot learning","Mathematics",{"type":45,"children":46,"toc":54},"root",[47],{"type":48,"tag":49,"props":50,"children":51},"element","p",{},[52],{"type":53,"value":9},"text",{"title":7,"searchDepth":55,"depth":55,"links":56},2,[],"markdown","content:publications:2024:mobile-robot-sequential-decision-making-using-a-deep-reinforcement-learning-hype.md","content","publications\u002F2024\u002Fmobile-robot-sequential-decision-making-using-a-deep-reinforcement-learning-hype.md","publications\u002F2024\u002Fmobile-robot-sequential-decision-making-using-a-deep-reinforcement-learning-hype","md","en",[65,78,83,94,101,109,115,124,131,137,142,152,159,168,174,186,195,204,210,218,223,231,237,245,249,259,266,274,279,287,293,301,307,315,320,326,336,344,350,358,363,371],{"_path":66,"title":67,"name":68,"role":69,"email":20,"image":70,"category":71,"interests":72,"order":55,"_id":77},"\u002Fmembers\u002Fstaff\u002Falain-chong","Vice President for Global Affairs and Partnerships · Professor of Information Systems and Digital Innovation","Alain Chong","Deputy Director of Lab","assets\u002F8.png","staff",[73,74,75,76],"信息系统与运作管理","计算机科学与运筹学","Information Systems and Operations Management","Computer Science and Operations Research","content:members:staff:alain-chong.md",{"_path":66,"title":79,"role":80,"interests":81,"_id":82},"全球事务与合作副校长 · 信息系统与数字创新教授","实验室副主任",[73,74],"content:members:staff:alain-chong.zh-CN.md",{"_path":84,"title":85,"name":86,"role":87,"email":20,"image":88,"category":71,"interests":89,"order":92,"_id":93},"\u002Fmembers\u002Fstaff\u002Fanthony-belloti","Professor","Anthony Belloti","Core Member","assets\u002F41.png",[90,91],"Machine Learning and Credit Risk Model","Model Risks",9,"content:members:staff:anthony-belloti.md",{"_path":84,"title":95,"role":96,"interests":97,"_id":100},"计算机科学系教授","核心成员",[98,99],"机器学习与信用风险模型","模型风险","content:members:staff:anthony-belloti.zh-CN.md",{"_path":102,"title":103,"name":103,"role":87,"email":20,"image":104,"category":71,"interests":105,"order":107,"_id":108},"\u002Fmembers\u002Fstaff\u002Fboon-giin-lee","Boon Giin Lee","assets\u002F31.jpg",[106],"Intelligent Sensor and Extended Reality",11,"content:members:staff:boon-giin-lee.md",{"_path":102,"title":110,"role":96,"interests":111,"_id":114},"人机交互实验室负责人 · 计算机科学系副教授",[112,113],"人机交互 HCI","智能传感与扩展现实技术","content:members:staff:boon-giin-lee.zh-CN.md",{"_path":116,"title":117,"name":117,"role":118,"email":20,"image":119,"category":71,"interests":120,"order":122,"_id":123},"\u002Fmembers\u002Fstaff\u002Fcong-cao","Cong Cao","Direction Leader","assets\u002FCC.png",[121],"Science and technology policy and institutional reform",7,"content:members:staff:cong-cao.md",{"_path":116,"title":125,"name":126,"role":127,"interests":128,"_id":130},"宁波诺丁汉大学商学院创新学教授","曹聪","方向带头人",[129],"科技政策与体制改革","content:members:staff:cong-cao.zh-CN.md",{"_path":132,"title":133,"name":133,"role":87,"email":20,"image":134,"category":71,"order":135,"_id":136},"\u002Fmembers\u002Fstaff\u002Fdave-towey","Dave Towey","assets\u002F32.jpg",8,"content:members:staff:dave-towey.md",{"_path":132,"title":138,"role":96,"interests":139,"_id":141},"计算机科学系教授 · 计算机科学系主任",[140],"计算机科学与语言学","content:members:staff:dave-towey.zh-CN.md",{"_path":143,"title":144,"name":144,"role":87,"email":20,"image":145,"category":71,"interests":146,"order":150,"_id":151},"\u002Fmembers\u002Fstaff\u002Ffazl-ullah-khan","Fazl Ullah Khan","assets\u002F44.png",[147,148,149],"Computer Network","Computer Architecture and Network Security","Software Engineering",12,"content:members:staff:fazl-ullah-khan.md",{"_path":143,"title":153,"role":96,"interests":154,"_id":158},"计算机科学系助理教授 · IEEE 高级会员",[155,156,157],"计算机网络","计算机和网络安全","软件工程","content:members:staff:fazl-ullah-khan.zh-CN.md",{"_path":160,"title":161,"name":162,"role":87,"email":20,"image":163,"category":71,"interests":164,"order":166,"_id":167},"\u002Fmembers\u002Fstaff\u002Fheng-yu","Associate Professor","Heng Yu","assets\u002FHENGYU.png",[165],"Embedded Systems Design",17,"content:members:staff:heng-yu.md",{"_path":160,"title":169,"name":170,"role":96,"interests":171,"_id":173},"计算机科学系副教授","于恒",[172],"嵌入式系统设计","content:members:staff:heng-yu.zh-CN.md",{"_path":175,"title":161,"name":176,"role":87,"email":20,"image":177,"category":71,"interests":178,"order":184,"_id":185},"\u002Fmembers\u002Fstaff\u002Fheshan-du","Heshan Du","assets\u002Fhesahndu.png",[179,180,181,182,183],"Logic, Knowledge Representation and Reasoning","Geographic Information Systems","Operations Research","Machine Learning","Reinforcement Learning",20,"content:members:staff:heshan-du.md",{"_path":175,"title":169,"name":187,"role":96,"interests":188,"_id":194},"杜何珊",[189,190,191,192,193],"逻辑与知识表示","地理信息系统","运筹学","机器学习","强化学习","content:members:staff:heshan-du.zh-CN.md",{"_path":196,"title":197,"name":198,"role":87,"email":20,"image":199,"category":71,"interests":200,"order":202,"_id":203},"\u002Fmembers\u002Fstaff\u002Fhuan-jin","Assistant Professor","Huan Jin","assets\u002Fhuanjin.png",[201,182],"Optimisation",21,"content:members:staff:huan-jin.md",{"_path":196,"title":205,"name":206,"role":96,"interests":207,"_id":209},"计算机科学系助理教授","靳欢",[208,192],"优化","content:members:staff:huan-jin.zh-CN.md",{"_path":211,"title":161,"name":212,"role":118,"email":20,"image":213,"category":71,"interests":214,"order":216,"_id":217},"\u002Fmembers\u002Fstaff\u002Fjianfeng-ren","Jianfeng Ren","assets\u002F42.jpg",[182,215],"Computer Vision",3,"content:members:staff:jianfeng-ren.md",{"_path":211,"title":169,"name":219,"role":127,"interests":220,"_id":222},"任剑锋",[192,221],"计算机视觉","content:members:staff:jianfeng-ren.zh-CN.md",{"_path":224,"title":225,"name":225,"role":118,"email":20,"image":226,"category":71,"interests":227,"order":229,"_id":230},"\u002Fmembers\u002Fstaff\u002Fjiawei-li","Jiawei Li","assets\u002F11.png",[228],"Computer Science and Artificial Intelligence",15,"content:members:staff:jiawei-li.md",{"_path":224,"title":232,"name":233,"role":127,"interests":234,"_id":236},"计算机科学系助理教授 · 英国诺丁汉大学博士后","李家炜",[235],"计算机与人工智能","content:members:staff:jiawei-li.zh-CN.md",{"_path":238,"title":161,"name":239,"role":87,"email":20,"image":240,"category":71,"interests":241,"order":243,"_id":244},"\u002Fmembers\u002Fstaff\u002Fmatthew-pike","Matthew Pike","assets\u002F43.jpg",[242],"Digitalised Learning",16,"content:members:staff:matthew-pike.md",{"_path":238,"title":169,"role":96,"interests":246,"_id":248},[247],"数字化学习","content:members:staff:matthew-pike.zh-CN.md",{"_path":250,"title":197,"name":251,"role":87,"email":20,"image":252,"category":71,"interests":253,"order":257,"_id":258},"\u002Fmembers\u002Fstaff\u002Fning-xue","Ning Xue","\u002Fimages\u002Fuon-logo.png",[254,255,256],"Artificial Intelligence","Computational Intelligence","Combinatorial Optimization",13,"content:members:staff:ning-xue.md",{"_path":250,"title":205,"name":260,"role":96,"interests":261,"_id":265},"薛宁",[262,263,264],"人工智能","计算智能","组合优化","content:members:staff:ning-xue.zh-CN.md",{"_path":267,"title":197,"name":268,"role":87,"email":20,"image":269,"category":71,"interests":270,"order":272,"_id":273},"\u002Fmembers\u002Fstaff\u002Fqian-zhang","Qian Zhang","assets\u002Fqz.png",[271,215,182],"Image Processing",14,"content:members:staff:qian-zhang.md",{"_path":267,"title":205,"name":275,"role":96,"interests":276,"_id":278},"张茜",[277,221,192],"图像处理","content:members:staff:qian-zhang.zh-CN.md",{"_path":280,"title":85,"name":281,"role":282,"email":20,"image":283,"category":71,"interests":284,"orcid":23,"order":285,"_id":286},"\u002Fmembers\u002Fstaff\u002Fruibin-bai","Ruibin Bai","Director of Lab","assets\u002F38.png",[76],1,"content:members:staff:ruibin-bai.md",{"_path":280,"title":288,"name":289,"role":290,"interests":291,"_id":292},"教授","白瑞斌","实验室主任",[74],"content:members:staff:ruibin-bai.zh-CN.md",{"_path":294,"title":295,"name":295,"role":118,"email":20,"image":296,"category":71,"interests":297,"order":299,"_id":300},"\u002Fmembers\u002Fstaff\u002Fsean-he","Sean He","assets\u002F39.png",[215,298,182],"Data Analytics",5,"content:members:staff:sean-he.md",{"_path":294,"title":302,"name":303,"role":127,"interests":304,"_id":306},"计算机科学系教授 · 国家级讲席学者","何祥健",[221,305,192],"数据分析","content:members:staff:sean-he.zh-CN.md",{"_path":308,"title":309,"name":309,"role":87,"email":20,"image":310,"category":71,"interests":311,"order":313,"_id":314},"\u002Fmembers\u002Fstaff\u002Ftianxiang-cui","Tianxiang Cui","assets\u002Ftianxiangcui.png",[255,312,182,183],"Operation Research",19,"content:members:staff:tianxiang-cui.md",{"_path":308,"title":153,"name":316,"role":96,"interests":317,"_id":319},"崔天翔",[263,318,192,193],"运筹研究","content:members:staff:tianxiang-cui.zh-CN.md",{"_path":321,"title":85,"name":322,"role":87,"email":20,"image":323,"category":71,"order":324,"_id":325},"\u002Fmembers\u002Fstaff\u002Fxiuping-hua","Xiuping Hua","assets\u002FxiupignHua.png",10,"content:members:staff:xiuping-hua.md",{"_path":321,"title":327,"name":328,"role":96,"interests":329,"_id":335},"金融、会计与经济系教授","华秀萍",[330,331,332,333,334,98],"资产定价","公司金融","衍生品","金融科技","创新金融和普惠金融","content:members:staff:xiuping-hua.zh-CN.md",{"_path":337,"title":85,"name":338,"role":87,"email":20,"image":339,"category":71,"interests":340,"order":342,"_id":343},"\u002Fmembers\u002Fstaff\u002Fying-weng","Ying Weng","assets\u002Fyingweng.png",[215,271,341],"IoT",4,"content:members:staff:ying-weng.md",{"_path":337,"title":95,"name":345,"role":96,"interests":346,"_id":349},"翁莹",[221,277,347,348],"物联网 IoT","无线网络安全与服务质量","content:members:staff:ying-weng.zh-CN.md",{"_path":351,"title":197,"name":352,"role":87,"email":20,"image":353,"category":71,"interests":354,"order":356,"_id":357},"\u002Fmembers\u002Fstaff\u002Fyuan-yao","Yuan Yao","assets\u002Fyuanyao.png",[355],"Autonomous Agents and Multi-Agent Systems",18,"content:members:staff:yuan-yao.md",{"_path":351,"title":205,"name":359,"role":96,"interests":360,"_id":362},"姚远",[361],"自主智能体与多智能体系统","content:members:staff:yuan-yao.zh-CN.md",{"_path":364,"title":197,"name":365,"role":118,"email":20,"image":366,"category":71,"interests":367,"order":369,"_id":370},"\u002Fmembers\u002Fstaff\u002Fzheng-lu","Zheng Lu","assets\u002F13.png",[368],"Computer Science",6,"content:members:staff:zheng-lu.md",{"_path":364,"title":205,"name":372,"role":127,"interests":373,"_id":375},"卢正",[374],"计算机科学","content:members:staff:zheng-lu.zh-CN.md",1782639657545]