[{"data":1,"prerenderedAt":375},["ShallowReactive",2],{"publication-2023\u002Fa-max-relevance-min-divergence-criterion-for-data-discretization-with-applicatio-en":3,"publication-members":63},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"_hidden":6,"authors":10,"authors_orcid":16,"year":22,"doi":23,"openalex_id":24,"venue":25,"abstract_screenshot":26,"keywords":27,"body":43,"_type":56,"_id":57,"_source":58,"_file":59,"_stem":60,"_extension":61,"locale":62},"\u002Fpublications\u002F2023\u002Fa-max-relevance-min-divergence-criterion-for-data-discretization-with-applicatio","2023",false,"","A Max-Relevance-Min-Divergence criterion for data discretization with applications on naive Bayes","In many classification models, data is discretized to better estimate its distribution. Existing discretization methods often target at maximizing the discriminant power of discretized data, while overlooking the fact that the primary target of data discretization in classification is to improve the generalization performance. As a result, the data tend to be over-split into many small bins since the data without discretization retain the maximal discriminant information. Thus, we propose a Max-Dependency-Min-Divergence (MDmD) criterion that maximizes both the discriminant information and generalization ability of the discretized data. More specifically, the Max-Dependency criterion maximizes the statistical dependency between the discretized data and the classification variable while the Min-Divergence criterion explicitly minimizes the JS-divergence between the training data and the validation data for a given discretization scheme. The proposed MDmD criterion is technically appealing, but it is difficult to reliably estimate the high-order joint distributions of attributes and the classification variable. We hence further propose a more practical solution, Max-Relevance-Min-Divergence (MRmD) discretization scheme, where each attribute is discretized separately, by simultaneously maximizing the discriminant information and the generalization ability of the discretized data. The proposed MRmD is compared with the state-of-the-art discretization algorithms under the naive Bayes classification framework on 45 benchmark datasets. It significantly outperforms all the compared methods on most of the datasets.",[11,12,13,14,15],"Wang, Shihe","Ren, Jianfeng","Bai, Ruibin","Yao, Yuan","Jiang, Xudong",[17,18,19,20,21],"0000-0002-8371-6893","0000-0003-4619-6590","0000-0003-1722-568X","0000-0002-2705-6245","0000-0002-9104-2315",2023,"https:\u002F\u002Fdoi.org\u002F10.1016\u002Fj.patcog.2023.110236","W4390322455","Pattern Recognition",null,[28,29,30,31,32,33,34,35,36,37,38,39,40,41,42],"Discretization","Divergence (linguistics)","Discretization of continuous features","Generalization","Dependency (UML)","Linear discriminant analysis","Mathematics","Naive Bayes classifier","Computer science","Pattern recognition (psychology)","Artificial intelligence","Algorithm","Data mining","Support vector machine","Discretization error",{"type":44,"children":45,"toc":53},"root",[46],{"type":47,"tag":48,"props":49,"children":50},"element","p",{},[51],{"type":52,"value":9},"text",{"title":7,"searchDepth":54,"depth":54,"links":55},2,[],"markdown","content:publications:2023:a-max-relevance-min-divergence-criterion-for-data-discretization-with-applicatio.md","content","publications\u002F2023\u002Fa-max-relevance-min-divergence-criterion-for-data-discretization-with-applicatio.md","publications\u002F2023\u002Fa-max-relevance-min-divergence-criterion-for-data-discretization-with-applicatio","md","en",[64,77,82,93,100,108,114,123,130,136,141,151,158,167,173,185,194,203,209,217,222,230,236,244,248,258,265,273,278,286,292,300,306,314,319,325,335,343,349,357,362,370],{"_path":65,"title":66,"name":67,"role":68,"email":26,"image":69,"category":70,"interests":71,"order":54,"_id":76},"\u002Fmembers\u002Fstaff\u002Falain-chong","Vice President for Global Affairs and Partnerships · Professor of Information Systems and Digital Innovation","Alain Chong","Deputy Director of Lab","assets\u002F8.png","staff",[72,73,74,75],"信息系统与运作管理","计算机科学与运筹学","Information Systems and Operations Management","Computer Science and Operations Research","content:members:staff:alain-chong.md",{"_path":65,"title":78,"role":79,"interests":80,"_id":81},"全球事务与合作副校长 · 信息系统与数字创新教授","实验室副主任",[72,73],"content:members:staff:alain-chong.zh-CN.md",{"_path":83,"title":84,"name":85,"role":86,"email":26,"image":87,"category":70,"interests":88,"order":91,"_id":92},"\u002Fmembers\u002Fstaff\u002Fanthony-belloti","Professor","Anthony Belloti","Core Member","assets\u002F41.png",[89,90],"Machine Learning and Credit Risk Model","Model Risks",9,"content:members:staff:anthony-belloti.md",{"_path":83,"title":94,"role":95,"interests":96,"_id":99},"计算机科学系教授","核心成员",[97,98],"机器学习与信用风险模型","模型风险","content:members:staff:anthony-belloti.zh-CN.md",{"_path":101,"title":102,"name":102,"role":86,"email":26,"image":103,"category":70,"interests":104,"order":106,"_id":107},"\u002Fmembers\u002Fstaff\u002Fboon-giin-lee","Boon Giin Lee","assets\u002F31.jpg",[105],"Intelligent Sensor and Extended Reality",11,"content:members:staff:boon-giin-lee.md",{"_path":101,"title":109,"role":95,"interests":110,"_id":113},"人机交互实验室负责人 · 计算机科学系副教授",[111,112],"人机交互 HCI","智能传感与扩展现实技术","content:members:staff:boon-giin-lee.zh-CN.md",{"_path":115,"title":116,"name":116,"role":117,"email":26,"image":118,"category":70,"interests":119,"order":121,"_id":122},"\u002Fmembers\u002Fstaff\u002Fcong-cao","Cong Cao","Direction Leader","assets\u002FCC.png",[120],"Science and technology policy and institutional reform",7,"content:members:staff:cong-cao.md",{"_path":115,"title":124,"name":125,"role":126,"interests":127,"_id":129},"宁波诺丁汉大学商学院创新学教授","曹聪","方向带头人",[128],"科技政策与体制改革","content:members:staff:cong-cao.zh-CN.md",{"_path":131,"title":132,"name":132,"role":86,"email":26,"image":133,"category":70,"order":134,"_id":135},"\u002Fmembers\u002Fstaff\u002Fdave-towey","Dave Towey","assets\u002F32.jpg",8,"content:members:staff:dave-towey.md",{"_path":131,"title":137,"role":95,"interests":138,"_id":140},"计算机科学系教授 · 计算机科学系主任",[139],"计算机科学与语言学","content:members:staff:dave-towey.zh-CN.md",{"_path":142,"title":143,"name":143,"role":86,"email":26,"image":144,"category":70,"interests":145,"order":149,"_id":150},"\u002Fmembers\u002Fstaff\u002Ffazl-ullah-khan","Fazl Ullah Khan","assets\u002F44.png",[146,147,148],"Computer Network","Computer Architecture and Network Security","Software Engineering",12,"content:members:staff:fazl-ullah-khan.md",{"_path":142,"title":152,"role":95,"interests":153,"_id":157},"计算机科学系助理教授 · IEEE 高级会员",[154,155,156],"计算机网络","计算机和网络安全","软件工程","content:members:staff:fazl-ullah-khan.zh-CN.md",{"_path":159,"title":160,"name":161,"role":86,"email":26,"image":162,"category":70,"interests":163,"order":165,"_id":166},"\u002Fmembers\u002Fstaff\u002Fheng-yu","Associate Professor","Heng Yu","assets\u002FHENGYU.png",[164],"Embedded Systems Design",17,"content:members:staff:heng-yu.md",{"_path":159,"title":168,"name":169,"role":95,"interests":170,"_id":172},"计算机科学系副教授","于恒",[171],"嵌入式系统设计","content:members:staff:heng-yu.zh-CN.md",{"_path":174,"title":160,"name":175,"role":86,"email":26,"image":176,"category":70,"interests":177,"order":183,"_id":184},"\u002Fmembers\u002Fstaff\u002Fheshan-du","Heshan Du","assets\u002Fhesahndu.png",[178,179,180,181,182],"Logic, Knowledge Representation and Reasoning","Geographic Information Systems","Operations Research","Machine Learning","Reinforcement Learning",20,"content:members:staff:heshan-du.md",{"_path":174,"title":168,"name":186,"role":95,"interests":187,"_id":193},"杜何珊",[188,189,190,191,192],"逻辑与知识表示","地理信息系统","运筹学","机器学习","强化学习","content:members:staff:heshan-du.zh-CN.md",{"_path":195,"title":196,"name":197,"role":86,"email":26,"image":198,"category":70,"interests":199,"order":201,"_id":202},"\u002Fmembers\u002Fstaff\u002Fhuan-jin","Assistant Professor","Huan Jin","assets\u002Fhuanjin.png",[200,181],"Optimisation",21,"content:members:staff:huan-jin.md",{"_path":195,"title":204,"name":205,"role":95,"interests":206,"_id":208},"计算机科学系助理教授","靳欢",[207,191],"优化","content:members:staff:huan-jin.zh-CN.md",{"_path":210,"title":160,"name":211,"role":117,"email":26,"image":212,"category":70,"interests":213,"order":215,"_id":216},"\u002Fmembers\u002Fstaff\u002Fjianfeng-ren","Jianfeng Ren","assets\u002F42.jpg",[181,214],"Computer Vision",3,"content:members:staff:jianfeng-ren.md",{"_path":210,"title":168,"name":218,"role":126,"interests":219,"_id":221},"任剑锋",[191,220],"计算机视觉","content:members:staff:jianfeng-ren.zh-CN.md",{"_path":223,"title":224,"name":224,"role":117,"email":26,"image":225,"category":70,"interests":226,"order":228,"_id":229},"\u002Fmembers\u002Fstaff\u002Fjiawei-li","Jiawei Li","assets\u002F11.png",[227],"Computer Science and Artificial Intelligence",15,"content:members:staff:jiawei-li.md",{"_path":223,"title":231,"name":232,"role":126,"interests":233,"_id":235},"计算机科学系助理教授 · 英国诺丁汉大学博士后","李家炜",[234],"计算机与人工智能","content:members:staff:jiawei-li.zh-CN.md",{"_path":237,"title":160,"name":238,"role":86,"email":26,"image":239,"category":70,"interests":240,"order":242,"_id":243},"\u002Fmembers\u002Fstaff\u002Fmatthew-pike","Matthew Pike","assets\u002F43.jpg",[241],"Digitalised Learning",16,"content:members:staff:matthew-pike.md",{"_path":237,"title":168,"role":95,"interests":245,"_id":247},[246],"数字化学习","content:members:staff:matthew-pike.zh-CN.md",{"_path":249,"title":196,"name":250,"role":86,"email":26,"image":251,"category":70,"interests":252,"order":256,"_id":257},"\u002Fmembers\u002Fstaff\u002Fning-xue","Ning Xue","\u002Fimages\u002Fuon-logo.png",[253,254,255],"Artificial Intelligence","Computational Intelligence","Combinatorial Optimization",13,"content:members:staff:ning-xue.md",{"_path":249,"title":204,"name":259,"role":95,"interests":260,"_id":264},"薛宁",[261,262,263],"人工智能","计算智能","组合优化","content:members:staff:ning-xue.zh-CN.md",{"_path":266,"title":196,"name":267,"role":86,"email":26,"image":268,"category":70,"interests":269,"order":271,"_id":272},"\u002Fmembers\u002Fstaff\u002Fqian-zhang","Qian Zhang","assets\u002Fqz.png",[270,214,181],"Image Processing",14,"content:members:staff:qian-zhang.md",{"_path":266,"title":204,"name":274,"role":95,"interests":275,"_id":277},"张茜",[276,220,191],"图像处理","content:members:staff:qian-zhang.zh-CN.md",{"_path":279,"title":84,"name":280,"role":281,"email":26,"image":282,"category":70,"interests":283,"orcid":19,"order":284,"_id":285},"\u002Fmembers\u002Fstaff\u002Fruibin-bai","Ruibin Bai","Director of Lab","assets\u002F38.png",[75],1,"content:members:staff:ruibin-bai.md",{"_path":279,"title":287,"name":288,"role":289,"interests":290,"_id":291},"教授","白瑞斌","实验室主任",[73],"content:members:staff:ruibin-bai.zh-CN.md",{"_path":293,"title":294,"name":294,"role":117,"email":26,"image":295,"category":70,"interests":296,"order":298,"_id":299},"\u002Fmembers\u002Fstaff\u002Fsean-he","Sean He","assets\u002F39.png",[214,297,181],"Data Analytics",5,"content:members:staff:sean-he.md",{"_path":293,"title":301,"name":302,"role":126,"interests":303,"_id":305},"计算机科学系教授 · 国家级讲席学者","何祥健",[220,304,191],"数据分析","content:members:staff:sean-he.zh-CN.md",{"_path":307,"title":308,"name":308,"role":86,"email":26,"image":309,"category":70,"interests":310,"order":312,"_id":313},"\u002Fmembers\u002Fstaff\u002Ftianxiang-cui","Tianxiang Cui","assets\u002Ftianxiangcui.png",[254,311,181,182],"Operation Research",19,"content:members:staff:tianxiang-cui.md",{"_path":307,"title":152,"name":315,"role":95,"interests":316,"_id":318},"崔天翔",[262,317,191,192],"运筹研究","content:members:staff:tianxiang-cui.zh-CN.md",{"_path":320,"title":84,"name":321,"role":86,"email":26,"image":322,"category":70,"order":323,"_id":324},"\u002Fmembers\u002Fstaff\u002Fxiuping-hua","Xiuping Hua","assets\u002FxiupignHua.png",10,"content:members:staff:xiuping-hua.md",{"_path":320,"title":326,"name":327,"role":95,"interests":328,"_id":334},"金融、会计与经济系教授","华秀萍",[329,330,331,332,333,97],"资产定价","公司金融","衍生品","金融科技","创新金融和普惠金融","content:members:staff:xiuping-hua.zh-CN.md",{"_path":336,"title":84,"name":337,"role":86,"email":26,"image":338,"category":70,"interests":339,"order":341,"_id":342},"\u002Fmembers\u002Fstaff\u002Fying-weng","Ying Weng","assets\u002Fyingweng.png",[214,270,340],"IoT",4,"content:members:staff:ying-weng.md",{"_path":336,"title":94,"name":344,"role":95,"interests":345,"_id":348},"翁莹",[220,276,346,347],"物联网 IoT","无线网络安全与服务质量","content:members:staff:ying-weng.zh-CN.md",{"_path":350,"title":196,"name":351,"role":86,"email":26,"image":352,"category":70,"interests":353,"order":355,"_id":356},"\u002Fmembers\u002Fstaff\u002Fyuan-yao","Yuan Yao","assets\u002Fyuanyao.png",[354],"Autonomous Agents and Multi-Agent Systems",18,"content:members:staff:yuan-yao.md",{"_path":350,"title":204,"name":358,"role":95,"interests":359,"_id":361},"姚远",[360],"自主智能体与多智能体系统","content:members:staff:yuan-yao.zh-CN.md",{"_path":363,"title":196,"name":364,"role":117,"email":26,"image":365,"category":70,"interests":366,"order":368,"_id":369},"\u002Fmembers\u002Fstaff\u002Fzheng-lu","Zheng Lu","assets\u002F13.png",[367],"Computer Science",6,"content:members:staff:zheng-lu.md",{"_path":363,"title":204,"name":371,"role":126,"interests":372,"_id":374},"卢正",[373],"计算机科学","content:members:staff:zheng-lu.zh-CN.md",1782639710349]