[{"data":1,"prerenderedAt":372},["ShallowReactive",2],{"publication-2024\u002Ftransformcode-a-contrastive-learning-framework-for-code-embedding-via-subtree-tr-en":3,"publication-members":59},{"_path":4,"_dir":5,"_draft":6,"_partial":6,"_locale":7,"title":8,"description":9,"_hidden":6,"authors":10,"authors_orcid":16,"year":22,"doi":23,"openalex_id":24,"venue":25,"abstract_screenshot":26,"keywords":27,"body":39,"_type":52,"_id":53,"_source":54,"_file":55,"_stem":56,"_extension":57,"locale":58},"\u002Fpublications\u002F2024\u002Ftransformcode-a-contrastive-learning-framework-for-code-embedding-via-subtree-tr","2024",false,"","TransformCode: A Contrastive Learning Framework for Code Embedding via Subtree Transformation","Artificial intelligence (AI) has revolutionized software engineering (SE) by enhancing software development efficiency. The advent of pre-trained models (PTMs) leveraging transfer learning has significantly advanced AI for SE. However, existing PTMs that operate on individual code tokens suffer from several limitations: They are costly to train and fine-tune; and they rely heavily on labeled data for fine-tuning on task-specific datasets. In this paper, we present TransformCode, a novel framework that learns code embeddings in a contrastive learning manner. Our framework is encoder-agnostic and language-agnostic, which means that it can leverage any encoder model and handle any programming language. We also propose a novel data-augmentation technique called abstract syntax tree (AST) transformation, which applies syntactic and semantic transformations to the original code snippets, to generate more diverse and robust samples for contrastive learning. 
Our framework has several advantages over existing methods: (1) It is flexible and adaptable, because it can easily be extended to other downstream tasks that require code representation (such as code-clone detection and classification); (2) it is efficient and scalable, because it does not require a large model or a large amount of training data, and it can support any programming language; (3) it is not limited to unsupervised learning, but can also be applied to some supervised learning tasks by incorporating task-specific labels or objectives; and (4) it can also adjust the number of encoder parameters based on computing resources. We evaluate our framework on several code-related tasks, and demonstrate its effectiveness and superiority over the state-of-the-art methods such as SourcererCC, Code2vec, and InferCode.",[11,12,13,14,15],"Xian, Zixiang","Huang, Rubing","Towey, Dave","Fang, Chunrong","Chen, Zhenyu",[17,18,19,20,21],"0000-0002-8892-6187","0000-0002-1769-6126","0000-0003-0877-4353","0000-0002-9930-7111","0000-0002-9592-7022",2024,"https:\u002F\u002Fdoi.org\u002F10.1109\u002Ftse.2024.3393419","W4395470967","IEEE Transactions on Software Engineering",null,[28,29,30,31,32,33,34,35,36,37,38],"Computer science","Programming language","Transformation (genetics)","Embedding","Code (set theory)","Model transformation","Theoretical computer science","Program transformation","Artificial intelligence","Natural language processing","Software 
engineering",{"type":40,"children":41,"toc":49},"root",[42],{"type":43,"tag":44,"props":45,"children":46},"element","p",{},[47],{"type":48,"value":9},"text",{"title":7,"searchDepth":50,"depth":50,"links":51},2,[],"markdown","content:publications:2024:transformcode-a-contrastive-learning-framework-for-code-embedding-via-subtree-tr.md","content","publications\u002F2024\u002Ftransformcode-a-contrastive-learning-framework-for-code-embedding-via-subtree-tr.md","publications\u002F2024\u002Ftransformcode-a-contrastive-learning-framework-for-code-embedding-via-subtree-tr","md","en",[60,73,78,89,96,104,110,119,126,132,137,147,154,163,169,181,190,199,205,213,218,226,232,240,244,254,261,269,274,283,289,297,303,311,316,322,332,340,346,354,359,367],{"_path":61,"title":62,"name":63,"role":64,"email":26,"image":65,"category":66,"interests":67,"order":50,"_id":72},"\u002Fmembers\u002Fstaff\u002Falain-chong","Vice President for Global Affairs and Partnerships · Professor of Information Systems and Digital Innovation","Alain Chong","Deputy Director of Lab","assets\u002F8.png","staff",[68,69,70,71],"信息系统与运作管理","计算机科学与运筹学","Information Systems and Operations Management","Computer Science and Operations Research","content:members:staff:alain-chong.md",{"_path":61,"title":74,"role":75,"interests":76,"_id":77},"全球事务与合作副校长 · 信息系统与数字创新教授","实验室副主任",[68,69],"content:members:staff:alain-chong.zh-CN.md",{"_path":79,"title":80,"name":81,"role":82,"email":26,"image":83,"category":66,"interests":84,"order":87,"_id":88},"\u002Fmembers\u002Fstaff\u002Fanthony-belloti","Professor","Anthony Belloti","Core Member","assets\u002F41.png",[85,86],"Machine Learning and Credit Risk Model","Model 
Risks",9,"content:members:staff:anthony-belloti.md",{"_path":79,"title":90,"role":91,"interests":92,"_id":95},"计算机科学系教授","核心成员",[93,94],"机器学习与信用风险模型","模型风险","content:members:staff:anthony-belloti.zh-CN.md",{"_path":97,"title":98,"name":98,"role":82,"email":26,"image":99,"category":66,"interests":100,"order":102,"_id":103},"\u002Fmembers\u002Fstaff\u002Fboon-giin-lee","Boon Giin Lee","assets\u002F31.jpg",[101],"Intelligent Sensor and Extended Reality",11,"content:members:staff:boon-giin-lee.md",{"_path":97,"title":105,"role":91,"interests":106,"_id":109},"人机交互实验室负责人 · 计算机科学系副教授",[107,108],"人机交互 HCI","智能传感与扩展现实技术","content:members:staff:boon-giin-lee.zh-CN.md",{"_path":111,"title":112,"name":112,"role":113,"email":26,"image":114,"category":66,"interests":115,"order":117,"_id":118},"\u002Fmembers\u002Fstaff\u002Fcong-cao","Cong Cao","Direction Leader","assets\u002FCC.png",[116],"Science and technology policy and institutional reform",7,"content:members:staff:cong-cao.md",{"_path":111,"title":120,"name":121,"role":122,"interests":123,"_id":125},"宁波诺丁汉大学商学院创新学教授","曹聪","方向带头人",[124],"科技政策与体制改革","content:members:staff:cong-cao.zh-CN.md",{"_path":127,"title":128,"name":128,"role":82,"email":26,"image":129,"category":66,"order":130,"_id":131},"\u002Fmembers\u002Fstaff\u002Fdave-towey","Dave Towey","assets\u002F32.jpg",8,"content:members:staff:dave-towey.md",{"_path":127,"title":133,"role":91,"interests":134,"_id":136},"计算机科学系教授 · 计算机科学系主任",[135],"计算机科学与语言学","content:members:staff:dave-towey.zh-CN.md",{"_path":138,"title":139,"name":139,"role":82,"email":26,"image":140,"category":66,"interests":141,"order":145,"_id":146},"\u002Fmembers\u002Fstaff\u002Ffazl-ullah-khan","Fazl Ullah Khan","assets\u002F44.png",[142,143,144],"Computer Network","Computer Architecture and Network Security","Software Engineering",12,"content:members:staff:fazl-ullah-khan.md",{"_path":138,"title":148,"role":91,"interests":149,"_id":153},"计算机科学系助理教授 · IEEE 
高级会员",[150,151,152],"计算机网络","计算机和网络安全","软件工程","content:members:staff:fazl-ullah-khan.zh-CN.md",{"_path":155,"title":156,"name":157,"role":82,"email":26,"image":158,"category":66,"interests":159,"order":161,"_id":162},"\u002Fmembers\u002Fstaff\u002Fheng-yu","Associate Professor","Heng Yu","assets\u002FHENGYU.png",[160],"Embedded Systems Design",17,"content:members:staff:heng-yu.md",{"_path":155,"title":164,"name":165,"role":91,"interests":166,"_id":168},"计算机科学系副教授","于恒",[167],"嵌入式系统设计","content:members:staff:heng-yu.zh-CN.md",{"_path":170,"title":156,"name":171,"role":82,"email":26,"image":172,"category":66,"interests":173,"order":179,"_id":180},"\u002Fmembers\u002Fstaff\u002Fheshan-du","Heshan Du","assets\u002Fhesahndu.png",[174,175,176,177,178],"Logic, Knowledge Representation and Reasoning","Geographic Information Systems","Operations Research","Machine Learning","Reinforcement Learning",20,"content:members:staff:heshan-du.md",{"_path":170,"title":164,"name":182,"role":91,"interests":183,"_id":189},"杜何珊",[184,185,186,187,188],"逻辑与知识表示","地理信息系统","运筹学","机器学习","强化学习","content:members:staff:heshan-du.zh-CN.md",{"_path":191,"title":192,"name":193,"role":82,"email":26,"image":194,"category":66,"interests":195,"order":197,"_id":198},"\u002Fmembers\u002Fstaff\u002Fhuan-jin","Assistant Professor","Huan Jin","assets\u002Fhuanjin.png",[196,177],"Optimisation",21,"content:members:staff:huan-jin.md",{"_path":191,"title":200,"name":201,"role":91,"interests":202,"_id":204},"计算机科学系助理教授","靳欢",[203,187],"优化","content:members:staff:huan-jin.zh-CN.md",{"_path":206,"title":156,"name":207,"role":113,"email":26,"image":208,"category":66,"interests":209,"order":211,"_id":212},"\u002Fmembers\u002Fstaff\u002Fjianfeng-ren","Jianfeng Ren","assets\u002F42.jpg",[177,210],"Computer 
Vision",3,"content:members:staff:jianfeng-ren.md",{"_path":206,"title":164,"name":214,"role":122,"interests":215,"_id":217},"任剑锋",[187,216],"计算机视觉","content:members:staff:jianfeng-ren.zh-CN.md",{"_path":219,"title":220,"name":220,"role":113,"email":26,"image":221,"category":66,"interests":222,"order":224,"_id":225},"\u002Fmembers\u002Fstaff\u002Fjiawei-li","Jiawei Li","assets\u002F11.png",[223],"Computer Science and Artificial Intelligence",15,"content:members:staff:jiawei-li.md",{"_path":219,"title":227,"name":228,"role":122,"interests":229,"_id":231},"计算机科学系助理教授 · 英国诺丁汉大学博士后","李家炜",[230],"计算机与人工智能","content:members:staff:jiawei-li.zh-CN.md",{"_path":233,"title":156,"name":234,"role":82,"email":26,"image":235,"category":66,"interests":236,"order":238,"_id":239},"\u002Fmembers\u002Fstaff\u002Fmatthew-pike","Matthew Pike","assets\u002F43.jpg",[237],"Digitalised Learning",16,"content:members:staff:matthew-pike.md",{"_path":233,"title":164,"role":91,"interests":241,"_id":243},[242],"数字化学习","content:members:staff:matthew-pike.zh-CN.md",{"_path":245,"title":192,"name":246,"role":82,"email":26,"image":247,"category":66,"interests":248,"order":252,"_id":253},"\u002Fmembers\u002Fstaff\u002Fning-xue","Ning Xue","\u002Fimages\u002Fuon-logo.png",[249,250,251],"Artificial Intelligence","Computational Intelligence","Combinatorial Optimization",13,"content:members:staff:ning-xue.md",{"_path":245,"title":200,"name":255,"role":91,"interests":256,"_id":260},"薛宁",[257,258,259],"人工智能","计算智能","组合优化","content:members:staff:ning-xue.zh-CN.md",{"_path":262,"title":192,"name":263,"role":82,"email":26,"image":264,"category":66,"interests":265,"order":267,"_id":268},"\u002Fmembers\u002Fstaff\u002Fqian-zhang","Qian Zhang","assets\u002Fqz.png",[266,210,177],"Image 
Processing",14,"content:members:staff:qian-zhang.md",{"_path":262,"title":200,"name":270,"role":91,"interests":271,"_id":273},"张茜",[272,216,187],"图像处理","content:members:staff:qian-zhang.zh-CN.md",{"_path":275,"title":80,"name":276,"role":277,"email":26,"image":278,"category":66,"interests":279,"orcid":280,"order":281,"_id":282},"\u002Fmembers\u002Fstaff\u002Fruibin-bai","Ruibin Bai","Director of Lab","assets\u002F38.png",[71],"0000-0003-1722-568X",1,"content:members:staff:ruibin-bai.md",{"_path":275,"title":284,"name":285,"role":286,"interests":287,"_id":288},"教授","白瑞斌","实验室主任",[69],"content:members:staff:ruibin-bai.zh-CN.md",{"_path":290,"title":291,"name":291,"role":113,"email":26,"image":292,"category":66,"interests":293,"order":295,"_id":296},"\u002Fmembers\u002Fstaff\u002Fsean-he","Sean He","assets\u002F39.png",[210,294,177],"Data Analytics",5,"content:members:staff:sean-he.md",{"_path":290,"title":298,"name":299,"role":122,"interests":300,"_id":302},"计算机科学系教授 · 国家级讲席学者","何祥健",[216,301,187],"数据分析","content:members:staff:sean-he.zh-CN.md",{"_path":304,"title":305,"name":305,"role":82,"email":26,"image":306,"category":66,"interests":307,"order":309,"_id":310},"\u002Fmembers\u002Fstaff\u002Ftianxiang-cui","Tianxiang Cui","assets\u002Ftianxiangcui.png",[250,308,177,178],"Operation Research",19,"content:members:staff:tianxiang-cui.md",{"_path":304,"title":148,"name":312,"role":91,"interests":313,"_id":315},"崔天翔",[258,314,187,188],"运筹研究","content:members:staff:tianxiang-cui.zh-CN.md",{"_path":317,"title":80,"name":318,"role":82,"email":26,"image":319,"category":66,"order":320,"_id":321},"\u002Fmembers\u002Fstaff\u002Fxiuping-hua","Xiuping 
Hua","assets\u002FxiupignHua.png",10,"content:members:staff:xiuping-hua.md",{"_path":317,"title":323,"name":324,"role":91,"interests":325,"_id":331},"金融、会计与经济系教授","华秀萍",[326,327,328,329,330,93],"资产定价","公司金融","衍生品","金融科技","创新金融和普惠金融","content:members:staff:xiuping-hua.zh-CN.md",{"_path":333,"title":80,"name":334,"role":82,"email":26,"image":335,"category":66,"interests":336,"order":338,"_id":339},"\u002Fmembers\u002Fstaff\u002Fying-weng","Ying Weng","assets\u002Fyingweng.png",[210,266,337],"IoT",4,"content:members:staff:ying-weng.md",{"_path":333,"title":90,"name":341,"role":91,"interests":342,"_id":345},"翁莹",[216,272,343,344],"物联网 IoT","无线网络安全与服务质量","content:members:staff:ying-weng.zh-CN.md",{"_path":347,"title":192,"name":348,"role":82,"email":26,"image":349,"category":66,"interests":350,"order":352,"_id":353},"\u002Fmembers\u002Fstaff\u002Fyuan-yao","Yuan Yao","assets\u002Fyuanyao.png",[351],"Autonomous Agents and Multi-Agent Systems",18,"content:members:staff:yuan-yao.md",{"_path":347,"title":200,"name":355,"role":91,"interests":356,"_id":358},"姚远",[357],"自主智能体与多智能体系统","content:members:staff:yuan-yao.zh-CN.md",{"_path":360,"title":192,"name":361,"role":113,"email":26,"image":362,"category":66,"interests":363,"order":365,"_id":366},"\u002Fmembers\u002Fstaff\u002Fzheng-lu","Zheng Lu","assets\u002F13.png",[364],"Computer Science",6,"content:members:staff:zheng-lu.md",{"_path":360,"title":200,"name":368,"role":122,"interests":369,"_id":371},"卢正",[370],"计算机科学","content:members:staff:zheng-lu.zh-CN.md",1782639693656]