CodeLLMPaper/data/labeldata/patch/example.json at main · PurCL/CodeLLMPaper · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
{
    "Large Language Models Based Fuzzing Techniques: A Survey": {
        "author": "Huang, Linghan and Zhao, Peizhi and Chen, Huaming and Ma, Lei",
        "title": "Large Language Models Based Fuzzing Techniques: A Survey",
        "url": "https://arxiv.org/pdf/2402.00350",
        "abstract": "In the modern era where software plays a pivotal role, software security and vulnerability analysis have become essential for software development. Fuzzing test, as an efficient software testing method, are widely used in various domains. Moreover, the rapid development of Large Language Models (LLMs) has facilitated their application in the field of software testing, demonstrating remarkable performance. Considering existing fuzzing test techniques are not entirely automated and software vulnerabilities continue to evolve, there is a growing trend towards employing fuzzing test generated based on large language models. This survey provides a systematic overview of the approaches that fuse LLMs and fuzzing tests for software testing. In this paper, a statistical analysis and discussion of the literature in three areas, including LLMs, fuzzing test, and fuzzing test generated based on LLMs, are conducted by summarising the state-of-the-art methods up until 2024. Our survey also investigates the potential for widespread deployment and application of fuzzing test techniques generated by LLMs in the future.",
        "labels": [
            "program testing",
            "fuzzing",
            "survey"
        ],
        "venue": "arXiv2024"
    },
    "KernelGPT: Enhanced Kernel Fuzzing via Large Language Models": {
            "author": "Chenyuan Yang and Zijie Zhao and Lingming Zhang",
            "title": "KernelGPT: Enhanced Kernel Fuzzing via Large Language Models",
            "url": "https://arxiv.org/pdf/2401.00563",
            "abstract": "Bugs in operating system kernels can affect billions of devices and users all over the world. As a result, a large body of research has been focused on kernel fuzzing, i.e., automatically generating syscall (system call) sequences to detect potential kernel bugs or vulnerabilities. Kernel fuzzing aims to generate valid syscall sequences guided by syscall specifications that define both the syntax and semantics of syscalls. While there has been existing work trying to automate syscall specification generation, this remains largely manual work, and a large number of important syscalls are still uncovered. In this paper, we propose KernelGPT, the first approach to automatically synthesizing syscall specifications via Large Language Models (LLMs) for enhanced kernel fuzzing. Our key insight is that LLMs have seen massive kernel code, documentation, and use cases during pre-training, and thus can automatically distill the necessary information for making valid syscalls. More specifically, KernelGPT leverages an iterative approach to automatically infer the specifications, and further debug and repair them based on the validation feedback. Our results demonstrate that KernelGPT can generate more new and valid specifications and achieve higher coverage than state-of-the-art techniques. So far, by using newly generated specifications, KernelGPT has already detected 24 new unique bugs in Linux kernel, with 12 fixed and 11 assigned with CVE numbers. Moreover, a number of specifications generated by KernelGPT have already been merged into the kernel fuzzer Syzkaller, following the request from its development team.",
            "labels": [
                "program testing",
                "fuzzing"
            ],
            "venue": "arXiv2024"
    }
}