Neo111x's picture
Update README.md
5945d88 verified

FalCodecompiler

Benchmark

Introduction of Falcon3-decompile-7b

Falcon3-decompiler-7b aims to decompile x86 ghidra decompilation into more readable C.

Evaluation Results

The benchmark that have been used is HumanEval benchmark from LLM4Decompile Benchmark

How to Use

Here is an example of how to use our model Note: Replace asm_func with the function that you want to decompile

Decompilation: Use falcon3-decompiler-7b to translate ghidra decompilation output to more readable code:

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

model_path = 'Neo111x/falcon3-decompiler-7b-v1' # V1.5 Model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path,torch_dtype=torch.bfloat16)


asm_func = """
char * func0(char **param_1,int param_2)

{
  char **ppcVar1;
  char *__s;
  size_t sVar2;
  int iVar3;
  char *pcVar4;
  
  pcVar4 = "";
  if (0 < param_2) {
    iVar3 = 0;
    ppcVar1 = param_1 + (ulong)(param_2 - 1) + 1;
    do {
      __s = *param_1;
      sVar2 = strlen(__s);
      if (iVar3 < (int)sVar2) {
        pcVar4 = __s;
        iVar3 = (int)sVar2;
      }
      param_1 = param_1 + 1;
    } while (param_1 != ppcVar1);
  }
  return pcVar4;
}
"""

before = f"# This is the assembly code:\n"#prompt
after = "\n# What is the source code?\n"#prompt
asm_func = before+asm_func.strip()+after
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")

inputs = tokenizer(asm_func, return_tensors="pt")
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=2048)### max length to 4096, max new tokens should be below the range
c_func_decompile = tokenizer.decode(outputs[0][len(inputs[0]):-1])

# Note only decompile one function, where the original file may contain multiple functions

print(f'decompiled function:\n{c_func_decompile}')

Contact

If you have any questions, please raise an issue.