import contextlib
import multiprocessing
import os
import subprocess
import tempfile

# Text prepended to every generated C++ translation unit.
# NOTE(review): the directive below carries no header name in this copy of the
# file -- it looks like the include target was lost; restore it before use.
HEADERS: str = """
#include
"""

# C++ standard passed to the compiler for each supported ``cpp_type``.
_CPP_STANDARDS = {
    "base": "c++17",
    "sfinae": "c++17",
    "concepts": "c++20",
}

# A run result with this prefix means the program compiled but failed at runtime.
_RUNTIME_ERROR_PREFIX = "failed: runtime error:"


def check_correctness(candidate, reference, cpp_type, task_id, completion_id):
    """
    Evaluates the functional correctness of a completion by running the test
    suite provided in the problem.

    For every ``cpp_type`` the candidate is compiled and run together with
    ``reference["tests"]``.  For ``"sfinae"`` and ``"concepts"`` it is also
    compiled together with ``reference["invalids"]``, which is expected to be
    rejected by the compiler.

    :param candidate: C++ source text of the completion.
    :param reference: mapping holding the ``"tests"`` source and, for the
        constrained types, the ``"invalids"`` source.
    :param cpp_type: one of ``"base"``, ``"sfinae"`` or ``"concepts"``.
    :param task_id: identifier copied into the returned dict.
    :param completion_id: an optional completion ID so we can match
        the results later even if execution finishes asynchronously.
    :return: dict with ``task_id``, ``completion_id`` and the
        ``<cpp_type>_run_passed`` / ``_run_compiled`` / ``_run_result`` keys,
        plus ``<cpp_type>_constrain_passed`` / ``_constrain_result`` for the
        constrained types.
    :raises ValueError: if ``cpp_type`` is not a supported value.
    """
    # Validate first so an invalid request never spawns a manager process.
    if cpp_type not in _CPP_STANDARDS:
        raise ValueError(f"Unknown cpp_type: {cpp_type}")
    cppstd = _CPP_STANDARDS[cpp_type]
    check_constraints = cpp_type != "base"

    # The manager owns a server process; the ``with`` block shuts it down
    # instead of leaving it running after the call returns.
    with multiprocessing.Manager() as manager:
        run_result = _run_case(
            manager, unsafe_execute_cpp, candidate, reference["tests"], cppstd
        )
        if check_constraints:
            constrain_result = _run_case(
                manager,
                invalid_compile_cpp,
                candidate,
                reference["invalids"],
                cppstd,
            )

    run_passed = run_result == "passed"
    run_compiled = run_passed or run_result.startswith(_RUNTIME_ERROR_PREFIX)

    result = dict(
        task_id=task_id,
        completion_id=completion_id,
    )
    result[f"{cpp_type}_run_passed"] = run_passed
    result[f"{cpp_type}_run_compiled"] = run_compiled
    result[f"{cpp_type}_run_result"] = run_result
    if check_constraints:
        # A rejected invalid use only counts when the valid tests compiled.
        result[f"{cpp_type}_constrain_passed"] = (
            constrain_result == "passed" and run_compiled
        )
        result[f"{cpp_type}_constrain_result"] = constrain_result
    return result


def _run_case(manager, target, candidate, reference, cppstd):
    """Run ``target`` through ``process_case`` and return its single outcome string."""
    outcome = manager.list()
    process_case(target, candidate, reference, outcome, cppstd)
    return outcome[0]


def process_case(target, candidate, reference, result, cppstd, timeout=60):
    """
    Runs ``target`` in a separate process and guarantees ``result`` is filled.

    :param target: callable invoked as
        ``target(candidate, reference, result, timeout, cppstd)``.
    :param candidate: C++ source text of the completion.
    :param reference: C++ source text appended after the candidate.
    :param result: shared list-like object the target appends its outcome to.
    :param cppstd: value for the compiler's ``-std=`` flag, e.g. ``"c++17"``.
    :param timeout: seconds granted to each subprocess started by ``target``;
        the worker process itself gets five seconds more before it is killed.
    """
    p = multiprocessing.Process(
        target=target,
        args=(candidate, reference, result, timeout, cppstd),
    )
    p.start()
    p.join(timeout=timeout + 5)
    if p.is_alive():
        p.kill()
        # Reap the killed worker so it does not linger as a zombie.
        p.join()

    # Covers both a killed worker and one that died before reporting.
    if not result:
        result.append("timed out")


def _write_source(filename, candidate, reference):
    """Write HEADERS, candidate and reference, newline-joined, to ``filename``."""
    code = "\n".join([HEADERS, candidate, reference])
    with open(filename, "w", encoding="utf-8") as source_file:
        source_file.write(code)


def _compile(filename, cppstd, timeout):
    """Invoke the compiler named by ``GENERICIFY_CLANG`` on ``filename``."""
    cpp_compiler = os.getenv("GENERICIFY_CLANG")
    return subprocess.run(
        [cpp_compiler, f"-std={cppstd}", filename],
        timeout=timeout,
        capture_output=True,
    )


def _compiler_diagnostics(completed):
    """Return the compiler's stderr (or stdout when stderr is empty) as text."""
    raw = completed.stderr or completed.stdout
    # Undecodable bytes must not crash the worker; replace them instead.
    return raw.decode(errors="replace")


def unsafe_execute_cpp(candidate, reference, result, timeout, cppstd):
    """
    Compiles candidate + reference and runs the resulting binary.

    Exactly one outcome is appended to ``result``: ``"passed"``,
    ``"timed out"``, ``"failed: compilation error: ..."`` or
    ``"failed: runtime error: ..."``.

    :param candidate: C++ source text of the completion.
    :param reference: C++ test source appended after the candidate.
    :param result: list-like object receiving the outcome string.
    :param timeout: seconds allowed for the compile step and for the run step.
    :param cppstd: value for the compiler's ``-std=`` flag.
    """
    with create_tempdir():
        _write_source("test.cpp", candidate, reference)

        try:
            compilation_result = _compile("test.cpp", cppstd, timeout)
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if compilation_result.returncode != 0:
            err = _compiler_diagnostics(compilation_result)
            result.append(f"failed: compilation error: {err}")
            return

        try:
            exec_result = subprocess.run(
                ["./a.out"], timeout=timeout, capture_output=True
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if exec_result.returncode == 0:
            result.append("passed")
            return

        raw = exec_result.stderr or exec_result.stdout
        try:
            err = raw.decode()
        except UnicodeDecodeError:
            # Keep the raw bytes so the failure is still reported.
            err = raw
        result.append(f"failed: runtime error: {err}")


def invalid_compile_cpp(candidate, reference, result, timeout, cppstd):
    """
    Compiles candidate + reference, expecting the compiler to reject it.

    Appends ``"passed"`` when the compiler exits with status 1 and its
    diagnostics contain ``"note: candidate template ignored"``; otherwise
    appends a ``"failed: ..."`` message carrying the diagnostics, or
    ``"timed out"`` if the compiler exceeds ``timeout``.

    :param candidate: C++ source text of the completion.
    :param reference: C++ source with invalid uses, appended after the candidate.
    :param result: list-like object receiving the outcome string.
    :param timeout: seconds allowed for the compile step.
    :param cppstd: value for the compiler's ``-std=`` flag.
    """
    with create_tempdir():
        _write_source("invalid.cpp", candidate, reference)

        try:
            compilation_result = _compile("invalid.cpp", cppstd, timeout)
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        err = _compiler_diagnostics(compilation_result)
        if compilation_result.returncode != 1:
            result.append(f"failed: compilation succeeded: {err}")
        elif "note: candidate template ignored" in err:
            result.append("passed")
        else:
            result.append(f"failed: improperly constrained: {err}")


@contextlib.contextmanager
def create_tempdir():
    """Create a temporary directory, make it the cwd, and yield its path.

    On exit the previous working directory is restored and the temporary
    directory is removed.
    """
    with tempfile.TemporaryDirectory() as dirname:
        with chdir(dirname):
            yield dirname


@contextlib.contextmanager
def chdir(root):
    """Temporarily change the working directory to ``root``.

    ``"."`` is a no-op.  The previous directory is restored on exit, even
    when the body raises.
    """
    if root == ".":
        yield
        return
    cwd = os.getcwd()
    os.chdir(root)
    try:
        yield
    finally:
        os.chdir(cwd)