CodeXGLUE is a comprehensive benchmark dataset and open challenge for code intelligence, covering 14 datasets across 10 tasks. The tasks include code completion, defect detection, code translation, natural language code search, code summarization, and documentation translation.
from benchthing import Bench
# Build a Bench client, passing the "codexglue" identifier to its constructor.
bench = Bench("codexglue")

# Start a run under task id "1".
# NOTE(review): `yourCodeModels` is not defined in this snippet; it looks like
# a placeholder the caller must supply before running -- confirm.
bench.run(benchmark="codexglue", task_id="1", models=yourCodeModels)

# Look up the result using the same id string that was passed as `task_id`.
# NOTE(review): whether `run` blocks until the result is ready is not visible
# here -- confirm against the benchthing documentation.
result = bench.get_result("1")