Badge
Information
Organization
James4ever0
Release Date
September 1, 2025
Console output:
Success: True
Results:
[VimGolfEnvResult(correct=True, keys='ihello world<NL>hello world<Esc>:wq<NL>', score=29)]
import vimgolf_gym
import time
import PIL.Image
def test_demo():
    """
    Run a demo of vimgolf-gym against the "vimgolf-test" environment.

    Enters insert mode, types "hello world" on two lines, then leaves insert
    mode and saves and quits Vim with ``:wq``. A screenshot is captured before
    the first keystroke, after every batch of keystrokes, and once more after
    a one-second pause; the frames are saved to ``vimgolf-test-success.gif``.

    The environment is closed even if an interaction raises, so a failing
    run still releases it. In that case no GIF is written and the exception
    propagates to the caller.
    """
    # Keystroke batches, sent in order. "\x1b" is the Escape key.
    key_batches = ("i", "hello world\n", "hello world", "\x1b:wq", "\n")

    env = vimgolf_gym.make("vimgolf-test")
    images: list[PIL.Image.Image] = []
    try:
        # Frame 0: the editor before any input.
        images.append(env.screenshot())
        for keys in key_batches:
            env.act(keys)
            images.append(env.screenshot())

        # Pause before the last frame and before reading the outcome;
        # presumably this gives Vim time to exit and the result to be
        # recorded -- TODO confirm.
        time.sleep(1)
        images.append(env.screenshot())

        print("Success:", env.success)
        print("Results:")
        # rich is optional: pretty-print when available, plain print otherwise.
        try:
            import rich
        except ImportError:
            print(env.results)
        else:
            rich.print(env.results)
    finally:
        env.close()

    write_images_to_gif(images=images, output_gif_path="vimgolf-test-success.gif")
def write_images_to_gif(
    images: list[PIL.Image.Image], output_gif_path: str, interval=1000
):
    """
    Save a sequence of PIL images as one animated GIF.

    The first image is saved to ``output_gif_path`` and the remaining images
    are appended as further frames. Every frame gets a duration of
    ``interval`` (Pillow reads GIF frame durations as milliseconds), and the
    GIF's ``loop`` option is set to 1.
    """
    first_frame, later_frames = images[0], images[1:]
    save_options = {
        "save_all": True,
        "append_images": later_frames,
        # One duration entry per frame, all equal to ``interval``.
        "duration": [interval for _ in images],
        "loop": 1,
    }
    first_frame.save(output_gif_path, **save_options)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_demo()
Console output:
Success: False
Results:
[VimGolfEnvResult(correct=False, keys=':wq<NL>', score=4)]
import vimgolf_gym
import time
import PIL.Image
def write_images_to_gif(
    images: list[PIL.Image.Image], output_gif_path: str, interval=1000
):
    """
    Write the given PIL images to ``output_gif_path`` as an animated GIF.

    ``images[0]`` becomes the first frame and the rest are appended after it.
    Each frame is shown for ``interval`` (Pillow reads GIF frame durations as
    milliseconds); the GIF's ``loop`` option is set to 1.
    """
    lead_image = images[0]
    trailing_images = images[1:]
    # One duration entry per frame, all equal to ``interval``.
    per_frame_durations = [interval for _ in range(len(images))]
    lead_image.save(
        output_gif_path,
        save_all=True,
        append_images=trailing_images,
        duration=per_frame_durations,
        loop=1,
    )
def test_local():
    """
    Check a local challenge's data, then record a failing attempt at it.

    First verifies the local dataset for the challenge: the id is listed
    among the local challenge ids, and the definition, metadata, worst
    solution and worst-solution header carry the expected values. Then opens
    the challenge in a local environment, sends ``:wq`` followed by Enter
    without editing the buffer, prints the outcome, and saves the captured
    screenshots to ``vimgolf-local-<challenge_id>-fail.gif``.

    The environment is closed even if an interaction raises, so a failing
    run still releases it. In that case no GIF is written and the exception
    propagates to the caller.
    """
    challenge_id = "4d1a1c36567bac34a9000002"

    # Dataset checks run before the environment is created.
    assert challenge_id in vimgolf_gym.list_local_challenge_ids()
    assert (
        vimgolf_gym.get_local_challenge_definition(challenge_id).client_version
        == "0.5.0"
    )
    assert (
        vimgolf_gym.get_local_challenge_metadata(challenge_id).challenge_hash
        == challenge_id
    )
    # rank and score are compared as strings, matching the stored data.
    assert vimgolf_gym.get_local_challenge_worst_solution(challenge_id).rank == "74"
    assert (
        vimgolf_gym.get_local_challenge_worst_solution_header(challenge_id).score
        == "206"
    )

    env = vimgolf_gym.make(f"vimgolf-local-{challenge_id}")
    images: list[PIL.Image.Image] = []
    try:
        # Frame 0: the editor before any input.
        images.append(env.screenshot())
        # Save and quit immediately: type ":wq", then press Enter.
        for keys in (":wq", "\n"):
            env.act(keys)
            images.append(env.screenshot())

        # Pause before the last frame and before reading the outcome;
        # presumably this gives Vim time to exit and the result to be
        # recorded -- TODO confirm.
        time.sleep(1)
        images.append(env.screenshot())

        print("Success:", env.success)
        print("Results:")
        # rich is optional: pretty-print when available, plain print otherwise.
        try:
            import rich
        except ImportError:
            print(env.results)
        else:
            rich.print(env.results)
    finally:
        env.close()

    write_images_to_gif(
        images=images, output_gif_path=f"vimgolf-local-{challenge_id}-fail.gif"
    )
# Run the local-challenge test only when executed as a script, not on import.
if __name__ == "__main__":
    test_local()
# install from pypi
pip install vimgolf-gym
# or install the latest version from github
pip install git+https://github.com/james4ever0/vimgolf-gym.git
If you do not have Vim installed locally, or want an extra layer of isolation, you can use this docker image:
# build the image
bash build_docker_image.sh
docker tag cybergod_vimgolf_gym agile4im/cybergod_vimgolf_gym
# or pull the image
docker pull agile4im/cybergod_vimgolf_gym
Before anything else, remember to sanitize any string manually before passing it into the VimGolf challenge environment; otherwise a solution may be misjudged.
import vimgolf_gym

unsanitized_string = "hello world\r\nhello again"
# format_vimgolf_string converts DOS line endings to Unix ones (dos2unix)
# and appends a trailing newline if one is missing.
# Result for the input above: "hello world\nhello again\n"
sanitized_string = vimgolf_gym.format_vimgolf_string(unsanitized_string)
Basic interactions:
import vimgolf_gym
import vimgolf_gym.dataclasses

# --- Pick an environment name (the assignments below are alternatives) ---
# a basic "hello world" challenge
env_name = "vimgolf-test"
# a local challenge, format is "vimgolf-local-<challenge_id>"
env_name = "vimgolf-local-4d1a1c36567bac34a9000002"
# an online challenge, format is "vimgolf-online-<challenge_id>"
env_name = "vimgolf-online-4d1a1c36567bac34a9000002"

# --- Create the environment (the calls below are alternatives) ---
# if you have vim installed locally
env = vimgolf_gym.make(env_name)
# or run the executor with docker
env = vimgolf_gym.make(env_name, use_docker=True)
# if you want to customize the challenge
env = vimgolf_gym.make("vimgolf-custom", custom_challenge = vimgolf_gym.dataclasses.VimGolfCustomChallenge(input="", output="hello world\n"))
# if you want to read the buffer of the editor (and avoid cheating)
env = vimgolf_gym.make(env_name, log_buffer=True)

# --- Individual operations (an API tour, not a script to run top to bottom) ---
# retrieve the editor buffer to track progress
buffer = env.buffer
# reset the env
env.reset()
# close the env
env.close()
# verify a solution by its keys, in vimgolf style
success = env.verify_keys("ihello world<NL>hello world<Esc>:wq<NL>")
# calculate relative inverse score directly
relative_inverse_score = env.calculate_relative_inverse_score(score=100)
# or if you have a known worst score
relative_inverse_score = env.calculate_relative_inverse_score(score=100, worst_score=200)

# --- Context-manager usage ---
# if you want to close the environment automatically
with vimgolf_gym.make(env_name) as env:
    # take an action, in raw string
    env.act("hello world\n")
    # take a screenshot and output a PIL image
    img = env.screenshot()
    # preview screenshot
    env.render()
    # reset the environment
    env.reset()
    # check if the environment has at least one success result
    if env.success:
        # VimGolfEnvResult: (correct: bool, keys: str, score: int)
        result: vimgolf_gym.dataclasses.VimGolfEnvResult = env.get_last_success_result()
An example custom challenge in yaml:
input: |
The second line
The first line
output: |
The first line
The second line
name: Swap lines
description: Swap the first and second lines of the input
solution: null
You can load the challenge with:
import yaml
import vimgolf_gym.dataclasses

input_file = "<path to your challenge file>"
# Decode as UTF-8 explicitly so non-ASCII challenge text is read the same way
# on every platform instead of depending on the locale's default encoding.
with open(input_file, "r", encoding="utf-8") as f:
    yaml_string = f.read()
# safe_load is used rather than yaml.load, so the challenge file is parsed
# with PyYAML's restricted "safe" loader.
yaml_obj = yaml.safe_load(yaml_string)
# Build the custom challenge model from the parsed mapping.
custom_challenge = vimgolf_gym.dataclasses.VimGolfCustomChallenge.parse_obj(yaml_obj)
The local challenges are stored in ~/.cache/cybergod-vimgolf-challenges/.
If you want to learn more about the local challenges, use the following code:
import vimgolf_gym
import vimgolf_gym.dataclasses

challenge_id = "4d1a1c36567bac34a9000002"

# list all local challenge ids
local_challenge_ids: list[str] = vimgolf_gym.list_local_challenge_ids()

# get the challenge definition
# VimGolfChallengeDefinition: (input: InputOutputModel, output: InputOutputModel, client_version: str)
# InputOutputModel: (data: str, type: str)
# The function lives in the vimgolf_gym package, so it must be called through
# the module name; a bare call would raise NameError with the imports above.
challenge: vimgolf_gym.dataclasses.VimGolfChallengeDefinition = vimgolf_gym.get_local_challenge_definition(challenge_id)

# get the challenge metadata
# VimGolfChallengeMetadata: (href: str, title: str, detail: str, challenge_hash: str)
metadata: vimgolf_gym.dataclasses.VimGolfChallengeMetadata = vimgolf_gym.get_local_challenge_metadata(challenge_id)

# get the worst solution
# VimGolfPublicSolution: (rank: str, solution: str, header: str)
solution: vimgolf_gym.dataclasses.VimGolfPublicSolution = vimgolf_gym.get_local_challenge_worst_solution(challenge_id)

# get the worst solution header
# VimGolfParsedPublicSolutionHeader: (rank: str, score: str, user_name: str, user_id: str, data: datetime)
header: vimgolf_gym.dataclasses.VimGolfParsedPublicSolutionHeader = vimgolf_gym.get_local_challenge_worst_solution_header(challenge_id)
If you want to obtain online challenge ids, you have a few options:
Use the vimgolf command:
pip3 install vimgolf
vimgolf list
We provide a simple benchmark script, vimgolf-benchmark.py, that can be used to benchmark the performance of a model in batch. The script produces a JSONL file which can be used for evaluation.
Before running the script, you need to install dependencies:
pip3 install litellm==1.76.2 vimgolf_gym==0.1.1
To use Ollama for inference, you can run the following command:
# Replace every <placeholder> with a real value before running.
# The placeholders are quoted so that a literal "<...>" is never parsed by the
# shell as a redirection.
export OLLAMA_API_BASE="<your_ollama_api_base>"
MODEL="ollama/<model_id>"
JSONL_OUTPUT="<benchmark_output>.jsonl"
DATASET_NAME="<dataset_name>"
DATASET_DIR="$(realpath "<dataset_dir>")"
DATASET_FORMAT="<dataset_format>"
RUNNER="single_shot"
# Every expansion is quoted so values containing spaces (e.g. dataset paths)
# reach the script as a single argument.
python3 vimgolf-benchmark.py \
  --output-jsonl "$JSONL_OUTPUT" \
  --model "$MODEL" \
  --dataset-name "$DATASET_NAME" \
  --dataset-dir "$DATASET_DIR" \
  --dataset-format "$DATASET_FORMAT" \
  --runner "$RUNNER"
If you want to evaluate the results, you need to pass --solution-format vimgolf-benchmark to the evaluator script.
We provide a simple evaluation script, vimgolf-evaluate.py, that can be used to evaluate performance in batch. The script takes a JSONL file and the format name of the file as input and returns the overall score of the batch.
Before running the script, you need to install dependencies:
pip3 install vimgolf-gym==0.1.1
To evaluate the output from terminal-bench adaptor for a parity experiment, you can use the following command:
# python3 -u runs with unbuffered stdout/stderr, so output reaches tee as it
# is produced. 2>&1 merges stderr into stdout, and tee -a appends the combined
# stream to the log file while still showing it on the terminal.
python3 -u vimgolf-evaluate.py \
--solution-format terminal-bench-adaptor \
--jsonl-file "<solution-result>.jsonl" \
--validator vimgolf-validator \
--solution-not-longer-than-output \
--result-savepath "<eval-result>.json" \
2>&1 | tee -a "<evaluate-result>.log"
The Unlicense