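This matrix displays the results of matches among six LLMs and a random-play generator (Random-Play). Each player played against every other player (excluding itself) five times per game and prompt type. Each row shows the results of the games between the player in the 'Player' column and each opponent named in the column headers; the all-zero cells where a player meets its own column are the excluded self-matches.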
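For each game and prompt-type combination, each player played 5 games against each opponent, totaling 25 games per LLM against the other LLMs for the list and illustration prompts, and 20 games per LLM for the image prompt type (the image results contain no Llama3-70b row or column). The 5 games against Random-Play shown in each row are in addition to these totals.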
| Game | Prompt | Player | Claude-3-Sonnet | | | | | Gemini-1.5-flash | | | | | Gemini-1.5-pro | | | | | GPT-4-turbo | | | | | GPT-4o | | | | | Llama3-70b | | | | | Random-Play | | | | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| | | | W1st | W2nd | Dr | Dq1st | Dq2nd | W1st | W2nd | Dr | Dq1st | Dq2nd | W1st | W2nd | Dr | Dq1st | Dq2nd | W1st | W2nd | Dr | Dq1st | Dq2nd | W1st | W2nd | Dr | Dq1st | Dq2nd | W1st | W2nd | Dr | Dq1st | Dq2nd | W1st | W2nd | Dr | Dq1st | Dq2nd |
| TicTacToe | List | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 2 | 0 | 3 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | 2 |
| | | Gemini-1.5-flash | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 1 | 2 | 1 | 0 | 1 |
| | | Gemini-1.5-pro | 4 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 4 | 0 | 1 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 2 |
| | | GPT-4-turbo | 1 | 1 | 2 | 1 | 0 | 1 | 4 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 2 |
| | | GPT-4o | 5 | 0 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Llama3-70b | 4 | 0 | 1 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 3 |
| | | Random-Play | 2 | 0 | 0 | 3 | 0 | 1 | 2 | 0 | 2 | 0 | 0 | 2 | 0 | 3 | 0 | 1 | 1 | 0 | 3 | 0 | 1 | 2 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| | Illustration | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 2 | 2 | 3 | 2 | 0 | 0 | 0 | 1 | 2 | 1 | 0 | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 2 |
| | | Gemini-1.5-flash | 1 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 4 | 0 | 1 | 0 | 0 | 5 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 |
| | | Gemini-1.5-pro | 1 | 3 | 1 | 0 | 0 | 2 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 2 |
| | | GPT-4-turbo | 4 | 1 | 0 | 0 | 0 | 2 | 0 | 1 | 0 | 2 | 1 | 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 2 |
| | | GPT-4o | 3 | 2 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 2 |
| | | Llama3-70b | 3 | 2 | 0 | 0 | 0 | 3 | 0 | 2 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 2 |
| | | Random-Play | 2 | 0 | 0 | 3 | 0 | 1 | 1 | 0 | 2 | 1 | 1 | 2 | 0 | 2 | 0 | 1 | 1 | 0 | 3 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 3 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
| | Image | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 2 | 0 | 2 | 0 | 0 | 2 | 1 | 1 | 3 | 0 | 1 | 0 | 1 | 1 | 0 | 3 | 0 | | | | | | 3 | 0 | 0 | 0 | 2 |
| | | Gemini-1.5-flash | 2 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 4 | 0 | 1 | 0 | 0 | 1 | 3 | 2 | 1 | 0 | 0 | 2 | | | | | | 2 | 0 | 0 | 0 | 3 |
| | | Gemini-1.5-pro | 1 | 2 | 0 | 1 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 2 | 0 | 1 | 0 | 4 | 0 | | | | | | 1 | 1 | 0 | 1 | 2 |
| | | GPT-4-turbo | 5 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 3 | 0 | | | | | | 3 | 0 | 0 | 1 | 1 |
| | | GPT-4o | 5 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 2 | 0 | 2 | 0 | 0 | 0 | 3 | 1 | 2 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | | | | | | 3 | 0 | 0 | 0 | 2 |
| | | Random-Play | 1 | 0 | 0 | 3 | 1 | 1 | 2 | 0 | 2 | 0 | 1 | 2 | 0 | 2 | 0 | 2 | 0 | 0 | 0 | 3 | 0 | 1 | 0 | 4 | 0 | | | | | | 0 | 0 | 0 | 0 | 0 |
| Connect4 | List | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 |
| | | Gemini-1.5-flash | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Gemini-1.5-pro | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 0 | 0 | 1 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 |
| | | GPT-4-turbo | 4 | 1 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 |
| | | GPT-4o | 4 | 1 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Llama3-70b | 2 | 3 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 |
| | | Random-Play | 1 | 4 | 0 | 0 | 0 | 2 | 2 | 0 | 0 | 1 | 3 | 2 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| | Illustration | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 0 | 1 | 0 | 1 | 2 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 5 | 3 | 1 | 0 | 1 | 0 | 2 | 2 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Gemini-1.5-flash | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 3 | 4 | 0 | 0 | 1 | 0 | 3 | 2 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Gemini-1.5-pro | 3 | 2 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 4 | 3 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | 3 | 0 | 3 | 1 | 0 | 1 | 0 |
| | | GPT-4-turbo | 1 | 0 | 0 | 4 | 0 | 1 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 3 | 0 | 0 | 2 | 0 |
| | | GPT-4o | 2 | 1 | 0 | 0 | 2 | 4 | 1 | 0 | 0 | 0 | 2 | 2 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 |
| | | Llama3-70b | 3 | 2 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 5 | 3 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 |
| | | Random-Play | 1 | 4 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| | Image | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 3 | 1 | 2 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 3 | 2 | | | | | | 4 | 0 | 0 | 1 | 0 |
| | | Gemini-1.5-flash | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | 0 | 5 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 3 | 0 | | | | | | 4 | 0 | 0 | 1 | 0 |
| | | Gemini-1.5-pro | 0 | 0 | 0 | 4 | 1 | 4 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 2 | 1 | 0 | 2 | 0 | | | | | | 2 | 2 | 0 | 1 | 0 |
| | | GPT-4-turbo | 1 | 2 | 0 | 2 | 0 | 3 | 2 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | 0 | 1 | 0 | | | | | | 4 | 1 | 0 | 0 | 0 |
| | | GPT-4o | 1 | 1 | 0 | 0 | 3 | 5 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 2 | 1 | 3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | | | | | 4 | 1 | 0 | 0 | 0 |
| | | Random-Play | 0 | 4 | 0 | 0 | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | | | | | | 0 | 0 | 0 | 0 | 0 |
| Gomoku | List | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 1 | 4 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Gemini-1.5-flash | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Gemini-1.5-pro | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 4 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | GPT-4-turbo | 4 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 2 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | GPT-4o | 3 | 0 | 0 | 2 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 2 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Llama3-70b | 4 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | Random-Play | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 4 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| | Illustration | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | 0 | 1 | 0 | 0 | 4 | 0 | 1 | 0 | 0 | 4 | 3 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | 5 | 0 | 0 | 0 | 0 |
| | | Gemini-1.5-flash | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 1 | 0 | 4 | 0 | 0 | 2 | 0 | 3 | 0 | 0 | 0 | 0 | 1 | 4 | 3 | 0 | 0 | 2 | 0 |
| | | Gemini-1.5-pro | 1 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 2 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 1 | 4 | 3 | 0 | 0 | 2 | 0 |
| | | GPT-4-turbo | 3 | 0 | 0 | 2 | 0 | 2 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | 0 | 4 | 0 | 0 | 1 | 0 | 5 | 0 | 0 | 0 | 0 |
| | | GPT-4o | 4 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 5 | 0 | 1 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 4 | 5 | 0 | 0 | 0 | 0 |
| | | Llama3-70b | 3 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 4 | 0 | 0 | 0 | 1 | 4 | 0 | 3 | 0 | 1 | 1 | 3 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 3 | 0 |
| | | Random-Play | 0 | 3 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 4 | 0 | 1 | 0 | 0 | 4 | 0 | 1 | 0 | 0 | 4 | 0 | 3 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 |
| | Image | Claude-3-Sonnet | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 4 | 1 | | | | | | 4 | 0 | 0 | 1 | 0 |
| | | Gemini-1.5-flash | 2 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 4 | 1 | | | | | | 3 | 0 | 0 | 2 | 0 |
| | | Gemini-1.5-pro | 1 | 0 | 0 | 3 | 1 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 5 | 0 | | | | | | 3 | 0 | 0 | 2 | 0 |
| | | GPT-4-turbo | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 0 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 0 | | | | | | 0 | 0 | 0 | 5 | 0 |
| | | GPT-4o | 0 | 2 | 0 | 1 | 2 | 1 | 2 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 5 | 0 | 0 | 0 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | | | | | | 4 | 0 | 0 | 1 | 0 |
| | | Random-Play | 0 | 5 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 2 | 0 | 4 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 4 | 0 | 3 | 0 | 0 | 2 | | | | | | 0 | 0 | 0 | 0 | 0 |