The responses from LLMs (especially local / open source ones) are inconsistent: mostly mediocre, but sometimes excellent. You often need to tweak and re-run the same prompt several times before you get something good. Can we automate that process?
I went overkill on one simple task: write a good H1. The same approach adapts to tweets, emails or any other short blob of text. Here's the process:
1. Generate a batch of variations of the text, each written in the voice of a well-known author.
2. Run head-to-head matchups between randomly paired variations, with the same model acting as the judge.
3. Rank the variations by win rate and take the winner.
It runs locally (free) and only requires ollama.
Note: The evaluation processes hundreds of rankings and may warm up your Mac. With the defaults below (6 authors × 15 variations = 90 candidates judged over 1,000 matchups), each variation gets compared roughly 22 times.
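If you haven't pulled the model yet, the ollama Python client can download it for you (this assumes the ollama server is already installed and running; pulling via the CLI works just as well):

import ollama

# One-time download of the model used below (several GB, so give it a minute).
ollama.pull('knoopx/hermes-2-pro-mistral:7b-q8_0')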
Here's the code:
import json
import random
import uuid

import ollama
import pandas as pd

OLLAMA_MODEL = 'knoopx/hermes-2-pro-mistral:7b-q8_0'
NUM_OF_VARIATIONS = 15
NUM_OF_MATCHES = 1000

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', 2000)
authors = [
    {
        "name": "Seth Godin",
        "description": "An author, entrepreneur, and marketer, Godin writes about marketing, the spread of ideas, and managing both customers and employees with respect."
    },
    {
        "name": "Paul Graham",
"description": ""
    },
    {
        "name": "James Clear",
        "description": "Author of \"Atomic Habits,\" Clear writes about habits, decision-making, and continuous improvement."
    },
    {
        "name": "Derek Sivers",
        "description": "An entrepreneur, author, and musician, Sivers writes about creativity, life philosophy, and the lessons he's learned from founding and selling CD Baby."
    },
    {
        "name": "David Ogilvy",
        "description": "Often referred to as the Father of Advertising, Ogilvy was known for his emphasis on research and consumer insights. His work for brands like Rolls-Royce and Hathaway shirts has become legendary."
    },
    {
        "name": "Stephen King",
        "description": "A prolific author of horror, suspense, and fantasy novels, King has written over 60 books and is known for his detailed character development and storytelling."
    },
]
def parse_numbered_responses(input_str, tone):
    if not input_str.strip():
        raise ValueError("Input string is empty.")
    lines = input_str.strip().split('\n')
    parsed_responses = []
    for line in lines:
        try:
            # Attempt to split each line at the first period followed by a space.
            number, text = line.split('. ', 1)
            number = int(number.strip())  # Convert number to integer.
            text = text.strip()  # Trim whitespace from text.
            generated_uuid = uuid.uuid4()
            parsed_responses.append({'number': number, 'text': text, 'tone': tone, 'uuid': generated_uuid})
        except ValueError:
            # Skip lines that do not conform to the expected format.
            continue
    if not parsed_responses:
        raise ValueError("No valid numbered responses found in the input.")
    return parsed_responses
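For reference, the model's reply is expected to be a plain numbered list, which parses into one dict per variation (the sample text below is made up):

sample = '''1. Your data stack, unified: ETL, warehouse and BI in one place.
2. Stop stitching tools together. Definite handles ETL, warehousing and BI.'''

parsed = parse_numbered_responses(sample, 'Seth Godin')
# -> [{'number': 1, 'text': 'Your data stack, unified: ...', 'tone': 'Seth Godin', 'uuid': UUID('...')},
#     {'number': 2, 'text': 'Stop stitching tools together. ...', 'tone': 'Seth Godin', 'uuid': UUID('...')}]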
def update_item_by_number(items, target_uuid, k):
    """Increments the counter k ('wins' or 'losses') on the item identified by its UUID."""
    for item in items:
        if item['uuid'] == target_uuid:
            item[k] = item.get(k, 0) + 1
            return True
    return False
def get_one_tone(h1, context, tone, n):
    system_message = 'You are a helpful copywriting assistant. Only reply with a numbered list of variations of the text provided.'
    user_prompt = f'''Context: {context}.\nPlease generate {n} variations of the following text written in the voice of {tone}. '''
    user_prompt += f'''Do not mention {tone} in the text:\n{h1}'''
    response = ollama.chat(
        model=OLLAMA_MODEL,
        messages=[
            {
                'role': 'system',
                'content': system_message,
            },
            {
                'role': 'user',
                'content': user_prompt,
            },
        ],
        options={
            'temperature': 1.5  # Run hot to get more varied wording.
        }
    )
    parsed_responses = parse_numbered_responses(response['message']['content'], tone)
    return parsed_responses
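As a quick sanity check, you can call it for a single author before kicking off the full run (hypothetical usage; assumes the ollama server is up and the model is pulled):

variations = get_one_tone(
    "Definite combines ETL, a data warehouse and BI in one modern platform.",
    "I'm using this as the H1 for my website.",
    "Seth Godin",
    3,
)
for v in variations:
    print(v['number'], v['text'])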
all_variations = []
n = NUM_OF_VARIATIONS
context = "I'm using this as the H1 for my website. Please write variations that are unique and engaging."
h1 = "Definite combines ETL, a data warehouse and BI in one modern platform."
for author in authors:
    print(f"Generating variations for {author['name']}...")
    tone = author['name']
    parsed_responses = get_one_tone(h1, context, tone, n)
    all_variations.extend(parsed_responses)
df = pd.DataFrame(all_variations)
print('Number of variations: ', len(df))
i = 0
while i < NUM_OF_MATCHES:
    print('i:', i)
    # Pick two random variations and have the model judge them head-to-head.
    selected_items = random.sample(all_variations, 2)
    system_message = 'You are a helpful copywriting assistant. Only reply with "AAA" or "BBB". Do not include any other text or explanation.'
    user_prompt = f'''Please tell me which copy is more unique and engaging. Please reply in JSON format with the key "answer" and the value of your response. The only valid options for "answer" are "AAA" or "BBB". Do not include any other text or explanation.\n
    AAA: {selected_items[0]['text']}\n\n
    BBB: {selected_items[1]['text']}
    '''
    response = ollama.chat(
        model=OLLAMA_MODEL,
        messages=[
            {
                'role': 'system',
                'content': system_message,
            },
            {
                'role': 'user',
                'content': user_prompt,
            },
        ],
        format='json',
        options={
            'temperature': 0.0  # Judge deterministically.
        }
    )
    try:
        j = json.loads(response['message']['content'])
        if j['answer'] == 'AAA':
            update_item_by_number(all_variations, selected_items[0]['uuid'], 'wins')
            update_item_by_number(all_variations, selected_items[1]['uuid'], 'losses')
        elif j['answer'] == 'BBB':
            update_item_by_number(all_variations, selected_items[1]['uuid'], 'wins')
            update_item_by_number(all_variations, selected_items[0]['uuid'], 'losses')
        else:
            print('Invalid response:', j)
    except (json.JSONDecodeError, KeyError, TypeError):
        print('Invalid response:', response)
    i += 1
df = pd.DataFrame(all_variations)
df['wins'] = df['wins'].fillna(0).astype(int)
df['losses'] = df['losses'].fillna(0).astype(int)
df['total'] = df['wins'] + df['losses']
df['win_rate'] = df['wins'] / df['total']
df = df.sort_values('win_rate', ascending=False)
winner = df.iloc[0]
print('Author win rates: ', df.groupby('tone').win_rate.mean())
print('Top 20: ', df.head(20))
print('Winner: ', winner.text)
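One caveat with raw win rate: a variation that only got sampled once or twice can sit at 1.0 off a single lucky win. A slightly more robust leaderboard requires a minimum number of matchups before ranking (the threshold of 5 below is arbitrary), and it's worth saving the full results so you don't have to re-run the tournament:

MIN_MATCHES = 5  # arbitrary; tune to taste
ranked = df[df['total'] >= MIN_MATCHES].sort_values('win_rate', ascending=False)
print(ranked.head(20))
ranked.to_csv('h1_rankings.csv', index=False)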