Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Follow up: Means of Evaluation and Instruction Automatic Generation #93

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
65 changes: 64 additions & 1 deletion backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from llm import stream_openai_response
from mock import mock_completion
from image_generation import create_alt_url_mapping, generate_images
from prompts import assemble_prompt
from prompts import assemble_prompt, assemble_instruction_generation_prompt
from routes import screenshot
from access_token import validate_access_token

Expand Down Expand Up @@ -204,3 +204,66 @@ async def process_chunk(content):
)

await websocket.close()


@app.websocket("/generate-instruction")
async def stream_code(websocket: WebSocket):
    """Stream an automatically generated update instruction to the client.

    The client sends a single JSON object with:
      - "image": the original screenshot (required),
      - "resultImage": a screenshot of the generated result (optional),
      - "openAiApiKey": an API key from the settings dialog (optional).

    The server answers with JSON messages of the form
    {"type": ..., "value": ...} where type is one of "status", "chunk",
    "setInstruction" or "error", and then closes the socket.
    """
    await websocket.accept()

    params = await websocket.receive_json()

    # Get the OpenAI API key from the request. Fall back to environment
    # variable if not provided. `.get` is used so that a request that omits
    # the field falls back to the environment instead of raising KeyError.
    openai_api_key = params.get("openAiApiKey")
    if openai_api_key:
        print("Using OpenAI API key from client-side settings dialog")
    else:
        openai_api_key = os.environ.get("OPENAI_API_KEY")
        if openai_api_key:
            print("Using OpenAI API key from environment variable")

    if not openai_api_key:
        print("OpenAI API key not found")
        await websocket.send_json(
            {
                "type": "error",
                "value": "No OpenAI API key found. Please add your API key in the settings dialog or add it to backend/.env file.",
            }
        )
        # Close explicitly so the connection is not left dangling when the
        # handler returns early.
        await websocket.close()
        return

    image = params.get("image")
    if not image:
        print("No image provided for instruction generation")
        await websocket.send_json(
            {
                "type": "error",
                "value": "No original image was provided for instruction generation.",
            }
        )
        await websocket.close()
        return

    print("generating instruction...")
    await websocket.send_json({"type": "status", "value": "Generating instruction..."})

    async def process_chunk(content):
        # Forward each streamed piece of the completion to the client.
        await websocket.send_json({"type": "chunk", "value": content})

    # "resultImage" is optional on the client side; the prompt builder skips
    # it when it is missing or empty.
    prompt_messages = assemble_instruction_generation_prompt(
        image, params.get("resultImage")
    )

    try:
        if SHOULD_MOCK_AI_RESPONSE:
            completion = await mock_completion(process_chunk)
        else:
            completion = await stream_openai_response(
                prompt_messages,
                api_key=openai_api_key,
                base_url=None,
                callback=process_chunk,
            )

        # Write the messages dict into a log so that we can debug later
        write_logs(prompt_messages, completion)

        await websocket.send_json({"type": "setInstruction", "value": completion})
        await websocket.send_json(
            {"type": "status", "value": "Instruction generation complete."}
        )
    except Exception:
        traceback.print_exc()
        # Report the failure as an "error" message so the client can surface
        # it to the user. The socket itself may be the thing that failed, so
        # this notification is best-effort only.
        try:
            await websocket.send_json(
                {"type": "error", "value": "Instruction generation failed."}
            )
        except Exception:
            print("Could not notify client about the failure")
    finally:
        await websocket.close()
60 changes: 60 additions & 0 deletions backend/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,63 @@ def assemble_prompt(
"content": user_content,
},
]


# System prompt for the instruction-generation request: asks the model to
# compare the original screenshot with the redesigned one and to answer with
# a JSON array of differences.
# NOTE: "INSTUCTION" is a misspelling of "INSTRUCTION"; the name is kept
# because assemble_instruction_generation_prompt references it.
# The format example below must itself be valid JSON (no trailing comma after
# the last object), since the model is told to return only a JSON array.
INSTUCTION_GENERATION_SYSTEM_PROMPT = """
You are a Frontend Vision Comparison expert,
You are required to compare two website screenshots: the first one is the original site and the second one is a redesigned version.
Your task is to identify differences in elements and their css, focusing on layout, style, and structure.
Do not consider the content(text, placeholder) of the elements, only the elements themselves
Analyze the screenshots considering these categories:

Lack of Elements: Identify any element present in the original but missing in the redesign.
Redundant Elements: Spot elements in the redesign that were not in the original.
Wrong Element Properties: Note discrepancies in element properties like size, color, font, and layout.

Provide a clear conclusion as a list, specifying the element, the mistake, and its location.
In ambiguous cases, suggest a manual review.
Remember, this comparison is not pixel-by-pixel, but at a higher, more conceptual level.

Return only the JSON array in this format:
[
{
"element": "name, text, etc.",
"mistake": "wrong color, wrong size, etc.(strictly use css properties to describe)",
"improvement": "use correct color, use width: correct px, etc.",
"location": "header"
}
]
Do not include markdown "```" or "```JSON" at the start or end.
"""

# Text part of the user message that accompanies the screenshot(s).
INSTUCTION_GENERATION_USER_PROMPT = """
Generate a list of differences between the two screenshots.
"""


def assemble_instruction_generation_prompt(image_data_url, result_image_data_url):
    """Build the chat messages for the instruction-generation request.

    The user message contains the original image first, then the result
    image (only when `result_image_data_url` is truthy), then the text
    prompt. Both images are requested at "high" detail.

    Returns a two-element list: the system message followed by the user
    message.
    """

    def _image_part(url):
        # One image entry of the user message content.
        return {
            "type": "image_url",
            "image_url": {"url": url, "detail": "high"},
        }

    user_content = [_image_part(image_data_url)]
    if result_image_data_url:
        user_content.append(_image_part(result_image_data_url))
    user_content.append(
        {
            "type": "text",
            "text": INSTUCTION_GENERATION_USER_PROMPT,
        }
    )

    system_message = {
        "role": "system",
        "content": INSTUCTION_GENERATION_SYSTEM_PROMPT,
    }
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]
74 changes: 67 additions & 7 deletions frontend/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@ import { useEffect, useRef, useState } from "react";
import ImageUpload from "./components/ImageUpload";
import CodePreview from "./components/CodePreview";
import Preview from "./components/Preview";
import { CodeGenerationParams, generateCode } from "./generateCode";
import {
CodeGenerationParams,
InstructionGenerationParams,
generateCode,
generateInstruction,
} from "./generateCode";
import Spinner from "./components/Spinner";
import classNames from "classnames";
import {
Expand All @@ -27,6 +32,7 @@ import { UrlInputSection } from "./components/UrlInputSection";
import TermsOfServiceDialog from "./components/TermsOfServiceDialog";
import html2canvas from "html2canvas";
import { USER_CLOSE_WEB_SOCKET_CODE } from "./constants";
import { calculateMistakesNum, handleInstructions } from "./lib/utils";
import CodeTab from "./components/CodeTab";
import OutputSettingsSection from "./components/OutputSettingsSection";

Expand Down Expand Up @@ -54,7 +60,7 @@ function App() {

const [shouldIncludeResultImage, setShouldIncludeResultImage] =
useState<boolean>(false);

const [mistakesNum, setMistakesNum] = useState<number>(0);
const wsRef = useRef<WebSocket>(null);

// When the user already has the settings in local storage, newly added keys
Expand Down Expand Up @@ -115,6 +121,7 @@ function App() {

function doGenerateCode(params: CodeGenerationParams) {
setExecutionConsole([]);
setMistakesNum(0);
setAppState(AppState.CODING);

// Merge settings with params
Expand All @@ -130,6 +137,29 @@ function App() {
);
}

// Starts an instruction-generation run: resets the instruction text and the
// mistake counter, streams the generated instruction into the textarea and,
// once the connection closes, post-processes the final text.
function doGenerateInstruction(params: InstructionGenerationParams) {
  setAppState(AppState.INSTRUCTION_GENERATING);
  setUpdateInstruction("");
  setMistakesNum(0);
  // Merge settings with params
  const updatedParams = { ...params, ...settings };

  // Local mirror of the streamed instruction text. Keeping it here lets the
  // completion handler read the final value directly instead of calling
  // setMistakesNum from inside a state updater, which React requires to be
  // a pure function (it may be invoked more than once).
  let instruction = "";

  generateInstruction(
    wsRef,
    updatedParams,
    (token) => {
      instruction += token;
      setUpdateInstruction((prev) => prev + token);
    },
    (code) => {
      instruction = code;
      setUpdateInstruction(code);
    },
    (line) => setExecutionConsole((prev) => [...prev, line]),
    () => {
      setAppState(AppState.CODE_READY);
      setMistakesNum(calculateMistakesNum(instruction));
      setUpdateInstruction(handleInstructions(instruction));
    }
  );
}

// Initial version creation
function doCreate(referenceImages: string[]) {
// Reset any existing state
Expand Down Expand Up @@ -175,6 +205,15 @@ function App() {
}));
};

// Click handler for "Generate Instruction": captures a screenshot of the
// current result and sends it together with the first reference image to
// the instruction generator.
const instructionGenerate = async () => {
  const resultImage = await takeScreenshot();
  const params: InstructionGenerationParams = {
    image: referenceImages[0],
    resultImage,
  };
  doGenerateInstruction(params);
};

return (
<div className="mt-2 dark:bg-black dark:text-white">
{IS_RUNNING_ON_CLOUD && <PicoBadge settings={settings} />}
Expand Down Expand Up @@ -210,7 +249,8 @@ function App() {
)}

{(appState === AppState.CODING ||
appState === AppState.CODE_READY) && (
appState === AppState.CODE_READY ||
appState === AppState.INSTRUCTION_GENERATING) && (
<>
{/* Show code preview only when coding */}
{appState === AppState.CODING && (
Expand All @@ -231,13 +271,15 @@ function App() {
</div>
)}

{appState === AppState.CODE_READY && (
{(appState === AppState.CODE_READY ||
appState === AppState.INSTRUCTION_GENERATING) && (
<div>
<div className="grid w-full gap-2">
<Textarea
placeholder="Tell the AI what to change..."
onChange={(e) => setUpdateInstruction(e.target.value)}
value={updateInstruction}
disabled={appState === AppState.INSTRUCTION_GENERATING}
/>
<div className="flex justify-between items-center gap-x-2">
<div className="font-500 text-xs text-slate-700 dark:text-white">
Expand All @@ -246,15 +288,27 @@ function App() {
<Switch
checked={shouldIncludeResultImage}
onCheckedChange={setShouldIncludeResultImage}
className="dark:bg-gray-700"
disabled={appState === AppState.INSTRUCTION_GENERATING}
/>
</div>

<Button
onClick={doUpdate}
className="dark:text-white dark:bg-gray-700"
disabled={appState === AppState.INSTRUCTION_GENERATING}
>
Update
</Button>

<Button
onClick={instructionGenerate}
className="flex items-center gap-x-2"
disabled={appState === AppState.INSTRUCTION_GENERATING}
>
{appState === AppState.INSTRUCTION_GENERATING
? "Generating Instruction..."
: "Generate Instruction"}
</Button>
</div>
<div className="flex items-center gap-x-2 mt-2">
<Button
Expand All @@ -266,6 +320,7 @@ function App() {
<Button
onClick={reset}
className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700"
disabled={appState === AppState.INSTRUCTION_GENERATING}
>
<FaUndo />
Reset
Expand All @@ -276,7 +331,7 @@ function App() {

{/* Reference image display */}
<div className="flex gap-x-2 mt-2">
<div className="flex flex-col">
<div className="flex flex-col items-center">
<div
className={classNames({
"scanning relative": appState === AppState.CODING,
Expand All @@ -291,6 +346,9 @@ function App() {
<div className="text-gray-400 uppercase text-sm text-center mt-1">
Original Screenshot
</div>
<div className="flex flex-col mt-4 text-sm">
Total Mistakes Found: {mistakesNum}
</div>
</div>
<div className="bg-gray-400 px-4 py-2 rounded text-sm hidden">
<h2 className="text-lg mb-4 border-b border-gray-800">
Expand Down Expand Up @@ -322,7 +380,9 @@ function App() {
</div>
)}

{(appState === AppState.CODING || appState === AppState.CODE_READY) && (
{(appState === AppState.CODING ||
appState === AppState.CODE_READY ||
appState === AppState.INSTRUCTION_GENERATING) && (
<div className="ml-4">
<Tabs defaultValue="desktop">
<div className="flex justify-end mr-8 mb-4">
Expand Down
54 changes: 54 additions & 0 deletions frontend/src/generateCode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ export interface CodeGenerationParams {
// isImageGenerationEnabled: boolean; // TODO: Merge with Settings type in types.ts
}

/** Parameters sent to the backend when requesting instruction generation. */
export interface InstructionGenerationParams {
  /** The original (reference) image. */
  image: string;
  /** Image of the current generated result; optional. */
  resultImage?: string;
}

export function generateCode(
wsRef: React.MutableRefObject<WebSocket | null>,
params: CodeGenerationParams,
Expand Down Expand Up @@ -62,3 +67,52 @@ export function generateCode(
toast.error(ERROR_MESSAGE);
});
}


/**
 * Opens a WebSocket to the `/generate-instruction` endpoint, sends `params`
 * as JSON once connected and dispatches every server message to the matching
 * callback.
 *
 * @param wsRef            Ref that receives the created socket.
 * @param params           Payload sent to the backend when the socket opens.
 * @param onChange         Called with each "chunk" message value.
 * @param onSetInstruction Called with the "setInstruction" message value.
 * @param onStatusUpdate   Called with each "status" message value.
 * @param onComplete       Called whenever the socket closes, for any reason.
 */
export function generateInstruction(
  wsRef: React.MutableRefObject<WebSocket | null>,
  params: InstructionGenerationParams,
  onChange: (chunk: string) => void,
  onSetInstruction: (code: string) => void,
  onStatusUpdate: (status: string) => void,
  onComplete: () => void
) {
  const wsUrl = `${WS_BACKEND_URL}/generate-instruction`;
  console.log("Connecting to backend @ ", wsUrl);

  const socket = new WebSocket(wsUrl);
  wsRef.current = socket;

  // Send the request payload as soon as the connection is established.
  socket.addEventListener("open", () => {
    socket.send(JSON.stringify(params));
  });

  // Route each server message by its "type" field; unknown types are ignored.
  socket.addEventListener("message", async (event: MessageEvent) => {
    const { type, value } = JSON.parse(event.data);
    switch (type) {
      case "chunk":
        onChange(value);
        break;
      case "status":
        onStatusUpdate(value);
        break;
      case "setInstruction":
        onSetInstruction(value);
        break;
      case "error":
        console.error("Error generating code", value);
        toast.error(value);
        break;
    }
  });

  // A user-initiated close shows the stop message; any other close code
  // except 1000 is reported as an error. Completion is signalled either way.
  socket.addEventListener("close", (event) => {
    console.log("Connection closed", event.code, event.reason);
    const closedByUser = event.code === USER_CLOSE_WEB_SOCKET_CODE;
    if (closedByUser) {
      toast.success(STOP_MESSAGE);
    } else if (event.code !== 1000) {
      console.error("WebSocket error code", event);
      toast.error(ERROR_MESSAGE);
    }
    onComplete();
  });

  socket.addEventListener("error", (error) => {
    console.error("WebSocket error", error);
    toast.error(ERROR_MESSAGE);
  });
}