abi · clean99 · Nov 24, 2023 · Nov 24, 2023 · Nov 24, 2023 · Nov 24, 2023
diff --git a/backend/main.py b/backend/main.py
@@ -13,7 +13,7 @@
 from llm import stream_openai_response
 from mock import mock_completion
 from image_generation import create_alt_url_mapping, generate_images
-from prompts import assemble_prompt
+from prompts import assemble_prompt, assemble_instruction_generation_prompt
 from routes import screenshot
 from access_token import validate_access_token
 
@@ -204,3 +204,66 @@ async def process_chunk(content):
         )
 
     await websocket.close()
+
+
+@app.websocket("/generate-instruction")
+async def stream_code(websocket: WebSocket):
+    """Stream an LLM-generated list of screenshot differences to the client.
+
+    Reads one JSON message ("image", optional "resultImage"/"openAiApiKey") and
+    answers with "status", "chunk", "setInstruction" or "error" messages.
+    """
+    await websocket.accept()
+    params = await websocket.receive_json()
+
+    # Prefer the key sent by the client; fall back to the environment variable.
+    # .get() keeps a payload that omits the field from raising KeyError.
+    openai_api_key = params.get("openAiApiKey")
+    if openai_api_key:
+        print("Using OpenAI API key from client-side settings dialog")
+    else:
+        openai_api_key = os.environ.get("OPENAI_API_KEY")
+        if openai_api_key:
+            print("Using OpenAI API key from environment variable")
+
+    if not openai_api_key:
+        print("OpenAI API key not found")
+        no_key = "No OpenAI API key found. Please add your API key in the settings dialog or add it to backend/.env file."
+        await websocket.send_json({"type": "error", "value": no_key})
+        return
+
+    print("generating code...")
+    await websocket.send_json({"type": "status", "value": "Generating instruction..."})
+
+    async def process_chunk(content):
+        await websocket.send_json({"type": "chunk", "value": content})
+
+    try:
+        # Prompt assembly and the model call sit inside the try so that their
+        # failures are reported to the client and the socket is still closed.
+        prompt_messages = assemble_instruction_generation_prompt(
+            params["image"], params.get("resultImage")
+        )
+        if SHOULD_MOCK_AI_RESPONSE:
+            completion = await mock_completion(process_chunk)
+        else:
+            completion = await stream_openai_response(
+                prompt_messages,
+                api_key=openai_api_key,
+                base_url=None,
+                callback=lambda x: process_chunk(x),
+            )
+
+        # Write the messages dict into a log so that we can debug later
+        write_logs(prompt_messages, completion)
+
+        await websocket.send_json({"type": "setInstruction", "value": completion})
+        await websocket.send_json({"type": "status", "value": "Instruction generation complete."})
+    except Exception:
+        traceback.print_exc()
+        try:  # Best effort: the socket itself may be what failed.
+            await websocket.send_json({"type": "status", "value": "Instruction generation failed."})
+        except Exception:
+            traceback.print_exc()
+    finally:
+        await websocket.close()
diff --git a/backend/prompts.py b/backend/prompts.py
@@ -162,3 +162,63 @@ def assemble_prompt(
             "content": user_content,
         },
     ]
+
+# Prompts for the screenshot-comparison request; the format example must stay valid JSON.
+INSTUCTION_GENERATION_SYSTEM_PROMPT = """
+You are a Frontend Vision Comparison expert,
+You are required to compare two website screenshots: the first one is the original site and the second one is a redesigned version.
+Your task is to identify differences in elements and their css, focusing on layout, style, and structure.
+Do not consider the content(text, placeholder) of the elements, only the elements themselves
+Analyze the screenshots considering these categories:
+
+Lack of Elements: Identify any element present in the original but missing in the redesign.
+Redundant Elements: Spot elements in the redesign that were not in the original.
+Wrong Element Properties: Note discrepancies in element properties like size, color, font, and layout.
+
+Provide a clear conclusion as a list, specifying the element, the mistake, and its location.
+In ambiguous cases, suggest a manual review.
+Remember, this comparison is not pixel-by-pixel, but at a higher, more conceptual level.
+
+Return only the JSON array in this format:
+[
+  {
+    "element": "name, text, etc.",
+    "mistake": "wrong color, wrong size, etc.(strictly use css properties to describe)",
+    "improvement": "use correct color, use width: correct px, etc.",
+    "location": "header"
+  }
+]
+Do not include markdown "```" or "```JSON" at the start or end.
+"""
+
+INSTUCTION_GENERATION_USER_PROMPT = """
+Generate a list of differences between the two screenshots.
+"""
+
+
+def assemble_instruction_generation_prompt(image_data_url, result_image_data_url):
+    """Build the chat messages for the screenshot-comparison request.
+
+    The user message holds the original image, then the result image when one
+    is given, then the text prompt; the system prompt message comes first.
+    """
+
+    def image_part(url):
+        return {"type": "image_url", "image_url": {"url": url, "detail": "high"}}
+
+    user_content = [image_part(image_data_url)]
+    if result_image_data_url:
+        user_content.append(image_part(result_image_data_url))
+    user_content.append(
+        {
+            "type": "text",
+            "text": INSTUCTION_GENERATION_USER_PROMPT,
+        }
+    )
+
+    system_message = {
+        "role": "system",
+        "content": INSTUCTION_GENERATION_SYSTEM_PROMPT,
+    }
+    user_message = {"role": "user", "content": user_content}
+    return [system_message, user_message]
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
@@ -2,7 +2,12 @@ import { useEffect, useRef, useState } from "react";
 import ImageUpload from "./components/ImageUpload";
 import CodePreview from "./components/CodePreview";
 import Preview from "./components/Preview";
-import { CodeGenerationParams, generateCode } from "./generateCode";
+import {
+  CodeGenerationParams,
+  InstructionGenerationParams,
+  generateCode,
+  generateInstruction,
+} from "./generateCode";
 import Spinner from "./components/Spinner";
 import classNames from "classnames";
 import {
@@ -27,6 +32,7 @@ import { UrlInputSection } from "./components/UrlInputSection";
 import TermsOfServiceDialog from "./components/TermsOfServiceDialog";
 import html2canvas from "html2canvas";
 import { USER_CLOSE_WEB_SOCKET_CODE } from "./constants";
+import { calculateMistakesNum, handleInstructions } from "./lib/utils";
 import CodeTab from "./components/CodeTab";
 import OutputSettingsSection from "./components/OutputSettingsSection";
 
@@ -54,7 +60,7 @@ function App() {
 
   const [shouldIncludeResultImage, setShouldIncludeResultImage] =
     useState<boolean>(false);
-
+  const [mistakesNum, setMistakesNum] = useState<number>(0);
   const wsRef = useRef<WebSocket>(null);
 
   // When the user already has the settings in local storage, newly added keys
@@ -115,6 +121,7 @@ function App() {
 
   function doGenerateCode(params: CodeGenerationParams) {
     setExecutionConsole([]);
+    setMistakesNum(0);
     setAppState(AppState.CODING);
 
     // Merge settings with params
@@ -130,6 +137,29 @@ function App() {
     );
   }
 
+  // Stream a generated instruction into the update textarea, then post-process it.
+  function doGenerateInstruction(params: InstructionGenerationParams) {
+    setAppState(AppState.INSTRUCTION_GENERATING);
+    setUpdateInstruction("");
+    setMistakesNum(0);
+    // Mirror the streamed text locally: state updaters must stay pure, so
+    // onComplete reads this copy instead of calling setters inside an updater.
+    let instruction = "";
+    const show = (text: string) => { instruction = text; setUpdateInstruction(text); };
+    generateInstruction(
+      wsRef,
+      { ...params, ...settings }, // Merge settings with params
+      (token) => show(instruction + token),
+      (code) => show(code),
+      (line) => setExecutionConsole((prev) => [...prev, line]),
+      () => {
+        setAppState(AppState.CODE_READY);
+        setMistakesNum(calculateMistakesNum(instruction));
+        setUpdateInstruction(handleInstructions(instruction));
+      }
+    );
+  }
+
   // Initial version creation
   function doCreate(referenceImages: string[]) {
     // Reset any existing state
@@ -175,6 +205,15 @@ function App() {
     }));
   };
 
+  // Request instructions comparing the first reference image with a fresh takeScreenshot() result.
+  const instructionGenerate = async () => {
+    const [originalImage] = referenceImages;
+    doGenerateInstruction({
+      image: originalImage,
+      resultImage: await takeScreenshot(),
+    });
+  };
+
   return (
     <div className="mt-2 dark:bg-black dark:text-white">
       {IS_RUNNING_ON_CLOUD && <PicoBadge settings={settings} />}
@@ -210,7 +249,8 @@ function App() {
             )}
 
           {(appState === AppState.CODING ||
-            appState === AppState.CODE_READY) && (
+            appState === AppState.CODE_READY ||
+            appState === AppState.INSTRUCTION_GENERATING) && (
             <>
               {/* Show code preview only when coding */}
               {appState === AppState.CODING && (
@@ -231,13 +271,15 @@ function App() {
                 </div>
               )}
 
-              {appState === AppState.CODE_READY && (
+              {(appState === AppState.CODE_READY ||
+                appState === AppState.INSTRUCTION_GENERATING) && (
                 <div>
                   <div className="grid w-full gap-2">
                     <Textarea
                       placeholder="Tell the AI what to change..."
                       onChange={(e) => setUpdateInstruction(e.target.value)}
                       value={updateInstruction}
+                      disabled={appState === AppState.INSTRUCTION_GENERATING}
                     />
                     <div className="flex justify-between items-center gap-x-2">
                       <div className="font-500 text-xs text-slate-700 dark:text-white">
@@ -246,15 +288,27 @@ function App() {
                       <Switch
                         checked={shouldIncludeResultImage}
                         onCheckedChange={setShouldIncludeResultImage}
-                        className="dark:bg-gray-700"
+                        disabled={appState === AppState.INSTRUCTION_GENERATING}
                       />
                     </div>
+
                     <Button
                       onClick={doUpdate}
                       className="dark:text-white dark:bg-gray-700"
+                      disabled={appState === AppState.INSTRUCTION_GENERATING}
                     >
                       Update
                     </Button>
+
+                    <Button
+                      onClick={instructionGenerate}
+                      className="flex items-center gap-x-2"
+                      disabled={appState === AppState.INSTRUCTION_GENERATING}
+                    >
+                      {appState === AppState.INSTRUCTION_GENERATING
+                        ? "Generating Instruction..."
+                        : "Generate Instruction"}
+                    </Button>
                   </div>
                   <div className="flex items-center gap-x-2 mt-2">
                     <Button
@@ -266,6 +320,7 @@ function App() {
                     <Button
                       onClick={reset}
                       className="flex items-center gap-x-2 dark:text-white dark:bg-gray-700"
+                      disabled={appState === AppState.INSTRUCTION_GENERATING}
                     >
                       <FaUndo />
                       Reset
@@ -276,7 +331,7 @@ function App() {
 
               {/* Reference image display */}
               <div className="flex gap-x-2 mt-2">
-                <div className="flex flex-col">
+                <div className="flex flex-col items-center">
                   <div
                     className={classNames({
                       "scanning relative": appState === AppState.CODING,
@@ -291,6 +346,9 @@ function App() {
                   <div className="text-gray-400 uppercase text-sm text-center mt-1">
                     Original Screenshot
                   </div>
+                  <div className="flex flex-col mt-4 text-sm">
+                    Total Mistakes Found: {mistakesNum}
+                  </div>
                 </div>
                 <div className="bg-gray-400 px-4 py-2 rounded text-sm hidden">
                   <h2 className="text-lg mb-4 border-b border-gray-800">
@@ -322,7 +380,9 @@ function App() {
           </div>
         )}
 
-        {(appState === AppState.CODING || appState === AppState.CODE_READY) && (
+        {(appState === AppState.CODING ||
+          appState === AppState.CODE_READY ||
+          appState === AppState.INSTRUCTION_GENERATING) && (
           <div className="ml-4">
             <Tabs defaultValue="desktop">
               <div className="flex justify-end mr-8 mb-4">

diff --git a/frontend/src/generateCode.ts b/frontend/src/generateCode.ts
@@ -15,6 +15,11 @@ export interface CodeGenerationParams {
   // isImageGenerationEnabled: boolean; // TODO: Merge with Settings type in types.ts
 }
 
+export interface InstructionGenerationParams {
+  image: string; // original image; App passes referenceImages[0]
+  resultImage?: string; // optional second image; App passes the takeScreenshot() result
+}
+
 export function generateCode(
   wsRef: React.MutableRefObject<WebSocket | null>,
   params: CodeGenerationParams,
@@ -62,3 +67,52 @@ export function generateCode(
     toast.error(ERROR_MESSAGE);
   });
 }
+
+
+/**
+ * Open a websocket to the /generate-instruction endpoint, send `params` once
+ * connected, and forward each server message to the matching callback.
+ * `onComplete` runs whenever the socket closes, whatever the close code.
+ */
+export function generateInstruction(
+  wsRef: React.MutableRefObject<WebSocket | null>,
+  params: InstructionGenerationParams,
+  onChange: (chunk: string) => void,
+  onSetInstruction: (code: string) => void,
+  onStatusUpdate: (status: string) => void,
+  onComplete: () => void
+) {
+  const wsUrl = `${WS_BACKEND_URL}/generate-instruction`;
+  console.log("Connecting to backend @ ", wsUrl);
+
+  const socket = new WebSocket(wsUrl);
+  wsRef.current = socket;
+
+  socket.addEventListener("open", () => socket.send(JSON.stringify(params)));
+
+  socket.addEventListener("message", async (event: MessageEvent) => {
+    const { type, value } = JSON.parse(event.data);
+    if (type === "chunk") return onChange(value);
+    if (type === "status") return onStatusUpdate(value);
+    if (type === "setInstruction") return onSetInstruction(value);
+    if (type === "error") {
+      console.error("Error generating code", value);
+      toast.error(value);
+    }
+  });
+  socket.addEventListener("close", (event) => {
+    console.log("Connection closed", event.code, event.reason);
+    if (event.code === USER_CLOSE_WEB_SOCKET_CODE) {
+      toast.success(STOP_MESSAGE);
+    } else if (event.code !== 1000) {
+      console.error("WebSocket error code", event);
+      toast.error(ERROR_MESSAGE);
+    }
+    onComplete();
+  });
+
+  socket.addEventListener("error", (error) => {
+    console.error("WebSocket error", error);
+    toast.error(ERROR_MESSAGE);
+  });
+}