ShowUI

Runtime error

App Files Files Community

h-siyuan committed on Nov 30, 2024

Commit

5028d04

verified ·

1 Parent(s): d337207

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -98

app.py CHANGED Viewed

@@ -66,13 +66,11 @@ def array_to_image_path(image_array, session_id):
     img.save(filename)
     return os.path.abspath(filename)
-# Function to upload the file to S3
 def upload_to_s3(file_name, bucket, object_name=None):
     """Upload a file to an S3 bucket."""
     if object_name is None:
         object_name = file_name
-    # Create an S3 client
     s3 = boto3.client('s3')
     try:
@@ -87,14 +85,10 @@ def upload_to_s3(file_name, bucket, object_name=None):
         return False
 @spaces.GPU
-def run_showui(image, query):
     """Main function for inference."""
-    session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
     image_path = array_to_image_path(image, session_id)
-    # Upload the image to S3
-    upload_to_s3(image_path, 'altair.storage', object_name=f"ootb/images/{os.path.basename(image_path)}")
     messages = [
         {
             "role": "user",
@@ -106,7 +100,6 @@ def run_showui(image, query):
         }
     ]
-    # Prepare inputs for the model
     global model
     model = model.to("cuda")
@@ -121,7 +114,6 @@ def run_showui(image, query):
     )
     inputs = inputs.to("cuda")
-    # Generate output
     generated_ids = model.generate(**inputs, max_new_tokens=128)
     generated_ids_trimmed = [
         out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
@@ -130,50 +122,53 @@ def run_showui(image, query):
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )[0]
-    # Parse the output into coordinates
     click_xy = ast.literal_eval(output_text)
-    # Draw the point on the image
     result_image = draw_point(image_path, click_xy, radius=10)
-    return result_image, str(click_xy), image_path, session_id
-# Modify the record_vote function
-def record_vote(vote_type, image_path, query, action_generated, session_id):
-    """Record a vote in a JSON file and upload to S3."""
-    vote_data = {
-        "vote_type": vote_type,
         "image_path": image_path,
         "query": query,
-        "action_generated": action_generated,
         "timestamp": datetime.now().isoformat()
     }
-    local_file_name = f"votes_{session_id}.json"
-    # Append vote data to the local JSON file
-    with open(local_file_name, "a") as f:
-        f.write(json.dumps(vote_data) + "\n")
-    # Upload the updated JSON file to S3
-    upload_to_s3(local_file_name, 'altair.storage', object_name=f"ootb/votes/{local_file_name}")
-    return f"Your {vote_type} has been recorded. Thank you!"
-# Use session_id in the handle_vote function
-def handle_vote(vote_type, image_path, query, action_generated, session_id):
-    """Handle vote recording by using the consistent image path."""
-    if image_path is None:
-        return "No image uploaded. Please upload an image before voting."
-    return record_vote(vote_type, image_path, query, action_generated, session_id)
-# Load logo and encode to Base64
 with open("./assets/showui.png", "rb") as image_file:
     base64_image = base64.b64encode(image_file.read()).decode("utf-8")
-# Define layout and UI
 def build_demo(embed_mode, concurrency_count=1):
     with gr.Blocks(title="ShowUI Demo", theme=gr.themes.Default()) as demo:
-        # State to store the consistent image path
         state_image_path = gr.State(value=None)
         state_session_id = gr.State(value=None)
@@ -181,15 +176,10 @@ def build_demo(embed_mode, concurrency_count=1):
             gr.HTML(
                 f"""
                 <div style="text-align: center; margin-bottom: 20px;">
-                    <!-- Image -->
                     <div style="display: flex; justify-content: center;">
                         <img src="data:image/png;base64,{base64_image}" alt="ShowUI" width="320" style="margin-bottom: 10px;"/>
                     </div>
-                    <!-- Description -->
                     <p>ShowUI is a lightweight vision-language-action model for GUI agents.</p>
-                    <!-- Links -->
                     <div style="display: flex; justify-content: center; gap: 15px; font-size: 20px;">
                         <a href="https://huggingface.co/showlab/ShowUI-2B" target="_blank">
                             <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-ShowUI--2B-blue" alt="model"/>
@@ -207,7 +197,6 @@ def build_demo(embed_mode, concurrency_count=1):
         with gr.Row():
             with gr.Column(scale=3):
-                # Input components
                 imagebox = gr.Image(type="numpy", label="Input Screenshot")
                 textbox = gr.Textbox(
                     show_label=True,
@@ -216,7 +205,6 @@ def build_demo(embed_mode, concurrency_count=1):
                 )
                 submit_btn = gr.Button(value="Submit", variant="primary")
-                # Placeholder examples
                 gr.Examples(
                     examples=[
                         ["./examples/app_store.png", "Download Kindle."],
@@ -234,9 +222,7 @@ def build_demo(embed_mode, concurrency_count=1):
                 )
             with gr.Column(scale=8):
-                # Output components
                 output_img = gr.Image(type="pil", label="Output Image")
-                # Add a note below the image to explain the red point
                 gr.HTML(
                     """
                     <p><strong>Note:</strong> The <span style="color: red;">red point</span> on the output image represents the predicted clickable coordinates.</p>
@@ -244,39 +230,21 @@ def build_demo(embed_mode, concurrency_count=1):
                 )
                 output_coords = gr.Textbox(label="Clickable Coordinates")
-                # Buttons for voting, flagging, regenerating, and clearing
                 with gr.Row(elem_id="action-buttons", equal_height=True):
-                    vote_btn = gr.Button(value="👍 Vote", variant="secondary")
-                    downvote_btn = gr.Button(value="👎 Downvote", variant="secondary")
-                    flag_btn = gr.Button(value="🚩 Flag", variant="secondary")
-                    regenerate_btn = gr.Button(value="🔄 Regenerate", variant="secondary")
-                    clear_btn = gr.Button(value="🗑️ Clear", interactive=True)  # Combined Clear button
-            # Define button actions
             def on_submit(image, query):
-                """Handle the submit button click."""
                 if image is None:
                     raise ValueError("No image provided. Please upload an image before submitting.")
-                # Generate consistent image path and store it in the state
-                result_image, click_coords, image_path, session_id = run_showui(image, query)
-                return result_image, click_coords, image_path, session_id
-            def on_image_upload(image):
-                """Generate a new session ID when a new image is uploaded."""
-                if image is None:
-                    raise ValueError("No image provided. Please upload an image.")
-                # Generate a new session ID
-                new_session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
-                return new_session_id
-            imagebox.upload(
-                on_image_upload,
-                inputs=imagebox,
-                outputs=state_session_id,
-                queue=False
-            )
             submit_btn.click(
                 on_submit,
@@ -287,47 +255,26 @@ def build_demo(embed_mode, concurrency_count=1):
             clear_btn.click(
                 lambda: (None, None, None, None, None),
                 inputs=None,
-                outputs=[imagebox, textbox, output_img, output_coords, state_image_path, state_session_id],  # Clear all outputs
                 queue=False
             )
-            regenerate_btn.click(
-                lambda image, query, state_image_path: run_showui(image, query),
-                [imagebox, textbox, state_image_path],
-                [output_img, output_coords, state_image_path],
-            )
-            # Record vote actions without feedback messages
-            vote_btn.click(
-                lambda image_path, query, action_generated, session_id: handle_vote(
-                    "upvote", image_path, query, action_generated, session_id
-                ),
-                inputs=[state_image_path, textbox, output_coords, state_session_id],
                 outputs=[],
                 queue=False
             )
             downvote_btn.click(
-                lambda image_path, query, action_generated, session_id: handle_vote(
-                    "downvote", image_path, query, action_generated, session_id
-                ),
-                inputs=[state_image_path, textbox, output_coords, state_session_id],
-                outputs=[],
-                queue=False
-            )
-            flag_btn.click(
-                lambda image_path, query, action_generated, session_id: handle_vote(
-                    "flag", image_path, query, action_generated, session_id
-                ),
-                inputs=[state_image_path, textbox, output_coords, state_session_id],
                 outputs=[],
                 queue=False
             )
     return demo
-# Launch the app
 if __name__ == "__main__":
     demo = build_demo(embed_mode=False)
     demo.queue(api_open=False).launch(
@@ -335,4 +282,4 @@ if __name__ == "__main__":
         server_port=7860,
         ssr_mode=False,
         debug=True,
-    )

     img.save(filename)
     return os.path.abspath(filename)
 def upload_to_s3(file_name, bucket, object_name=None):
     """Upload a file to an S3 bucket."""
     if object_name is None:
         object_name = file_name
     s3 = boto3.client('s3')
     try:
         return False
 @spaces.GPU
+def run_showui(image, query, session_id):
     """Main function for inference."""
     image_path = array_to_image_path(image, session_id)
     messages = [
         {
             "role": "user",
         }
     ]
     global model
     model = model.to("cuda")
     )
     inputs = inputs.to("cuda")
     generated_ids = model.generate(**inputs, max_new_tokens=128)
     generated_ids_trimmed = [
         out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
     )[0]
     click_xy = ast.literal_eval(output_text)
     result_image = draw_point(image_path, click_xy, radius=10)
+    return result_image, str(click_xy), image_path
def save_and_upload_data(image_path, query, session_id, votes=None):
    """Write one session's record to a local JSON file and push it to S3.

    The record holds the image path, the query, the vote counters and an
    ISO-formatted creation timestamp. It is written to
    ``data_<session_id>.json`` in the working directory and then uploaded to
    the ``altair.storage`` bucket under the ``ootb/`` prefix.

    Args:
        image_path: Path of the saved input image.
        query: Text query that was submitted with the image.
        session_id: Identifier embedded in the JSON file name.
        votes: Optional vote counters; when missing or empty, both counters
            start at zero.

    Returns:
        The record dict that was written.
    """
    if not votes:
        votes = {"upvotes": 0, "downvotes": 0}

    record = dict(
        image_path=image_path,
        query=query,
        votes=votes,
        timestamp=datetime.now().isoformat(),
    )

    json_name = f"data_{session_id}.json"
    with open(json_name, "w") as handle:
        handle.write(json.dumps(record))

    upload_to_s3(json_name, 'altair.storage', object_name=f"ootb/{json_name}")
    return record
def update_vote(vote_type, session_id):
    """Increment one vote counter in a session's JSON record and re-upload it.

    Reads ``data_<session_id>.json`` from the working directory, bumps the
    matching counter under ``"votes"``, rewrites the file and uploads it to
    the ``altair.storage`` bucket under the ``ootb/`` prefix.

    Args:
        vote_type: ``"upvote"`` or ``"downvote"``.
        session_id: Identifier embedded in the record's file name. ``None``
            means nothing has been submitted yet; no file is touched then.

    Returns:
        A human-readable status message.

    Raises:
        ValueError: If ``vote_type`` is not a supported vote.
    """
    counter_for_vote = {"upvote": "upvotes", "downvote": "downvotes"}
    if vote_type not in counter_for_vote:
        # Previously an unknown vote changed nothing but was still reported
        # as "recorded"; fail loudly instead.
        raise ValueError(f"Unsupported vote type: {vote_type!r}")

    if session_id is None:
        # The vote buttons can be clicked before any submission (or after
        # Clear), when there is no record file to update.
        return "Nothing to vote on yet. Please submit an image first."

    local_file_name = f"data_{session_id}.json"
    try:
        with open(local_file_name, "r") as f:
            data = json.load(f)
    except FileNotFoundError:
        return "No saved record was found for this session. Please submit again before voting."

    data["votes"][counter_for_vote[vote_type]] += 1

    with open(local_file_name, "w") as f:
        json.dump(data, f)

    # NOTE(review): upload_to_s3's return value is ignored, so a failed upload
    # is still reported to the user as recorded.
    upload_to_s3(local_file_name, 'altair.storage', object_name=f"ootb/{local_file_name}")
    return f"Your {vote_type} has been recorded. Thank you!"
 # Read the logo when the module is loaded and base64-encode it; build_demo()
 # inlines the result into the header HTML as a data: URI.
 with open("./assets/showui.png", "rb") as image_file:
     base64_image = base64.b64encode(image_file.read()).decode("utf-8")
 def build_demo(embed_mode, concurrency_count=1):
     with gr.Blocks(title="ShowUI Demo", theme=gr.themes.Default()) as demo:
         state_image_path = gr.State(value=None)
         state_session_id = gr.State(value=None)
             gr.HTML(
                 f"""
                 <div style="text-align: center; margin-bottom: 20px;">
                     <div style="display: flex; justify-content: center;">
                         <img src="data:image/png;base64,{base64_image}" alt="ShowUI" width="320" style="margin-bottom: 10px;"/>
                     </div>
                     <p>ShowUI is a lightweight vision-language-action model for GUI agents.</p>
                     <div style="display: flex; justify-content: center; gap: 15px; font-size: 20px;">
                         <a href="https://huggingface.co/showlab/ShowUI-2B" target="_blank">
                             <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-ShowUI--2B-blue" alt="model"/>
         with gr.Row():
             with gr.Column(scale=3):
                 imagebox = gr.Image(type="numpy", label="Input Screenshot")
                 textbox = gr.Textbox(
                     show_label=True,
                 )
                 submit_btn = gr.Button(value="Submit", variant="primary")
                 gr.Examples(
                     examples=[
                         ["./examples/app_store.png", "Download Kindle."],
                 )
             with gr.Column(scale=8):
                 output_img = gr.Image(type="pil", label="Output Image")
                 gr.HTML(
                     """
                     <p><strong>Note:</strong> The <span style="color: red;">red point</span> on the output image represents the predicted clickable coordinates.</p>
                 )
                 output_coords = gr.Textbox(label="Clickable Coordinates")
                 with gr.Row(elem_id="action-buttons", equal_height=True):
+                    upvote_btn = gr.Button(value="Looks good!", variant="secondary")
+                    downvote_btn = gr.Button(value="Too bad!", variant="secondary")
+                    clear_btn = gr.Button(value="🗑️ Clear", interactive=True)
             def on_submit(image, query):
                 """Run inference for a submitted screenshot and persist the request.

                 Args:
                     image: The uploaded screenshot, or None when nothing was uploaded.
                     query: The text instruction entered by the user.

                 Returns:
                     A 4-tuple ``(result_image, click_coords, image_path, session_id)``.

                 Raises:
                     gr.Error: If no image was provided, so the message is shown in the UI.
                 """
                 if image is None:
                     raise gr.Error("No image provided. Please upload an image before submitting.")

                 # A timestamp with one-second resolution is shared by every request
                 # arriving in the same second, which would make their image and vote
                 # files overwrite each other. Keep the sortable timestamp prefix and
                 # add a random suffix to make the id unique per submission.
                 import uuid

                 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                 session_id = f"{timestamp}_{uuid.uuid4().hex[:8]}"

                 result_image, click_coords, image_path = run_showui(image, query, session_id)
                 save_and_upload_data(image_path, query, session_id)
                 return result_image, click_coords, image_path, session_id
             submit_btn.click(
                 on_submit,
             # Reset the inputs, the outputs and both session states. The callback
             # must return exactly one value per component listed in `outputs`
             # (six here); it previously returned only five.
             clear_btn.click(
                 lambda: (None,) * 6,
                 inputs=None,
                 outputs=[imagebox, textbox, output_img, output_coords, state_image_path, state_session_id],
                 queue=False
             )
+            upvote_btn.click(
+                lambda session_id: update_vote("upvote", session_id),
+                inputs=state_session_id,
                 outputs=[],
                 queue=False
             )
             downvote_btn.click(
+                lambda session_id: update_vote("downvote", session_id),
+                inputs=state_session_id,
                 outputs=[],
                 queue=False
             )
     return demo
 if __name__ == "__main__":
     demo = build_demo(embed_mode=False)
     demo.queue(api_open=False).launch(
         server_port=7860,
         ssr_mode=False,
         debug=True,
+    )