Spaces:
Running
Running
File size: 5,482 Bytes
946a274 0241b66 d7d471b a40cc94 d7d471b 946a274 d7d471b 30c739d d7d471b 946a274 0241b66 30c739d 946a274 af926fe d7d471b 946a274 23c71d8 0241b66 946a274 d7d471b 23c71d8 a40cc94 0241b66 d7d471b af926fe d7d471b 6f874f7 d55b380 d7d471b 6f874f7 0241b66 af926fe 6f874f7 af926fe 6f874f7 d7d471b 0241b66 946a274 d7d471b 946a274 6f874f7 946a274 0f4b7f9 946a274 39bca12 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import gradio as gr
from gradio_client import Client
from huggingface_hub import HfApi
import logging
import time
import os
# Set up logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# Function to call the API and get the result
def call_api(prompt):
    """Send *prompt* to the MiniMax-Text-01 Space and return its reply.

    Args:
        prompt: Full text to submit as the chat message. Only its first
            100 characters are written to the log.

    Returns:
        Whatever ``Client.predict`` returns for the ``/chat`` endpoint.

    Raises:
        gr.Error: If creating the client or performing the request raises
            any exception; the original message is embedded in the error.
    """
    # Fixed generation settings forwarded to the endpoint on every request.
    request = {
        "message": prompt,
        "max_tokens": 12800,
        "temperature": 0.1,
        "top_p": 0.9,
        "api_name": "/chat",
    }
    try:
        # A fresh client is built on every call (once per chunk) instead of
        # being shared between requests.
        space = Client("MiniMaxAI/MiniMax-Text-01")
        preview = prompt[:100]
        logger.info(f"Calling API with prompt: {preview}...")
        reply = space.predict(**request)
        logger.info("API call successful.")
        return reply
    except Exception as err:
        logger.error(f"API call failed: {err}")
        raise gr.Error(f"API call failed: {str(err)}")
# Function to segment the text into overlapping chunks of up to 1500 words (consecutive chunks share 250 words)
def segment_text(text, chunk_size=1500, overlap=250):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart, so each
    chunk repeats the last ``overlap`` words of the previous one. With the
    defaults this is a 1500-word window advanced by 1250 words.

    Segmentation stops as soon as a chunk reaches the end of the text, so
    no trailing chunk is emitted that would contain nothing but words
    already present in the previous chunk.

    Args:
        text: The text to split. Words are delimited by any whitespace and
            are re-joined with single spaces.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by consecutive chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings; empty if *text* contains no words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer.")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size.")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        # This chunk already covers the tail of the text; any later window
        # would consist solely of overlap words.
        if end >= len(words):
            break
    logger.info(f"Segmented text into {len(chunks)} chunks.")
    return chunks
# Function to read file content with fallback encoding
def read_file_content(file):
    """Return the text content of *file*, decoding UTF-8 with a latin-1 fallback.

    Args:
        file: One of
            * a ``str`` or ``os.PathLike`` filesystem path, which is opened
              and read in binary mode (a ``str`` is always treated as a
              path, never as the content itself);
            * an object with a ``read()`` method, read once from its
              current position;
            * a bytes-like object exposing ``decode``.

    Returns:
        The decoded text. Bytes are decoded as UTF-8 first and as latin-1
        if that fails. If ``read()`` already yields ``str`` (text-mode
        handle), it is returned unchanged.

    Raises:
        gr.Error: If *file* is ``None`` or the content cannot be read.
    """
    try:
        if file is None:
            raise ValueError("No file was provided.")

        if isinstance(file, (str, os.PathLike)):
            with open(file, "rb") as handle:
                raw = handle.read()
        elif hasattr(file, "read"):
            raw = file.read()
        else:
            raw = file

        # Text-mode handles return str, which needs no decoding.
        if isinstance(raw, str):
            logger.info("File read successfully.")
            return raw

        # The raw bytes are kept in memory, so the fallback decode does not
        # need to rewind and re-read the source.
        try:
            content = raw.decode('utf-8')
        except UnicodeDecodeError:
            logger.warning("UTF-8 encoding failed. Trying latin-1 encoding.")
            content = raw.decode('latin-1')
            logger.info("File read successfully with latin-1 encoding.")
            return content
        logger.info("File read successfully with UTF-8 encoding.")
        return content
    except Exception as e:
        logger.error(f"Failed to read file: {e}")
        raise gr.Error(f"Failed to read file: {str(e)}") from e
# Function to process the text and make API calls with rate limiting
def process_text(file, prompt):
    """Run each chunk of the uploaded file through the chat API and upload the replies.

    The file is read and segmented, then for every chunk the prompt and the
    chunk are sent to ``call_api`` and the reply is uploaded to the
    ``TeacherPuffy/book2`` dataset repository as ``output_<index>.txt``
    (zero-based index). A 15-second pause separates consecutive API calls.

    Args:
        file: The uploaded file, in any form accepted by
            ``read_file_content``.
        prompt: Instruction text placed before each chunk, separated from
            it by a blank line.

    Returns:
        A fixed status message once every chunk has been processed.

    Raises:
        gr.Error: On any failure. An error that is already a ``gr.Error``
            propagates unchanged, so each failure is reported once instead
            of being re-wrapped by every enclosing handler.
    """
    try:
        logger.info("Starting text processing...")
        text = read_file_content(file)
        logger.info(f"Text length: {len(text)} characters.")
        chunks = segment_text(text)

        # Fail before any API call is made if uploads cannot be authenticated.
        token = os.environ.get("HUGGINGFACE_TOKEN")
        if not token:
            raise ValueError("Hugging Face token not found in environment variables.")
        hf_api = HfApi(token=token)

        # Repository name on Hugging Face Hub
        repo_name = "TeacherPuffy/book2"
        total = len(chunks)

        for idx, chunk in enumerate(chunks):
            chunk_no = idx + 1
            logger.info(f"Processing chunk {chunk_no}/{total}")
            try:
                result = call_api(f"{prompt}\n\n{chunk}")
                logger.info(f"Chunk {chunk_no} processed successfully.")

                # Upload the API reply for this chunk straight from memory.
                try:
                    logger.info(f"Uploading chunk {chunk_no} to Hugging Face...")
                    hf_api.upload_file(
                        # assumes the /chat endpoint returns a str — TODO confirm
                        path_or_fileobj=result.encode('utf-8'),
                        path_in_repo=f"output_{idx}.txt",
                        repo_id=repo_name,
                        repo_type="dataset",
                    )
                    logger.info(f"Chunk {chunk_no} uploaded to Hugging Face successfully.")
                except Exception as e:
                    logger.error(f"Failed to upload chunk {chunk_no} to Hugging Face: {e}")
                    raise gr.Error(
                        f"Failed to upload chunk {chunk_no} to Hugging Face: {str(e)}"
                    ) from e

                # Rate limiting: pause between calls, but not after the last chunk.
                if chunk_no < total:
                    logger.info("Waiting 15 seconds before the next API call...")
                    time.sleep(15)
            except Exception as e:
                logger.error(f"Failed to process chunk {chunk_no}: {e}")
                if isinstance(e, gr.Error):
                    # Already a user-facing error from an inner stage.
                    raise
                raise gr.Error(f"Failed to process chunk {chunk_no}: {str(e)}") from e

        return "All chunks processed and uploaded to Hugging Face."
    except gr.Error:
        # Raised and logged by an inner stage; pass it through untouched.
        raise
    except Exception as e:
        logger.error(f"An error occurred during processing: {e}")
        raise gr.Error(f"An error occurred: {str(e)}") from e
# Gradio interface
# UI: a file upload and a prompt box as inputs, a status box as output, and
# a button that runs process_text on the two inputs.
with gr.Blocks() as demo:
    gr.Markdown("## Text File Processor with Rate-Limited API Calls")
    with gr.Row():
        file_input = gr.File(label="Upload Text File")
        prompt_input = gr.Textbox(label="Enter Prompt")
    with gr.Row():
        output_message = gr.Textbox(label="Status Message")
    # NOTE(review): the original indentation was lost, so whether the button
    # sat inside the row above or below it is assumed here — confirm layout.
    submit_button = gr.Button("Submit")
    # The string returned by process_text is shown in the status box.
    submit_button.click(
        process_text,
        inputs=[file_input, prompt_input],
        outputs=[output_message],
    )

# Launch the Gradio app with a public link
demo.launch(share=True)