"""Gradio app: run a prompt over an uploaded text file, chunk by chunk.

The uploaded text is split into overlapping word chunks, each chunk is sent
(prefixed by the user's prompt) to a hosted chat endpoint, and every response
is uploaded as its own file to a Hugging Face dataset repository.
"""
import logging
import os
import time

import gradio as gr
from gradio_client import Client
from huggingface_hub import HfApi

# Set up logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Hosted Space that serves the "/chat" endpoint.
CHAT_SPACE = "MiniMaxAI/MiniMax-Text-01"
# Dataset repository on the Hugging Face Hub that receives the outputs.
OUTPUT_REPO = "TeacherPuffy/book2"
# Each chunk holds up to CHUNK_WORDS words; consecutive chunks start
# CHUNK_STRIDE words apart, so neighbours share CHUNK_WORDS - CHUNK_STRIDE words.
CHUNK_WORDS = 1500
CHUNK_STRIDE = 1250
# Pause between consecutive API calls (rate limiting).
API_DELAY_SECONDS = 15


def call_api(prompt):
    """Send *prompt* to the chat endpoint and return its response.

    Args:
        prompt: Full text to send (user prompt plus chunk).

    Returns:
        Whatever ``Client.predict`` returns for the "/chat" endpoint.

    Raises:
        gr.Error: If creating the client or the prediction call fails.
    """
    try:
        # Reload the Gradio client for each chunk (kept from the original design).
        client = Client(CHAT_SPACE)
        # Log only the first 100 chars of the prompt.
        logger.info("Calling API with prompt: %s...", prompt[:100])
        result = client.predict(
            message=prompt,
            max_tokens=12800,
            temperature=0.1,
            top_p=0.9,
            api_name="/chat",
        )
    except Exception as e:
        logger.error("API call failed: %s", e)
        raise gr.Error(f"API call failed: {e}") from e
    logger.info("API call successful.")
    return result


def segment_text(text, chunk_size=CHUNK_WORDS, stride=CHUNK_STRIDE):
    """Split *text* into whitespace-delimited word chunks.

    Chunks contain at most ``chunk_size`` words and start every ``stride``
    words, so with the defaults (1500 / 1250) neighbouring chunks overlap by
    250 words. A trailing chunk that would consist solely of words already
    present in the previous chunk is not emitted.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk.
        stride: Distance in words between the starts of consecutive chunks.

    Returns:
        A list of chunk strings; empty when *text* contains no words.

    Raises:
        ValueError: If ``chunk_size`` or ``stride`` is not positive.
    """
    if chunk_size <= 0 or stride <= 0:
        raise ValueError("chunk_size and stride must be positive.")

    words = text.split()
    overlap = max(chunk_size - stride, 0)
    word_count = len(words)
    chunks = [
        " ".join(words[start:start + chunk_size])
        for start in range(0, word_count, stride)
        # The previous chunk already reaches ``start + overlap``; skip a tail
        # that adds no new words.
        if start == 0 or word_count - start > overlap
    ]
    logger.info("Segmented text into %d chunks.", len(chunks))
    return chunks


def _load_raw(file):
    """Return the undecoded content behind *file*.

    Handles a filesystem path, an object exposing ``read()``, or a value that
    already is the content (e.g. ``bytes``).
    """
    if isinstance(file, (str, os.PathLike)):
        with open(file, "rb") as handle:
            return handle.read()
    if hasattr(file, "read"):
        return file.read()
    return file


def read_file_content(file):
    """Return the text of an uploaded file, decoding UTF-8 with a latin-1 fallback.

    Args:
        file: A path (``str`` / ``os.PathLike``), raw ``bytes``, or an object
            with ``read()``. NOTE(review): which of these ``gr.File`` passes
            depends on its ``type`` setting and the Gradio version - confirm
            against the installed version.

    Returns:
        The decoded text.

    Raises:
        gr.Error: If nothing was uploaded or the content cannot be read.
    """
    if file is None:
        raise gr.Error("Failed to read file: no file was uploaded.")

    try:
        raw = _load_raw(file)
        if isinstance(raw, str):
            # Source was already opened in text mode; nothing to decode.
            logger.info("File read successfully.")
            return raw
        try:
            # Try reading with UTF-8 encoding first
            content = raw.decode("utf-8")
        except UnicodeDecodeError:
            # Fallback to latin-1 encoding if UTF-8 fails. The bytes are
            # already in memory, so no seek/re-read is required.
            logger.warning("UTF-8 encoding failed. Trying latin-1 encoding.")
            content = raw.decode("latin-1")
            logger.info("File read successfully with latin-1 encoding.")
            return content
        logger.info("File read successfully with UTF-8 encoding.")
        return content
    except Exception as e:
        logger.error("Failed to read file: %s", e)
        raise gr.Error(f"Failed to read file: {e}") from e


def _upload_result(hf_api, result, idx):
    """Upload one chunk's *result* to the dataset repo as ``output_{idx}.txt``.

    Raises:
        gr.Error: If encoding or uploading the result fails.
    """
    try:
        logger.info("Uploading chunk %d to Hugging Face...", idx + 1)
        hf_api.upload_file(
            path_or_fileobj=result.encode("utf-8"),  # Convert result to bytes
            path_in_repo=f"output_{idx}.txt",  # File name in the repository
            repo_id=OUTPUT_REPO,
            repo_type="dataset",
        )
        logger.info("Chunk %d uploaded to Hugging Face successfully.", idx + 1)
    except Exception as e:
        logger.error("Failed to upload chunk %d to Hugging Face: %s", idx + 1, e)
        raise gr.Error(f"Failed to upload chunk {idx + 1} to Hugging Face: {e}") from e


def process_text(file, prompt):
    """Process an uploaded file chunk by chunk with rate-limited API calls.

    Reads and segments the file, then for every chunk calls the chat API with
    ``prompt`` followed by the chunk and uploads the response to the Hugging
    Face dataset repo, sleeping between consecutive chunks.

    Args:
        file: The uploaded file as delivered by the ``gr.File`` input.
        prompt: Instruction text prepended to every chunk.

    Returns:
        A status message once every chunk has been processed and uploaded.

    Raises:
        gr.Error: On any failure (unreadable file, missing token, API or
            upload error).
    """
    try:
        logger.info("Starting text processing...")

        # Read the file content with fallback encoding
        text = read_file_content(file)
        logger.info("Text length: %d characters.", len(text))

        # Segment the text into chunks
        chunks = segment_text(text)

        # Initialize Hugging Face API
        hf_api = HfApi(token=os.environ.get("HUGGINGFACE_TOKEN"))
        if not hf_api.token:
            raise ValueError("Hugging Face token not found in environment variables.")

        # Process each chunk with a delay between API calls
        total = len(chunks)
        for idx, chunk in enumerate(chunks):
            logger.info("Processing chunk %d/%d", idx + 1, total)
            try:
                result = call_api(f"{prompt}\n\n{chunk}")
                logger.info("Chunk %d processed successfully.", idx + 1)

                # Upload the chunk directly to Hugging Face
                _upload_result(hf_api, result, idx)

                # No need to wait after the last chunk
                if idx < total - 1:
                    logger.info(
                        "Waiting %d seconds before the next API call...",
                        API_DELAY_SECONDS,
                    )
                    time.sleep(API_DELAY_SECONDS)
            except gr.Error:
                # Already logged and worded for the user where it was raised.
                raise
            except Exception as e:
                logger.error("Failed to process chunk %d: %s", idx + 1, e)
                raise gr.Error(f"Failed to process chunk {idx + 1}: {e}") from e

        return "All chunks processed and uploaded to Hugging Face."
    except gr.Error:
        # Re-wrapping would only nest message prefixes and duplicate the log.
        raise
    except Exception as e:
        logger.error("An error occurred during processing: %s", e)
        raise gr.Error(f"An error occurred: {e}") from e


# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Text File Processor with Rate-Limited API Calls")
    with gr.Row():
        file_input = gr.File(label="Upload Text File")
        prompt_input = gr.Textbox(label="Enter Prompt")
    with gr.Row():
        output_message = gr.Textbox(label="Status Message")
    submit_button = gr.Button("Submit")

    submit_button.click(
        process_text,
        inputs=[file_input, prompt_input],
        outputs=[output_message],
    )

# Launch the Gradio app with a public link
demo.launch(share=True)