TeacherPuffy committed on
Commit
23c71d8
·
verified ·
1 Parent(s): a4e88d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -8
app.py CHANGED
@@ -36,19 +36,37 @@ def segment_text(text):
36
  logger.info(f"Segmented text into {len(chunks)} chunks.")
37
  return chunks
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Function to process the text and make API calls with rate limiting
40
  def process_text(file, prompt):
41
  try:
42
  logger.info("Starting text processing...")
43
 
44
- # Read the file content properly
45
- if hasattr(file, "read"):
46
- # If the file is a file-like object, read its content
47
- text = file.read().decode('utf-8') # Decode bytes to string
48
- else:
49
- # If the file is already a string, use it directly
50
- text = file
51
-
52
  logger.info(f"Text length: {len(text)} characters.")
53
 
54
  # Segment the text into chunks
 
36
  logger.info(f"Segmented text into {len(chunks)} chunks.")
37
  return chunks
38
 
39
+ # Function to read file content with fallback encoding
40
+ def read_file_content(file):
41
+ try:
42
+ # Try reading with UTF-8 encoding first
43
+ if hasattr(file, "read"):
44
+ content = file.read().decode('utf-8')
45
+ else:
46
+ content = file.decode('utf-8')
47
+ logger.info("File read successfully with UTF-8 encoding.")
48
+ return content
49
+ except UnicodeDecodeError:
50
+ # Fallback to latin-1 encoding if UTF-8 fails
51
+ logger.warning("UTF-8 encoding failed. Trying latin-1 encoding.")
52
+ if hasattr(file, "read"):
53
+ file.seek(0) # Reset file pointer to the beginning
54
+ content = file.read().decode('latin-1')
55
+ else:
56
+ content = file.decode('latin-1')
57
+ logger.info("File read successfully with latin-1 encoding.")
58
+ return content
59
+ except Exception as e:
60
+ logger.error(f"Failed to read file: {e}")
61
+ raise gr.Error(f"Failed to read file: {str(e)}")
62
+
63
  # Function to process the text and make API calls with rate limiting
64
  def process_text(file, prompt):
65
  try:
66
  logger.info("Starting text processing...")
67
 
68
+ # Read the file content with fallback encoding
69
+ text = read_file_content(file)
 
 
 
 
 
 
70
  logger.info(f"Text length: {len(text)} characters.")
71
 
72
  # Segment the text into chunks