Svngoku committed
Commit 769b82c · verified · 1 Parent(s): c338980

Update README.md

Files changed (1)
  1. README.md +237 -0
README.md CHANGED
@@ -54,3 +54,240 @@ or
```
./llama-server --hf-repo Svngoku/ReaderLM-v2-Q8_0-GGUF --hf-file readerlm-v2-q8_0.gguf -c 2048
```
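
Once the server is running you can smoke-test it from Python. A minimal sketch, assuming llama-server's default address `127.0.0.1:8080` and its OpenAI-compatible `/v1/chat/completions` route (the HTML snippet is a made-up example):

```py
import requests

# Ask the locally served ReaderLM-v2 to convert a tiny HTML snippet.
resp = requests.post(
    "http://127.0.0.1:8080/v1/chat/completions",
    json={
        "messages": [
            {"role": "user", "content": "Convert this HTML to Markdown: <h1>Hello</h1>"}
        ],
        "max_tokens": 256,
    },
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])
```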

## VLLM Inference

```py
# -*- coding: utf-8 -*-
"""Untitled64.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1hVqCTm6XLJmrOjkaIYLHXgOTg2ffnhue
"""

!pip install vllm
71
+
72
+ model_name = 'Svngoku/ReaderLM-v2-Q8_0-GGUF' # @param ["jinaai/ReaderLM-v2", "jinaai/reader-lm-1.5b", "Svngoku/ReaderLM-v2-Q8_0-GGUF"]
73
+ max_model_len = 256000 # @param {type:"integer"}
74
+ # @markdown ---
75
+ # @markdown ### SamplingParams:
76
+
77
+ top_k = 1 # @param {type:"integer"}
78
+ temperature = 0 # @param {type:"slider", min:0, max:1, step:0.1}
79
+ repetition_penalty = 1.05 # @param {type:"number"}
80
+ presence_penalty = 0.25 # @param {type:"slider", min:0, max:1, step:0.1}
81
+ max_tokens = 8192 # @param {type:"integer"}
82
+ # @markdown ---
83
+

from vllm import SamplingParams

sampling_params = SamplingParams(
    temperature=temperature,
    top_k=top_k,
    presence_penalty=presence_penalty,
    repetition_penalty=repetition_penalty,
    max_tokens=max_tokens,
)

print('sampling_params', sampling_params)

# Download the quantized weights and the upstream tokenizer.
!wget https://huggingface.co/Svngoku/ReaderLM-v2-Q8_0-GGUF/resolve/main/readerlm-v2-q8_0.gguf
!wget https://huggingface.co/jinaai/ReaderLM-v2/resolve/main/tokenizer.json

# Option 1: serve the model over an OpenAI-compatible HTTP API
# (this blocks the cell; the rest of the notebook uses the in-process engine below).
!vllm serve /content/readerlm-v2-q8_0.gguf --tokenizer /content/tokenizer.json
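
# If you use the served endpoint instead of the in-process engine below, you
# can query it with the OpenAI client. A minimal sketch, assuming vLLM's
# default address http://localhost:8000/v1 (not part of the original notebook):
#
#   from openai import OpenAI
#   client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
#   out = client.chat.completions.create(
#       model="/content/readerlm-v2-q8_0.gguf",
#       messages=[{"role": "user", "content": "Convert to Markdown: <h1>Hi</h1>"}],
#   )
#   print(out.choices[0].message.content)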

# Option 2: load the GGUF checkpoint in-process.
from vllm import LLM

llm = LLM(
    model="/content/readerlm-v2-q8_0.gguf",
    max_model_len=max_model_len,
    tokenizer='jinaai/ReaderLM-v2'
)

# @title ## Specify a URL as input{"run":"auto","vertical-output":true}

import re
import requests
from IPython.display import display, Markdown

def display_header(text):
    display(Markdown(f'**{text}**'))

def display_rendered_md(text):
    # mimics the "Reading mode" rendering in Safari/Firefox
    display(Markdown(text))

def display_content(text):
    display(Markdown(text))

def get_html_content(url):
    # Fetch the raw HTML of a page through the Jina Reader proxy.
    api_url = f'https://r.jina.ai/{url}'
    headers = {'X-Return-Format': 'html'}
    try:
        response = requests.get(api_url, headers=headers, timeout=10)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        return f"error: {str(e)}"

def create_prompt(text: str, tokenizer=None, instruction: str = None, schema: str = None) -> str:
    """
    Create a prompt for the model with an optional instruction and JSON schema.

    Args:
        text (str): The input HTML text
        tokenizer: The tokenizer to use (defaults to the LLM's own tokenizer)
        instruction (str, optional): Custom instruction for the model
        schema (str, optional): JSON schema for structured extraction

    Returns:
        str: The formatted prompt
    """
    if not tokenizer:
        tokenizer = llm.get_tokenizer()

    if not instruction:
        instruction = "Extract the main content from the given HTML and convert it to Markdown format."

    if schema:
        # A schema switches the task to structured JSON extraction.
        instruction = 'Extract the specified information from a list of news threads and present it in a structured JSON format.'
        prompt = f"{instruction}\n```html\n{text}\n```\nThe JSON schema is as follows:```json{schema}```"
    else:
        prompt = f"{instruction}\n```html\n{text}\n```"

    messages = [
        {
            "role": "user",
            "content": prompt,
        }
    ]

    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
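
# Hypothetical quick check (not in the original notebook): inspect how the
# chat template wraps the instruction and HTML before sending a real page.
sample_prompt = create_prompt("<html><body><h1>Hello</h1></body></html>")
print(sample_prompt[:300])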

# Regex patterns for stripping noisy markup before feeding HTML to the model.

# Remove <script>...</script> blocks (with spacing variations)
SCRIPT_PATTERN = r'<[ ]*script.*?\/[ ]*script[ ]*>'

# Remove <style>...</style> blocks (with spacing variations)
STYLE_PATTERN = r'<[ ]*style.*?\/[ ]*style[ ]*>'

# Remove <meta ...> tags
META_PATTERN = r'<[ ]*meta.*?>'

# Remove HTML comments <!-- ... -->
COMMENT_PATTERN = r'<[ ]*!--.*?--[ ]*>'

# Remove <link ...> tags
LINK_PATTERN = r'<[ ]*link.*?>'

# Replace base64-encoded <img> sources with a placeholder
BASE64_IMG_PATTERN = r'<img[^>]+src="data:image/[^;]+;base64,[^"]+"[^>]*>'

# Replace <svg>...</svg> bodies with a placeholder
SVG_PATTERN = r'(<svg[^>]*>)(.*?)(<\/svg>)'

def replace_svg(html: str, new_content: str = "this is a placeholder") -> str:
    return re.sub(
        SVG_PATTERN,
        lambda match: f"{match.group(1)}{new_content}{match.group(3)}",
        html,
        flags=re.DOTALL,
    )


def replace_base64_images(html: str, new_image_src: str = "#") -> str:
    return re.sub(BASE64_IMG_PATTERN, f'<img src="{new_image_src}"/>', html)


def has_base64_images(text: str) -> bool:
    base64_content_pattern = r'data:image/[^;]+;base64,[^"]+'
    return bool(re.search(base64_content_pattern, text, flags=re.DOTALL))


def has_svg_components(text: str) -> bool:
    return bool(re.search(SVG_PATTERN, text, flags=re.DOTALL))


def clean_html(html: str, clean_svg: bool = False, clean_base64: bool = False):
    html = re.sub(SCRIPT_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
    html = re.sub(STYLE_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
    html = re.sub(META_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
    html = re.sub(COMMENT_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
    html = re.sub(LINK_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))

    if clean_svg:
        html = replace_svg(html)

    if clean_base64:
        html = replace_base64_images(html)

    return html
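
# Hypothetical sanity check (not in the original notebook): script blocks are
# stripped while ordinary markup survives.
assert clean_html('<script>alert(1)</script><p>hi</p>') == '<p>hi</p>'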

url = "https://news.ycombinator.com/"  # @param {type:"string"}

print(f'We will use Jina Reader to fetch the **raw HTML** from: {url}')

html = get_html_content(url)
html = clean_html(html, clean_svg=True, clean_base64=True)

prompt = create_prompt(html)
result = llm.generate(prompt, sampling_params=sampling_params)[0].outputs[0].text.strip()

print(result)

import json

# JSON Schema describing one news thread for structured extraction.
schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string", "description": "News thread title"},
        "url": {"type": "string", "description": "Thread URL"},
        "summary": {"type": "string", "description": "Article summary"},
        "keywords": {"type": "array", "description": "Descriptive keywords"},
        "author": {"type": "string", "description": "Thread author"},
        "comments": {"type": "integer", "description": "Comment count"}
    },
    "required": ["title", "url", "summary", "keywords", "author", "comments"]
}

prompt = create_prompt(html, schema=json.dumps(schema, indent=2))
result = llm.generate(prompt, sampling_params=sampling_params)[0].outputs[0].text.strip()
print(result)
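
# Hedged follow-up (not in the original notebook): the model may wrap its JSON
# in a fenced block, so strip any fences before parsing.
json_text = re.sub(r'^```json\s*|\s*```$', '', result)
try:
    parsed = json.loads(json_text)
    print(type(parsed).__name__, 'parsed OK')
except json.JSONDecodeError as e:
    print('output is not valid JSON:', e)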

# Tear down the engine and free GPU memory.
from vllm.distributed.parallel_state import destroy_model_parallel, destroy_distributed_environment
import gc
import torch

destroy_model_parallel()
destroy_distributed_environment()
del llm.llm_engine.model_executor.driver_worker
del llm.llm_engine.model_executor
del llm
gc.collect()
torch.cuda.empty_cache()

print(f"cuda memory: {torch.cuda.memory_allocated() // 1024 // 1024}MB")
```