Svngoku
/

ReaderLM-v2-Q8_0-GGUF

 ```
 ./llama-server --hf-repo Svngoku/ReaderLM-v2-Q8_0-GGUF --hf-file readerlm-v2-q8_0.gguf -c 2048
 ```
+## vLLM Inference
+```py
+# -*- coding: utf-8 -*-
+"""Untitled64.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1hVqCTm6XLJmrOjkaIYLHXgOTg2ffnhue
+"""
+# Install vLLM into the notebook runtime (IPython shell escape).
+!pip install vllm
+# Colab form fields: the `# @param` / `# @markdown` annotations are Colab form
+# markup and are kept verbatim.
+# NOTE(review): model_name is not referenced again in the visible code; the
+# LLM(...) call below loads the local GGUF path instead — confirm it is needed.
+model_name = 'Svngoku/ReaderLM-v2-Q8_0-GGUF' # @param ["jinaai/ReaderLM-v2", "jinaai/reader-lm-1.5b", "Svngoku/ReaderLM-v2-Q8_0-GGUF"]
+max_model_len = 256000 # @param {type:"integer"}
+# @markdown ---
+# @markdown ### SamplingParams:
+top_k = 1 # @param {type:"integer"}
+temperature = 0 # @param {type:"slider", min:0, max:1, step:0.1}
+repetition_penalty = 1.05 # @param {type:"number"}
+presence_penalty = 0.25 # @param {type:"slider", min:0, max:1, step:0.1}
+max_tokens = 8192 # @param {type:"integer"}
+# @markdown ---
+from vllm import SamplingParams
+# Bundle the form values above into the sampling configuration that is passed
+# to every llm.generate(...) call later in the notebook.
+sampling_params = SamplingParams(temperature=temperature, top_k=top_k, presence_penalty=presence_penalty, repetition_penalty=repetition_penalty, max_tokens=max_tokens)
+print('sampling_params', sampling_params)
+# Download the quantized GGUF weights and the original model's tokenizer file
+# into the current working directory.
+!wget https://huggingface.co/Svngoku/ReaderLM-v2-Q8_0-GGUF/resolve/main/readerlm-v2-q8_0.gguf
+!wget https://huggingface.co/jinaai/ReaderLM-v2/resolve/main/tokenizer.json
+# NOTE(review): this starts a separate `vllm serve` process, which presumably
+# holds the cell until interrupted; the in-process LLM(...) below does not use
+# it. It also points --tokenizer at the tokenizer.json file, whereas LLM(...)
+# passes the 'jinaai/ReaderLM-v2' repo id — confirm which is intended.
+!vllm serve /content/readerlm-v2-q8_0.gguf --tokenizer /content/tokenizer.json
+from vllm import LLM
+# In-process engine used by create_prompt() and the generate calls below.
+llm = LLM(
+    model="/content/readerlm-v2-q8_0.gguf",
+    max_model_len=max_model_len,
+    tokenizer='jinaai/ReaderLM-v2'
+)
+# @title ## Specify a URL as input{"run":"auto","vertical-output":true}
+import re
+import requests
+from IPython.display import display, Markdown
+def display_header(text):
+    display(Markdown(f'**{text}**'))
+def display_rendered_md(text):
+    # for mimic "Reading mode" in Safari/Firefox
+    display(Markdown(text))
+def display_content(text):
+    display(Markdown(text))
+def get_html_content(url):
+    api_url = f'https://r.jina.ai/{url}'
+    headers = {'X-Return-Format': 'html'}
+    try:
+        response = requests.get(api_url, headers=headers, timeout=10)
+        response.raise_for_status()
+        return response.text
+    except requests.exceptions.RequestException as e:
+        return f"error: {str(e)}"
+def get_html_content(url):
+    api_url = f'https://r.jina.ai/{url}'
+    headers = {'X-Return-Format': 'html'}
+    try:
+        response = requests.get(api_url, headers=headers, timeout=10)
+        response.raise_for_status()
+        return response.text
+    except requests.exceptions.RequestException as e:
+        return f"error: {str(e)}"
+def create_prompt(text: str, tokenizer = None, instruction: str = None, schema: str = None) -> str:
+    """
+    Create a prompt for the model with optional instruction and JSON schema.
+    Args:
+        text (str): The input HTML text
+        tokenizer: The tokenizer to use
+        instruction (str, optional): Custom instruction for the model
+        schema (str, optional): JSON schema for structured extraction
+    Returns:
+        str: The formatted prompt
+    """
+    if not tokenizer:
+      tokenizer = llm.get_tokenizer()
+    if not instruction:
+        instruction = "Extract the main content from the given HTML and convert it to Markdown format."
+    if schema:
+        instruction = 'Extract the specified information from a list of news threads and present it in a structured JSON format.'
+        prompt = f"{instruction}\n```html\n{text}\n```\nThe JSON schema is as follows:```json{schema}```"
+    else:
+        prompt = f"{instruction}\n```html\n{text}\n```"
+    messages = [
+        {
+            "role": "user",
+            "content": prompt,
+        }
+    ]
+    return tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+# Regular expressions for the HTML clean-up helpers defined below.
+# clean_html() applies the first five with IGNORECASE | MULTILINE | DOTALL.
+#
+# <script> ... </script> elements, tolerating spaces inside the tag brackets.
+SCRIPT_PATTERN = r'<[ ]*script.*?\/[ ]*script[ ]*>'  # match any char zero or more times (non-greedy)
+# <style> ... </style> elements, tolerating spaces inside the tag brackets.
+STYLE_PATTERN = r'<[ ]*style.*?\/[ ]*style[ ]*>'  # match any char zero or more times (non-greedy)
+# A <meta ...> tag, up to its first closing '>'.
+META_PATTERN = r'<[ ]*meta.*?>'  # match any char zero or more times (non-greedy)
+# HTML comments, from '<!--' to '-->'.
+COMMENT_PATTERN = r'<[ ]*!--.*?--[ ]*>'  # match any char zero or more times (non-greedy)
+# A <link ...> tag, up to its first closing '>'.
+LINK_PATTERN = r'<[ ]*link.*?>'  # match any char zero or more times (non-greedy)
+# <img> tags whose double-quoted src attribute is a base64 data URI.
+BASE64_IMG_PATTERN = r'<img[^>]+src="data:image/[^;]+;base64,[^"]+"[^>]*>'
+# <svg> elements, captured as three groups: opening tag, inner content,
+# closing tag.
+SVG_PATTERN = r'(<svg[^>]*>)(.*?)(<\/svg>)'
+def replace_svg(html: str, new_content: str = "this is a placeholder") -> str:
+    return re.sub(
+        SVG_PATTERN,
+        lambda match: f"{match.group(1)}{new_content}{match.group(3)}",
+        html,
+        flags=re.DOTALL,
+    )
+def replace_base64_images(html: str, new_image_src: str = "#") -> str:
+    return re.sub(BASE64_IMG_PATTERN, f'<img src="{new_image_src}"/>', html)
+def has_base64_images(text: str) -> bool:
+    base64_content_pattern = r'data:image/[^;]+;base64,[^"]+'
+    return bool(re.search(base64_content_pattern, text, flags=re.DOTALL))
+def has_svg_components(text: str) -> bool:
+    return bool(re.search(SVG_PATTERN, text, flags=re.DOTALL))
+def clean_html(html: str, clean_svg: bool = False, clean_base64: bool = False):
+    html = re.sub(SCRIPT_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
+    html = re.sub(STYLE_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
+    html = re.sub(META_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
+    html = re.sub(COMMENT_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
+    html = re.sub(LINK_PATTERN, '', html, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
+    if clean_svg:
+        html = replace_svg(html)
+    if clean_base64:
+        html = replace_base64_images(html)
+    return html
+url = "https://news.ycombinator.com/" # @param {type:"string"}
+print(f'We will use Jina Reader to fetch the **raw HTML** from: {url}')
+html = get_html_content(url)
+html = clean_html(html, clean_svg=True, clean_base64=True)
+prompt = create_prompt(html)
+result = llm.generate(prompt, sampling_params=sampling_params)[0].outputs[0].text.strip()
+print(result)
+import json
+schema = {
+    "type": "object",
+    "properties": {
+        "title": {"type": "string", "description": "News thread title"},
+        "url": {"type": "string", "description": "Thread URL"},
+        "summary": {"type": "string", "description": "Article summary"},
+        "keywords": {"type": "list", "description": "Descriptive keywords"},
+        "author": {"type": "string", "description": "Thread author"},
+        "comments": {"type": "integer", "description": "Comment count"}
+    },
+    "required": ["title", "url", "date", "points", "author", "comments"]
+}
+prompt = create_prompt(html, schema=json.dumps(schema, indent=2))
+result = llm.generate(prompt, sampling_params=sampling_params)[0].outputs[0].text.strip()
+print(result)
+from vllm.distributed.parallel_state import destroy_model_parallel, destroy_distributed_environment
+import gc
+import os
+import torch
+# Tear down the vLLM engine and report how much CUDA memory remains allocated.
+# NOTE(review): the attribute chain llm.llm_engine.model_executor.driver_worker
+# reaches into vLLM internals and presumably depends on the installed vLLM
+# version — confirm it exists for the version in use.
+destroy_model_parallel()
+destroy_distributed_environment()
+# Drop the references innermost-first, then the LLM object itself, before
+# collecting garbage and emptying the CUDA cache.
+del llm.llm_engine.model_executor.driver_worker
+del llm.llm_engine.model_executor
+del llm
+gc.collect()
+torch.cuda.empty_cache()
+# memory_allocated() is in bytes; two integer divisions by 1024 convert to MB.
+print(f"cuda memory: {torch.cuda.memory_allocated() // 1024 // 1024}MB")
+```