---
library_name: transformers
pipeline_tag: text-generation
inference: true
widget:
- text: Hello!
  example_title: Hello world
  group: Python
---
This model is for debugging. It is randomly initialized using the config of [deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3), but with much smaller dimensions.
**⚠️Note: At this moment, this repo does not contain the Multi-Token Prediction (MTP) module as explained [here](https://huggingface.co/deepseek-ai/DeepSeek-V3/blob/main/README_WEIGHTS.md).**
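The architecture follows DeepSeek-V3, but every dimension is shrunk. A quick way to confirm this is to load the config and print a few fields (a minimal sketch; the values should match the tiny settings in the creation code further below):

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "yujiepan/deepseek-v3-tiny-random", trust_remote_code=True)
# Expect tiny values such as hidden_size=16 and num_hidden_layers=2.
print(config.hidden_size, config.num_hidden_layers, config.num_attention_heads)
```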
Usage:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "yujiepan/deepseek-v3-tiny-random"
device = torch.device("cuda")

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True,
).eval().to(device)

# Build a chat prompt with the model's chat template.
prompt = "Hello!"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]
inputs = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(device)

# Greedy decoding; the output is gibberish since the weights are random.
with torch.inference_mode():
    outputs = model.generate(
        inputs,
        max_new_tokens=16,
        do_sample=False,
        use_cache=True,
    )
string = tokenizer.decode(outputs[0])
print(string)
```
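For quick smoke tests, the generic text-generation pipeline should also work (a sketch, assuming the same model id and a CUDA device; the output is meaningless because the weights are random):

```python
import torch
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="yujiepan/deepseek-v3-tiny-random",
    device="cuda",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
print(pipe("Hello!", max_new_tokens=16, do_sample=False))
```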
Code to create this model:
```python
import json
import os
from pathlib import Path

import torch
import transformers
from huggingface_hub import create_repo, upload_folder
from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
                          GenerationConfig, enable_full_determinism, pipeline,
                          set_seed)

model_id = "deepseek-ai/DeepSeek-V3"
repo_id = "yujiepan/deepseek-v3-tiny-random"
save_path = f"/tmp/{repo_id}"
os.system(f"rm -rf {save_path}")

# Shrink the original config to a tiny size.
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
config.num_hidden_layers = 2
config.first_k_dense_replace = 1
config.hidden_size = 16
config.intermediate_size = 32
config.moe_intermediate_size = 16
config.q_lora_rank = 16
config.kv_lora_rank = 16
config.qk_rope_head_dim = 16
config.qk_nope_head_dim = 16
config.v_head_dim = 16
config.num_attention_heads = 2
config.num_key_value_heads = 2
# transformers does not yet support the customized quantization config, so drop it.
del config.quantization_config

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.save_pretrained(save_path)

enable_full_determinism(seed=42)
model = AutoModelForCausalLM.from_config(
    config, torch_dtype=torch.bfloat16, trust_remote_code=True,
).eval()

try:
    model.generation_config = GenerationConfig.from_pretrained(
        model_id, trust_remote_code=True)
except Exception:
    print("No generation config found")

# Print parameter shapes and count the total number of parameters.
num_params = 0
with torch.no_grad():
    for name, p in sorted(model.named_parameters()):
        if 'experts' in name and 'experts.0.' not in name:  # avoid printing too much
            pass
        else:
            print(name, p.shape)
        # torch.nn.init.uniform_(p, -0.2, 0.2)
        num_params += p.numel()
print(f"Number of parameters: {num_params / 1e6:.2f}M")
model.save_pretrained(save_path)

# Patch the saved config.json so the repo uses the official modeling code via auto_map.
auto_map = config.auto_map
with open(f"{save_path}/config.json", "r") as f:
    config_dict = json.load(f)
config_dict['auto_map'] = auto_map
with open(f"{save_path}/config.json", "w") as f:
    json.dump(config_dict, f, indent=2)
os.system(f"cat {save_path}/config.json")

# Remove the locally copied modeling files so the remote code is used instead.
del model
del tokenizer
for p in Path(save_path).glob("*.py"):
    os.remove(p)
os.system(f"ls -alh {save_path}")

# Sanity check: reload the tiny model and run a short generation.
torch.use_deterministic_algorithms(False)
tokenizer = AutoTokenizer.from_pretrained(save_path)
model = AutoModelForCausalLM.from_pretrained(
    save_path, trust_remote_code=True).eval()

prompt = 'Hello!'
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]
tokenized_chat = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
device = torch.device("cuda")
outputs = model.to(device).generate(
    tokenized_chat.to(device),
    max_new_tokens=16,
    do_sample=False,
    use_cache=True,
)
tokens = tokenizer.convert_ids_to_tokens(outputs[0])
string = tokenizer.decode(outputs[0])
print(tokens)

# create_repo(repo_id, exist_ok=True)
# upload_folder(repo_id=repo_id, folder_path=save_path)
```
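The upload step is left commented out at the end of the script. Uncommented, it would look roughly like this (using the `huggingface_hub` helpers already imported above; assumes you are logged in with write access to the repo):

```python
from huggingface_hub import create_repo, upload_folder

repo_id = "yujiepan/deepseek-v3-tiny-random"
save_path = f"/tmp/{repo_id}"
create_repo(repo_id, exist_ok=True)
upload_folder(repo_id=repo_id, folder_path=save_path)
```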