---
library_name: transformers
pipeline_tag: text-generation
inference: true
widget:
- text: Hello!
  example_title: Hello world
  group: Python
---

This model is for debugging purposes. It is randomly initialized using the config from [deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3), but with much smaller dimensions.
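
For a quick look at how small it is, the shrunken dimensions can be read straight from the config. This is a minimal sketch; the expected values come from the creation script further down:

```python
from transformers import AutoConfig

# trust_remote_code pulls the custom DeepSeek-V3 config class referenced by auto_map
config = AutoConfig.from_pretrained(
    "yujiepan/deepseek-v3-tiny-random", trust_remote_code=True)
# Expected per the creation script below: hidden_size=16, num_hidden_layers=2, num_attention_heads=2
print(config.hidden_size, config.num_hidden_layers, config.num_attention_heads)
```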

**⚠️ Note: this repo does not yet contain the Multi-Token Prediction (MTP) module described [here](https://huggingface.co/deepseek-ai/DeepSeek-V3/blob/main/README_WEIGHTS.md).**

Usage:
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "yujiepan/deepseek-v3-tiny-random"
device = torch.device("cuda")

tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code is required: the config's auto_map points at the custom
# DeepseekV3 modeling code hosted in the official DeepSeek-V3 repo.
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True,
).eval().to(device)

prompt = 'Hello!'
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]

# Apply the chat template and tokenize in one step; the result is a tensor of input ids.
inputs = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(device)

with torch.inference_mode():
    outputs = model.generate(
        inputs,
        max_new_tokens=16,
        do_sample=False,  # greedy decoding; output is meaningless since the weights are random
        use_cache=True,
    )
string = tokenizer.decode(outputs[0])
print(string)
```
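
Alternatively, recent transformers versions can run the same check through the `text-generation` pipeline, which applies the chat template itself when given a list of messages. A minimal sketch, assuming a single CUDA device (`device=0`); drop that argument to run on CPU:

```python
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model="yujiepan/deepseek-v3-tiny-random",
    trust_remote_code=True,  # needed for the custom DeepseekV3 modeling code
    device=0,                # assumption: one CUDA device is available
)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
# Weights are random, so the continuation is meaningless; this only checks that generation runs.
print(pipe(messages, max_new_tokens=16, do_sample=False)[0]["generated_text"])
```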

Code to create this model:
```python
import json
import os
from pathlib import Path

import torch
from huggingface_hub import create_repo, upload_folder
from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
                          GenerationConfig, enable_full_determinism)

model_id = "deepseek-ai/DeepSeek-V3"
repo_id = "yujiepan/deepseek-v3-tiny-random"
save_path = f"/tmp/{repo_id}"
os.system(f"rm -rf {save_path}")

config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
config.num_hidden_layers = 2
config.first_k_dense_replace = 1
config.hidden_size = 16
config.intermediate_size = 32
config.moe_intermediate_size = 16
config.q_lora_rank = 16
config.kv_lora_rank = 16
config.qk_rope_head_dim = 16
config.qk_nope_head_dim = 16
config.v_head_dim = 16
config.num_attention_heads = 2
config.num_key_value_heads = 2
# transformers does not support the customized quantization config here, so drop it
del config.quantization_config

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.save_pretrained(save_path)

enable_full_determinism(seed=42)
model = AutoModelForCausalLM.from_config(
    config, torch_dtype=torch.bfloat16, trust_remote_code=True,
).eval()

try:
    model.generation_config = GenerationConfig.from_pretrained(
        model_id, trust_remote_code=True)
except Exception:
    print("No generation config found")

num_params = 0
with torch.no_grad():
    for name, p in sorted(model.named_parameters()):
        # print only the first expert per MoE layer to avoid flooding the log
        if 'experts' not in name or 'experts.0.' in name:
            print(name, p.shape)
        # torch.nn.init.uniform_(p, -0.2, 0.2)
        num_params += p.numel()
print(f"Number of parameters: {num_params / 1e6:.2f}M")
model.save_pretrained(save_path)

# patch auto_map in config.json so the repo reuses the official DeepSeek-V3 modeling code
auto_map = config.auto_map
with open(f"{save_path}/config.json", "r") as f:
    config = json.load(f)
    config['auto_map'] = auto_map
with open(f"{save_path}/config.json", "w") as f:
    json.dump(config, f, indent=2)

os.system(f"cat {save_path}/config.json")

del model
del tokenizer
# remove the locally copied modeling .py files; auto_map now points at the official repo
for p in Path(save_path).glob("*.py"):
    os.remove(p)

os.system(f"ls -alh {save_path}")
torch.use_deterministic_algorithms(False)
tokenizer = AutoTokenizer.from_pretrained(save_path)
model = AutoModelForCausalLM.from_pretrained(
    save_path, trust_remote_code=True).eval()
prompt = 'Hello!'
messages = [
    {"role": "system", "content": "You are a helpful assistant."}
]
messages.append({"role": "user", "content": prompt})
tokenized_chat = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")

device = torch.device("cuda")
outputs = model.to(device).generate(
    tokenized_chat.to(device),
    max_new_tokens=16,
    do_sample=False,
    use_cache=True,
)
tokens = tokenizer.convert_ids_to_tokens(outputs[0])
string = tokenizer.decode(outputs[0])
print(tokens)
print(string)


# create_repo(repo_id, exist_ok=True)
# upload_folder(repo_id=repo_id, folder_path=save_path)
```
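
After the script finishes, a small sanity check (a sketch, assuming the same `save_path` as above) can confirm that the saved `config.json` kept the shrunken dimensions, dropped the quantization config, and got the patched `auto_map`:

```python
import json

save_path = "/tmp/yujiepan/deepseek-v3-tiny-random"  # same path as in the script above

with open(f"{save_path}/config.json") as f:
    cfg = json.load(f)

# These must match the overrides applied before saving.
assert cfg["hidden_size"] == 16
assert cfg["num_hidden_layers"] == 2
assert "quantization_config" not in cfg
print("auto_map:", cfg["auto_map"])
```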