---
license: llama3
---

- Foundation Model: [Bllossom 8B](https://huggingface.co/MLP-KTLim/llama-3-Korean-Bllossom-8B)
- Datasets (see the loading sketch below)
  - [KoAlpaca v1.1a](https://huggingface.co/datasets/beomi/KoAlpaca-v1.1a)
  - [jojo0217/korean_safe_conversation](https://huggingface.co/datasets/jojo0217/korean_safe_conversation)
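Both fine-tuning datasets are public on the Hub and can be inspected with the `datasets` library. A minimal sketch; the `train` split name is an assumption, so check each dataset card:

```python
# Minimal sketch: inspect the fine-tuning data with the `datasets` library.
# The "train" split name is an assumption; verify on each dataset card.
from datasets import load_dataset

koalpaca = load_dataset("beomi/KoAlpaca-v1.1a", split="train")
safe_conv = load_dataset("jojo0217/korean_safe_conversation", split="train")
print(koalpaca[0])
```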
# Query
```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

BASE_MODEL = "sh2orc/llama-3-korean-8b"

# Load the model, letting accelerate place weights across available devices
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")

# Llama-3 ships without a pad token; reuse EOS and pad on the right
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

instruction = "ํ•œ๊ฐ•์—๋Š” ๋Œ€๊ต๊ฐ€ ๋ช‡ ๊ฐœ ์žˆ์–ด?"  # "How many bridges are there over the Han River?"

pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=1024)

messages = [
    {"role": "user", "content": instruction},
]

# Render the chat messages into the Llama-3 prompt format
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=True,
    # Stop on either the standard EOS token or Llama-3's <|eot_id|> turn delimiter
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
)

# Strip the echoed prompt and print only the generated answer
print(outputs[0]['generated_text'][len(prompt):])
```
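The snippet above loads the weights at their default precision. If GPU memory is tight, a common variant is to request bfloat16 at load time; this is a sketch of that option, not something the model card specifies:

```python
# Sketch (assumption, not from the model card): load in bfloat16 to roughly
# halve GPU memory versus full float32.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "sh2orc/llama-3-korean-8b",
    torch_dtype=torch.bfloat16,  # dtype choice is an assumption
    device_map="auto",
)
```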
# Result
<pre>
ํ•œ๊ฐ•์—๋Š” ์ด 8๊ฐœ์˜ ๋‹ค๋ฆฌ(๊ต)๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค. ๊ทธ ์ค‘ 3๊ฐœ๋Š” ๋ถ์ชฝ์œผ๋กœ ํ–ฅํ•ด ์žˆ๊ณ , ๋‚˜๋จธ์ง€ 5๊ฐœ๋Š” ๋‚จ์ชฝ์œผ๋กœ ํ–ฅํ•ด ์žˆ์Šต๋‹ˆ๋‹ค.
</pre>

English: "The Han River has a total of 8 bridges. Of those, 3 head toward the north and the remaining 5 head toward the south."
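For interactive use, the same model can stream tokens as they are generated via transformers' `TextStreamer`. A minimal sketch, assuming the `model`, `tokenizer`, and `prompt` objects from the Query example are already in scope:

```python
# Streaming sketch: prints tokens to stdout as they are generated.
# Assumes `model`, `tokenizer`, and `prompt` from the Query example above.
from transformers import TextStreamer

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
model.generate(
    **inputs,
    streamer=streamer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.8,
    eos_token_id=[
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ],
)
```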