calebnwokocha committed on
Upload 2 files

- GPT2.exe      +1 -1
- main-ctx.cpp  +1 -2
GPT2.exe CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7277090f60fd35553ee09bb0a110c36efb344614b2deec8441d21813b57c78b4
 size 21223233
main-ctx.cpp CHANGED
@@ -20,7 +20,6 @@
 // default hparams (GPT-2 774M)
 struct gpt_hparams {
     int32_t n_vocab = 50257; // Vocabulary size remains the same
-    //int32_t n_ctx = 1024; // Maximum context length (sequence length)
     int32_t n_embd = 1024; // Embedding dimensionality
     int32_t n_head = 16; // Number of attention heads
     int32_t n_layer = 24; // Number of transformer layers
@@ -32,7 +31,7 @@ struct gpt_hparams {
     int32_t n_predict = 200; // new tokens to predict
     int32_t n_parallel = 1; // number of parallel streams
     int32_t n_batch = 32; // batch size for prompt processing
-    int32_t n_ctx =
+    int32_t n_ctx = 1024; // context size (this is the KV cache max size)
     int32_t n_gpu_layers = 0; // number of layers to offload to the GPU

     bool ignore_eos = false; // ignore EOS token when generating text
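
For context, a minimal sketch of how gpt_hparams reads after this change, together with an assumed illustration of how n_ctx typically sizes the KV cache in ggml-style GPT-2 examples. Only the struct fields visible in the hunks above are included, and the n_mem / n_elements / kv_bytes sizing is an assumption for illustration, not code taken from main-ctx.cpp.

// Sketch of gpt_hparams after this commit (only fields visible in the diff).
// The KV-cache sizing below is an assumed illustration, not code from main-ctx.cpp.
#include <cstdint>
#include <cstdio>

struct gpt_hparams {
    int32_t n_vocab      = 50257; // vocabulary size
    int32_t n_embd       = 1024;  // embedding dimensionality
    int32_t n_head       = 16;    // number of attention heads
    int32_t n_layer      = 24;    // number of transformer layers
    int32_t n_predict    = 200;   // new tokens to predict
    int32_t n_parallel   = 1;     // number of parallel streams
    int32_t n_batch      = 32;    // batch size for prompt processing
    int32_t n_ctx        = 1024;  // context size (KV cache max size)
    int32_t n_gpu_layers = 0;     // number of layers to offload to the GPU
    bool    ignore_eos   = false; // ignore EOS token when generating text
};

int main() {
    gpt_hparams hparams;

    // Assumed KV-cache layout: one K and one V vector of n_embd floats
    // per layer, per cached token, for up to n_ctx tokens.
    const int64_t n_mem      = (int64_t) hparams.n_layer * hparams.n_ctx;
    const int64_t n_elements = (int64_t) hparams.n_embd  * n_mem;
    const double  kv_bytes   = 2.0 * n_elements * sizeof(float); // K and V, f32

    printf("KV cache: %lld elements per tensor, ~%.1f MiB total (f32)\n",
           (long long) n_elements, kv_bytes / (1024.0 * 1024.0));
    return 0;
}

Under these assumptions, raising or lowering n_ctx scales the KV-cache allocation linearly, which is why the field is described in the diff as the KV cache max size rather than only a prompt-length limit.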