calebnwokocha committed on
Upload 2 files

- GPT2.exe      +1 -1
- main-ctx.cpp  +1 -2
GPT2.exe CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7277090f60fd35553ee09bb0a110c36efb344614b2deec8441d21813b57c78b4
 size 21223233
main-ctx.cpp CHANGED
@@ -20,7 +20,6 @@
 // default hparams (GPT-2 774M)
 struct gpt_hparams {
     int32_t n_vocab = 50257; // Vocabulary size remains the same
-    //int32_t n_ctx = 1024; // Maximum context length (sequence length)
     int32_t n_embd = 1024; // Embedding dimensionality
     int32_t n_head = 16; // Number of attention heads
     int32_t n_layer = 24; // Number of transformer layers
@@ -32,7 +31,7 @@ struct gpt_hparams {
     int32_t n_predict = 200; // new tokens to predict
     int32_t n_parallel = 1; // number of parallel streams
     int32_t n_batch = 32; // batch size for prompt processing
-    int32_t n_ctx =
+    int32_t n_ctx = 1024; // context size (this is the KV cache max size)
     int32_t n_gpu_layers = 0; // number of layers to offload to the GPU

     bool ignore_eos = false; // ignore EOS token when generating text
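
For context, a minimal sketch of how gpt_hparams reads after this change, together with an assumed illustration of how n_ctx typically sizes the KV cache in ggml-style GPT-2 examples. Only the struct fields visible in the hunks above are included, and the n_mem / n_elements / kv_bytes sizing is an assumption for illustration, not code taken from main-ctx.cpp.

// Sketch of gpt_hparams after this commit (only fields visible in the diff).
// The KV-cache sizing below is an assumed illustration, not code from main-ctx.cpp.
#include <cstdint>
#include <cstdio>

struct gpt_hparams {
    int32_t n_vocab      = 50257; // vocabulary size
    int32_t n_embd       = 1024;  // embedding dimensionality
    int32_t n_head       = 16;    // number of attention heads
    int32_t n_layer      = 24;    // number of transformer layers
    int32_t n_predict    = 200;   // new tokens to predict
    int32_t n_parallel   = 1;     // number of parallel streams
    int32_t n_batch      = 32;    // batch size for prompt processing
    int32_t n_ctx        = 1024;  // context size (KV cache max size)
    int32_t n_gpu_layers = 0;     // number of layers to offload to the GPU
    bool    ignore_eos   = false; // ignore EOS token when generating text
};

int main() {
    gpt_hparams hparams;

    // Assumed KV-cache layout: one K and one V vector of n_embd floats
    // per layer, per cached token, for up to n_ctx tokens.
    const int64_t n_mem      = (int64_t) hparams.n_layer * hparams.n_ctx;
    const int64_t n_elements = (int64_t) hparams.n_embd  * n_mem;
    const double  kv_bytes   = 2.0 * n_elements * sizeof(float); // K and V, f32

    printf("KV cache: %lld elements per tensor, ~%.1f MiB total (f32)\n",
           (long long) n_elements, kv_bytes / (1024.0 * 1024.0));
    return 0;
}

Under these assumptions, raising or lowering n_ctx scales the KV-cache allocation linearly, which is why the field is described in the diff as the KV cache max size rather than only a prompt-length limit.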