Removed deprecated is_torch_greater_or_equal_than_1_13 import and the torch.fx fallback it guarded.
modeling_deepseek.py CHANGED (+0 -4)
@@ -43,7 +43,6 @@ from transformers.modeling_outputs import (
 from transformers.modeling_utils import PreTrainedModel
 from transformers.pytorch_utils import (
     ALL_LAYERNORM_LAYERS,
-    is_torch_greater_or_equal_than_1_13,
 )
 from transformers.utils import (
     add_start_docstrings,
@@ -66,9 +65,6 @@ if is_flash_attn_2_available():
 # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 # It means that the function will not be traced through and simply appear as a node in the graph.
 if is_torch_fx_available():
-    if not is_torch_greater_or_equal_than_1_13:
-        import torch.fx
-
     _prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask)

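For reference, the torch.fx.wrap call kept in the second hunk registers _prepare_4d_causal_attention_mask as a leaf function, so symbolic tracing records a single call node instead of tracing through its body. Below is a minimal sketch of that pattern; the helper untraceable_helper and the toy Model are hypothetical names used only for illustration.

import torch
import torch.fx


def untraceable_helper(x: torch.Tensor) -> torch.Tensor:
    # Data-dependent Python control flow cannot be symbolically traced,
    # which is why helpers like this are wrapped as graph leaves.
    if x.sum() > 0:
        return x * 2
    return x


# Same pattern as the modeling file: wrap() registers the function so that
# tracing emits one call_function node rather than tracing its body.
untraceable_helper = torch.fx.wrap(untraceable_helper)


class Model(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return untraceable_helper(x) + 1


gm = torch.fx.symbolic_trace(Model())
print(gm.graph)  # untraceable_helper shows up as a single call_function node

Without the wrap call, torch.fx.symbolic_trace would fail on the data-dependent if, since proxy tensors cannot drive Python control flow. The removed guard only affected how torch.fx itself was imported on torch versions before 1.13; it did not change this wrapping behavior.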