jupyterjazz
committed on
Commit
·
1752c7c
1
Parent(s):
6e55444
docs: add comments
Browse files
Signed-off-by: jupyterjazz <[email protected]>
- modeling_lora.py +16 -4
modeling_lora.py
CHANGED
@@ -162,6 +162,16 @@ class LoRAParametrization(nn.Module):
|
|
162 |
dropout_p: float,
|
163 |
alpha: float,
|
164 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
if isinstance(layer, nn.Linear):
|
166 |
parametrize.register_parametrization(
|
167 |
layer,
|
@@ -312,11 +322,11 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
|
|
312 |
config = XLMRobertaFlashConfig.from_pretrained(
|
313 |
pretrained_model_name_or_path, *model_args, **kwargs
|
314 |
)
|
315 |
-
if config.load_trained_adapters:
|
316 |
return super().from_pretrained(
|
317 |
pretrained_model_name_or_path, *model_args, **kwargs
|
318 |
)
|
319 |
-
else:
|
320 |
roberta = XLMRobertaModel.from_pretrained(
|
321 |
pretrained_model_name_or_path, *model_args, **kwargs
|
322 |
)
|
@@ -358,10 +368,12 @@ class XLMRobertaLoRA(XLMRobertaPreTrainedModel):
|
|
358 |
**kwargs,
|
359 |
) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
|
360 |
"""
|
361 |
-
Computes sentence embeddings
|
362 |
|
|
|
|
|
363 |
task_type(`str`, *optional*, defaults to `None`):
|
364 |
-
Specifies the task for which the encoding is intended. If `task_type` is not
|
365 |
all LoRA adapters are disabled, and the model reverts to its original,
|
366 |
general-purpose weights.
|
367 |
"""
|
|
|
162 |
dropout_p: float,
|
163 |
alpha: float,
|
164 |
):
|
165 |
+
"""
|
166 |
+
Registering LoRA adapters to all embedding and linear layers.
|
167 |
+
|
168 |
+
Additionally, we implement a custom forward function for LoRA parametrization.
|
169 |
+
This function modifies the layer's forward pass to optionally use task-specific
|
170 |
+
parameters. When a `task_id` is provided, it employs a LoRA parametrization
|
171 |
+
to modify the original weights according to the specific task. This allows
|
172 |
+
the layer to adapt dynamically to different tasks at runtime. If no `task_id`
|
173 |
+
is specified, the layer uses its original weights.
|
174 |
+
"""
|
175 |
if isinstance(layer, nn.Linear):
|
176 |
parametrize.register_parametrization(
|
177 |
layer,
|
|
|
322 |
config = XLMRobertaFlashConfig.from_pretrained(
|
323 |
pretrained_model_name_or_path, *model_args, **kwargs
|
324 |
)
|
325 |
+
if config.load_trained_adapters: # checkpoint already contains LoRA adapters
|
326 |
return super().from_pretrained(
|
327 |
pretrained_model_name_or_path, *model_args, **kwargs
|
328 |
)
|
329 |
+
else: # initializing new adapters
|
330 |
roberta = XLMRobertaModel.from_pretrained(
|
331 |
pretrained_model_name_or_path, *model_args, **kwargs
|
332 |
)
|
|
|
368 |
**kwargs,
|
369 |
) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]:
|
370 |
"""
|
371 |
+
Computes sentence embeddings.
|
372 |
|
373 |
+
sentences(`str` or `List[str]`):
|
374 |
+
Sentence or sentences to be encoded
|
375 |
task_type(`str`, *optional*, defaults to `None`):
|
376 |
+
Specifies the task for which the encoding is intended. If `task_type` is not provided,
|
377 |
all LoRA adapters are disabled, and the model reverts to its original,
|
378 |
general-purpose weights.
|
379 |
"""
|