
Commit 419b349

fix
1 parent 9805b27 commit 419b349

3 files changed: +16 additions, −4 deletions


paddlenlp/trainer/trainer.py

Lines changed: 2 additions & 1 deletion

@@ -166,6 +166,7 @@
     split_parallel_config,
 )
 from .training_args import TrainingArguments
+from .unified_checkpoint import UnifiedCheckpointHandler
 from .utils import reshard as reshard_util
 from .utils.async_save import AsyncSaver

@@ -957,7 +958,7 @@ def train(
         init_optimizer(self.optimizer)
         optimizer_sharded_state_dict = self.optimizer.sharded_state_dict(model_sharded_state_dict)
         sharded_state_dict = {**model_sharded_state_dict, **optimizer_sharded_state_dict}
-        dist.load_state_dict(sharded_state_dict, resume_from_checkpoint)
+        dist.load_state_dict(sharded_state_dict, resume_from_checkpoint, aoa_config=self.args.aoa_config)
         self._load_scheduler(resume_from_checkpoint)
     else:
         model = self.model_wrapped
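The change above threads the new aoa_config training argument through to FlexCheckpoint's loader. A minimal sketch of the resulting call, assuming a hypothetical AoA mapping (the rule syntax is defined by FlexCheckpoint; the entry below is a placeholder, not a real rule):

    import paddle.distributed as dist

    # Placeholder AoA rule: maps a checkpoint-side weight name to the
    # model-side name(s) it should populate. Keys/values are illustrative only.
    aoa_config = {"ckpt.embed_tokens.weight": ["model.embed_tokens.weight"]}

    # sharded_state_dict is the merged model + optimizer sharded state dict
    # built a few lines earlier in train(); resume_from_checkpoint is the
    # checkpoint directory path.
    dist.load_state_dict(sharded_state_dict, resume_from_checkpoint, aoa_config=aoa_config)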

paddlenlp/trainer/training_args.py

Lines changed: 11 additions & 1 deletion

@@ -407,6 +407,10 @@ class TrainingArguments:
             Whether to release gradients during training. Default is `False`.
         ckpt_quant_stage (`str`, *optional*):
             Whether to activate checkpoint quantization. O0: deactivated, O1: Int8 compression, O2: Int4 compression. Default is `O0`.
+        using_flex_checkpoint (`bool`, *optional*):
+            Whether to use FlexCheckpoint for save and load. Default is `False`.
+        aoa_config (`Optional[dict[str, list[str]]]`, *optional*):
+            The AoA configuration of FlexCheckpoint, used to describe the mapping between model weights and the checkpoint content. Default is `None`.
     """

     output_dir: str = field(

@@ -1086,6 +1090,13 @@ class TrainingArguments:
         default=None, metadata={"help": "Path to a config file for fine-grained control of NCCL communication groups; defaults to None, which disables this option."}
     )

+    aoa_config: Optional[dict[str, list[str]]] = field(
+        default=None,
+        metadata={
+            "help": "The AoA configuration of FlexCheckpoint, used to describe the mapping between model weights and the checkpoint content. Default is None."
+        },
+    )
+
     def __post_init__(self):
         world_size = paddle.distributed.get_world_size()
         if in_auto_parallel_align_mode():

@@ -2376,7 +2387,6 @@ def _no_sync_in_gradient_accumulation(self):

     @property
     def should_save_sharding_stage1_model(self):
-        # return True
         if self.enable_auto_parallel:
             return False
         return (
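As a usage sketch, the new field can be set like any other TrainingArguments option; the mapping below is hypothetical and only illustrates the declared dict[str, list[str]] shape:

    from paddlenlp.trainer import TrainingArguments

    args = TrainingArguments(
        output_dir="./checkpoints",
        # Placeholder AoA rule: checkpoint-side name -> model-side name(s).
        aoa_config={"ckpt.weight_name": ["model.weight_name"]},
    )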

paddlenlp/transformers/llama/modeling.py

Lines changed: 3 additions & 2 deletions

@@ -30,7 +30,9 @@
 from paddle.distributed import fleet
 from paddle.distributed.fleet.meta_parallel import get_rng_state_tracker
 from paddle.distributed.fleet.recompute.recompute import recompute
-from paddle.distributed.flex_checkpoint import build_sharded_state_dict
+from paddle.distributed.flex_checkpoint.dcp.sharded_weight import (
+    build_sharded_state_dict,
+)

 from paddlenlp.transformers.refined_recompute import (
     RRColumnParallelLinear,

@@ -1427,7 +1429,6 @@ def get_tensor_parallel_split_mappings(num_layers):

     @classmethod
     def _get_fuse_or_split_param_mappings(cls, config: LlamaConfig, is_fuse=False):
-        raise NotImplementedError
         # return parameter fuse utils
         from paddlenlp.transformers.conversion_utils import split_or_fuse_func
