1 change: 1 addition & 0 deletions libai/config/configs
14 changes: 7 additions & 7 deletions libai/data/build.py
@@ -32,7 +32,7 @@ def build_nlp_train_val_test_loader(
train_val_test_num_samples,
train_batch_size,
test_batch_size,
-train_sampler=LazyCall(CyclicSampler)(shuffle=True),
+train_sampler=LazyCall(CyclicSampler)(shuffle=False),
test_sampler=LazyCall(SingleRoundSampler)(shuffle=False, drop_last=False),
num_workers=4,
consumed_samples=0,
@@ -152,7 +152,7 @@ def build_nlp_train_loader(
dataset,
train_batch_size,
test_batch_size=None,
-sampler=LazyCall(CyclicSampler)(shuffle=True),
+sampler=LazyCall(CyclicSampler)(shuffle=False),
num_workers=4,
consumed_samples=0,
seed=0,
@@ -211,7 +211,7 @@ def build_nlp_train_loader(
dataset,
batch_sampler=sampler,
num_workers=num_workers,
-persistent_workers=True if num_workers > 0 else False,
+# persistent_workers=True if num_workers > 0 else False,
collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
**kwargs,
)
@@ -263,7 +263,7 @@ def build_nlp_test_loader(
dataset,
batch_sampler=sampler,
num_workers=num_workers,
-persistent_workers=True if num_workers > 0 else False,
+# persistent_workers=True if num_workers > 0 else False,
collate_fn=collate_fn,
)
return test_loader
@@ -273,7 +273,7 @@ def build_image_train_loader(
dataset,
train_batch_size,
test_batch_size=None,
-sampler=LazyCall(CyclicSampler)(shuffle=True),
+sampler=LazyCall(CyclicSampler)(shuffle=False),
num_workers=4,
consumed_samples=0,
seed=0,
@@ -335,7 +335,7 @@ def build_image_train_loader(
dataset,
batch_sampler=sampler,
num_workers=num_workers,
-persistent_workers=True if num_workers > 0 else False,
+# persistent_workers=True if num_workers > 0 else False,
collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
**kwargs,
)
@@ -389,7 +389,7 @@ def build_image_test_loader(
dataset,
batch_sampler=sampler,
num_workers=num_workers,
-persistent_workers=True if num_workers > 0 else False,
+# persistent_workers=True if num_workers > 0 else False,
collate_fn=trivial_batch_collator if collate_fn is None else collate_fn,
**kwargs,
)
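Taken together, the `build.py` changes trade randomness for reproducibility: with `shuffle=False` the sampler walks the dataset in a fixed order, so two runs (e.g. LiBai vs. the PyTorch RWKV baseline this PR compares loss curves against) consume identical batches. A minimal sketch of pinning the sampler explicitly from a project config, assuming LiBai's usual `LazyCall` idiom and that `CyclicSampler` is importable from `libai.data.samplers` (the data path is a placeholder):

```python
from omegaconf import OmegaConf

from libai.config import LazyCall
from libai.data.build import build_nlp_train_loader
from libai.data.samplers import CyclicSampler  # assumed import path
from projects.RWKV_v4.dataset import RWKVDataset

dataloader = OmegaConf.create()
dataloader.train = LazyCall(build_nlp_train_loader)(
    dataset=[
        LazyCall(RWKVDataset)(
            data_dir="/path/to/enwik8",  # placeholder path
            ctx_len=1024,
            epoch_length_fixed=9996,
        ),
    ],
    # fixed order: every run sees the samples in the same sequence
    sampler=LazyCall(CyclicSampler)(shuffle=False),
    num_workers=1,
)
```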
6 changes: 3 additions & 3 deletions libai/engine/default.py
@@ -547,7 +547,7 @@ def build_model(cls, cfg):
model = build_model(cfg.model)
logger = logging.getLogger(__name__)
logger.info("Model:\n{}".format(model))
-model.apply(dist.convert_to_distributed_default_setting)
+model._apply(dist.convert_to_distributed_default_setting)
return model

@classmethod
@@ -726,8 +726,8 @@ def auto_scale_hyperparams(cls, cfg, data_loader):
logger.info(log_info)

# Global scheduler cfg
-cfg.train.scheduler.warmup_iter = cfg.train.warmup_iter
-cfg.train.scheduler.max_iter = cfg.train.train_iter
+# cfg.train.scheduler.warmup_iter = cfg.train.warmup_iter
+# cfg.train.scheduler.max_iter = cfg.train.train_iter

@classmethod
def build_evaluator(cls, cfg):
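Disabling the global scheduler overrides is presumably what lets a project config bring its own scheduler: `auto_scale_hyperparams` would otherwise inject `warmup_iter` and `max_iter` into `cfg.train.scheduler`, which only makes sense for LiBai's default warmup schedulers. The RWKV test config in this PR swaps in a plain `StepLR` that accepts neither argument, so the injected fields would break its instantiation.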
14 changes: 13 additions & 1 deletion libai/engine/trainer.py
@@ -211,6 +211,12 @@ def write_metrics(
# }
metrics_dict = all_metrics_dict
total_losses_reduced = sum(metrics_dict.values())

+# ### Added: dump the reduced loss so the curve can be compared across runs
+# total_losses_reduced = sum(metrics_dict.values())
+if dist.is_main_process():
+    with open("/home/chenqiaoling/libai/projects/RWKV_v4/results/libai_bf16_defaultSize_graph_nolossscale_test2.txt", "a") as txt:
+        txt.write(str(total_losses_reduced.item()) + "\n")

storage.put_scalar("{}total_loss".format(prefix), total_losses_reduced)
if len(metrics_dict) > 1:
@@ -270,9 +276,14 @@ def run_step(self, get_batch: Callable, input_placement_device: str = "cuda"):
data = get_batch(
data, input_placement_device, getattr(self.data_loader, "mixup_func", None)
)


data_time = time.perf_counter() - start



loss_dict = self.model(**data)

losses = sum(loss_dict.values()) / self.grad_acc_steps

losses.backward()
@@ -330,11 +341,12 @@ def run_step(self, get_batch: Callable, input_placement_device: str = "cuda"):
data = get_batch(
data, input_placement_device, getattr(self.data_loader, "mixup_func", None)
)

# data
data_time = time.perf_counter() - start

# If you want to do something with the losses, you can wrap the model.
loss_dict = self.graph(**data)

# Add this because when set up gradient accumulations, graph will return
# an unpacked n-d tensor whose size is accumulation step
loss_dict = {key: value.mean() for key, value in loss_dict.items()}
17 changes: 9 additions & 8 deletions libai/models/utils/graph_base.py
@@ -46,14 +46,15 @@ def __init__(
if is_train:
self.add_optimizer(optimizer, lr_sch=lr_scheduler)
if fp16:
-self.config.enable_amp(True)
-grad_scaler = flow.amp.GradScaler(
-    init_scale=2 ** 30,
-    growth_factor=2.0,
-    backoff_factor=0.5,
-    growth_interval=2000,
-)
-self.set_grad_scaler(grad_scaler)
+self.config.enable_amp(True, dtype=flow.bfloat16)
+
+# grad_scaler = flow.amp.GradScaler(
+#     init_scale=2 ** 12,
+#     growth_factor=2.0,
+#     backoff_factor=0.5,
+#     growth_interval=1000,
+# )
+# self.set_grad_scaler(grad_scaler)

if grad_acc_steps > 1:
self.config.set_gradient_accumulation_steps(grad_acc_steps)
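The motivation for this swap: bfloat16 keeps fp32's 8-bit exponent, so its dynamic range matches fp32 and gradients rarely underflow; the loss scaling that fp16 needs can therefore be dropped, which is exactly what retiring the `GradScaler` does. A minimal sketch of the two AMP variants in a oneflow `nn.Graph`, using only the calls shown above (the `build` body and the `"losses"` key are illustrative assumptions):

```python
import oneflow as flow


class TrainGraph(flow.nn.Graph):
    def __init__(self, model, optimizer, use_bf16=True):
        super().__init__()
        self.model = model
        self.add_optimizer(optimizer)
        if use_bf16:
            # bf16: fp32-sized exponent range, no scaler required
            self.config.enable_amp(True, dtype=flow.bfloat16)
        else:
            # fp16: narrow exponent range, pair AMP with a grad scaler
            self.config.enable_amp(True)
            self.set_grad_scaler(
                flow.amp.GradScaler(
                    init_scale=2 ** 12,
                    growth_factor=2.0,
                    backoff_factor=0.5,
                    growth_interval=1000,
                )
            )

    def build(self, **data):
        loss = self.model(**data)["losses"]  # assumed loss key
        loss.backward()
        return loss
```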
16 changes: 8 additions & 8 deletions libai/utils/distributed.py
@@ -387,18 +387,18 @@ def get_num_nodes():
return flow.env.get_node_size()


-def convert_to_distributed_default_setting(module):
+def convert_to_distributed_default_setting(t):
"""
Helper function to convert each eager local tensor in the model's :attr:`nn.Module` to a
global tensor with data parallelism as the default.
"""
-for _, v in module.state_dict().items():
-    if not v.is_global:
-        module.to_global(
-            sbp=get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast]),
-            placement=get_layer_placement(0),
-        )
-return
+if not t.is_global:
+    return t.to_global(
+        sbp=get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast]),
+        placement=get_layer_placement(0),
+    )
+else:
+    return t


def ttol(tensor, pure_local=False, ranks=None):
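This rewrite pairs with the `model._apply(...)` change in `default.py`: `nn.Module.apply(fn)` visits each submodule, whereas `_apply(fn)` (assuming oneflow mirrors PyTorch's semantics here) visits each parameter and buffer tensor and stores whatever tensor the callback returns. The helper therefore now takes a tensor and returns one:

```python
import oneflow as flow
import libai.utils.distributed as dist

model = flow.nn.Linear(512, 512)  # stand-in for the built LiBai model
# every local parameter/buffer tensor is replaced by the global tensor
# returned from convert_to_distributed_default_setting
model._apply(dist.convert_to_distributed_default_setting)
```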
60 changes: 60 additions & 0 deletions projects/RWKV_v4/configs/config.py
@@ -0,0 +1,60 @@
from omegaconf import OmegaConf

from libai.config import get_config
from libai.config import LazyCall
from libai.tokenizer import GPT2Tokenizer

# Configure the model
from projects.RWKV_v4.modeling.model import GPT, GPTConfig
# Configure the dataloader: `build_nlp_train_loader` and `build_nlp_test_loader` are the two
# functions LiBai provides for building the train and test DataLoaders for NLP data
from libai.data.build import build_nlp_test_loader, build_nlp_train_loader
# Import the custom dataset
from projects.RWKV_v4.dataset import RWKVDataset

graph = get_config("common/models/graph.py").graph
train = get_config("common/train.py").train
optim = get_config("common/optim.py").optim

# Configure the model
model = LazyCall(GPT)(
    vocab_size=6064,
    ctx_len=1024,
    model_type='RWKV',
    n_layer=6,
    n_embd=512,
)

# Training settings
train = get_config("common/train.py").train
train.input_placement_device = "cpu"
train.dist.pipeline_num_layers = 6
train.train_micro_batch_size = 12

datafile = "/home/chenqiaoling/RWKV-LM/data/enwik8"
# Create a DataLoader config object
dataloader = OmegaConf.create()
dataloader.train = LazyCall(build_nlp_train_loader)(
    dataset=[
        LazyCall(RWKVDataset)(
            data_dir=datafile,
            ctx_len=1024,
            epoch_length_fixed=9996,
        ),
    ],
    num_workers=1,
)

train.train_iter = 0
train.train_epoch = 1

train.output_dir = "output/rwkv_output_loss_compare"
train.load_weight = "/home/chenqiaoling/RWKV-LM/libai/projects/RWKV_v4/model/output_model/"  # initialize from the same model weights
train.rdma_enabled = False

# model.cfg.hidden_dropout_prob = 0.0  # disable all dropout
# model.cfg.attention_probs_dropout_prob = 0.0
# model.cfg.bias_dropout_fusion = False

train.dist.pipeline_parallel_size = 2
train.evaluation.enabled = False
# train.dist.tensor_parallel_size = 4  # tensor (model) parallelism of degree 4
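Note that `train.dist.pipeline_num_layers = 6` matches the model's `n_layer=6`, so with `pipeline_parallel_size = 2` each stage holds three blocks. Assuming the standard LiBai entry point, a run with this config would look like `bash tools/train.sh tools/train_net.py projects/RWKV_v4/configs/config.py 2` (two devices, one per pipeline stage).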
88 changes: 88 additions & 0 deletions projects/RWKV_v4/configs/config_test.py
@@ -0,0 +1,88 @@
from omegaconf import OmegaConf

from libai.config import get_config
from libai.config import LazyCall
from libai.tokenizer import GPT2Tokenizer
# Configure the dataloader: `build_nlp_train_loader` and `build_nlp_test_loader` are the two functions LiBai provides for building the train and test DataLoaders for NLP data
from libai.data.build import build_nlp_test_loader, build_nlp_train_loader
import oneflow as flow

# Configure the model
from projects.RWKV_v4.modeling.model import GPT, GPTConfig
# Import the custom dataset
from projects.RWKV_v4.dataset import RWKVDataset
from projects.RWKV_v4.utils.config_optimizer import get_RWKV_v4_config_optim


test = OmegaConf.create()
test.enable = True
test.weight_style = "pytorch"
test.path = "/home/chenqiaoling/RWKV-LM/RWKV-v4/for_load.pth"

graph = get_config("common/models/graph.py").graph

graph.enabled = True

# optim = get_config("common/optim.py").optim
optim = LazyCall(flow.optim.Adam)(
    params=LazyCall(get_RWKV_v4_config_optim)(),
    lr=8e-4,
)


# Configure the model
model = LazyCall(GPT)(
    vocab_size=6064,
    ctx_len=1024,
    model_type='RWKV',
    n_layer=6,
    n_embd=512,
)

# Training settings
train = get_config("common/train.py").train
train.input_placement_device = "cpu"
train.dist.pipeline_num_layers = 6
train.train_micro_batch_size = 4
train.scheduler = LazyCall(flow.optim.lr_scheduler.StepLR)(
    step_size=1000,
    gamma=1.0,
)

# enabled=False falls back to fp32
train.amp.enabled = True

datafile = "/home/chenqiaoling/RWKV-LM/data/enwik8"
# Create a DataLoader config object
dataloader = OmegaConf.create()
dataloader.train = LazyCall(build_nlp_train_loader)(
    dataset=[
        LazyCall(RWKVDataset)(
            data_dir=datafile,
            ctx_len=1024,
            epoch_length_fixed=9996,
        ),
    ],
    num_workers=4,
)

# train.train_iter=3
train.train_epoch = 1

train.output_dir = "output/rwkv_output_loss_compare"
# train.load_weight = "/home/chenqiaoling/RWKV-LM/libai/projects/RWKV_v4/model/output_model/"  # initialize from the same model weights
train.rdma_enabled = False

# model.cfg.hidden_dropout_prob = 0.0  # disable all dropout
# model.cfg.attention_probs_dropout_prob = 0.0
# model.cfg.bias_dropout_fusion = False

# train.dist.pipeline_parallel_size=2
train.evaluation.enabled = False

# train.train_iter=5
# train.dist.tensor_parallel_size = 2  # tensor (model) parallelism of degree 2
# train.dist.tensor_parallel_size = 4  # tensor (model) parallelism of degree 4
train.activation_checkpoint.enabled = True
1 change: 1 addition & 0 deletions projects/RWKV_v4/dataset/__init__.py
@@ -0,0 +1 @@
from .dataset import RWKVDataset
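`dataset/dataset.py` itself is not included in this diff. Going by the constructor arguments the configs pass (`data_dir`, `ctx_len`, `epoch_length_fixed`) and LiBai's `Instance`/`DistTensorData` conventions, the dataset could look roughly like the sketch below; the body is an assumption, not the PR's actual implementation:

```python
import numpy as np
import oneflow as flow
from libai.data.structures import DistTensorData, Instance


class RWKVDataset(flow.utils.data.Dataset):
    """Character-level LM dataset over a single text file (sketch)."""

    def __init__(self, data_dir, ctx_len, epoch_length_fixed):
        with open(data_dir, "r", encoding="utf-8") as f:
            self.data = f.read()
        # char-level vocab built from the corpus (assumed; enwik8 yields ~6064 symbols)
        self.stoi = {ch: i for i, ch in enumerate(sorted(set(self.data)))}
        self.ctx_len = ctx_len
        self.epoch_length_fixed = epoch_length_fixed

    def __len__(self):
        # one "epoch" is a fixed number of random windows
        return self.epoch_length_fixed

    def __getitem__(self, idx):
        # sample a random window of ctx_len + 1 characters; the input is the
        # first ctx_len tokens and the target is the same window shifted by one
        i = np.random.randint(0, len(self.data) - (self.ctx_len + 1))
        chunk = [self.stoi[c] for c in self.data[i : i + self.ctx_len + 1]]
        x = flow.tensor(chunk[:-1], dtype=flow.int64)
        y = flow.tensor(chunk[1:], dtype=flow.int64)
        return Instance(
            input_ids=DistTensorData(x),
            labels=DistTensorData(y, placement_idx=-1),
        )
```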