Skip to content

Commit c85f6c9

Browse files
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into hackathon9th06
2 parents 95fdadc + 2aa2b68 commit c85f6c9

File tree

181 files changed

+12975
-1163
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

181 files changed

+12975
-1163
lines changed

.github/workflows/H-Coverage.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ jobs:
142142
run: |
143143
docker exec -t ${{ env.container_name }} /bin/bash -c '
144144
flashattn_version=$(git submodule status|grep flashattn|awk "{print \$1}"|sed "s#-##g")
145+
echo flashattn_version:$flashattn_version
145146
url="https://xly-devops.bj.bcebos.com/gpups/flash-attention/cu90/flashattn_libs_${flashattn_version}.tar"
147+
echo url:$url
146148
url_return=`curl -s -o /dev/null -w "%{http_code}" $url`
147149
if [ "$url_return" != "200" ];then
148150
echo "flashattn cache not found, please contact umiswing"
@@ -309,6 +311,7 @@ jobs:
309311
'
310312
311313
- name: Test
314+
id: unit_test
312315
run: |
313316
docker exec -t ${{ env.container_name }} /bin/bash -c '
314317
source ${{ github.workspace }}/../../../proxy
@@ -317,6 +320,15 @@ jobs:
317320
bash $ci_scripts/h-test.sh
318321
'
319322
323+
- name: FA Test
324+
if: (success() || failure()) && steps.unit_test.conclusion != 'skipped'
325+
run: |
326+
docker exec -t ${{ env.container_name }} /bin/bash -c '
327+
source ${{ github.workspace }}/../../../proxy
328+
cd test/test_flashmask_ci
329+
bash run.sh
330+
'
331+
320332
- name: Terminate and delete the container
321333
if: always()
322334
run: |

.github/workflows/rerun.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,3 +286,13 @@ jobs:
286286
OWNER: ${{ github.repository_owner }}
287287
REPO: ${{ github.event.repository.name }}
288288
JOB_NAME: 'Doc-Preview / Check bypass / Check bypass'
289+
290+
- name: Rerun Slice
291+
if: ${{ contains(github.event.comment.body, 'slice') }}
292+
uses: ./.github/actions/rerun-workflow
293+
with:
294+
PR_ID: ${{ github.event.issue.number }}
295+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
296+
OWNER: ${{ github.repository_owner }}
297+
REPO: ${{ github.event.repository.name }}
298+
JOB_NAME: 'Slice / Check bypass / Check bypass'

README.md

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,9 @@ PaddlePaddle originates from industrial practices with dedication and commitment
2323
Our vision is to enable deep learning for everyone via PaddlePaddle.
2424
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
2525

26-
### Install Latest Stable Release
26+
### Install Latest Stable Release or Nightly Release
2727

28-
``` sh
29-
# CPU
30-
pip install paddlepaddle
31-
# GPU
32-
pip install paddlepaddle-gpu
33-
```
34-
35-
For more information about installation, please view [Quick Install](https://www.paddlepaddle.org.cn/install/quick)
28+
For detailed information about installation, please view [Quick Install](https://www.paddlepaddle.org.cn/install/quick)
3629

3730
## **PaddlePaddle New Generation Framework 3.2**
3831

README_cn.md

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,9 @@
2222

2323
跟进 PaddlePaddle 最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
2424

25-
### 安装最新稳定版本
25+
### 安装最新稳定版本或每日最新构建版本
2626

27-
``` sh
28-
# CPU
29-
pip install paddlepaddle
30-
# GPU
31-
pip install paddlepaddle-gpu
32-
```
33-
34-
更多安装信息详见官网 [安装说明](https://www.paddlepaddle.org.cn/install/quick)
27+
安装方法详见官网 [安装说明](https://www.paddlepaddle.org.cn/install/quick)
3528

3629
## 飞桨新一代框架 3.2
3730

README_ja.md

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,7 @@ PaddlePaddle は、工業化に対するコミットメントを持つ工業的
2424
私たちのビジョンは、PaddlePaddle を通じて、誰もが深層学習を行えるようにすることです。
2525
PaddlePaddle の最新機能を追跡するために、私たちの[リリースのお知らせ](https://github.com/PaddlePaddle/Paddle/releases)を参照してください。
2626

27-
### 最新の安定版リリースのインストール
28-
29-
``` sh
30-
# CPU
31-
pip install paddlepaddle
32-
# GPU
33-
pip install paddlepaddle-gpu
34-
```
27+
### 最新の安定版またはナイトリービルドをインストールする
3528

3629
インストール方法については、[クイックインストール](https://www.paddlepaddle.org.cn/install/quick)をご覧ください
3730

cmake/external/glog.cmake

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,19 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
4747
)
4848
set(GLOG_POLICY_ARGS "-DCMAKE_POLICY_VERSION_MINIMUM=3.5")
4949
endif()
50-
50+
# patch
51+
file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/glog/indent.patch log_indent)
52+
set(GLOG_PATCH_COMMAND git checkout -- . && git apply --check ${log_indent} &&
53+
git apply ${log_indent})
5154
ExternalProject_Add(
5255
extern_glog
5356
${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
5457
SOURCE_DIR ${SOURCE_DIR}
5558
DEPENDS gflags
5659
PREFIX ${GLOG_PREFIX_DIR}
5760
UPDATE_COMMAND ""
61+
PATCH_COMMAND
62+
COMMAND ${GLOG_PATCH_COMMAND}
5863
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
5964
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
6065
-DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}

paddle/common/flags.cc

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2314,3 +2314,56 @@ PHI_DEFINE_EXPORTED_bool(
23142314
PHI_DEFINE_EXPORTED_bool(use_accuracy_compatible_kernel,
23152315
false,
23162316
"Whether use torch compatible version kernel.");
2317+
2318+
/**
2319+
* Allocator Compact related FLAG
2320+
* Name: FLAGS_enable_compact_mem
2321+
* Since Version: 3.2.2
2322+
* Value Range: bool, default=false
2323+
* Example:
2324+
* Note: whether to enable memory compaction.
2325+
*/
2326+
PHI_DEFINE_EXPORTED_bool(enable_compact_mem,
2327+
false,
2328+
"whether start compact memory or not.");
2329+
/**
2330+
* Allocator Compact related FLAG
2331+
* Name: FLAGS_max_reserved_threshold_in_gb
2332+
* Since Version: 3.2.2
2333+
* Value Range: int64, default=70
2334+
* Example:
2335+
* Note: Threshold (GB) used in compact memory. Only reserved_mem greater than
2336+
* threshold may trigger defragmentation.
2337+
*/
2338+
PHI_DEFINE_EXPORTED_int64(
2339+
max_reserved_threshold_in_gb,
2340+
70,
2341+
"Threshold (GB) used in compact memory. Only reserved_mem greater than "
2342+
"threshold may trigger defragmentation.");
2343+
2344+
/**
2345+
* Allocator Compact related FLAG
2346+
* Name: FLAGS_cur_allocated_threshold_in_gb
2347+
* Since Version: 3.2.2
2348+
* Value Range: int64, default=55
2349+
* Example:
2350+
* Note: Threshold (GB) used in compact memory. Only reserved_mem greater than
2351+
* threshold may trigger defragmentation.
2352+
*/
2353+
PHI_DEFINE_EXPORTED_int64(
2354+
cur_allocated_threshold_in_gb,
2355+
55,
2356+
"Threshold (GB) used in compact memory. Only reserved_mem greater than "
2357+
"threshold may trigger defragmentation.");
2358+
2359+
/**
2360+
* Allocator Compact related FLAG
2361+
* Name: FLAGS_try_allocate
2362+
* Since Version: 3.2.2
2363+
* Value Range: bool, default=false
2364+
* Example:
2365+
* Note: whether to use try-allocate during memory compaction.
2366+
*/
2367+
PHI_DEFINE_EXPORTED_bool(try_allocate,
2368+
false,
2369+
"whether use try allocate in memory compact.");

paddle/fluid/eager/api/utils/global_utils.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,4 +216,52 @@ class EagerBackwardSubGraphNodeRecorder {
216216
bool need_capture_subgraph_ = false;
217217
};
218218

219+
/**
220+
* @class LogIndent
221+
* @brief Singleton class for managing log indentation levels globally
222+
*
223+
* This class implements the singleton pattern to provide a centralized way
224+
* to manage indentation levels for formatted log output. It ensures only
225+
* one instance exists throughout the application lifecycle.
226+
*/
227+
class LogIndent {
228+
public:
229+
/**
230+
* @brief Get the singleton instance of LogIndent
231+
* @return Reference to the singleton instance
232+
*
233+
* Uses static local variable for thread-safe singleton initialization
234+
* (C++11 guarantee). The instance is created on first call and destroyed
235+
* automatically at program termination.
236+
*/
237+
static LogIndent& Instance() {
238+
static LogIndent instance;
239+
return instance;
240+
}
241+
/**
242+
* @brief Increase the current indentation level by 1
243+
*
244+
* Call this method when entering a nested scope to increase
245+
* log indentation for better visual hierarchy.
246+
*/
247+
void IncreaseIndentLevel() { FLAGS_indentlevel = FLAGS_indentlevel + 1; }
248+
/**
249+
* @brief Decrease the current indentation level by 1
250+
*
251+
* Reduces the indentation level, but never goes below 0.
252+
* Call this when leaving a nested scope.
253+
*/
254+
void DecreaseIndentLevel() {
255+
if (FLAGS_indentlevel > 0) {
256+
FLAGS_indentlevel = FLAGS_indentlevel - 1;
257+
}
258+
}
259+
LogIndent(const LogIndent&) = delete;
260+
LogIndent& operator=(const LogIndent&) = delete;
261+
262+
private:
263+
LogIndent() = default;
264+
~LogIndent() = default;
265+
};
266+
219267
} // namespace egr

paddle/fluid/eager/auto_code_generator/generator/eager_gen.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,6 @@
311311
"index_put",
312312
# others
313313
"matmul",
314-
"expand",
315314
}
316315

317316
strided_op_need_flags_check_list = {

paddle/fluid/eager/pylayer/py_layer_node.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ GradNodePyLayer::operator()(
4545
egr::CUDAErrorCheck("GradNodePyLayer begin");
4646
}
4747
pybind11::gil_scoped_acquire gil;
48+
if (VLOG_IS_ON(2)) egr::LogIndent::Instance().IncreaseIndentLevel();
4849
VLOG(3) << "Running Eager Backward Node: " << name();
4950
if (FLAGS_call_stack_level == 3) {
5051
VLOG(3) << "PyLayer forward call stack: " << this->GetForwardTrace();
@@ -256,6 +257,7 @@ GradNodePyLayer::operator()(
256257
Py_XDECREF(outputs);
257258
Py_XDECREF(ctx_);
258259
ctx_ = nullptr;
260+
if (VLOG_IS_ON(2)) egr::LogIndent::Instance().DecreaseIndentLevel();
259261

260262
if (FLAGS_check_cuda_error) [[unlikely]] {
261263
egr::CUDAErrorCheck("GradNodePyLayer finish");

0 commit comments

Comments
 (0)