# (Hosting-page residue, not pipeline configuration) Code pull complete; the page will refresh automatically.
# Pipeline-level gate: decides whether a pipeline is created at all.
# Rules are evaluated top to bottom; the first matching rule decides.
workflow:
  rules:
    # Always run merge-request pipelines.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
    # Always run pipelines started from the web UI.
    - if: '$CI_PIPELINE_SOURCE == "web"'
    # Do not run a branch pipeline while an MR is open for that branch.
    - if: '$CI_COMMIT_BRANCH && $CI_OPEN_MERGE_REQUESTS'
      when: never
    # Run a branch pipeline when the branch has no open MR.
    - if: '$CI_COMMIT_BRANCH'
# Pipeline stages, in the order they are listed.
stages:
  - test
  - jet
# Global CI variables. The `&VARS` anchor names this mapping so it can be
# reused through a YAML alias elsewhere in the file.
# Scalar values are quoted so they are unambiguously strings.
variables: &VARS
  SELENE_ADLR_CI_PATH: "/lustre/fsw/adlr/adlr-nlp/adlr_ci/megatron"
  DATA_DIR: "/lustre/fsw/adlr/adlr-nlp/adlr_ci/megatron/data"
  # This is the image that is run by all nodes on selene for tests.
  PYTORCH_IMAGE: "/lustre/fsw/adlr/adlr-nlp/adlr_ci/megatron/nvcr_pytorch_23.04.sqsh"
  PYTHON_VIRTUAL_ENV: "/lustre/fsw/adlr/adlr-nlp/adlr_ci/cienv/bin/activate"
  # Can specify levels.
  TESTS_TO_RUN_AFTER_MERGING: "MR_TESTS NIGHTLY_TESTS"
  TESTS_TO_RUN_ON_THIS_COMMIT: "unit_tests"
  # Can define a regex as in https://github.com/google/re2/wiki/Syntax,
  # e.g. /.*gpt3.*/
  TEST_REGEX_ON_THIS_COMMIT: "NONE"
  JET_CUSTOM_FILTER: ""
  # Set to true for new tests to copy the logs for creating golden truth file.
  DISPLAY_OUTPUT: "True"
  # Default time limit for all jobs.
  TIME_LIMIT: "10:00"
  # Set to 1 to enable grouped gemm for MoE.
  MOE_GROUPED_GEMM: "0"
  # Variable with a default value, a fixed list of allowed options and a
  # description (GitLab's expanded variable form).
  JET_CLUSTER_BRANCH:
    value: "mcore/draco-oci"
    options:
      - "mcore/draco-oci"
      - "mcore/eos"
    description: '"mcore/draco-oci" for OCI-IAD, "mcore/eos" for EOS'
# Additional pipeline configuration pulled in from jet-tests.yml.
include:
  - jet-tests.yml
# Full unit-test run over tests/unit_tests on 8 processes, collecting
# coverage for megatron/core (terminal and HTML reports).
unit_tests:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    - torchrun --nproc_per_node=8 -m pytest --cov-report=term --cov-report=html --cov=megatron/core tests/unit_tests
  # Regex GitLab applies to the job log to extract the total coverage percentage.
  coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
  artifacts:
    paths:
      # NOTE(review): presumably the HTML coverage report directory - confirm
      # against the project's coverage configuration.
      - coverage
    expire_in: 30 days
  rules:
    # Merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
    # Pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
# Unit tests for tests/unit_tests/data only, on 8 processes.
# Runs in exactly the pipelines where the full `unit_tests` job does not.
unit_tests-data:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    - torchrun --nproc_per_node=8 -m pytest tests/unit_tests/data
  rules:
    # Skip merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
      when: never
    # Skip pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    # Run in every other pipeline.
    - when: always
# Unit tests for tests/unit_tests/dist_checkpointing only, on 8 processes.
# Runs in exactly the pipelines where the full `unit_tests` job does not.
unit_tests-dist-checkpointing:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    - torchrun --nproc_per_node=8 -m pytest tests/unit_tests/dist_checkpointing
  rules:
    # Skip merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
      when: never
    # Skip pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    # Run in every other pipeline.
    - when: always
# Unit tests for tests/unit_tests/fusions only, on 8 processes.
# Runs in exactly the pipelines where the full `unit_tests` job does not.
unit_tests-fusions:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    - torchrun --nproc_per_node=8 -m pytest tests/unit_tests/fusions
  rules:
    # Skip merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
      when: never
    # Skip pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    # Run in every other pipeline.
    - when: always
# Unit tests for tests/unit_tests/models only, on 8 processes.
# Runs in exactly the pipelines where the full `unit_tests` job does not.
unit_tests-models:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    - torchrun --nproc_per_node=8 -m pytest tests/unit_tests/models
  rules:
    # Skip merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
      when: never
    # Skip pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    # Run in every other pipeline.
    - when: always
# Unit tests for tests/unit_tests/pipeline_parallel only, on 8 processes.
# Runs in exactly the pipelines where the full `unit_tests` job does not.
unit_tests-pipeline-parallel:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    - torchrun --nproc_per_node=8 -m pytest tests/unit_tests/pipeline_parallel
  rules:
    # Skip merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
      when: never
    # Skip pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    # Run in every other pipeline.
    - when: always
# Unit tests for tests/unit_tests/tensor_parallel only, on 8 processes.
# Runs in exactly the pipelines where the full `unit_tests` job does not.
unit_tests-tensor-parallel:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    - torchrun --nproc_per_node=8 -m pytest tests/unit_tests/tensor_parallel
  rules:
    # Skip merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
      when: never
    # Skip pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    # Run in every other pipeline.
    - when: always
# Unit tests for tests/unit_tests/transformer only, on 8 processes.
# Runs in exactly the pipelines where the full `unit_tests` job does not.
unit_tests-transformer:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    - torchrun --nproc_per_node=8 -m pytest tests/unit_tests/transformer
  rules:
    # Skip merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
      when: never
    # Skip pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    # Run in every other pipeline.
    - when: always
# Unit tests for the *.py files directly under tests/unit_tests, on 8 processes.
# Runs in exactly the pipelines where the full `unit_tests` job does not.
unit_tests-top-py:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/pytorch-all-tests:23.04-v1
  tags:
    - 8xL40S
  stage: test
  script:
    # The glob is expanded by the job shell, not by YAML.
    - torchrun --nproc_per_node=8 -m pytest tests/unit_tests/*.py
  rules:
    # Skip merge-request pipelines whose labels match /Run tests/.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_LABELS =~ /Run tests/'
      when: never
    # Skip pipelines on the default branch.
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: never
    # Run in every other pipeline.
    - when: always
# Builds the documentation with this checkout moved into a fresh clone of the
# documentation repository. The job is allowed to fail.
docs_build_test:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/python-format:0.0.1
  stage: test
  tags:
    - os/linux
  script:
    # Step up one directory from where the job starts.
    - cd ..
    # Replace any previous copy with a fresh clone, authenticated with the job token.
    - rm -rf documentation && git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab-master.nvidia.com/nemo-megatron-core-tme/documentation.git
    # NOTE(review): assumes the checkout directory is named megatron-lm - confirm for forks.
    - mv megatron-lm/ documentation/
    - cd documentation/
    - ./repo docs
  allow_failure: true
  # Skipped for the `main` ref.
  # NOTE(review): this is the legacy only/except syntax while the other jobs
  # use `rules`; migrating needs care because the two differ in which
  # pipeline types they match by default.
  except:
    - main
# Style gate for megatron/core: black and isort in check-only mode.
formatting:
  image: gitlab-master.nvidia.com:5005/adlr/megatron-lm/python-format:0.0.1
  tags:
    - os/linux
  stage: test
  script:
    # Report would-be reformatting as a diff without rewriting files.
    - black megatron/core --check --verbose --diff
    # Verify import ordering without rewriting files.
    - isort megatron/core --check
  rules:
    # No conditions: the job is added to every pipeline.
    - when: always
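# (Hosting-page residue, not pipeline configuration) This section may contain content unsuitable for display, so the page does not show it. You can review and modify it yourself using the relevant editing features.
# If you confirm that the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates applicable national laws and regulations, you can click submit to appeal, and we will handle it as soon as possible.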