diff --git a/.circleci/config.yml b/.circleci/config.yml index e7650e63ea..9de733444d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -23,11 +23,15 @@ workflows: mmedit/.* lint_only false requirements/.* lint_only false tests/.* lint_only false - tools/.* lint_only false - configs/.* lint_only false .circleci/.* lint_only false + tools/.* lint_only true + configs/.* lint_only true + docs/.* lint_only true .dev_scripts/.* lint_only true - base-revision: 1.x + .github/.* lint_only true + demo/.* lint_only true + projects/.* lint_only true + base-revision: dev-1.x # this is the path of the configuration we should trigger once # path filtering and pipeline parameter value updates are # complete. In this case, we are using the parent dynamic diff --git a/.circleci/test.yml b/.circleci/test.yml index 71e58826ad..2cd5a7a5f8 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -59,13 +59,11 @@ jobs: - run: name: Install mmediting dependencies command: | + pip install 'opencv-python!=4.7.0.68' pip install git+https://github.com/open-mmlab/mmengine.git@main pip install -U openmim mim install 'mmcv >= 2.0.0rc1' - mim install 'mmdet >= 3.0.0rc2' pip install -r requirements/tests.txt - pip install git+https://github.com/openai/CLIP.git - pip install imageio-ffmpeg - run: name: Build and install command: | @@ -105,13 +103,11 @@ jobs: - run: name: Install mmedit dependencies command: | + docker exec mmedit pip install 'opencv-python!=4.7.0.68' docker exec mmedit pip install -e /mmengine docker exec mmedit pip install -U openmim docker exec mmedit mim install 'mmcv >= 2.0.0rc1' - docker exec mmedit mim install 'mmdet >= 3.0.0rc2' docker exec mmedit pip install -r requirements/tests.txt - docker exec mmedit pip install git+https://github.com/openai/CLIP.git - docker exec mmedit pip install imageio-ffmpeg - run: name: Build and install command: | @@ -131,7 +127,6 @@ workflows: branches: ignore: - dev-1.x - - test-1.x - 1.x pr_stage_test: when: @@ -144,7 +139,6 @@ workflows: branches: ignore: - dev-1.x - - test-1.x - 1.x - build_cpu: name: minimum_version_cpu @@ -155,8 +149,8 @@ workflows: - lint - build_cpu: name: maximum_version_cpu - torch: 1.12.1 - torchvision: 0.13.1 + torch: 1.13.0 + torchvision: 0.14.0 python: 3.9.0 requires: - minimum_version_cpu @@ -187,5 +181,3 @@ workflows: branches: only: - dev-1.x - - test-1.x - - 1.x diff --git a/.dev_scripts/README.md b/.dev_scripts/README.md index deb066b41f..db0b9ac724 100644 --- a/.dev_scripts/README.md +++ b/.dev_scripts/README.md @@ -1,7 +1,7 @@ # Scripts for developing MMEditing -- [1. Check UT](#check-ut) -- [2. Test all the models](#test-benchmark) +- [1. Check UT](#1-check-ut) +- [2. Test all the models](#2-test-all-the-models) - [3. Train all the models](#3-train-all-the-models) - [3.1 Train for debugging](#31-train-for-debugging) - [3.2 Train for FP32](#32-train-for-fp32) @@ -9,7 +9,11 @@ - [4. Monitor your training](#4-monitor-your-training) - [5. Train with a list of models](#5-train-with-a-list-of-models) - [6. Train with skipping a list of models](#6-train-with-skipping-a-list-of-models) -- [7. Automatically check links](#automatically-check-links) +- [7. Train failed or canceled jobs](#7-train-failed-or-canceled-jobs) +- [8. Deterministic training](#8-deterministic-training) +- [9. Automatically check links](#9-automatically-check-links) +- [10. Calculate flops](#10-calculate-flops) +- [11. Update model index](#11-update-model-index) ## 1.
Check UT @@ -224,12 +228,28 @@ python .dev_scripts/train_benchmark.py mm_lol --job-name xzn --models pix2pix -- Use the following script to check whether the links in the documentation are valid: ```shell -python3 .github/scripts/doc_link_checker.py --target docs/zh_cn -python3 .github/scripts/doc_link_checker.py --target README_zh-CN.md -python3 .github/scripts/doc_link_checker.py --target docs/en -python3 .github/scripts/doc_link_checker.py --target README.md +python .dev_scripts/doc_link_checker.py --target docs/zh_cn +python .dev_scripts/doc_link_checker.py --target README_zh-CN.md +python .dev_scripts/doc_link_checker.py --target docs/en +python .dev_scripts/doc_link_checker.py --target README.md ``` You can specify `--target` as a file or a directory. **Notes:** DO NOT use it in CI, because sending too many HTTP requests from CI will cause 503 errors and the CI job will probably fail. + +## 10. Calculate flops + +To summarize the flops of different models, you can run the following command: + +```bash +python .dev_scripts/benchmark_valid_flop.py --flops --flops-str +``` + +## 11. Update model index + +To update model-index according to `README.md`, please run the following command: + +```bash +python .dev_scripts/update_model_index.py +``` diff --git a/.dev_scripts/doc_link_checker.py b/.dev_scripts/doc_link_checker.py index f9fdd4e42e..51a452b4bb 100644 --- a/.dev_scripts/doc_link_checker.py +++ b/.dev_scripts/doc_link_checker.py @@ -1,20 +1,24 @@ # Copyright (c) MegFlow. All rights reserved. +# Copyright (c) OpenMMLab. All rights reserved. # /bin/python3 import argparse import os import re +import requests +from tqdm import tqdm + def make_parser(): parser = argparse.ArgumentParser('Doc link checker') - parser.add_argument( - '--http', default=False, type=bool, help='check http or not ') parser.add_argument( '--target', default='./docs', type=str, help='the directory or file to check') + parser.add_argument( + '--ignore', type=str, nargs='+', default=[], help='directories or files to ignore') return parser @@ -22,7 +26,6 @@ def make_parser(): def analyze_doc(home, path): - print('analyze {}'.format(path)) problem_list = [] code_block = 0 with open(path) as f: @@ -51,11 +54,31 @@ def analyze_doc(home, path): end = item.find(')') ref = item[start + 1:end] - if ref.startswith('http') or ref.startswith('#'): + if ref.startswith('http'): + if ref.startswith( + 'https://download.openmmlab.com/' + ) or ref.startswith('http://download.openmmlab.com/'): + resp = requests.head(ref) + if resp.status_code == 200: + continue + else: + problem_list.append(ref) + else: + continue + + if ref.startswith('#'): continue + + if ref == '<>': + continue + if '.md#' in ref: - ref = ref[ref.find('#'):] - fullpath = os.path.join(home, ref) + ref = ref[:ref.find('#')] + if ref.startswith('/'): + fullpath = os.path.join( + os.path.dirname(__file__), '../', ref[1:]) + else: + fullpath = os.path.join(home, ref) if not os.path.exists(fullpath): problem_list.append(ref) else: @@ -68,11 +91,16 @@ def analyze_doc(home, path): raise Exception('found link error') -def traverse(target): +def traverse(args): + target = args.target if os.path.isfile(target): analyze_doc(os.path.dirname(target), target) return - for home, dirs, files in os.walk(target): + target_files = list(os.walk(target)) + target_files.sort() + for home, dirs, files in tqdm(target_files): + if home in args.ignore: + continue for filename in files: if filename.endswith('.md'): path = os.path.join(home, filename) @@ -82,4 +110,4 @@ if __name__ ==
'__main__': args = make_parser().parse_args() - traverse(args.target) + traverse(args) diff --git a/.dev_scripts/download_models.py b/.dev_scripts/download_models.py index f3349aaf15..1fff57835e 100644 --- a/.dev_scripts/download_models.py +++ b/.dev_scripts/download_models.py @@ -74,9 +74,7 @@ def download(args): model_index.build_models_with_collections() models = OrderedDict({model.name: model for model in model_index.models}) - http_prefix_long = 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/' # noqa http_prefix_short = 'https://download.openmmlab.com/mmediting/' - http_prefix_gen = 'https://download.openmmlab.com/mmgen/' # load model list if args.model_list: @@ -109,12 +107,8 @@ def download(args): model_weight_url = model_info.weights - if model_weight_url.startswith(http_prefix_long): - model_name = model_weight_url[len(http_prefix_long):] - elif model_weight_url.startswith(http_prefix_short): + if model_weight_url.startswith(http_prefix_short): model_name = model_weight_url[len(http_prefix_short):] - elif model_weight_url.startswith(http_prefix_gen): - model_name = model_weight_url[len(http_prefix_gen):] elif model_weight_url == '': print(f'{model_info.Name} weight is missing') return None diff --git a/.dev_scripts/test_benchmark.py b/.dev_scripts/test_benchmark.py index ae36afb13b..ed66aa7491 100644 --- a/.dev_scripts/test_benchmark.py +++ b/.dev_scripts/test_benchmark.py @@ -99,16 +99,10 @@ def create_test_job_batch(commands, model_info, args, port, script_name): assert config.exists(), f'{fname}: {config} not found.' http_prefix_short = 'https://download.openmmlab.com/mmediting/' - http_prefix_long = 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/' # noqa - http_prefix_gen = 'https://download.openmmlab.com/mmgen/' model_weight_url = model_info.weights - if model_weight_url.startswith(http_prefix_long): - model_name = model_weight_url[len(http_prefix_long):] - elif model_weight_url.startswith(http_prefix_short): + if model_weight_url.startswith(http_prefix_short): model_name = model_weight_url[len(http_prefix_short):] - elif model_weight_url.startswith(http_prefix_gen): - model_name = model_weight_url[len(http_prefix_gen):] elif model_weight_url == '': print(f'{fname} weight is missing') return None diff --git a/.dev_scripts/update_model_index.py b/.dev_scripts/update_model_index.py index 1703d55f2f..672d3e18a8 100755 --- a/.dev_scripts/update_model_index.py +++ b/.dev_scripts/update_model_index.py @@ -41,7 +41,7 @@ def dump_yaml_and_check_difference(obj, file): if osp.isfile(file): file_exists = True - print(f' exist {file}') + # print(f' exist {file}') with open(file, 'r', encoding='utf-8') as f: str_orig = f.read() else: @@ -144,13 +144,17 @@ def parse_md(md_file): Name=collection_name, Metadata={'Architecture': []}, README=readme, - Paper=[]) + Paper=[], + Task=[], + Year=0, + ) models = [] # force utf-8 instead of system defined with open(md_file, 'r', encoding='utf-8') as md: lines = md.readlines() i = 0 name = lines[0][2:] + year = re.sub('[^0-9]', '', name.split('(', 1)[-1]) name = name.split('(', 1)[0].strip() collection['Metadata']['Architecture'].append(name) collection['Name'] = name @@ -158,6 +162,8 @@ def parse_md(md_file): is_liif = collection_name.upper() == 'LIIF' task_line = lines[4] task = task_line.strip().split(':')[-1].strip() + collection['Task'] = task.lower().split(', ') + collection['Year'] = int(year) while i < len(lines): # parse reference if lines[i].startswith('> ['): @@ -177,9 +183,11 @@ def parse_md(md_file): # 
import ipdb # ipdb.set_trace() if 'Config' not in cols and 'Download' not in cols: - warnings.warn(f"Lack 'Config' or 'Download' in line {i+1}") + warnings.warn("Lack 'Config' or 'Download' in" + f'line {i+1} in {md_file}') i += 1 continue + if 'Method' in cols: config_idx = cols.index('Method') elif 'Config' in cols: @@ -187,6 +195,7 @@ def parse_md(md_file): else: print(cols) raise ValueError('Cannot find config Table.') + checkpoint_idx = cols.index('Download') try: flops_idx = cols.index('FLOPs') @@ -210,6 +219,8 @@ def parse_md(md_file): left = line[config_idx].index('](') + 2 right = line[config_idx].index(')', left) config = line[config_idx][left:right].strip('./') + config = osp.join( + osp.dirname(md_file), osp.basename(config)) elif line[config_idx].find('△') == -1: j += 1 continue @@ -315,7 +326,7 @@ def parse_md(md_file): i += 1 if len(models) == 0: - warnings.warn('no model is found in this md file') + warnings.warn(f'no model is found in {md_file}') result = {'Collections': [collection], 'Models': models} yml_file = md_file.replace('README.md', 'metafile.yml') @@ -363,9 +374,11 @@ def update_model_index(): sys.exit(0) file_modified = False + # pbar = tqdm.tqdm(range(len(file_list)), initial=0, dynamic_ncols=True) for fn in file_list: - print(f'process {fn}') file_modified |= parse_md(fn) + # pbar.update(1) + # pbar.set_description(f'processing {fn}') file_modified |= update_model_index() diff --git a/.github/workflows/merge_stage_test.yml b/.github/workflows/merge_stage_test.yml index c48e3a9796..ff94784262 100644 --- a/.github/workflows/merge_stage_test.yml +++ b/.github/workflows/merge_stage_test.yml @@ -9,11 +9,13 @@ on: - '.dev_scripts/**' - '.circleci/**' - 'configs/**' + - 'projects/**' branches: - dev-1.x - test-1.x - 1.x + - test-branch concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -41,16 +43,13 @@ jobs: run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html - name: Install MMEngine run: pip install git+https://github.com/open-mmlab/mmengine.git@main - - name: Install MMCV and MMDet + - name: Install MMCV run: | pip install -U openmim mim install 'mmcv >= 2.0.0rc1' - mim install 'mmdet >= 3.0.0rc2' - name: Install other dependencies run: | pip install -r requirements/tests.txt - pip install git+https://github.com/openai/CLIP.git - pip install imageio-ffmpeg - name: Build and install run: rm -rf .eggs && pip install -e . 
- name: Run unittests and generate coverage report @@ -64,7 +63,7 @@ jobs: strategy: matrix: python-version: [3.7] - torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0, 1.12.1] + torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0, 1.12.1, 1.13.0] include: - torch: 1.6.0 torchvision: 0.7.0 @@ -80,6 +79,8 @@ jobs: torchvision: 0.12.0 - torch: 1.12.1 torchvision: 0.13.1 + - torch: 1.13.0 + torchvision: 0.14.0 steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -92,17 +93,13 @@ jobs: run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html - name: Install MMEngine run: pip install git+https://github.com/open-mmlab/mmengine.git@main - - name: Install MMCV and MMDet + - name: Install MMCV run: | pip install -U openmim mim install 'mmcv >= 2.0.0rc1' - mim install 'mmdet >= 3.0.0rc2' - name: Install other dependencies run: | pip install -r requirements/tests.txt - pip install git+https://github.com/openai/CLIP.git - pip install imageio-ffmpeg - - name: Build and install run: rm -rf .eggs && pip install -e . - name: Run unittests and generate coverage report @@ -151,17 +148,61 @@ jobs: apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 - name: Install PyTorch run: python -m pip install torch==1.8.1+cpu torchvision==0.9.1+cpu -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html + - name: Install mmediting dependencies + run: | + pip install -U openmim + mim install 'mmcv >= 2.0.0rc1' + pip install -r requirements/tests.txt + - name: Build and install + run: | + python setup.py check -m -s + TORCH_CUDA_ARCH_LIST=7.0 pip install -e . + + build_cu116: + runs-on: ubuntu-18.04 + container: + image: pytorch/pytorch:1.13.0-cuda11.6-cudnn8-devel + strategy: + matrix: + python-version: [3.7] + include: + - torch: 1.8.1 + cuda: 10.2 + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Upgrade pip + run: pip install pip --upgrade + - name: Fetch GPG keys + run: | + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub + - name: Install Python-dev + run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev + if: ${{matrix.python-version != 3.9}} + - name: Install system dependencies + run: | + apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 + - name: Install PyTorch + run: python -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cpu - name: Install mmediting dependencies run: | pip install git+https://github.com/open-mmlab/mmengine.git@main pip install -U openmim mim install 'mmcv >= 2.0.0rc1' pip install -r requirements/tests.txt - pip install imageio-ffmpeg - name: Build and install run: | python setup.py check -m -s TORCH_CUDA_ARCH_LIST=7.0 pip install -e . 
+ - name: Run unittests and generate coverage report + run: | + coverage run --branch --source mmedit -m pytest tests/ + coverage xml --omit="**/stylegan3_ops/*,**/conv2d_gradfix.py,**/grid_sample_gradfix.py,**/misc.py,**/upfirdn2d.py,**all_gather_layer.py" + coverage report -m build_windows: runs-on: ${{ matrix.os }} @@ -187,10 +228,7 @@ jobs: python -m pip install git+https://github.com/open-mmlab/mmengine.git@main python -m pip install -U openmim mim install 'mmcv >= 2.0.0rc1' - mim install 'mmdet >= 3.0.0rc2' python -m pip install -r requirements/tests.txt - python -m pip install git+https://github.com/openai/CLIP.git - python -m pip install imageio-ffmpeg - name: Build and install run: | python -m pip install -e . diff --git a/.github/workflows/pr_stage_test.yml b/.github/workflows/pr_stage_test.yml index dab6e3145a..bac567a170 100644 --- a/.github/workflows/pr_stage_test.yml +++ b/.github/workflows/pr_stage_test.yml @@ -6,6 +6,7 @@ on: - 'README.md' - 'README_zh-CN.md' - 'docs/**' + - 'projects/**' - '.dev_scripts/**' - '.circleci/**' - 'configs/**' @@ -35,16 +36,13 @@ jobs: run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html - name: Install MMEngine run: pip install git+https://github.com/open-mmlab/mmengine.git@main - - name: Install MMCV and MMDet + - name: Install MMCV run: | pip install -U openmim mim install 'mmcv >= 2.0.0rc1' - mim install 'mmdet >= 3.0.0rc2' - name: Install other dependencies run: | pip install -r requirements/tests.txt - pip install git+https://github.com/openai/CLIP.git - pip install imageio-ffmpeg - name: Build and install run: rm -rf .eggs && pip install -e . - name: Run unittests and generate coverage report @@ -98,10 +96,7 @@ jobs: pip install git+https://github.com/open-mmlab/mmengine.git@main pip install -U openmim mim install 'mmcv >= 2.0.0rc1' - mim install 'mmdet >= 3.0.0rc2' pip install -r requirements/tests.txt - pip install git+https://github.com/openai/CLIP.git - pip install imageio-ffmpeg - name: Build and install run: | python setup.py check -m -s @@ -134,10 +129,7 @@ jobs: python -m pip install git+https://github.com/open-mmlab/mmengine.git@main python -m pip install -U openmim mim install 'mmcv >= 2.0.0rc1' - mim install 'mmdet >= 3.0.0rc2' python -m pip install -r requirements/tests.txt - pip install git+https://github.com/openai/CLIP.git - python -m pip install imageio-ffmpeg - name: Build and install run: | python -m pip install -e . 
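Taken together, the CircleCI and GitHub Actions changes above trim the test environments down to the same core dependency set. The following is only a sketch of that shared install sequence, assuming a plain CPU-only Python environment outside of CI (the docker-based jobs simply prefix each command with `docker exec mmedit`):

```shell
# Sketch of the trimmed install sequence shared by the updated CI jobs (CPU-only assumption).
pip install 'opencv-python!=4.7.0.68'   # pin away from the broken 4.7.0.68 wheel
pip install git+https://github.com/open-mmlab/mmengine.git@main
pip install -U openmim
mim install 'mmcv >= 2.0.0rc1'          # mmdet, CLIP and imageio-ffmpeg are no longer installed
pip install -r requirements/tests.txt
rm -rf .eggs && pip install -e .        # build and install mmediting itself
```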
diff --git a/.gitignore b/.gitignore index 1991b64bde..eafffbfb23 100644 --- a/.gitignore +++ b/.gitignore @@ -70,7 +70,6 @@ docs/en/_tmp/ docs/zh_cn/_build/ docs/zh_cn/_tmp/ requirements/src/ -docs/en/api/generated # PyBuilder target/ diff --git a/.readthedocs.yml b/.readthedocs.yml index 5fc74db3d3..4978771764 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,10 +1,9 @@ version: 2 -formats: all +formats: [pdf, epub] python: version: 3.7 install: - requirements: requirements/docs.txt - requirements: requirements/readthedocs.txt - - requirements: requirements/runtime.txt diff --git a/README.md b/README.md index 5eaa62b138..10b3f77c40 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,13 @@ [📘Documentation](https://mmediting.readthedocs.io/en/1.x/) | [🛠️Installation](https://mmediting.readthedocs.io/en/1.x/2_get_started.html#installation) | [👀Model Zoo](https://mmediting.readthedocs.io/en/1.x/3_model_zoo.html) | -[🆕Update News](docs/en/notes/3_changelog.md) | +[🆕Update News](docs/en/changelog.md) | [🚀Ongoing Projects](https://github.com/open-mmlab/mmediting/projects) | [🤔Reporting Issues](https://github.com/open-mmlab/mmediting/issues) - +English | [简体中文](README_zh-CN.md) -English | [简体中文](/README_zh-CN.md) + ## Introduction @@ -106,14 +106,16 @@ hope MMEditing could provide better experience. ### 🌟 Preview of 1.x version -A brand new version of [**MMEditing v1.0.0rc4**](https://github.com/open-mmlab/mmediting/releases/tag/v1.0.0rc4) was released in 05/12/2022: +A brand new version of [**MMEditing v1.0.0rc5**](https://github.com/open-mmlab/mmediting/releases/tag/v1.0.0rc5) was released in 04/01/2023: -- Support Text2Image Task! [Disco-Diffusion](configs/disco_diffusion/README.md) -- Support 3D-aware Generation Task! [EG3D](configs/eg3d/README.md) +- Support well-known text-to-image method [Stable Diffusion](configs/stable_diffusion/README.md)! +- Support an efficient image restoration algorithm [Restormer](configs/restormer/README.md)! +- Support a new text-to-image algorithm [GLIDE](projects/glide/configs/README.md)! +- Support swin based image restoration algorithm [SwinIR](configs/swinir/README.md)! +- [Projects](projects/README.md) is opened for community to add projects to MMEditing. - Support all the tasks, models, metrics, and losses in [MMGeneration](https://github.com/open-mmlab/mmgeneration) 😍. - Unifies interfaces of all components based on [MMEngine](https://github.com/open-mmlab/mmengine). - Support patch-based and slider-based image and video comparison viewer. -- Support image colorization. Find more new features in [1.x branch](https://github.com/open-mmlab/mmediting/tree/1.x). Issues and PRs are welcome! @@ -153,11 +155,11 @@ cd mmediting pip3 install -e . ``` -Please refer to [get_started.md](docs/en/2_get_started.md) for more detailed instruction. +Please refer to [installation](docs/en/get_started/install.md) for more detailed instruction. ## Getting Started -Please see [get_started.md](docs/en/2_get_started.md) and [inference.md](docs/en/user_guides/3_inference.md) for the basic usage of MMEditing. +Please see [quick run](docs/en/get_started/quick_run.md) and [inference](docs/en/user_guides/inference.md) for the basic usage of MMEditing. ## Model Zoo @@ -253,7 +255,7 @@ Supported algorithms:
-Image2Image Translation +Image2Image - ✅ [Pix2Pix](configs/pix2pix/README.md) (CVPR'2017) - ✅ [CycleGAN](configs/cyclegan/README.md) (ICCV'2017) @@ -270,7 +272,9 @@ Supported algorithms:
Text2Image +- ✅ [GLIDE](projects/glide/configs/README.md) (NeurIPS'2021) - ✅ [Disco-Diffusion](configs/disco_diffusion/README.md) +- ✅ [Stable-Diffusion](configs/stable_diffusion/README.md)
@@ -278,7 +282,17 @@ Supported algorithms: 3D-aware Generation -- ✅ [EG3D](configs/eg3d/README.md) +- ✅ [EG3D](configs/eg3d/README.md) (CVPR'2022) + +
+ +
+ +Image Restoration + +- ✅ [SwinIR](configs/swinir/README.md) (ICCVW'2021) +- ✅ [NAFNet](configs/nafnet/README.md) (ECCV'2022) +- ✅ [Restormer](configs/restormer/README.md) (CVPR'2022)
@@ -307,7 +321,8 @@ If MMEditing is helpful to your research, please cite it as below. ## License -This project is released under the [Apache 2.0 license](LICENSE). Please refer to [LICENSES.md](LICENSES.md) for the careful check, if you are using our code for commercial matters. +This project is released under the [Apache 2.0 license](LICENSE). +Please refer to [LICENSES](LICENSE) for the careful check, if you are using our code for commercial matters. ## Projects in OpenMMLab 2.0 diff --git a/README_zh-CN.md b/README_zh-CN.md index 016b0c1bcb..f0f32e7693 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -29,13 +29,13 @@ [📘使用文档](https://mmediting.readthedocs.io/zh_CN/1.x/) | [🛠️安装教程](https://mmediting.readthedocs.io/zh_CN/1.x/2_get_started.htmll) | [👀模型库](https://mmediting.readthedocs.io/zh_CN/1.x/3_model_zoo.html) | -[🆕更新记录](docs/zh_cn/notes/3_changelog.md) | +[🆕更新记录](docs/zh_cn/changelog.md) | [🚀进行中的项目](https://github.com/open-mmlab/mmediting/projects) | [🤔提出问题](https://github.com/open-mmlab/mmediting/issues) - +[English](README.md) | 简体中文 -[English](/README.md) | 简体中文 + ## 介绍 @@ -105,14 +105,16 @@ MMEditing 缜密地设计新的框架并将其精心实现,希望能够为您 ### 🌟 1.x 预览版本 -全新的 [**MMEditing v1.0.0rc4**](https://github.com/open-mmlab/mmediting/releases/tag/v1.0.0rc4) 已经在 05/12/2022 发布: +全新的 [**MMEditing v1.0.0rc5**](https://github.com/open-mmlab/mmediting/releases/tag/v1.0.0rc5) 已经在 04/01/2023 发布: -- 支持了图文生成任务! [Disco-Diffusion](configs/disco_diffusion/README.md) -- 支持了3D级图像生成任务! [EG3D](configs/eg3d/README.md) +- 支持了著名的文本生成图像方法 [stable diffusion](configs/stable_diffusion/README.md)! +- 支持了一个高效的图像复原算法 [Restormer](configs/restormer/README.md)! +- 支持了一个新的文本到图像生成算法 [GLIDE](projects/glide/configs/README.md)! +- 支持了基于swin的图像复原算法 [SwinIR](configs/swinir/README.md)! +- 开启了[projects](projects/README.md)以便社区用户添加新的项目到MMEditing. - 支持[MMGeneration](https://github.com/open-mmlab/mmgeneration)中的全量任务、模型、优化函数和评价指标 😍。 - 基于[MMEngine](https://github.com/open-mmlab/mmengine)统一了各组件接口。 - 支持基于图像子块以及滑动条的图像和视频比较可视化工具。 -- 支持图像上色任务。 在[1.x 分支](https://github.com/open-mmlab/mmediting/tree/1.x)中发现更多特性!欢迎提 Issues 和 PRs! @@ -151,11 +153,11 @@ cd mmediting pip3 install -e . ``` -更详细的安装指南请参考 [get_started.md](docs/zh_cn/2_get_started.md) 。 +更详细的安装指南请参考 [installation](docs/zh_cn/get_started/install.md) 。 ## 开始使用 -请参考[使用教程](docs/zh_cn/2_get_started.md)和[功能演示](docs/zh_cn/user_guides/3_inference.md)获取MMEditing的基本用法。 +请参考[使用教程](docs/zh_cn/get_started/install.md)和[功能演示](docs/zh_cn/get_started/quick_run.md)获取MMEditing的基本用法。 ## 模型库 @@ -251,7 +253,7 @@ pip3 install -e .
-Image2Image Translation +Image2Image - ✅ [Pix2Pix](configs/pix2pix/README.md) (CVPR'2017) - ✅ [CycleGAN](configs/cyclegan/README.md) (ICCV'2017) @@ -268,7 +270,9 @@ pip3 install -e .
Text2Image +- ✅ [GLIDE](projects/glide/configs/README.md) (NeurIPS'2021) - ✅ [Disco-Diffusion](configs/disco_diffusion/README.md) +- ✅ [Stable-Diffusion](configs/stable_diffusion/README.md)
@@ -276,7 +280,17 @@ pip3 install -e . 3D-aware Generation -- ✅ [EG3D](configs/eg3d/README.md) +- ✅ [EG3D](configs/eg3d/README.md) (CVPR'2022) + +
+ +
+ +Image Restoration + +- ✅ [SwinIR](configs/swinir/README.md) (ICCVW'2021) +- ✅ [NAFNet](configs/nafnet/README.md) (ECCV'2022) +- ✅ [Restormer](configs/restormer/README.md) (CVPR'2022)
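The `configs/_base_/datasets/*_test_config.py` files added below only define test pipelines, dataloaders and evaluators (consumed through `MultiTestLoop`); they are meant to be inherited by a model config rather than run directly. A hedged usage sketch follows — the config file and checkpoint paths are placeholders for illustration, not files introduced by this change:

```shell
# Hypothetical example: evaluate a restoration model against the multi-dataset test configs below.
# Both paths are placeholders; the data roots (e.g. data/GoPro, data/SIDD) must be prepared first.
python tools/test.py configs/restormer/<restormer-config>.py <path/to/restormer-checkpoint>.pth
```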
diff --git a/configs/_base_/datasets/deblurring-defocus_test_config.py b/configs/_base_/datasets/deblurring-defocus_test_config.py new file mode 100644 index 0000000000..360f10b8fa --- /dev/null +++ b/configs/_base_/datasets/deblurring-defocus_test_config.py @@ -0,0 +1,98 @@ +test_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='imgL', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='imgR', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='PackEditInputs') +] + +dpdd_data_root = 'data/DPDD' + +dpdd_indoor_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='DPDD-Indoor', task_name='deblurring'), + data_root=dpdd_data_root, + data_prefix=dict( + img='inputC', imgL='inputL', imgR='inputR', gt='target'), + ann_file='indoor_labels.txt', + pipeline=test_pipeline)) +dpdd_indoor_evaluator = [ + dict(type='MAE', prefix='DPDD-Indoor'), + dict(type='PSNR', prefix='DPDD-Indoor'), + dict(type='SSIM', prefix='DPDD-Indoor'), +] + +dpdd_outdoor_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='DPDD-Outdoor', task_name='deblurring'), + data_root=dpdd_data_root, + data_prefix=dict( + img='inputC', imgL='inputL', imgR='inputR', gt='target'), + ann_file='outdoor_labels.txt', + pipeline=test_pipeline)) +dpdd_outdoor_evaluator = [ + dict(type='MAE', prefix='DPDD-Outdoor'), + dict(type='PSNR', prefix='DPDD-Outdoor'), + dict(type='SSIM', prefix='DPDD-Outdoor'), +] + +dpdd_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='DPDD-Combined', task_name='deblurring'), + data_root=dpdd_data_root, + data_prefix=dict( + img='inputC', imgL='inputL', imgR='inputR', gt='target'), + pipeline=test_pipeline)) +dpdd_evaluator = [ + dict(type='MAE', prefix='DPDD-Combined'), + dict(type='PSNR', prefix='DPDD-Combined'), + dict(type='SSIM', prefix='DPDD-Combined'), +] + +# test config +test_cfg = dict(type='MultiTestLoop') +test_dataloader = [ + dpdd_indoor_dataloader, + dpdd_outdoor_dataloader, + dpdd_dataloader, +] +test_evaluator = [ + dpdd_indoor_evaluator, + dpdd_outdoor_evaluator, + dpdd_evaluator, +] diff --git a/configs/_base_/datasets/deblurring-motion_test_config.py b/configs/_base_/datasets/deblurring-motion_test_config.py new file mode 100644 index 0000000000..a863d70df4 --- /dev/null +++ b/configs/_base_/datasets/deblurring-motion_test_config.py @@ -0,0 +1,98 @@ +test_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='PackEditInputs') +] + +gopro_data_root = 'data/GoPro' +gopro_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + 
sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='GoPro', task_name='deblurring'), + data_root=gopro_data_root, + data_prefix=dict(img='input', gt='target'), + pipeline=test_pipeline)) +gopro_evaluator = [ + dict(type='PSNR', prefix='GoPro'), + dict(type='SSIM', prefix='GoPro'), +] + +hide_data_root = 'data/HIDE' +hide_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='HIDE', task_name='deblurring'), + data_root=hide_data_root, + data_prefix=dict(img='input', gt='target'), + pipeline=test_pipeline)) +hide_evaluator = [ + dict(type='PSNR', prefix='HIDE'), + dict(type='SSIM', prefix='HIDE'), +] + +realblurj_data_root = 'data/RealBlur_J' +realblurj_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='RealBlur_J', task_name='deblurring'), + data_root=realblurj_data_root, + data_prefix=dict(img='input', gt='target'), + pipeline=test_pipeline)) +realblurj_evaluator = [ + dict(type='PSNR', convert_to='Y', prefix='RealBlurJ'), + dict(type='SSIM', convert_to='Y', prefix='RealBlurJ'), +] + +realblurr_data_root = 'data/RealBlur_R' +realblurr_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='RealBlur_R', task_name='deblurring'), + data_root=realblurr_data_root, + data_prefix=dict(img='input', gt='target'), + pipeline=test_pipeline)) +realblurr_evaluator = [ + dict(type='PSNR', convert_to='Y', prefix='RealBlurR'), + dict(type='SSIM', convert_to='Y', prefix='RealBlurR'), +] + +# test config +test_cfg = dict(type='MultiTestLoop') +test_dataloader = [ + gopro_dataloader, + hide_dataloader, + realblurj_dataloader, + realblurr_dataloader, +] +test_evaluator = [ + gopro_evaluator, + hide_evaluator, + realblurj_evaluator, + realblurr_evaluator, +] diff --git a/configs/_base_/datasets/decompression_test_config.py b/configs/_base_/datasets/decompression_test_config.py new file mode 100644 index 0000000000..0a52247a2e --- /dev/null +++ b/configs/_base_/datasets/decompression_test_config.py @@ -0,0 +1,66 @@ +quality = 10 +test_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='RandomJPEGCompression', + params=dict(quality=[quality, quality], color_type='color'), + bgr2rgb=True, + keys=['img']), + dict(type='PackEditInputs') +] + +classic5_data_root = 'data/Classic5' +classic5_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='classic5', task_name='CAR'), + data_root=classic5_data_root, + data_prefix=dict(img='', gt=''), + pipeline=test_pipeline)) +classic5_evaluator = [ + dict(type='PSNR', prefix='Classic5'), + dict(type='SSIM', prefix='Classic5'), +] + +live1_data_root = 'data/LIVE1' +live1_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + 
sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='live1', task_name='CAR'), + data_root=live1_data_root, + data_prefix=dict(img='', gt=''), + pipeline=test_pipeline)) +live1_evaluator = [ + dict(type='PSNR', prefix='LIVE1'), + dict(type='SSIM', prefix='LIVE1'), +] + +# test config +test_cfg = dict(type='MultiTestLoop') +test_dataloader = [ + classic5_dataloader, + live1_dataloader, +] +test_evaluator = [ + classic5_evaluator, + live1_evaluator, +] diff --git a/configs/_base_/datasets/denoising-gaussian_color_test_config.py b/configs/_base_/datasets/denoising-gaussian_color_test_config.py new file mode 100644 index 0000000000..7950bebf35 --- /dev/null +++ b/configs/_base_/datasets/denoising-gaussian_color_test_config.py @@ -0,0 +1,104 @@ +sigma = 15 +test_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='RandomNoise', + params=dict( + noise_type=['gaussian'], + noise_prob=[1], + gaussian_sigma=[sigma, sigma], + gaussian_gray_noise_prob=0), + keys=['img']), + dict(type='PackEditInputs') +] + +data_root = 'data/denoising_gaussian_test' +cbsd68_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='CBSD68', task_name='denoising'), + data_root=data_root, + data_prefix=dict(img='CBSD68', gt='CBSD68'), + pipeline=test_pipeline)) +cbsd68_evaluator = [ + dict(type='PSNR', prefix='CBSD68'), + dict(type='SSIM', prefix='CBSD68'), +] + +kodak24_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Kodak24', task_name='denoising'), + data_root=data_root, + data_prefix=dict(img='Kodak24', gt='Kodak24'), + pipeline=test_pipeline)) +kodak24_evaluator = [ + dict(type='PSNR', prefix='Kodak24'), + dict(type='SSIM', prefix='Kodak24'), +] + +mcmaster_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='McMaster', task_name='denoising'), + data_root=data_root, + data_prefix=dict(img='McMaster', gt='McMaster'), + pipeline=test_pipeline)) +mcmaster_evaluator = [ + dict(type='PSNR', prefix='McMaster'), + dict(type='SSIM', prefix='McMaster'), +] + +urban100_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Urban100', task_name='denoising'), + data_root=data_root, + data_prefix=dict(img='Urban100', gt='Urban100'), + pipeline=test_pipeline)) +urban100_evaluator = [ + dict(type='PSNR', prefix='Urban100'), + dict(type='SSIM', prefix='Urban100'), +] + +# test config +test_cfg = dict(type='MultiTestLoop') +test_dataloader = [ + cbsd68_dataloader, + kodak24_dataloader, + mcmaster_dataloader, + urban100_dataloader, +] +test_evaluator = [ + cbsd68_evaluator, + kodak24_evaluator, + mcmaster_evaluator, + urban100_evaluator, +] diff --git a/configs/_base_/datasets/denoising-gaussian_gray_test_config.py 
b/configs/_base_/datasets/denoising-gaussian_gray_test_config.py new file mode 100644 index 0000000000..774869fd07 --- /dev/null +++ b/configs/_base_/datasets/denoising-gaussian_gray_test_config.py @@ -0,0 +1,88 @@ +sigma = 15 +test_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + to_y_channel=True, + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + to_y_channel=True, + imdecode_backend='cv2'), + dict( + type='RandomNoise', + params=dict( + noise_type=['gaussian'], + noise_prob=[1], + gaussian_sigma=[sigma, sigma], + gaussian_gray_noise_prob=1), + keys=['img']), + dict(type='PackEditInputs') +] + +data_root = 'data/denoising_gaussian_test' +set12_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Set12', task_name='denoising'), + data_root=data_root, + data_prefix=dict(img='Set12', gt='Set12'), + pipeline=test_pipeline)) +set12_evaluator = [ + dict(type='PSNR', prefix='Set12'), + dict(type='SSIM', prefix='Set12'), +] + +bsd68_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='BSD68', task_name='denoising'), + data_root=data_root, + data_prefix=dict(img='BSD68', gt='BSD68'), + pipeline=test_pipeline)) +bsd68_evaluator = [ + dict(type='PSNR', prefix='BSD68'), + dict(type='SSIM', prefix='BSD68'), +] + +urban100_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Urban100', task_name='denoising'), + data_root=data_root, + data_prefix=dict(img='Urban100', gt='Urban100'), + pipeline=test_pipeline)) +urban100_evaluator = [ + dict(type='PSNR', prefix='Urban100'), + dict(type='SSIM', prefix='Urban100'), +] + +# test config +test_cfg = dict(type='MultiTestLoop') +test_dataloader = [ + set12_dataloader, + bsd68_dataloader, + urban100_dataloader, +] +test_evaluator = [ + set12_evaluator, + bsd68_evaluator, + urban100_evaluator, +] diff --git a/configs/_base_/datasets/denoising-real_test_config.py b/configs/_base_/datasets/denoising-real_test_config.py new file mode 100644 index 0000000000..88a069f850 --- /dev/null +++ b/configs/_base_/datasets/denoising-real_test_config.py @@ -0,0 +1,60 @@ +test_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='PackEditInputs') +] + +sidd_data_root = 'data/SIDD' +sidd_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='SIDD', task_name='denoising'), + data_root=sidd_data_root, + data_prefix=dict(img='input', gt='groundtruth'), + pipeline=test_pipeline)) +sidd_evaluator = [ + dict(type='PSNR', prefix='SIDD'), + dict(type='SSIM', prefix='SIDD'), +] + +dnd_data_root = 'data/DND' +dnd_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', 
shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='DND', task_name='denoising'), + data_root=dnd_data_root, + data_prefix=dict(img='input', gt='groundtruth'), + pipeline=test_pipeline)) +dnd_evaluator = [ + dict(type='PSNR', prefix='DND'), + dict(type='SSIM', prefix='DND'), +] + +# test config +test_cfg = dict(type='MultiTestLoop') +test_dataloader = [ + sidd_dataloader, + # dnd_dataloader, +] +test_evaluator = [ + sidd_evaluator, + # dnd_dataloader, +] diff --git a/configs/_base_/datasets/deraining_test_config.py b/configs/_base_/datasets/deraining_test_config.py new file mode 100644 index 0000000000..2e633f08b9 --- /dev/null +++ b/configs/_base_/datasets/deraining_test_config.py @@ -0,0 +1,117 @@ +test_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='PackEditInputs') +] + +rain100h_data_root = 'data/Rain100H' +rain100h_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Rain100H', task_name='deraining'), + data_root=rain100h_data_root, + data_prefix=dict(img='input', gt='target'), + pipeline=test_pipeline)) +rain100h_evaluator = [ + dict(type='PSNR', convert_to='Y', prefix='Rain100H'), + dict(type='SSIM', convert_to='Y', prefix='Rain100H'), +] + +rain100l_data_root = 'data/Rain100L' +rain100l_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Rain100L', task_name='deraining'), + data_root=rain100l_data_root, + data_prefix=dict(img='input', gt='target'), + pipeline=test_pipeline)) +rain100l_evaluator = [ + dict(type='PSNR', convert_to='Y', prefix='Rain100L'), + dict(type='SSIM', convert_to='Y', prefix='Rain100L'), +] + +test100_data_root = 'data/Test100' +test100_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Test100', task_name='deraining'), + data_root=test100_data_root, + data_prefix=dict(img='input', gt='target'), + pipeline=test_pipeline)) +test100_evaluator = [ + dict(type='PSNR', convert_to='Y', prefix='Test100'), + dict(type='SSIM', convert_to='Y', prefix='Test100'), +] + +test1200_data_root = 'data/Test1200' +test1200_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Test1200', task_name='deraining'), + data_root=test1200_data_root, + data_prefix=dict(img='input', gt='target'), + pipeline=test_pipeline)) +test1200_evaluator = [ + dict(type='PSNR', convert_to='Y', prefix='Test1200'), + dict(type='SSIM', convert_to='Y', prefix='Test1200'), +] + +test2800_data_root = 'data/Test2800' +test2800_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='Test2800', task_name='deraining'), + data_root=test2800_data_root, + data_prefix=dict(img='input', 
gt='target'), + pipeline=test_pipeline)) +test2800_evaluator = [ + dict(type='PSNR', convert_to='Y', prefix='Test2800'), + dict(type='SSIM', convert_to='Y', prefix='Test2800'), +] + +# test config +test_cfg = dict(type='MultiTestLoop') +test_dataloader = [ + rain100h_dataloader, + rain100l_dataloader, + test100_dataloader, + test1200_dataloader, + test2800_dataloader, +] +test_evaluator = [ + rain100h_evaluator, + rain100l_evaluator, + test100_evaluator, + test1200_evaluator, + test2800_evaluator, +] diff --git a/configs/aot_gan/README.md b/configs/aot_gan/README.md index d5e7eb0ccb..8c376fc7f5 100644 --- a/configs/aot_gan/README.md +++ b/configs/aot_gan/README.md @@ -22,9 +22,9 @@ State-of-the-art image inpainting approaches can suffer from generating distorte **Places365-Challenge** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :--------------------------------------------------: | :----------------: | :--------: | :---------: | :-----------: | :------: | :---: | :---: | :---------------------: | :-----------------------------------------------------: | -| [AOT-GAN](/configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py) | free-form (50-60%) | 512x512 | 500k | Places365-val | 7.07 | 19.01 | 0.682 | 4 (GeForce GTX 1080 Ti) | [model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :------------------------------------------------: | :----------------: | :--------: | :---------: | :-----------: | :------: | :---: | :---: | :---------------------: | :-------------------------------------------------------: | +| [AOT-GAN](./aot-gan_smpgan_4xb4_places-512x512.py) | free-form (50-60%) | 512x512 | 500k | Places365-val | 7.07 | 19.01 | 0.682 | 4 (GeForce GTX 1080 Ti) | [model](https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.json) | More results for different mask area: @@ -84,13 +84,13 @@ You can use the following commands to test a model with cpu or single/multiple G ```shell # cpu test -CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth # single-gpu test -python tools/test.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth +python tools/test.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth # multi-gpu test -./tools/dist_test.sh configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py 
https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth 8 +./tools/dist_test.sh configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth 8 ``` For more details, you can refer to **Test a pre-trained model** part in [train_test.md](/docs/en/user_guides/train_test.md#Test-a-pre-trained-model-in-MMEditing). diff --git a/configs/aot_gan/README_zh-CN.md b/configs/aot_gan/README_zh-CN.md index 508936cbf3..abcec11c8c 100644 --- a/configs/aot_gan/README_zh-CN.md +++ b/configs/aot_gan/README_zh-CN.md @@ -20,9 +20,9 @@ **Places365-Challenge** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :------------------------------------------------------: | :----------------: | :-----: | :--------: | :-----------: | :-----: | :---: | :---: | :---------------------: | :------------------------------------------------------: | -| [AOT-GAN](/configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py) | free-form (50-60%) | 512x512 | 500k | Places365-val | 7.07 | 19.01 | 0.682 | 4 (GeForce GTX 1080 Ti) | [模型](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth) \| [日志](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| :------------------------------------------------: | :----------------: | :-----: | :--------: | :-----------: | :-----: | :---: | :---: | :---------------------: | :------------------------------------------------------------: | +| [AOT-GAN](./aot-gan_smpgan_4xb4_places-512x512.py) | free-form (50-60%) | 512x512 | 500k | Places365-val | 7.07 | 19.01 | 0.682 | 4 (GeForce GTX 1080 Ti) | [模型](https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.json) | @@ -80,13 +80,13 @@ python tools/train.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py ```shell # CPU上测试 -CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth # 单个GPU上测试 -python tools/test.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth +python tools/test.py configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth # 多个GPU上测试 -./tools/dist_test.sh configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth 8 +./tools/dist_test.sh configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth 8 
``` 更多细节可以参考 [train_test.md](/docs/zh_cn/user_guides/train_test.md) 中的 **Test a pre-trained model** 部分。 diff --git a/configs/aot_gan/metafile.yml b/configs/aot_gan/metafile.yml index e0cb08db51..2db2984d57 100644 --- a/configs/aot_gan/metafile.yml +++ b/configs/aot_gan/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/pdf/2104.01431.pdf README: configs/aot_gan/README.md + Task: + - inpainting + Year: 2021 Models: - Config: configs/aot_gan/aot-gan_smpgan_4xb4_places-512x512.py In Collection: AOT-GAN @@ -20,4 +23,4 @@ Models: SSIM: 0.682 l1 error: 7.07 Task: Inpainting - Weights: https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth + Weights: https://download.openmmlab.com/mmediting/inpainting/aot_gan/AOT-GAN_512x512_4x12_places_20220509-6641441b.pth diff --git a/configs/basicvsr/README.md b/configs/basicvsr/README.md index 7eceedeeb1..2516443664 100644 --- a/configs/basicvsr/README.md +++ b/configs/basicvsr/README.md @@ -23,11 +23,11 @@ Video super-resolution (VSR) approaches tend to have more components than the im Evaluated on RGB channels for REDS4 and Y channel for others. The metrics are `PSNR` / `SSIM` . The pretrained weights of SPyNet can be found [here](https://download.openmmlab.com/mmediting/restorers/basicvsr/spynet_20210409-c6c1bd09.pth). -| Method | REDS4 (BIx4) PSNR (RGB) | Vimeo-90K-T (BIx4) PSNR (Y) | Vid4 (BIx4) PSNR (Y) | UDM10 (BDx4) PSNR (Y) | Vimeo-90K-T (BDx4) PSNR (Y) | Vid4 (BDx4) PSNR (Y) | REDS4 (BIx4) SSIM (RGB) | Vimeo-90K-T (BIx4) SSIM (Y) | Vid4 (BIx4) SSIM (Y) | UDM10 (BDx4) SSIM (Y) | Vimeo-90K-T (BDx4) SSIM (Y) | Vid4 (BDx4) SSIM (Y) | GPU Info | Download | -| :--------------------------------------------------------------------: | :---------------------: | :-------------------------: | :------------------: | :-------------------: | :-------------------------: | :------------------: | :---------------------: | :-------------------------: | :------------------: | :-------------------: | :-------------------------: | :------------------: | :----------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| [basicvsr_reds4](/configs/basicvsr/basicvsr_2xb4_reds4.py) | **31.4170** | 36.2848 | 27.2694 | 33.4478 | 34.4700 | 24.4541 | **0.8909** | 0.9395 | 0.8318 | 0.9306 | 0.9286 | 0.7455 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20120409-0e599677.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20210409_092646.log.json) | -| [basicvsr_vimeo90k_bi](/configs/basicvsr/basicvsr_2xb4_vimeo90k-bi.py) | 30.3128 | **37.2026** | **27.2755** | 34.5554 | 34.8097 | 25.0517 | 0.8660 | **0.9451** | **0.8248** | 0.9434 | 0.9316 | 0.7636 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bi_20210409-d2d8f760.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bi_20210409_132702.log.json) | -| [basicvsr_vimeo90k_bd](/configs/basicvsr/basicvsr_2xb4_vimeo90k-bd.py) | 29.0376 | 34.6427 | 26.2708 | **39.9953** | **37.5501** | **27.9791** | 0.8481 | 0.9335 | 0.8022 | **0.9695** | **0.9499** | **0.8556** | 2 (Tesla V100-PCIE-32GB) | 
[model](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bd_20210409-0154dd64.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bd_20210409_132740.log.json) | +| Method | REDS4 (BIx4) PSNR (RGB) | Vimeo-90K-T (BIx4) PSNR (Y) | Vid4 (BIx4) PSNR (Y) | UDM10 (BDx4) PSNR (Y) | Vimeo-90K-T (BDx4) PSNR (Y) | Vid4 (BDx4) PSNR (Y) | REDS4 (BIx4) SSIM (RGB) | Vimeo-90K-T (BIx4) SSIM (Y) | Vid4 (BIx4) SSIM (Y) | UDM10 (BDx4) SSIM (Y) | Vimeo-90K-T (BDx4) SSIM (Y) | Vid4 (BDx4) SSIM (Y) | GPU Info | Download | +| :----------------------------------------------------: | :---------------------: | :-------------------------: | :------------------: | :-------------------: | :-------------------------: | :------------------: | :---------------------: | :-------------------------: | :------------------: | :-------------------: | :-------------------------: | :------------------: | :----------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| [basicvsr_reds4](./basicvsr_2xb4_reds4.py) | **31.4170** | 36.2848 | 27.2694 | 33.4478 | 34.4700 | 24.4541 | **0.8909** | 0.9395 | 0.8318 | 0.9306 | 0.9286 | 0.7455 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20120409-0e599677.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20210409_092646.log.json) | +| [basicvsr_vimeo90k_bi](./basicvsr_2xb4_vimeo90k-bi.py) | 30.3128 | **37.2026** | **27.2755** | 34.5554 | 34.8097 | 25.0517 | 0.8660 | **0.9451** | **0.8248** | 0.9434 | 0.9316 | 0.7636 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bi_20210409-d2d8f760.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bi_20210409_132702.log.json) | +| [basicvsr_vimeo90k_bd](./basicvsr_2xb4_vimeo90k-bd.py) | 29.0376 | 34.6427 | 26.2708 | **39.9953** | **37.5501** | **27.9791** | 0.8481 | 0.9335 | 0.8022 | **0.9695** | **0.9499** | **0.8556** | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bd_20210409-0154dd64.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bd_20210409_132740.log.json) | ## Quick Start diff --git a/configs/basicvsr/README_zh-CN.md b/configs/basicvsr/README_zh-CN.md index 11c8a62f2c..69498cb82a 100644 --- a/configs/basicvsr/README_zh-CN.md +++ b/configs/basicvsr/README_zh-CN.md @@ -25,9 +25,9 @@ SPyNet 的 预训练权重在[这里](https://download.openmmlab.com/mmediting/r | 算法 | REDS4 (BIx4)
PSNR/SSIM (RGB) | Vimeo-90K-T (BIx4)
PSNR/SSIM (Y) | Vid4 (BIx4)
PSNR/SSIM (Y) | UDM10 (BDx4)
PSNR/SSIM (Y) | Vimeo-90K-T (BDx4)
PSNR/SSIM (Y) | Vid4 (BDx4)
PSNR/SSIM (Y) | GPU 信息 | 下载 | | :-: | :-----------------------------: | :---------------------------------: | :--------------------------: | :---------------------------: | :---------------------------------: | :--------------------------: | :-----: | :--: | -| [basicvsr_reds4](/configs/basicvsr/basicvsr_2xb4_reds4.py) | **31.4170/0.8909** | 36.2848/0.9395 | 27.2694/0.8318 | 33.4478/0.9306 | 34.4700/0.9286 | 24.4541/0.7455 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20120409-0e599677.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20210409_092646.log.json) | -| [basicvsr_vimeo90k_bi](/configs/basicvsr/basicvsr_2xb4_vimeo90k-bi.py) | 30.3128/0.8660 | **37.2026/0.9451** | **27.2755/0.8248** | 34.5554/0.9434 | 34.8097/0.9316 | 25.0517/0.7636 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bi_20210409-d2d8f760.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bi_20210409_132702.log.json) | -| [basicvsr_vimeo90k_bd](/configs/basicvsr/basicvsr_2xb4_vimeo90k-bd.py) | 29.0376/0.8481 | 34.6427/0.9335 | 26.2708/0.8022 | **39.9953/0.9695** | **37.5501/0.9499** | **27.9791/0.8556** | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bd_20210409-0154dd64.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bd_20210409_132740.log.json) | +| [basicvsr_reds4](./basicvsr_2xb4_reds4.py) | **31.4170/0.8909** | 36.2848/0.9395 | 27.2694/0.8318 | 33.4478/0.9306 | 34.4700/0.9286 | 24.4541/0.7455 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20120409-0e599677.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20210409_092646.log.json) | +| [basicvsr_vimeo90k_bi](./basicvsr_2xb4_vimeo90k-bi.py) | 30.3128/0.8660 | **37.2026/0.9451** | **27.2755/0.8248** | 34.5554/0.9434 | 34.8097/0.9316 | 25.0517/0.7636 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bi_20210409-d2d8f760.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bi_20210409_132702.log.json) | +| [basicvsr_vimeo90k_bd](./basicvsr_2xb4_vimeo90k-bd.py) | 29.0376/0.8481 | 34.6427/0.9335 | 26.2708/0.8022 | **39.9953/0.9695** | **37.5501/0.9499** | **27.9791/0.8556** | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bd_20210409-0154dd64.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_vimeo90k_bd_20210409_132740.log.json) | ## 快速开始 diff --git a/configs/basicvsr/metafile.yml b/configs/basicvsr/metafile.yml index 5428476b32..697f825ec9 100644 --- a/configs/basicvsr/metafile.yml +++ b/configs/basicvsr/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2012.02181 README: configs/basicvsr/README.md + Task: + - video super-resolution + Year: 2021 Models: - Config: configs/basicvsr/basicvsr_2xb4_reds4.py In Collection: BasicVSR diff --git a/configs/basicvsr_pp/README.md b/configs/basicvsr_pp/README.md index 807f0a7aca..95b6894346 100644 --- a/configs/basicvsr_pp/README.md +++ b/configs/basicvsr_pp/README.md @@ -24,44 +24,27 @@ The pretrained weights of SPyNet can be found [here](https://download.openmmlab. 
| Method | REDS4 (BIx4) PSNR (RGB) | Vimeo-90K-T (BIx4) PSNR (Y) | Vid4 (BIx4) PSNR (Y) | UDM10 (BDx4) PSNR (Y) | Vimeo-90K-T (BDx4) PSNR (Y) | Vid4 (BDx4) PSNR (Y) | GPU Info | Download | | :-----------------: | :---------------------: | :-------------------------: | :------------------: | :-------------------: | :-------------------------: | :------------------: | :-------------------: | :--------------------: | -| [basicvsr_plusplus_c64n7_8x1_600k_reds4](/configs/basicvsr_pp/basicvsr-pp_c64n7_8xb1-600k_reds4.py) | **32.3855** | 36.4445 | 27.7674 | 34.6868 | 34.0372 | 24.6209 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217-db622b2f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217_113115.log.json) | -| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bi](/configs/basicvsr_pp/basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bi.py) | 31.0126 | **37.7864** | **27.7882** | 33.1211 | 33.8972 | 23.6086 | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305-4ef437e2.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305_141254.log.json) | -| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bd](/configs/basicvsr_pp/basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bd.py) | 29.2041 | 34.7248 | 26.4377 | **40.7216** | **38.2054** | **29.0400** | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305-ab315ab1.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305_140921.log.json) | +| [basicvsr_plusplus_c64n7_8x1_600k_reds4](./basicvsr-pp_c64n7_8xb1-600k_reds4.py) | **32.3855** | 36.4445 | 27.7674 | 34.6868 | 34.0372 | 24.6209 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217-db622b2f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217_113115.log.json) | +| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bi](./basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bi.py) | 31.0126 | **37.7864** | **27.7882** | 33.1211 | 33.8972 | 23.6086 | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305-4ef437e2.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305_141254.log.json) | +| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bd](./basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bd.py) | 29.2041 | 34.7248 | 26.4377 | **40.7216** | **38.2054** | **29.0400** | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305-ab315ab1.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305_140921.log.json) | | Method | REDS4 (BIx4) SSIM (RGB) | Vimeo-90K-T (BIx4) SSIM (Y) | Vid4 (BIx4) SSIM (Y) | UDM10 (BDx4) SSIM (Y) | Vimeo-90K-T (BDx4) SSIM (Y) | Vid4 (BDx4) SSIM (Y) | GPU Info | Download | | 
:-----------------: | :---------------------: | :-------------------------: | :------------------: | :-------------------: | :-------------------------: | :------------------: | :-------------------: | :--------------------: | -| [basicvsr_plusplus_c64n7_8x1_600k_reds4](/configs/basicvsr_pp/basicvsr-pp_c64n7_8xb1-600k_reds4.py) | **0.9069** | 0.9411 | 0.8444 | 0.9417 | 0.9244 | 0.7540 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217-db622b2f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217_113115.log.json) | -| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bi](/configs/basicvsr_pp/basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bi.py) | 0.8804 | **0.9500** | **0.8401** | 0.9270 | 0.9195 | 0.7033 | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305-4ef437e2.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305_141254.log.json) | -| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bd](/configs/basicvsr_pp/basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bd.py) | 0.8528 | 0.9351 | 0.8074 | **0.9722** | **0.9550** | **0.8753** | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305-ab315ab1.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305_140921.log.json) | +| [basicvsr_plusplus_c64n7_8x1_600k_reds4](./basicvsr-pp_c64n7_8xb1-600k_reds4.py) | **0.9069** | 0.9411 | 0.8444 | 0.9417 | 0.9244 | 0.7540 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217-db622b2f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217_113115.log.json) | +| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bi](./basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bi.py) | 0.8804 | **0.9500** | **0.8401** | 0.9270 | 0.9195 | 0.7033 | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305-4ef437e2.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305_141254.log.json) | +| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bd](./basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bd.py) | 0.8528 | 0.9351 | 0.8074 | **0.9722** | **0.9550** | **0.8753** | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305-ab315ab1.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305_140921.log.json) |
NTIRE 2021 checkpoints Note that the following models are finetuned from smaller models. The training schemes of these models will be released when MMEditing reaches 5k stars. We provide the pre-trained models here. -**NTIRE 2021 Video Super-Resolution** - -[basicvsr-pp_c128n25_600k_ntire-vsr](/configs/basicvsr_pp/basicvsr-pp_c128n25_600k_ntire-vsr.py) - -[model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_vsr_20210311-1ff35292.pth) - -**NTIRE 2021 Quality Enhancement of Compressed Video - Track 1** - -[basicvsr-pp_c128n25_600k_ntire-decompress-track1](/configs/basicvsr_pp/basicvsr-pp_c128n25_600k_ntire-decompress-track1.py) - -[model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track1_20210223-7b2eba02.pth) - -**NTIRE 2021 Quality Enhancement of Compressed Video - Track 2** - -[basicvsr-pp_c128n25_600k_ntire-decompress-track2](/configs/basicvsr_pp/basicvsr-pp_c128n25_600k_ntire-decompress-track2.py) - -[model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track2_20210314-eeae05e6.pth) - -**NTIRE 2021 Quality Enhancement of Compressed Video - Track 3** - -[basicvsr-pp_c128n25_600k_ntire-decompress-track3](/configs/basicvsr_pp/basicvsr-pp_c128n25_600k_ntire-decompress-track3.py) - -[model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track3_20210304-6daf4a40.pth) +| Method | Download | Track | +| :-------------------------------------------------------------------: | :---------------------------------------------------------------------: | :----------------------------------------------------------: | +| [basicvsr-pp_c128n25_600k_ntire-vsr](./basicvsr-pp_c128n25_600k_ntire-vsr.py) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_vsr_20210311-1ff35292.pth) | NTIRE 2021 Video Super-Resolution | +| [basicvsr-pp_c128n25_600k_ntire-decompress-track1](./basicvsr-pp_c128n25_600k_ntire-decompress-track1.py) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track1_20210223-7b2eba02.pth) | NTIRE 2021 Quality Enhancement of Compressed Video - Track 1 | +| [basicvsr-pp_c128n25_600k_ntire-decompress-track2](./basicvsr-pp_c128n25_600k_ntire-decompress-track2.py) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track2_20210314-eeae05e6.pth) | NTIRE 2021 Quality Enhancement of Compressed Video - Track 2 | +| [basicvsr-pp_c128n25_600k_ntire-decompress-track3](./basicvsr-pp_c128n25_600k_ntire-decompress-track3.py) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track3_20210304-6daf4a40.pth) | NTIRE 2021 Quality Enhancement of Compressed Video - Track 3 |
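To sanity-check one of the NTIRE 2021 checkpoints tabulated above after the link migration, a minimal PyTorch sketch like the following can be used. The URL is copied from the table; the assumption that the file stores weights under a top-level `state_dict` key (the usual OpenMMLab layout) is not verified here.

```python
# Minimal sketch: fetch one of the NTIRE 2021 BasicVSR++ checkpoints listed
# above and inspect its keys. Assumes the usual OpenMMLab layout with a
# top-level 'state_dict' entry; adjust if the file differs.
import torch

URL = ('https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/'
       'basicvsr_plusplus_c128n25_ntire_vsr_20210311-1ff35292.pth')

ckpt = torch.hub.load_state_dict_from_url(URL, map_location='cpu')
state_dict = ckpt.get('state_dict', ckpt)  # fall back if keys sit at top level
print(f'{len(state_dict)} parameter tensors')
for name in list(state_dict)[:5]:
    print(name, tuple(state_dict[name].shape))
```

If the keys look sane, the checkpoint can then be passed to the matching config through the project's own test tooling.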
diff --git a/configs/basicvsr_pp/README_zh-CN.md b/configs/basicvsr_pp/README_zh-CN.md index 8ef47f6ee4..3048631f78 100644 --- a/configs/basicvsr_pp/README_zh-CN.md +++ b/configs/basicvsr_pp/README_zh-CN.md @@ -22,22 +22,21 @@ SPyNet 的 预训练权重在[这里](https://download.openmmlab.com/mmediting/r | 算法 | REDS4 (BIx4) PSNR/SSIM (RGB) | Vimeo-90K-T (BIx4) PSNR/SSIM (Y) | Vid4 (BIx4) PSNR/SSIM (Y) | UDM10 (BDx4) PSNR/SSIM (Y) | Vimeo-90K-T (BDx4) PSNR/SSIM (Y) | Vid4 (BDx4) PSNR/SSIM (Y) | GPU 信息 | Download | | :-----: | :--------------------------: | :------------------------------: | :-----------------------: | :------------------------: | :------------------------------: | :-----------------------: | :---------: | :------------: | -| [basicvsr_plusplus_c64n7_8x1_600k_reds4](/configs/basicvsr_pp/basicvsr-pp_c64n7_8xb1-600k_reds4.py) | **32.3855/0.9069** | 36.4445/0.9411 | 27.7674/0.8444 | 34.6868/0.9417 | 34.0372/0.9244 | 24.6209/0.7540 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217-db622b2f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217_113115.log.json) | -| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bi](/configs/basicvsr_pp/basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bi.py) | 31.0126/0.8804 | **37.7864/0.9500** | **27.7882/0.8401** | 33.1211/0.9270 | 33.8972/0.9195 | 23.6086/0.7033 | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305-4ef437e2.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305_141254.log.json) | -| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bd](/configs/basicvsr_pp/basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bd.py) | 29.2041/0.8528 | 34.7248/0.9351 | 26.4377/0.8074 | **40.7216/0.9722** | **38.2054/0.9550** | **29.0400/0.8753** | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305-ab315ab1.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305_140921.log.json) | +| [basicvsr_plusplus_c64n7_8x1_600k_reds4](./basicvsr-pp_c64n7_8xb1-600k_reds4.py) | **32.3855/0.9069** | 36.4445/0.9411 | 27.7674/0.8444 | 34.6868/0.9417 | 34.0372/0.9244 | 24.6209/0.7540 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217-db622b2f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_600k_reds4_20210217_113115.log.json) | +| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bi](./basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bi.py) | 31.0126/0.8804 | **37.7864/0.9500** | **27.7882/0.8401** | 33.1211/0.9270 | 33.8972/0.9195 | 23.6086/0.7033 | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305-4ef437e2.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bi_20210305_141254.log.json) | +| [basicvsr_plusplus_c64n7_4x2_300k_vimeo90k_bd](./basicvsr-pp_c64n7_4xb2-300k_vimeo90k-bd.py) | 29.2041/0.8528 | 34.7248/0.9351 | 26.4377/0.8074 | 
**40.7216/0.9722** | **38.2054/0.9550** | **29.0400/0.8753** | 4 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305-ab315ab1.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305_140921.log.json) |
NTIRE 2021 模型权重文件 请注意,以下模型是从较小的模型中微调而来的。 这些模型的训练方案将在 MMEditing 达到 5k star 时发布。 我们在这里提供预训练的模型。 -[NTIRE 2021 Video Super-Resolution](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_vsr_20210311-1ff35292.pth) - -[NTIRE 2021 Quality Enhancement of Compressed Video - Track 1](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track1_20210223-7b2eba02.pth) - -[NTIRE 2021 Quality Enhancement of Compressed Video - Track 2](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track2_20210314-eeae05e6.pth) - -[NTIRE 2021 Quality Enhancement of Compressed Video - Track 3](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track3_20210304-6daf4a40.pth) +| 算法 | 模型 | 赛道 | +| ---------------------------------------------------------------------- | ---------------------------------------------------------------------- | ------------------------------------------------------------ | +| [basicvsr-pp_c128n25_600k_ntire-vsr](./basicvsr-pp_c128n25_600k_ntire-vsr.py) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_vsr_20210311-1ff35292.pth) | NTIRE 2021 Video Super-Resolution | +| [basicvsr-pp_c128n25_600k_ntire-decompress-track1](./basicvsr-pp_c128n25_600k_ntire-decompress-track1.py) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track1_20210223-7b2eba02.pth) | NTIRE 2021 Quality Enhancement of Compressed Video - Track 1 | +| [basicvsr-pp_c128n25_600k_ntire-decompress-track2](./basicvsr-pp_c128n25_600k_ntire-decompress-track2.py) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track2_20210314-eeae05e6.pth) | NTIRE 2021 Quality Enhancement of Compressed Video - Track 2 | +| [basicvsr-pp_c128n25_600k_ntire-decompress-track3](./basicvsr-pp_c128n25_600k_ntire-decompress-track3.py) | [model](https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track3_20210304-6daf4a40.pth) | NTIRE 2021 Quality Enhancement of Compressed Video - Track 3 |
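Several metafile.yml diffs in this PR (aot_gan and basicvsr above, basicvsr_pp just below) add `Task` and `Year` fields to each collection. A small hedged sketch for checking that every collection in a metafile carries both fields follows; PyYAML is assumed available, and the field names simply mirror the diffs.

```python
# Rough sketch: verify that every Collection in a metafile.yml declares the
# newly added `Task` and `Year` fields. Field names mirror the diffs in this
# PR; the metafile path below is an example, not a fixed location.
import yaml

def check_metafile(path):
    with open(path) as f:
        meta = yaml.safe_load(f)
    problems = []
    for collection in meta.get('Collections', []):
        name = collection.get('Name', '<unnamed>')
        if 'Task' not in collection:
            problems.append(f'{name}: missing Task')
        if 'Year' not in collection:
            problems.append(f'{name}: missing Year')
    return problems

if __name__ == '__main__':
    for issue in check_metafile('configs/basicvsr_pp/metafile.yml'):
        print(issue)
```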
``` diff --git a/configs/basicvsr_pp/metafile.yml b/configs/basicvsr_pp/metafile.yml index c974808abc..75e9017317 100644 --- a/configs/basicvsr_pp/metafile.yml +++ b/configs/basicvsr_pp/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2104.13371 README: configs/basicvsr_pp/README.md + Task: + - video super-resolution + Year: 2022 Models: - Config: configs/basicvsr_pp/basicvsr-pp_c64n7_8xb1-600k_reds4.py In Collection: BasicVSR++ @@ -109,3 +112,43 @@ Models: Vimeo-90K-T (BIx4) SSIM (Y): 0.9351 Task: Video Super-Resolution Weights: https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c64n7_8x1_300k_vimeo90k_bd_20210305-ab315ab1.pth +- Config: configs/basicvsr_pp/basicvsr-pp_c128n25_600k_ntire-vsr.py + In Collection: BasicVSR++ + Metadata: + Training Data: Others + Name: basicvsr-pp_c128n25_600k_ntire-vsr + Results: + - Dataset: Others + Metrics: {} + Task: Video Super-Resolution + Weights: https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_vsr_20210311-1ff35292.pth +- Config: configs/basicvsr_pp/basicvsr-pp_c128n25_600k_ntire-decompress-track1.py + In Collection: BasicVSR++ + Metadata: + Training Data: Others + Name: basicvsr-pp_c128n25_600k_ntire-decompress-track1 + Results: + - Dataset: Others + Metrics: {} + Task: Video Super-Resolution + Weights: https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track1_20210223-7b2eba02.pth +- Config: configs/basicvsr_pp/basicvsr-pp_c128n25_600k_ntire-decompress-track2.py + In Collection: BasicVSR++ + Metadata: + Training Data: Others + Name: basicvsr-pp_c128n25_600k_ntire-decompress-track2 + Results: + - Dataset: Others + Metrics: {} + Task: Video Super-Resolution + Weights: https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track2_20210314-eeae05e6.pth +- Config: configs/basicvsr_pp/basicvsr-pp_c128n25_600k_ntire-decompress-track3.py + In Collection: BasicVSR++ + Metadata: + Training Data: Others + Name: basicvsr-pp_c128n25_600k_ntire-decompress-track3 + Results: + - Dataset: Others + Metrics: {} + Task: Video Super-Resolution + Weights: https://download.openmmlab.com/mmediting/restorers/basicvsr_plusplus/basicvsr_plusplus_c128n25_ntire_decompress_track3_20210304-6daf4a40.pth diff --git a/configs/biggan/README.md b/configs/biggan/README.md index 9babef2be5..dde0566bef 100644 --- a/configs/biggan/README.md +++ b/configs/biggan/README.md @@ -40,11 +40,15 @@ Evaluation of our trained BigGAN. 
| Models | Dataset | FID (Iter) | IS (Iter) | Config | Download | | :------------------------------------------------: | :--------: | :---------------: | :-----------------: | :-------------------------------------------------: | :---------------------------------------------------: | -| BigGAN 32x32 | CIFAR10 | 9.78(390000) | 8.70(390000) | [config](/configs/biggan/biggan_2xb25-500kiters_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmgen/biggan/biggan_cifar10_32x32_b25x2_500k_20210728_110906-08b61a44.pth)\|[log](https://download.openmmlab.com/mmgen/biggan/biggan_cifar10_32_b25x2_500k_20210706_171051.log.json) | -| BigGAN 128x128 Best FID | ImageNet1k | **8.69**(1232000) | 101.15(1232000) | [config](/configs/biggan/biggan_ajbrock-sn_8xb32-1500kiters_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth)\|[log](https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_1500k_20211111_122548-5315b13d.log.json) | -| BigGAN 128x128 Best IS | ImageNet1k | 13.51(1328000) | **129.07**(1328000) | [config](/configs/biggan/biggan_ajbrock-sn_8xb32-1500kiters_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_best_is_iter_1328000_20211111_122911-28c688bc.pth)\|[log](https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_1500k_20211111_122548-5315b13d.log.json) | +| BigGAN 32x32 | CIFAR10 | 9.78(390000) | 8.70(390000) | [config](./biggan_2xb25-500kiters_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmediting/biggan/biggan_cifar10_32x32_b25x2_500k_20210728_110906-08b61a44.pth)\|[log](https://download.openmmlab.com/mmediting/biggan/biggan_cifar10_32_b25x2_500k_20210706_171051.log.json) | +| BigGAN 128x128 Best FID | ImageNet1k | **8.69**(1232000) | 101.15(1232000) | [config](./biggan_ajbrock-sn_8xb32-1500kiters_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth)\|[log](https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_1500k_20211111_122548-5315b13d.log.json) | +| BigGAN 128x128 Best IS | ImageNet1k | 13.51(1328000) | **129.07**(1328000) | [config](./biggan_ajbrock-sn_8xb32-1500kiters_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_best_is_iter_1328000_20211111_122911-28c688bc.pth)\|[log](https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_1500k_20211111_122548-5315b13d.log.json) | | Note: `BigGAN-Deep` trained on `ImageNet1k` will come later. | | | | | | +### Note on reproducibility + +`BigGAN 128x128` model is trained with V100 GPUs and CUDA 10.1 and can hardly reproduce the result with A100 and CUDA 11.3. If you have any idea about the reproducibility, please feel free to contact with us. + ## Converted weights Since we haven't finished training our models, we provide you with several pre-trained weights which have been evaluated. Here, we refer to [BigGAN-PyTorch](https://github.com/ajbrock/BigGAN-PyTorch) and [pytorch-pretrained-BigGAN](https://github.com/huggingface/pytorch-pretrained-BigGAN). @@ -53,10 +57,10 @@ Evaluation results and download links are provided below. 
| Models | Dataset | FID | IS | Config | Download | Original Download link | | :-----------------: | :--------: | :-----: | :-----: | :--------------------------------------------: | :----------------------------------------------: | :-------------------------------------------------------------: | -| BigGAN 128x128 | ImageNet1k | 10.1414 | 96.728 | [config](/configs/biggan/biggan_cvt-BigGAN-PyTorch-rgb_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_cvt_BigGAN-PyTorch_rgb_20210730_125223-3e353fef.pth) | [link](https://drive.google.com/open?id=1nAle7FCVFZdix2--ks0r5JBkFnKw8ctW) | -| BigGAN-Deep 128x128 | ImageNet1k | 5.9471 | 107.161 | [config](/configs/biggan/biggan-deep_cvt-hugging-face-rgb_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/biggan/biggan-deep_imagenet1k_128x128_cvt_hugging-face_rgb_20210728_111659-099e96f9.pth) | [link](https://s3.amazonaws.com/models.huggingface.co/biggan/biggan-deep-128-pytorch_model.bin) | -| BigGAN-Deep 256x256 | ImageNet1k | 11.3151 | 135.107 | [config](/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-256x256.py) | [model](https://download.openmmlab.com/mmgen/biggan/biggan-deep_imagenet1k_256x256_cvt_hugging-face_rgb_20210728_111735-28651569.pth) | [link](https://s3.amazonaws.com/models.huggingface.co/biggan/biggan-deep-256-pytorch_model.bin) | -| BigGAN-Deep 512x512 | ImageNet1k | 16.8728 | 124.368 | [config](/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-512x512.py) | [model](https://download.openmmlab.com/mmgen/biggan/biggan-deep_imagenet1k_512x512_cvt_hugging-face_rgb_20210728_112346-a42585f2.pth) | [link](https://s3.amazonaws.com/models.huggingface.co/biggan/biggan-deep-512-pytorch_model.bin) | +| BigGAN 128x128 | ImageNet1k | 10.1414 | 96.728 | [config](./biggan_cvt-BigGAN-PyTorch-rgb_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_cvt_BigGAN-PyTorch_rgb_20210730_125223-3e353fef.pth) | [link](https://drive.google.com/open?id=1nAle7FCVFZdix2--ks0r5JBkFnKw8ctW) | +| BigGAN-Deep 128x128 | ImageNet1k | 5.9471 | 107.161 | [config](./biggan-deep_cvt-hugging-face-rgb_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmediting/biggan/biggan-deep_imagenet1k_128x128_cvt_hugging-face_rgb_20210728_111659-099e96f9.pth) | [link](https://s3.amazonaws.com/models.huggingface.co/biggan/biggan-deep-128-pytorch_model.bin) | +| BigGAN-Deep 256x256 | ImageNet1k | 11.3151 | 135.107 | [config](./biggan-deep_cvt-hugging-face_rgb_imagenet1k-256x256.py) | [model](https://download.openmmlab.com/mmediting/biggan/biggan-deep_imagenet1k_256x256_cvt_hugging-face_rgb_20210728_111735-28651569.pth) | [link](https://s3.amazonaws.com/models.huggingface.co/biggan/biggan-deep-256-pytorch_model.bin) | +| BigGAN-Deep 512x512 | ImageNet1k | 16.8728 | 124.368 | [config](./biggan-deep_cvt-hugging-face_rgb_imagenet1k-512x512.py) | [model](https://download.openmmlab.com/mmediting/biggan/biggan-deep_imagenet1k_512x512_cvt_hugging-face_rgb_20210728_112346-a42585f2.pth) | [link](https://s3.amazonaws.com/models.huggingface.co/biggan/biggan-deep-512-pytorch_model.bin) | Sampling results are shown below. 
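Because this part of the diff rewrites many weight URLs from the `mmgen/` prefix to `mmediting/`, a quick hedged check that the rewritten links actually resolve can catch a broken README before merge. `requests` is assumed to be available, and the sample URL is copied from the converted-weights table above.

```python
# Rough sketch: confirm that download URLs rewritten from the mmgen/ prefix to
# the mmediting/ prefix still resolve. The URL list here holds one sample
# taken from the table above; extend it with the other rewritten links.
import requests

urls = [
    'https://download.openmmlab.com/mmediting/biggan/'
    'biggan_imagenet1k_128x128_cvt_BigGAN-PyTorch_rgb_20210730_125223-3e353fef.pth',
]

for url in urls:
    resp = requests.head(url, allow_redirects=True, timeout=10)
    status = 'ok' if resp.status_code == 200 else f'HTTP {resp.status_code}'
    print(f'{status}: {url}')
```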
diff --git a/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-256x256.py b/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-256x256.py index d81c1a09ea..6e48b0cd9e 100644 --- a/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-256x256.py +++ b/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-256x256.py @@ -3,11 +3,10 @@ '../_base_/gen_default_runtime.py', ] -# setting image size to 512x512 -train_resize = _base_.train_dataloader.dataset.pipeline[3] -test_resize = _base_.test_dataloader.dataset.pipeline[3] -val_resize = _base_.val_dataloader.dataset.pipeline[3] -train_resize.scale = test_resize.scale = val_resize.scale = (256, 256) +# setting image size to 256x256 +_base_.train_dataloader.dataset.pipeline[2].scale = (256, 256) +_base_.test_dataloader.dataset.pipeline[2].scale = (256, 256) +_base_.val_dataloader.dataset.pipeline[2].scale = (256, 256) ema_config = dict( type='ExponentialMovingAverage', diff --git a/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-512x512.py b/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-512x512.py index 11b2a46275..72d7bef5d4 100644 --- a/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-512x512.py +++ b/configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-512x512.py @@ -4,10 +4,9 @@ ] # setting image size to 512x512 -train_resize = _base_.train_dataloader.dataset.pipeline[3] -test_resize = _base_.test_dataloader.dataset.pipeline[3] -val_resize = _base_.val_dataloader.dataset.pipeline[3] -train_resize.scale = test_resize.scale = val_resize.scale = (512, 512) +_base_.train_dataloader.dataset.pipeline[2].scale = (512, 512) +_base_.test_dataloader.dataset.pipeline[2].scale = (512, 512) +_base_.val_dataloader.dataset.pipeline[2].scale = (512, 512) ema_config = dict( type='ExponentialMovingAverage', diff --git a/configs/biggan/biggan_torch-sn_8xb32-1500kiters_imagenet1k-128x128.py b/configs/biggan/biggan_torch-sn_8xb32-1500kiters_imagenet1k-128x128.py deleted file mode 100644 index 3839c81a7d..0000000000 --- a/configs/biggan/biggan_torch-sn_8xb32-1500kiters_imagenet1k-128x128.py +++ /dev/null @@ -1,70 +0,0 @@ -_base_ = [ - '../_base_/models/biggan/base_biggan_128x128.py', - '../_base_/datasets/imagenet_noaug_128.py', - '../_base_/gen_default_runtime.py', -] - -# define model -ema_config = dict( - type='ExponentialMovingAverage', - interval=1, - momentum=0.9999, - update_buffers=True, - start_iter=20000) - -model = dict( - generator=dict(sn_style='torch'), - discriminator=dict(sn_style='torch'), - ema_config=ema_config) - -# define dataset -train_dataloader = dict( - batch_size=32, num_workers=8, dataset=dict(data_root='data/imagenet')) - -# define optimizer -optim_wrapper = dict( - generator=dict( - accumulative_counts=8, - optimizer=dict(type='Adam', lr=0.0001, betas=(0.0, 0.999), eps=1e-6)), - discriminator=dict( - accumulative_counts=8, - optimizer=dict(type='Adam', lr=0.0004, betas=(0.0, 0.999), eps=1e-6))) - -# VIS_HOOK -custom_hooks = [ - dict( - type='GenVisualizationHook', - interval=10000, - fixed_input=True, - # vis ema and orig at the same time - vis_kwargs_list=dict( - type='Noise', - name='fake_img', - sample_model='ema/orig', - target_keys=['ema', 'orig'])), -] - -# Traning sets' datasize 1,281,167 -train_cfg = dict(max_iters=1500000) - -metrics = [ - dict( - type='FrechetInceptionDistance', - prefix='FID-Full-50k', - fake_nums=50000, - inception_style='StyleGAN', - sample_model='ema'), - dict( - type='IS', - prefix='IS-50k', - fake_nums=50000, - inception_style='StyleGAN', 
- sample_model='ema') -] -# save multi best checkpoints -default_hooks = dict( - checkpoint=dict( - save_best=['FID-Full-50k/fid', 'IS-50k/is'], rule=['less', 'greater'])) - -val_evaluator = dict(metrics=metrics) -test_evaluator = dict(metrics=metrics) diff --git a/configs/biggan/metafile.yml b/configs/biggan/metafile.yml index d70d3070c7..d346c1dd65 100644 --- a/configs/biggan/metafile.yml +++ b/configs/biggan/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://openreview.net/forum?id=B1xsqj09Fm README: configs/biggan/README.md + Task: + - conditional gans + Year: 2019 Models: - Config: configs/biggan/biggan_2xb25-500kiters_cifar10-32x32.py In Collection: BigGAN @@ -16,7 +19,7 @@ Models: - Dataset: Others Metrics: {} Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/biggan/biggan_cifar10_32x32_b25x2_500k_20210728_110906-08b61a44.pth + Weights: https://download.openmmlab.com/mmediting/biggan/biggan_cifar10_32x32_b25x2_500k_20210728_110906-08b61a44.pth - Config: configs/biggan/biggan_ajbrock-sn_8xb32-1500kiters_imagenet1k-128x128.py In Collection: BigGAN Metadata: @@ -26,7 +29,7 @@ Models: - Dataset: Others Metrics: {} Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth + Weights: https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth - Config: configs/biggan/biggan_ajbrock-sn_8xb32-1500kiters_imagenet1k-128x128.py In Collection: BigGAN Metadata: @@ -36,7 +39,7 @@ Models: - Dataset: Others Metrics: {} Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_best_is_iter_1328000_20211111_122911-28c688bc.pth + Weights: https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_best_is_iter_1328000_20211111_122911-28c688bc.pth - Config: configs/biggan/biggan_cvt-BigGAN-PyTorch-rgb_imagenet1k-128x128.py In Collection: BigGAN Metadata: @@ -48,7 +51,7 @@ Models: FID: 10.1414 IS: 96.728 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_cvt_BigGAN-PyTorch_rgb_20210730_125223-3e353fef.pth + Weights: https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_cvt_BigGAN-PyTorch_rgb_20210730_125223-3e353fef.pth - Config: configs/biggan/biggan-deep_cvt-hugging-face-rgb_imagenet1k-128x128.py In Collection: BigGAN Metadata: @@ -60,7 +63,7 @@ Models: FID: 5.9471 IS: 107.161 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/biggan/biggan-deep_imagenet1k_128x128_cvt_hugging-face_rgb_20210728_111659-099e96f9.pth + Weights: https://download.openmmlab.com/mmediting/biggan/biggan-deep_imagenet1k_128x128_cvt_hugging-face_rgb_20210728_111659-099e96f9.pth - Config: configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-256x256.py In Collection: BigGAN Metadata: @@ -72,7 +75,7 @@ Models: FID: 11.3151 IS: 135.107 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/biggan/biggan-deep_imagenet1k_256x256_cvt_hugging-face_rgb_20210728_111735-28651569.pth + Weights: https://download.openmmlab.com/mmediting/biggan/biggan-deep_imagenet1k_256x256_cvt_hugging-face_rgb_20210728_111735-28651569.pth - Config: configs/biggan/biggan-deep_cvt-hugging-face_rgb_imagenet1k-512x512.py In Collection: BigGAN Metadata: @@ -84,4 +87,4 @@ Models: FID: 16.8728 IS: 124.368 Task: Conditional GANs - Weights: 
https://download.openmmlab.com/mmgen/biggan/biggan-deep_imagenet1k_512x512_cvt_hugging-face_rgb_20210728_112346-a42585f2.pth + Weights: https://download.openmmlab.com/mmediting/biggan/biggan-deep_imagenet1k_512x512_cvt_hugging-face_rgb_20210728_112346-a42585f2.pth diff --git a/configs/cain/README.md b/configs/cain/README.md index 4cfd230bfa..9f2bfdba9b 100644 --- a/configs/cain/README.md +++ b/configs/cain/README.md @@ -24,9 +24,9 @@ Evaluated on RGB channels. The metrics are `PSNR / SSIM` . The learning rate adjustment strategy is `Step LR scheduler with min_lr clipping`. -| Method | PSNR | SSIM | GPU Info | Download | -| :------------------------------------------------------------------------------: | :-----: | :----: | :----------------------: | :---------------------------------------------------------------------------------: | -| [cain_b5_g1b32_vimeo90k_triplet](/configs/cain/cain_g1b32_1xb5_vimeo90k-triplet.py) | 34.6010 | 0.9578 | 1 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_g1b32_vimeo90k_triplet_20220530-3520b00c.pth)/[log](https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_g1b32_vimeo90k_triplet_20220530-3520b00c.log.json) | +| Method | PSNR | SSIM | GPU Info | Download | +| :---------------------------------------------------------------------: | :-----: | :----: | :----------------------: | :------------------------------------------------------------------------------------------: | +| [cain_b5_g1b32_vimeo90k_triplet](./cain_g1b32_1xb5_vimeo90k-triplet.py) | 34.6010 | 0.9578 | 1 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_g1b32_vimeo90k_triplet_20220530-3520b00c.pth)/[log](https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_g1b32_vimeo90k_triplet_20220530-3520b00c.log.json) | ## Quick Start diff --git a/configs/cain/README_zh-CN.md b/configs/cain/README_zh-CN.md index f81da09e57..82860f9781 100644 --- a/configs/cain/README_zh-CN.md +++ b/configs/cain/README_zh-CN.md @@ -27,9 +27,9 @@ 我们使用 `PSNR` 和 `SSIM` 作为指标。 学习率调整策略是等间隔调整策略。 -| 算法 | vimeo-90k-triplet | GPU 信息 | 下载 | -| :-----------------------------------------------------------------------------: | :---------------: | :----------------------: | :------------------------------------------------------------------------------: | -| [cain_b5_g1b32_vimeo90k_triplet](/configs/cain/cain_g1b32_1xb5_vimeo90k-triplet.py) | 34.6010 / 0.9578 | 1 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_g1b32_vimeo90k_triplet_20220530-3520b00c.pth)/[日志](https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_g1b32_vimeo90k_triplet_20220530-3520b00c.log.json) | +| 算法 | vimeo-90k-triplet | GPU 信息 | 下载 | +| :---------------------------------------------------------------------: | :---------------: | :----------------------: | :--------------------------------------------------------------------------------------: | +| [cain_b5_g1b32_vimeo90k_triplet](./cain_g1b32_1xb5_vimeo90k-triplet.py) | 34.6010 / 0.9578 | 1 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_g1b32_vimeo90k_triplet_20220530-3520b00c.pth)/[日志](https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_g1b32_vimeo90k_triplet_20220530-3520b00c.log.json) | ## 快速开始 diff --git a/configs/cain/metafile.yml b/configs/cain/metafile.yml index 5a866fedbc..c2d94922ca 100644 --- 
a/configs/cain/metafile.yml +++ b/configs/cain/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://aaai.org/ojs/index.php/AAAI/article/view/6693/6547 README: configs/cain/README.md + Task: + - video interpolation + Year: 2020 Models: - Config: configs/cain/cain_g1b32_1xb5_vimeo90k-triplet.py In Collection: CAIN diff --git a/configs/cyclegan/README.md b/configs/cyclegan/README.md index b72a0fefcf..be402efad9 100644 --- a/configs/cyclegan/README.md +++ b/configs/cyclegan/README.md @@ -2,7 +2,7 @@ > [CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks](https://openaccess.thecvf.com/content_iccv_2017/html/Zhu_Unpaired_Image-To-Image_Translation_ICCV_2017_paper.html) -> **Task**: Image2Image Translation +> **Task**: Image2Image @@ -27,24 +27,26 @@ Image-to-image translation is a class of vision and graphics problems where the We use `FID` and `IS` metrics to evaluate the generation performance of CycleGAN.1 -https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_80k_facades_20210902_165905-5e2c0876.pth -https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_in_1x1_80k_facades_20210902_165905-5e2c0876.pth - -| Models | Dataset | FID | IS | Config | Download | -| :----: | :---------------: | :------: | :---: | :-------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------: | -| Ours | facades | 124.8033 | 1.792 | [config](/configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-80kiters_facades.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_80k_facades_20210902_165905-5e2c0876.pth) \| [log](https://download.openmmlab.com/mmgen/cyclegan/base_cyclegan_in_1x1_80k_facades_20210317_160938.log.json) 2 | -| Ours | facades-id0 | 125.1694 | 1.905 | [config](/configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-80kiters_facades.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_80k_facades_convert-bgr_20210902_164411-d8e72b45.pth) | -| Ours | summer2winter | 83.7177 | 2.771 | [config](/configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-250kiters_summer2winter.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth) | -| Ours | summer2winter-id0 | 83.1418 | 2.720 | [config](/configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-250kiters_summer2winter.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth) | -| Ours | winter2summer | 72.8025 | 3.129 | [config](/configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-250kiters_summer2winter.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth) | -| Ours | winter2summer-id0 | 73.5001 | 3.107 | [config](/configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-250kiters_summer2winter.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth) | -| Ours | horse2zebra | 64.5225 | 1.418 | [config](/configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-270kiters_horse2zebra.py) | 
[model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth) | -| Ours | horse2zebra-id0 | 74.7770 | 1.542 | [config](/configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-270kiters_horse2zebra.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth) | -| Ours | zebra2horse | 141.1517 | 3.154 | [config](/configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-270kiters_horse2zebra.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth) | -| Ours | zebra2horse-id0 | 134.3728 | 3.091 | [config](/configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-270kiters_horse2zebra.py) | [model](https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth) | +https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_80k_facades_20210902_165905-5e2c0876.pth +https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_in_1x1_80k_facades_20210902_165905-5e2c0876.pth + +| Models | Dataset | FID | IS | Config | Download | +| :----: | :---------------: | :------: | :---: | :----------------------------------------------------------------------: | :------------------------------------------------------------------------------------------: | +| Ours | facades | 124.8033 | 1.792 | [config](./cyclegan_lsgan-resnet-in_1xb1-80kiters_facades.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_80k_facades_20210902_165905-5e2c0876.pth) \| [log](https://download.openmmlab.com/mmediting/cyclegan/cyclegan_lsgan_resnet_in_1x1_80k_facades_20210317_160938.log.json) 2 | +| Ours | facades-id0 | 125.1694 | 1.905 | [config](./cyclegan_lsgan-id0-resnet-in_1xb1-80kiters_facades.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_80k_facades_convert-bgr_20210902_164411-d8e72b45.pth) | +| Ours | summer2winter | 83.7177 | 2.771 | [config](./cyclegan_lsgan-resnet-in_1xb1-250kiters_summer2winter.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth) | +| Ours | summer2winter-id0 | 83.1418 | 2.720 | [config](./cyclegan_lsgan-id0-resnet-in_1xb1-250kiters_summer2winter.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth) | +| Ours | winter2summer | 72.8025 | 3.129 | [config](./cyclegan_lsgan-resnet-in_1xb1-250kiters_summer2winter.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth) | +| Ours | winter2summer-id0 | 73.5001 | 3.107 | [config](./cyclegan_lsgan-id0-resnet-in_1xb1-250kiters_summer2winter.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth) | +| Ours | horse2zebra | 64.5225 | 1.418 | [config](./cyclegan_lsgan-resnet-in_1xb1-270kiters_horse2zebra.py) | 
[model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth) | +| Ours | horse2zebra-id0 | 74.7770 | 1.542 | [config](./cyclegan_lsgan-id0-resnet-in_1xb1-270kiters_horse2zebra.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth) | +| Ours | zebra2horse | 141.1517 | 3.154 | [config](./cyclegan_lsgan-resnet-in_1xb1-270kiters_horse2zebra.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth) | +| Ours | zebra2horse-id0 | 134.3728 | 3.091 | [config](./cyclegan_lsgan-id0-resnet-in_1xb1-270kiters_horse2zebra.py) | [model](https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth) | `FID` comparison with official: + + | Dataset | facades | facades-id0 | summer2winter | summer2winter-id0 | winter2summer | winter2summer-id0 | horse2zebra | horse2zebra-id0 | zebra2horse | zebra2horse-id0 | average | | :------: | :---------: | :---------: | :-----------: | :---------------: | :-----------: | :---------------: | :---------: | :-------------: | :---------: | :-------------: | :--------: | | official | **123.626** | **119.726** | **77.342** | **76.773** | **72.631** | 74.239 | **62.111** | 77.202 | **138.646** | **137.050** | **95.935** | @@ -52,6 +54,8 @@ https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_in_1x1_80k_facad `IS` comparison with evaluation: + + | Dataset | facades | facades-id0 | summer2winter | summer2winter-id0 | winter2summer | winter2summer-id0 | horse2zebra | horse2zebra-id0 | zebra2horse | zebra2horse-id0 | average | | :------: | :-------: | :---------: | :-----------: | :---------------: | :-----------: | :---------------: | :---------: | :-------------: | :---------: | :-------------: | :-------: | | official | 1.638 | 1.697 | 2.762 | **2.750** | **3.293** | **3.110** | 1.375 | **1.584** | **3.186** | 3.047 | 2.444 | diff --git a/configs/cyclegan/metafile.yml b/configs/cyclegan/metafile.yml index 6c54aec4aa..d799b964a0 100644 --- a/configs/cyclegan/metafile.yml +++ b/configs/cyclegan/metafile.yml @@ -8,6 +8,9 @@ Collections: Paper: - https://openaccess.thecvf.com/content_iccv_2017/html/Zhu_Unpaired_Image-To-Image_Translation_ICCV_2017_paper.html README: configs/cyclegan/README.md + Task: + - image2image + Year: 2017 Models: - Config: configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-80kiters_facades.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent @@ -20,8 +23,8 @@ Models: Metrics: FID: 124.8033 IS: 1.792 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_80k_facades_20210902_165905-5e2c0876.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_80k_facades_20210902_165905-5e2c0876.pth - Config: configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-80kiters_facades.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -33,8 +36,8 @@ Models: Metrics: FID: 125.1694 IS: 1.905 - Task: Image2Image Translation - Weights: 
https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_80k_facades_convert-bgr_20210902_164411-d8e72b45.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_80k_facades_convert-bgr_20210902_164411-d8e72b45.pth - Config: configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-250kiters_summer2winter.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -46,8 +49,8 @@ Models: Metrics: FID: 83.7177 IS: 2.771 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth - Config: configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-250kiters_summer2winter.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -59,8 +62,8 @@ Models: Metrics: FID: 83.1418 IS: 2.72 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth - Config: configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-250kiters_summer2winter.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -72,8 +75,8 @@ Models: Metrics: FID: 72.8025 IS: 3.129 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165932-fcf08dc1.pth - Config: configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-250kiters_summer2winter.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -85,8 +88,8 @@ Models: Metrics: FID: 73.5001 IS: 3.107 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_246200_summer2winter_convert-bgr_20210902_165640-8b825581.pth - Config: configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-270kiters_horse2zebra.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -98,8 +101,8 @@ Models: Metrics: FID: 64.5225 IS: 1.418 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth - Config: configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-270kiters_horse2zebra.py In Collection: 'CycleGAN: Unpaired 
Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -111,8 +114,8 @@ Models: Metrics: FID: 74.777 IS: 1.542 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth - Config: configs/cyclegan/cyclegan_lsgan-resnet-in_1xb1-270kiters_horse2zebra.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -124,8 +127,8 @@ Models: Metrics: FID: 141.1517 IS: 3.154 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_170004-a32c733a.pth - Config: configs/cyclegan/cyclegan_lsgan-id0-resnet-in_1xb1-270kiters_horse2zebra.py In Collection: 'CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks' @@ -137,5 +140,5 @@ Models: Metrics: FID: 134.3728 IS: 3.091 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/cyclegan/refactor/cyclegan_lsgan_id0_resnet_in_1x1_266800_horse2zebra_convert-bgr_20210902_165724-77c9c806.pth diff --git a/configs/dcgan/README.md b/configs/dcgan/README.md index 22315a4092..cce01969a8 100644 --- a/configs/dcgan/README.md +++ b/configs/dcgan/README.md @@ -26,11 +26,11 @@ In recent years, supervised learning with convolutional networks (CNNs) has seen -| Models | Dataset | SWD | MS-SSIM | Config | Download | -| :---------: | :------------: | :----------------------: | :-----: | :---------------------------------------------------------------------: | :-----------------------------------------------------------------------: | -| DCGAN 64x64 | MNIST (64x64) | 21.16, 4.4, 8.41/11.32 | 0.1395 | [config](/configs/dcgan/dcgan_Glr4e-4_Dlr1e-4_1xb128-5kiters_mnist-64x64.py) | [model](https://download.openmmlab.com/mmgen/base_dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.pth) \| [log](https://download.openmmlab.com//mmgen/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.json) | -| DCGAN 64x64 | CelebA-Cropped | 8.93,10.53,50.32/23.26 | 0.2899 | [config](/configs/dcgan/dcgan_1xb128-300kiters_celeba-cropped-64.py) | [model](https://download.openmmlab.com/mmgen/base_dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.pth) \| [log](https://download.openmmlab.com/mmgen/dcgan/base_dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.json) | -| DCGAN 64x64 | LSUN-Bedroom | 42.79, 34.55, 98.46/58.6 | 0.2095 | [config](/configs/dcgan/dcgan_1xb128-5epoches_lsun-bedroom-64x64.py) | [model](https://download.openmmlab.com/mmgen/base_dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.pth) \| [log](https://download.openmmlab.com/mmgen/dcgan/base_dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.json) | +| Models | Dataset | SWD | MS-SSIM | Config | Download | +| :---------: | :------------: | 
:----------------------: | :-----: | :-------------------------------------------------------------: | :-------------------------------------------------------------------------------: | +| DCGAN 64x64 | MNIST (64x64) | 21.16, 4.4, 8.41/11.32 | 0.1395 | [config](./dcgan_Glr4e-4_Dlr1e-4_1xb128-5kiters_mnist-64x64.py) | [model](https://download.openmmlab.com/mmediting/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.pth) \| [log](https://download.openmmlab.com//mmgen/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.json) | +| DCGAN 64x64 | CelebA-Cropped | 8.93,10.53,50.32/23.26 | 0.2899 | [config](./dcgan_1xb128-300kiters_celeba-cropped-64.py) | [model](https://download.openmmlab.com/mmediting/dcgan/dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.pth) \| [log](https://download.openmmlab.com/mmediting/dcgan/dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.json) | +| DCGAN 64x64 | LSUN-Bedroom | 42.79, 34.55, 98.46/58.6 | 0.2095 | [config](./dcgan_1xb128-5epoches_lsun-bedroom-64x64.py) | [model](https://download.openmmlab.com/mmediting/dcgan/dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.pth) \| [log](https://download.openmmlab.com/mmediting/dcgan/dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.json) | ## Citation diff --git a/configs/dcgan/dcgan_1xb128-300kiters_celeba-cropped-64.py b/configs/dcgan/dcgan_1xb128-300kiters_celeba-cropped-64.py index 95324ff8e4..2f1ab816e6 100644 --- a/configs/dcgan/dcgan_1xb128-300kiters_celeba-cropped-64.py +++ b/configs/dcgan/dcgan_1xb128-300kiters_celeba-cropped-64.py @@ -45,6 +45,8 @@ sample_model='orig', image_shape=(3, 64, 64)) ] +# save best checkpoints +default_hooks = dict(checkpoint=dict(save_best='swd/avg', rule='less')) val_evaluator = dict(metrics=metrics) test_evaluator = dict(metrics=metrics) diff --git a/configs/dcgan/dcgan_1xb128-5epoches_lsun-bedroom-64x64.py b/configs/dcgan/dcgan_1xb128-5epoches_lsun-bedroom-64x64.py index 79f4e56f5f..e4396a3462 100644 --- a/configs/dcgan/dcgan_1xb128-5epoches_lsun-bedroom-64x64.py +++ b/configs/dcgan/dcgan_1xb128-5epoches_lsun-bedroom-64x64.py @@ -44,6 +44,8 @@ sample_model='orig', image_shape=(3, 64, 64)) ] +# save best checkpoints +default_hooks = dict(checkpoint=dict(save_best='swd/avg', rule='less')) val_evaluator = dict(metrics=metrics) test_evaluator = dict(metrics=metrics) diff --git a/configs/dcgan/dcgan_Glr4e-4_Dlr1e-4_1xb128-5kiters_mnist-64x64.py b/configs/dcgan/dcgan_Glr4e-4_Dlr1e-4_1xb128-5kiters_mnist-64x64.py index 3eb1b4ad99..3625022ccf 100644 --- a/configs/dcgan/dcgan_Glr4e-4_Dlr1e-4_1xb128-5kiters_mnist-64x64.py +++ b/configs/dcgan/dcgan_Glr4e-4_Dlr1e-4_1xb128-5kiters_mnist-64x64.py @@ -5,47 +5,44 @@ ] # output single channel -model = dict(generator=dict(out_channels=1), discriminator=dict(in_channels=1)) +model = dict( + data_preprocessor=dict(mean=[127.5], std=[127.5]), + generator=dict(out_channels=1), + discriminator=dict(in_channels=1)) # define dataset # modify train_pipeline to load gray scale images train_pipeline = [ - dict( - type='LoadImageFromFile', - key='img', - io_backend='disk', - color_type='grayscale'), + dict(type='LoadImageFromFile', key='img', color_type='grayscale'), dict(type='Resize', scale=(64, 64)), - dict(type='PackEditInputs', meta_keys=[]) + dict(type='PackEditInputs') ] # set ``batch_size``` and ``data_root``` batch_size = 128 data_root = 'data/mnist_64/train' train_dataloader = dict( - batch_size=batch_size, dataset=dict(data_root=data_root)) + 
batch_size=batch_size, + dataset=dict(data_root=data_root, pipeline=train_pipeline)) -val_dataloader = dict(batch_size=batch_size, dataset=dict(data_root=data_root)) +val_dataloader = dict( + batch_size=batch_size, + dataset=dict(data_root=data_root, pipeline=train_pipeline)) test_dataloader = dict( - batch_size=batch_size, dataset=dict(data_root=data_root)) - -default_hooks = dict( - checkpoint=dict( - interval=500, - save_best=['swd/avg', 'ms-ssim/avg'], - rule=['less', 'greater'])) + batch_size=batch_size, + dataset=dict(data_root=data_root, pipeline=train_pipeline)) # VIS_HOOK custom_hooks = [ dict( type='GenVisualizationHook', - interval=10000, + interval=500, fixed_input=True, vis_kwargs_list=dict(type='GAN', name='fake_img')) ] -train_cfg = dict(max_iters=5000) +train_cfg = dict(max_iters=5000, val_interval=500) # METRICS metrics = [ @@ -55,10 +52,13 @@ dict( type='SWD', prefix='swd', - fake_nums=16384, + fake_nums=-1, sample_model='orig', - image_shape=(3, 64, 64)) + image_shape=(1, 64, 64)) ] +# save best checkpoints +default_hooks = dict( + checkpoint=dict(interval=500, save_best='swd/avg', rule='less')) val_evaluator = dict(metrics=metrics) test_evaluator = dict(metrics=metrics) diff --git a/configs/dcgan/metafile.yml b/configs/dcgan/metafile.yml index 62d38230c5..2d201ff980 100644 --- a/configs/dcgan/metafile.yml +++ b/configs/dcgan/metafile.yml @@ -8,6 +8,9 @@ Collections: Paper: - https://arxiv.org/abs/1511.06434 README: configs/dcgan/README.md + Task: + - unconditional gans + Year: 2016 Models: - Config: configs/dcgan/dcgan_Glr4e-4_Dlr1e-4_1xb128-5kiters_mnist-64x64.py In Collection: Unsupervised Representation Learning with Deep Convolutional Generative @@ -20,7 +23,7 @@ Models: Metrics: MS-SSIM: 0.1395 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/base_dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.pth + Weights: https://download.openmmlab.com/mmediting/dcgan/dcgan_mnist-64_b128x1_Glr4e-4_Dlr1e-4_5k_20210512_163926-207a1eaf.pth - Config: configs/dcgan/dcgan_1xb128-300kiters_celeba-cropped-64.py In Collection: Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks @@ -32,7 +35,7 @@ Models: Metrics: MS-SSIM: 0.2899 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/base_dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.pth + Weights: https://download.openmmlab.com/mmediting/dcgan/dcgan_celeba-cropped_64_b128x1_300kiter_20210408_161607-1f8a2277.pth - Config: configs/dcgan/dcgan_1xb128-5epoches_lsun-bedroom-64x64.py In Collection: Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks @@ -44,4 +47,4 @@ Models: Metrics: MS-SSIM: 0.2095 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/base_dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.pth + Weights: https://download.openmmlab.com/mmediting/dcgan/dcgan_lsun-bedroom_64_b128x1_5e_20210408_161713-117c498b.pth diff --git a/configs/deepfillv1/README.md b/configs/deepfillv1/README.md index d68c28523c..a020f898ea 100644 --- a/configs/deepfillv1/README.md +++ b/configs/deepfillv1/README.md @@ -22,15 +22,15 @@ Recent deep learning based approaches have shown promising results for the chall **Places365-Challenge** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :-------------------------------------------------------------: | :---------: | :--------: | :---------: | :-----------: | 
:------: | :----: | :---: | :------: | :---------------------------------------------------------------: | -| [DeepFillv1](/configs/deepfillv1/deepfillv1_8xb2_places-256x256.py) | square bbox | 256x256 | 3500k | Places365-val | 11.019 | 23.429 | 0.862 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_8x2_places_20200619-c00a0e21.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_8x2_places_20200619-c00a0e21.log.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :-----------------------------------------------: | :---------: | :--------: | :---------: | :-----------: | :------: | :----: | :---: | :------: | :-----------------------------------------------------------------------------: | +| [DeepFillv1](./deepfillv1_8xb2_places-256x256.py) | square bbox | 256x256 | 3500k | Places365-val | 11.019 | 23.429 | 0.862 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_8x2_places_20200619-c00a0e21.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_8x2_places_20200619-c00a0e21.log.json) | **CelebA-HQ** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :--------------------------------------------------------------: | :---------: | :--------: | :---------: | :--------: | :------: | :----: | :---: | :------: | :-----------------------------------------------------------------: | -| [DeepFillv1](/configs/deepfillv1/deepfillv1_4xb4_celeba-256x256.py) | square bbox | 256x256 | 1500k | CelebA-val | 6.677 | 26.878 | 0.911 | 4 | [model](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_4x4_celeba_20200619-dd51a855.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_4x4_celeba_20200619-dd51a855.log.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :-----------------------------------------------: | :---------: | :--------: | :---------: | :--------: | :------: | :----: | :---: | :------: | :--------------------------------------------------------------------------------: | +| [DeepFillv1](./deepfillv1_4xb4_celeba-256x256.py) | square bbox | 256x256 | 1500k | CelebA-val | 6.677 | 26.878 | 0.911 | 4 | [model](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_4x4_celeba_20200619-dd51a855.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_4x4_celeba_20200619-dd51a855.log.json) | ## Quick Start diff --git a/configs/deepfillv1/README_zh-CN.md b/configs/deepfillv1/README_zh-CN.md index 273a29ac0a..db7625e8ba 100644 --- a/configs/deepfillv1/README_zh-CN.md +++ b/configs/deepfillv1/README_zh-CN.md @@ -23,15 +23,15 @@ **Places365-Challenge** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :----------------------------------------------------------------: | :---------: | :-----: | :--------: | :-----------: | :-----: | :----: | :---: | :------: | :-----------------------------------------------------------------: | -| [DeepFillv1](/configs/deepfillv1/deepfillv1_8xb2_places-256x256.py) | square bbox | 256x256 | 3500k | Places365-val | 11.019 | 23.429 | 0.862 | 8 | 
[模型](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_8x2_places_20200619-c00a0e21.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_8x2_places_20200619-c00a0e21.log.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| :-----------------------------------------------: | :---------: | :-----: | :--------: | :-----------: | :-----: | :----: | :---: | :------: | :----------------------------------------------------------------------------------: | +| [DeepFillv1](./deepfillv1_8xb2_places-256x256.py) | square bbox | 256x256 | 3500k | Places365-val | 11.019 | 23.429 | 0.862 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_8x2_places_20200619-c00a0e21.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_8x2_places_20200619-c00a0e21.log.json) | **CelebA-HQ** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :-----------------------------------------------------------------: | :---------: | :-----: | :--------: | :--------: | :-----: | :----: | :---: | :------: | :-------------------------------------------------------------------: | -| [DeepFillv1](/configs/deepfillv1/deepfillv1_4xb4_celeba-256x256.py) | square bbox | 256x256 | 1500k | CelebA-val | 6.677 | 26.878 | 0.911 | 4 | [模型](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_4x4_celeba_20200619-dd51a855.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_4x4_celeba_20200619-dd51a855.log.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| :-----------------------------------------------: | :---------: | :-----: | :--------: | :--------: | :-----: | :----: | :---: | :------: | :-------------------------------------------------------------------------------------: | +| [DeepFillv1](./deepfillv1_4xb4_celeba-256x256.py) | square bbox | 256x256 | 1500k | CelebA-val | 6.677 | 26.878 | 0.911 | 4 | [模型](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_4x4_celeba_20200619-dd51a855.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/deepfillv1/deepfillv1_256x256_4x4_celeba_20200619-dd51a855.log.json) | ## 快速开始 diff --git a/configs/deepfillv1/metafile.yml b/configs/deepfillv1/metafile.yml index 968b448d38..c8a83d22a4 100644 --- a/configs/deepfillv1/metafile.yml +++ b/configs/deepfillv1/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1801.07892 README: configs/deepfillv1/README.md + Task: + - inpainting + Year: 2018 Models: - Config: configs/deepfillv1/deepfillv1_8xb2_places-256x256.py In Collection: DeepFillv1 diff --git a/configs/deepfillv2/README.md b/configs/deepfillv2/README.md index dce7150cb5..096bb2f16c 100644 --- a/configs/deepfillv2/README.md +++ b/configs/deepfillv2/README.md @@ -22,15 +22,15 @@ We present a generative image inpainting system to complete images with free-for **Places365-Challenge** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :--------------------------------------------------------------: | :-------: | :--------: | :---------: | :-----------: | :------: | :----: | :---: | :------: | :----------------------------------------------------------------: | -| [DeepFillv2](/configs/deepfillv2/deepfillv2_8xb2_places-256x256.py) | free-form | 256x256 | 100k | Places365-val | 8.635 | 
22.398 | 0.815 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_places_20200619-10d15793.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_places_20200619-10d15793.log.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :-----------------------------------------------: | :-------: | :--------: | :---------: | :-----------: | :------: | :----: | :---: | :------: | :-------------------------------------------------------------------------------: | +| [DeepFillv2](./deepfillv2_8xb2_places-256x256.py) | free-form | 256x256 | 100k | Places365-val | 8.635 | 22.398 | 0.815 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_places_20200619-10d15793.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_places_20200619-10d15793.log.json) | **CelebA-HQ** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :---------------------------------------------------------------: | :-------: | :--------: | :---------: | :--------: | :------: | :----: | :---: | :------: | :------------------------------------------------------------------: | -| [DeepFillv2](/configs/deepfillv2/deepfillv2_8xb2_celeba-256x256.py) | free-form | 256x256 | 20k | CelebA-val | 5.411 | 25.721 | 0.871 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_celeba_20200619-c96e5f12.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_celeba_20200619-c96e5f12.log.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :-----------------------------------------------: | :-------: | :--------: | :---------: | :--------: | :------: | :----: | :---: | :------: | :----------------------------------------------------------------------------------: | +| [DeepFillv2](./deepfillv2_8xb2_celeba-256x256.py) | free-form | 256x256 | 20k | CelebA-val | 5.411 | 25.721 | 0.871 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_celeba_20200619-c96e5f12.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_celeba_20200619-c96e5f12.log.json) | ## Quick Start diff --git a/configs/deepfillv2/README_zh-CN.md b/configs/deepfillv2/README_zh-CN.md index 3c71037056..eaa1a31f4a 100644 --- a/configs/deepfillv2/README_zh-CN.md +++ b/configs/deepfillv2/README_zh-CN.md @@ -23,15 +23,15 @@ **Places365-Challenge** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :-----------------------------------------------------------------: | :-------: | :-----: | :--------: | :-----------: | :-----: | :----: | :---: | :------: | :------------------------------------------------------------------: | -| [DeepFillv2](/configs/deepfillv2/deepfillv2_8xb2_places-256x256.py) | free-form | 256x256 | 100k | Places365-val | 8.635 | 22.398 | 0.815 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_places_20200619-10d15793.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_places_20200619-10d15793.log.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| 
:-----------------------------------------------: | :-------: | :-----: | :--------: | :-----------: | :-----: | :----: | :---: | :------: | :------------------------------------------------------------------------------------: | +| [DeepFillv2](./deepfillv2_8xb2_places-256x256.py) | free-form | 256x256 | 100k | Places365-val | 8.635 | 22.398 | 0.815 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_places_20200619-10d15793.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_places_20200619-10d15793.log.json) | **CelebA-HQ** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :-----------------------------------------------------------------: | :-------: | :-----: | :--------: | :--------: | :-----: | :----: | :---: | :------: | :---------------------------------------------------------------------: | -| [DeepFillv2](/configs/deepfillv2/deepfillv2_8xb2_celeba-256x256.py) | free-form | 256x256 | 20k | CelebA-val | 5.411 | 25.721 | 0.871 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_celeba_20200619-c96e5f12.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_celeba_20200619-c96e5f12.log.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| :-----------------------------------------------: | :-------: | :-----: | :--------: | :--------: | :-----: | :----: | :---: | :------: | :---------------------------------------------------------------------------------------: | +| [DeepFillv2](./deepfillv2_8xb2_celeba-256x256.py) | free-form | 256x256 | 20k | CelebA-val | 5.411 | 25.721 | 0.871 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_celeba_20200619-c96e5f12.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/deepfillv2/deepfillv2_256x256_8x2_celeba_20200619-c96e5f12.log.json) | ## 快速开始 diff --git a/configs/deepfillv2/metafile.yml b/configs/deepfillv2/metafile.yml index d84dab7739..d0a57379e7 100644 --- a/configs/deepfillv2/metafile.yml +++ b/configs/deepfillv2/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1806.03589 README: configs/deepfillv2/README.md + Task: + - inpainting + Year: 2019 Models: - Config: configs/deepfillv2/deepfillv2_8xb2_places-256x256.py In Collection: DeepFillv2 diff --git a/configs/dic/README.md b/configs/dic/README.md index 69b808f74b..90215ae6b8 100644 --- a/configs/dic/README.md +++ b/configs/dic/README.md @@ -27,10 +27,10 @@ In the log data of `dic_gan_x8c48b6_g4_150k_CelebAHQ`, DICGAN is verified on the `GPU Info`: GPU information during training. 
-| Method | scale | PSNR | SSIM | GPU Info | Download | -| :------------------------------------------------------------------------------: | :---: | :-----: | :----: | :-----------------: | :---------------------------------------------------------------------------------: | -| [dic_x8c48b6_g4_150k_CelebAHQ](/configs/dic/dic_x8c48b6_4xb2-150k_celeba-hq.py) | x8 | 25.2319 | 0.7422 | 4 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/restorers/dic/dic_x8c48b6_g4_150k_CelebAHQ_20210611-5d3439ca.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/dic/dic_x8c48b6_g4_150k_CelebAHQ_20210611-5d3439ca.log.json) | -| [dic_gan_x8c48b6_g4_500k_CelebAHQ](/configs/dic/dic_gan-x8c48b6_4xb2-500k_celeba-hq.py) | x8 | 23.6241 | 0.6721 | 4 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/restorers/dic/dic_gan_x8c48b6_g4_500k_CelebAHQ_20210625-3b89a358.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/dic/dic_gan_x8c48b6_g4_500k_CelebAHQ_20210625-3b89a358.log.json) | +| Method | scale | PSNR | SSIM | GPU Info | Download | +| :--------------------------------------------------------------------------: | :---: | :-----: | :----: | :-----------------: | :-------------------------------------------------------------------------------------: | +| [dic_x8c48b6_g4_150k_CelebAHQ](./dic_x8c48b6_4xb2-150k_celeba-hq.py) | x8 | 25.2319 | 0.7422 | 4 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/restorers/dic/dic_x8c48b6_g4_150k_CelebAHQ_20210611-5d3439ca.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/dic/dic_x8c48b6_g4_150k_CelebAHQ_20210611-5d3439ca.log.json) | +| [dic_gan_x8c48b6_g4_500k_CelebAHQ](./dic_gan-x8c48b6_4xb2-500k_celeba-hq.py) | x8 | 23.6241 | 0.6721 | 4 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/restorers/dic/dic_gan_x8c48b6_g4_500k_CelebAHQ_20210625-3b89a358.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/dic/dic_gan_x8c48b6_g4_500k_CelebAHQ_20210625-3b89a358.log.json) | ## Quick Start diff --git a/configs/dic/README_zh-CN.md b/configs/dic/README_zh-CN.md index 8333162f54..74ef56d31b 100644 --- a/configs/dic/README_zh-CN.md +++ b/configs/dic/README_zh-CN.md @@ -29,10 +29,10 @@ `GPU 信息`: 训练过程中的 GPU 信息. 
-| 算法 | scale | CelebA-HQ | GPU 信息 | 下载 | -| :------------------------------------------------------------------------------: | :---: | :--------------: | :-----------------: | :------------------------------------------------------------------------------: | -| [dic_x8c48b6_g4_150k_CelebAHQ](/configs/dic/dic_x8c48b6_4xb2-150k_celeba-hq.py) | x8 | 25.2319 / 0.7422 | 4 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/restorers/dic/dic_x8c48b6_g4_150k_CelebAHQ_20210611-5d3439ca.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/dic/dic_x8c48b6_g4_150k_CelebAHQ_20210611-5d3439ca.log.json) | -| [dic_gan_x8c48b6_g4_500k_CelebAHQ](/configs/dic/dic_gan-x8c48b6_4xb2-500k_celeba-hq.py) | x8 | 23.6241 / 0.6721 | 4 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/restorers/dic/dic_gan_x8c48b6_g4_500k_CelebAHQ_20210625-3b89a358.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/dic/dic_gan_x8c48b6_g4_500k_CelebAHQ_20210625-3b89a358.log.json) | +| 算法 | scale | CelebA-HQ | GPU 信息 | 下载 | +| :--------------------------------------------------------------------------: | :---: | :--------------: | :-----------------: | :----------------------------------------------------------------------------------: | +| [dic_x8c48b6_g4_150k_CelebAHQ](./dic_x8c48b6_4xb2-150k_celeba-hq.py) | x8 | 25.2319 / 0.7422 | 4 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/restorers/dic/dic_x8c48b6_g4_150k_CelebAHQ_20210611-5d3439ca.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/dic/dic_x8c48b6_g4_150k_CelebAHQ_20210611-5d3439ca.log.json) | +| [dic_gan_x8c48b6_g4_500k_CelebAHQ](./dic_gan-x8c48b6_4xb2-500k_celeba-hq.py) | x8 | 23.6241 / 0.6721 | 4 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/restorers/dic/dic_gan_x8c48b6_g4_500k_CelebAHQ_20210625-3b89a358.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/dic/dic_gan_x8c48b6_g4_500k_CelebAHQ_20210625-3b89a358.log.json) | ## 快速开始 diff --git a/configs/dic/metafile.yml b/configs/dic/metafile.yml index 51d63d50d8..713931d1b1 100644 --- a/configs/dic/metafile.yml +++ b/configs/dic/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2003.13063 README: configs/dic/README.md + Task: + - image super-resolution + Year: 2020 Models: - Config: configs/dic/dic_x8c48b6_4xb2-150k_celeba-hq.py In Collection: DIC diff --git a/configs/dim/README.md b/configs/dim/README.md index f2c768d070..9106ec9ac5 100644 --- a/configs/dim/README.md +++ b/configs/dim/README.md @@ -20,13 +20,14 @@ Image matting is a fundamental computer vision problem and has many applications ## Results and models -| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | -| :------------------------------------------------------------------: | :------: | :-------: | :------: | :------: | :------: | :-----------------------------------------------------------------------------------------: | -| stage1 (paper) | 54.6 | 0.017 | 36.7 | 55.3 | - | - | -| stage3 (paper) | **50.4** | **0.014** | 31.0 | 50.8 | - | - | -| [stage1 (our)](/configs/dim/dim_stage1-v16_1xb1-1000k_comp1k.py) | 53.8 | 0.017 | 32.7 | 54.5 | 1 | [model](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage1_v16_1x1_1000k_comp1k_SAD-53.8_20200605_140257-979a420f.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage1_v16_1x1_1000k_comp1k_20200605_140257.log.json) | -| [stage2 (our)](/configs/dim/dim_stage2-v16-pln_1xb1-1000k_comp1k.py) | 52.3 | 0.016 | 29.4 | 52.4 | 1 | 
[model](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage2_v16_pln_1x1_1000k_comp1k_SAD-52.3_20200607_171909-d83c4775.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage2_v16_pln_1x1_1000k_comp1k_20200607_171909.log.json) | -| [stage3 (our)](/configs/dim/dim_stage3-v16-pln_1xb1-1000k_comp1k.py) | 50.6 | 0.015 | **29.0** | **50.7** | 1 | [model](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_SAD-50.6_20200609_111851-647f24b6.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_20200609_111851.log.json) | +| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | +| :-------------------------------------------------------------------------: | :------: | :-------: | :------: | :------: | :------: | :----------------------------------------------------------------------------------: | +| stage1 (paper) | 54.6 | 0.017 | 36.7 | 55.3 | - | - | +| stage3 (paper) | **50.4** | **0.014** | 31.0 | 50.8 | - | - | +| [stage1 (our)](./dim_stage1-v16_1xb1-1000k_comp1k.py) | 53.8 | 0.017 | 32.7 | 54.5 | 1 | [model](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage1_v16_1x1_1000k_comp1k_SAD-53.8_20200605_140257-979a420f.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage1_v16_1x1_1000k_comp1k_20200605_140257.log.json) | +| [stage2 (our)](./dim_stage2-v16-pln_1xb1-1000k_comp1k.py) | 52.3 | 0.016 | 29.4 | 52.4 | 1 | [model](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage2_v16_pln_1x1_1000k_comp1k_SAD-52.3_20200607_171909-d83c4775.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage2_v16_pln_1x1_1000k_comp1k_20200607_171909.log.json) | +| [stage3 (our)](./dim_stage3-v16-pln_1xb1-1000k_comp1k.py) | 50.6 | 0.015 | **29.0** | **50.7** | 1 | [model](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_SAD-50.6_20200609_111851-647f24b6.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_20200609_111851.log.json) | +| [stage1 (online merge)](./dim_stage1-v16_1xb1-1000k_comp1k_online-merge.py) | - | - | - | - | - | - | **NOTE** diff --git a/configs/dim/README_zh-CN.md b/configs/dim/README_zh-CN.md index 4683b3efc0..518dd5f49b 100644 --- a/configs/dim/README_zh-CN.md +++ b/configs/dim/README_zh-CN.md @@ -21,13 +21,14 @@
-| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | -| :---------------------------------------------------------------------: | :------: | :-------: | :------: | :------: | :------: | :--------------------------------------------------------------------------------------: | -| 第一阶段 (原文) | 54.6 | 0.017 | 36.7 | 55.3 | - | - | -| 第三阶段 (原文) | **50.4** | **0.014** | 31.0 | 50.8 | - | - | -| [第一阶段 (复现)](/configs/dim/dim_stage1-v16_1xb1-1000k_comp1k.py) | 53.8 | 0.017 | 32.7 | 54.5 | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage1_v16_1x1_1000k_comp1k_SAD-53.8_20200605_140257-979a420f.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage1_v16_1x1_1000k_comp1k_20200605_140257.log.json) | -| [第二阶段 (复现)](/configs/dim/dim_stage2-v16-pln_1xb1-1000k_comp1k.py) | 52.3 | 0.016 | 29.4 | 52.4 | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage2_v16_pln_1x1_1000k_comp1k_SAD-52.3_20200607_171909-d83c4775.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage2_v16_pln_1x1_1000k_comp1k_20200607_171909.log.json) | -| [第三阶段 (复现)](/configs/dim/dim_stage3-v16-pln_1xb1-1000k_comp1k.py) | 50.6 | 0.015 | **29.0** | **50.7** | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_SAD-50.6_20200609_111851-647f24b6.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_20200609_111851.log.json) | +| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | +| :---------------------------------------------------------------------------: | :------: | :-------: | :------: | :------: | :------: | :--------------------------------------------------------------------------------: | +| 第一阶段 (原文) | 54.6 | 0.017 | 36.7 | 55.3 | - | - | +| 第三阶段 (原文) | **50.4** | **0.014** | 31.0 | 50.8 | - | - | +| [第一阶段 (复现)](./dim_stage1-v16_1xb1-1000k_comp1k.py) | 53.8 | 0.017 | 32.7 | 54.5 | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage1_v16_1x1_1000k_comp1k_SAD-53.8_20200605_140257-979a420f.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage1_v16_1x1_1000k_comp1k_20200605_140257.log.json) | +| [第二阶段 (复现)](./dim_stage2-v16-pln_1xb1-1000k_comp1k.py) | 52.3 | 0.016 | 29.4 | 52.4 | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage2_v16_pln_1x1_1000k_comp1k_SAD-52.3_20200607_171909-d83c4775.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage2_v16_pln_1x1_1000k_comp1k_20200607_171909.log.json) | +| [第三阶段 (复现)](./dim_stage3-v16-pln_1xb1-1000k_comp1k.py) | 50.6 | 0.015 | **29.0** | **50.7** | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_SAD-50.6_20200609_111851-647f24b6.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_20200609_111851.log.json) | +| [第一阶段 (online merge)](./dim_stage1-v16_1xb1-1000k_comp1k_online-merge.py) | - | - | - | - | - | - | **注** diff --git a/configs/dim/metafile.yml b/configs/dim/metafile.yml index 83293d2c9c..bf4087954e 100644 --- a/configs/dim/metafile.yml +++ b/configs/dim/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1703.03872 README: configs/dim/README.md + Task: + - matting + Year: 2017 Models: - Config: configs/dim/dim_stage1-v16_1xb1-1000k_comp1k.py In Collection: DIM @@ -52,3 +55,14 @@ Models: SAD: 50.6 Task: Matting Weights: 
https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_SAD-50.6_20200609_111851-647f24b6.pth +- Config: configs/dim/dim_stage1-v16_1xb1-1000k_comp1k_online-merge.py + In Collection: DIM + Metadata: + GPUs: '-' + Training Data: COMP1K + Name: dim_stage1-v16_1xb1-1000k_comp1k_online-merge + Results: + - Dataset: COMP1K + Metrics: {} + Task: Matting + Weights: '' diff --git a/configs/disco_diffusion/README.md b/configs/disco_diffusion/README.md index 098b7f43e4..d128fd7c69 100644 --- a/configs/disco_diffusion/README.md +++ b/configs/disco_diffusion/README.md @@ -1,8 +1,8 @@ -# Disco Diffusion +# Disco Diffusion (2022) > [Disco Diffusion](https://github.com/alembics/disco-diffusion) -> **Task**: Text2Image, Image2Image +> **Task**: Text2Image, Image2Image, diffusion @@ -24,18 +24,18 @@ Created by Somnai, augmented by Gandamu, and building on the work of RiversHaveW We have converted several `unet` weights and offer related configs. See more details of different `unet` in [Tutorial](#tutorials). -| Diffusion Model | Config | Download | -| :--------------------------------------: | :-----------------------------------------------------------------------------: | :-------------------------------------------------------------------------------: | -| 512x512_diffusion_uncond_finetune_008100 | [config](configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-512x512.py) | [model](https://download.openmmlab.com/mmediting/synthesizers/disco/adm-u_finetuned_imagenet-512x512-ab471d70.pth) | -| 256x256_diffusion_uncond | [config](configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-256x256.py) | [model](<>) | -| portrait_generator_v001 | [config](configs/disco_diffusion/disco-diffusion_portrait-generator-v001.py) | [model](https://download.openmmlab.com/mmediting/synthesizers/disco/adm-u-cvt-rgb_portrait-v001-f4a3f3bc.pth) | -| pixelartdiffusion_expanded | Coming soon! | | -| pixel_art_diffusion_hard_256 | Coming soon! | | -| pixel_art_diffusion_soft_256 | Coming soon! | | -| pixelartdiffusion4k | Coming soon! | | -| watercolordiffusion_2 | Coming soon! | | -| watercolordiffusion | Coming soon! | | -| PulpSciFiDiffusion | Coming soon! | | +| Diffusion Model | Config | Download | +| :--------------------------------------: | :-------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------: | +| 512x512_diffusion_uncond_finetune_008100 | [config](./disco-diffusion_adm-u-finetuned_imagenet-512x512.py) | [model](https://download.openmmlab.com/mmediting/synthesizers/disco/adm-u_finetuned_imagenet-512x512-ab471d70.pth) | +| 256x256_diffusion_uncond | [config](./disco-diffusion_adm-u-finetuned_imagenet-256x256.py) | [model](<>) | +| portrait_generator_v001 | [config](./disco-diffusion_portrait-generator-v001.py) | [model](https://download.openmmlab.com/mmediting/synthesizers/disco/adm-u-cvt-rgb_portrait-v001-f4a3f3bc.pth) | +| pixelartdiffusion_expanded | Coming soon! | | +| pixel_art_diffusion_hard_256 | Coming soon! | | +| pixel_art_diffusion_soft_256 | Coming soon! | | +| pixelartdiffusion4k | Coming soon! | | +| watercolordiffusion_2 | Coming soon! | | +| watercolordiffusion | Coming soon! | | +| PulpSciFiDiffusion | Coming soon! 
| | ## To-do List @@ -81,7 +81,7 @@ save_image(image, "image.png") ## Tutorials -Considering that `disco-diffusion` contains many adjustable parameters, we provide users with a [jupyter-notebook](configs/disco_diffusion/tutorials.ipynb) / [colab](https://githubtocolab.com/open-mmlab/mmediting/blob/dev-1.x/configs/disco_diffusion/tutorials.ipynb) tutorial that exhibits the meaning of different parameters, and gives results corresponding to adjustment. +Considering that `disco-diffusion` contains many adjustable parameters, we provide users with a [jupyter-notebook](./tutorials.ipynb) / [colab](https://githubtocolab.com/open-mmlab/mmediting/blob/dev-1.x/configs/disco_diffusion/tutorials.ipynb) tutorial that exhibits the meaning of different parameters, and gives results corresponding to adjustment. Refer to [Disco Sheet](https://docs.google.com/document/d/1l8s7uS2dGqjztYSjPpzlmXLjl5PM3IGkRWI3IiCuK7g/edit). ## Credits diff --git a/configs/disco_diffusion/metafile.yml b/configs/disco_diffusion/metafile.yml index 3dc2d91419..dd0163309c 100644 --- a/configs/disco_diffusion/metafile.yml +++ b/configs/disco_diffusion/metafile.yml @@ -6,6 +6,11 @@ Collections: Paper: - https://github.com/alembics/disco-diffusion README: configs/disco_diffusion/README.md + Task: + - text2image + - image2image + - diffusion + Year: 2022 Models: - Config: configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-512x512.py In Collection: Disco Diffusion @@ -15,7 +20,7 @@ Models: Results: - Dataset: Others Metrics: {} - Task: Text2Image, Image2Image + Task: Text2Image, Image2Image, diffusion Weights: https://download.openmmlab.com/mmediting/synthesizers/disco/adm-u_finetuned_imagenet-512x512-ab471d70.pth - Config: configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-256x256.py In Collection: Disco Diffusion @@ -25,7 +30,7 @@ Models: Results: - Dataset: Others Metrics: {} - Task: Text2Image, Image2Image + Task: Text2Image, Image2Image, diffusion Weights: <> - Config: configs/disco_diffusion/disco-diffusion_portrait-generator-v001.py In Collection: Disco Diffusion @@ -35,5 +40,5 @@ Models: Results: - Dataset: Others Metrics: {} - Task: Text2Image, Image2Image + Task: Text2Image, Image2Image, diffusion Weights: https://download.openmmlab.com/mmediting/synthesizers/disco/adm-u-cvt-rgb_portrait-v001-f4a3f3bc.pth diff --git a/configs/disco_diffusion/tutorials.ipynb b/configs/disco_diffusion/tutorials.ipynb index b21156bfb0..4c6e54d9b2 100644 --- a/configs/disco_diffusion/tutorials.ipynb +++ b/configs/disco_diffusion/tutorials.ipynb @@ -79,8 +79,7 @@ "!git clone -b dev-1.x https://github.com/open-mmlab/mmediting.git \n", "%cd mmediting\n", "!pip install -r requirements.txt\n", - "!pip install -e .\n", - "%cd configs/disco_diffusion" + "!pip install -e ." 
] }, { @@ -122,7 +121,7 @@ "metadata": {}, "outputs": [], "source": [ - "config = 'disco-diffusion_adm-u-finetuned_imagenet-512x512.py'\n", + "config = 'configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-512x512.py'\n", "disco = MODELS.build(Config.fromfile(config).model).cuda().eval()\n", "text_prompts = {\n", " 0: [\"clouds surround the mountains and Chinese palaces, sunshine, lake, overlook, overlook, unreal engine, light effect, Dream, Greg Rutkowski, James Gurney, artstation\"]\n", @@ -219,7 +218,7 @@ "text_prompts = {\n", " 0: [\"clouds surround the mountains and Chinese palaces,sunshine,lake,overlook,overlook,unreal engine,light effect,Dream,Greg Rutkowski,James Gurney,artstation\"]\n", "}\n", - "config = 'disco-diffusion_adm-u-finetuned_imagenet-256x256.py'\n", + "config = 'configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-256x256.py'\n", "disco = MODELS.build(Config.fromfile(config).model).cuda().eval()\n", "image = disco.infer(width=512, height=448, text_prompts=text_prompts, show_progress=True, num_inference_steps=num_inference_steps, eta=0.8, seed=seed)['samples']\n", "show_tensor(image)" @@ -233,7 +232,7 @@ "source": [ "\n", "# 512x512_diffusion_uncond_finetune_008100\n", - "config = 'disco-diffusion_adm-u-finetuned_imagenet-512x512.py'\n", + "config = 'configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-512x512.py'\n", "disco = MODELS.build(Config.fromfile(config).model).cuda().eval()\n", "image = disco.infer(width=1280, height=768, text_prompts=text_prompts, show_progress=True, num_inference_steps=num_inference_steps, eta=0.8, seed=seed)['samples']\n", "show_tensor(image)\n" @@ -294,7 +293,7 @@ "from mmedit.models.editors.disco_diffusion.guider import ImageTextGuider\n", "\n", "\n", - "config = 'disco-diffusion_adm-u-finetuned_imagenet-512x512.py'\n", + "config = 'configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-512x512.py'\n", "disco = MODELS.build(Config.fromfile(config).model).cuda().eval()\n", "text_prompts = {0: [\"A beautiful painting of a map of the city of Atlantis\"]}\n" ] @@ -1179,7 +1178,7 @@ "metadata": {}, "outputs": [], "source": [ - "config = 'disco-diffusion_adm-u-finetuned_imagenet-512x512.py'\n", + "config = 'configs/disco_diffusion/disco-diffusion_adm-u-finetuned_imagenet-512x512.py'\n", "disco = MODELS.build(Config.fromfile(config).model).cuda().eval()" ] }, diff --git a/configs/edsr/README.md b/configs/edsr/README.md index b375f49f0d..c49922a093 100644 --- a/configs/edsr/README.md +++ b/configs/edsr/README.md @@ -25,9 +25,9 @@ The metrics are `PSNR / SSIM` . 
| Method | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU Info | Download | | :----------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :------: | :------------------------------------------------------------------: | -| [edsr_x2c64b16_1x16_300k_div2k](/configs/edsr/edsr_x2c64b16_1xb16-300k_div2k.py) | 35.7592 | 31.4290 | 34.5896 | 0.9372 | 0.8874 | 0.9352 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x2c64b16_1x16_300k_div2k_20200604-19fe95ea.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x2c64b16_1x16_300k_div2k_20200604_221933.log.json) | -| [edsr_x3c64b16_1x16_300k_div2k](/configs/edsr/edsr_x3c64b16_1xb16-300k_div2k.py) | 32.3301 | 28.4125 | 30.9154 | 0.8912 | 0.8022 | 0.8711 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x3c64b16_1x16_300k_div2k_20200608-36d896f4.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x3c64b16_1x16_300k_div2k_20200608_114850.log.json) | -| [edsr_x4c64b16_1x16_300k_div2k](/configs/edsr/edsr_x4c64b16_1xb16-300k_div2k.py) | 30.2223 | 26.7870 | 28.9675 | 0.8500 | 0.7366 | 0.8172 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x4c64b16_1x16_300k_div2k_20200608-3c2af8a3.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x4c64b16_1x16_300k_div2k_20200608_115148.log.json) | +| [edsr_x2c64b16_1x16_300k_div2k](./edsr_x2c64b16_1xb16-300k_div2k.py) | 35.7592 | 31.4290 | 34.5896 | 0.9372 | 0.8874 | 0.9352 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x2c64b16_1x16_300k_div2k_20200604-19fe95ea.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x2c64b16_1x16_300k_div2k_20200604_221933.log.json) | +| [edsr_x3c64b16_1x16_300k_div2k](./edsr_x3c64b16_1xb16-300k_div2k.py) | 32.3301 | 28.4125 | 30.9154 | 0.8912 | 0.8022 | 0.8711 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x3c64b16_1x16_300k_div2k_20200608-36d896f4.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x3c64b16_1x16_300k_div2k_20200608_114850.log.json) | +| [edsr_x4c64b16_1x16_300k_div2k](./edsr_x4c64b16_1xb16-300k_div2k.py) | 30.2223 | 26.7870 | 28.9675 | 0.8500 | 0.7366 | 0.8172 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x4c64b16_1x16_300k_div2k_20200608-3c2af8a3.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x4c64b16_1x16_300k_div2k_20200608_115148.log.json) | ## Quick Start diff --git a/configs/edsr/README_zh-CN.md b/configs/edsr/README_zh-CN.md index 59579d6728..1668c56959 100644 --- a/configs/edsr/README_zh-CN.md +++ b/configs/edsr/README_zh-CN.md @@ -24,11 +24,11 @@ 在 RGB 通道上进行评估,在评估之前裁剪每个边界中的 `scale` 像素。 我们使用 `PSNR` 和 `SSIM` 作为指标。 -| 算法 | Set5 | Set14 | DIV2K | GPU 信息 | 下载 | -| :----------------------------------------------------------------------: | :--------------: | :--------------: | :--------------: | :------: | :----------------------------------------------------------------------: | -| [edsr_x2c64b16_1x16_300k_div2k](/configs/edsr/edsr_x2c64b16_1xb16-300k_div2k.py) | 35.7592 / 0.9372 | 31.4290 / 0.8874 | 34.5896 / 0.9352 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x2c64b16_1x16_300k_div2k_20200604-19fe95ea.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x2c64b16_1x16_300k_div2k_20200604_221933.log.json) | 
-| [edsr_x3c64b16_1x16_300k_div2k](/configs/edsr/edsr_x3c64b16_1xb16-300k_div2k.py) | 32.3301 / 0.8912 | 28.4125 / 0.8022 | 30.9154 / 0.8711 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x3c64b16_1x16_300k_div2k_20200608-36d896f4.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x3c64b16_1x16_300k_div2k_20200608_114850.log.json) | -| [edsr_x4c64b16_1x16_300k_div2k](/configs/edsr/edsr_x4c64b16_1xb16-300k_div2k.py) | 30.2223 / 0.8500 | 26.7870 / 0.7366 | 28.9675 / 0.8172 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x4c64b16_1x16_300k_div2k_20200608-3c2af8a3.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x4c64b16_1x16_300k_div2k_20200608_115148.log.json) | +| 算法 | Set5 | Set14 | DIV2K | GPU 信息 | 下载 | +| :------------------------------------------------------------------: | :--------------: | :--------------: | :--------------: | :------: | :--------------------------------------------------------------------------: | +| [edsr_x2c64b16_1x16_300k_div2k](./edsr_x2c64b16_1xb16-300k_div2k.py) | 35.7592 / 0.9372 | 31.4290 / 0.8874 | 34.5896 / 0.9352 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x2c64b16_1x16_300k_div2k_20200604-19fe95ea.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x2c64b16_1x16_300k_div2k_20200604_221933.log.json) | +| [edsr_x3c64b16_1x16_300k_div2k](./edsr_x3c64b16_1xb16-300k_div2k.py) | 32.3301 / 0.8912 | 28.4125 / 0.8022 | 30.9154 / 0.8711 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x3c64b16_1x16_300k_div2k_20200608-36d896f4.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x3c64b16_1x16_300k_div2k_20200608_114850.log.json) | +| [edsr_x4c64b16_1x16_300k_div2k](./edsr_x4c64b16_1xb16-300k_div2k.py) | 30.2223 / 0.8500 | 26.7870 / 0.7366 | 28.9675 / 0.8172 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x4c64b16_1x16_300k_div2k_20200608-3c2af8a3.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edsr/edsr_x4c64b16_1x16_300k_div2k_20200608_115148.log.json) | ## 快速开始 diff --git a/configs/edsr/metafile.yml b/configs/edsr/metafile.yml index 481e6ef5ac..9dba8bb33f 100644 --- a/configs/edsr/metafile.yml +++ b/configs/edsr/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1707.02921 README: configs/edsr/README.md + Task: + - image super-resolution + Year: 2017 Models: - Config: configs/edsr/edsr_x2c64b16_1xb16-300k_div2k.py In Collection: EDSR diff --git a/configs/edvr/README.md b/configs/edvr/README.md index d0c74c9299..e5de63a3de 100644 --- a/configs/edvr/README.md +++ b/configs/edvr/README.md @@ -25,17 +25,17 @@ The metrics are `PSNR and SSIM` . 
| Method | PSNR | GPU Info | Download | | :---------------------------------------------------------------------------------: | :-----: | :----------------------: | :------------------------------------------------------------------------------------: | -| [edvrm_wotsa_x4_8x4_600k_reds](/configs/edvr/edvrm_wotsa_8xb4-600k_reds.py) | 30.3430 | 8 | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522-0570e567.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522_141644.log.json) | -| [edvrm_x4_8x4_600k_reds](/configs/edvr/edvrm_8xb4-600k_reds.py) | 30.4194 | 8 | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20210625-e29b71b5.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20200622_102544.log.json) | -| [edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4](/configs/edvr/edvrl_wotsa-c128b40_8xb8-lr2e-4-600k_reds4.py) | 31.0010 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228-d895a769.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228_144658.log.json) | -| [edvrl_c128b40_8x8_lr2e-4_600k_reds4](/configs/edvr/edvrl_c128b40_8xb8-lr2e-4-600k_reds4.py) | 31.0467 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104-4509865f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104_171823.log.json) | +| [edvrm_wotsa_x4_8x4_600k_reds](./edvrm_wotsa_8xb4-600k_reds.py) | 30.3430 | 8 | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522-0570e567.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522_141644.log.json) | +| [edvrm_x4_8x4_600k_reds](./edvrm_8xb4-600k_reds.py) | 30.4194 | 8 | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20210625-e29b71b5.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20200622_102544.log.json) | +| [edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4](./edvrl_wotsa-c128b40_8xb8-lr2e-4-600k_reds4.py) | 31.0010 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228-d895a769.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228_144658.log.json) | +| [edvrl_c128b40_8x8_lr2e-4_600k_reds4](./edvrl_c128b40_8xb8-lr2e-4-600k_reds4.py) | 31.0467 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104-4509865f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104_171823.log.json) | | Method | SSIM | GPU Info | Download | | :----------------------------------------------------------------------------------: | :----: | :----------------------: | :------------------------------------------------------------------------------------: | -| [edvrm_wotsa_x4_8x4_600k_reds](/configs/edvr/edvrm_wotsa_8xb4-600k_reds.py) | 0.8664 | 8 | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522-0570e567.pth) \| 
[log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522_141644.log.json) | -| [edvrm_x4_8x4_600k_reds](/configs/edvr/edvrm_8xb4-600k_reds.py) | 0.8684 | 8 | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20210625-e29b71b5.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20200622_102544.log.json) | -| [edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4](/configs/edvr/edvrl_wotsa-c128b40_8xb8-lr2e-4-600k_reds4.py) | 0.8784 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228-d895a769.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228_144658.log.json) | -| [edvrl_c128b40_8x8_lr2e-4_600k_reds4](/configs/edvr/edvrl_c128b40_8xb8-lr2e-4-600k_reds4.py) | 0.8793 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104-4509865f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104_171823.log.json) | +| [edvrm_wotsa_x4_8x4_600k_reds](./edvrm_wotsa_8xb4-600k_reds.py) | 0.8664 | 8 | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522-0570e567.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522_141644.log.json) | +| [edvrm_x4_8x4_600k_reds](./edvrm_8xb4-600k_reds.py) | 0.8684 | 8 | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20210625-e29b71b5.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20200622_102544.log.json) | +| [edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4](./edvrl_wotsa-c128b40_8xb8-lr2e-4-600k_reds4.py) | 0.8784 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228-d895a769.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228_144658.log.json) | +| [edvrl_c128b40_8x8_lr2e-4_600k_reds4](./edvrl_c128b40_8xb8-lr2e-4-600k_reds4.py) | 0.8793 | 8 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104-4509865f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104_171823.log.json) | ## Quick Start diff --git a/configs/edvr/README_zh-CN.md b/configs/edvr/README_zh-CN.md index d5bdfe52a2..d8d14809d2 100644 --- a/configs/edvr/README_zh-CN.md +++ b/configs/edvr/README_zh-CN.md @@ -26,10 +26,10 @@ | 算法 | REDS4 | GPU 信息 | 下载 | | :------------------------------------------------------------------------------: | :--------------: | :----------------------: | :------------------------------------------------------------------------------: | -| [edvrm_wotsa_x4_8x4_600k_reds](/configs/edvr/edvrm_wotsa_8xb4-600k_reds.py) | 30.3430 / 0.8664 | 8 | [模型](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522-0570e567.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522_141644.log.json) | -| [edvrm_x4_8x4_600k_reds](/configs/edvr/edvrm_8xb4-600k_reds.py) | 30.4194 / 0.8684 | 8 | 
[模型](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20210625-e29b71b5.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20200622_102544.log.json) | -| [edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4](/configs/edvr/edvrl_wotsa-c128b40_8xb8-lr2e-4-600k_reds4.py) | 31.0010 / 0.8784 | 8 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228-d895a769.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228_144658.log.json) | -| [edvrl_c128b40_8x8_lr2e-4_600k_reds4](/configs/edvr/edvrl_c128b40_8xb8-lr2e-4-600k_reds4.py) | 31.0467 / 0.8793 | 8 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104-4509865f.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104_171823.log.json) | +| [edvrm_wotsa_x4_8x4_600k_reds](./edvrm_wotsa_8xb4-600k_reds.py) | 30.3430 / 0.8664 | 8 | [模型](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522-0570e567.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522_141644.log.json) | +| [edvrm_x4_8x4_600k_reds](./edvrm_8xb4-600k_reds.py) | 30.4194 / 0.8684 | 8 | [模型](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20210625-e29b71b5.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_x4_8x4_600k_reds_20200622_102544.log.json) | +| [edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4](./edvrl_wotsa-c128b40_8xb8-lr2e-4-600k_reds4.py) | 31.0010 / 0.8784 | 8 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228-d895a769.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_wotsa_c128b40_8x8_lr2e-4_600k_reds4_20211228_144658.log.json) | +| [edvrl_c128b40_8x8_lr2e-4_600k_reds4](./edvrl_c128b40_8xb8-lr2e-4-600k_reds4.py) | 31.0467 / 0.8793 | 8 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104-4509865f.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/edvr/edvrl_c128b40_8x8_lr2e-4_600k_reds4_20220104_171823.log.json) | ## 快速开始 diff --git a/configs/edvr/metafile.yml b/configs/edvr/metafile.yml index dd7ca2fd59..c5669c4fe1 100644 --- a/configs/edvr/metafile.yml +++ b/configs/edvr/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1905.02716?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%253A+arxiv%252FQSXk+%2528ExcitingAds%2521+cs+updates+on+arXiv.org%2529 README: configs/edvr/README.md + Task: + - video super-resolution + Year: 2019 Models: - Config: configs/edvr/edvrm_wotsa_8xb4-600k_reds.py In Collection: EDVR diff --git a/configs/eg3d/README.md b/configs/eg3d/README.md index a1de6c75f8..7ef79dcf32 100644 --- a/configs/eg3d/README.md +++ b/configs/eg3d/README.md @@ -20,11 +20,11 @@ Unsupervised generation of high-quality multi-view-consistent images and 3D shap ## Results and Models -| Model | Comment | FID50k | FID50k-Camera | Config | Download | -| :----------: | :-------------: | :----: | :-----------: | :---------------------------------------------------------------: | :---------------------------------------------------------------------------------------: | -| 
ShapeNet-Car | official weight | 5.6573 | 5.2325 | [config](/configs/eg3d/eg3d_cvt-official-rgb_shapenet-128x128.py) | [model](https://download.openmmlab.com/mmediting/eg3d/eg3d_cvt-official-rgb_shapenet-128x128-85757f4d.pth) | -| AFHQ | official weight | 2.9134 | 6.4213 | [config](/configs/eg3d/eg3d_cvt-official-rgb_afhq-512x512.py) | [model](https://download.openmmlab.com/mmediting/eg3d/eg3d_cvt-official-rgb_afhq-512x512-ca1dd7c9.pth) | -| FFHQ | official weight | 4.3076 | 6.4453 | [config](configs/eg3d/eg3d_cvt-official-rgb_ffhq-512x512.py) | [model](https://download.openmmlab.com/mmediting/eg3d/eg3d_cvt-official-rgb_ffhq-512x512-5a0ddcb6.pth) | +| Model | Comment | FID50k | FID50k-Camera | Config | Download | +| :----------: | :-------------: | :----: | :-----------: | :---------------------------------------------------: | :---------------------------------------------------------------------------------------------------: | +| ShapeNet-Car | official weight | 5.6573 | 5.2325 | [config](./eg3d_cvt-official-rgb_shapenet-128x128.py) | [model](https://download.openmmlab.com/mmediting/eg3d/eg3d_cvt-official-rgb_shapenet-128x128-85757f4d.pth) | +| AFHQ | official weight | 2.9134 | 6.4213 | [config](./eg3d_cvt-official-rgb_afhq-512x512.py) | [model](https://download.openmmlab.com/mmediting/eg3d/eg3d_cvt-official-rgb_afhq-512x512-ca1dd7c9.pth) | +| FFHQ | official weight | 4.3076 | 6.4453 | [config](./eg3d_cvt-official-rgb_ffhq-512x512.py) | [model](https://download.openmmlab.com/mmediting/eg3d/eg3d_cvt-official-rgb_ffhq-512x512-5a0ddcb6.pth) | - `FID50k-Camera` denotes image generated with random sampled camera position. - `FID50k` denotes image generated with camera position randomly sampled from the original dataset. @@ -32,7 +32,7 @@ Unsupervised generation of high-quality multi-view-consistent images and 3D shap ### Influence of FP16 All metrics are evaluated under FP32, and it's hard to determine how they will change if we use FP16. -For example, if we use FP16 at the super resolution module in [FFHQ model](/configs/eg3d_ffhq.py), the output images will be slightly blurrier than the ones generated under FP32, but FID (**4.03**) will be better than FP32 ones. +For example, if we use FP16 at the super resolution module in [FFHQ model](./eg3d_cvt-official-rgb_ffhq-512x512.py), the output images will be slightly blurrier than the ones generated under FP32, but FID (**4.03**) will be better than FP32 ones. ## About generate images and videos with High-Level API diff --git a/configs/eg3d/metafile.yml b/configs/eg3d/metafile.yml index 6cabb8a37c..8efc14266d 100644 --- a/configs/eg3d/metafile.yml +++ b/configs/eg3d/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://openaccess.thecvf.com/content/CVPR2022/html/Chan_Efficient_Geometry-Aware_3D_Generative_Adversarial_Networks_CVPR_2022_paper.html README: configs/eg3d/README.md + Task: + - 3d-aware generation + Year: 2022 Models: - Config: configs/eg3d/eg3d_cvt-official-rgb_shapenet-128x128.py In Collection: EG3D diff --git a/configs/esrgan/README.md b/configs/esrgan/README.md index dd5f84b783..95af8fead6 100644 --- a/configs/esrgan/README.md +++ b/configs/esrgan/README.md @@ -25,8 +25,8 @@ The metrics are `PSNR / SSIM` . 
| Method | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU Info | Download | | :----------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :------: | :------------------------------------------------------------------: | -| [esrgan_psnr_x4c64b23g32_1x16_1000k_div2k](/configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py) | 30.6428 | 27.0543 | 29.3354 | 0.8559 | 0.7447 | 0.8263 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420-bf5c993c.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420_112550.log.json) | -| [esrgan_x4c64b23g32_1x16_400k_div2k](/configs/esrgan/esrgan_x4c64b23g32_1xb16-400k_div2k.py) | 28.2700 | 24.6328 | 26.6531 | 0.7778 | 0.6491 | 0.7340 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508-f8ccaf3b.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508_191042.log.json) | +| [esrgan_psnr_x4c64b23g32_1x16_1000k_div2k](./esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py) | 30.6428 | 27.0543 | 29.3354 | 0.8559 | 0.7447 | 0.8263 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420-bf5c993c.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420_112550.log.json) | +| [esrgan_x4c64b23g32_1x16_400k_div2k](./esrgan_x4c64b23g32_1xb16-400k_div2k.py) | 28.2700 | 24.6328 | 26.6531 | 0.7778 | 0.6491 | 0.7340 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508-f8ccaf3b.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508_191042.log.json) | ## Quick Start diff --git a/configs/esrgan/README_zh-CN.md b/configs/esrgan/README_zh-CN.md index 48ba0ac400..c569397179 100644 --- a/configs/esrgan/README_zh-CN.md +++ b/configs/esrgan/README_zh-CN.md @@ -26,8 +26,8 @@ | 算法 | Set5 | Set14 | DIV2K | GPU 信息 | 下载 | | :---------------------------------------------------------------------: | :---------------: | :--------------: | :--------------: | :------: | :----------------------------------------------------------------------: | -| [esrgan_psnr_x4c64b23g32_1x16_1000k_div2k](/configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py) | 30.6428 / 0.8559 | 27.0543 / 0.7447 | 29.3354 / 0.8263 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420-bf5c993c.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420_112550.log.json) | -| [esrgan_x4c64b23g32_1x16_400k_div2k](/configs/esrgan/esrgan_x4c64b23g32_1xb16-400k_div2k.py) | 28.2700 / 0.7778 | 24.6328 / 0.6491 | 26.6531 / 0.7340 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508-f8ccaf3b.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508_191042.log.json) | +| [esrgan_psnr_x4c64b23g32_1x16_1000k_div2k](./esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py) | 30.6428 / 0.8559 | 27.0543 / 0.7447 | 29.3354 / 0.8263 | 1 | 
[模型](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420-bf5c993c.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420_112550.log.json) | +| [esrgan_x4c64b23g32_1x16_400k_div2k](./esrgan_x4c64b23g32_1xb16-400k_div2k.py) | 28.2700 / 0.7778 | 24.6328 / 0.6491 | 26.6531 / 0.7340 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508-f8ccaf3b.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508_191042.log.json) | ## 快速开始 diff --git a/configs/esrgan/metafile.yml b/configs/esrgan/metafile.yml index 184485f817..3b71708a83 100644 --- a/configs/esrgan/metafile.yml +++ b/configs/esrgan/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1809.00219 README: configs/esrgan/README.md + Task: + - image super-resolution + Year: 2018 Models: - Config: configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py In Collection: ESRGAN diff --git a/configs/flavr/README.md b/configs/flavr/README.md index b9e1a8c817..453137d8b1 100644 --- a/configs/flavr/README.md +++ b/configs/flavr/README.md @@ -25,7 +25,7 @@ The metrics are `PSNR / SSIM` . | Method | scale | PSNR | SSIM | GPU Info | Download | | :------------------------------------------------------------------------------: | :---: | :-----: | :-----: | :-----------------: | :--------------------------------------------------------------------------------: | -| [flavr_in4out1_g8b4_vimeo90k_septuplet](/configs/flavr/flavr_in4out1_8xb4_vimeo90k-septuplet.py) | x2 | 36.3340 | 0.96015 | 8 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/flavr/flavr_in4out1_g8b4_vimeo90k_septuplet_20220509-c2468995.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/flavr/flavr_in4out1_g8b4_vimeo90k_septuplet_20220509-c2468995.log.json) | +| [flavr_in4out1_g8b4_vimeo90k_septuplet](./flavr_in4out1_8xb4_vimeo90k-septuplet.py) | x2 | 36.3340 | 0.96015 | 8 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/flavr/flavr_in4out1_g8b4_vimeo90k_septuplet_20220509-c2468995.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/flavr/flavr_in4out1_g8b4_vimeo90k_septuplet_20220509-c2468995.log.json) | Note: FLAVR for x8 VFI task will supported in the future. 
diff --git a/configs/flavr/README_zh-CN.md b/configs/flavr/README_zh-CN.md index 31ff68a917..1ec3783c28 100644 --- a/configs/flavr/README_zh-CN.md +++ b/configs/flavr/README_zh-CN.md @@ -13,7 +13,7 @@ | 算法 | scale | Vimeo90k-triplet | GPU 信息 | 下载 | | :-----------------------------------------------------------------------------: | :---: | :---------------: | :-----------------: | :------------------------------------------------------------------------------: | -| [flavr_in4out1_g8b4_vimeo90k_septuplet](/configs/flavr/flavr_in4out1_8xb4_vimeo90k-septuplet.py) | x2 | 36.3340 / 0.96015 | 8 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/flavr/flavr_in4out1_g8b4_vimeo90k_septuplet_20220509-c2468995.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/flavr/flavr_in4out1_g8b4_vimeo90k_septuplet_20220509-c2468995.log.json) | +| [flavr_in4out1_g8b4_vimeo90k_septuplet](./flavr_in4out1_8xb4_vimeo90k-septuplet.py) | x2 | 36.3340 / 0.96015 | 8 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/flavr/flavr_in4out1_g8b4_vimeo90k_septuplet_20220509-c2468995.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/flavr/flavr_in4out1_g8b4_vimeo90k_septuplet_20220509-c2468995.log.json) | 注:FLAVR 中的 8 倍视频插帧算法将会在未来版本中支持。 diff --git a/configs/flavr/metafile.yml b/configs/flavr/metafile.yml index 0b0d70b430..d57958d755 100644 --- a/configs/flavr/metafile.yml +++ b/configs/flavr/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/pdf/2012.08512.pdf README: configs/flavr/README.md + Task: + - video interpolation + Year: 2020 Models: - Config: configs/flavr/flavr_in4out1_8xb4_vimeo90k-septuplet.py In Collection: FLAVR diff --git a/configs/gca/README.md b/configs/gca/README.md index 7c8a9b8913..8abbe00c06 100644 --- a/configs/gca/README.md +++ b/configs/gca/README.md @@ -20,19 +20,19 @@ Over the last few years, deep learning based approaches have achieved outstandin ## Results and models -| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | -| :--------------------------------------------------------------: | :-------: | :--------: | :-------: | :-------: | :------: | :-----------------------------------------------------------------------------------------: | -| baseline (paper) | 40.62 | 0.0106 | 21.53 | 38.43 | - | - | -| GCA (paper) | 35.28 | 0.0091 | 16.92 | 32.53 | - | - | -| [baseline (our)](/configs/gca/baseline_r34_4xb10-200k_comp1k.py) | 34.61 | 0.0083 | 16.21 | 32.12 | 4 | [model](https://download.openmmlab.com/mmediting/mattors/gca/baseline_r34_4x10_200k_comp1k_SAD-34.61_20220620-96f85d56.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/gca/baseline_r34_4x10_200k_comp1k_SAD-34.61_20220620-96f85d56.log) | -| [GCA (our)](/configs/gca/gca_r34_4xb10-200k_comp1k.py) | **33.38** | **0.0081** | **14.96** | **30.59** | 4 | [model](https://download.openmmlab.com/mmediting/mattors/gca/gca_r34_4x10_200k_comp1k_SAD-33.38_20220615-65595f39.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/gca/gca_r34_4x10_200k_comp1k_SAD-33.38_20220615-65595f39.log) | +| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | +| :---------------------------------------------------: | :-------: | :--------: | :-------: | :-------: | :------: | :----------------------------------------------------------------------------------------------------: | +| baseline (paper) | 40.62 | 0.0106 | 21.53 | 38.43 | - | - | +| GCA (paper) | 35.28 | 0.0091 | 16.92 | 32.53 | - | - | +| 
[baseline (our)](./baseline_r34_4xb10-200k_comp1k.py) | 34.61 | 0.0083 | 16.21 | 32.12 | 4 | [model](https://download.openmmlab.com/mmediting/mattors/gca/baseline_r34_4x10_200k_comp1k_SAD-34.61_20220620-96f85d56.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/gca/baseline_r34_4x10_200k_comp1k_SAD-34.61_20220620-96f85d56.log) | +| [GCA (our)](./gca_r34_4xb10-200k_comp1k.py) | **33.38** | **0.0081** | **14.96** | **30.59** | 4 | [model](https://download.openmmlab.com/mmediting/mattors/gca/gca_r34_4x10_200k_comp1k_SAD-33.38_20220615-65595f39.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/gca/gca_r34_4x10_200k_comp1k_SAD-33.38_20220615-65595f39.log) | **More results** -| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | -| :----------------------------------------------------------------------------------: | :---: | :----: | :---: | :---: | :------: | :-------------------------------------------------------------------------------------: | -| [baseline (with DIM pipeline)](/configs/gca/baseline_r34_4xb10-dimaug-200k_comp1k.py) | 49.95 | 0.0144 | 30.21 | 49.67 | 4 | [model](https://download.openmmlab.com/mmediting/mattors/gca/baseline_dimaug_r34_4x10_200k_comp1k_SAD-49.95_20200626_231612-535c9a11.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/gca/baseline_dimaug_r34_4x10_200k_comp1k_20200626_231612.log.json) | -| [GCA (with DIM pipeline)](/configs/gca/gca_r34_4xb10-dimaug-200k_comp1k.py) | 49.42 | 0.0129 | 28.07 | 49.47 | 4 | [model](https://download.openmmlab.com/mmediting/mattors/gca/gca_dimaug_r34_4x10_200k_comp1k_SAD-49.42_20200626_231422-8e9cc127.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/gca/gca_dimaug_r34_4x10_200k_comp1k_20200626_231422.log.json) | +| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | +| :------------------------------------------------------------------------: | :---: | :----: | :---: | :---: | :------: | :-----------------------------------------------------------------------------------------------: | +| [baseline (with DIM pipeline)](./baseline_r34_4xb10-dimaug-200k_comp1k.py) | 49.95 | 0.0144 | 30.21 | 49.67 | 4 | [model](https://download.openmmlab.com/mmediting/mattors/gca/baseline_dimaug_r34_4x10_200k_comp1k_SAD-49.95_20200626_231612-535c9a11.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/gca/baseline_dimaug_r34_4x10_200k_comp1k_20200626_231612.log.json) | +| [GCA (with DIM pipeline)](./gca_r34_4xb10-dimaug-200k_comp1k.py) | 49.42 | 0.0129 | 28.07 | 49.47 | 4 | [model](https://download.openmmlab.com/mmediting/mattors/gca/gca_dimaug_r34_4x10_200k_comp1k_SAD-49.42_20200626_231422-8e9cc127.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/gca/gca_dimaug_r34_4x10_200k_comp1k_20200626_231422.log.json) | ## Quick Start diff --git a/configs/gca/README_zh-CN.md b/configs/gca/README_zh-CN.md index ac7ad9e407..c995d8141b 100644 --- a/configs/gca/README_zh-CN.md +++ b/configs/gca/README_zh-CN.md @@ -20,19 +20,19 @@
-| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | -| :-----------------------------------------------------------: | :-------: | :--------: | :-------: | :-------: | :------: | :--------------------------------------------------------------------------------------------: | -| 基线 (原文) | 40.62 | 0.0106 | 21.53 | 38.43 | - | - | -| GCA (原文) | 35.28 | 0.0091 | 16.92 | 32.53 | - | - | -| [基线 (复现)](/configs/gca/baseline_r34_4xb10-200k_comp1k.py) | 34.61 | 0.0083 | 16.21 | 32.12 | 4 | [模型](https://download.openmmlab.com/mmediting/mattors/gca/baseline_r34_4x10_200k_comp1k_SAD-34.61_20220620-96f85d56.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/gca/baseline_r34_4x10_200k_comp1k_SAD-34.61_20220620-96f85d56.log) | -| [GCA (复现)](/configs/gca/gca_r34_4xb10-200k_comp1k.py) | **33.38** | **0.0081** | **14.96** | **30.59** | 4 | [模型](https://download.openmmlab.com/mmediting/mattors/gca/gca_r34_4x10_200k_comp1k_SAD-33.38_20220615-65595f39.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/gca/gca_r34_4x10_200k_comp1k_SAD-33.38_20220615-65595f39.log) | +| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | +| :------------------------------------------------: | :-------: | :--------: | :-------: | :-------: | :------: | :-------------------------------------------------------------------------------------------------------: | +| 基线 (原文) | 40.62 | 0.0106 | 21.53 | 38.43 | - | - | +| GCA (原文) | 35.28 | 0.0091 | 16.92 | 32.53 | - | - | +| [基线 (复现)](./baseline_r34_4xb10-200k_comp1k.py) | 34.61 | 0.0083 | 16.21 | 32.12 | 4 | [模型](https://download.openmmlab.com/mmediting/mattors/gca/baseline_r34_4x10_200k_comp1k_SAD-34.61_20220620-96f85d56.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/gca/baseline_r34_4x10_200k_comp1k_SAD-34.61_20220620-96f85d56.log) | +| [GCA (复现)](./gca_r34_4xb10-200k_comp1k.py) | **33.38** | **0.0081** | **14.96** | **30.59** | 4 | [模型](https://download.openmmlab.com/mmediting/mattors/gca/gca_r34_4x10_200k_comp1k_SAD-33.38_20220615-65595f39.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/gca/gca_r34_4x10_200k_comp1k_SAD-33.38_20220615-65595f39.log) | **其他结果** -| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | -| :-----------------------------------------------------------------------------: | :---: | :----: | :---: | :---: | :------: | :------------------------------------------------------------------------------------------: | -| [基线 (使用 DIM 流水线)](/configs/gca/baseline_r34_4xb10-dimaug-200k_comp1k.py) | 49.95 | 0.0144 | 30.21 | 49.67 | 4 | [模型](https://download.openmmlab.com/mmediting/mattors/gca/baseline_dimaug_r34_4x10_200k_comp1k_SAD-49.95_20200626_231612-535c9a11.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/gca/baseline_dimaug_r34_4x10_200k_comp1k_20200626_231612.log.json) | -| [GCA (使用 DIM 流水线)](/configs/gca/gca_r34_4xb10-dimaug-200k_comp1k.py) | 49.42 | 0.0129 | 28.07 | 49.47 | 4 | [模型](https://download.openmmlab.com/mmediting/mattors/gca/gca_dimaug_r34_4x10_200k_comp1k_SAD-49.42_20200626_231422-8e9cc127.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/gca/gca_dimaug_r34_4x10_200k_comp1k_20200626_231422.log.json) | +| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | +| :------------------------------------------------------------------: | :---: | :----: | :---: | :---: | :------: | :-----------------------------------------------------------------------------------------------------: | +| [基线 (使用 DIM 流水线)](./baseline_r34_4xb10-dimaug-200k_comp1k.py) | 49.95 | 0.0144 | 30.21 | 49.67 | 4 | 
[模型](https://download.openmmlab.com/mmediting/mattors/gca/baseline_dimaug_r34_4x10_200k_comp1k_SAD-49.95_20200626_231612-535c9a11.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/gca/baseline_dimaug_r34_4x10_200k_comp1k_20200626_231612.log.json) | +| [GCA (使用 DIM 流水线)](./gca_r34_4xb10-dimaug-200k_comp1k.py) | 49.42 | 0.0129 | 28.07 | 49.47 | 4 | [模型](https://download.openmmlab.com/mmediting/mattors/gca/gca_dimaug_r34_4x10_200k_comp1k_SAD-49.42_20200626_231422-8e9cc127.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/gca/gca_dimaug_r34_4x10_200k_comp1k_20200626_231422.log.json) | ## 快速开始 diff --git a/configs/gca/metafile.yml b/configs/gca/metafile.yml index 01be20817f..b65883c04c 100644 --- a/configs/gca/metafile.yml +++ b/configs/gca/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2001.04069 README: configs/gca/README.md + Task: + - matting + Year: 2020 Models: - Config: configs/gca/baseline_r34_4xb10-200k_comp1k.py In Collection: GCA diff --git a/configs/ggan/README.md b/configs/ggan/README.md index d282f0c258..f4dfda02c7 100644 --- a/configs/ggan/README.md +++ b/configs/ggan/README.md @@ -28,13 +28,15 @@ Generative Adversarial Nets (GANs) represent an important milestone for effectiv | Models | Dataset | SWD | MS-SSIM | FID | Config | Download | | :----------: | :------------: | :-----------------------------: | :-----: | :-----: | :-------------------------------------------------------------: | :----------------------------------------------------------------: | -| GGAN 64x64 | CelebA-Cropped | 11.18, 12.21, 39.16/20.85 | 0.3318 | 20.1797 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/ggan/ggan_dcgan-archi_lr1e-3-1xb128-12Mimgs_celeba-cropped-64x64.py) | [model](https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.pth) \| [log](https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210430_113839.log.json) | -| GGAN 128x128 | CelebA-Cropped | 9.81, 11.29, 19.22, 47.79/22.03 | 0.3149 | 18.7647 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/ggan/ggan_dcgan-archi_lr1e-4-1xb64-10Mimgs_celeba-cropped-128x128.py) | [model](https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210430_143027-516423dc.pth) \| [log](https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210423_154258.log.json) | -| GGAN 64x64 | LSUN-Bedroom | 9.1, 6.2, 12.27/9.19 | 0.0649 | 39.9261 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/ggan/ggan_lsgan-archi_lr1e-4-1xb128-20Mimgs_lsun-bedroom-64x64.py) | [model](https://download.openmmlab.com/mmgen/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210430_143114-5d99b76c.pth) \| [log](https://download.openmmlab.com/mmgen/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210428_202027.log.json) | +| GGAN 64x64 | CelebA-Cropped | 11.18, 12.21, 39.16/20.85 | 0.3318 | 20.1797 | [config](./ggan_dcgan-archi_lr1e-3-1xb128-12Mimgs_celeba-cropped-64x64.py) | [model](https://download.openmmlab.com/mmediting/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.pth) \| [log](https://download.openmmlab.com/mmediting/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210430_113839.log.json) | +| GGAN 128x128 | CelebA-Cropped | 9.81, 11.29, 19.22, 47.79/22.03 | 0.3149 | 18.7647 | [config](./ggan_dcgan-archi_lr1e-4-1xb64-10Mimgs_celeba-cropped-128x128.py) | 
[model](https://download.openmmlab.com/mmediting/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210430_143027-516423dc.pth) \| [log](https://download.openmmlab.com/mmediting/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210423_154258.log.json) | +| GGAN 64x64 | LSUN-Bedroom | 9.1, 6.2, 12.27/9.19 | 0.0649 | 39.9261 | [config](./ggan_lsgan-archi_lr1e-4-1xb128-20Mimgs_lsun-bedroom-64x64.py) | [model](https://download.openmmlab.com/mmediting/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210430_143114-5d99b76c.pth) \| [log](https://download.openmmlab.com/mmediting/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210428_202027.log.json) | Note: In the original implementation of [GGAN](https://github.com/lim0606/pytorch-geometric-gan), they set `G_iters` to 10. However our framework does not support `G_iters` currently, so we dropped the settings in the original implementation and conducted several experiments with our own settings. We have shown above the experiment results with the lowest `fid` score. \ Original settings and our settings: + + | Models | Dataset | Architecture | optimizer | lr_G | lr_D | G_iters | D_iters | | :----------------: | :------------: | :----------: | :-------: | :----: | :----: | :-----: | :-----: | | GGAN(origin) 64x64 | CelebA-Cropped | dcgan-archi | RMSprop | 0.0002 | 0.0002 | 10 | 1 | diff --git a/configs/ggan/metafile.yml b/configs/ggan/metafile.yml index ecb3a36221..b01e5bef52 100644 --- a/configs/ggan/metafile.yml +++ b/configs/ggan/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - https://arxiv.org/abs/1705.02894 README: configs/ggan/README.md + Task: + - unconditional gans + Year: 2017 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/ggan/ggan_dcgan-archi_lr1e-3-1xb128-12Mimgs_celeba-cropped-64x64.py +- Config: configs/ggan/ggan_dcgan-archi_lr1e-3-1xb128-12Mimgs_celeba-cropped-64x64.py In Collection: GGAN Metadata: Training Data: CELEBA @@ -18,8 +21,8 @@ Models: FID: 20.1797 MS-SSIM: 0.3318 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/ggan/ggan_dcgan-archi_lr1e-4-1xb64-10Mimgs_celeba-cropped-128x128.py + Weights: https://download.openmmlab.com/mmediting/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m.pth +- Config: configs/ggan/ggan_dcgan-archi_lr1e-4-1xb64-10Mimgs_celeba-cropped-128x128.py In Collection: GGAN Metadata: Training Data: CELEBA @@ -30,8 +33,8 @@ Models: FID: 18.7647 MS-SSIM: 0.3149 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210430_143027-516423dc.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/ggan/ggan_lsgan-archi_lr1e-4-1xb128-20Mimgs_lsun-bedroom-64x64.py + Weights: https://download.openmmlab.com/mmediting/ggan/ggan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210430_143027-516423dc.pth +- Config: configs/ggan/ggan_lsgan-archi_lr1e-4-1xb128-20Mimgs_lsun-bedroom-64x64.py In Collection: GGAN Metadata: Training Data: Others @@ -42,4 +45,4 @@ Models: FID: 39.9261 MS-SSIM: 0.0649 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210430_143114-5d99b76c.pth + Weights: https://download.openmmlab.com/mmediting/ggan/ggan_lsun-bedroom_lsgan_archi_lr-1e-4_64_b128x1_20m_20210430_143114-5d99b76c.pth 
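Regarding the GGAN note above about `G_iters` not being supported: that setting simply controls how many generator updates are taken for every discriminator update in the alternating GAN training loop. The sketch below is purely illustrative — it is not MMEditing's training loop, and `update_generator` / `update_discriminator` are placeholder functions — but it shows what the `G_iters` / `D_iters` columns in the settings table mean.

```python
# Illustrative sketch only: what `G_iters` / `D_iters` control in an
# alternating GAN training loop. The update functions are placeholders,
# not MMEditing APIs.


def update_discriminator(step: int) -> None:
    print(f'  D update at outer step {step}')


def update_generator(step: int) -> None:
    print(f'  G update at outer step {step}')


def train(num_steps: int, g_iters: int = 1, d_iters: int = 1) -> None:
    """Original GGAN used g_iters=10, d_iters=1; the runs above use 1 and 1."""
    for step in range(num_steps):
        for _ in range(d_iters):
            update_discriminator(step)
        for _ in range(g_iters):
            update_generator(step)


if __name__ == '__main__':
    train(num_steps=2, g_iters=1, d_iters=1)
```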
diff --git a/configs/glean/README.md b/configs/glean/README.md index 6d881993d0..3da41f7b98 100644 --- a/configs/glean/README.md +++ b/configs/glean/README.md @@ -24,13 +24,13 @@ For the meta info used in training and test, please refer to [here](https://gith | Method | PSNR | GPU Info | Download | | :----------------------------------------------------------------------------------: | :---: | :----------------------: | :-------------------------------------------------------------------------------------: | -| [glean_cat_8x](/configs/glean/glean_x8_2xb8_cat.py) | 23.98 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_8x_20210614-d3ac8683.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_8x_20210614_145540.log.json) | -| [glean_ffhq_16x](/configs/glean/glean_x16_2xb8_ffhq.py) | 26.91 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/glean/glean_ffhq_16x_20210527-61a3afad.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/glean/glean_ffhq_16x_20210527_194536.log.json) | -| [glean_cat_16x](/configs/glean/glean_x16_2xb8_cat.py) | 20.88 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_16x_20210527-68912543.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_16x_20210527_103708.log.json) | -| [glean_in128out1024_4x2_300k_ffhq_celebahq](/configs/glean/glean_in128out1024_4xb2-300k_ffhq-celeba-hq.py) | 27.94 | 4 (Tesla V100-SXM3-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812-acbcb04f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812_100549.log.json) | -| [glean_fp16_cat_8x](/configs/glean/glean_x8-fp16_2xb8_cat.py) | - | - | - | -| [glean_fp16_ffhq_16x](/configs/glean/glean_x16-fp16_2xb8_ffhq.py) | - | - | - | -| [glean_fp16_in128out1024_4x2_300k_ffhq_celebahq](/configs/glean/glean_in128out1024-fp16_4xb2-300k_ffhq-celeba-hq.py) | - | - | - | +| [glean_cat_8x](./glean_x8_2xb8_cat.py) | 23.98 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_8x_20210614-d3ac8683.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_8x_20210614_145540.log.json) | +| [glean_ffhq_16x](./glean_x16_2xb8_ffhq.py) | 26.91 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/glean/glean_ffhq_16x_20210527-61a3afad.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/glean/glean_ffhq_16x_20210527_194536.log.json) | +| [glean_cat_16x](./glean_x16_2xb8_cat.py) | 20.88 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_16x_20210527-68912543.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_16x_20210527_103708.log.json) | +| [glean_in128out1024_4x2_300k_ffhq_celebahq](./glean_in128out1024_4xb2-300k_ffhq-celeba-hq.py) | 27.94 | 4 (Tesla V100-SXM3-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812-acbcb04f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812_100549.log.json) | +| [glean_fp16_cat_8x](./glean_x8-fp16_2xb8_cat.py) | - | - | - | +| [glean_fp16_ffhq_16x](./glean_x16-fp16_2xb8_ffhq.py) | - | - | - | +| 
[glean_fp16_in128out1024_4x2_300k_ffhq_celebahq](./glean_in128out1024-fp16_4xb2-300k_ffhq-celeba-hq.py) | - | - | - | ## Quick Start diff --git a/configs/glean/README_zh-CN.md b/configs/glean/README_zh-CN.md index 2d47a7c713..3bd66eb8ce 100644 --- a/configs/glean/README_zh-CN.md +++ b/configs/glean/README_zh-CN.md @@ -24,13 +24,13 @@ | 算法 | PSNR | GPU 信息 | 下载 | | :-----------------------------------------------------------------------------------: | :---: | :----------------------: | :------------------------------------------------------------------------------------: | -| [glean_cat_8x](/configs/glean/glean_x8_2xb8_cat.py) | 23.98 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_8x_20210614-d3ac8683.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_8x_20210614_145540.log.json) | -| [glean_ffhq_16x](/configs/glean/glean_x16_2xb8_ffhq.py) | 26.91 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/glean/glean_ffhq_16x_20210527-61a3afad.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/glean/glean_ffhq_16x_20210527_194536.log.json) | -| [glean_cat_16x](/configs/glean/glean_x16_2xb8_cat.py) | 20.88 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_16x_20210527-68912543.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_16x_20210527_103708.log.json) | -| [glean_in128out1024_4x2_300k_ffhq_celebahq](/configs/glean/glean_in128out1024_4xb2-300k_ffhq-celeba-hq.py) | 27.94 | 4 (Tesla V100-SXM3-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812-acbcb04f.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812_100549.log.json) | -| [glean_fp16_cat_8x](/configs/glean/glean_x8-fp16_2xb8_cat.py) | - | - | - | -| [glean_fp16_ffhq_16x](/configs/glean/glean_x16-fp16_2xb8_ffhq.py) | - | - | - | -| [glean_fp16_in128out1024_4x2_300k_ffhq_celebahq](/configs/glean/glean_in128out1024-fp16_4xb2-300k_ffhq-celeba-hq.py) | - | - | - | +| [glean_cat_8x](./glean_x8_2xb8_cat.py) | 23.98 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_8x_20210614-d3ac8683.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_8x_20210614_145540.log.json) | +| [glean_ffhq_16x](./glean_x16_2xb8_ffhq.py) | 26.91 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/glean/glean_ffhq_16x_20210527-61a3afad.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/glean/glean_ffhq_16x_20210527_194536.log.json) | +| [glean_cat_16x](./glean_x16_2xb8_cat.py) | 20.88 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_16x_20210527-68912543.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/glean/glean_cat_16x_20210527_103708.log.json) | +| [glean_in128out1024_4x2_300k_ffhq_celebahq](./glean_in128out1024_4xb2-300k_ffhq-celeba-hq.py) | 27.94 | 4 (Tesla V100-SXM3-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812-acbcb04f.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812_100549.log.json) | +| [glean_fp16_cat_8x](./glean_x8-fp16_2xb8_cat.py) | - | - | - | +| 
[glean_fp16_ffhq_16x](./glean_x16-fp16_2xb8_ffhq.py) | - | - | - | +| [glean_fp16_in128out1024_4x2_300k_ffhq_celebahq](./glean_in128out1024-fp16_4xb2-300k_ffhq-celeba-hq.py) | - | - | - | ## 快速开始 diff --git a/configs/glean/glean_in128out1024_4xb2-300k_ffhq-celeba-hq.py b/configs/glean/glean_in128out1024_4xb2-300k_ffhq-celeba-hq.py index 2ecea3757d..ace9f0e59f 100644 --- a/configs/glean/glean_in128out1024_4xb2-300k_ffhq-celeba-hq.py +++ b/configs/glean/glean_in128out1024_4xb2-300k_ffhq-celeba-hq.py @@ -15,7 +15,7 @@ style_channels=512, init_cfg=dict( type='Pretrained', - checkpoint='http://download.openmmlab.com/mmgen/stylegan2/' + checkpoint='http://download.openmmlab.com/mmediting/stylegan2/' 'official_weights/stylegan2-ffhq-config-f-official_20210327' '_171224-bce9310c.pth', prefix='generator_ema')), @@ -24,7 +24,7 @@ in_size=1024, init_cfg=dict( type='Pretrained', - checkpoint='http://download.openmmlab.com/mmgen/stylegan2/' + checkpoint='http://download.openmmlab.com/mmediting/stylegan2/' 'official_weights/stylegan2-ffhq-config-f-official_20210327' '_171224-bce9310c.pth', prefix='discriminator')), diff --git a/configs/glean/glean_x16_2xb8_cat.py b/configs/glean/glean_x16_2xb8_cat.py index 601de1c61e..c90e673b59 100644 --- a/configs/glean/glean_x16_2xb8_cat.py +++ b/configs/glean/glean_x16_2xb8_cat.py @@ -15,7 +15,7 @@ style_channels=512, init_cfg=dict( type='Pretrained', - checkpoint='http://download.openmmlab.com/mmgen/stylegan2/' + checkpoint='http://download.openmmlab.com/mmediting/stylegan2/' 'official_weights/stylegan2-cat-config-f-official_20210327' '_172444-15bc485b.pth', prefix='generator_ema')), @@ -24,7 +24,7 @@ in_size=256, init_cfg=dict( type='Pretrained', - checkpoint='http://download.openmmlab.com/mmgen/stylegan2/' + checkpoint='http://download.openmmlab.com/mmediting/stylegan2/' 'official_weights/stylegan2-cat-config-f-official_20210327' '_172444-15bc485b.pth', prefix='discriminator')), diff --git a/configs/glean/glean_x16_2xb8_ffhq.py b/configs/glean/glean_x16_2xb8_ffhq.py index 84d4d70205..cdd902b8da 100644 --- a/configs/glean/glean_x16_2xb8_ffhq.py +++ b/configs/glean/glean_x16_2xb8_ffhq.py @@ -15,7 +15,7 @@ style_channels=512, init_cfg=dict( type='Pretrained', - checkpoint='http://download.openmmlab.com/mmgen/stylegan2/' + checkpoint='http://download.openmmlab.com/mmediting/stylegan2/' 'official_weights/stylegan2-ffhq-config-f-official_20210327' '_171224-bce9310c.pth', prefix='generator_ema')), @@ -24,7 +24,7 @@ in_size=1024, init_cfg=dict( type='Pretrained', - checkpoint='http://download.openmmlab.com/mmgen/stylegan2/' + checkpoint='http://download.openmmlab.com/mmediting/stylegan2/' 'official_weights/stylegan2-ffhq-config-f-official_20210327' '_171224-bce9310c.pth', prefix='discriminator')), diff --git a/configs/glean/glean_x8_2xb8_cat.py b/configs/glean/glean_x8_2xb8_cat.py index 50023da0e6..e0d501f940 100644 --- a/configs/glean/glean_x8_2xb8_cat.py +++ b/configs/glean/glean_x8_2xb8_cat.py @@ -15,7 +15,7 @@ style_channels=512, init_cfg=dict( type='Pretrained', - checkpoint='http://download.openmmlab.com/mmgen/stylegan2/' + checkpoint='http://download.openmmlab.com/mmediting/stylegan2/' 'official_weights/stylegan2-cat-config-f-official_20210327' '_172444-15bc485b.pth', prefix='generator_ema')), @@ -24,7 +24,7 @@ in_size=256, init_cfg=dict( type='Pretrained', - checkpoint='http://download.openmmlab.com/mmgen/stylegan2/' + checkpoint='http://download.openmmlab.com/mmediting/stylegan2/' 'official_weights/stylegan2-cat-config-f-official_20210327' 
'_172444-15bc485b.pth', prefix='discriminator')), diff --git a/configs/glean/metafile.yml b/configs/glean/metafile.yml index cd62fca475..54a3d932d3 100644 --- a/configs/glean/metafile.yml +++ b/configs/glean/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2012.00739 README: configs/glean/README.md + Task: + - image super-resolution + Year: 2021 Models: - Config: configs/glean/glean_x8_2xb8_cat.py In Collection: GLEAN diff --git a/configs/global_local/README.md b/configs/global_local/README.md index 62067e062e..210ce5b732 100644 --- a/configs/global_local/README.md +++ b/configs/global_local/README.md @@ -24,15 +24,15 @@ We present a novel approach for image completion that results in images that are **Places365-Challenge** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :-------------------------------------------------------------: | :---------: | :--------: | :---------: | :-----------: | :------: | :----: | :---: | :------: | :---------------------------------------------------------------: | -| [Global&Local](/configs/global_local/gl_8xb12_places-256x256.py) | square bbox | 256x256 | 500k | Places365-val | 11.164 | 23.152 | 0.862 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_places_20200619-52a040a8.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_places_20200619-52a040a8.log.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :------------------------------------------: | :---------: | :--------: | :---------: | :-----------: | :------: | :----: | :---: | :------: | :----------------------------------------------------------------------------------: | +| [Global&Local](./gl_8xb12_places-256x256.py) | square bbox | 256x256 | 500k | Places365-val | 11.164 | 23.152 | 0.862 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_places_20200619-52a040a8.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_places_20200619-52a040a8.log.json) | **CelebA-HQ** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :--------------------------------------------------------------: | :---------: | :--------: | :---------: | :--------: | :------: | :----: | :---: | :------: | :-----------------------------------------------------------------: | -| [Global&Local](/configs/global_local/gl_8xb12_celeba-256x256.py) | square bbox | 256x256 | 500k | CelebA-val | 6.678 | 26.780 | 0.904 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.log.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :------------------------------------------: | :---------: | :--------: | :---------: | :--------: | :------: | :----: | :---: | :------: | :-------------------------------------------------------------------------------------: | +| [Global&Local](./gl_8xb12_celeba-256x256.py) | square bbox | 256x256 | 500k | CelebA-val | 6.678 | 26.780 | 0.904 | 8 | [model](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.pth) \| 
[log](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.log.json) | ## Quick Start diff --git a/configs/global_local/README_zh-CN.md b/configs/global_local/README_zh-CN.md index 3836593023..827227a03e 100644 --- a/configs/global_local/README_zh-CN.md +++ b/configs/global_local/README_zh-CN.md @@ -28,15 +28,15 @@ **Places365-Challenge** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :--------------------------------------------------------------: | :---------: | :-----: | :--------: | :-----------: | :-----: | :----: | :---: | :------: | :-------------------------------------------------------------------: | -| [Global&Local](/configs/global_local/gl_8xb12_places-256x256.py) | square bbox | 256x256 | 500k | Places365-val | 11.164 | 23.152 | 0.862 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_places_20200619-52a040a8.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_places_20200619-52a040a8.log.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| :------------------------------------------: | :---------: | :-----: | :--------: | :-----------: | :-----: | :----: | :---: | :------: | :---------------------------------------------------------------------------------------: | +| [Global&Local](./gl_8xb12_places-256x256.py) | square bbox | 256x256 | 500k | Places365-val | 11.164 | 23.152 | 0.862 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_places_20200619-52a040a8.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_places_20200619-52a040a8.log.json) | **CelebA-HQ** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :--------------------------------------------------------------: | :---------: | :-----: | :--------: | :--------: | :-----: | :----: | :---: | :------: | :----------------------------------------------------------------------: | -| [Global&Local](/configs/global_local/gl_8xb12_celeba-256x256.py) | square bbox | 256x256 | 500k | CelebA-val | 6.678 | 26.780 | 0.904 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.log.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| :------------------------------------------: | :---------: | :-----: | :--------: | :--------: | :-----: | :----: | :---: | :------: | :------------------------------------------------------------------------------------------: | +| [Global&Local](./gl_8xb12_celeba-256x256.py) | square bbox | 256x256 | 500k | CelebA-val | 6.678 | 26.780 | 0.904 | 8 | [模型](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.log.json) | ## 快速开始 diff --git a/configs/global_local/metafile.yml b/configs/global_local/metafile.yml index e123af05ff..af45dc1945 100644 --- a/configs/global_local/metafile.yml +++ b/configs/global_local/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - http://iizuka.cs.tsukuba.ac.jp/projects/completion/data/completion_sig2017.pdf README: configs/global_local/README.md + Task: + - inpainting + Year: 2017 Models: - Config: 
configs/global_local/gl_8xb12_places-256x256.py In Collection: Global&Local diff --git a/configs/guided_diffusion/README.md b/configs/guided_diffusion/README.md index eb659b8e72..0f107cc1a7 100644 --- a/configs/guided_diffusion/README.md +++ b/configs/guided_diffusion/README.md @@ -22,10 +22,11 @@ We show that diffusion models can achieve image sample quality superior to the c **ImageNet** -| Method | Resolution | Config | Weights | -| ------ | ---------- | ------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------- | -| adm-u | 64x64 | [config](configs/guided_diffusion/adm-u_8xb32_imagenet-64x64.py) | [model](https://download.openmmlab.com/mmgen/guided_diffusion/adm-u-cvt-rgb_8xb32_imagenet-64x64-7ff0080b.pth) | -| adm-u | 512x512 | [config](configs/guided_diffusion/adm-u_8xb32_imagenet-512x512.py) | [model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmgen/guided_diffusion/adm-u_8xb32_imagenet-512x512-60b381cb.pth) | +| Model | Resolution | Config | Download | +| :---: | :--------: | :-------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------: | +| adm-u | 64x64 | [config](./adm-u_8xb32_imagenet-64x64.py) | [model](https://download.openmmlab.com/mmediting/guided_diffusion/adm-u-cvt-rgb_8xb32_imagenet-64x64-7ff0080b.pth) | +| adm-u | 512x512 | [config](./adm-u_8xb32_imagenet-512x512.py) | [model](https://download.openmmlab.com/mmediting/guided_diffusion/adm-u_8xb32_imagenet-512x512-60b381cb.pth) | +| adm-u | 512x512 | [config](./adm-u_ddim250_8xb32_imagenet-512x512.py) | [model](<>) | **Note** To support disco diffusion, we support guided diffusion briefly. Complete support of guided diffusion with metrics and test/train logs will come soom! 
diff --git a/configs/guided_diffusion/metafile.yml b/configs/guided_diffusion/metafile.yml index 1eedef6e1a..c77e2d9908 100644 --- a/configs/guided_diffusion/metafile.yml +++ b/configs/guided_diffusion/metafile.yml @@ -6,4 +6,37 @@ Collections: Paper: - https://papers.nips.cc/paper/2021/file/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf README: configs/guided_diffusion/README.md -Models: [] + Task: + - image generation + Year: 2021 +Models: +- Config: configs/guided_diffusion/adm-u_8xb32_imagenet-64x64.py + In Collection: Guided Diffusion + Metadata: + Training Data: Others + Name: adm-u_8xb32_imagenet-64x64 + Results: + - Dataset: Others + Metrics: {} + Task: Image Generation + Weights: https://download.openmmlab.com/mmediting/guided_diffusion/adm-u-cvt-rgb_8xb32_imagenet-64x64-7ff0080b.pth +- Config: configs/guided_diffusion/adm-u_8xb32_imagenet-512x512.py + In Collection: Guided Diffusion + Metadata: + Training Data: Others + Name: adm-u_8xb32_imagenet-512x512 + Results: + - Dataset: Others + Metrics: {} + Task: Image Generation + Weights: https://download.openmmlab.com/mmediting/guided_diffusion/adm-u_8xb32_imagenet-512x512-60b381cb.pth +- Config: configs/guided_diffusion/adm-u_ddim250_8xb32_imagenet-512x512.py + In Collection: Guided Diffusion + Metadata: + Training Data: Others + Name: adm-u_ddim250_8xb32_imagenet-512x512 + Results: + - Dataset: Others + Metrics: {} + Task: Image Generation + Weights: <> diff --git a/configs/iconvsr/README.md b/configs/iconvsr/README.md index 2925949748..869e49e76a 100644 --- a/configs/iconvsr/README.md +++ b/configs/iconvsr/README.md @@ -25,15 +25,15 @@ The pretrained weights of the IconVSR components can be found here: [SPyNet](htt | Method | REDS4 (BIx4) PSNR (RGB) | Vimeo-90K-T (BIx4) PSNR (Y) | Vid4 (BIx4) PSNR (Y) | UDM10 (BDx4) PSNR (Y) | Vimeo-90K-T (BDx4) PSNR (Y) | Vid4 (BDx4) PSNR (Y) | GPU Info | Download | | :-----------------: | :---------------------: | :-------------------------: | :------------------: | :-------------------: | :-------------------------: | :------------------: | :-------------------: | :--------------------: | -| [iconvsr_reds4](/configs/iconvsr/iconvsr_2xb4_reds4.py) | **31.6926** | 36.4983 | **27.4809** | 35.3377 | 34.4299 | 25.2110 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413-9e09d621.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413_222735.log.json) | -| [iconvsr_vimeo90k_bi](/configs/iconvsr/iconvsr_2xb4_vimeo90k-bi.py) | 30.3452 | **37.3729** | 27.4238 | 34.2595 | 34.5548 | 24.6666 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413-7c7418dc.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413_222757.log.json) | -| [iconvsr_vimeo90k_bd](/configs/iconvsr/iconvsr_2xb4_vimeo90k-bd.py) | 29.0150 | 34.6780 | 26.3109 | **40.0640** | **37.7573** | **28.2464** | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414-5f38cb34.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414_084128.log.json) | +| [iconvsr_reds4](./iconvsr_2xb4_reds4.py) | **31.6926** | 36.4983 | **27.4809** | 35.3377 | 34.4299 | 25.2110 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413-9e09d621.pth) \| 
[log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413_222735.log.json) | +| [iconvsr_vimeo90k_bi](./iconvsr_2xb4_vimeo90k-bi.py) | 30.3452 | **37.3729** | 27.4238 | 34.2595 | 34.5548 | 24.6666 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413-7c7418dc.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413_222757.log.json) | +| [iconvsr_vimeo90k_bd](./iconvsr_2xb4_vimeo90k-bd.py) | 29.0150 | 34.6780 | 26.3109 | **40.0640** | **37.7573** | **28.2464** | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414-5f38cb34.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414_084128.log.json) | | Method | REDS4 (BIx4) SSIM (RGB) | Vimeo-90K-T (BIx4) SSIM (Y) | Vid4 (BIx4) SSIM (Y) | UDM10 (BDx4) SSIM (Y) | Vimeo-90K-T (BDx4) SSIM (Y) | Vid4 (BDx4) SSIM (Y) | GPU Info | Download | | :-----------------: | :---------------------: | :-------------------------: | :------------------: | :-------------------: | :-------------------------: | :------------------: | :-------------------: | :--------------------: | -| [iconvsr_reds4](/configs/iconvsr/iconvsr_2xb4_reds4.py) | **0.8951** | 0.9416 | **0.8354** | 0.9471 | 0.9287 | 0.7732 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413-9e09d621.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413_222735.log.json) | -| [iconvsr_vimeo90k_bi](/configs/iconvsr/iconvsr_2xb4_vimeo90k-bi.py) | 0.8659 | **0.9467** | 0.8297 | 0.9398 | 0.9295 | 0.7491 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413-7c7418dc.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413_222757.log.json) | -| [iconvsr_vimeo90k_bd](/configs/iconvsr/iconvsr_2xb4_vimeo90k-bd.py) | 0.8465 | 0.9339 | 0.8028 | **0.9697** | **0.9517** | **0.8612** | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414-5f38cb34.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414_084128.log.json) | +| [iconvsr_reds4](./iconvsr_2xb4_reds4.py) | **0.8951** | 0.9416 | **0.8354** | 0.9471 | 0.9287 | 0.7732 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413-9e09d621.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413_222735.log.json) | +| [iconvsr_vimeo90k_bi](./iconvsr_2xb4_vimeo90k-bi.py) | 0.8659 | **0.9467** | 0.8297 | 0.9398 | 0.9295 | 0.7491 | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413-7c7418dc.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413_222757.log.json) | +| [iconvsr_vimeo90k_bd](./iconvsr_2xb4_vimeo90k-bd.py) | 0.8465 | 0.9339 | 0.8028 | **0.9697** | **0.9517** | **0.8612** | 2 (Tesla V100-PCIE-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414-5f38cb34.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414_084128.log.json) | ## Quick Start diff --git 
a/configs/iconvsr/README_zh-CN.md b/configs/iconvsr/README_zh-CN.md index 90c21e71c0..02fe20589a 100644 --- a/configs/iconvsr/README_zh-CN.md +++ b/configs/iconvsr/README_zh-CN.md @@ -25,9 +25,9 @@ IconVSR 组件的预训练权重可以在这里找到:[SPyNet](https://downloa | 算法 | REDS4 (BIx4)<br>PSNR/SSIM (RGB) | Vimeo-90K-T (BIx4)<br>PSNR/SSIM (Y) | Vid4 (BIx4)<br>PSNR/SSIM (Y) | UDM10 (BDx4)<br>PSNR/SSIM (Y) | Vimeo-90K-T (BDx4)<br>PSNR/SSIM (Y) | Vid4 (BDx4)<br>
PSNR/SSIM (Y) | GPU 信息 | 下载 | | :-: | :-----------------------------: | :---------------------------------: | :--------------------------: | :---------------------------: | :---------------------------------: | :--------------------------: | :-----: | :--: | -| [iconvsr_reds4](/configs/iconvsr/iconvsr_2xb4_reds4.py) | **31.6926/0.8951** | 36.4983/0.9416 | **27.4809/0.8354** | 35.3377/0.9471 | 34.4299/0.9287 | 25.2110/0.7732 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413-9e09d621.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413_222735.log.json) | -| [iconvsr_vimeo90k_bi](/configs/iconvsr/iconvsr_2xb4_vimeo90k-bi.py) | 30.3452/0.8659 | **37.3729/0.9467** | 27.4238/0.8297 | 34.2595/0.9398 | 34.5548/0.9295 | 24.6666/0.7491 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413-7c7418dc.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413_222757.log.json) | -| [iconvsr_vimeo90k_bd](/configs/iconvsr/iconvsr_2xb4_vimeo90k-bd.py) | 29.0150/0.8465 | 34.6780/0.9339 | 26.3109/0.8028 | **40.0640/0.9697** | **37.7573/0.9517** | **28.2464/0.8612** | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414-5f38cb34.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414_084128.log.json) | +| [iconvsr_reds4](./iconvsr_2xb4_reds4.py) | **31.6926/0.8951** | 36.4983/0.9416 | **27.4809/0.8354** | 35.3377/0.9471 | 34.4299/0.9287 | 25.2110/0.7732 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413-9e09d621.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_reds4_20210413_222735.log.json) | +| [iconvsr_vimeo90k_bi](./iconvsr_2xb4_vimeo90k-bi.py) | 30.3452/0.8659 | **37.3729/0.9467** | 27.4238/0.8297 | 34.2595/0.9398 | 34.5548/0.9295 | 24.6666/0.7491 | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413-7c7418dc.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bi_20210413_222757.log.json) | +| [iconvsr_vimeo90k_bd](./iconvsr_2xb4_vimeo90k-bd.py) | 29.0150/0.8465 | 34.6780/0.9339 | 26.3109/0.8028 | **40.0640/0.9697** | **37.7573/0.9517** | **28.2464/0.8612** | 2 (Tesla V100-PCIE-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414-5f38cb34.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/iconvsr/iconvsr_vimeo90k_bd_20210414_084128.log.json) | ## 快速开始 diff --git a/configs/iconvsr/metafile.yml b/configs/iconvsr/metafile.yml index 8b92f4ec08..f9e58680b5 100644 --- a/configs/iconvsr/metafile.yml +++ b/configs/iconvsr/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2012.02181 README: configs/iconvsr/README.md + Task: + - video super-resolution + Year: 2021 Models: - Config: configs/iconvsr/iconvsr_2xb4_reds4.py In Collection: IconVSR diff --git a/configs/indexnet/README.md b/configs/indexnet/README.md index 6ea2e18e83..2a44aa751e 100644 --- a/configs/indexnet/README.md +++ b/configs/indexnet/README.md @@ -20,18 +20,18 @@ We show that existing upsampling operators can be unified with the notion of the ## Results and models -| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | -| 
:--------------------------------------------------------------------: | :------: | :-------: | :------: | :------: | :------: | :---------------------------------------------------------------------------------------: | -| M2O DINs (paper) | 45.8 | 0.013 | 25.9 | **43.7** | - | - | -| [M2O DINs (our)](/configs/indexnet/indexnet_mobv2_1xb16-78k_comp1k.py) | **45.6** | **0.012** | **25.5** | 44.8 | 1 | [model](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_mobv2_1x16_78k_comp1k_SAD-45.6_20200618_173817-26dd258d.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_mobv2_1x16_78k_comp1k_20200618_173817.log.json) | +| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | +| :----------------------------------------------------: | :------: | :-------: | :------: | :------: | :------: | :-------------------------------------------------------------------------------------------------------: | +| M2O DINs (paper) | 45.8 | 0.013 | 25.9 | **43.7** | - | - | +| [M2O DINs (our)](./indexnet_mobv2_1xb16-78k_comp1k.py) | **45.6** | **0.012** | **25.5** | 44.8 | 1 | [model](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_mobv2_1x16_78k_comp1k_SAD-45.6_20200618_173817-26dd258d.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_mobv2_1x16_78k_comp1k_20200618_173817.log.json) | > The performance of training (best performance) with different random seeds diverges in a large range. You may need to run several experiments for each setting to obtain the above performance. **More result** -| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | -| :------------------------------------------------------------------------------------: | :--: | :---: | :--: | :--: | :------: | :---------------------------------------------------------------------------------------: | -| [M2O DINs (with DIM pipeline)](/configs/indexnet/indexnet_mobv2-dimaug_1xb16-78k_comp1k.py) | 50.1 | 0.016 | 30.8 | 49.5 | 1 | [model](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_dimaug_mobv2_1x16_78k_comp1k_SAD-50.1_20200626_231857-af359436.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_dimaug_mobv2_1x16_78k_comp1k_20200626_231857.log.json) | +| Method | SAD | MSE | GRAD | CONN | GPU Info | Download | +| :-------------------------------------------------------------------------: | :--: | :---: | :--: | :--: | :------: | :--------------------------------------------------------------------------------------------------: | +| [M2O DINs (with DIM pipeline)](./indexnet_mobv2-dimaug_1xb16-78k_comp1k.py) | 50.1 | 0.016 | 30.8 | 49.5 | 1 | [model](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_dimaug_mobv2_1x16_78k_comp1k_SAD-50.1_20200626_231857-af359436.pth) \| [log](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_dimaug_mobv2_1x16_78k_comp1k_20200626_231857.log.json) | ## Quick Start diff --git a/configs/indexnet/README_zh-CN.md b/configs/indexnet/README_zh-CN.md index c54e9a6530..3a3ddd6156 100644 --- a/configs/indexnet/README_zh-CN.md +++ b/configs/indexnet/README_zh-CN.md @@ -20,18 +20,18 @@
-| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | -| :---------------------------------------------------------------------: | :------: | :-------: | :------: | :------: | :------: | :--------------------------------------------------------------------------------------: | -| M2O DINs (原文) | 45.8 | 0.013 | 25.9 | **43.7** | - | - | -| [M2O DINs (复现)](/configs/indexnet/indexnet_mobv2_1xb16-78k_comp1k.py) | **45.6** | **0.012** | **25.5** | 44.8 | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_mobv2_1x16_78k_comp1k_SAD-45.6_20200618_173817-26dd258d.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_mobv2_1x16_78k_comp1k_20200618_173817.log.json) | +| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | +| :-----------------------------------------------------: | :------: | :-------: | :------: | :------: | :------: | :------------------------------------------------------------------------------------------------------: | +| M2O DINs (原文) | 45.8 | 0.013 | 25.9 | **43.7** | - | - | +| [M2O DINs (复现)](./indexnet_mobv2_1xb16-78k_comp1k.py) | **45.6** | **0.012** | **25.5** | 44.8 | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_mobv2_1x16_78k_comp1k_SAD-45.6_20200618_173817-26dd258d.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_mobv2_1x16_78k_comp1k_20200618_173817.log.json) | > The performance of training (best performance) with different random seeds diverges in a large range. You may need to run several experiments for each setting to obtain the above performance. **其他结果** -| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | -| :-------------------------------------------------------------------------------------: | :--: | :---: | :--: | :--: | :------: | :--------------------------------------------------------------------------------------: | -| [M2O DINs (使用 DIM 流水线)](/configs/indexnet/indexnet_mobv2-dimaug_1xb16-78k_comp1k.py) | 50.1 | 0.016 | 30.8 | 49.5 | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_dimaug_mobv2_1x16_78k_comp1k_SAD-50.1_20200626_231857-af359436.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_dimaug_mobv2_1x16_78k_comp1k_20200626_231857.log.json) | +| 算法 | SAD | MSE | GRAD | CONN | GPU 信息 | 下载 | +| :-----------------------------------------------------------------------: | :--: | :---: | :--: | :--: | :------: | :----------------------------------------------------------------------------------------------------: | +| [M2O DINs (使用 DIM 流水线)](./indexnet_mobv2-dimaug_1xb16-78k_comp1k.py) | 50.1 | 0.016 | 30.8 | 49.5 | 1 | [模型](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_dimaug_mobv2_1x16_78k_comp1k_SAD-50.1_20200626_231857-af359436.pth) \| [日志](https://download.openmmlab.com/mmediting/mattors/indexnet/indexnet_dimaug_mobv2_1x16_78k_comp1k_20200626_231857.log.json) | ## 快速开始 diff --git a/configs/indexnet/metafile.yml b/configs/indexnet/metafile.yml index 7330abdc8f..d141bfece4 100644 --- a/configs/indexnet/metafile.yml +++ b/configs/indexnet/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1908.00672 README: configs/indexnet/README.md + Task: + - matting + Year: 2019 Models: - Config: configs/indexnet/indexnet_mobv2_1xb16-78k_comp1k.py In Collection: IndexNet diff --git a/configs/inst_colorization/README.md b/configs/inst_colorization/README.md index fdfdfb9e50..cc5a5585e8 100644 --- a/configs/inst_colorization/README.md +++ 
b/configs/inst_colorization/README.md @@ -20,9 +20,9 @@ Image colorization is inherently an ill-posed problem with multi-modal uncertain ## Results and models -| Method | Download | -| :-------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------: | -| [instance_aware_colorization_officiial](/configs/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256.py) | [model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth) | +| Method | Download | +| :----------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: | +| [instance_aware_colorization_officiial](./inst-colorizatioon_full_official_cocostuff-256x256.py) | [model](https://download.openmmlab.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth) | ## Quick Start @@ -33,7 +33,7 @@ You can use the following commands to colorize an image. ```shell -python demo/colorization_demo.py configs/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth input.jpg output.jpg +python demo/colorization_demo.py configs/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256.py https://download.openmmlab.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth input.jpg output.jpg ``` For more demos, you can refer to [Tutorial 3: inference with pre-trained models](https://mmediting.readthedocs.io/en/1.x/user_guides/3_inference.html). 
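For local experimentation, the colorization command above can also be pointed at a downloaded copy of the checkpoint instead of the URL. A minimal sketch, assuming the demo accepts a local checkpoint path and using `work_dirs/` purely as an illustrative location:

```shell
# Sketch: cache the released weights once, then reuse them for the demo.
# The config file and weight URL are the ones referenced in the command above;
# work_dirs/ and the local file name are illustrative choices.
mkdir -p work_dirs
wget -O work_dirs/inst-colorization_cocostuff-256x256.pth \
    https://download.openmmlab.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth
python demo/colorization_demo.py \
    configs/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256.py \
    work_dirs/inst-colorization_cocostuff-256x256.pth \
    input.jpg output.jpg
```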
diff --git a/configs/inst_colorization/README_zh-CN.md b/configs/inst_colorization/README_zh-CN.md index 19e59c64fc..b73dbe46df 100644 --- a/configs/inst_colorization/README_zh-CN.md +++ b/configs/inst_colorization/README_zh-CN.md @@ -20,9 +20,9 @@ Image colorization is inherently an ill-posed problem with multi-modal uncertain ## 结果和模型 -| Method | Download | -| :-------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------: | -| [instance_aware_colorization_officiial](/configs/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256.py) | [model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth) | +| Method | Download | +| :----------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------: | +| [instance_aware_colorization_officiial](./inst-colorizatioon_full_official_cocostuff-256x256.py) | [model](https://download.openmmlab.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth) | ## 快速开始 @@ -32,7 +32,7 @@ Image colorization is inherently an ill-posed problem with multi-modal uncertain 您可以使用以下命令来对一张图像进行上色。 ```shell -python demo/colorization_demo.py configs/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth input.jpg output.jpg +python demo/colorization_demo.py configs/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256.py https://download.openmmlab.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth input.jpg output.jpg ``` 更多细节可以参考 [Tutorial 3: inference with pre-trained models](https://mmediting.readthedocs.io/en/1.x/user_guides/3_inference.html)。 diff --git a/configs/inst_colorization/metafile.yml b/configs/inst_colorization/metafile.yml index eec9463695..aba5ed83e9 100644 --- a/configs/inst_colorization/metafile.yml +++ b/configs/inst_colorization/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://openaccess.thecvf.com/content_CVPR_2020/html/Su_Instance-Aware_Image_Colorization_CVPR_2020_paper.html README: configs/inst_colorization/README.md + Task: + - colorization + Year: 2020 Models: - Config: configs/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256.py In Collection: Instance-aware Image Colorization @@ -16,4 +19,4 @@ Models: - Dataset: Others Metrics: {} Task: Colorization - Weights: https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth + Weights: https://download.openmmlab.com/mmediting/inst_colorization/inst-colorizatioon_full_official_cocostuff-256x256-5b9d4eee.pth diff --git a/configs/liif/README.md b/configs/liif/README.md index f19b3940ee..d9d0229bde 100644 --- a/configs/liif/README.md +++ b/configs/liif/README.md @@ -22,13 +22,13 @@ How to represent an image? 
While the visual world is presented in a continuous m | Method | scale | Set5 PSNR | Set5 SSIM | Set14 PSNR | Set14 SSIM | DIV2K PSNR | DIV2K SSIM | GPU Info | Download | | :-----------------------------------------------------------: | :---: | :-------: | :-------: | :--------: | :--------: | :--------: | :--------: | :----------: | :--------------------------------------------------------------: | -| [liif_edsr_norm_c64b16_g1_1000k_div2k](/configs/liif/liif-edsr-norm_c64b16_1xb16-1000k_div2k.py) | x2 | 35.7131 | 0.9366 | 31.5579 | 0.8889 | 34.6647 | 0.9355 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/liif/liif_edsr_norm_c64b16_g1_1000k_div2k_20210715-ab7ce3fc.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/liif/liif_edsr_norm_c64b16_g1_1000k_div2k_20210715-ab7ce3fc.log.json) | +| [liif_edsr_norm_c64b16_g1_1000k_div2k](./liif-edsr-norm_c64b16_1xb16-1000k_div2k.py) | x2 | 35.7131 | 0.9366 | 31.5579 | 0.8889 | 34.6647 | 0.9355 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/liif/liif_edsr_norm_c64b16_g1_1000k_div2k_20210715-ab7ce3fc.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/liif/liif_edsr_norm_c64b16_g1_1000k_div2k_20210715-ab7ce3fc.log.json) | | △ | x3 | 32.3805 | 0.8915 | 28.4605 | 0.8039 | 30.9808 | 0.8724 | △ | △ | | △ | x4 | 30.2748 | 0.8509 | 26.8415 | 0.7381 | 29.0245 | 0.8187 | △ | △ | | △ | x6 | 27.1187 | 0.7774 | 24.7461 | 0.6444 | 26.7770 | 0.7425 | △ | △ | | △ | x18 | 20.8516 | 0.5406 | 20.0096 | 0.4525 | 22.1987 | 0.5955 | △ | △ | | △ | x30 | 18.8467 | 0.5010 | 18.1321 | 0.3963 | 20.5050 | 0.5577 | △ | △ | -| [liif_rdn_norm_c64b16_g1_1000k_div2k](/configs/liif/liif-rdn-norm_c64b16_1xb16-1000k_div2k.py) | x2 | 35.7874 | 0.9366 | 31.6866 | 0.8896 | 34.7548 | 0.9356 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/liif/liif_rdn_norm_c64b16_g1_1000k_div2k_20210717-22d6fdc8.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/liif/liif_rdn_norm_c64b16_g1_1000k_div2k_20210717-22d6fdc8.log.json) | +| [liif_rdn_norm_c64b16_g1_1000k_div2k](./liif-rdn-norm_c64b16_1xb16-1000k_div2k.py) | x2 | 35.7874 | 0.9366 | 31.6866 | 0.8896 | 34.7548 | 0.9356 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/liif/liif_rdn_norm_c64b16_g1_1000k_div2k_20210717-22d6fdc8.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/liif/liif_rdn_norm_c64b16_g1_1000k_div2k_20210717-22d6fdc8.log.json) | | △ | x3 | 32.4992 | 0.8923 | 28.4905 | 0.8037 | 31.0744 | 0.8731 | △ | △ | | △ | x4 | 30.3835 | 0.8513 | 26.8734 | 0.7373 | 29.1101 | 0.8197 | △ | △ | | △ | x6 | 27.1914 | 0.7751 | 24.7824 | 0.6434 | 26.8693 | 0.7437 | △ | △ | diff --git a/configs/liif/README_zh-CN.md b/configs/liif/README_zh-CN.md index 97fa1da352..e6a21b9e43 100644 --- a/configs/liif/README_zh-CN.md +++ b/configs/liif/README_zh-CN.md @@ -23,13 +23,13 @@ | 算法 | scale | Set5
PSNR / SSIM | Set14
PSNR / SSIM | DIV2K
PSNR / SSIM | GPU 信息 | 下载 | | :-----------------------------------------------------------: | :---: | :-----------------: | :------------------: | :-------------------: | :----------: | :------------------------------------------------------------: | -| [liif_edsr_norm_c64b16_g1_1000k_div2k](/configs/liif/liif-edsr-norm_c64b16_1xb16-1000k_div2k.py) | x2 | 35.7131 / 0.9366 | 31.5579 / 0.8889 | 34.6647 / 0.9355 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/liif/liif_edsr_norm_c64b16_g1_1000k_div2k_20210715-ab7ce3fc.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/liif/liif_edsr_norm_c64b16_g1_1000k_div2k_20210715-ab7ce3fc.log.json) | +| [liif_edsr_norm_c64b16_g1_1000k_div2k](./liif-edsr-norm_c64b16_1xb16-1000k_div2k.py) | x2 | 35.7131 / 0.9366 | 31.5579 / 0.8889 | 34.6647 / 0.9355 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/liif/liif_edsr_norm_c64b16_g1_1000k_div2k_20210715-ab7ce3fc.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/liif/liif_edsr_norm_c64b16_g1_1000k_div2k_20210715-ab7ce3fc.log.json) | | △ | x3 | 32.3805 / 0.8915 | 28.4605 / 0.8039 | 30.9808 / 0.8724 | △ | △ | | △ | x4 | 30.2748 / 0.8509 | 26.8415 / 0.7381 | 29.0245 / 0.8187 | △ | △ | | △ | x6 | 27.1187 / 0.7774 | 24.7461 / 0.6444 | 26.7770 / 0.7425 | △ | △ | | △ | x18 | 20.8516 / 0.5406 | 20.0096 / 0.4525 | 22.1987 / 0.5955 | △ | △ | | △ | x30 | 18.8467 / 0.5010 | 18.1321 / 0.3963 | 20.5050 / 0.5577 | △ | △ | -| [liif_rdn_norm_c64b16_g1_1000k_div2k](/configs/liif/liif-rdn-norm_c64b16_1xb16-1000k_div2k.py) | x2 | 35.7874 / 0.9366 | 31.6866 / 0.8896 | 34.7548 / 0.9356 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/liif/liif_rdn_norm_c64b16_g1_1000k_div2k_20210717-22d6fdc8.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/liif/liif_rdn_norm_c64b16_g1_1000k_div2k_20210717-22d6fdc8.log.json) | +| [liif_rdn_norm_c64b16_g1_1000k_div2k](./liif-rdn-norm_c64b16_1xb16-1000k_div2k.py) | x2 | 35.7874 / 0.9366 | 31.6866 / 0.8896 | 34.7548 / 0.9356 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/liif/liif_rdn_norm_c64b16_g1_1000k_div2k_20210717-22d6fdc8.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/liif/liif_rdn_norm_c64b16_g1_1000k_div2k_20210717-22d6fdc8.log.json) | | △ | x3 | 32.4992 / 0.8923 | 28.4905 / 0.8037 | 31.0744 / 0.8731 | △ | △ | | △ | x4 | 30.3835 / 0.8513 | 26.8734 / 0.7373 | 29.1101 / 0.8197 | △ | △ | | △ | x6 | 27.1914 / 0.7751 | 24.7824 / 0.6434 | 26.8693 / 0.7437 | △ | △ | @@ -40,7 +40,7 @@ - △ 指同上。 - 这两个配置仅在 _testing pipeline_ 上有所不同。 所以他们使用相同的检查点。 -- 数据根据 [EDSR](/configs/restorers/edsr) 进行正则化。 +- 数据根据 [EDSR](../edsr/README.md) 进行正则化。 - 在 RGB 通道上进行评估,在评估之前裁剪每个边界中的 `scale` 像素。 ## 快速开始 diff --git a/configs/liif/metafile.yml b/configs/liif/metafile.yml index 465ba6c525..933acee0fe 100644 --- a/configs/liif/metafile.yml +++ b/configs/liif/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2012.09161 README: configs/liif/README.md + Task: + - image super-resolution + Year: 2021 Models: - Config: configs/liif/liif-edsr-norm_c64b16_1xb16-1000k_div2k.py In Collection: LIIF diff --git a/configs/lsgan/README.md b/configs/lsgan/README.md index f0d8626ef4..75a10b5a60 100644 --- a/configs/lsgan/README.md +++ b/configs/lsgan/README.md @@ -28,10 +28,10 @@ Unsupervised learning with generative adversarial networks (GANs) has proven hug | Models | Dataset | SWD | MS-SSIM | FID | Config | Download | | :-----------: | :------------: | 
:-----------------------------: | :-----: | :-----: | :-------------------------------------------------------------: | :---------------------------------------------------------------: | -| LSGAN 64x64 | CelebA-Cropped | 6.16, 6.83, 37.64/16.87 | 0.3216 | 11.9258 | [config](/configs/lsgan/lsgan_dcgan-archi_lr1e-3-1xb128-12Mimgs_celeba-cropped-64x64.py) | [model](https://download.openmmlab.com/mmgen/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210429_144001-92ca1d0d.pth)\| [log](https://download.openmmlab.com/mmgen/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210422_131925.log.json) | -| LSGAN 64x64 | LSUN-Bedroom | 5.66, 9.0, 18.6/11.09 | 0.0671 | 30.7390 | [config](/configs/lsgan/lsgan_dcgan-archi_lr1e-4-1xb128-12Mimgs_lsun-bedroom-64x64.py) | [model](https://download.openmmlab.com/mmgen/lsgan/lsgan_lsun-bedroom_dcgan-archi_lr-1e-4_64_b128x1_12m_20210429_144602-ec4ec6bb.pth)\| [log](https://download.openmmlab.com/mmgen/lsgan/lsgan_lsun-bedroom_dcgan-archi_lr-1e-4_64_b128x1_12m_20210423_005020.log.json) | -| LSGAN 128x128 | CelebA-Cropped | 21.66, 9.83, 16.06, 70.76/29.58 | 0.3691 | 38.3752 | [config](/configs/lsgan/lsgan_dcgan-archi_lr1e-4-1xb64-10Mimgs_celeba-cropped-128x128.py) | [model](https://download.openmmlab.com/mmgen/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210429_144229-01ba67dc.pth)\| [log](https://download.openmmlab.com/mmgen/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210423_132126.log.json) | -| LSGAN 128x128 | LSUN-Bedroom | 19.52, 9.99, 7.48, 14.3/12.82 | 0.0612 | 51.5500 | [config](/configs/lsgan/lsgan_lsgan-archi_lr1e-4-1xb64-10Mimgs_lsun-bedroom-128x128.py) | [model](https://download.openmmlab.com/mmgen/lsgan/lsgan_lsun-bedroom_lsgan-archi_lr-1e-4_128_b64x1_10m_20210429_155605-cf78c0a8.pth)\| [log](https://download.openmmlab.com/mmgen/lsgan/lsgan_lsun-bedroom_lsgan-archi_lr-1e-4_128_b64x1_10m_20210429_142302.log.json) | +| LSGAN 64x64 | CelebA-Cropped | 6.16, 6.83, 37.64/16.87 | 0.3216 | 11.9258 | [config](./lsgan_dcgan-archi_lr1e-3-1xb128-12Mimgs_celeba-cropped-64x64.py) | [model](https://download.openmmlab.com/mmediting/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210429_144001-92ca1d0d.pth)\| [log](https://download.openmmlab.com/mmediting/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210422_131925.log.json) | +| LSGAN 64x64 | LSUN-Bedroom | 5.66, 9.0, 18.6/11.09 | 0.0671 | 30.7390 | [config](./lsgan_dcgan-archi_lr1e-4-1xb128-12Mimgs_lsun-bedroom-64x64.py) | [model](https://download.openmmlab.com/mmediting/lsgan/lsgan_lsun-bedroom_dcgan-archi_lr-1e-4_64_b128x1_12m_20210429_144602-ec4ec6bb.pth)\| [log](https://download.openmmlab.com/mmediting/lsgan/lsgan_lsun-bedroom_dcgan-archi_lr-1e-4_64_b128x1_12m_20210423_005020.log.json) | +| LSGAN 128x128 | CelebA-Cropped | 21.66, 9.83, 16.06, 70.76/29.58 | 0.3691 | 38.3752 | [config](./lsgan_dcgan-archi_lr1e-4-1xb64-10Mimgs_celeba-cropped-128x128.py) | [model](https://download.openmmlab.com/mmediting/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210429_144229-01ba67dc.pth)\| [log](https://download.openmmlab.com/mmediting/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210423_132126.log.json) | +| LSGAN 128x128 | LSUN-Bedroom | 19.52, 9.99, 7.48, 14.3/12.82 | 0.0612 | 51.5500 | [config](./lsgan_lsgan-archi_lr1e-4-1xb64-10Mimgs_lsun-bedroom-128x128.py) | 
[model](https://download.openmmlab.com/mmediting/lsgan/lsgan_lsun-bedroom_lsgan-archi_lr-1e-4_128_b64x1_10m_20210429_155605-cf78c0a8.pth)\| [log](https://download.openmmlab.com/mmediting/lsgan/lsgan_lsun-bedroom_lsgan-archi_lr-1e-4_128_b64x1_10m_20210429_142302.log.json) | ## Citation diff --git a/configs/lsgan/metafile.yml b/configs/lsgan/metafile.yml index 43aa2678b8..917c313449 100644 --- a/configs/lsgan/metafile.yml +++ b/configs/lsgan/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://openaccess.thecvf.com/content_iccv_2017/html/Mao_Least_Squares_Generative_ICCV_2017_paper.html README: configs/lsgan/README.md + Task: + - unconditional gans + Year: 2017 Models: - Config: configs/lsgan/lsgan_dcgan-archi_lr1e-3-1xb128-12Mimgs_celeba-cropped-64x64.py In Collection: LSGAN @@ -18,7 +21,7 @@ Models: FID: 11.9258 MS-SSIM: 0.3216 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210429_144001-92ca1d0d.pth + Weights: https://download.openmmlab.com/mmediting/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-3_64_b128x1_12m_20210429_144001-92ca1d0d.pth - Config: configs/lsgan/lsgan_dcgan-archi_lr1e-4-1xb128-12Mimgs_lsun-bedroom-64x64.py In Collection: LSGAN Metadata: @@ -30,7 +33,7 @@ Models: FID: 30.739 MS-SSIM: 0.0671 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/lsgan/lsgan_lsun-bedroom_dcgan-archi_lr-1e-4_64_b128x1_12m_20210429_144602-ec4ec6bb.pth + Weights: https://download.openmmlab.com/mmediting/lsgan/lsgan_lsun-bedroom_dcgan-archi_lr-1e-4_64_b128x1_12m_20210429_144602-ec4ec6bb.pth - Config: configs/lsgan/lsgan_dcgan-archi_lr1e-4-1xb64-10Mimgs_celeba-cropped-128x128.py In Collection: LSGAN Metadata: @@ -42,7 +45,7 @@ Models: FID: 38.3752 MS-SSIM: 0.3691 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210429_144229-01ba67dc.pth + Weights: https://download.openmmlab.com/mmediting/lsgan/lsgan_celeba-cropped_dcgan-archi_lr-1e-4_128_b64x1_10m_20210429_144229-01ba67dc.pth - Config: configs/lsgan/lsgan_lsgan-archi_lr1e-4-1xb64-10Mimgs_lsun-bedroom-128x128.py In Collection: LSGAN Metadata: @@ -54,4 +57,4 @@ Models: FID: 51.55 MS-SSIM: 0.0612 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/lsgan/lsgan_lsun-bedroom_lsgan-archi_lr-1e-4_128_b64x1_10m_20210429_155605-cf78c0a8.pth + Weights: https://download.openmmlab.com/mmediting/lsgan/lsgan_lsun-bedroom_lsgan-archi_lr-1e-4_128_b64x1_10m_20210429_155605-cf78c0a8.pth diff --git a/configs/nafnet/README.md b/configs/nafnet/README.md index e1ec757413..48e9bc872a 100644 --- a/configs/nafnet/README.md +++ b/configs/nafnet/README.md @@ -22,8 +22,8 @@ Although there have been significant advances in the field of image restoration | Method | image size | PSNR | SSIM | GPU Info | Download | | :-------------------------------------------------------------------------: | :--------: | :--------------: | :------------: | :------: | :---------------------------------------------------------------------------: | -| [nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd](/configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd.py) | 256X256 | 40.3045(40.3045) | 0.9253(0.9614) | 1 (A100) | [model](https://download.openmmlab.com/mmediting/nafnet/NAFNet-SIDD-midc64.pth) \| log(coming soon) | -| [nafnet_c64eb11128mb1db1111_8xb8-lr1e-3-400k_gopro](/configs/nafnet/nafnet_c64eb11128mb1db1111_8xb8-lr1e-3-400k_gopro.py) | 1280x720 | 33.7246(33.7103) 
| 0.9479(0.9668) | 1 (A100) | [model](https://download.openmmlab.com/mmediting/nafnet/NAFNet-GoPro-midc64.pth) \| log(coming soon) | +| [nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd](./nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd.py) | 256X256 | 40.3045(40.3045) | 0.9253(0.9614) | 1 (A100) | [model](https://download.openmmlab.com/mmediting/nafnet/NAFNet-SIDD-midc64.pth) \| log(coming soon) | +| [nafnet_c64eb11128mb1db1111_8xb8-lr1e-3-400k_gopro](./nafnet_c64eb11128mb1db1111_8xb8-lr1e-3-400k_gopro.py) | 1280x720 | 33.7246(33.7103) | 0.9479(0.9668) | 1 (A100) | [model](https://download.openmmlab.com/mmediting/nafnet/NAFNet-GoPro-midc64.pth) \| log(coming soon) | Note: @@ -51,7 +51,7 @@ python tools/train.py configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k ./tools/dist_train.sh configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd.py 8 ``` -For more details, you can refer to **Train a model** part in [train_test.md](/docs/en/user_guides/train_test.md#Train-a-model-in-MMEditing). +For more details, you can refer to **Train a model** part in [train_test.md](../../docs/en/user_guides/train_test.md). @@ -75,7 +75,7 @@ python tools/test.py configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_ Pretrained checkpoints will come soon. -For more details, you can refer to **Test a pre-trained model** part in [train_test.md](/docs/en/user_guides/train_test.md#Test-a-pre-trained-model-in-MMEditing). +For more details, you can refer to **Test a pre-trained model** part in [train_test.md](../../docs/en/user_guides/train_test.md). diff --git a/configs/nafnet/README_zh-CN.md b/configs/nafnet/README_zh-CN.md index f9151c2d61..4c337a8864 100644 --- a/configs/nafnet/README_zh-CN.md +++ b/configs/nafnet/README_zh-CN.md @@ -20,8 +20,8 @@ | 方法 | 图片尺寸 | PSNR | SSIM | GPU信息 | 下载 | | :---------------------------------------------------------------------------: | :------: | :--------------: | :------------: | :------: | :---------------------------------------------------------------------------: | -| [nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd](/configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd.py) | 256X256 | 40.3045(40.3045) | 0.9253(0.9614) | 1 (A100) | [模型](https://download.openmmlab.com/mmediting/nafnet/NAFNet-SIDD-midc64.pth) \| 日志(即将到来) | -| [nafnet_c64eb11128mb1db1111_8xb8-lr1e-3-400k_gopro](/configs/nafnet/nafnet_c64eb11128mb1db1111_8xb8-lr1e-3-400k_gopro.py) | 1280x720 | 33.7246(33.7103) | 0.9479(0.9668) | 1 (A100) | [模型](https://download.openmmlab.com/mmediting/nafnet/NAFNet-GoPro-midc64.pth) \| 日志(即将到来) | +| [nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd](./nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd.py) | 256X256 | 40.3045(40.3045) | 0.9253(0.9614) | 1 (A100) | [模型](https://download.openmmlab.com/mmediting/nafnet/NAFNet-SIDD-midc64.pth) \| 日志(即将到来) | +| [nafnet_c64eb11128mb1db1111_8xb8-lr1e-3-400k_gopro](./nafnet_c64eb11128mb1db1111_8xb8-lr1e-3-400k_gopro.py) | 1280x720 | 33.7246(33.7103) | 0.9479(0.9668) | 1 (A100) | [模型](https://download.openmmlab.com/mmediting/nafnet/NAFNet-GoPro-midc64.pth) \| 日志(即将到来) | Note: @@ -49,7 +49,7 @@ python tools/train.py configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k ./tools/dist_train.sh configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd.py 8 ``` -更多细节可以参考 [train_test.md](/docs/zh_cn/user_guides/train_test.md) 中的 **Train a model** 部分。 +更多细节可以参考 [train_test.md](../../docs/zh_cn/user_guides/train_test.md) 中的 **Train a model** 部分。 @@ -72,6 +72,6 @@ python tools/test.py 
configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_ ``` 预训练模型未来将会上传,敬请等待。 -更多细节可以参考 [train_test.md](/docs/zh_cn/user_guides/train_test.md) 中的 **Test a pre-trained model** 部分。 +更多细节可以参考 [train_test.md](../../docs/zh_cn/user_guides/train_test.md) 中的 **Test a pre-trained model** 部分。 diff --git a/configs/nafnet/metafile.yml b/configs/nafnet/metafile.yml index 32f8520a3c..3a2b913d00 100644 --- a/configs/nafnet/metafile.yml +++ b/configs/nafnet/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2204.04676 README: configs/nafnet/README.md + Task: + - image restoration + Year: 2022 Models: - Config: configs/nafnet/nafnet_c64eb2248mb12db2222_8xb8-lr1e-3-400k_sidd.py In Collection: NAFNet diff --git a/configs/partial_conv/README.md b/configs/partial_conv/README.md index 0315a12030..9d02a7c33c 100644 --- a/configs/partial_conv/README.md +++ b/configs/partial_conv/README.md @@ -22,17 +22,17 @@ Existing deep learning based image inpainting methods use a standard convolution **Places365-Challenge** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :--------------------------------------------------------------: | :-------: | :--------: | :---------: | :-----------: | :------: | :----: | :---: | :------: | :----------------------------------------------------------------: | -| [PConv_Stage1](/configs/partial_conv/pconv_stage1_8xb12_places-256x256.py) | free-form | 256x256 | 500k | Places365-val | - | - | - | 8 | - | -| [PConv_Stage2](/configs/partial_conv/pconv_stage2_4xb2_places-256x256.py) | free-form | 256x256 | 500k | Places365-val | 8.776 | 22.762 | 0.801 | 4 | [model](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_places_20200619-1ffed0e8.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_places_20200619-1ffed0e8.log.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :----------------------------------------------------: | :-------: | :--------: | :---------: | :-----------: | :------: | :----: | :---: | :------: | :--------------------------------------------------------------------------: | +| [PConv_Stage1](./pconv_stage1_8xb12_places-256x256.py) | free-form | 256x256 | 500k | Places365-val | - | - | - | 8 | - | +| [PConv_Stage2](./pconv_stage2_4xb2_places-256x256.py) | free-form | 256x256 | 500k | Places365-val | 8.776 | 22.762 | 0.801 | 4 | [model](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_places_20200619-1ffed0e8.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_places_20200619-1ffed0e8.log.json) | **CelebA-HQ** -| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | -| :---------------------------------------------------------------: | :-------: | :--------: | :---------: | :--------: | :------: | :----: | :---: | :------: | :------------------------------------------------------------------: | -| [PConv_Stage1](/configs/partial_conv/pconv_stage1_8xb1_celeba-256x256.py) | free-form | 256x256 | 500k | CelebA-val | - | - | - | 8 | - | -| [PConv_Stage2](/configs/partial_conv/pconv_stage2_4xb2_celeba-256x256.py) | free-form | 256x256 | 500k | CelebA-val | 5.990 | 25.404 | 0.853 | 4 | [model](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_celeba_20200619-860f8b95.pth) \| 
[log](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_celeba_20200619-860f8b95.log.json) | +| Method | Mask Type | Resolution | Train Iters | Test Set | l1 error | PSNR | SSIM | GPU Info | Download | +| :---------------------------------------------------: | :-------: | :--------: | :---------: | :--------: | :------: | :----: | :---: | :------: | :------------------------------------------------------------------------------: | +| [PConv_Stage1](./pconv_stage1_8xb1_celeba-256x256.py) | free-form | 256x256 | 500k | CelebA-val | - | - | - | 8 | - | +| [PConv_Stage2](./pconv_stage2_4xb2_celeba-256x256.py) | free-form | 256x256 | 500k | CelebA-val | 5.990 | 25.404 | 0.853 | 4 | [model](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_celeba_20200619-860f8b95.pth) \| [log](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_celeba_20200619-860f8b95.log.json) | ## Quick Start diff --git a/configs/partial_conv/README_zh-CN.md b/configs/partial_conv/README_zh-CN.md index dace459583..3f09969a1d 100644 --- a/configs/partial_conv/README_zh-CN.md +++ b/configs/partial_conv/README_zh-CN.md @@ -23,17 +23,17 @@ **Places365-Challenge** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :-----------------------------------------------------------------: | :-------: | :-----: | :--------: | :-----------: | :-----: | :----: | :---: | :------: | :------------------------------------------------------------------: | -| [PConv_Stage1](/configs/partial_conv/pconv_stage1_8xb12_places-256x256.py) | free-form | 256x256 | 500k | Places365-val | - | - | - | 8 | - | -| [PConv_Stage2](/configs/partial_conv/pconv_stage2_4xb2_places-256x256.py) | free-form | 256x256 | 500k | Places365-val | 8.776 | 22.762 | 0.801 | 4 | [模型](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_places_20200619-1ffed0e8.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_places_20200619-1ffed0e8.log.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| :----------------------------------------------------: | :-------: | :-----: | :--------: | :-----------: | :-----: | :----: | :---: | :------: | :-------------------------------------------------------------------------------: | +| [PConv_Stage1](./pconv_stage1_8xb12_places-256x256.py) | free-form | 256x256 | 500k | Places365-val | - | - | - | 8 | - | +| [PConv_Stage2](./pconv_stage2_4xb2_places-256x256.py) | free-form | 256x256 | 500k | Places365-val | 8.776 | 22.762 | 0.801 | 4 | [模型](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_places_20200619-1ffed0e8.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_places_20200619-1ffed0e8.log.json) | **CelebA-HQ** -| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | -| :-------------------------------------------------------------------: | :-------: | :-----: | :--------: | :--------: | :-----: | :----: | :---: | :------: | :-------------------------------------------------------------------: | -| [PConv_Stage1](/configs/partial_conv/pconv_stage1_8xb1_celeba-256x256.py) | free-form | 256x256 | 500k | CelebA-val | - | - | - | 8 | - | -| [PConv_Stage2](/configs/partial_conv/pconv_stage2_4xb2_celeba-256x256.py) | free-form | 256x256 | 500k | CelebA-val | 5.990 | 25.404 | 0.853 | 4 | 
[模型](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_celeba_20200619-860f8b95.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_celeba_20200619-860f8b95.log.json) | +| 算法 | 掩膜类型 | 分辨率 | 训练集容量 | 测试集 | l1 损失 | PSNR | SSIM | GPU 信息 | 下载 | +| :---------------------------------------------------: | :-------: | :-----: | :--------: | :--------: | :-----: | :----: | :---: | :------: | :-----------------------------------------------------------------------------------: | +| [PConv_Stage1](./pconv_stage1_8xb1_celeba-256x256.py) | free-form | 256x256 | 500k | CelebA-val | - | - | - | 8 | - | +| [PConv_Stage2](./pconv_stage2_4xb2_celeba-256x256.py) | free-form | 256x256 | 500k | CelebA-val | 5.990 | 25.404 | 0.853 | 4 | [模型](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_celeba_20200619-860f8b95.pth) \| [日志](https://download.openmmlab.com/mmediting/inpainting/pconv/pconv_256x256_stage2_4x2_celeba_20200619-860f8b95.log.json) | ## 快速开始 diff --git a/configs/partial_conv/metafile.yml b/configs/partial_conv/metafile.yml index 3fe5dcbda0..41d7ea013b 100644 --- a/configs/partial_conv/metafile.yml +++ b/configs/partial_conv/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1804.07723 README: configs/partial_conv/README.md + Task: + - inpainting + Year: 2018 Models: - Config: configs/partial_conv/pconv_stage1_8xb12_places-256x256.py In Collection: PConv diff --git a/configs/pggan/README.md b/configs/pggan/README.md index 6810b50e59..2b8f7316e9 100644 --- a/configs/pggan/README.md +++ b/configs/pggan/README.md @@ -26,11 +26,11 @@ We describe a new training methodology for generative adversarial networks. The -| Models | Details | MS-SSIM | SWD(xx,xx,xx,xx/avg) | Config | Download | -| :-------------: | :------------: | :-----: | :--------------------------: | :-----------------------------------------------------------------: | :-------------------------------------------------------------------: | -| pggan_128x128 | celeba-cropped | 0.3023 | 3.42, 4.04, 4.78, 20.38/8.15 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/pggan/pggan_8xb4-12Mimgs_celeba-cropped-128x128.py) | [model](https://download.openmmlab.com/mmgen/pggan/pggan_celeba-cropped_128_g8_20210408_181931-85a2e72c.pth) | -| pggan_128x128 | lsun-bedroom | 0.0602 | 3.5, 2.96, 2.76, 9.65/4.72 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/pggan/pggan_8xb4-12Mimgs_lsun-bedroom-128x128.py) | [model](https://download.openmmlab.com/mmgen/pggan/pggan_lsun-bedroom_128x128_g8_20210408_182033-5e59f45d.pth) | -| pggan_1024x1024 | celeba-hq | 0.3379 | 8.93, 3.98, 3.07, 2.64/4.655 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/pggan/pggan_8xb4-12Mimg_celeba-hq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/pggan/pggan_celeba-hq_1024_g8_20210408_181911-f1ef51c3.pth) | +| Models | Details | MS-SSIM | SWD(xx,xx,xx,xx/avg) | Config | Download | +| :-------------: | :------------: | :-----: | :--------------------------: | :------------------------------------------------------: | :------------------------------------------------------------------------------: | +| pggan_128x128 | celeba-cropped | 0.3023 | 3.42, 4.04, 4.78, 20.38/8.15 | [config](./pggan_8xb4-12Mimgs_celeba-cropped-128x128.py) | [model](https://download.openmmlab.com/mmediting/pggan/pggan_celeba-cropped_128_g8_20210408_181931-85a2e72c.pth) | +| pggan_128x128 | lsun-bedroom | 0.0602 
| 3.5, 2.96, 2.76, 9.65/4.72 | [config](./pggan_8xb4-12Mimgs_lsun-bedroom-128x128.py) | [model](https://download.openmmlab.com/mmediting/pggan/pggan_lsun-bedroom_128x128_g8_20210408_182033-5e59f45d.pth) | +| pggan_1024x1024 | celeba-hq | 0.3379 | 8.93, 3.98, 3.07, 2.64/4.655 | [config](./pggan_8xb4-12Mimg_celeba-hq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/pggan/pggan_celeba-hq_1024_g8_20210408_181911-f1ef51c3.pth) | ## Citation diff --git a/configs/pggan/metafile.yml b/configs/pggan/metafile.yml index 025278cbd7..52b2b08c4f 100644 --- a/configs/pggan/metafile.yml +++ b/configs/pggan/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - https://arxiv.org/abs/1710.10196 README: configs/pggan/README.md + Task: + - unconditional gans + Year: 2018 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/pggan/pggan_8xb4-12Mimgs_celeba-cropped-128x128.py +- Config: configs/pggan/pggan_8xb4-12Mimgs_celeba-cropped-128x128.py In Collection: PGGAN Metadata: Training Data: CELEBA @@ -17,8 +20,8 @@ Models: Metrics: MS-SSIM: 0.3023 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pggan/pggan_celeba-cropped_128_g8_20210408_181931-85a2e72c.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/pggan/pggan_8xb4-12Mimgs_lsun-bedroom-128x128.py + Weights: https://download.openmmlab.com/mmediting/pggan/pggan_celeba-cropped_128_g8_20210408_181931-85a2e72c.pth +- Config: configs/pggan/pggan_8xb4-12Mimgs_lsun-bedroom-128x128.py In Collection: PGGAN Metadata: Training Data: Others @@ -28,8 +31,8 @@ Models: Metrics: MS-SSIM: 0.0602 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pggan/pggan_lsun-bedroom_128x128_g8_20210408_182033-5e59f45d.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/pggan/pggan_8xb4-12Mimg_celeba-hq-1024x1024.py + Weights: https://download.openmmlab.com/mmediting/pggan/pggan_lsun-bedroom_128x128_g8_20210408_182033-5e59f45d.pth +- Config: configs/pggan/pggan_8xb4-12Mimg_celeba-hq-1024x1024.py In Collection: PGGAN Metadata: Training Data: CELEBA @@ -39,4 +42,4 @@ Models: Metrics: MS-SSIM: 0.3379 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pggan/pggan_celeba-hq_1024_g8_20210408_181911-f1ef51c3.pth + Weights: https://download.openmmlab.com/mmediting/pggan/pggan_celeba-hq_1024_g8_20210408_181911-f1ef51c3.pth diff --git a/configs/pix2pix/README.md b/configs/pix2pix/README.md index 99636fa555..2ec6d0457a 100644 --- a/configs/pix2pix/README.md +++ b/configs/pix2pix/README.md @@ -2,7 +2,7 @@ > [Pix2Pix: Image-to-Image Translation with Conditional Adversarial Networks](https://openaccess.thecvf.com/content_cvpr_2017/html/Isola_Image-To-Image_Translation_With_CVPR_2017_paper.html) -> **Task**: Image2Image Translation +> **Task**: Image2Image @@ -27,15 +27,17 @@ We investigate conditional adversarial networks as a general-purpose solution to We use `FID` and `IS` metrics to evaluate the generation performance of pix2pix.1 -| Models | Dataset | FID | IS | Config | Download | -| :----: | :---------: | :------: | :---: | :----------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------: | -| Ours | facades | 124.9773 | 1.620 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-80kiters_facades.py) | 
[model](https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210902_170442-c0958d50.pth) \| [log](https://download.openmmlab.com/mmgen/pix2pix/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210317_172625.log.json)2 | -| Ours | aerial2maps | 122.5856 | 3.137 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-220kiters_aerial2maps.py) | [model](https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_a2b_1x1_219200_maps_convert-bgr_20210902_170729-59a31517.pth) | -| Ours | maps2aerial | 88.4635 | 3.310 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-220kiters_maps2aerial.py) | [model](https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_b2a_1x1_219200_maps_convert-bgr_20210902_170814-6d2eac4a.pth) | -| Ours | edges2shoes | 84.3750 | 2.815 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/pix2pix/pix2pix_vanilla-unet-bn_wo-jitter-flip-1xb4-190kiters_edges2shoes.py) | [model](https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_wo_jitter_flip_1x4_186840_edges2shoes_convert-bgr_20210902_170902-0c828552.pth) | +| Models | Dataset | FID | IS | Config | Download | +| :----: | :---------: | :------: | :---: | :------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------: | +| Ours | facades | 124.9773 | 1.620 | [config](./pix2pix_vanilla-unet-bn_1xb1-80kiters_facades.py) | [model](https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210902_170442-c0958d50.pth) \| [log](https://download.openmmlab.com/mmediting/pix2pix/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210317_172625.log.json)2 | +| Ours | aerial2maps | 122.5856 | 3.137 | [config](./pix2pix_vanilla-unet-bn_1xb1-220kiters_aerial2maps.py) | [model](https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_a2b_1x1_219200_maps_convert-bgr_20210902_170729-59a31517.pth) | +| Ours | maps2aerial | 88.4635 | 3.310 | [config](./pix2pix_vanilla-unet-bn_1xb1-220kiters_maps2aerial.py) | [model](https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_b2a_1x1_219200_maps_convert-bgr_20210902_170814-6d2eac4a.pth) | +| Ours | edges2shoes | 84.3750 | 2.815 | [config](./pix2pix_vanilla-unet-bn_wo-jitter-flip-1xb4-190kiters_edges2shoes.py) | [model](https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_wo_jitter_flip_1x4_186840_edges2shoes_convert-bgr_20210902_170902-0c828552.pth) | `FID` comparison with official: + + | Dataset | facades | aerial2maps | maps2aerial | edges2shoes | average | | :------: | :---------: | :----------: | :---------: | :---------: | :----------: | | official | **119.135** | 149.731 | 102.072 | **75.774** | 111.678 | @@ -43,6 +45,8 @@ We use `FID` and `IS` metrics to evaluate the generation performance of pix2pix. 
`IS` comparison with official: + + | Dataset | facades | aerial2maps | maps2aerial | edges2shoes | average | | :------: | :-------: | :---------: | :---------: | :---------: | :--------: | | official | **1.650** | 2.529 | **3.552** | 2.766 | 2.624 | diff --git a/configs/pix2pix/metafile.yml b/configs/pix2pix/metafile.yml index 48b8c6db13..b0a321d61c 100644 --- a/configs/pix2pix/metafile.yml +++ b/configs/pix2pix/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - https://openaccess.thecvf.com/content_cvpr_2017/html/Isola_Image-To-Image_Translation_With_CVPR_2017_paper.html README: configs/pix2pix/README.md + Task: + - image2image + Year: 2017 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-80kiters_facades.py +- Config: configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-80kiters_facades.py In Collection: Pix2Pix Metadata: Training Data: FACADES @@ -17,9 +20,9 @@ Models: Metrics: FID: 124.9773 IS: 1.62 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210902_170442-c0958d50.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-220kiters_aerial2maps.py + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210902_170442-c0958d50.pth +- Config: configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-220kiters_aerial2maps.py In Collection: Pix2Pix Metadata: Training Data: MAPS @@ -29,9 +32,9 @@ Models: Metrics: FID: 122.5856 IS: 3.137 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_a2b_1x1_219200_maps_convert-bgr_20210902_170729-59a31517.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-220kiters_maps2aerial.py + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_a2b_1x1_219200_maps_convert-bgr_20210902_170729-59a31517.pth +- Config: configs/pix2pix/pix2pix_vanilla-unet-bn_1xb1-220kiters_maps2aerial.py In Collection: Pix2Pix Metadata: Training Data: MAPS @@ -41,9 +44,9 @@ Models: Metrics: FID: 88.4635 IS: 3.31 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_b2a_1x1_219200_maps_convert-bgr_20210902_170814-6d2eac4a.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/pix2pix/pix2pix_vanilla-unet-bn_wo-jitter-flip-1xb4-190kiters_edges2shoes.py + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_b2a_1x1_219200_maps_convert-bgr_20210902_170814-6d2eac4a.pth +- Config: configs/pix2pix/pix2pix_vanilla-unet-bn_wo-jitter-flip-1xb4-190kiters_edges2shoes.py In Collection: Pix2Pix Metadata: Training Data: EDGES2SHOES @@ -53,5 +56,5 @@ Models: Metrics: FID: 84.375 IS: 2.815 - Task: Image2Image Translation - Weights: https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_wo_jitter_flip_1x4_186840_edges2shoes_convert-bgr_20210902_170902-0c828552.pth + Task: Image2Image + Weights: https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_wo_jitter_flip_1x4_186840_edges2shoes_convert-bgr_20210902_170902-0c828552.pth diff --git a/configs/positional_encoding_in_gans/README.md b/configs/positional_encoding_in_gans/README.md index 4301a696f5..f78e0ca7d2 
100644 --- a/configs/positional_encoding_in_gans/README.md +++ b/configs/positional_encoding_in_gans/README.md @@ -1,4 +1,4 @@ -# Positional Encoding in GANs +# Positional Encoding in GANs (CVPR'2021) > [Positional Encoding as Spatial Inductive Bias in GANs](https://openaccess.thecvf.com/content/CVPR2021/html/Xu_Positional_Encoding_As_Spatial_Inductive_Bias_in_GANs_CVPR_2021_paper.html) @@ -23,40 +23,40 @@ SinGAN shows impressive capability in learning internal patch distribution despi
896x896 results generated from a 256 generator using MS-PIE
| Models | Reference in Paper | Scales | FID50k | P&R10k | Config | Download | | :--------------------------: | :----------------: | :------------: | :----: | :---------: | :----------------------------------------------------------: | :-------------------------------------------------------------: | -| stylegan2_c2_256_baseline | Tab.5 config-a | 256 | 5.56 | 75.92/51.24 | [stylegan2_c2_8xb3-1100kiters_ffhq-256x256](/configs/positional_encoding_in_gans/stylegan2_c2_8xb3-1100kiters_ffhq-256x256.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/stylegan2_c2_config-a_ffhq_256x256_b3x8_1100k_20210406_145127-71d9634b.pth) | -| stylegan2_c2_512_baseline | Tab.5 config-b | 512 | 4.91 | 75.65/54.58 | [stylegan2_c2_8xb3-1100kiters_ffhq-512x512](/configs/positional_encoding_in_gans/stylegan2_c2_8xb3-1100kiters_ffhq-512x512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/stylegan2_c2_config-b_ffhq_512x512_b3x8_1100k_20210406_145142-e85e5cf4.pth) | -| ms-pie_stylegan2_c2_config-c | Tab.5 config-c | 256, 384, 512 | 3.35 | 73.84/55.77 | [mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-c_ffhq_256-512_b3x8_1100k_20210406_144824-9f43b07d.pth) | -| ms-pie_stylegan2_c2_config-d | Tab.5 config-d | 256, 384, 512 | 3.50 | 73.28/56.16 | [mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-d_ffhq_256-512_b3x8_1100k_20210406_144840-dbefacf6.pth) | -| ms-pie_stylegan2_c2_config-e | Tab.5 config-e | 256, 384, 512 | 3.15 | 74.13/56.88 | [mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-e_ffhq_256-512_b3x8_1100k_20210406_144906-98d5a42a.pth) | -| ms-pie_stylegan2_c2_config-f | Tab.5 config-f | 256, 384, 512 | 2.93 | 73.51/57.32 | [mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-512_b3x8_1100k_20210406_144927-4f4d5391.pth) | -| ms-pie_stylegan2_c1_config-g | Tab.5 config-g | 256, 384, 512 | 3.40 | 73.05/56.45 | [mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c1_config-g_ffhq_256-512_b3x8_1100k_20210406_144758-2df61752.pth) | -| ms-pie_stylegan2_c2_config-h | Tab.5 config-h | 256, 384, 512 | 4.01 | 72.81/54.35 | [mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-h_ffhq_256-512_b3x8_1100k_20210406_145006-84cf3f48.pth) | -| ms-pie_stylegan2_c2_config-i | Tab.5 config-i | 256, 384, 512 | 3.76 | 73.26/54.71 | [mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512.py) | 
[model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-i_ffhq_256-512_b3x8_1100k_20210406_145023-c2b0accf.pth) | -| ms-pie_stylegan2_c2_config-j | Tab.5 config-j | 256, 384, 512 | 4.23 | 73.11/54.63 | [mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-j_ffhq_256-512_b3x8_1100k_20210406_145044-c407481b.pth) | -| ms-pie_stylegan2_c2_config-k | Tab.5 config-k | 256, 384, 512 | 4.17 | 73.05/51.07 | [mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-k_ffhq_256-512_b3x8_1100k_20210406_145105-6d8cc39f.pth) | -| ms-pie_stylegan2_c2_config-f | higher-resolution | 256, 512, 896 | 4.10 | 72.21/50.29 | [mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896](/configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-896_b3x8_1100k_20210406_144943-6c18ad5d.pth) | -| ms-pie_stylegan2_c1_config-f | higher-resolution | 256, 512, 1024 | 6.24 | 71.79/49.92 | [mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024](/configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c1_config-f_ffhq_256-1024_b2x8_1600k_20210406_144716-81cbdc96.pth) | +| stylegan2_c2_256_baseline | Tab.5 config-a | 256 | 5.56 | 75.92/51.24 | [stylegan2_c2_8xb3-1100kiters_ffhq-256x256](./stylegan2_c2_8xb3-1100kiters_ffhq-256x256.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/stylegan2_c2_config-a_ffhq_256x256_b3x8_1100k_20210406_145127-71d9634b.pth) | +| stylegan2_c2_512_baseline | Tab.5 config-b | 512 | 4.91 | 75.65/54.58 | [stylegan2_c2_8xb3-1100kiters_ffhq-512x512](./stylegan2_c2_8xb3-1100kiters_ffhq-512x512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/stylegan2_c2_config-b_ffhq_512x512_b3x8_1100k_20210406_145142-e85e5cf4.pth) | +| ms-pie_stylegan2_c2_config-c | Tab.5 config-c | 256, 384, 512 | 3.35 | 73.84/55.77 | [mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-c_ffhq_256-512_b3x8_1100k_20210406_144824-9f43b07d.pth) | +| ms-pie_stylegan2_c2_config-d | Tab.5 config-d | 256, 384, 512 | 3.50 | 73.28/56.16 | [mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-d_ffhq_256-512_b3x8_1100k_20210406_144840-dbefacf6.pth) | +| ms-pie_stylegan2_c2_config-e | Tab.5 config-e | 256, 384, 512 | 3.15 | 74.13/56.88 | [mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-e_ffhq_256-512_b3x8_1100k_20210406_144906-98d5a42a.pth) | +| ms-pie_stylegan2_c2_config-f | Tab.5 config-f | 256, 384, 512 | 2.93 | 73.51/57.32 | 
[mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-512_b3x8_1100k_20210406_144927-4f4d5391.pth) | +| ms-pie_stylegan2_c1_config-g | Tab.5 config-g | 256, 384, 512 | 3.40 | 73.05/56.45 | [mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c1_config-g_ffhq_256-512_b3x8_1100k_20210406_144758-2df61752.pth) | +| ms-pie_stylegan2_c2_config-h | Tab.5 config-h | 256, 384, 512 | 4.01 | 72.81/54.35 | [mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-h_ffhq_256-512_b3x8_1100k_20210406_145006-84cf3f48.pth) | +| ms-pie_stylegan2_c2_config-i | Tab.5 config-i | 256, 384, 512 | 3.76 | 73.26/54.71 | [mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-i_ffhq_256-512_b3x8_1100k_20210406_145023-c2b0accf.pth) | +| ms-pie_stylegan2_c2_config-j | Tab.5 config-j | 256, 384, 512 | 4.23 | 73.11/54.63 | [mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-j_ffhq_256-512_b3x8_1100k_20210406_145044-c407481b.pth) | +| ms-pie_stylegan2_c2_config-k | Tab.5 config-k | 256, 384, 512 | 4.17 | 73.05/51.07 | [mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-k_ffhq_256-512_b3x8_1100k_20210406_145105-6d8cc39f.pth) | +| ms-pie_stylegan2_c2_config-f | higher-resolution | 256, 512, 896 | 4.10 | 72.21/50.29 | [mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896](./mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-896_b3x8_1100k_20210406_144943-6c18ad5d.pth) | +| ms-pie_stylegan2_c1_config-f | higher-resolution | 256, 512, 1024 | 6.24 | 71.79/49.92 | [mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024](./mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c1_config-f_ffhq_256-1024_b2x8_1600k_20210406_144716-81cbdc96.pth) | | Models | Reference in Paper | Scales | FID50k | Precision10k | Recall10k | Config | Download | | :--------------------------: | :----------------: | :------------: | :----: | :----------: | :-------: | :-----------------------------------------------------: | :--------------------------------------------------------: | -| stylegan2_c2_256_baseline | Tab.5 config-a | 256 | 5.56 | 75.92 | 51.24 | [stylegan2_c2_8xb3-1100kiters_ffhq-256x256](/configs/positional_encoding_in_gans/stylegan2_c2_8xb3-1100kiters_ffhq-256x256.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/stylegan2_c2_config-a_ffhq_256x256_b3x8_1100k_20210406_145127-71d9634b.pth) | -| stylegan2_c2_512_baseline | Tab.5 config-b | 512 | 4.91 | 75.65 | 54.58 | 
[stylegan2_c2_8xb3-1100kiters_ffhq-512x512](/configs/positional_encoding_in_gans/stylegan2_c2_8xb3-1100kiters_ffhq-512x512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/stylegan2_c2_config-b_ffhq_512x512_b3x8_1100k_20210406_145142-e85e5cf4.pth) | -| ms-pie_stylegan2_c2_config-c | Tab.5 config-c | 256, 384, 512 | 3.35 | 73.84 | 55.77 | [mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-c_ffhq_256-512_b3x8_1100k_20210406_144824-9f43b07d.pth) | -| ms-pie_stylegan2_c2_config-d | Tab.5 config-d | 256, 384, 512 | 3.50 | 73.28 | 56.16 | [mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-d_ffhq_256-512_b3x8_1100k_20210406_144840-dbefacf6.pth) | -| ms-pie_stylegan2_c2_config-e | Tab.5 config-e | 256, 384, 512 | 3.15 | 74.13 | 56.88 | [mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-e_ffhq_256-512_b3x8_1100k_20210406_144906-98d5a42a.pth) | -| ms-pie_stylegan2_c2_config-f | Tab.5 config-f | 256, 384, 512 | 2.93 | 73.51 | 57.32 | [mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-512_b3x8_1100k_20210406_144927-4f4d5391.pth) | -| ms-pie_stylegan2_c1_config-g | Tab.5 config-g | 256, 384, 512 | 3.40 | 73.05 | 56.45 | [mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c1_config-g_ffhq_256-512_b3x8_1100k_20210406_144758-2df61752.pth) | -| ms-pie_stylegan2_c2_config-h | Tab.5 config-h | 256, 384, 512 | 4.01 | 72.81 | 54.35 | [mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-h_ffhq_256-512_b3x8_1100k_20210406_145006-84cf3f48.pth) | -| ms-pie_stylegan2_c2_config-i | Tab.5 config-i | 256, 384, 512 | 3.76 | 73.26 | 54.71 | [mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-i_ffhq_256-512_b3x8_1100k_20210406_145023-c2b0accf.pth) | -| ms-pie_stylegan2_c2_config-j | Tab.5 config-j | 256, 384, 512 | 4.23 | 73.11 | 54.63 | [mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-j_ffhq_256-512_b3x8_1100k_20210406_145044-c407481b.pth) | -| ms-pie_stylegan2_c2_config-k | Tab.5 config-k | 256, 384, 512 | 4.17 | 73.05 | 51.07 | 
[mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512](/configs/positional_encoding_in_gans/mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-k_ffhq_256-512_b3x8_1100k_20210406_145105-6d8cc39f.pth) | -| ms-pie_stylegan2_c2_config-f | higher-resolution | 256, 512, 896 | 4.10 | 72.21 | 50.29 | [mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896](/configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-896_b3x8_1100k_20210406_144943-6c18ad5d.pth) | -| ms-pie_stylegan2_c1_config-f | higher-resolution | 256, 512, 1024 | 6.24 | 71.79 | 49.92 | [mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024](/configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024.py) | [model](https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c1_config-f_ffhq_256-1024_b2x8_1600k_20210406_144716-81cbdc96.pth) | +| stylegan2_c2_256_baseline | Tab.5 config-a | 256 | 5.56 | 75.92 | 51.24 | [stylegan2_c2_8xb3-1100kiters_ffhq-256x256](./stylegan2_c2_8xb3-1100kiters_ffhq-256x256.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/stylegan2_c2_config-a_ffhq_256x256_b3x8_1100k_20210406_145127-71d9634b.pth) | +| stylegan2_c2_512_baseline | Tab.5 config-b | 512 | 4.91 | 75.65 | 54.58 | [stylegan2_c2_8xb3-1100kiters_ffhq-512x512](./stylegan2_c2_8xb3-1100kiters_ffhq-512x512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/stylegan2_c2_config-b_ffhq_512x512_b3x8_1100k_20210406_145142-e85e5cf4.pth) | +| ms-pie_stylegan2_c2_config-c | Tab.5 config-c | 256, 384, 512 | 3.35 | 73.84 | 55.77 | [mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-c_ffhq_256-512_b3x8_1100k_20210406_144824-9f43b07d.pth) | +| ms-pie_stylegan2_c2_config-d | Tab.5 config-d | 256, 384, 512 | 3.50 | 73.28 | 56.16 | [mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-d_ffhq_256-512_b3x8_1100k_20210406_144840-dbefacf6.pth) | +| ms-pie_stylegan2_c2_config-e | Tab.5 config-e | 256, 384, 512 | 3.15 | 74.13 | 56.88 | [mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-e_ffhq_256-512_b3x8_1100k_20210406_144906-98d5a42a.pth) | +| ms-pie_stylegan2_c2_config-f | Tab.5 config-f | 256, 384, 512 | 2.93 | 73.51 | 57.32 | [mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-512_b3x8_1100k_20210406_144927-4f4d5391.pth) | +| ms-pie_stylegan2_c1_config-g | Tab.5 config-g | 256, 384, 512 | 3.40 | 73.05 | 56.45 | [mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c1_config-g_ffhq_256-512_b3x8_1100k_20210406_144758-2df61752.pth) | +| ms-pie_stylegan2_c2_config-h | Tab.5 config-h | 256, 
384, 512 | 4.01 | 72.81 | 54.35 | [mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-h_ffhq_256-512_b3x8_1100k_20210406_145006-84cf3f48.pth) | +| ms-pie_stylegan2_c2_config-i | Tab.5 config-i | 256, 384, 512 | 3.76 | 73.26 | 54.71 | [mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-i_ffhq_256-512_b3x8_1100k_20210406_145023-c2b0accf.pth) | +| ms-pie_stylegan2_c2_config-j | Tab.5 config-j | 256, 384, 512 | 4.23 | 73.11 | 54.63 | [mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-j_ffhq_256-512_b3x8_1100k_20210406_145044-c407481b.pth) | +| ms-pie_stylegan2_c2_config-k | Tab.5 config-k | 256, 384, 512 | 4.17 | 73.05 | 51.07 | [mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512](./mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-k_ffhq_256-512_b3x8_1100k_20210406_145105-6d8cc39f.pth) | +| ms-pie_stylegan2_c2_config-f | higher-resolution | 256, 512, 896 | 4.10 | 72.21 | 50.29 | [mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896](./mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-896_b3x8_1100k_20210406_144943-6c18ad5d.pth) | +| ms-pie_stylegan2_c1_config-f | higher-resolution | 256, 512, 1024 | 6.24 | 71.79 | 49.92 | [mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024](./mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024.py) | [model](https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c1_config-f_ffhq_256-1024_b2x8_1600k_20210406_144716-81cbdc96.pth) | Note that we report the FID and P&R metric (FFHQ dataset) in the largest scale. @@ -70,14 +70,14 @@ Note that we report the FID and P&R metric (FFHQ dataset) in the largest scale. 
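For readers who want to see how the FID numbers above are typically obtained, here is a minimal sketch of the Fréchet Inception Distance computed from two sets of pre-extracted Inception features. It is an illustration only, not the metric code used in this repository; `real_feats` and `fake_feats` are hypothetical `(N, 2048)` arrays of pool3 activations.

```python
# Illustrative sketch only: FID from two sets of Inception features.
# Not the evaluator used in this repo; feature extraction is assumed done elsewhere.
import numpy as np
from scipy import linalg


def frechet_inception_distance(real_feats: np.ndarray, fake_feats: np.ndarray) -> float:
    """FID = ||mu_r - mu_f||^2 + Tr(C_r + C_f - 2 * (C_r @ C_f)^(1/2))."""
    mu_r, mu_f = real_feats.mean(axis=0), fake_feats.mean(axis=0)
    cov_r = np.cov(real_feats, rowvar=False)
    cov_f = np.cov(fake_feats, rowvar=False)
    # Matrix square root of the covariance product; drop tiny imaginary residue.
    cov_sqrt, _ = linalg.sqrtm(cov_r @ cov_f, disp=False)
    if np.iscomplexobj(cov_sqrt):
        cov_sqrt = cov_sqrt.real
    diff = mu_r - mu_f
    return float(diff @ diff + np.trace(cov_r + cov_f - 2 * cov_sqrt))
```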
| Model | Data | Num Scales | Config | Download | | :-----------------------------: | :-------------------------------------------------: | :--------: | :---------------------------------------------------: | :-----------------------------------------------------: | -| SinGAN + no pad | [balloons.png](https://download.openmmlab.com/mmgen/dataset/singan/balloons.png) | 8 | [singan_interp-pad_balloons](/configs/positional_encoding_in_gans/singan_interp-pad_balloons.py) | [ckpt](https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_balloons_20210406_180014-96f51555.pth) \| [pkl](https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_balloons_20210406_180014-96f51555.pkl) | -| SinGAN + no pad + no bn in disc | [balloons.png](https://download.openmmlab.com/mmgen/dataset/singan/balloons.png) | 8 | [singan_interp-pad_disc-nobn_balloons](/configs/positional_encoding_in_gans/singan_interp-pad_disc-nobn_balloons.py) | [ckpt](https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_disc-nobn_balloons_20210406_180059-7d63e65d.pth) \| [pkl](https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_disc-nobn_balloons_20210406_180059-7d63e65d.pkl) | -| SinGAN + no pad + no bn in disc | [fish.jpg](https://download.openmmlab.com/mmgen/dataset/singan/fish-crop.jpg) | 10 | [singan_interp-pad_disc-nobn_fish](/configs/positional_encoding_in_gans/singan_interp-pad_disc-nobn_fish.py) | [ckpt](https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_disc-nobn_fis_20210406_175720-9428517a.pth) \| [pkl](https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_disc-nobn_fis_20210406_175720-9428517a.pkl) | -| SinGAN + CSG | [fish.jpg](https://download.openmmlab.com/mmgen/dataset/singan/fish-crop.jpg) | 10 | [singan-csg_fish](/configs/positional_encoding_in_gans/singan-csg_fish.py) | [ckpt](https://download.openmmlab.com/mmgen/pe_in_gans/singan_csg_fis_20210406_175532-f0ec7b61.pth) \| [pkl](https://download.openmmlab.com/mmgen/pe_in_gans/singan_csg_fis_20210406_175532-f0ec7b61.pkl) | -| SinGAN + CSG | [bohemian.png](https://download.openmmlab.com/mmgen/dataset/singan/bohemian.png) | 10 | [singan-csg_bohemian](/configs/positional_encoding_in_gans/singan-csg_bohemian.py) | [ckpt](https://download.openmmlab.com/mmgen/pe_in_gans/singan_csg_bohemian_20210407_195455-5ed56db2.pth) \| [pkl](https://download.openmmlab.com/mmgen/pe_in_gans/singan_csg_bohemian_20210407_195455-5ed56db2.pkl) | -| SinGAN + SPE-dim4 | [fish.jpg](https://download.openmmlab.com/mmgen/dataset/singan/fish-crop.jpg) | 10 | [singan_spe-dim4_fish](/configs/positional_encoding_in_gans/singan_spe-dim4_fish.py) | [ckpt](https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim4_fish_20210406_175933-f483a7e3.pth) \| [pkl](https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim4_fish_20210406_175933-f483a7e3.pkl) | -| SinGAN + SPE-dim4 | [bohemian.png](https://download.openmmlab.com/mmgen/dataset/singan/bohemian.png) | 10 | [singan_spe-dim4_bohemian](/configs/positional_encoding_in_gans/singan_spe-dim4_bohemian.py) | [ckpt](https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim4_bohemian_20210406_175820-6e484a35.pth) \| [pkl](https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim4_bohemian_20210406_175820-6e484a35.pkl) | -| SinGAN + SPE-dim8 | [bohemian.png](https://download.openmmlab.com/mmgen/dataset/singan/bohemian.png) | 10 | [singan_spe-dim8_bohemian](/configs/positional_encoding_in_gans/singan_spe-dim8_bohemian.py) | 
[ckpt](https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim8_bohemian_20210406_175858-7faa50f3.pth) \| [pkl](https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim8_bohemian_20210406_175858-7faa50f3.pkl) | +| SinGAN + no pad | [balloons.png](https://download.openmmlab.com/mmediting/dataset/singan/balloons.png) | 8 | [singan_interp-pad_balloons](./singan_interp-pad_balloons.py) | [ckpt](https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_balloons_20210406_180014-96f51555.pth) \| [pkl](https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_balloons_20210406_180014-96f51555.pkl) | +| SinGAN + no pad + no bn in disc | [balloons.png](https://download.openmmlab.com/mmediting/dataset/singan/balloons.png) | 8 | [singan_interp-pad_disc-nobn_balloons](./singan_interp-pad_disc-nobn_balloons.py) | [ckpt](https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_disc-nobn_balloons_20210406_180059-7d63e65d.pth) \| [pkl](https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_disc-nobn_balloons_20210406_180059-7d63e65d.pkl) | +| SinGAN + no pad + no bn in disc | [fish.jpg](https://download.openmmlab.com/mmediting/dataset/singan/fish-crop.jpg) | 10 | [singan_interp-pad_disc-nobn_fish](./singan_interp-pad_disc-nobn_fish.py) | [ckpt](https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_disc-nobn_fis_20210406_175720-9428517a.pth) \| [pkl](https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_disc-nobn_fis_20210406_175720-9428517a.pkl) | +| SinGAN + CSG | [fish.jpg](https://download.openmmlab.com/mmediting/dataset/singan/fish-crop.jpg) | 10 | [singan-csg_fish](./singan-csg_fish.py) | [ckpt](https://download.openmmlab.com/mmediting/pe_in_gans/singan_csg_fis_20210406_175532-f0ec7b61.pth) \| [pkl](https://download.openmmlab.com/mmediting/pe_in_gans/singan_csg_fis_20210406_175532-f0ec7b61.pkl) | +| SinGAN + CSG | [bohemian.png](https://download.openmmlab.com/mmediting/dataset/singan/bohemian.png) | 10 | [singan-csg_bohemian](./singan-csg_bohemian.py) | [ckpt](https://download.openmmlab.com/mmediting/pe_in_gans/singan_csg_bohemian_20210407_195455-5ed56db2.pth) \| [pkl](https://download.openmmlab.com/mmediting/pe_in_gans/singan_csg_bohemian_20210407_195455-5ed56db2.pkl) | +| SinGAN + SPE-dim4 | [fish.jpg](https://download.openmmlab.com/mmediting/dataset/singan/fish-crop.jpg) | 10 | [singan_spe-dim4_fish](./singan_spe-dim4_fish.py) | [ckpt](https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim4_fish_20210406_175933-f483a7e3.pth) \| [pkl](https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim4_fish_20210406_175933-f483a7e3.pkl) | +| SinGAN + SPE-dim4 | [bohemian.png](https://download.openmmlab.com/mmediting/dataset/singan/bohemian.png) | 10 | [singan_spe-dim4_bohemian](./singan_spe-dim4_bohemian.py) | [ckpt](https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim4_bohemian_20210406_175820-6e484a35.pth) \| [pkl](https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim4_bohemian_20210406_175820-6e484a35.pkl) | +| SinGAN + SPE-dim8 | [bohemian.png](https://download.openmmlab.com/mmediting/dataset/singan/bohemian.png) | 10 | [singan_spe-dim8_bohemian](./singan_spe-dim8_bohemian.py) | [ckpt](https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim8_bohemian_20210406_175858-7faa50f3.pth) \| [pkl](https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim8_bohemian_20210406_175858-7faa50f3.pkl) | ## Citation diff --git 
a/configs/positional_encoding_in_gans/metafile.yml b/configs/positional_encoding_in_gans/metafile.yml index d649dc171c..f46e4fdd62 100644 --- a/configs/positional_encoding_in_gans/metafile.yml +++ b/configs/positional_encoding_in_gans/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://openaccess.thecvf.com/content/CVPR2021/html/Xu_Positional_Encoding_As_Spatial_Inductive_Bias_in_GANs_CVPR_2021_paper.html README: configs/positional_encoding_in_gans/README.md + Task: + - unconditional gans + Year: 2021 Models: - Config: configs/positional_encoding_in_gans/stylegan2_c2_8xb3-1100kiters_ffhq-256x256.py In Collection: Positional Encoding in GANs @@ -21,7 +24,7 @@ Models: SSIM: 51.24 Scales: 256.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/stylegan2_c2_config-a_ffhq_256x256_b3x8_1100k_20210406_145127-71d9634b.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/stylegan2_c2_config-a_ffhq_256x256_b3x8_1100k_20210406_145127-71d9634b.pth - Config: configs/positional_encoding_in_gans/stylegan2_c2_8xb3-1100kiters_ffhq-512x512.py In Collection: Positional Encoding in GANs Metadata: @@ -36,7 +39,7 @@ Models: SSIM: 54.58 Scales: 512.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/stylegan2_c2_config-b_ffhq_512x512_b3x8_1100k_20210406_145142-e85e5cf4.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/stylegan2_c2_config-b_ffhq_512x512_b3x8_1100k_20210406_145142-e85e5cf4.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -50,7 +53,7 @@ Models: PSNR: 73.84 SSIM: 55.77 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-c_ffhq_256-512_b3x8_1100k_20210406_144824-9f43b07d.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-c_ffhq_256-512_b3x8_1100k_20210406_144824-9f43b07d.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -64,7 +67,7 @@ Models: PSNR: 73.28 SSIM: 56.16 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-d_ffhq_256-512_b3x8_1100k_20210406_144840-dbefacf6.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-d_ffhq_256-512_b3x8_1100k_20210406_144840-dbefacf6.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -78,7 +81,7 @@ Models: PSNR: 74.13 SSIM: 56.88 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-e_ffhq_256-512_b3x8_1100k_20210406_144906-98d5a42a.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-e_ffhq_256-512_b3x8_1100k_20210406_144906-98d5a42a.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -92,7 +95,7 @@ Models: PSNR: 73.51 SSIM: 57.32 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-512_b3x8_1100k_20210406_144927-4f4d5391.pth + Weights: 
https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-512_b3x8_1100k_20210406_144927-4f4d5391.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -106,7 +109,7 @@ Models: PSNR: 73.05 SSIM: 56.45 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c1_config-g_ffhq_256-512_b3x8_1100k_20210406_144758-2df61752.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c1_config-g_ffhq_256-512_b3x8_1100k_20210406_144758-2df61752.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -120,7 +123,7 @@ Models: PSNR: 72.81 SSIM: 54.35 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-h_ffhq_256-512_b3x8_1100k_20210406_145006-84cf3f48.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-h_ffhq_256-512_b3x8_1100k_20210406_145006-84cf3f48.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -134,7 +137,7 @@ Models: PSNR: 73.26 SSIM: 54.71 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-i_ffhq_256-512_b3x8_1100k_20210406_145023-c2b0accf.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-i_ffhq_256-512_b3x8_1100k_20210406_145023-c2b0accf.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -148,7 +151,7 @@ Models: PSNR: 73.11 SSIM: 54.63 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-j_ffhq_256-512_b3x8_1100k_20210406_145044-c407481b.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-j_ffhq_256-512_b3x8_1100k_20210406_145044-c407481b.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -162,7 +165,7 @@ Models: PSNR: 73.05 SSIM: 51.07 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-k_ffhq_256-512_b3x8_1100k_20210406_145105-6d8cc39f.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-k_ffhq_256-512_b3x8_1100k_20210406_145105-6d8cc39f.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896.py In Collection: Positional Encoding in GANs Metadata: @@ -176,7 +179,7 @@ Models: PSNR: 72.21 SSIM: 50.29 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-896_b3x8_1100k_20210406_144943-6c18ad5d.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-896_b3x8_1100k_20210406_144943-6c18ad5d.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024.py In Collection: Positional Encoding in GANs Metadata: @@ -190,7 +193,7 @@ Models: PSNR: 71.79 SSIM: 49.92 Task: Unconditional GANs - Weights: 
https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c1_config-f_ffhq_256-1024_b2x8_1600k_20210406_144716-81cbdc96.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c1_config-f_ffhq_256-1024_b2x8_1600k_20210406_144716-81cbdc96.pth - Config: configs/positional_encoding_in_gans/stylegan2_c2_8xb3-1100kiters_ffhq-256x256.py In Collection: Positional Encoding in GANs Metadata: @@ -204,7 +207,7 @@ Models: Recall10k: 51.24 Scales: 256.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/stylegan2_c2_config-a_ffhq_256x256_b3x8_1100k_20210406_145127-71d9634b.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/stylegan2_c2_config-a_ffhq_256x256_b3x8_1100k_20210406_145127-71d9634b.pth - Config: configs/positional_encoding_in_gans/stylegan2_c2_8xb3-1100kiters_ffhq-512x512.py In Collection: Positional Encoding in GANs Metadata: @@ -218,7 +221,7 @@ Models: Recall10k: 54.58 Scales: 512.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/stylegan2_c2_config-b_ffhq_512x512_b3x8_1100k_20210406_145142-e85e5cf4.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/stylegan2_c2_config-b_ffhq_512x512_b3x8_1100k_20210406_145142-e85e5cf4.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-c_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -231,7 +234,7 @@ Models: Precision10k: 73.84 Recall10k: 55.77 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-c_ffhq_256-512_b3x8_1100k_20210406_144824-9f43b07d.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-c_ffhq_256-512_b3x8_1100k_20210406_144824-9f43b07d.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-d_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -244,7 +247,7 @@ Models: Precision10k: 73.28 Recall10k: 56.16 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-d_ffhq_256-512_b3x8_1100k_20210406_144840-dbefacf6.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-d_ffhq_256-512_b3x8_1100k_20210406_144840-dbefacf6.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-e_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -257,7 +260,7 @@ Models: Precision10k: 74.13 Recall10k: 56.88 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-e_ffhq_256-512_b3x8_1100k_20210406_144906-98d5a42a.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-e_ffhq_256-512_b3x8_1100k_20210406_144906-98d5a42a.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -270,7 +273,7 @@ Models: Precision10k: 73.51 Recall10k: 57.32 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-512_b3x8_1100k_20210406_144927-4f4d5391.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-512_b3x8_1100k_20210406_144927-4f4d5391.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-g_c1_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in 
GANs Metadata: @@ -283,7 +286,7 @@ Models: Precision10k: 73.05 Recall10k: 56.45 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c1_config-g_ffhq_256-512_b3x8_1100k_20210406_144758-2df61752.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c1_config-g_ffhq_256-512_b3x8_1100k_20210406_144758-2df61752.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-h_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -296,7 +299,7 @@ Models: Precision10k: 72.81 Recall10k: 54.35 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-h_ffhq_256-512_b3x8_1100k_20210406_145006-84cf3f48.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-h_ffhq_256-512_b3x8_1100k_20210406_145006-84cf3f48.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-i_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -309,7 +312,7 @@ Models: Precision10k: 73.26 Recall10k: 54.71 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-i_ffhq_256-512_b3x8_1100k_20210406_145023-c2b0accf.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-i_ffhq_256-512_b3x8_1100k_20210406_145023-c2b0accf.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-j_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -322,7 +325,7 @@ Models: Precision10k: 73.11 Recall10k: 54.63 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-j_ffhq_256-512_b3x8_1100k_20210406_145044-c407481b.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-j_ffhq_256-512_b3x8_1100k_20210406_145044-c407481b.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-k_c2_8xb3-1100kiters_ffhq-256-512.py In Collection: Positional Encoding in GANs Metadata: @@ -335,7 +338,7 @@ Models: Precision10k: 73.05 Recall10k: 51.07 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-k_ffhq_256-512_b3x8_1100k_20210406_145105-6d8cc39f.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-k_ffhq_256-512_b3x8_1100k_20210406_145105-6d8cc39f.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c2_8xb3-1100kiters_ffhq-256-896.py In Collection: Positional Encoding in GANs Metadata: @@ -348,7 +351,7 @@ Models: Precision10k: 72.21 Recall10k: 50.29 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-896_b3x8_1100k_20210406_144943-6c18ad5d.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c2_config-f_ffhq_256-896_b3x8_1100k_20210406_144943-6c18ad5d.pth - Config: configs/positional_encoding_in_gans/mspie-stylegan2-config-f_c1_8xb2-1600kiters_ffhq-256-1024.py In Collection: Positional Encoding in GANs Metadata: @@ -361,7 +364,7 @@ Models: Precision10k: 71.79 Recall10k: 49.92 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/mspie-stylegan2_c1_config-f_ffhq_256-1024_b2x8_1600k_20210406_144716-81cbdc96.pth + Weights: 
https://download.openmmlab.com/mmediting/pe_in_gans/mspie-stylegan2_c1_config-f_ffhq_256-1024_b2x8_1600k_20210406_144716-81cbdc96.pth - Config: configs/positional_encoding_in_gans/singan_interp-pad_balloons.py In Collection: Positional Encoding in GANs Metadata: @@ -372,7 +375,7 @@ Models: Metrics: Num Scales: 8.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_balloons_20210406_180014-96f51555.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_balloons_20210406_180014-96f51555.pth - Config: configs/positional_encoding_in_gans/singan_interp-pad_disc-nobn_balloons.py In Collection: Positional Encoding in GANs Metadata: @@ -383,7 +386,7 @@ Models: Metrics: Num Scales: 8.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_disc-nobn_balloons_20210406_180059-7d63e65d.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_disc-nobn_balloons_20210406_180059-7d63e65d.pth - Config: configs/positional_encoding_in_gans/singan_interp-pad_disc-nobn_fish.py In Collection: Positional Encoding in GANs Metadata: @@ -394,7 +397,7 @@ Models: Metrics: Num Scales: 10.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/singan_interp-pad_disc-nobn_fis_20210406_175720-9428517a.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/singan_interp-pad_disc-nobn_fis_20210406_175720-9428517a.pth - Config: configs/positional_encoding_in_gans/singan-csg_fish.py In Collection: Positional Encoding in GANs Metadata: @@ -405,7 +408,7 @@ Models: Metrics: Num Scales: 10.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/singan_csg_fis_20210406_175532-f0ec7b61.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/singan_csg_fis_20210406_175532-f0ec7b61.pth - Config: configs/positional_encoding_in_gans/singan-csg_bohemian.py In Collection: Positional Encoding in GANs Metadata: @@ -416,7 +419,7 @@ Models: Metrics: Num Scales: 10.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/singan_csg_bohemian_20210407_195455-5ed56db2.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/singan_csg_bohemian_20210407_195455-5ed56db2.pth - Config: configs/positional_encoding_in_gans/singan_spe-dim4_fish.py In Collection: Positional Encoding in GANs Metadata: @@ -427,7 +430,7 @@ Models: Metrics: Num Scales: 10.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim4_fish_20210406_175933-f483a7e3.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim4_fish_20210406_175933-f483a7e3.pth - Config: configs/positional_encoding_in_gans/singan_spe-dim4_bohemian.py In Collection: Positional Encoding in GANs Metadata: @@ -438,7 +441,7 @@ Models: Metrics: Num Scales: 10.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim4_bohemian_20210406_175820-6e484a35.pth + Weights: https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim4_bohemian_20210406_175820-6e484a35.pth - Config: configs/positional_encoding_in_gans/singan_spe-dim8_bohemian.py In Collection: Positional Encoding in GANs Metadata: @@ -449,4 +452,4 @@ Models: Metrics: Num Scales: 10.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/pe_in_gans/singan_spe-dim8_bohemian_20210406_175858-7faa50f3.pth + Weights: 
https://download.openmmlab.com/mmediting/pe_in_gans/singan_spe-dim8_bohemian_20210406_175858-7faa50f3.pth diff --git a/configs/rdn/README.md b/configs/rdn/README.md index 88a05ec7a3..827bc90192 100644 --- a/configs/rdn/README.md +++ b/configs/rdn/README.md @@ -25,9 +25,9 @@ The metrics are `PSNR and SSIM` . | Method | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU Info | Download | | :--------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :----------: | :----------------------------------------------------------------: | -| [rdn_x4c64b16_g1_1000k_div2k](/configs/rdn/rdn_x4c64b16_1xb16-1000k_div2k.py) | 30.4922 | 26.9570 | 29.1925 | 0.8548 | 0.7423 | 0.8233 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x4c64b16_g1_1000k_div2k_20210419-3577d44f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x4c64b16_g1_1000k_div2k_20210419-3577d44f.log.json) | -| [rdn_x3c64b16_g1_1000k_div2k](/configs/rdn/rdn_x3c64b16_1xb16-1000k_div2k.py) | 32.6051 | 28.6338 | 31.2153 | 0.8943 | 0.8077 | 0.8763 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x3c64b16_g1_1000k_div2k_20210419-b93cb6aa.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x3c64b16_g1_1000k_div2k_20210419-b93cb6aa.log.json) | -| [rdn_x2c64b16_g1_1000k_div2k](/configs/rdn/rdn_x2c64b16_1xb16-1000k_div2k.py) | 35.9883 | 31.8366 | 34.9392 | 0.9385 | 0.8920 | 0.9380 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x2c64b16_g1_1000k_div2k_20210419-dc146009.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x2c64b16_g1_1000k_div2k_20210419-dc146009.log.json) | +| [rdn_x4c64b16_g1_1000k_div2k](./rdn_x4c64b16_1xb16-1000k_div2k.py) | 30.4922 | 26.9570 | 29.1925 | 0.8548 | 0.7423 | 0.8233 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x4c64b16_g1_1000k_div2k_20210419-3577d44f.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x4c64b16_g1_1000k_div2k_20210419-3577d44f.log.json) | +| [rdn_x3c64b16_g1_1000k_div2k](./rdn_x3c64b16_1xb16-1000k_div2k.py) | 32.6051 | 28.6338 | 31.2153 | 0.8943 | 0.8077 | 0.8763 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x3c64b16_g1_1000k_div2k_20210419-b93cb6aa.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x3c64b16_g1_1000k_div2k_20210419-b93cb6aa.log.json) | +| [rdn_x2c64b16_g1_1000k_div2k](./rdn_x2c64b16_1xb16-1000k_div2k.py) | 35.9883 | 31.8366 | 34.9392 | 0.9385 | 0.8920 | 0.9380 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x2c64b16_g1_1000k_div2k_20210419-dc146009.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x2c64b16_g1_1000k_div2k_20210419-dc146009.log.json) | ## Quick Start diff --git a/configs/rdn/README_zh-CN.md b/configs/rdn/README_zh-CN.md index e183d7b650..77082fa95d 100644 --- a/configs/rdn/README_zh-CN.md +++ b/configs/rdn/README_zh-CN.md @@ -24,11 +24,11 @@ 在 RGB 通道上进行评估,在评估之前裁剪每个边界中的 `scale` 像素。 我们使用 `PSNR` 和 `SSIM` 作为指标。 -| 算法 | Set5 | Set14 | DIV2K | GPU 信息 | 下载 | -| :--------------------------------------------------------------------: | :--------------: | :--------------: | :--------------: | :----------: | :--------------------------------------------------------------------: | -| 
[rdn_x2c64b16_g1_1000k_div2k](/configs/rdn/rdn_x2c64b16_1xb16-1000k_div2k.py) | 35.9883 / 0.9385 | 31.8366 / 0.8920 | 34.9392 / 0.9380 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x2c64b16_g1_1000k_div2k_20210419-dc146009.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x2c64b16_g1_1000k_div2k_20210419-dc146009.log.json) | -| [rdn_x3c64b16_g1_1000k_div2k](/configs/rdn/rdn_x3c64b16_1xb16-1000k_div2k.py) | 32.6051 / 0.8943 | 28.6338 / 0.8077 | 31.2153 / 0.8763 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x3c64b16_g1_1000k_div2k_20210419-b93cb6aa.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x3c64b16_g1_1000k_div2k_20210419-b93cb6aa.log.json) | -| [rdn_x4c64b16_g1_1000k_div2k](/configs/rdn/rdn_x4c64b16_1xb16-1000k_div2k.py) | 30.4922 / 0.8548 | 26.9570 / 0.7423 | 29.1925 / 0.8233 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x4c64b16_g1_1000k_div2k_20210419-3577d44f.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x4c64b16_g1_1000k_div2k_20210419-3577d44f.log.json) | +| 算法 | Set5 | Set14 | DIV2K | GPU 信息 | 下载 | +| :----------------------------------------------------------------: | :--------------: | :--------------: | :--------------: | :----------: | :------------------------------------------------------------------------: | +| [rdn_x2c64b16_g1_1000k_div2k](./rdn_x2c64b16_1xb16-1000k_div2k.py) | 35.9883 / 0.9385 | 31.8366 / 0.8920 | 34.9392 / 0.9380 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x2c64b16_g1_1000k_div2k_20210419-dc146009.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x2c64b16_g1_1000k_div2k_20210419-dc146009.log.json) | +| [rdn_x3c64b16_g1_1000k_div2k](./rdn_x3c64b16_1xb16-1000k_div2k.py) | 32.6051 / 0.8943 | 28.6338 / 0.8077 | 31.2153 / 0.8763 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x3c64b16_g1_1000k_div2k_20210419-b93cb6aa.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x3c64b16_g1_1000k_div2k_20210419-b93cb6aa.log.json) | +| [rdn_x4c64b16_g1_1000k_div2k](./rdn_x4c64b16_1xb16-1000k_div2k.py) | 30.4922 / 0.8548 | 26.9570 / 0.7423 | 29.1925 / 0.8233 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x4c64b16_g1_1000k_div2k_20210419-3577d44f.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/rdn/rdn_x4c64b16_g1_1000k_div2k_20210419-3577d44f.log.json) | ## 快速开始 diff --git a/configs/rdn/metafile.yml b/configs/rdn/metafile.yml index 0a6c9f2055..4abdd146aa 100644 --- a/configs/rdn/metafile.yml +++ b/configs/rdn/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1802.08797 README: configs/rdn/README.md + Task: + - image super-resolution + Year: 2018 Models: - Config: configs/rdn/rdn_x4c64b16_1xb16-1000k_div2k.py In Collection: RDN diff --git a/configs/real_basicvsr/README.md b/configs/real_basicvsr/README.md index 60a662a6a0..7b0e6c6e7a 100644 --- a/configs/real_basicvsr/README.md +++ b/configs/real_basicvsr/README.md @@ -24,8 +24,8 @@ Evaluated on Y channel. 
The code for computing NRQM, NIQE, and PI can be found [ | Method | NRQM (Y) | NIQE (Y) | PI (Y) | BRISQUE (Y) | GPU Info | Download | | :--------------------------------------------------------------------: | :------: | :------: | :----: | :---------: | :----------------------: | :-----------------------------------------------------------------------: | -| [realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds](/configs/real_basicvsr/realbasicvsr_c64b20-1x30x8_8xb1-lr5e-5-150k_reds.py) | 6.0477 | 3.7662 | 3.8593 | 29.030 | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds_20211104-52f77c2c.pth)/[log](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds_20211104_183640.log.json) | -| [realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds](/configs/real_basicvsr/realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds_20211027-0e2ff207.pth)/[log](http://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds_20211027_114039.log.json) | +| [realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds](./realbasicvsr_c64b20-1x30x8_8xb1-lr5e-5-150k_reds.py) | 6.0477 | 3.7662 | 3.8593 | 29.030 | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds_20211104-52f77c2c.pth)/[log](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds_20211104_183640.log.json) | +| [realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds](./realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds_20211027-0e2ff207.pth)/[log](http://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds_20211027_114039.log.json) | ## Quick Start diff --git a/configs/real_basicvsr/README_zh-CN.md b/configs/real_basicvsr/README_zh-CN.md index 45d40cb39f..e753988416 100644 --- a/configs/real_basicvsr/README_zh-CN.md +++ b/configs/real_basicvsr/README_zh-CN.md @@ -24,19 +24,19 @@ | 算法 | NRQM (Y) | NIQE (Y) | PI (Y) | BRISQUE (Y) | GPU 信息 | Download | | :------------------------------------------------------------------: | :------: | :------: | :----: | :---------: | :----------------------: | :-------------------------------------------------------------------------: | -| [realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds](/configs/real_basicvsr/realbasicvsr_c64b20-1x30x8_8xb1-lr5e-5-150k_reds.py) | 6.0477 | 3.7662 | 3.8593 | 29.030 | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds_20211104-52f77c2c.pth)/[log](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds_20211104_183640.log.json) | -| [realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds](/configs/real_basicvsr/realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | 
[model](http://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds_20211027-0e2ff207.pth)/[log](http://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds_20211027_114039.log.json) | +| [realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds](./realbasicvsr_c64b20-1x30x8_8xb1-lr5e-5-150k_reds.py) | 6.0477 | 3.7662 | 3.8593 | 29.030 | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds_20211104-52f77c2c.pth)/[log](https://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds_20211104_183640.log.json) | +| [realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds](./realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | [model](http://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds_20211027-0e2ff207.pth)/[log](http://download.openmmlab.com/mmediting/restorers/real_basicvsr/realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds_20211027_114039.log.json) | ## 训练 训练分为两个阶段: -1. 使用 [realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds.py](realbasicvsr_wogan_c64b20_2x30x8_lr1e-4_300k_reds.py) 训练一个没有感知损失和对抗性损失的模型。 -2. 使用感知损失和对抗性损失 [realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds.py](realbasicvsr_c64b20_1x30x8_lr5e-5_150k_reds.py) 微调模型。 +1. 使用 [realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds.py](realbasicvsr_wogan-c64b20-2x30x8_8xb2-lr1e-4-300k_reds.py) 训练一个没有感知损失和对抗性损失的模型。 +2. 使用感知损失和对抗性损失 [realbasicvsr_c64b20-1x30x8_8xb1-lr5e-5-150k_reds.py ](realbasicvsr_c64b20-1x30x8_8xb1-lr5e-5-150k_reds.py) 微调模型。 **注:** -1. 您可能希望将图像裁剪为子图像以加快 IO。请参阅[此处](/tools/data/super-resolution/reds/preprocess_reds_dataset.py)了解更多详情。 +1. 您可能希望将图像裁剪为子图像以加快 IO。请参阅[此处](../../tools/dataset_converters/reds/preprocess_reds_dataset.py)了解更多详情。 ## 快速开始 diff --git a/configs/real_basicvsr/metafile.yml b/configs/real_basicvsr/metafile.yml index 2ff2d21612..825560fed6 100644 --- a/configs/real_basicvsr/metafile.yml +++ b/configs/real_basicvsr/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2111.12704 README: configs/real_basicvsr/README.md + Task: + - video super-resolution + Year: 2022 Models: - Config: configs/real_basicvsr/realbasicvsr_c64b20-1x30x8_8xb1-lr5e-5-150k_reds.py In Collection: RealBasicVSR diff --git a/configs/real_esrgan/README.md b/configs/real_esrgan/README.md index 596ccad510..b908beda44 100644 --- a/configs/real_esrgan/README.md +++ b/configs/real_esrgan/README.md @@ -24,8 +24,8 @@ Evaluated on Set5 dataset with RGB channels. The metrics are `PSNR` and `SSIM`. 
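As a rough illustration of how PSNR/SSIM figures like the ones below can be reproduced for a single image pair, here is a small sketch. It is not this repository's evaluation code (which may crop borders or convert to the Y channel first) and it assumes 8-bit RGB arrays and scikit-image >= 0.19.

```python
# Minimal sketch: PSNR/SSIM between a restored image and its ground truth.
# Assumes uint8 RGB arrays and scikit-image >= 0.19 (for `channel_axis`).
import numpy as np
from skimage.metrics import structural_similarity


def psnr(img: np.ndarray, gt: np.ndarray, max_val: float = 255.0) -> float:
    mse = np.mean((img.astype(np.float64) - gt.astype(np.float64)) ** 2)
    return float('inf') if mse == 0 else 10.0 * np.log10(max_val ** 2 / mse)


def ssim(img: np.ndarray, gt: np.ndarray) -> float:
    return structural_similarity(img, gt, channel_axis=-1, data_range=255)


# Example usage with random stand-in images:
gt = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
sr = np.clip(gt + np.random.randint(-5, 6, gt.shape), 0, 255).astype(np.uint8)
print(psnr(sr, gt), ssim(sr, gt))
```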
| Method | PSNR | SSIM | GPU Info | Download | | :------------------------------------------------------------------------------: | :-----: | :----: | :----------------------: | :---------------------------------------------------------------------------------: | -| [realesrnet_c64b23g32_12x4_lr2e-4_1000k_df2k_ost](/configs/real_esrgan/realesrnet_c64b23g32_4xb12-lr2e-4-1000k_df2k-ost.py) | 28.0297 | 0.8236 | 4 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrnet_c64b23g32_12x4_lr2e-4_1000k_df2k_ost_20210816-4ae3b5a4.pth)/log | -| [realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost](/configs/real_esrgan/realesrgan_c64b23g32_4xb12-lr1e-4-400k_df2k-ost.py) | 26.2204 | 0.7655 | 4 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost_20211010-34798885.pth) /[log](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost_20210922_142838.log.json) | +| [realesrnet_c64b23g32_12x4_lr2e-4_1000k_df2k_ost](./realesrnet_c64b23g32_4xb12-lr2e-4-1000k_df2k-ost.py) | 28.0297 | 0.8236 | 4 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrnet_c64b23g32_12x4_lr2e-4_1000k_df2k_ost_20210816-4ae3b5a4.pth)/log | +| [realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost](./realesrgan_c64b23g32_4xb12-lr1e-4-400k_df2k-ost.py) | 26.2204 | 0.7655 | 4 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost_20211010-34798885.pth) /[log](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost_20210922_142838.log.json) | ## Quick Start diff --git a/configs/real_esrgan/README_zh-CN.md b/configs/real_esrgan/README_zh-CN.md index 54485dd2de..c35f4ac571 100644 --- a/configs/real_esrgan/README_zh-CN.md +++ b/configs/real_esrgan/README_zh-CN.md @@ -25,8 +25,8 @@ | 算法 | Set5 | GPU 信息 | 下载 | | :-------------------------------------------------------------------------------: | :------------: | :----------------------: | :-------------------------------------------------------------------------------: | -| [realesrnet_c64b23g32_12x4_lr2e-4_1000k_df2k_ost](/configs/real_esrgan/realesrnet_c64b23g32_4xb12-lr2e-4-1000k_df2k-ost.py) | 28.0297/0.8236 | 4 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrnet_c64b23g32_12x4_lr2e-4_1000k_df2k_ost_20210816-4ae3b5a4.pth)/日志 | -| [realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost](/configs/real_esrgan/realesrgan_c64b23g32_4xb12-lr1e-4-400k_df2k-ost.py) | 26.2204/0.7655 | 4 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost_20211010-34798885.pth) /[日志](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost_20210922_142838.log.json) | +| [realesrnet_c64b23g32_12x4_lr2e-4_1000k_df2k_ost](./realesrnet_c64b23g32_4xb12-lr2e-4-1000k_df2k-ost.py) | 28.0297/0.8236 | 4 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrnet_c64b23g32_12x4_lr2e-4_1000k_df2k_ost_20210816-4ae3b5a4.pth)/日志 | +| [realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost](./realesrgan_c64b23g32_4xb12-lr1e-4-400k_df2k-ost.py) | 26.2204/0.7655 | 4 (Tesla V100-SXM2-32GB) | 
[模型](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost_20211010-34798885.pth) /[日志](https://download.openmmlab.com/mmediting/restorers/real_esrgan/realesrgan_c64b23g32_12x4_lr1e-4_400k_df2k_ost_20210922_142838.log.json) | ## 快速开始 diff --git a/configs/real_esrgan/metafile.yml b/configs/real_esrgan/metafile.yml index f58b4ec7e9..ac94674d2f 100644 --- a/configs/real_esrgan/metafile.yml +++ b/configs/real_esrgan/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2107.10833 README: configs/real_esrgan/README.md + Task: + - image super-resolution + Year: 2021 Models: - Config: configs/real_esrgan/realesrnet_c64b23g32_4xb12-lr2e-4-1000k_df2k-ost.py In Collection: Real-ESRGAN diff --git a/configs/restormer/README.md b/configs/restormer/README.md new file mode 100644 index 0000000000..32d0cca049 --- /dev/null +++ b/configs/restormer/README.md @@ -0,0 +1,261 @@ +# Restormer (CVPR'2022) + +> [Restormer: Efficient Transformer for High-Resolution Image Restoration](https://arxiv.org/abs/2111.09881) + +> **Task**: Denoising, Deblurring, Deraining + + + +## Abstract + + + +Since convolutional neural networks (CNNs) perform well at learning generalizable image priors from large-scale data, these models have been extensively applied to image restoration and related tasks. Recently, another class of neural architectures, Transformers, have shown significant performance gains on natural language and high-level vision tasks. While the Transformer model mitigates the shortcomings of CNNs (i.e., limited receptive field and inadaptability to input content), its computational complexity grows quadratically with the spatial resolution, therefore making it infeasible to apply to most image restoration tasks involving high-resolution images. In this work, we propose an efficient Transformer model by making several key designs in the building blocks (multi-head attention and feed-forward network) such that it can capture long-range pixel interactions, while still remaining applicable to large images. Our model, named Restoration Transformer (Restormer), achieves state-of-the-art results on several image restoration tasks, including image deraining, single-image motion deblurring, defocus deblurring (single-image and dual-pixel data), and image denoising (Gaussian grayscale/color denoising, and real image denoising). + + + +
+ +
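To make the key design mentioned in the abstract concrete, the sketch below shows multi-head attention applied across channels ("transposed" attention), which keeps the attention map at C x C and the cost linear in the number of pixels. It is an illustrative re-implementation written for this note, not the module added by this PR; the layer names and default sizes are assumptions.

```python
# Rough sketch of channel-wise ("transposed") multi-head self-attention in the
# spirit of Restormer's attention block. Not the implementation added by this
# PR; all names and hyper-parameters here are illustrative.
import torch
import torch.nn as nn
import torch.nn.functional as F


class ChannelAttention(nn.Module):

    def __init__(self, dim: int = 48, num_heads: int = 4):
        super().__init__()
        self.num_heads = num_heads
        self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1))
        # 1x1 conv mixes channels, 3x3 depth-wise conv adds local context.
        self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1)
        self.qkv_dw = nn.Conv2d(dim * 3, dim * 3, 3, padding=1, groups=dim * 3)
        self.project_out = nn.Conv2d(dim, dim, kernel_size=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, c, h, w = x.shape
        q, k, v = self.qkv_dw(self.qkv(x)).chunk(3, dim=1)
        # Reshape to (B, heads, C//heads, H*W) and attend over the channel dim.
        q = q.reshape(b, self.num_heads, c // self.num_heads, h * w)
        k = k.reshape(b, self.num_heads, c // self.num_heads, h * w)
        v = v.reshape(b, self.num_heads, c // self.num_heads, h * w)
        q, k = F.normalize(q, dim=-1), F.normalize(k, dim=-1)
        attn = (q @ k.transpose(-2, -1)) * self.temperature  # (B, heads, C', C')
        out = attn.softmax(dim=-1) @ v                        # (B, heads, C', H*W)
        return self.project_out(out.reshape(b, c, h, w)) + x


# The attention matrix stays C' x C' whatever the input resolution, so a
# 256x256 feature map is as cheap to attend over as a 64x64 one.
y = ChannelAttention()(torch.randn(1, 48, 256, 256))
```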
+ +## Results and models + +### **Deraining** + +Evaluated on Y channels. The metrics are `PSNR` / `SSIM` . + +| Method | Rain100H
PSNR/SSIM (Y) | Rain100L
PSNR/SSIM (Y) | Test100
PSNR/SSIM (Y) | Test1200
PSNR/SSIM (Y) | Test2800
PSNR/SSIM (Y) | GPU Info | Download | +| :-------------------------------: | :-----------------------: | :-----------------------: | :----------------------: | :-----------------------: | :-----------------------: | :------: | :---------------------------------: | +| [restormer_official_rain13k](./restormer_official_rain13k.py) | 31.4804/0.9056 | 39.1023/0.9787 | 32.0287/0.9239 | 33.2251/0.9272 | 34.2170/0.9451 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth) \| log | + +### **Motion Deblurring** + +Evaluated on RGB channels for GoPro and HIDE, and on the Y channel for RealBlur-J and RealBlur-R. The metrics are `PSNR` / `SSIM`. + +| Method | GoPro
PSNR/SSIM (RGB) | HIDE
PSNR/SSIM (RGB) | RealBlur-J
PSNR/SSIM (Y) | RealBlur-R
PSNR/SSIM (Y) | GPU Info | Download | +| :------------------------------------------: | :----------------------: | :---------------------: | :-------------------------: | :-------------------------: | :------: | :---------------------------------------------: | +| [restormer_official_gopro](./restormer_official_gopro.py) | 32.9295/0.9496 | 31.2289/0.9345 | 28.4356/0.8681 | 35.9141/0.9707 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth) \| log | + +### **Defocus Deblurring** + +Evaluated on RGB channels. The metrics are `PSNR` / `SSIM` / `MAE` / `LPIPS`. + +| Method | Indoor Scenes PSNR | Indoor Scenes SSIM | Indoor Scenes MAE | Indoor Scenes LPIPS | Outdoor Scenes PSNR | Outdoor Scenes SSIM | Outdoor Scenes MAE | Outdoor Scenes LPIPS | Combined PSNR | Combined SSIM | Combined MAE | Combined LPIPS | GPU Info | Download | +| :-------------------------------------------------------------------: | :----------------: | :----------------: | :---------------: | :-----------------: | :-----------------: | :-----------------: | :----------------: | :------------------: | :-----------: | :-----------: | :----------: | :------------: | :------: | :------------------------------------------------------------------------------------------------------------: | +| [restormer_official_dpdd-single](./restormer_official_dpdd-single.py) | 28.8681 | 0.8859 | 0.0251 | - | 23.2410 | 0.7509 | 0.0499 | - | 25.9805 | 0.8166 | 0.0378 | - | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth) \| log | +| [restormer_official_dpdd-dual](./restormer_official_dpdd-dual.py) | 26.6160 | 0.8346 | 0.0354 | - | 26.6160 | 0.8346 | 0.0354 | - | 26.6160 | 0.8346 | 0.0354 | - | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth) \| log | + +### **Gaussian Denoising** + +**Test Grayscale Gaussian Noise** + +Evaluated on grayscale images. The metrics are `PSNR` / `SSIM` . + +| Method | $\\sigma$ | Set12
PSNR/SSIM | BSD68
PSNR/SSIM | Urban100
PSNR/SSIM | GPU Info | Download | +| :------------------------------------------------------------: | :-------: | :----------------: | :----------------: | :-------------------: | :------: | :--------------------------------------------------------------: | +| [restormer_official_dfwb-gray-sigma15](./restormer_official_dfwb-gray-sigma15.py) | 15 | 34.0182/0.9160 | 32.4987/0.8940 | 34.4336/0.9419 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth) \| log | +| [restormer_official_dfwb-gray-sigma25](./restormer_official_dfwb-gray-sigma25.py) | 25 | 31.7289/0.8811 | 30.1613/0.8370 | 32.1162/0.9140 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth) \| log | +| [restormer_official_dfwb-gray-sigma50](./restormer_official_dfwb-gray-sigma50.py) | 50 | 28.6269/0.8188 | 27.3266/0.7434 | 28.9636/0.8571 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth) \| log | +| | | | | | | | +| [restormer_official_dfwb-gray-sigma15](./restormer_official_dfwb-gray-sigma15.py) | 15 | 33.9642/0.9153 | 32.4994/0.8928 | 34.3152/0.9409 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth) \| log | +| [restormer_official_dfwb-gray-sigma25](./restormer_official_dfwb-gray-sigma25.py) | 25 | 31.7106/0.8810 | 30.1486/0.8360 | 32.0457/0.9131 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth) \| log | +| [restormer_official_dfwb-gray-sigma50](./restormer_official_dfwb-gray-sigma50.py) | 50 | 28.6614/0.8197 | 27.3537/0.7422 | 28.9848/0.8571 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth) \| log | + +> Top super-row: training a separate model for each noise level. Bottom super-row: learning a single model to handle various noise levels. + +**Test Color Gaussian Noise** + +Evaluated on RGB channels. The metrics are `PSNR` / `SSIM` . + +| Method | $\\sigma$ | CBSD68
PSNR/SSIM (RGB) | Kodak24
PSNR/SSIM (RGB) | McMaster
PSNR/SSIM (RGB) | Urban100
PSNR/SSIM (RGB) | GPU Info | Download | +| :------------------------------------: | :-------: | :-----------------------: | :------------------------: | :-------------------------: | :-------------------------: | :------: | :--------------------------------------: | +| [restormer_official_dfwb-color-sigma15](./restormer_official_dfwb-color-sigma15.py) | 15 | 34.3506/0.9352 | 35.4900/0.9312 | 35.6072/0.9352 | 35.1522/0.9530 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth) \| log | +| [restormer_official_dfwb-color-sigma25](./restormer_official_dfwb-color-sigma25.py) | 25 | 31.7457/0.8942 | 33.0489/0.8943 | 33.3260/0.9066 | 32.9670/0.9317 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth) \| log | +| [restormer_official_dfwb-color-sigma50](./restormer_official_dfwb-color-sigma50.py) | 50 | 28.5569/0.8127 | 30.0122/0.8238 | 30.2608/0.8515 | 30.0230/0.8902 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth) \| log | +| | | | | | | | | +| [restormer_official_dfwb-color-sigma15](./restormer_official_dfwb-color-sigma15.py) | 15 | 34.3422/0.9356 | 35.4544/0.9308 | 35.5473/0.9344 | 35.0754/0.9524 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth) \| log | +| [restormer_official_dfwb-color-sigma25](./restormer_official_dfwb-color-sigma25.py) | 25 | 31.7391/0.8945 | 33.0380/0.8941 | 33.3040/0.9063 | 32.9165/0.9312 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth) \| log | +| [restormer_official_dfwb-color-sigma50](./restormer_official_dfwb-color-sigma50.py) | 50 | 28.5582/0.8126 | 30.0074/0.8233 | 30.2671/0.8520 | 30.0172/0.8898 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth) \| log | + +> Top super-row: training a separate model for each noise level. Bottom super-row: learning a single model to handle various noise levels. + +### **Real Image Denoising** + +Evaluated on RGB channels. The metrics are `PSNR` / `SSIM` . + +| Method | SIDD
PSNR/SSIM | GPU Info | Download | +| :-----------------------------------------------------: | :---------------: | :------: | :-----------------------------------------------------------------------------------------------------: | +| [restormer_official_sidd](./restormer_official_sidd.py) | 40.0156/0.9225 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_sidd-9e7025db.pth) \| log | + +## Quick Start + +**Train** + +You can refer to **Train a model** part in [train_test.md](/docs/en/user_guides/train_test.md#Train-a-model-in-MMEditing). + + + +**Test** + +
+Test Instructions
+
+You can use the following commands to test a model with CPU or single/multiple GPUs.
+
+```shell
+# cpu test
+# Deraining
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_rain13k.py https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth
+
+# Motion Deblurring
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_gopro.py https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth
+
+# Defocus Deblurring
+# Single
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dpdd-single.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth
+# Dual
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dpdd-dual.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth
+
+# Gaussian Denoising
+# Test Grayscale Gaussian Noise
+# sigma15
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma25
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma50
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# Test Color Gaussian Noise
+# sigma15
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma25
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma50
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# single-gpu test
+# Deraining
+python tools/test.py configs/restormer/restormer_official_rain13k.py https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth
+
+# Motion Deblurring
+python tools/test.py configs/restormer/restormer_official_gopro.py https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth
+
+# Defocus Deblurring
+# Single
+python tools/test.py configs/restormer/restormer_official_dpdd-single.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth
+# Dual
+python tools/test.py configs/restormer/restormer_official_dpdd-dual.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth
+
+# Gaussian Denoising
+# Test Grayscale Gaussian Noise
+# sigma15
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma25
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma50
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# Test Color Gaussian Noise
+# sigma15
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma25
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma50
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+
+# multi-gpu test
+# Deraining
+./tools/dist_test.sh configs/restormer/restormer_official_rain13k.py https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth
+
+# Motion Deblurring
+./tools/dist_test.sh configs/restormer/restormer_official_gopro.py https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth
+
+# Defocus Deblurring
+# Single
+./tools/dist_test.sh configs/restormer/restormer_official_dpdd-single.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth
+# Dual
+./tools/dist_test.sh configs/restormer/restormer_official_dpdd-dual.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth
+
+# Gaussian Denoising
+# Test Grayscale Gaussian Noise
+# sigma15
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma25
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma50
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# Test Color Gaussian Noise
+# sigma15
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma25
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma50
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+```
+
+For more details, you can refer to the **Test a pre-trained model** part in [train_test.md](/docs/en/user_guides/train_test.md#Test-a-pre-trained-model-in-MMEditing).
+
+
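+Note that several of the tables above report `PSNR/SSIM (Y)`, i.e. metrics computed on the luma channel rather than on RGB. The snippet below is only a minimal NumPy sketch (it is not part of this repository) of how such a Y-channel PSNR is commonly obtained with the ITU-R BT.601 conversion; the evaluator used for the numbers above may additionally crop borders or round intermediate values, so results can differ slightly.
+
+```python
+import numpy as np
+
+
+def rgb_to_y(img):
+    """Map an RGB image with values in [0, 255] to the BT.601 Y channel."""
+    img = img.astype(np.float64) / 255.0
+    r, g, b = img[..., 0], img[..., 1], img[..., 2]
+    return 65.481 * r + 128.553 * g + 24.966 * b + 16.0
+
+
+def psnr_y(pred, gt):
+    """PSNR between two RGB images, measured on the Y channel only."""
+    mse = np.mean((rgb_to_y(pred) - rgb_to_y(gt)) ** 2)
+    return float('inf') if mse == 0 else 20.0 * np.log10(255.0 / np.sqrt(mse))
+```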
+ +## Citation + +```bibtex +@inproceedings{Zamir2021Restormer, + title={Restormer: Efficient Transformer for High-Resolution Image Restoration}, + author={Syed Waqas Zamir and Aditya Arora and Salman Khan and Munawar Hayat + and Fahad Shahbaz Khan and Ming-Hsuan Yang}, + booktitle={CVPR}, + year={2022} +} +``` diff --git a/configs/restormer/README_zh-CN.md b/configs/restormer/README_zh-CN.md new file mode 100644 index 0000000000..91c669f492 --- /dev/null +++ b/configs/restormer/README_zh-CN.md @@ -0,0 +1,239 @@ +# Restormer (CVPR'2022) + +> [Restormer: Efficient Transformer for High-Resolution Image Restoration](https://arxiv.org/abs/2111.09881) + +> **任务**: 图像去噪,图像去模糊,图像去雨 + + + +
+ +
+ +## **各个任务下模型的测试结果** + +### **图像去雨** + +所有数据集均在Y通道上进行测试,测试指标为PSNR和SSIM。 + +| 方法 | Rain100H
PSNR/SSIM (Y) | Rain100L
PSNR/SSIM (Y) | Test100
PSNR/SSIM (Y) | Test1200
PSNR/SSIM (Y) | Test2800
PSNR/SSIM (Y) | GPU信息 | 下载 | +| :--------------------------------: | :-----------------------: | :-----------------------: | :----------------------: | :-----------------------: | :-----------------------: | :-----: | :---------------------------------: | +| [restormer_official_rain13k](./restormer_official_rain13k.py) | 31.4804/0.9056 | 39.1023/0.9787 | 32.0287/0.9239 | 33.2251/0.9272 | 34.2170/0.9451 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth) \| log | + +### **图像去模糊** + +在GoPro和HIDE数据集上使用RGB通道测试,在RealBlur-J 和 RealBlur-R数据集上使用Y通道测试。测试指标为PSNR和SSIM。 + +| 方法 | GoPro
PSNR/SSIM (RGB) | HIDE
PSNR/SSIM (RGB) | RealBlur-J
PSNR/SSIM (Y) | RealBlur-R
PSNR/SSIM (Y) | GPU信息 | 下载 | +| :--------------------------------------------: | :----------------------: | :---------------------: | :-------------------------: | :-------------------------: | :-----: | :--------------------------------------------: | +| [restormer_official_gopro](./restormer_official_gopro.py) | 32.9295/0.9496 | 31.2289/0.9345 | 28.4356/0.8681 | 35.9141/0.9707 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth) \| log | + +### **图像去失焦模糊** + +所有指标均在RGB通道上进行测试。测试指标为PSNR、SSIM、MAE和LPIPS. + +| 方法 | 室内场景图像的PSNR | 室内场景图像的SSIM | 室内场景图像的MAE | 室内场景图像的LPIPS | 室外场景图像的PSNR | 室外场景图像的SSIM | 室外场景图像的MAE | 室外场景图像的LPIPS | 所有图像平均PSNR | 所有图像平均SSIM | 所有图像平均MAE | 所有图像平均LPIPS | GPU 信息 | 下载 | +| :----: | :-------------: | :-------------: | :------------: | :--------------: | :-------------: | :-------------: | :------------: | :--------------: | :------------: | :-------------: | :------------: | :--------------: | :------: | :-----: | +| [restormer_official_dpdd-single](./restormer_official_dpdd-single.py) | 28.8681 | 0.8859 | 0.0251 | - | 23.2410 | 0.7509 | 0.0499 | - | 25.9805 | 0.8166 | 0.0378 | - | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth) \| log | +| [restormer_official_dpdd-dual](./restormer_official_dpdd-dual.py) | 26.6160 | 0.8346 | 0.0354 | - | 26.6160 | 0.8346 | 0.0354 | - | 26.6160 | 0.8346 | 0.0354 | - | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth) \| log | + +### **图像高斯噪声去除** + +**灰度图的高斯噪声** + +使用PSNR和SSIM指标对数据集上的灰度图进行测试。 + +| 方法 | $\\sigma$ | Set12
PSNR/SSIM | BSD68
PSNR/SSIM | Urban100
PSNR/SSIM | GPU信息 | 下载 | +| :-------------------------------------------------------------: | :-------: | :----------------: | :----------------: | :-------------------: | :-----: | :--------------------------------------------------------------: | +| [restormer_official_dfwb-gray-sigma15](./restormer_official_dfwb-gray-sigma15.py) | 15 | 34.0182/0.9160 | 32.4987/0.8940 | 34.4336/0.9419 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth) \| log | +| [restormer_official_dfwb-gray-sigma25](./restormer_official_dfwb-gray-sigma25.py) | 25 | 31.7289/0.8811 | 30.1613/0.8370 | 32.1162/0.9140 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth) \| log | +| [restormer_official_dfwb-gray-sigma50](./restormer_official_dfwb-gray-sigma50.py) | 50 | 28.6269/0.8188 | 27.3266/0.7434 | 28.9636/0.8571 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth) \| log | +| | | | | | | | +| [restormer_official_dfwb-gray-sigma15](./restormer_official_dfwb-gray-sigma15.py) | 15 | 33.9642/0.9153 | 32.4994/0.8928 | 34.3152/0.9409 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth) \| log | +| [restormer_official_dfwb-gray-sigma25](./restormer_official_dfwb-gray-sigma25.py) | 25 | 31.7106/0.8810 | 30.1486/0.8360 | 32.0457/0.9131 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth) \| log | +| [restormer_official_dfwb-gray-sigma50](./restormer_official_dfwb-gray-sigma50.py) | 50 | 28.6614/0.8197 | 27.3537/0.7422 | 28.9848/0.8571 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth) \| log | + +> 上面三行代表每个噪声等级训练一个单独的模型,下面三行代表学习一个单一的模型来处理各种噪音水平。 + +**彩色图像的高斯噪声** + +所有指标均在RGB通道上进行测试,测试指标为PSNR和SSIM。 + +| 方法 | $\\sigma$ | CBSD68
PSNR/SSIM (RGB) | Kodak24
PSNR/SSIM (RGB) | McMaster
PSNR/SSIM (RGB) | Urban100
PSNR/SSIM (RGB) | GPU信息 | 下载 | +| :-------------------------------------: | :-------: | :-----------------------: | :------------------------: | :-------------------------: | :-------------------------: | :-----: | :--------------------------------------: | +| [restormer_official_dfwb-color-sigma15](./restormer_official_dfwb-color-sigma15.py) | 15 | 34.3506/0.9352 | 35.4900/0.9312 | 35.6072/0.9352 | 35.1522/0.9530 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth) \| log | +| [restormer_official_dfwb-color-sigma25](./restormer_official_dfwb-color-sigma25.py) | 25 | 31.7457/0.8942 | 33.0489/0.8943 | 33.3260/0.9066 | 32.9670/0.9317 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth) \| log | +| [restormer_official_dfwb-color-sigma50](./restormer_official_dfwb-color-sigma50.py) | 50 | 28.5569/0.8127 | 30.0122/0.8238 | 30.2608/0.8515 | 30.0230/0.8902 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth) \| log | +| | | | | | | | | +| [restormer_official_dfwb-color-sigma15](./restormer_official_dfwb-color-sigma15.py) | 15 | 34.3422/0.9356 | 35.4544/0.9308 | 35.5473/0.9344 | 35.0754/0.9524 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth) \| log | +| [restormer_official_dfwb-color-sigma25](./restormer_official_dfwb-color-sigma25.py) | 25 | 31.7391/0.8945 | 33.0380/0.8941 | 33.3040/0.9063 | 32.9165/0.9312 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth) \| log | +| [restormer_official_dfwb-color-sigma50](./restormer_official_dfwb-color-sigma50.py) | 50 | 28.5582/0.8126 | 30.0074/0.8233 | 30.2671/0.8520 | 30.0172/0.8898 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth) \| log | + +> 上面三行代表每个噪声等级训练一个单独的模型,下面三行代表学习一个单一的模型来处理各种噪音水平。 + +### **真实场景图像去噪** + +所有指标均在RGB通道上进行测试,测试指标为PSNR和SSIM。 + +| 方法 | SIDD
PSNR/SSIM | GPU信息 | 下载 | +| :-----------------------------------------------------: | :---------------: | :-----: | :-----------------------------------------------------------------------------------------------------: | +| [restormer_official_sidd](./restormer_official_sidd.py) | 40.0156/0.9225 | 1 | [model](https://download.openmmlab.com/mmediting/restormer/restormer_official_sidd-9e7025db.pth) \| log | + +## 使用方法 + +**训练** + +可以参考 [train_test.md](/docs/zh_cn/user_guides/train_test.md) 中的 **Train a model** 部分。 + +**测试** + +
+测试说明
+
+您可以使用以下命令来测试模型。
+
+```shell
+# cpu test
+# Deraining
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_rain13k.py https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth
+
+# Motion Deblurring
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_gopro.py https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth
+
+# Defocus Deblurring
+# Single
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dpdd-single.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth
+# Dual
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dpdd-dual.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth
+
+# Gaussian Denoising
+# Test Grayscale Gaussian Noise
+# sigma15
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma25
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma50
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# Test Color Gaussian Noise
+# sigma15
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma25
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma50
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# single-gpu test
+# Deraining
+python tools/test.py configs/restormer/restormer_official_rain13k.py https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth
+
+# Motion Deblurring
+python tools/test.py configs/restormer/restormer_official_gopro.py https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth
+
+# Defocus Deblurring
+# Single
+python tools/test.py configs/restormer/restormer_official_dpdd-single.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth
+# Dual
+python tools/test.py configs/restormer/restormer_official_dpdd-dual.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth
+
+# Gaussian Denoising
+# Test Grayscale Gaussian Noise
+# sigma15
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma25
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma50
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# Test Color Gaussian Noise
+# sigma15
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma25
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma50
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth
+
+python tools/test.py configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+
+# multi-gpu test
+# Deraining
+./tools/dist_test.sh configs/restormer/restormer_official_rain13k.py https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth
+
+# Motion Deblurring
+./tools/dist_test.sh configs/restormer/restormer_official_gopro.py https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth
+
+# Defocus Deblurring
+# Single
+./tools/dist_test.sh configs/restormer/restormer_official_dpdd-single.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth
+# Dual
+./tools/dist_test.sh configs/restormer/restormer_official_dpdd-dual.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth
+
+# Gaussian Denoising
+# Test Grayscale Gaussian Noise
+# sigma15
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma25
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# sigma50
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-gray-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth
+
+# Test Color Gaussian Noise
+# sigma15
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma15.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma25
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma25.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+# sigma50
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth
+
+./tools/dist_test.sh configs/restormer/restormer_official_dfwb-color-sigma50.py https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth
+
+```
+
+更多细节可以参考 [train_test.md](/docs/zh_cn/user_guides/train_test.md) 中的 **Test a pre-trained model** 部分。
+
+
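+上表中 sigma15/25/50 的测试配置只是在测试数据管线里改写了 `gaussian_sigma` 参数(见本目录下的各个配置文件)。下面给出一个示意用的新配置(非本仓库自带,文件名与噪声等级仅为举例),演示如何按同样的写法评测其它噪声等级,例如 sigma=30:
+
+```python
+# configs/restormer/restormer_official_dfwb-gray-sigma30.py(示例文件,假设新增)
+_base_ = ['restormer_official_dfwb-gray-sigma15.py']
+
+experiment_name = 'restormer_official_dfwb_gray_sigma30'
+work_dir = f'./work_dirs/{experiment_name}'
+save_dir = './work_dirs/'
+
+# 将测试管线中的高斯噪声等级改为 sigma=30
+sigma = 30
+test_dataloader = _base_.test_dataloader
+for dataloader in test_dataloader:
+    test_pipeline = dataloader['dataset']['pipeline']
+    test_pipeline[2]['params']['gaussian_sigma'] = [sigma * 255, sigma * 255]
+```
+
+评测这类非标准噪声等级时,可以配合上表中的 blind(单一模型处理多种噪声等级)权重使用。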
diff --git a/configs/restormer/metafile.yml b/configs/restormer/metafile.yml new file mode 100644 index 0000000000..e6741f5e77 --- /dev/null +++ b/configs/restormer/metafile.yml @@ -0,0 +1,387 @@ +Collections: +- Metadata: + Architecture: + - Restormer + Name: Restormer + Paper: + - https://arxiv.org/abs/2111.09881 + README: configs/restormer/README.md + Task: + - denoising + - deblurring + - deraining + Year: 2022 +Models: +- Config: configs/restormer/restormer_official_rain13k.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_rain13k + Results: + - Dataset: Others + Metrics: + Rain100H
PSNR/SSIM (Y): + PSNR: 31.4804 + SSIM: 0.9056 + Rain100L
PSNR/SSIM (Y): + PSNR: 39.1023 + SSIM: 0.9787 + Test100
PSNR/SSIM (Y): + PSNR: 32.0287 + SSIM: 0.9239 + Test1200
PSNR/SSIM (Y): + PSNR: 33.2251 + SSIM: 0.9272 + Test2800
PSNR/SSIM (Y): + PSNR: 34.217 + SSIM: 0.9451 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_rain13k-2be7b550.pth +- Config: configs/restormer/restormer_official_gopro.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_gopro + Results: + - Dataset: Others + Metrics: + GoPro
PSNR/SSIM (RGB): + PSNR: 32.9295 + SSIM: 0.9496 + HIDE
PSNR/SSIM (RGB): + PSNR: 31.2289 + SSIM: 0.9345 + RealBlur-J
PSNR/SSIM (Y): + PSNR: 28.4356 + SSIM: 0.8681 + RealBlur-R
PSNR/SSIM (Y): + PSNR: 35.9141 + SSIM: 0.9707 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_gopro-db7363a0.pth +- Config: configs/restormer/restormer_official_dpdd-single.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dpdd-single + Results: + - Dataset: Others + Metrics: + Combined MAE: 0.0378 + Combined PSNR: 25.9805 + Combined SSIM: 0.8166 + Indoor Scenes MAE: 0.0251 + Indoor Scenes PSNR: 28.8681 + Indoor Scenes SSIM: 0.8859 + Outdoor Scenes MAE: 0.0499 + Outdoor Scenes PSNR: 23.241 + Outdoor Scenes SSIM: 0.7509 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-single-6bc31582.pth +- Config: configs/restormer/restormer_official_dpdd-dual.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dpdd-dual + Results: + - Dataset: Others + Metrics: + Combined MAE: 0.0354 + Combined PSNR: 26.616 + Combined SSIM: 0.8346 + Indoor Scenes MAE: 0.0354 + Indoor Scenes PSNR: 26.616 + Indoor Scenes SSIM: 0.8346 + Outdoor Scenes MAE: 0.0354 + Outdoor Scenes PSNR: 26.616 + Outdoor Scenes SSIM: 0.8346 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dpdd-dual-52c94c00.pth +- Config: configs/restormer/restormer_official_dfwb-gray-sigma15.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-gray-sigma15 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 15.0 + BSD68
PSNR/SSIM: + PSNR: 32.4987 + SSIM: 0.894 + Set12
PSNR/SSIM: + PSNR: 34.0182 + SSIM: 0.916 + Urban100
PSNR/SSIM: + PSNR: 34.4336 + SSIM: 0.9419 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma15-da74417f.pth +- Config: configs/restormer/restormer_official_dfwb-gray-sigma25.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-gray-sigma25 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 25.0 + BSD68
PSNR/SSIM: + PSNR: 30.1613 + SSIM: 0.837 + Set12
PSNR/SSIM: + PSNR: 31.7289 + SSIM: 0.8811 + Urban100
PSNR/SSIM: + PSNR: 32.1162 + SSIM: 0.914 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma25-08010841.pth +- Config: configs/restormer/restormer_official_dfwb-gray-sigma50.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-gray-sigma50 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 50.0 + BSD68
PSNR/SSIM: + PSNR: 27.3266 + SSIM: 0.7434 + Set12
PSNR/SSIM: + PSNR: 28.6269 + SSIM: 0.8188 + Urban100
PSNR/SSIM: + PSNR: 28.9636 + SSIM: 0.8571 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-sigma50-ee852dfe.pth +- Config: configs/restormer/restormer_official_dfwb-gray-sigma15.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-gray-sigma15 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 15.0 + BSD68
PSNR/SSIM: + PSNR: 32.4994 + SSIM: 0.8928 + Set12
PSNR/SSIM: + PSNR: 33.9642 + SSIM: 0.9153 + Urban100
PSNR/SSIM: + PSNR: 34.3152 + SSIM: 0.9409 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth +- Config: configs/restormer/restormer_official_dfwb-gray-sigma25.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-gray-sigma25 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 25.0 + BSD68
PSNR/SSIM: + PSNR: 30.1486 + SSIM: 0.836 + Set12
PSNR/SSIM: + PSNR: 31.7106 + SSIM: 0.881 + Urban100
PSNR/SSIM: + PSNR: 32.0457 + SSIM: 0.9131 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth +- Config: configs/restormer/restormer_official_dfwb-gray-sigma50.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-gray-sigma50 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 50.0 + BSD68
PSNR/SSIM: + PSNR: 27.3537 + SSIM: 0.7422 + Set12
PSNR/SSIM: + PSNR: 28.6614 + SSIM: 0.8197 + Urban100
PSNR/SSIM: + PSNR: 28.9848 + SSIM: 0.8571 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-gray-blind-5f094bcc.pth +- Config: configs/restormer/restormer_official_dfwb-color-sigma15.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-color-sigma15 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 15.0 + CBSD68
PSNR/SSIM (RGB): + PSNR: 34.3506 + SSIM: 0.9352 + Kodak24
PSNR/SSIM (RGB): + PSNR: 35.49 + SSIM: 0.9312 + McMaster
PSNR/SSIM (RGB): + PSNR: 35.6072 + SSIM: 0.9352 + Urban100
PSNR/SSIM (RGB): + PSNR: 35.1522 + SSIM: 0.953 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma15-012ceb71.pth +- Config: configs/restormer/restormer_official_dfwb-color-sigma25.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-color-sigma25 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 25.0 + CBSD68
PSNR/SSIM (RGB): + PSNR: 31.7457 + SSIM: 0.8942 + Kodak24
PSNR/SSIM (RGB): + PSNR: 33.0489 + SSIM: 0.8943 + McMaster
PSNR/SSIM (RGB): + PSNR: 33.326 + SSIM: 0.9066 + Urban100
PSNR/SSIM (RGB): + PSNR: 32.967 + SSIM: 0.9317 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma25-e307f222.pth +- Config: configs/restormer/restormer_official_dfwb-color-sigma50.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-color-sigma50 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 50.0 + CBSD68
PSNR/SSIM (RGB): + PSNR: 28.5569 + SSIM: 0.8127 + Kodak24
PSNR/SSIM (RGB): + PSNR: 30.0122 + SSIM: 0.8238 + McMaster
PSNR/SSIM (RGB): + PSNR: 30.2608 + SSIM: 0.8515 + Urban100
PSNR/SSIM (RGB): + PSNR: 30.023 + SSIM: 0.8902 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-sigma50-a991983d.pth +- Config: configs/restormer/restormer_official_dfwb-color-sigma15.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-color-sigma15 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 15.0 + CBSD68
PSNR/SSIM (RGB): + PSNR: 34.3422 + SSIM: 0.9356 + Kodak24
PSNR/SSIM (RGB): + PSNR: 35.4544 + SSIM: 0.9308 + McMaster
PSNR/SSIM (RGB): + PSNR: 35.5473 + SSIM: 0.9344 + Urban100
PSNR/SSIM (RGB): + PSNR: 35.0754 + SSIM: 0.9524 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth +- Config: configs/restormer/restormer_official_dfwb-color-sigma25.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-color-sigma25 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 25.0 + CBSD68
PSNR/SSIM (RGB): + PSNR: 31.7391 + SSIM: 0.8945 + Kodak24
PSNR/SSIM (RGB): + PSNR: 33.038 + SSIM: 0.8941 + McMaster
PSNR/SSIM (RGB): + PSNR: 33.304 + SSIM: 0.9063 + Urban100
PSNR/SSIM (RGB): + PSNR: 32.9165 + SSIM: 0.9312 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth +- Config: configs/restormer/restormer_official_dfwb-color-sigma50.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_dfwb-color-sigma50 + Results: + - Dataset: Others + Metrics: + $\\sigma$: 50.0 + CBSD68
PSNR/SSIM (RGB): + PSNR: 28.5582 + SSIM: 0.8126 + Kodak24
PSNR/SSIM (RGB): + PSNR: 30.0074 + SSIM: 0.8233 + McMaster
PSNR/SSIM (RGB): + PSNR: 30.2671 + SSIM: 0.852 + Urban100
PSNR/SSIM (RGB): + PSNR: 30.0172 + SSIM: 0.8898 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_dfwb-color-blind-dfd03c9f.pth +- Config: configs/restormer/restormer_official_sidd.py + In Collection: Restormer + Metadata: + GPUs: '1' + Training Data: Others + Name: restormer_official_sidd + Results: + - Dataset: Others + Metrics: + SIDD
PSNR/SSIM: + PSNR: 40.0156 + SSIM: 0.9225 + Task: Denoising, Deblurring, Deraining + Weights: https://download.openmmlab.com/mmediting/restormer/restormer_official_sidd-9e7025db.pth diff --git a/configs/restormer/restormer_official_dfwb-color-sigma15.py b/configs/restormer/restormer_official_dfwb-color-sigma15.py new file mode 100644 index 0000000000..3985289ef6 --- /dev/null +++ b/configs/restormer/restormer_official_dfwb-color-sigma15.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-gaussian_color_test_config.py' +] + +experiment_name = 'restormer_official_dfwb_color_sigma15' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 15 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma * 255, sigma * 255] + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=3, + out_channels=3, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0., 0., 0.], + std=[255., 255., 255.], + )) diff --git a/configs/restormer/restormer_official_dfwb-color-sigma25.py b/configs/restormer/restormer_official_dfwb-color-sigma25.py new file mode 100644 index 0000000000..e1ecdb5cfd --- /dev/null +++ b/configs/restormer/restormer_official_dfwb-color-sigma25.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-gaussian_color_test_config.py' +] + +experiment_name = 'restormer_official_dfwb_color_sigma25' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 25 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma * 255, sigma * 255] + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=3, + out_channels=3, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0., 0., 0.], + std=[255., 255., 255.], + )) diff --git a/configs/restormer/restormer_official_dfwb-color-sigma50.py b/configs/restormer/restormer_official_dfwb-color-sigma50.py new file mode 100644 index 0000000000..37a355083f --- /dev/null +++ b/configs/restormer/restormer_official_dfwb-color-sigma50.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-gaussian_color_test_config.py' +] + +experiment_name = 'restormer_official_dfwb_color_sigma50' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 50 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma * 255, sigma * 255] + +# model settings +model = dict( + 
type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=3, + out_channels=3, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0., 0., 0.], + std=[255., 255., 255.], + )) diff --git a/configs/restormer/restormer_official_dfwb-gray-sigma15.py b/configs/restormer/restormer_official_dfwb-gray-sigma15.py new file mode 100644 index 0000000000..a7db922251 --- /dev/null +++ b/configs/restormer/restormer_official_dfwb-gray-sigma15.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-gaussian_gray_test_config.py' +] + +experiment_name = 'restormer_official_dfwb_gray_sigma15' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 15 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma * 255, sigma * 255] + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=1, + out_channels=1, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0.], + std=[255.], + )) diff --git a/configs/restormer/restormer_official_dfwb-gray-sigma25.py b/configs/restormer/restormer_official_dfwb-gray-sigma25.py new file mode 100644 index 0000000000..3ce1537c69 --- /dev/null +++ b/configs/restormer/restormer_official_dfwb-gray-sigma25.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-gaussian_gray_test_config.py' +] + +experiment_name = 'restormer_official_dfwb_gray_sigma25' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 25 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma * 255, sigma * 255] + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=1, + out_channels=1, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0.], + std=[255.], + )) diff --git a/configs/restormer/restormer_official_dfwb-gray-sigma50.py b/configs/restormer/restormer_official_dfwb-gray-sigma50.py new file mode 100644 index 0000000000..e97ecfd2b0 --- /dev/null +++ b/configs/restormer/restormer_official_dfwb-gray-sigma50.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-gaussian_gray_test_config.py' +] + +experiment_name = 'restormer_official_dfwb_gray_sigma50' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 50 +test_dataloader = _base_.test_dataloader 
+for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma * 255, sigma * 255] + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=1, + out_channels=1, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0.], + std=[255.], + )) diff --git a/configs/restormer/restormer_official_dpdd-dual.py b/configs/restormer/restormer_official_dpdd-dual.py new file mode 100644 index 0000000000..ed9526de08 --- /dev/null +++ b/configs/restormer/restormer_official_dpdd-dual.py @@ -0,0 +1,18 @@ +_base_ = [ + 'restormer_official_dpdd-single.py', +] + +experiment_name = 'restormer_official_dpdd-dual' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify PackEditInputs +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[4] = dict(type='PackEditInputs', keys=['imgL', 'imgR']) + +# model settings +model = dict( + generator=dict(inp_channels=6, dual_pixel_task=True), + data_preprocessor=dict(type='GenDataPreprocessor')) diff --git a/configs/restormer/restormer_official_dpdd-single.py b/configs/restormer/restormer_official_dpdd-single.py new file mode 100644 index 0000000000..136520da00 --- /dev/null +++ b/configs/restormer/restormer_official_dpdd-single.py @@ -0,0 +1,30 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/deblurring-defocus_test_config.py' +] + +experiment_name = 'restormer_official_dpdd-single' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=3, + out_channels=3, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='WithBias', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0., 0., 0.], + std=[255., 255., 255.], + )) diff --git a/configs/restormer/restormer_official_gopro.py b/configs/restormer/restormer_official_gopro.py new file mode 100644 index 0000000000..a07a489725 --- /dev/null +++ b/configs/restormer/restormer_official_gopro.py @@ -0,0 +1,30 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/deblurring-motion_test_config.py' +] + +experiment_name = 'restormer_official_gopro' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=3, + out_channels=3, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='WithBias', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0., 0., 0.], + std=[255., 255., 255.], + )) diff --git a/configs/restormer/restormer_official_rain13k.py b/configs/restormer/restormer_official_rain13k.py new file mode 100644 index 0000000000..c352582737 --- 
/dev/null +++ b/configs/restormer/restormer_official_rain13k.py @@ -0,0 +1,30 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/deraining_test_config.py' +] + +experiment_name = 'restormer_official_rain13k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=3, + out_channels=3, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='WithBias', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0., 0., 0.], + std=[255., 255., 255.], + )) diff --git a/configs/restormer/restormer_official_sidd.py b/configs/restormer/restormer_official_sidd.py new file mode 100644 index 0000000000..926796fcde --- /dev/null +++ b/configs/restormer/restormer_official_sidd.py @@ -0,0 +1,30 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-real_test_config.py' +] + +experiment_name = 'restormer_official_sidd' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='Restormer', + inp_channels=3, + out_channels=3, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', + mean=[0., 0., 0.], + std=[255., 255., 255.], + )) diff --git a/configs/sagan/README.md b/configs/sagan/README.md index 6e4a3cf5f9..ddb9d96149 100644 --- a/configs/sagan/README.md +++ b/configs/sagan/README.md @@ -28,14 +28,14 @@ In this paper, we propose the Self-Attention Generative Adversarial Network (SAG | Models | Dataset | Inplace ReLU | dist_step | Total Batchsize (BZ_PER_GPU * NGPU) | Total Iters\* | Iter | IS | FID | Config | Download | Log | | :------------------------: | :------: | :----------: | :-------: | :---------------------------------: | :-----------: | :----: | :-----: | :-----: | :------------------------: | :--------------------------: | :---------------------: | -| SAGAN-32x32-woInplaceReLU Best IS | CIFAR10 | w/o | 5 | 64x1 | 500000 | 400000 | 9.3217 | 10.5030 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_is-iter400000_20210730_125743-4008a9ca.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_20210730_125449_fid-d50568a4_is-04008a9ca.json) | -| SAGAN-32x32-woInplaceReLU Best FID | CIFAR10 | w/o | 5 | 64x1 | 500000 | 480000 | 9.3174 | 9.4252 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_fid-iter480000_20210730_125449-d50568a4.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_20210730_125449_fid-d50568a4_is-04008a9ca.json) | -| SAGAN-32x32-wInplaceReLU Best IS | CIFAR10 | w | 5 | 64x1 | 500000 | 380000 | 9.2286 | 
11.7760 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_is-iter380000_20210730_124937-c77b4d25.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_20210730_125155_fid-cbefb354_is-c77b4d25.json) | -| SAGAN-32x32-wInplaceReLU Best FID | CIFAR10 | w | 5 | 64x1 | 500000 | 460000 | 9.2061 | 10.7781 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_fid-iter460000_20210730_125155-cbefb354.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_20210730_125155_fid-cbefb354_is-c77b4d25.json) | -| SAGAN-128x128-woInplaceReLU Best IS | ImageNet | w/o | 1 | 64x4 | 1000000 | 980000 | 31.5938 | 36.7712 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace_Glr1e-4_Dlr4e-4_ndisc1-4xb64_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_is-iter980000_20210730_163140-cfbebfc6.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_20210730_163431_fid-d7916963_is-cfbebfc6.json) | -| SAGAN-128x128-woInplaceReLU Best FID | ImageNet | w/o | 1 | 64x4 | 1000000 | 950000 | 28.4936 | 34.7838 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace_Glr1e-4_Dlr4e-4_ndisc1-4xb64_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_fid-iter950000_20210730_163431-d7916963.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_20210730_163431_fid-d7916963_is-cfbebfc6.json) | -| SAGAN-128x128-BigGAN Schedule Best IS | ImageNet | w/o | 1 | 32x8 | 1000000 | 826000 | 69.5350 | 12.8295 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.json) | -| SAGAN-128x128-BigGAN Schedule Best FID | ImageNet | w/o | 1 | 32x8 | 1000000 | 826000 | 69.5350 | 12.8295 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth) | [Log](https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.json) | +| SAGAN-32x32-woInplaceReLU Best IS | CIFAR10 | w/o | 5 | 64x1 | 500000 | 400000 | 9.3217 | 10.5030 | [config](./sagan_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | 
[model](https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_is-iter400000_20210730_125743-4008a9ca.pth) | [Log](https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_20210730_125449_fid-d50568a4_is-04008a9ca.json) | +| SAGAN-32x32-woInplaceReLU Best FID | CIFAR10 | w/o | 5 | 64x1 | 500000 | 480000 | 9.3174 | 9.4252 | [config](./sagan_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_fid-iter480000_20210730_125449-d50568a4.pth) | [Log](https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_20210730_125449_fid-d50568a4_is-04008a9ca.json) | +| SAGAN-32x32-wInplaceReLU Best IS | CIFAR10 | w | 5 | 64x1 | 500000 | 380000 | 9.2286 | 11.7760 | [config](./sagan_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_is-iter380000_20210730_124937-c77b4d25.pth) | [Log](https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_20210730_125155_fid-cbefb354_is-c77b4d25.json) | +| SAGAN-32x32-wInplaceReLU Best FID | CIFAR10 | w | 5 | 64x1 | 500000 | 460000 | 9.2061 | 10.7781 | [config](./sagan_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_fid-iter460000_20210730_125155-cbefb354.pth) | [Log](https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_20210730_125155_fid-cbefb354_is-c77b4d25.json) | +| SAGAN-128x128-woInplaceReLU Best IS | ImageNet | w/o | 1 | 64x4 | 1000000 | 980000 | 31.5938 | 36.7712 | [config](./sagan_woReLUinplace_Glr1e-4_Dlr4e-4_ndisc1-4xb64_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmediting/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_is-iter980000_20210730_163140-cfbebfc6.pth) | [Log](https://download.openmmlab.com/mmediting/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_20210730_163431_fid-d7916963_is-cfbebfc6.json) | +| SAGAN-128x128-woInplaceReLU Best FID | ImageNet | w/o | 1 | 64x4 | 1000000 | 950000 | 28.4936 | 34.7838 | [config](./sagan_woReLUinplace_Glr1e-4_Dlr4e-4_ndisc1-4xb64_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmediting/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_fid-iter950000_20210730_163431-d7916963.pth) | [Log](https://download.openmmlab.com/mmediting/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_20210730_163431_fid-d7916963_is-cfbebfc6.json) | +| SAGAN-128x128-BigGAN Schedule Best IS | ImageNet | w/o | 1 | 32x8 | 1000000 | 826000 | 69.5350 | 12.8295 | [config](./sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmediting/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth) | [Log](https://download.openmmlab.com/mmediting/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.json) | +| SAGAN-128x128-BigGAN Schedule Best FID | ImageNet | w/o | 1 | 32x8 | 1000000 | 826000 | 69.5350 | 12.8295 | [config](./sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py) | 
[model](https://download.openmmlab.com/mmediting/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth) | [Log](https://download.openmmlab.com/mmediting/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.json) | '\*' Iteration counting rule in our implementation is different from others. If you want to align with other codebases, you can use the following conversion formula: @@ -48,8 +48,8 @@ To be noted that, in Pytorch Studio GAN, **inplace ReLU** is used in generator a | Models | Dataset | Inplace ReLU | n_disc | Total Iters | IS (Our Pipeline) | FID (Our Pipeline) | IS (StudioGAN) | FID (StudioGAN) | Config | Download | Original Download link | | :------------------: | :------: | :----------: | :----: | :---------: | :---------------: | :----------------: | :------------: | :-------------: | :------------------: | :--------------------: | :-----------------------------------: | -| SAGAN-32x32 StudioGAN | CIFAR10 | w | 5 | 100000 | 9.116 | 10.2011 | 8.680 | 14.009 | [Config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_cvt-studioGAN_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_32_cifar10_convert-studio-rgb_20210730_153321-080da7e2.pth) | [model](https://drive.google.com/drive/folders/1FA8hcz4MB8-hgTwLuDA0ZUfr8slud5P_) | -| SAGAN0-128x128 StudioGAN | ImageNet | w | 1 | 1000000 | 27.367 | 40.1162 | 29.848 | 34.726 | [Config](https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_128_cvt_studioGAN.py) | [model](https://download.openmmlab.com/mmgen/sagan/sagan_128_imagenet1k_convert-studio-rgb_20210730_153357-eddb0d1d.pth) | [model](https://drive.google.com/drive/folders/1ZYaqeeumDgxOPDhRR5QLeLFIpgBJ9S6B) | +| SAGAN-32x32 StudioGAN | CIFAR10 | w | 5 | 100000 | 9.116 | 10.2011 | 8.680 | 14.009 | [Config](./sagan_cvt-studioGAN_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmediting/sagan/sagan_32_cifar10_convert-studio-rgb_20210730_153321-080da7e2.pth) | [model](https://drive.google.com/drive/folders/1FA8hcz4MB8-hgTwLuDA0ZUfr8slud5P_) | +| SAGAN0-128x128 StudioGAN | ImageNet | w | 1 | 1000000 | 27.367 | 40.1162 | 29.848 | 34.726 | [Config](./sagan_128_cvt_studioGAN.py) | [model](https://download.openmmlab.com/mmediting/sagan/sagan_128_imagenet1k_convert-studio-rgb_20210730_153357-eddb0d1d.pth) | [model](https://drive.google.com/drive/folders/1ZYaqeeumDgxOPDhRR5QLeLFIpgBJ9S6B) | - `Our Pipeline` denote results evaluated with our pipeline. - `StudioGAN` denote results released by Pytorch-StudioGAN. @@ -61,7 +61,7 @@ For IS metric, our implementation is different from PyTorch-Studio GAN in the fo For FID evaluation, we follow the pipeline of [BigGAN](https://github.com/ajbrock/BigGAN-PyTorch/blob/98459431a5d618d644d54cd1e9fceb1e5045648d/calculate_inception_moments.py#L52), where the whole training set is adopted to extract inception statistics, and Pytorch Studio GAN uses 50000 randomly selected samples. Besides, we also use [Tero's Inception](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt) for feature extraction. -You can download the preprocessed inception state by the following url: [CIFAR10](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/cifar10.pkl) and [ImageNet1k](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/imagenet.pkl). 
+You can download the preprocessed inception state by the following url: [CIFAR10](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/cifar10.pkl) and [ImageNet1k](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/imagenet.pkl). You can use following commands to extract those inception states by yourself. diff --git a/configs/sagan/metafile.yml b/configs/sagan/metafile.yml index 1eb6cbe472..43c95f81a0 100644 --- a/configs/sagan/metafile.yml +++ b/configs/sagan/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - https://proceedings.mlr.press/v97/zhang19d.html README: configs/sagan/README.md + Task: + - conditional gans + Year: 2019 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py +- Config: configs/sagan/sagan_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py In Collection: SAGAN Metadata: Training Data: Others @@ -21,8 +24,8 @@ Models: Total Iters\*: 500000.0 dist_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_is-iter400000_20210730_125743-4008a9ca.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_is-iter400000_20210730_125743-4008a9ca.pth +- Config: configs/sagan/sagan_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py In Collection: SAGAN Metadata: Training Data: Others @@ -36,8 +39,8 @@ Models: Total Iters\*: 500000.0 dist_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_fid-iter480000_20210730_125449-d50568a4.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_woReUinplace_fid-iter480000_20210730_125449-d50568a4.pth +- Config: configs/sagan/sagan_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py In Collection: SAGAN Metadata: Training Data: Others @@ -51,8 +54,8 @@ Models: Total Iters\*: 500000.0 dist_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_is-iter380000_20210730_124937-c77b4d25.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_is-iter380000_20210730_124937-c77b4d25.pth +- Config: configs/sagan/sagan_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py In Collection: SAGAN Metadata: Training Data: Others @@ -66,8 +69,8 @@ Models: Total Iters\*: 500000.0 dist_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_fid-iter460000_20210730_125155-cbefb354.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace_Glr1e-4_Dlr4e-4_ndisc1-4xb64_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_cifar10_32_lr2e-4_ndisc5_b64x1_wReLUinplace_fid-iter460000_20210730_125155-cbefb354.pth +- Config: configs/sagan/sagan_woReLUinplace_Glr1e-4_Dlr4e-4_ndisc1-4xb64_imagenet1k-128x128.py In 
Collection: SAGAN Metadata: Training Data: Others @@ -81,8 +84,8 @@ Models: Total Iters\*: 1000000.0 dist_step: 1.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_is-iter980000_20210730_163140-cfbebfc6.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace_Glr1e-4_Dlr4e-4_ndisc1-4xb64_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_is-iter980000_20210730_163140-cfbebfc6.pth +- Config: configs/sagan/sagan_woReLUinplace_Glr1e-4_Dlr4e-4_ndisc1-4xb64_imagenet1k-128x128.py In Collection: SAGAN Metadata: Training Data: Others @@ -96,8 +99,8 @@ Models: Total Iters\*: 1000000.0 dist_step: 1.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_fid-iter950000_20210730_163431-d7916963.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_imagenet1k_128_Glr1e-4_Dlr4e-4_ndisc1_b32x4_woReLUinplace_fid-iter950000_20210730_163431-d7916963.pth +- Config: configs/sagan/sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py In Collection: SAGAN Metadata: Training Data: Others @@ -111,8 +114,8 @@ Models: Total Iters\*: 1000000.0 dist_step: 1.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth +- Config: configs/sagan/sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py In Collection: SAGAN Metadata: Training Data: Others @@ -126,8 +129,8 @@ Models: Total Iters\*: 1000000.0 dist_step: 1.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_cvt-studioGAN_cifar10-32x32.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth +- Config: configs/sagan/sagan_cvt-studioGAN_cifar10-32x32.py In Collection: SAGAN Metadata: Training Data: Others @@ -142,8 +145,8 @@ Models: Total Iters: 100000.0 n_disc: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sagan/sagan_32_cifar10_convert-studio-rgb_20210730_153321-080da7e2.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sagan/sagan_128_cvt_studioGAN.py + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_32_cifar10_convert-studio-rgb_20210730_153321-080da7e2.pth +- Config: configs/sagan/sagan_128_cvt_studioGAN.py In Collection: SAGAN Metadata: Training Data: Others @@ -158,4 +161,4 @@ Models: Total Iters: 1000000.0 n_disc: 1.0 Task: Conditional GANs - Weights: 
https://download.openmmlab.com/mmgen/sagan/sagan_128_imagenet1k_convert-studio-rgb_20210730_153357-eddb0d1d.pth + Weights: https://download.openmmlab.com/mmediting/sagan/sagan_128_imagenet1k_convert-studio-rgb_20210730_153357-eddb0d1d.pth diff --git a/configs/singan/README.md b/configs/singan/README.md index 7d13fe333f..e39785f5c3 100644 --- a/configs/singan/README.md +++ b/configs/singan/README.md @@ -26,11 +26,11 @@ We introduce SinGAN, an unconditional generative model that can be learned from -| Models | Data | Num Scales | Config | Download | -| :----: | :---------------------------------------------------------: | :--------: | :-----------------------------------------------------------: | :--------------------------------------------------------------: | -| SinGAN | [balloons.png](https://download.openmmlab.com/mmgen/dataset/singan/balloons.png) | 8 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/singan/singan_balloons.py) | [ckpt](https://download.openmmlab.com/mmgen/singan/singan_balloons_20210406_191047-8fcd94cf.pth) \| [pkl](https://download.openmmlab.com/mmgen/singan/singan_balloons_20210406_191047-8fcd94cf.pkl) | -| SinGAN | [fish.jpg](https://download.openmmlab.com/mmgen/dataset/singan/fish-crop.jpg) | 10 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/singan/singan_fish.py) | [ckpt](https://download.openmmlab.com/mmgen/singan/singan_fis_20210406_201006-860d91b6.pth) \| [pkl](https://download.openmmlab.com/mmgen/singan/singan_fis_20210406_201006-860d91b6.pkl) | -| SinGAN | [bohemian.png](https://download.openmmlab.com/mmgen/dataset/singan/bohemian.png) | 10 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/singan/singan_bohemian.py) | [ckpt](https://download.openmmlab.com/mmgen/singan/singan_bohemian_20210406_175439-f964ee38.pth) \| [pkl](https://download.openmmlab.com/mmgen/singan/singan_bohemian_20210406_175439-f964ee38.pkl) | +| Models | Data | Num Scales | Config | Download | +| :----: | :-------------------------------------------------------------------------: | :--------: | :----------------------------: | :-----------------------------------------------------------------------------: | +| SinGAN | [balloons.png](https://download.openmmlab.com/mmediting/dataset/singan/balloons.png) | 8 | [config](./singan_balloons.py) | [ckpt](https://download.openmmlab.com/mmediting/singan/singan_balloons_20210406_191047-8fcd94cf.pth) \| [pkl](https://download.openmmlab.com/mmediting/singan/singan_balloons_20210406_191047-8fcd94cf.pkl) | +| SinGAN | [fish.jpg](https://download.openmmlab.com/mmediting/dataset/singan/fish-crop.jpg) | 10 | [config](./singan_fish.py) | [ckpt](https://download.openmmlab.com/mmediting/singan/singan_fis_20210406_201006-860d91b6.pth) \| [pkl](https://download.openmmlab.com/mmediting/singan/singan_fis_20210406_201006-860d91b6.pkl) | +| SinGAN | [bohemian.png](https://download.openmmlab.com/mmediting/dataset/singan/bohemian.png) | 10 | [config](./singan_bohemian.py) | [ckpt](https://download.openmmlab.com/mmediting/singan/singan_bohemian_20210406_175439-f964ee38.pth) \| [pkl](https://download.openmmlab.com/mmediting/singan/singan_bohemian_20210406_175439-f964ee38.pkl) | ## Notes for using SinGAN diff --git a/configs/singan/metafile.yml b/configs/singan/metafile.yml index 1f83fe0814..5cae1250b7 100644 --- a/configs/singan/metafile.yml +++ b/configs/singan/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - 
https://openaccess.thecvf.com/content_ICCV_2019/html/Shaham_SinGAN_Learning_a_Generative_Model_From_a_Single_Natural_Image_ICCV_2019_paper.html README: configs/singan/README.md + Task: + - internal learning + Year: 2019 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/singan/singan_balloons.py +- Config: configs/singan/singan_balloons.py In Collection: SinGAN Metadata: Training Data: Others @@ -17,8 +20,8 @@ Models: Metrics: Num Scales: 8.0 Task: Internal Learning - Weights: https://download.openmmlab.com/mmgen/singan/singan_balloons_20210406_191047-8fcd94cf.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/singan/singan_fish.py + Weights: https://download.openmmlab.com/mmediting/singan/singan_balloons_20210406_191047-8fcd94cf.pth +- Config: configs/singan/singan_fish.py In Collection: SinGAN Metadata: Training Data: Others @@ -28,8 +31,8 @@ Models: Metrics: Num Scales: 10.0 Task: Internal Learning - Weights: https://download.openmmlab.com/mmgen/singan/singan_fis_20210406_201006-860d91b6.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/singan/singan_bohemian.py + Weights: https://download.openmmlab.com/mmediting/singan/singan_fis_20210406_201006-860d91b6.pth +- Config: configs/singan/singan_bohemian.py In Collection: SinGAN Metadata: Training Data: Others @@ -39,4 +42,4 @@ Models: Metrics: Num Scales: 10.0 Task: Internal Learning - Weights: https://download.openmmlab.com/mmgen/singan/singan_bohemian_20210406_175439-f964ee38.pth + Weights: https://download.openmmlab.com/mmediting/singan/singan_bohemian_20210406_175439-f964ee38.pth diff --git a/configs/sngan_proj/README.md b/configs/sngan_proj/README.md index 8f3e4c0bec..adfda64dfc 100644 --- a/configs/sngan_proj/README.md +++ b/configs/sngan_proj/README.md @@ -29,14 +29,14 @@ One of the challenges in the study of generative adversarial networks is the ins | Models | Dataset | Inplace ReLU | disc_step | Total Iters\* | Iter | IS | FID | Config | Download | Log | | :--------------------------------: | :------: | :----------: | :-------: | :-----------: | :----: | :-----: | :-----: | :---------------------------------: | :-----------------------------------: | :------------------------------: | -| SNGAN_Proj-32x32-woInplaceReLU Best IS | CIFAR10 | w/o | 5 | 500000 | 400000 | 9.6919 | 9.8203 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [ckpt](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_is-iter400000_20210709_163823-902ce1ae.pth) | [Log](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_20210624_065306_fid-ba0862a0_is-902ce1ae.json) | -| SNGAN_Proj-32x32-woInplaceReLU Best FID | CIFAR10 | w/o | 5 | 500000 | 490000 | 9.5659 | 8.1158 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [ckpt](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_fid-iter490000_20210709_163329-ba0862a0.pth) | [Log](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_20210624_065306_fid-ba0862a0_is-902ce1ae.json) | -| SNGAN_Proj-32x32-wInplaceReLU Best IS | CIFAR10 | w | 5 | 500000 | 490000 | 9.5564 | 8.3462 | 
[config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [ckpt](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_wReLUinplace_is-iter490000_20210709_202230-cd863c74.pth) | [Log](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_wReLUinplace_20210624_065306_fid-ba0862a0_is-902ce1ae.json) | -| SNGAN_Proj-32x32-wInplaceReLU Best FID | CIFAR10 | w | 5 | 500000 | 490000 | 9.5564 | 8.3462 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [ckpt](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4-b64x1_wReLUinplace_fid-iter490000_20210709_203038-191b2648.pth) | [Log](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_wReLUinplace_20210624_065306_fid-ba0862a0_is-902ce1ae.json) | -| SNGAN_Proj-128x128-woInplaceReLU Best IS | ImageNet | w/o | 5 | 1000000 | 952000 | 30.0651 | 33.4682 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_woReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py) | [ckpt](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_is-iter952000_20210730_132027-9c884a21.pth) | [Log](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_20210730_131424_fid-061bf803_is-9c884a21.json) | -| SNGAN_Proj-128x128-woInplaceReLU Best FID | ImageNet | w/o | 5 | 1000000 | 989000 | 29.5779 | 32.6193 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_woReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py) | [ckpt](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_fid-iter988000_20210730_131424-061bf803.pth) | [Log](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_20210730_131424_fid-061bf803_is-9c884a21.json) | -| SNGAN_Proj-128x128-wInplaceReLU Best IS | ImageNet | w | 5 | 1000000 | 944000 | 28.1799 | 34.3383 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_wReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py) | [ckpt](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_is-iter944000_20210730_132714-ca0ccd07.pth) | [Log](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_20210730_132401_fid-9a682411_is-ca0ccd07.json) | -| SNGAN_Proj-128x128-wInplaceReLU Best FID | ImageNet | w | 5 | 1000000 | 988000 | 27.7948 | 33.4821 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_wReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py) | [ckpt](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_fid-iter988000_20210730_132401-9a682411.pth) | [Log](https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_20210730_132401_fid-9a682411_is-ca0ccd07.json) | +| SNGAN_Proj-32x32-woInplaceReLU Best IS | CIFAR10 | w/o | 5 | 500000 | 400000 | 9.6919 | 9.8203 | 
[config](./sngan-proj_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [ckpt](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_is-iter400000_20210709_163823-902ce1ae.pth) | [Log](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_20210624_065306_fid-ba0862a0_is-902ce1ae.json) | +| SNGAN_Proj-32x32-woInplaceReLU Best FID | CIFAR10 | w/o | 5 | 500000 | 490000 | 9.5659 | 8.1158 | [config](./sngan-proj_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [ckpt](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_fid-iter490000_20210709_163329-ba0862a0.pth) | [Log](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_20210624_065306_fid-ba0862a0_is-902ce1ae.json) | +| SNGAN_Proj-32x32-wInplaceReLU Best IS | CIFAR10 | w | 5 | 500000 | 490000 | 9.5564 | 8.3462 | [config](./sngan-proj_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [ckpt](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_wReLUinplace_is-iter490000_20210709_202230-cd863c74.pth) | [Log](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_wReLUinplace_20210624_063454_is-cd863c74_fid-191b2648.json) | +| SNGAN_Proj-32x32-wInplaceReLU Best FID | CIFAR10 | w | 5 | 500000 | 490000 | 9.5564 | 8.3462 | [config](./sngan-proj_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py) | [ckpt](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4-b64x1_wReLUinplace_fid-iter490000_20210709_203038-191b2648.pth) | [Log](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_wReLUinplace_20210624_063454_is-cd863c74_fid-191b2648.json) | +| SNGAN_Proj-128x128-woInplaceReLU Best IS | ImageNet | w/o | 5 | 1000000 | 952000 | 30.0651 | 33.4682 | [config](./sngan-proj_woReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py) | [ckpt](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_is-iter952000_20210730_132027-9c884a21.pth) | [Log](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_20210730_131424_fid-061bf803_is-9c884a21.json) | +| SNGAN_Proj-128x128-woInplaceReLU Best FID | ImageNet | w/o | 5 | 1000000 | 989000 | 29.5779 | 32.6193 | [config](./sngan-proj_woReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py) | [ckpt](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_fid-iter988000_20210730_131424-061bf803.pth) | [Log](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_20210730_131424_fid-061bf803_is-9c884a21.json) | +| SNGAN_Proj-128x128-wInplaceReLU Best IS | ImageNet | w | 5 | 1000000 | 944000 | 28.1799 | 34.3383 | [config](./sngan-proj_wReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py) | [ckpt](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_is-iter944000_20210730_132714-ca0ccd07.pth) | [Log](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_20210730_132401_fid-9a682411_is-ca0ccd07.json) | +| SNGAN_Proj-128x128-wInplaceReLU Best FID | ImageNet | w | 5 | 
1000000 | 988000 | 27.7948 | 33.4821 | [config](./sngan-proj_wReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py) | [ckpt](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_fid-iter988000_20210730_132401-9a682411.pth) | [Log](https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_20210730_132401_fid-9a682411_is-ca0ccd07.json) | '\*' Iteration counting rule in our implementation is different from others. If you want to align with other codebases, you can use the following conversion formula: @@ -49,8 +49,8 @@ To be noted that, in Pytorch Studio GAN, **inplace ReLU** is used in generator a | Models | Dataset | Inplace ReLU | disc_step | Total Iters | IS (Our Pipeline) | FID (Our Pipeline) | IS (StudioGAN) | FID (StudioGAN) | Config | Download | Original Download link | | :-----------------: | :------: | :----------: | :-------: | :---------: | :---------------: | :----------------: | :------------: | :-------------: | :-----------------: | :--------------------: | :----------------------------------: | -| SAGAN_Proj-32x32 StudioGAN | CIFAR10 | w | 5 | 100000 | 9.372 | 10.2011 | 8.677 | 13.248 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj-cvt-studioGAN_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmgen/sngan_proj/sngan_cifar10_convert-studio-rgb_20210709_111346-2979202d.pth) | [model](https://drive.google.com/drive/folders/16s5Cr-V-NlfLyy_uyXEkoNxLBt-8wYSM) | -| SAGAN_Proj-128x128 StudioGAN | ImageNet | w | 2 | 1000000 | 30.218 | 29.8199 | 32.247 | 26.792 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj-cvt-studioGAN_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmgen/sngan_proj/sngan_imagenet1k_convert-studio-rgb_20210709_111406-877b1130.pth) | [model](https://drive.google.com/drive/folders/1Ek2wAMlxpajL_M8aub4DKQ9B313K8XhS) | +| SAGAN_Proj-32x32 StudioGAN | CIFAR10 | w | 5 | 100000 | 9.372 | 10.2011 | 8.677 | 13.248 | [config](./sngan-proj-cvt-studioGAN_cifar10-32x32.py) | [model](https://download.openmmlab.com/mmediting/sngan_proj/sngan_cifar10_convert-studio-rgb_20210709_111346-2979202d.pth) | [model](https://drive.google.com/drive/folders/16s5Cr-V-NlfLyy_uyXEkoNxLBt-8wYSM) | +| SAGAN_Proj-128x128 StudioGAN | ImageNet | w | 2 | 1000000 | 30.218 | 29.8199 | 32.247 | 26.792 | [config](./sngan-proj-cvt-studioGAN_imagenet1k-128x128.py) | [model](https://download.openmmlab.com/mmediting/sngan_proj/sngan_imagenet1k_convert-studio-rgb_20210709_111406-877b1130.pth) | [model](https://drive.google.com/drive/folders/1Ek2wAMlxpajL_M8aub4DKQ9B313K8XhS) | - `Our Pipeline` denote results evaluated with our pipeline. - `StudioGAN` denote results released by Pytorch-StudioGAN. @@ -62,7 +62,7 @@ For IS metric, our implementation is different from PyTorch-Studio GAN in the fo For FID evaluation, we follow the pipeline of [BigGAN](https://github.com/ajbrock/BigGAN-PyTorch/blob/98459431a5d618d644d54cd1e9fceb1e5045648d/calculate_inception_moments.py#L52), where the whole training set is adopted to extract inception statistics, and Pytorch Studio GAN uses 50000 randomly selected samples. Besides, we also use [Tero's Inception](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt) for feature extraction. 
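As a rough illustration of what this evaluation boils down to, the sketch below computes the Fréchet distance between two sets of inception features. It is a minimal, self-contained example with random placeholder features; in practice the real statistics come from the preprocessed inception states linked below and the fake features from running Tero's Inception on generated samples, and the actual evaluation code in MMEditing may differ in its details.

```python
import numpy as np
from scipy import linalg


def frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Frechet distance between two multivariate Gaussians."""
    diff = mu1 - mu2
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        # Fall back to a slightly regularized product if it is ill-conditioned.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean, _ = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset), disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * np.trace(covmean)


# Placeholder features for illustration only (N samples x 2048-d inception features).
rng = np.random.default_rng(0)
real_feats = rng.standard_normal((5000, 2048))
fake_feats = rng.standard_normal((5000, 2048))

mu_r, sigma_r = real_feats.mean(0), np.cov(real_feats, rowvar=False)
mu_f, sigma_f = fake_feats.mean(0), np.cov(fake_feats, rowvar=False)
print(f'FID: {frechet_distance(mu_r, sigma_r, mu_f, sigma_f):.4f}')
```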
-You can download the preprocessed inception state by the following url: [CIFAR10](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/cifar10.pkl) and [ImageNet1k](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/imagenet.pkl). +You can download the preprocessed inception state by the following url: [CIFAR10](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/cifar10.pkl) and [ImageNet1k](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/imagenet.pkl). You can use following commands to extract those inception states by yourself. diff --git a/configs/sngan_proj/metafile.yml b/configs/sngan_proj/metafile.yml index d772bd7807..b6f5067693 100644 --- a/configs/sngan_proj/metafile.yml +++ b/configs/sngan_proj/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - https://openreview.net/forum?id=B1QRgziT- README: configs/sngan_proj/README.md + Task: + - conditional gans + Year: 2018 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py +- Config: configs/sngan_proj/sngan-proj_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py In Collection: SNGAN Metadata: Training Data: Others @@ -21,8 +24,8 @@ Models: Total Iters\*: 500000.0 disc_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_is-iter400000_20210709_163823-902ce1ae.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_is-iter400000_20210709_163823-902ce1ae.pth +- Config: configs/sngan_proj/sngan-proj_woReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py In Collection: SNGAN Metadata: Training Data: Others @@ -36,8 +39,8 @@ Models: Total Iters\*: 500000.0 disc_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_fid-iter490000_20210709_163329-ba0862a0.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_woReLUinplace_fid-iter490000_20210709_163329-ba0862a0.pth +- Config: configs/sngan_proj/sngan-proj_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py In Collection: SNGAN Metadata: Training Data: Others @@ -51,8 +54,8 @@ Models: Total Iters\*: 500000.0 disc_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_wReLUinplace_is-iter490000_20210709_202230-cd863c74.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4_b64x1_wReLUinplace_is-iter490000_20210709_202230-cd863c74.pth +- Config: configs/sngan_proj/sngan-proj_wReLUinplace_lr2e-4-ndisc5-1xb64_cifar10-32x32.py In Collection: SNGAN Metadata: Training Data: Others @@ -66,8 +69,8 @@ Models: Total Iters\*: 500000.0 disc_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_cifar10_32_lr-2e-4-b64x1_wReLUinplace_fid-iter490000_20210709_203038-191b2648.pth -- 
Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_woReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_cifar10_32_lr-2e-4-b64x1_wReLUinplace_fid-iter490000_20210709_203038-191b2648.pth +- Config: configs/sngan_proj/sngan-proj_woReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py In Collection: SNGAN Metadata: Training Data: Others @@ -81,8 +84,8 @@ Models: Total Iters\*: 1000000.0 disc_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_is-iter952000_20210730_132027-9c884a21.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_woReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_is-iter952000_20210730_132027-9c884a21.pth +- Config: configs/sngan_proj/sngan-proj_woReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py In Collection: SNGAN Metadata: Training Data: Others @@ -96,8 +99,8 @@ Models: Total Iters\*: 1000000.0 disc_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_fid-iter988000_20210730_131424-061bf803.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_wReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_woReLUinplace_fid-iter988000_20210730_131424-061bf803.pth +- Config: configs/sngan_proj/sngan-proj_wReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py In Collection: SNGAN Metadata: Training Data: Others @@ -111,8 +114,8 @@ Models: Total Iters\*: 1000000.0 disc_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_is-iter944000_20210730_132714-ca0ccd07.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj_wReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_is-iter944000_20210730_132714-ca0ccd07.pth +- Config: configs/sngan_proj/sngan-proj_wReLUinplace_Glr2e-4_Dlr5e-5_ndisc5-2xb128_imagenet1k-128x128.py In Collection: SNGAN Metadata: Training Data: Others @@ -126,8 +129,8 @@ Models: Total Iters\*: 1000000.0 disc_step: 5.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_fid-iter988000_20210730_132401-9a682411.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj-cvt-studioGAN_cifar10-32x32.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_proj_imagenet1k_128_Glr2e-4_Dlr5e-5_ndisc5_b128x2_wReLUinplace_fid-iter988000_20210730_132401-9a682411.pth +- Config: configs/sngan_proj/sngan-proj-cvt-studioGAN_cifar10-32x32.py In Collection: SNGAN Metadata: Training Data: Others @@ -142,8 +145,8 @@ Models: Total Iters: 100000.0 disc_step: 5.0 Task: Conditional GANs - Weights: 
https://download.openmmlab.com/mmgen/sngan_proj/sngan_cifar10_convert-studio-rgb_20210709_111346-2979202d.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/sngan_proj/sngan-proj-cvt-studioGAN_imagenet1k-128x128.py + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_cifar10_convert-studio-rgb_20210709_111346-2979202d.pth +- Config: configs/sngan_proj/sngan-proj-cvt-studioGAN_imagenet1k-128x128.py In Collection: SNGAN Metadata: Training Data: Others @@ -158,4 +161,4 @@ Models: Total Iters: 1000000.0 disc_step: 2.0 Task: Conditional GANs - Weights: https://download.openmmlab.com/mmgen/sngan_proj/sngan_imagenet1k_convert-studio-rgb_20210709_111406-877b1130.pth + Weights: https://download.openmmlab.com/mmediting/sngan_proj/sngan_imagenet1k_convert-studio-rgb_20210709_111406-877b1130.pth diff --git a/configs/srcnn/README.md b/configs/srcnn/README.md index 990c5b9c03..520fc41da6 100644 --- a/configs/srcnn/README.md +++ b/configs/srcnn/README.md @@ -25,7 +25,7 @@ The metrics are `PSNR / SSIM` . | Method | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU Info | Download | | :----------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :------: | :------------------------------------------------------------------: | -| [srcnn_x4k915_1x16_1000k_div2k](/configs/srcnn/srcnn_x4k915_1xb16-1000k_div2k.py) | 28.4316 | 25.6486 | 27.7460 | 0.8099 | 0.7014 | 0.7854 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/srcnn/srcnn_x4k915_1x16_1000k_div2k_20200608-4186f232.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/srcnn/srcnn_x4k915_1x16_1000k_div2k_20200608_120159.log.json) | +| [srcnn_x4k915_1x16_1000k_div2k](./srcnn_x4k915_1xb16-1000k_div2k.py) | 28.4316 | 25.6486 | 27.7460 | 0.8099 | 0.7014 | 0.7854 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/srcnn/srcnn_x4k915_1x16_1000k_div2k_20200608-4186f232.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/srcnn/srcnn_x4k915_1x16_1000k_div2k_20200608_120159.log.json) | ## Quick Start diff --git a/configs/srcnn/README_zh-CN.md b/configs/srcnn/README_zh-CN.md index 757f2c8edc..fb5cc9e83e 100644 --- a/configs/srcnn/README_zh-CN.md +++ b/configs/srcnn/README_zh-CN.md @@ -27,9 +27,9 @@ 在 RGB 通道上进行评估,在评估之前裁剪每个边界中的 `scale` 像素。 我们使用 `PSNR` 和 `SSIM` 作为指标。 -| 算法 | Set5 | Set14 | DIV2K | GPU 信息 | 下载 | -| :---------------------------------------------------------------------: | :--------------: | :---------------: | :--------------: | :------: | :----------------------------------------------------------------------: | -| [srcnn_x4k915_1x16_1000k_div2k](/configs/srcnn/srcnn_x4k915_1xb16-1000k_div2k.py) | 28.4316 / 0.8099 | 25.6486 / 0.7014 | 27.7460 / 0.7854 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/srcnn/srcnn_x4k915_1x16_1000k_div2k_20200608-4186f232.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/srcnn/srcnn_x4k915_1x16_1000k_div2k_20200608_120159.log.json) | +| 算法 | Set5 | Set14 | DIV2K | GPU 信息 | 下载 | +| :------------------------------------------------------------------: | :--------------: | :---------------: | :--------------: | :------: | :-------------------------------------------------------------------------: | +| [srcnn_x4k915_1x16_1000k_div2k](./srcnn_x4k915_1xb16-1000k_div2k.py) | 28.4316 / 0.8099 | 25.6486 / 0.7014 | 27.7460 / 0.7854 | 1 | 
[模型](https://download.openmmlab.com/mmediting/restorers/srcnn/srcnn_x4k915_1x16_1000k_div2k_20200608-4186f232.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/srcnn/srcnn_x4k915_1x16_1000k_div2k_20200608_120159.log.json) | ## 快速开始 diff --git a/configs/srcnn/metafile.yml b/configs/srcnn/metafile.yml index 39ffe96499..778d67b92a 100644 --- a/configs/srcnn/metafile.yml +++ b/configs/srcnn/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1501.00092 README: configs/srcnn/README.md + Task: + - image super-resolution + Year: 2015 Models: - Config: configs/srcnn/srcnn_x4k915_1xb16-1000k_div2k.py In Collection: SRCNN diff --git a/configs/srgan_resnet/README.md b/configs/srgan_resnet/README.md index 3b51828e1f..b40bb093d3 100644 --- a/configs/srgan_resnet/README.md +++ b/configs/srgan_resnet/README.md @@ -26,8 +26,8 @@ The metrics are `PSNR / SSIM` . | Method | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU Info | Download | | :----------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :------: | :------------------------------------------------------------------: | -| [msrresnet_x4c64b16_1x16_300k_div2k](/configs/srgan_resnet/msrresnet_x4c64b16_1xb16-1000k_div2k.py) | 30.2252 | 26.7762 | 28.9748 | 0.8491 | 0.7369 | 0.8178 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/msrresnet_x4c64b16_1x16_300k_div2k_20200521-61556be5.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/msrresnet_x4c64b16_1x16_300k_div2k_20200521_110246.log.json) | -| [srgan_x4c64b16_1x16_1000k_div2k](/configs/srgan_resnet/srgan_x4c64b16_1xb16-1000k_div2k.py) | 27.9499 | 24.7383 | 26.5697 | 0.7846 | 0.6491 | 0.7365 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/srgan_x4c64b16_1x16_1000k_div2k_20200606-a1f0810e.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/srgan_x4c64b16_1x16_1000k_div2k_20200506_191442.log.json) | +| [msrresnet_x4c64b16_1x16_300k_div2k](./msrresnet_x4c64b16_1xb16-1000k_div2k.py) | 30.2252 | 26.7762 | 28.9748 | 0.8491 | 0.7369 | 0.8178 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/msrresnet_x4c64b16_1x16_300k_div2k_20200521-61556be5.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/msrresnet_x4c64b16_1x16_300k_div2k_20200521_110246.log.json) | +| [srgan_x4c64b16_1x16_1000k_div2k](./srgan_x4c64b16_1xb16-1000k_div2k.py) | 27.9499 | 24.7383 | 26.5697 | 0.7846 | 0.6491 | 0.7365 | 1 | [model](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/srgan_x4c64b16_1x16_1000k_div2k_20200606-a1f0810e.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/srgan_x4c64b16_1x16_1000k_div2k_20200506_191442.log.json) | ## Quick Start diff --git a/configs/srgan_resnet/README_zh-CN.md b/configs/srgan_resnet/README_zh-CN.md index 5d16d8db50..3758d4c1b1 100644 --- a/configs/srgan_resnet/README_zh-CN.md +++ b/configs/srgan_resnet/README_zh-CN.md @@ -25,8 +25,8 @@ | 算法 | Set5 | Set14 | DIV2K | GPU 信息 | 下载 | | :---------------------------------------------------------------------: | :---------------: | :--------------: | :--------------: | :------: | :----------------------------------------------------------------------: | -| [msrresnet_x4c64b16_1x16_300k_div2k](/configs/srgan_resnet/msrresnet_x4c64b16_1xb16-1000k_div2k.py) | 30.2252 / 0.8491 | 26.7762 / 
0.7369 | 28.9748 / 0.8178 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/msrresnet_x4c64b16_1x16_300k_div2k_20200521-61556be5.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/msrresnet_x4c64b16_1x16_300k_div2k_20200521_110246.log.json) | -| [srgan_x4c64b16_1x16_1000k_div2k](/configs/srgan_resnet/srgan_x4c64b16_1xb16-1000k_div2k.py) | 27.9499 / 0.7846 | 24.7383 / 0.6491 | 26.5697 / 0.7365 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/srgan_x4c64b16_1x16_1000k_div2k_20200606-a1f0810e.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/srgan_x4c64b16_1x16_1000k_div2k_20200506_191442.log.json) | +| [msrresnet_x4c64b16_1x16_300k_div2k](./msrresnet_x4c64b16_1xb16-1000k_div2k.py) | 30.2252 / 0.8491 | 26.7762 / 0.7369 | 28.9748 / 0.8178 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/msrresnet_x4c64b16_1x16_300k_div2k_20200521-61556be5.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/msrresnet_x4c64b16_1x16_300k_div2k_20200521_110246.log.json) | +| [srgan_x4c64b16_1x16_1000k_div2k](./srgan_x4c64b16_1xb16-1000k_div2k.py) | 27.9499 / 0.7846 | 24.7383 / 0.6491 | 26.5697 / 0.7365 | 1 | [模型](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/srgan_x4c64b16_1x16_1000k_div2k_20200606-a1f0810e.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/srresnet_srgan/srgan_x4c64b16_1x16_1000k_div2k_20200506_191442.log.json) | ## 快速开始 diff --git a/configs/srgan_resnet/metafile.yml b/configs/srgan_resnet/metafile.yml index bace23e8a9..c80cb0aa74 100644 --- a/configs/srgan_resnet/metafile.yml +++ b/configs/srgan_resnet/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/1609.04802 README: configs/srgan_resnet/README.md + Task: + - image super-resolution + Year: 2016 Models: - Config: configs/srgan_resnet/msrresnet_x4c64b16_1xb16-1000k_div2k.py In Collection: SRGAN diff --git a/configs/stable_diffusion/README.md b/configs/stable_diffusion/README.md new file mode 100644 index 0000000000..5f11460a2e --- /dev/null +++ b/configs/stable_diffusion/README.md @@ -0,0 +1,67 @@ +# Stable Diffusion (2022) + +> [Stable Diffusion](https://github.com/CompVis/stable-diffusion) + +> **Task**: Text2Image + + + +## Abstract + + + +Stable Diffusion is a latent diffusion model conditioned on the text embeddings of a CLIP text encoder, which allows you to create images from text inputs. + + + +
+ +
+ +## Pretrained models + +We use the Stable Diffusion v1.5 weights. The model consists of several components, each with its own weights: the VAE, the UNet and the CLIP text encoder. You should download the weights from [stable-diffusion-1.5](https://huggingface.co/runwayml/stable-diffusion-v1-5) and set `pretrained_model_path` in the config to the weights directory. + +| Diffusion Model | Config | Download | +| :-------------------: | :------------------------------------------------: | :------------------------------------------------------------: | +| stable_diffusion_v1.5 | [config](./stable-diffusion_ddim_denoisingunet.py) | [model](https://huggingface.co/runwayml/stable-diffusion-v1-5) | + +## Quick Start + +Run the following code to generate an image from a text prompt. + +```python +from mmengine import MODELS, Config +from torchvision import utils + +from mmedit.utils import register_all_modules + +register_all_modules() + +config = 'configs/stable_diffusion/stable-diffusion_ddim_denoisingunet.py' +StableDiffuser = MODELS.build(Config.fromfile(config).model) +prompt = 'A mecha robot in a favela in expressionist style' +StableDiffuser = StableDiffuser.to('cuda') + +image = StableDiffuser.infer(prompt)['samples'] +utils.save_image(image, 'robot.png') +``` + +## Comments + +Our codebase for the stable diffusion models builds heavily on the [diffusers codebase](https://github.com/huggingface/diffusers), and the model weights are from [stable-diffusion-1.5](https://huggingface.co/runwayml/stable-diffusion-v1-5). + +Thanks for the efforts of the community! + +## Citation + +```bibtex +@misc{rombach2021highresolution, + title={High-Resolution Image Synthesis with Latent Diffusion Models}, + author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer}, + year={2021}, + eprint={2112.10752}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/configs/stable_diffusion/metafile.yml b/configs/stable_diffusion/metafile.yml new file mode 100644 index 0000000000..3253a59f58 --- /dev/null +++ b/configs/stable_diffusion/metafile.yml @@ -0,0 +1,22 @@ +Collections: +- Metadata: + Architecture: + - Stable Diffusion + Name: Stable Diffusion + Paper: + - https://github.com/CompVis/stable-diffusion + README: configs/stable_diffusion/README.md + Task: + - text2image + Year: 2022 +Models: +- Config: configs/stable_diffusion/stable-diffusion_ddim_denoisingunet.py + In Collection: Stable Diffusion + Metadata: + Training Data: Others + Name: stable-diffusion_ddim_denoisingunet + Results: + - Dataset: Others + Metrics: {} + Task: Text2Image + Weights: https://huggingface.co/runwayml/stable-diffusion-v1-5 diff --git a/configs/stable_diffusion/stable-diffusion_ddim_denoisingunet.py b/configs/stable_diffusion/stable-diffusion_ddim_denoisingunet.py new file mode 100644 index 0000000000..e83921f4d7 --- /dev/null +++ b/configs/stable_diffusion/stable-diffusion_ddim_denoisingunet.py @@ -0,0 +1,58 @@ +unet = dict( + type='DenoisingUnet', + image_size=512, + base_channels=320, + channels_cfg=[1, 2, 4, 4], + unet_type='stable', + act_cfg=dict(type='silu'), + cross_attention_dim=768, + num_heads=8, + in_channels=4, + layers_per_block=2, + down_block_types=[ + 'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', + 'DownBlock2D' + ], + up_block_types=[ + 'UpBlock2D', 'CrossAttnUpBlock2D', 'CrossAttnUpBlock2D', + 'CrossAttnUpBlock2D' + ], + output_cfg=dict(var='fixed')) + +vae = dict( + act_fn='silu', + block_out_channels=[128, 256, 512, 512], + down_block_types=[ + 'DownEncoderBlock2D', 'DownEncoderBlock2D',
'DownEncoderBlock2D', + 'DownEncoderBlock2D' + ], + in_channels=3, + latent_channels=4, + layers_per_block=2, + norm_num_groups=32, + out_channels=3, + sample_size=512, + up_block_types=[ + 'UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D', + 'UpDecoderBlock2D' + ]) + +diffusion_scheduler = dict( + type='DDIMScheduler', + variance_type='learned_range', + beta_end=0.012, + beta_schedule='scaled_linear', + beta_start=0.00085, + num_train_timesteps=1000, + set_alpha_to_one=False, + clip_sample=False) + +init_cfg = dict(type='Pretrained', pretrained_model_path='') + +model = dict( + type='StableDiffusion', + diffusion_scheduler=diffusion_scheduler, + unet=unet, + vae=vae, + init_cfg=init_cfg, +) diff --git a/configs/styleganv1/README.md b/configs/styleganv1/README.md index 1df71be75d..e9f0d331f1 100644 --- a/configs/styleganv1/README.md +++ b/configs/styleganv1/README.md @@ -26,10 +26,10 @@ We propose an alternative generator architecture for generative adversarial netw -| Model | FID50k | P&R50k_full | Config | Download | -| :------------------: | :----: | :-----------: | :-----------------------------------------------------------------------------: | :--------------------------------------------------------------------------------: | -| styleganv1_ffhq_256 | 6.090 | 70.228/27.050 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv1/styleganv1_ffhq-256x256_8xb4-25Mimgs.py) | [model](https://download.openmmlab.com/mmgen/styleganv1/styleganv1_ffhq_256_g8_25Mimg_20210407_161748-0094da86.pth) | -| styleganv1_ffhq_1024 | 4.056 | 70.302/36.869 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv1/styleganv1_ffhq-1024x1024_8xb4-25Mimgs.py) | [model](https://download.openmmlab.com/mmgen/styleganv1/styleganv1_ffhq_1024_g8_25Mimg_20210407_161627-850a7234.pth) | +| Model | FID50k | P&R50k_full | Config | Download | +| :------------------: | :----: | :-----------: | :---------------------------------------------------: | :----------------------------------------------------------------------------------------------------------: | +| styleganv1_ffhq_256 | 6.090 | 70.228/27.050 | [config](./styleganv1_ffhq-256x256_8xb4-25Mimgs.py) | [model](https://download.openmmlab.com/mmediting/styleganv1/styleganv1_ffhq_256_g8_25Mimg_20210407_161748-0094da86.pth) | +| styleganv1_ffhq_1024 | 4.056 | 70.302/36.869 | [config](./styleganv1_ffhq-1024x1024_8xb4-25Mimgs.py) | [model](https://download.openmmlab.com/mmediting/styleganv1/styleganv1_ffhq_1024_g8_25Mimg_20210407_161627-850a7234.pth) | ## Citation diff --git a/configs/styleganv1/metafile.yml b/configs/styleganv1/metafile.yml index d1a62f0f92..30611fdbe5 100644 --- a/configs/styleganv1/metafile.yml +++ b/configs/styleganv1/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - https://openaccess.thecvf.com/content_CVPR_2019/html/Karras_A_Style-Based_Generator_Architecture_for_Generative_Adversarial_Networks_CVPR_2019_paper.html README: configs/styleganv1/README.md + Task: + - unconditional gans + Year: 2019 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv1/styleganv1_ffhq-256x256_8xb4-25Mimgs.py +- Config: configs/styleganv1/styleganv1_ffhq-256x256_8xb4-25Mimgs.py In Collection: StyleGANv1 Metadata: Training Data: FFHQ @@ -20,8 +23,8 @@ Models: PSNR: 70.228 SSIM: 27.05 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/styleganv1/styleganv1_ffhq_256_g8_25Mimg_20210407_161748-0094da86.pth -- Config: 
https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv1/styleganv1_ffhq-1024x1024_8xb4-25Mimgs.py + Weights: https://download.openmmlab.com/mmediting/styleganv1/styleganv1_ffhq_256_g8_25Mimg_20210407_161748-0094da86.pth +- Config: configs/styleganv1/styleganv1_ffhq-1024x1024_8xb4-25Mimgs.py In Collection: StyleGANv1 Metadata: Training Data: FFHQ @@ -34,4 +37,4 @@ Models: PSNR: 70.302 SSIM: 36.869 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/styleganv1/styleganv1_ffhq_1024_g8_25Mimg_20210407_161627-850a7234.pth + Weights: https://download.openmmlab.com/mmediting/styleganv1/styleganv1_ffhq_1024_g8_25Mimg_20210407_161627-850a7234.pth diff --git a/configs/styleganv2/README.md b/configs/styleganv2/README.md index 631ea0dbb4..22edb54cb4 100644 --- a/configs/styleganv2/README.md +++ b/configs/styleganv2/README.md @@ -28,14 +28,14 @@ The style-based GAN architecture (StyleGAN) yields state-of-the-art results in d | Model | Comment | FID50k | Precision50k | Recall50k | Config | Download | | :---------------------------------: | :-------------: | :----: | :----------: | :-------: | :----------------------------------------------------------: | :-------------------------------------------------------------: | -| stylegan2_config-f_ffhq_1024 | official weight | 2.8134 | 62.856 | 49.400 | [stylegan2_c2_8xb4_ffhq-1024x1024](/configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) | -| stylegan2_config-f_lsun-car_384x512 | official weight | 5.4316 | 65.986 | 48.190 | [stylegan2_c2_8xb4_lsun-car-384x512](/configs/styleganv2/stylegan2_c2_8xb4_lsun-car-384x512.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth) | -| stylegan2_config-f_horse_256 | official weight | - | - | - | [stylegan2_c2_8xb4-800kiters_lsun-horse-256x256](/configs/styleganv2/stylegan2_c2_8xb4-800kiters_lsun-horse-256x256.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth) | -| stylegan2_config-f_church_256 | official weight | - | - | - | [stylegan2_c2_8xb4-800kiters_lsun-church-256x256](/configs/styleganv2/stylegan2_c2_8xb4-800kiters_lsun-church-256x256.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth) | -| stylegan2_config-f_cat_256 | official weight | - | - | - | [stylegan2_c2_8xb4-800kiters_lsun-cat-256x256](/configs/styleganv2/stylegan2_c2_8xb4-800kiters_lsun-cat-256x256.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth) | -| stylegan2_config-f_ffhq_256 | our training | 3.992 | 69.012 | 40.417 | [stylegan2_c2_8xb4-800kiters_ffhq-256x256](/configs/styleganv2/stylegan2_c2_8xb4-800kiters_ffhq-256x256.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_256_b4x8_20210407_160709-7890ae1f.pth) | -| stylegan2_config-f_ffhq_1024 | our training | 2.8185 | 68.236 | 49.583 | [stylegan2_c2_8xb4_ffhq-1024x1024](/configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth) | -| stylegan2_config-f_lsun-car_384x512 | our training | 2.4116 | 66.760 | 50.576 | 
[stylegan2_c2_8xb4_lsun-car-384x512](/configs/styleganv2/stylegan2_c2_8xb4_lsun-car-384x512.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_lsun-car_384x512_b4x8_1800k_20210424_160929-fc9072ca.pth) | +| stylegan2_config-f_ffhq_1024 | official weight | 2.8134 | 62.856 | 49.400 | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) | +| stylegan2_config-f_lsun-car_384x512 | official weight | 5.4316 | 65.986 | 48.190 | [stylegan2_c2_8xb4_lsun-car-384x512](./stylegan2_c2_8xb4_lsun-car-384x512.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth) | +| stylegan2_config-f_horse_256 | official weight | - | - | - | [stylegan2_c2_8xb4-800kiters_lsun-horse-256x256](./stylegan2_c2_8xb4-800kiters_lsun-horse-256x256.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth) | +| stylegan2_config-f_church_256 | official weight | - | - | - | [stylegan2_c2_8xb4-800kiters_lsun-church-256x256](./stylegan2_c2_8xb4-800kiters_lsun-church-256x256.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth) | +| stylegan2_config-f_cat_256 | official weight | - | - | - | [stylegan2_c2_8xb4-800kiters_lsun-cat-256x256](./stylegan2_c2_8xb4-800kiters_lsun-cat-256x256.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth) | +| stylegan2_config-f_ffhq_256 | our training | 3.992 | 69.012 | 40.417 | [stylegan2_c2_8xb4-800kiters_ffhq-256x256](./stylegan2_c2_8xb4-800kiters_ffhq-256x256.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_256_b4x8_20210407_160709-7890ae1f.pth) | +| stylegan2_config-f_ffhq_1024 | our training | 2.8185 | 68.236 | 49.583 | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth) | +| stylegan2_config-f_lsun-car_384x512 | our training | 2.4116 | 66.760 | 50.576 | [stylegan2_c2_8xb4_lsun-car-384x512](./stylegan2_c2_8xb4_lsun-car-384x512.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_lsun-car_384x512_b4x8_1800k_20210424_160929-fc9072ca.pth) | ## FP16 Support and Experiments @@ -49,26 +49,16 @@ Currently, we have supported FP16 training for StyleGAN2, and here are the resul As shown in the figure, we provide **3** ways to do mixed-precision training for `StyleGAN2`: -- [stylegan2_c2_fp16_PL-no-scaler](/configs/styleganv2/stylegan2_c2-PL_8xb4-fp16-partial-GD-no-scaler-800kiters_ffhq-256x256.py): In this setting, we try our best to follow the official FP16 implementation in [StyleGAN2-ADA](https://github.com/NVlabs/stylegan2-ada). Similar to the official version, we only adopt FP16 training for the higher-resolution feature maps (the last 4 stages in G and the first 4 stages). Note that we do not adopt the `clamp` way to avoid gradient overflow used in the official implementation. We use the `autocast` function from `torch.cuda.amp` package. 
-- [stylegan2_c2_fp16-globalG-partialD_PL-R1-no-scaler](/configs/styleganv2/stylegan2_c2-PL-R1_8xb4-fp16-globalG-partialD-no-scaler-800kiters_ffhq-256x256.py): In this config, we try to adopt mixed-precision training for the whole generator, but in partial discriminator (the first 4 higher-resolution stages). Note that we do not apply the loss scaler in the path length loss and gradient penalty loss. Because we always meet divergence after adopting the loss scaler to scale the gradient in these two losses. -- [stylegan2_c2_apex_fp16_PL-R1-no-scaler](/configs/styleganv2/stylegan2_c2-PL-R1_8xb4-apex-fp16-no-scaler-800kiters_ffhq-256x256.py): In this setting, we adopt the [APEX](https://github.com/NVIDIA/apex) toolkit to implement mixed-precision training with multiple loss/gradient scalers. In APEX, you can assign different loss scalers for the generator and the discriminator respectively. Note that we still ignore the gradient scaler in the path length loss and gradient penalty loss. +- [stylegan2_c2_fp16_PL-no-scaler](./stylegan2_c2-PL_8xb4-fp16-partial-GD-no-scaler-800kiters_ffhq-256x256.py): In this setting, we follow the official FP16 implementation in [StyleGAN2-ADA](https://github.com/NVlabs/stylegan2-ada) as closely as possible. As in the official version, we only adopt FP16 training for the higher-resolution feature maps (the last 4 stages in G and the first 4 stages in D). Note that we do not adopt the `clamp` trick that the official implementation uses to avoid gradient overflow; instead, we rely on the `autocast` function from the `torch.cuda.amp` package (see the sketch below). +- [stylegan2_c2_fp16-globalG-partialD_PL-R1-no-scaler](./stylegan2_c2-PL-R1_8xb4-fp16-globalG-partialD-no-scaler-800kiters_ffhq-256x256.py): In this config, we adopt mixed-precision training for the whole generator but only for part of the discriminator (the first 4 higher-resolution stages). Note that we do not apply the loss scaler to the path length loss and the gradient penalty loss, because training always diverges once the loss scaler is used to scale the gradients of these two losses. +- [stylegan2_c2_apex_fp16_PL-R1-no-scaler](./stylegan2_c2-PL-R1_8xb4-apex-fp16-no-scaler-800kiters_ffhq-256x256.py): In this setting, we adopt the [APEX](https://github.com/NVIDIA/apex) toolkit to implement mixed-precision training with multiple loss/gradient scalers. In APEX, you can assign different loss scalers to the generator and the discriminator respectively. Note that we still ignore the gradient scaler in the path length loss and gradient penalty loss.
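For readers unfamiliar with `torch.cuda.amp`, the following is a minimal, generic sketch of how the `autocast`/`GradScaler` pattern from the first setting above is typically wired into a GAN training step. It is for illustration only: the module names and the logistic (softplus) losses are placeholders rather than MMEditing's actual training loop, and the regularization terms are left out of the scaled path exactly as the notes above describe.

```python
# Minimal sketch of autocast-based mixed-precision GAN updates (illustration only).
import torch
import torch.nn.functional as F
from torch.cuda.amp import GradScaler, autocast

scaler_g = GradScaler()  # separate scalers for G and D
scaler_d = GradScaler()


def train_step(generator, discriminator, g_opt, d_opt, real_imgs, noise):
    # Discriminator update: forward passes run under autocast (FP16 where safe).
    d_opt.zero_grad()
    with autocast():
        fake_imgs = generator(noise)
        d_loss = (F.softplus(discriminator(fake_imgs.detach())).mean() +
                  F.softplus(-discriminator(real_imgs)).mean())
    scaler_d.scale(d_loss).backward()
    scaler_d.step(d_opt)
    scaler_d.update()

    # Generator update. Path-length and R1 penalties would be added here
    # WITHOUT the loss scaler, matching the notes above.
    g_opt.zero_grad()
    with autocast():
        g_loss = F.softplus(-discriminator(generator(noise))).mean()
    scaler_g.scale(g_loss).backward()
    scaler_g.step(g_opt)
    scaler_g.update()
```

In the configs themselves the equivalent behaviour is expressed through options such as `num_fp16_scales` and the `AmpOptimWrapper` optimizer wrapper rather than a hand-written loop.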
| Model | Comment | Dataset | FID50k | Config | Download | | :----------------------------------------------: | :-------------------------------------: | :-----: | :----: | :-----------------------------------------------: | :-------------------------------------------------: | -| stylegan2_config-f_ffhq_256 | baseline | FFHQ256 | 3.992 | [stylegan2_c2_8xb4-800kiters_ffhq-256x256](/configs/styleganv2/stylegan2_c2_8xb4-800kiters_ffhq-256x256.py) | [ckpt](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_256_b4x8_20210407_160709-7890ae1f.pth) | -| stylegan2_c2_fp16_partial-GD_PL-no-scaler_ffhq_256_b4x8_800k | partial layers in fp16 | FFHQ256 | 4.331 | [stylegan2_c2-PL_8xb4-fp16-partial-GD-no-scaler-800kiters_ffhq-256x256](/configs/styleganv2/stylegan2_c2-PL_8xb4-fp16-partial-GD-no-scaler-800kiters_ffhq-256x256.py) | [ckpt](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_fp16_partial-GD_PL-no-scaler_ffhq_256_b4x8_800k_20210508_114854-dacbe4c9.pth) | -| stylegan2_c2_fp16-globalG-partialD_PL-R1-no-scaler_ffhq_256_b4x8_800k | the whole G in fp16 | FFHQ256 | 4.362 | [stylegan2_c2-PL-R1_8xb4-fp16-globalG-partialD-no-scaler-800kiters_ffhq-256x256](/configs/styleganv2/stylegan2_c2-PL-R1_8xb4-fp16-globalG-partialD-no-scaler-800kiters_ffhq-256x256.py) | [ckpt](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_fp16-globalG-partialD_PL-R1-no-scaler_ffhq_256_b4x8_800k_20210508_114930-ef8270d4.pth) | -| stylegan2_c2_apex_fp16_PL-R1-no-scaler_ffhq_256_b4x8_800k | the whole G&D in fp16 + two loss scaler | FFHQ256 | 4.614 | [stylegan2_c2-PL-R1_8xb4-apex-fp16-no-scaler-800kiters_ffhq-256x256](/configs/styleganv2/stylegan2_c2-PL-R1_8xb4-apex-fp16-no-scaler-800kiters_ffhq-256x256.py) | [ckpt](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_apex_fp16_PL-R1-no-scaler_ffhq_256_b4x8_800k_20210508_114701-c2bb8afd.pth) | - -In addition, we also provide `QuickTestImageDataset` to users for quickly checking whether the code can be run correctly. It's more important for FP16 experiments, because some cuda operations may no support mixed precision training. Esepcially for `APEX`, you can use [this config](/configs/styleganv2/stylegan2_c2_8xb4-apex-fp16-800kiters_quicktest-ffhq-256x256.py) in your local machine by running: - -```bash -bash tools/dist_train.sh \ - configs/styleganv2/stylegan2_c2_8xb4-apex-fp16-800kiters_quicktest-ffhq-256x256.py 1 \ - --work-dir ./work_dirs/quick-test -``` - -With a similar way, users can switch to [config for partial-GD](/configs/styleganv2/stylegan2_c2_8xb4-fp16-800kiters_quicktest-ffhq-256x256.py) and [config for globalG-partialD](/configs/styleganv2/stylegan2_c2_8xb4-fp16-global-800kiters_quicktest-ffhq-256x256.py) to test the other two mixed precision training configuration. 
+| stylegan2_config-f_ffhq_256 | baseline | FFHQ256 | 3.992 | [stylegan2_c2_8xb4-800kiters_ffhq-256x256](./stylegan2_c2_8xb4-800kiters_ffhq-256x256.py) | [ckpt](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_256_b4x8_20210407_160709-7890ae1f.pth) | +| stylegan2_c2_fp16_partial-GD_PL-no-scaler_ffhq_256_b4x8_800k | partial layers in fp16 | FFHQ256 | 4.331 | [stylegan2_c2-PL_8xb4-fp16-partial-GD-no-scaler-800kiters_ffhq-256x256](./stylegan2_c2-PL_8xb4-fp16-partial-GD-no-scaler-800kiters_ffhq-256x256.py) | [ckpt](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_fp16_partial-GD_PL-no-scaler_ffhq_256_b4x8_800k_20210508_114854-dacbe4c9.pth) | +| stylegan2_c2_fp16-globalG-partialD_PL-R1-no-scaler_ffhq_256_b4x8_800k | the whole G in fp16 | FFHQ256 | 4.362 | [stylegan2_c2-PL-R1_8xb4-fp16-globalG-partialD-no-scaler-800kiters_ffhq-256x256](./stylegan2_c2-PL-R1_8xb4-fp16-globalG-partialD-no-scaler-800kiters_ffhq-256x256.py) | [ckpt](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_fp16-globalG-partialD_PL-R1-no-scaler_ffhq_256_b4x8_800k_20210508_114930-ef8270d4.pth) | +| stylegan2_c2_apex_fp16_PL-R1-no-scaler_ffhq_256_b4x8_800k | the whole G&D in fp16 + two loss scaler | FFHQ256 | 4.614 | [stylegan2_c2-PL-R1_8xb4-apex-fp16-no-scaler-800kiters_ffhq-256x256](./stylegan2_c2-PL-R1_8xb4-apex-fp16-no-scaler-800kiters_ffhq-256x256.py) | [ckpt](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_apex_fp16_PL-R1-no-scaler_ffhq_256_b4x8_800k_20210508_114701-c2bb8afd.pth) | *Note that to use the [APEX](https://github.com/NVIDIA/apex) toolkit, you have to install it following the official guidance. (APEX is not included in our requirements.) If you are using GPUs without tensor cores, you had better switch to a newer PyTorch version (>= 1.7.0).
Otherwise, the APEX installation or running may meet several bugs.* @@ -76,25 +66,25 @@ With a similar way, users can switch to [config for partial-GD](/configs/stylega | Model | Comment | FID50k | FID Version | Config | Download | | :--------------------------: | :-------------: | :----: | :-------------: | :-----------------------------------------------------------------: | :-------------------------------------------------------------------: | -| stylegan2_config-f_ffhq_1024 | official weight | 2.8732 | Tero's StyleGAN | [stylegan2_c2_8xb4_ffhq-1024x1024](/configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) \| [FID-Reals](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/ffhq-1024-50k-stylegan.pkl) | -| stylegan2_config-f_ffhq_1024 | our training | 2.9413 | Tero's StyleGAN | [stylegan2_c2_8xb4_ffhq-1024x1024](/configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth) \| [FID-Reals](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/ffhq-1024-50k-stylegan.pkl) | -| stylegan2_config-f_ffhq_1024 | official weight | 2.8134 | Our PyTorch | [stylegan2_c2_8xb4_ffhq-1024x1024](/configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) \| [FID-Reals](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/ffhq-1024-50k-rgb.pkl) | -| stylegan2_config-f_ffhq_1024 | our training | 2.8185 | Our PyTorch | [stylegan2_c2_8xb4_ffhq-1024x1024](/configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth) \| [FID-Reals](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/ffhq-1024-50k-rgb.pkl) | +| stylegan2_config-f_ffhq_1024 | official weight | 2.8732 | Tero's StyleGAN | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) \| [FID-Reals](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/ffhq-1024-50k-stylegan.pkl) | +| stylegan2_config-f_ffhq_1024 | our training | 2.9413 | Tero's StyleGAN | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth) \| [FID-Reals](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/ffhq-1024-50k-stylegan.pkl) | +| stylegan2_config-f_ffhq_1024 | official weight | 2.8134 | Our PyTorch | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) \| [FID-Reals](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/ffhq-1024-50k-rgb.pkl) | +| stylegan2_config-f_ffhq_1024 | our training | 2.8185 | Our PyTorch | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth) \| 
[FID-Reals](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/ffhq-1024-50k-rgb.pkl) | In this table, we observe that the FID with Tero's inception network is similar to that with PyTorch Inception (in mmediting). Thus, we use the FID with PyTorch's Inception net (although the weights are not from the official model zoo) by default, because it can be run on different PyTorch versions. If you use [Tero's Inception net](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/inception-2015-12-05.pt), your PyTorch version must be `>=1.6.0`. More precalculated inception pickle files are listed here: -- FFHQ 256x256 real inceptions, PyTorch InceptionV3. [download](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/ffhq-256-50k-rgb.pkl) -- LSUN-Car 384x512 real inceptions, PyTorch InceptionV3. [download](https://download.openmmlab.com/mmgen/evaluation/fid_inception_pkl/lsun-car-512_50k_rgb.pkl) +- FFHQ 256x256 real inceptions, PyTorch InceptionV3. [download](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/ffhq-256-50k-rgb.pkl) +- LSUN-Car 384x512 real inceptions, PyTorch InceptionV3. [download](https://download.openmmlab.com/mmediting/evaluation/fid_inception_pkl/lsun-car-512_50k_rgb.pkl) ## About Different Implementation and Setting of PR Metric -| Model | P&R Details | Precision | Recall | -| :--------------------------------------------: | :------------------------------: | :-------: | :----: | -| stylegan2_config-f_ffhq_1024 (official weight) | use Tero's VGG16, P&R50k_full | 67.876 | 49.299 | -| stylegan2_config-f_ffhq_1024 (official weight) | use Tero's VGG16, P&R50k | 62.856 | 49.400 | -| stylegan2_config-f_ffhq_1024 (official weight) | use PyTorch's VGG16, P&R50k_full | 67.662 | 55.460 | +| Model | Config | Download | P&R Details | Precision | Recall | +| :--------------------------------------------: | :--------------------------------------------------: | :-----------------------------------------------------: | :------------------------------: | :-------: | :----: | +| stylegan2_config-f_ffhq_1024 (official weight) | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) | use Tero's VGG16, P&R50k_full | 67.876 | 49.299 | +| stylegan2_config-f_ffhq_1024 (official weight) | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) | use Tero's VGG16, P&R50k | 62.856 | 49.400 | +| stylegan2_config-f_ffhq_1024 (official weight) | [stylegan2_c2_8xb4_ffhq-1024x1024](./stylegan2_c2_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth) | use PyTorch's VGG16, P&R50k_full | 67.662 | 55.460 | As shown in this table, `P&R50k_full` is the metric used in StyleGANv1 and StyleGANv2. `full` indicates that we use the whole dataset for extracting the real distribution, e.g., 70000 images in the FFHQ dataset. However, adopting the VGG16 provided by [Tero](https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/metrics/vgg16.pt) requires PyTorch `>=1.6.0`. Be careful when using PyTorch's VGG16 to extract features, as it yields higher precision and recall.
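Whichever Inception network is used to extract the features, the distance itself is the standard Fréchet distance between two Gaussians fitted to the real and fake feature sets; the precalculated pickles above essentially cache the real-data side of this computation. The following is a generic sketch of that formula, not MMEditing's metric implementation; the feature arrays are assumed inputs.

```python
# Generic FID sketch: given real/fake Inception features (N x D arrays),
# compute the Frechet distance between the two fitted Gaussians.
import numpy as np
from scipy import linalg


def frechet_distance(real_feats: np.ndarray, fake_feats: np.ndarray) -> float:
    mu_r, mu_f = real_feats.mean(axis=0), fake_feats.mean(axis=0)
    cov_r = np.cov(real_feats, rowvar=False)
    cov_f = np.cov(fake_feats, rowvar=False)

    # Matrix square root of the covariance product; drop tiny imaginary
    # components coming from numerical error.
    covmean, _ = linalg.sqrtm(cov_r @ cov_f, disp=False)
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    diff = mu_r - mu_f
    return float(diff @ diff + np.trace(cov_r + cov_f - 2 * covmean))
```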
diff --git a/configs/styleganv2/metafile.yml b/configs/styleganv2/metafile.yml index e8c2a5f8bf..ff6883d861 100644 --- a/configs/styleganv2/metafile.yml +++ b/configs/styleganv2/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://openaccess.thecvf.com/content_CVPR_2020/html/Karras_Analyzing_and_Improving_the_Image_Quality_of_StyleGAN_CVPR_2020_paper.html README: configs/styleganv2/README.md + Task: + - unconditional gans + Year: 2020 Models: - Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py In Collection: StyleGANv2 @@ -19,7 +22,7 @@ Models: Precision50k: 62.856 Recall50k: 49.4 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth - Config: configs/styleganv2/stylegan2_c2_8xb4_lsun-car-384x512.py In Collection: StyleGANv2 Metadata: @@ -32,7 +35,7 @@ Models: Precision50k: 65.986 Recall50k: 48.19 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth - Config: configs/styleganv2/stylegan2_c2_8xb4-800kiters_lsun-horse-256x256.py In Collection: StyleGANv2 Metadata: @@ -42,7 +45,7 @@ Models: - Dataset: Others Metrics: {} Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth - Config: configs/styleganv2/stylegan2_c2_8xb4-800kiters_lsun-church-256x256.py In Collection: StyleGANv2 Metadata: @@ -52,7 +55,7 @@ Models: - Dataset: Others Metrics: {} Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth - Config: configs/styleganv2/stylegan2_c2_8xb4-800kiters_lsun-cat-256x256.py In Collection: StyleGANv2 Metadata: @@ -62,7 +65,7 @@ Models: - Dataset: CAT Metrics: {} Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth - Config: configs/styleganv2/stylegan2_c2_8xb4-800kiters_ffhq-256x256.py In Collection: StyleGANv2 Metadata: @@ -75,7 +78,7 @@ Models: Precision50k: 69.012 Recall50k: 40.417 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_256_b4x8_20210407_160709-7890ae1f.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_256_b4x8_20210407_160709-7890ae1f.pth - Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py In Collection: StyleGANv2 Metadata: @@ -88,7 +91,7 @@ Models: Precision50k: 68.236 Recall50k: 49.583 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth + Weights: 
https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth - Config: configs/styleganv2/stylegan2_c2_8xb4_lsun-car-384x512.py In Collection: StyleGANv2 Metadata: @@ -101,7 +104,7 @@ Models: Precision50k: 66.76 Recall50k: 50.576 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_lsun-car_384x512_b4x8_1800k_20210424_160929-fc9072ca.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_lsun-car_384x512_b4x8_1800k_20210424_160929-fc9072ca.pth - Config: configs/styleganv2/stylegan2_c2_8xb4-800kiters_ffhq-256x256.py In Collection: StyleGANv2 Metadata: @@ -112,7 +115,7 @@ Models: Metrics: FID50k: 3.992 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_256_b4x8_20210407_160709-7890ae1f.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_256_b4x8_20210407_160709-7890ae1f.pth - Config: configs/styleganv2/stylegan2_c2-PL_8xb4-fp16-partial-GD-no-scaler-800kiters_ffhq-256x256.py In Collection: StyleGANv2 Metadata: @@ -123,7 +126,7 @@ Models: Metrics: FID50k: 4.331 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_fp16_partial-GD_PL-no-scaler_ffhq_256_b4x8_800k_20210508_114854-dacbe4c9.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_fp16_partial-GD_PL-no-scaler_ffhq_256_b4x8_800k_20210508_114854-dacbe4c9.pth - Config: configs/styleganv2/stylegan2_c2-PL-R1_8xb4-fp16-globalG-partialD-no-scaler-800kiters_ffhq-256x256.py In Collection: StyleGANv2 Metadata: @@ -134,7 +137,7 @@ Models: Metrics: FID50k: 4.362 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_fp16-globalG-partialD_PL-R1-no-scaler_ffhq_256_b4x8_800k_20210508_114930-ef8270d4.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_fp16-globalG-partialD_PL-R1-no-scaler_ffhq_256_b4x8_800k_20210508_114930-ef8270d4.pth - Config: configs/styleganv2/stylegan2_c2-PL-R1_8xb4-apex-fp16-no-scaler-800kiters_ffhq-256x256.py In Collection: StyleGANv2 Metadata: @@ -145,7 +148,7 @@ Models: Metrics: FID50k: 4.614 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_apex_fp16_PL-R1-no-scaler_ffhq_256_b4x8_800k_20210508_114701-c2bb8afd.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_apex_fp16_PL-R1-no-scaler_ffhq_256_b4x8_800k_20210508_114701-c2bb8afd.pth - Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py In Collection: StyleGANv2 Metadata: @@ -156,7 +159,7 @@ Models: Metrics: FID50k: 2.8732 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth - Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py In Collection: StyleGANv2 Metadata: @@ -167,7 +170,7 @@ Models: Metrics: FID50k: 2.9413 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth - Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py In Collection: StyleGANv2 Metadata: @@ -178,7 +181,7 @@ Models: Metrics: FID50k: 
2.8134 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth - Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py In Collection: StyleGANv2 Metadata: @@ -189,4 +192,40 @@ Models: Metrics: FID50k: 2.8185 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth + Weights: https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth +- Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py + In Collection: StyleGANv2 + Metadata: + Training Data: FFHQ + Name: stylegan2_c2_8xb4_ffhq-1024x1024 + Results: + - Dataset: FFHQ + Metrics: + Precision: 67.876 + Recall: 49.299 + Task: Unconditional GANs + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth +- Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py + In Collection: StyleGANv2 + Metadata: + Training Data: FFHQ + Name: stylegan2_c2_8xb4_ffhq-1024x1024 + Results: + - Dataset: FFHQ + Metrics: + Precision: 62.856 + Recall: 49.4 + Task: Unconditional GANs + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth +- Config: configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py + In Collection: StyleGANv2 + Metadata: + Training Data: FFHQ + Name: stylegan2_c2_8xb4_ffhq-1024x1024 + Results: + - Dataset: FFHQ + Metrics: + Precision: 67.662 + Recall: 55.46 + Task: Unconditional GANs + Weights: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth diff --git a/configs/styleganv2/stylegan2_c2_8xb4-apex-fp16-800kiters_quicktest-ffhq-256x256.py b/configs/styleganv2/stylegan2_c2_8xb4-apex-fp16-800kiters_quicktest-ffhq-256x256.py deleted file mode 100644 index 021613a3f8..0000000000 --- a/configs/styleganv2/stylegan2_c2_8xb4-apex-fp16-800kiters_quicktest-ffhq-256x256.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Config for the `config-f` setting in StyleGAN2.""" - -_base_ = ['./stylegan2_c2_8xb4-800kiters_ffhq-256x256.py'] - -model = dict( - generator=dict(out_size=256), - discriminator=dict(in_size=256, convert_input_fp32=False), -) - -# remain to be refactored -apex_amp = dict( - mode='gan', init_args=dict(opt_level='O1', num_losses=2, loss_scale=512.)) - -train_cfg = dict(max_iters=800002) - -batch_size = 2 -dataset_type = 'QuickTestImageDataset' - -train_dataloader = dict(batch_size=batch_size, dataset=dict(type=dataset_type)) - -val_dataloader = dict(batch_size=batch_size, dataset=dict(type=dataset_type)) - -test_dataloader = dict( - batch_size=batch_size, dataset=dict(dataset_type=dataset_type)) - -default_hooks = dict(logger=dict(type='LoggerHook', interval=1)) - -# METRICS -metrics = [ - dict( - type='FrechetInceptionDistance', - prefix='FID-Full-50k', - fake_nums=50000, - inception_style='StyleGAN', - sample_model='ema') -] - -val_evaluator = dict(metrics=metrics) -test_evaluator = dict(metrics=metrics) diff --git a/configs/styleganv2/stylegan2_c2_8xb4-fp16-800kiters_quicktest-ffhq-256x256.py b/configs/styleganv2/stylegan2_c2_8xb4-fp16-800kiters_quicktest-ffhq-256x256.py deleted file mode 100644 index 
da117af082..0000000000 --- a/configs/styleganv2/stylegan2_c2_8xb4-fp16-800kiters_quicktest-ffhq-256x256.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Config for the `config-f` setting in StyleGAN2.""" - -_base_ = ['./stylegan2_c2_8xb4-800kiters_ffhq-256x256.py'] - -model = dict( - generator=dict(out_size=256, num_fp16_scales=4), - discriminator=dict(in_size=256, num_fp16_scales=4), - disc_auxiliary_loss=dict(data_info=dict(loss_scaler='loss_scaler')), - # gen_auxiliary_loss=dict(data_info=dict(loss_scaler='loss_scaler')), -) - -batch_size = 2 -dataset_type = 'QuickTestImageDataset' - -train_dataloader = dict(batch_size=batch_size, dataset=dict(type=dataset_type)) - -val_dataloader = dict(batch_size=batch_size, dataset=dict(type=dataset_type)) - -test_dataloader = dict( - batch_size=batch_size, dataset=dict(dataset_type=dataset_type)) - -default_hooks = dict(logger=dict(type='LoggerHook', interval=1)) - -train_cfg = dict(max_iters=800002) - -optim_wrapper = dict( - generator=dict(type='AmpOptimWrapper', loss_scale=512), - discriminator=dict(type='AmpOptimWrapper', loss_scale=512)) - -# METRICS -metrics = [ - dict( - type='FrechetInceptionDistance', - prefix='FID-Full-50k', - fake_nums=50000, - inception_style='StyleGAN', - sample_model='ema') -] - -val_evaluator = dict(metrics=metrics) -test_evaluator = dict(metrics=metrics) diff --git a/configs/styleganv2/stylegan2_c2_8xb4-fp16-global-800kiters_quicktest-ffhq-256x256.py b/configs/styleganv2/stylegan2_c2_8xb4-fp16-global-800kiters_quicktest-ffhq-256x256.py deleted file mode 100644 index a053408f29..0000000000 --- a/configs/styleganv2/stylegan2_c2_8xb4-fp16-global-800kiters_quicktest-ffhq-256x256.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Config for the `config-f` setting in StyleGAN2.""" - -_base_ = ['./stylegan2_c2_8xb4-800kiters_ffhq-256x256.py'] - -model = dict( - generator=dict(out_size=256, fp16_enabled=True), - discriminator=dict(in_size=256, fp16_enabled=True), - disc_auxiliary_loss=dict(data_info=dict(loss_scaler='loss_scaler')), - # gen_auxiliary_loss=dict(data_info=dict(loss_scaler='loss_scaler')), -) - -batch_size = 2 -dataset_type = 'QuickTestImageDataset' - -train_dataloader = dict(batch_size=batch_size, dataset=dict(type=dataset_type)) - -val_dataloader = dict(batch_size=batch_size, dataset=dict(type=dataset_type)) - -test_dataloader = dict( - batch_size=batch_size, dataset=dict(dataset_type=dataset_type)) - -default_hooks = dict(logger=dict(type='LoggerHook', interval=1)) - -train_cfg = dict(max_iters=800002) - -optim_wrapper = dict( - generator=dict(type='AmpOptimWrapper', loss_scale=512), - discriminator=dict(type='AmpOptimWrapper', loss_scale=512)) - -# METRICS -metrics = [ - dict( - type='FrechetInceptionDistance', - prefix='FID-Full-50k', - fake_nums=50000, - inception_style='StyleGAN', - sample_model='ema') -] - -val_evaluator = dict(metrics=metrics) -test_evaluator = dict(metrics=metrics) diff --git a/configs/styleganv3/README.md b/configs/styleganv3/README.md index bcb1c677d4..7c67da280c 100644 --- a/configs/styleganv3/README.md +++ b/configs/styleganv3/README.md @@ -40,26 +40,26 @@ For user convenience, we also offer the converted version of official weights. 
| Model | Dataset | Iter | FID50k | Config | Log | Download | | :-------------: | :---------------: | :----: | :---------------: | :----------------------------------------------: | :--------------------------------------------: | :-------------------------------------------------: | -| stylegan3-t | ffhq 1024x1024 | 490000 | 3.37\* | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_gamma32.8_8xb4-fp16-noaug_ffhq-1024x1024.py) | [log](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_noaug_fp16_gamma32.8_ffhq_1024_b4x8_20220322_090417.log.json) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_noaug_fp16_gamma32.8_ffhq_1024_b4x8_best_fid_iter_490000_20220401_120733-4ff83434.pth) | -| stylegan3-t-ada | metface 1024x1024 | 130000 | 15.09 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_ada-gamma6.6_8xb4-fp16_metfaces-1024x1024.py) | [log](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_ada_fp16_gamma6.6_metfaces_1024_b4x8_20220328_142211.log.json) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_ada_fp16_gamma6.6_metfaces_1024_b4x8_best_fid_iter_130000_20220401_115101-f2ef498e.pth) | +| stylegan3-t | ffhq 1024x1024 | 490000 | 3.37\* | [config](./stylegan3-t_gamma32.8_8xb4-fp16-noaug_ffhq-1024x1024.py) | [log](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_noaug_fp16_gamma32.8_ffhq_1024_b4x8_20220322_090417.log.json) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_noaug_fp16_gamma32.8_ffhq_1024_b4x8_best_fid_iter_490000_20220401_120733-4ff83434.pth) | +| stylegan3-t-ada | metface 1024x1024 | 130000 | 15.09 | [config](./stylegan3-t_ada-gamma6.6_8xb4-fp16_metfaces-1024x1024.py) | [log](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_ada_fp16_gamma6.6_metfaces_1024_b4x8_20220328_142211.log.json) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_ada_fp16_gamma6.6_metfaces_1024_b4x8_best_fid_iter_130000_20220401_115101-f2ef498e.pth) | ### Experimental Settings | Model | Dataset | Iter | FID50k | Config | Log | Download | | :-------------: | :------------: | :----: | :----: | :---------------------------------------------------: | :------------------------------------------------: | :------------------------------------------------------: | -| stylegan3-t | ffhq 256x256 | 740000 | 4.51 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_gamma2.0_8xb4-fp16-noaug_ffhq-256x256.py) | [log](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_noaug_fp16_gamma2.0_ffhq_256_b4x8_20220323_144815.log.json) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_noaug_fp16_gamma2.0_ffhq_256_b4x8_best_fid_iter_740000_20220401_122456-730e1fba.pth) | -| stylegan3-r-ada | ffhq 1024x1024 | - | - | [config](/configs/styleganv3/stylegan3-r_ada-gamma3.3_8xb4-fp16_metfaces-1024x1024.py) | - | - | +| stylegan3-t | ffhq 256x256 | 740000 | 4.51 | [config](./stylegan3-t_gamma2.0_8xb4-fp16-noaug_ffhq-256x256.py) | [log](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_noaug_fp16_gamma2.0_ffhq_256_b4x8_20220323_144815.log.json) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_noaug_fp16_gamma2.0_ffhq_256_b4x8_best_fid_iter_740000_20220401_122456-730e1fba.pth) | +| stylegan3-r-ada | ffhq 1024x1024 | - | - | [config](./stylegan3-r_ada-gamma3.3_8xb4-fp16_metfaces-1024x1024.py) | - | - | ### Converted Weights 
-| Model | Dataset | Comment | FID50k | EQ-T | EQ-R | Config | Download | -| :---------: | :------------: | :-------------: | :----: | :---: | :---: | :---------------------------------------------------------------------: | :-----------------------------------------------------------------------: | -| stylegan3-t | ffhqu 256x256 | official weight | 4.62 | 63.01 | 13.12 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_ffhqu-256x256.py) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_ffhqu_256_b4x8_cvt_official_rgb_20220329_235046-153df4c8.pth) | -| stylegan3-t | afhqv2 512x512 | official weight | 4.04 | 60.15 | 13.51 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_afhqv2-512x512.py) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_afhqv2_512_b4x8_cvt_official_rgb_20220329_235017-ee6b037a.pth) | -| stylegan3-t | ffhq 1024x1024 | official weight | 2.79 | 61.21 | 13.82 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_ffhq_1024_b4x8_cvt_official_rgb_20220329_235113-db6c6580.pth) | -| stylegan3-r | ffhqu 256x256 | official weight | 4.50 | 66.65 | 40.48 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4_ffhqu-256x256.py) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official_rgb_20220329_234909-4521d963.pth) | -| stylegan3-r | afhqv2 512x512 | official weight | 4.40 | 64.89 | 40.34 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4x8_afhqv2-512x512.py) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_r_afhqv2_512_b4x8_cvt_official_rgb_20220329_234829-f2eaca72.pth) | -| stylegan3-r | ffhq 1024x1024 | official weight | 3.07 | 64.76 | 46.62 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmgen/stylegan3/stylegan3_r_ffhq_1024_b4x8_cvt_official_rgb_20220329_234933-ac0500a1.pth) | +| Model | Dataset | Comment | FID50k | EQ-T | EQ-R | Config | Download | +| :---------: | :------------: | :-------------: | :----: | :---: | :---: | :---------------------------------------------------------------: | :-----------------------------------------------------------------------------: | +| stylegan3-t | ffhqu 256x256 | official weight | 4.62 | 63.01 | 13.12 | [config](./stylegan3-t_cvt-official-rgb_8xb4_ffhqu-256x256.py) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_ffhqu_256_b4x8_cvt_official_rgb_20220329_235046-153df4c8.pth) | +| stylegan3-t | afhqv2 512x512 | official weight | 4.04 | 60.15 | 13.51 | [config](./stylegan3-t_cvt-official-rgb_8xb4_afhqv2-512x512.py) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_afhqv2_512_b4x8_cvt_official_rgb_20220329_235017-ee6b037a.pth) | +| stylegan3-t | ffhq 1024x1024 | official weight | 2.79 | 61.21 | 13.82 | [config](./stylegan3-t_cvt-official-rgb_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_ffhq_1024_b4x8_cvt_official_rgb_20220329_235113-db6c6580.pth) | +| stylegan3-r | ffhqu 256x256 | official weight | 4.50 | 66.65 | 40.48 | 
[config](./stylegan3-r_cvt-official-rgb_8xb4_ffhqu-256x256.py) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official_rgb_20220329_234909-4521d963.pth) | +| stylegan3-r | afhqv2 512x512 | official weight | 4.40 | 64.89 | 40.34 | [config](./stylegan3-r_cvt-official-rgb_8xb4x8_afhqv2-512x512.py) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_afhqv2_512_b4x8_cvt_official_rgb_20220329_234829-f2eaca72.pth) | +| stylegan3-r | ffhq 1024x1024 | official weight | 3.07 | 64.76 | 46.62 | [config](./stylegan3-r_cvt-official-rgb_8xb4_ffhq-1024x1024.py) | [model](https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_ffhq_1024_b4x8_cvt_official_rgb_20220329_234933-ac0500a1.pth) | ## Interpolation @@ -67,7 +67,7 @@ We provide a tool to generate video by walking through GAN's latent space. Run this command to get the following video. ```bash -python apps/interpolate_sample.py configs/styleganv3/stylegan3_t_afhqv2_512_b4x8_official.py https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_afhqv2_512_b4x8_cvt_official.pkl --export-video --samples-path work_dirs/demos/ --endpoint 6 --interval 60 --space z --seed 2022 --sample-cfg truncation=0.8 +python apps/interpolate_sample.py configs/styleganv3/stylegan3_t_afhqv2_512_b4x8_official.py https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_afhqv2_512_b4x8_cvt_official.pkl --export-video --samples-path work_dirs/demos/ --endpoint 6 --interval 60 --space z --seed 2022 --sample-cfg truncation=0.8 ``` https://user-images.githubusercontent.com/22982797/151506918-83da9ee3-0d63-4c5b-ad53-a41562b92075.mp4 @@ -78,11 +78,11 @@ We also provide a tool to visualize the equivarience properties for StyleGAN3. Run these commands to get the results below. 
```bash -python tools/utils/equivariance_viz.py configs/styleganv3/stylegan3_r_ffhqu_256_b4x8_official.py https://download.openmmlab.com/mmgen/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official.pkl --translate_max 0.5 --transform rotate --seed 5432 +python tools/utils/equivariance_viz.py configs/styleganv3/stylegan3_r_ffhqu_256_b4x8_official.py https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official.pkl --translate_max 0.5 --transform rotate --seed 5432 -python tools/utils/equivariance_viz.py configs/styleganv3/stylegan3_r_ffhqu_256_b4x8_official.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmgen/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official.pkl --translate_max 0.25 --transform x_t --seed 5432 +python tools/utils/equivariance_viz.py configs/styleganv3/stylegan3_r_ffhqu_256_b4x8_official.py https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official.pkl --translate_max 0.25 --transform x_t --seed 5432 -python tools/utils/equivariance_viz.py configs/styleganv3/stylegan3_r_ffhqu_256_b4x8_official.py https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmgen/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official.pkl --translate_max 0.25 --transform y_t --seed 5432 +python tools/utils/equivariance_viz.py configs/styleganv3/stylegan3_r_ffhqu_256_b4x8_official.py https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official.pkl --translate_max 0.25 --transform y_t --seed 5432 ``` https://user-images.githubusercontent.com/22982797/151504902-f3cbfef5-9014-4607-bbe1-deaf48ec6d55.mp4 @@ -102,7 +102,7 @@ metrics = dict( compute_eqt_int=True, compute_eqt_frac=True, compute_eqr=True))) ``` -And we highly recommend you to use [slurm_eval_multi_gpu](tools/slurm_eval_multi_gpu.sh) script to accelerate evaluation time. +We also highly recommend using the [slurm_test.sh](../../tools/slurm_test.sh) script to speed up evaluation.
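For reference, EQ-T and EQ-R report, in dB (a PSNR), how closely a transformation applied inside the generator matches the same transformation applied to its output image. The toy sketch below only illustrates this idea for an integer horizontal translation; the `translate=` argument is a hypothetical interface used purely for illustration, and the real metric additionally handles fractional offsets, rotation, and border cropping rather than wrap-around.

```python
# Toy illustration of the translation-equivariance (EQ-T) idea: shifting the
# generator's input coordinate frame should match shifting its output image.
# `generator(z, translate=...)` is a hypothetical interface, not a real API.
import torch


def eq_t_psnr(generator, z: torch.Tensor, dx: int) -> torch.Tensor:
    img = generator(z)                                    # reference output in [-1, 1]
    img_gen_shift = generator(z, translate=(dx, 0))       # shift applied inside G
    img_out_shift = torch.roll(img, shifts=dx, dims=-1)   # shift applied to the output
                                                          # (wrap-around: a simplification)
    mse = torch.mean((img_gen_shift - img_out_shift) ** 2)
    return 10 * torch.log10(4.0 / mse)  # peak^2 = 4 for images in [-1, 1]
```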
## Citation diff --git a/configs/styleganv3/metafile.yml b/configs/styleganv3/metafile.yml index 9389454d6a..4224b0e174 100644 --- a/configs/styleganv3/metafile.yml +++ b/configs/styleganv3/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - https://nvlabs-fi-cdn.nvidia.com/stylegan3/stylegan3-paper.pdf README: configs/styleganv3/README.md + Task: + - unconditional gans + Year: 2021 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_gamma32.8_8xb4-fp16-noaug_ffhq-1024x1024.py +- Config: configs/styleganv3/stylegan3-t_gamma32.8_8xb4-fp16-noaug_ffhq-1024x1024.py In Collection: StyleGANv3 Metadata: Training Data: FFHQ @@ -17,8 +20,8 @@ Models: Metrics: Iter: 490000.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_noaug_fp16_gamma32.8_ffhq_1024_b4x8_best_fid_iter_490000_20220401_120733-4ff83434.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_ada-gamma6.6_8xb4-fp16_metfaces-1024x1024.py + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_noaug_fp16_gamma32.8_ffhq_1024_b4x8_best_fid_iter_490000_20220401_120733-4ff83434.pth +- Config: configs/styleganv3/stylegan3-t_ada-gamma6.6_8xb4-fp16_metfaces-1024x1024.py In Collection: StyleGANv3 Metadata: Training Data: Others @@ -29,8 +32,8 @@ Models: FID50k: 15.09 Iter: 130000.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_ada_fp16_gamma6.6_metfaces_1024_b4x8_best_fid_iter_130000_20220401_115101-f2ef498e.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_gamma2.0_8xb4-fp16-noaug_ffhq-256x256.py + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_ada_fp16_gamma6.6_metfaces_1024_b4x8_best_fid_iter_130000_20220401_115101-f2ef498e.pth +- Config: configs/styleganv3/stylegan3-t_gamma2.0_8xb4-fp16-noaug_ffhq-256x256.py In Collection: StyleGANv3 Metadata: Training Data: FFHQ @@ -41,7 +44,7 @@ Models: FID50k: 4.51 Iter: 740000.0 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_noaug_fp16_gamma2.0_ffhq_256_b4x8_best_fid_iter_740000_20220401_122456-730e1fba.pth + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_noaug_fp16_gamma2.0_ffhq_256_b4x8_best_fid_iter_740000_20220401_122456-730e1fba.pth - Config: configs/styleganv3/stylegan3-r_ada-gamma3.3_8xb4-fp16_metfaces-1024x1024.py In Collection: StyleGANv3 Metadata: @@ -52,7 +55,7 @@ Models: Metrics: {} Task: Unconditional GANs Weights: '' -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_ffhqu-256x256.py +- Config: configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_ffhqu-256x256.py In Collection: StyleGANv3 Metadata: Training Data: FFHQ @@ -64,8 +67,8 @@ Models: EQ-T: 63.01 FID50k: 4.62 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_ffhqu_256_b4x8_cvt_official_rgb_20220329_235046-153df4c8.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_afhqv2-512x512.py + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_ffhqu_256_b4x8_cvt_official_rgb_20220329_235046-153df4c8.pth +- Config: configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_afhqv2-512x512.py In Collection: StyleGANv3 Metadata: Training Data: Others @@ -77,8 +80,8 @@ Models: EQ-T: 60.15 FID50k: 4.04 Task: 
Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_afhqv2_512_b4x8_cvt_official_rgb_20220329_235017-ee6b037a.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_ffhq-1024x1024.py + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_afhqv2_512_b4x8_cvt_official_rgb_20220329_235017-ee6b037a.pth +- Config: configs/styleganv3/stylegan3-t_cvt-official-rgb_8xb4_ffhq-1024x1024.py In Collection: StyleGANv3 Metadata: Training Data: FFHQ @@ -90,8 +93,8 @@ Models: EQ-T: 61.21 FID50k: 2.79 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_ffhq_1024_b4x8_cvt_official_rgb_20220329_235113-db6c6580.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4_ffhqu-256x256.py + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_ffhq_1024_b4x8_cvt_official_rgb_20220329_235113-db6c6580.pth +- Config: configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4_ffhqu-256x256.py In Collection: StyleGANv3 Metadata: Training Data: FFHQ @@ -103,8 +106,8 @@ Models: EQ-T: 66.65 FID50k: 4.5 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official_rgb_20220329_234909-4521d963.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4x8_afhqv2-512x512.py + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_ffhqu_256_b4x8_cvt_official_rgb_20220329_234909-4521d963.pth +- Config: configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4x8_afhqv2-512x512.py In Collection: StyleGANv3 Metadata: Training Data: Others @@ -116,8 +119,8 @@ Models: EQ-T: 64.89 FID50k: 4.4 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_r_afhqv2_512_b4x8_cvt_official_rgb_20220329_234829-f2eaca72.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4_ffhq-1024x1024.py + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_afhqv2_512_b4x8_cvt_official_rgb_20220329_234829-f2eaca72.pth +- Config: configs/styleganv3/stylegan3-r_cvt-official-rgb_8xb4_ffhq-1024x1024.py In Collection: StyleGANv3 Metadata: Training Data: FFHQ @@ -129,4 +132,4 @@ Models: EQ-T: 64.76 FID50k: 3.07 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/stylegan3/stylegan3_r_ffhq_1024_b4x8_cvt_official_rgb_20220329_234933-ac0500a1.pth + Weights: https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_ffhq_1024_b4x8_cvt_official_rgb_20220329_234933-ac0500a1.pth diff --git a/configs/styleganv3/stylegan3-r_ada-gamma3.3_8xb4-fp16_metfaces-1024x1024.py b/configs/styleganv3/stylegan3-r_ada-gamma3.3_8xb4-fp16_metfaces-1024x1024.py index cef1bab104..094fa6edae 100644 --- a/configs/styleganv3/stylegan3-r_ada-gamma3.3_8xb4-fp16_metfaces-1024x1024.py +++ b/configs/styleganv3/stylegan3-r_ada-gamma3.3_8xb4-fp16_metfaces-1024x1024.py @@ -19,7 +19,7 @@ g_reg_ratio = g_reg_interval / (g_reg_interval + 1) d_reg_ratio = d_reg_interval / (d_reg_interval + 1) -load_from = 'https://download.openmmlab.com/mmgen/stylegan3/stylegan3_r_ffhq_1024_b4x8_cvt_official_rgb_20220329_234933-ac0500a1.pth' # noqa +load_from = 'https://download.openmmlab.com/mmediting/stylegan3/stylegan3_r_ffhq_1024_b4x8_cvt_official_rgb_20220329_234933-ac0500a1.pth' # noqa # ada settings 
aug_kwargs = { diff --git a/configs/styleganv3/stylegan3-t_ada-gamma6.6_8xb4-fp16_metfaces-1024x1024.py b/configs/styleganv3/stylegan3-t_ada-gamma6.6_8xb4-fp16_metfaces-1024x1024.py index eb62691618..56ced90640 100644 --- a/configs/styleganv3/stylegan3-t_ada-gamma6.6_8xb4-fp16_metfaces-1024x1024.py +++ b/configs/styleganv3/stylegan3-t_ada-gamma6.6_8xb4-fp16_metfaces-1024x1024.py @@ -17,7 +17,7 @@ g_reg_ratio = g_reg_interval / (g_reg_interval + 1) d_reg_ratio = d_reg_interval / (d_reg_interval + 1) -load_from = 'https://download.openmmlab.com/mmgen/stylegan3/stylegan3_t_ffhq_1024_b4x8_cvt_official_rgb_20220329_235113-db6c6580.pth' # noqa +load_from = 'https://download.openmmlab.com/mmediting/stylegan3/stylegan3_t_ffhq_1024_b4x8_cvt_official_rgb_20220329_235113-db6c6580.pth' # noqa # ada settings aug_kwargs = { 'xflip': 1, diff --git a/configs/swinir/README.md b/configs/swinir/README.md new file mode 100644 index 0000000000..2ada8e12e6 --- /dev/null +++ b/configs/swinir/README.md @@ -0,0 +1,512 @@ +# SwinIR (ICCVW'2021) + +> [SwinIR: Image Restoration Using Swin Transformer](https://arxiv.org/abs/2108.10257) + +> **Task**: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + + + +## Abstract + + + +Image restoration is a long-standing low-level vision problem that aims to restore high-quality images from low-quality images (e.g., downscaled, noisy and compressed images). While state-of-the-art image restoration methods are based on convolutional neural networks, few attempts have been made with Transformers which show impressive performance on high-level vision tasks. In this paper, we propose a strong baseline model SwinIR for image restoration based on the Swin Transformer. SwinIR consists of three parts: shallow feature extraction, deep feature extraction and high-quality image reconstruction. In particular, the deep feature extraction module is composed of several residual Swin Transformer blocks (RSTB), each of which has several Swin Transformer layers together with a residual connection. We conduct experiments on three representative tasks: image super-resolution (including classical, lightweight and real-world image super-resolution), image denoising (including grayscale and color image denoising) and JPEG compression artifact reduction. Experimental results demonstrate that SwinIR outperforms state-of-the-art methods on different tasks by up to 0.14~0.45dB, while the total number of parameters can be reduced by up to 67%. + + + +
+ +
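The three-part layout described in the abstract (shallow feature extraction, a deep stack of residual Swin Transformer blocks, and a reconstruction head) can be summarized with the structural sketch below. It is only a skeleton for orientation: the placeholder convolutional body stands in for the real window-based self-attention layers, and none of the module names correspond to MMEditing's implementation.

```python
# Structural sketch of the SwinIR layout: shallow conv feature extraction,
# a deep stack of residual Swin Transformer blocks (RSTB), and a
# reconstruction head, with a global residual connection.
import torch
from torch import nn


class RSTB(nn.Module):
    """Residual Swin Transformer block: several layers plus a residual conv.

    The conv body is a placeholder for the real window-based self-attention.
    """

    def __init__(self, channels: int, depth: int = 6):
        super().__init__()
        self.body = nn.Sequential(*[
            nn.Sequential(nn.Conv2d(channels, channels, 3, padding=1), nn.GELU())
            for _ in range(depth)
        ])
        self.conv = nn.Conv2d(channels, channels, 3, padding=1)

    def forward(self, x):
        return x + self.conv(self.body(x))  # residual connection per block


class SwinIRSketch(nn.Module):
    def __init__(self, in_ch=3, channels=180, num_blocks=6, scale=2):
        super().__init__()
        self.shallow = nn.Conv2d(in_ch, channels, 3, padding=1)
        self.deep = nn.Sequential(*[RSTB(channels) for _ in range(num_blocks)])
        self.conv_after_body = nn.Conv2d(channels, channels, 3, padding=1)
        # Reconstruction head: PixelShuffle upsampling for super-resolution.
        self.reconstruct = nn.Sequential(
            nn.Conv2d(channels, in_ch * scale**2, 3, padding=1),
            nn.PixelShuffle(scale))

    def forward(self, lq):
        feat = self.shallow(lq)
        feat = feat + self.conv_after_body(self.deep(feat))  # global residual
        return self.reconstruct(feat)
```

For the denoising and compression-artifact-reduction tasks below, the reconstruction head would keep the input resolution instead of upsampling.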
+ +## Results and models + +### **Classical Image Super-Resolution** + +Evaluated on Y channels, `scale` pixels in each border are cropped before evaluation. +The metrics are `PSNR / SSIM` . + +| Method | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU Info | Download | +| :----------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :------: | :------------------------------------------------------------------: | +| [swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py) | 38.3240 | 34.1174 | 37.8921 | 0.9626 | 0.9230 | 0.9481 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth) \| log | +| [swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py) | 34.8640 | 30.7669 | 34.1397 | 0.9317 | 0.8508 | 0.8917 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth) \| log | +| [swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py) | 32.7315 | 28.9065 | 32.0953 | 0.9029 | 0.7915 | 0.8418 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth) \| log | +| [swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k](/configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py) | 38.3971 | 34.4149 | 37.9473 | 0.9629 | 0.9252 | 0.9488 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth) \| log | +| [swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k](/configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py) | 34.9335 | 30.9258 | 34.2830 | 0.9323 | 0.8540 | 0.8939 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth) \| log | +| [swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k](/configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py) | 32.9214 | 29.0792 | 32.3021 | 0.9053 | 0.7953 | 0.8451 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth) \| log | + +### **Lightweight Image Super-Resolution** + +Evaluated on Y channels, `scale` pixels in each border are cropped before evaluation. +The metrics are `PSNR / SSIM` . 
| Method | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU Info | Download | +| :----------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :------: | :------------------------------------------------------------------: | +| [swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py) | 38.1289 | 33.8404 | 37.5844 | 0.9617 | 0.9207 | 0.9459 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth) \| log | +| [swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py) | 34.6037 | 30.5340 | 33.8394 | 0.9293 | 0.8468 | 0.8867 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth) \| log | +| [swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py) | 32.4343 | 28.7441 | 31.8636 | 0.8984 | 0.7861 | 0.8353 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth) \| log | + +### **Real-World Image Super-Resolution** + +Evaluated on Y channels. +The metrics are `NIQE` . + +| Method | RealSRSet+5images NIQE | GPU Info | Download | +| :----------------------------------------------------------------------------------: | :--------------------: | :------: | :------------------------------------------------------------------------------------: | +| [swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py) | 5.7975 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth) \| log | +| [swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py) | 7.2738 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth) \| log | +| [swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py) | 5.2329 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth) \| log | +| [swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py) | 7.7460 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth) \| log | +| [swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py) | 5.1464 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/) \| log | +| [swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py) | 7.6378 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth) \| log | + +### **Grayscale Image Denoising** + +Evaluated on grayscale images. +The metrics are `PSNR` .
+
+| Method | Set12 PSNR | BSD68 PSNR | Urban100 PSNR | GPU Info | Download |
+| :----------------------------------------------------------------------------: | :--------: | :--------: | :-----------: | :------: | :-------------------------------------------------------------------------------: |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py) | 33.9731 | 32.5203 | 34.3424 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth) \| log |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py) | 31.6434 | 30.1377 | 31.9493 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth) \| log |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py) | 28.5651 | 27.3157 | 28.6626 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth) \| log |
+
+### **Color Image Denoising**
+
+Evaluated on RGB channels.
+The metrics are `PSNR` .
+
+| Method | CBSD68 PSNR | Kodak24 PSNR | McMaster PSNR | Urban100 PSNR | GPU Info | Download |
+| :--------------------------------------------------------------------: | :---------: | :----------: | :-----------: | :-----------: | :------: | :-----------------------------------------------------------------------: |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py) | 34.4136 | 35.3555 | 35.6205 | 35.1836 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth) \| log |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py) | 31.7626 | 32.9003 | 33.3198 | 32.9458 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth) \| log |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py) | 28.5346 | 29.8058 | 30.2027 | 29.8832 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth) \| log |
+
+### **JPEG Compression Artifact Reduction (grayscale)**
+
+Evaluated on grayscale images.
+The metrics are `PSNR / SSIM` .
+ +| Method | Classic5 PSNR | Classic5 SSIM | LIVE1 PSNR | LIVE1 SSIM | GPU Info | Download | +| :----------------------------------------------------------------------: | :-----------: | :-----------: | :--------: | :--------: | :------: | :------------------------------------------------------------------------: | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py) | 30.2746 | 0.8254 | 29.8611 | 0.8292 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py) | 32.5331 | 0.8753 | 32.2667 | 0.8914 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py) | 33.7504 | 0.8966 | 33.7001 | 0.9179 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py) | 34.5377 | 0.9087 | 34.6846 | 0.9322 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth) \| log | + +### **JPEG Compression Artifact Reduction (color)** + +Evaluated on RGB channels. +The metrics are `PSNR / SSIM` . + +| Method | Classic5 PSNR | Classic5 SSIM | LIVE1 PSNR | LIVE1 SSIM | GPU Info | Download | +| :----------------------------------------------------------------------: | :-----------: | :-----------: | :--------: | :--------: | :------: | :------------------------------------------------------------------------: | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py) | 30.1019 | 0.8217 | 28.0676 | 0.8094 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py) | 32.3489 | 0.8727 | 30.4514 | 0.8745 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py) | 33.6028 | 0.8949 | 31.8235 | 0.9023 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py) | 34.4344 | 0.9076 | 32.7610 | 0.9179 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth) \| log | + +## Quick Start + +**Train** + +
+Train Instructions
+
+You can use the following commands to train a model with cpu or single/multiple GPUs.
+
+```shell
+# cpu train
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+
+# 002 Lightweight Image Super-Resolution (small size)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+
+# 003 Real-World Image Super-Resolution
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+
+# 004 Grayscale Image Denoising (middle size)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py
+
+# 005 Color Image Denoising (middle size)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py
+
+# color
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py
+
+
+# single-gpu train
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+python tools/train.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+python tools/train.py configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+python tools/train.py configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+python tools/train.py configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+python tools/train.py configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+python tools/train.py configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+
+# 002 Lightweight Image Super-Resolution (small size)
+python tools/train.py configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+python tools/train.py configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+python tools/train.py configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+
+# 003 Real-World Image Super-Resolution
+python tools/train.py configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+
+# 004 Grayscale Image Denoising (middle size)
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py
+
+# 005 Color Image Denoising (middle size)
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py
+
+# color
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py
+python tools/train.py 
configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py
+
+
+# multi-gpu train
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+./tools/dist_train.sh configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py 8
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+./tools/dist_train.sh configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py 8
+
+# 002 Lightweight Image Super-Resolution (small size)
+./tools/dist_train.sh configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py 8
+
+# 003 Real-World Image Super-Resolution
+./tools/dist_train.sh configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py 8
+
+# 004 Grayscale Image Denoising (middle size)
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py 8
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py 8
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py 8
+
+# 005 Color Image Denoising (middle size)
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py 8
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py 8
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py 8
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py 8
+
+# color
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py 8
+```
+
+For more details, you can refer to **Train a model** part in 
[train_test.md](/docs/en/user_guides/train_test.md#Train-a-model-in-MMEditing). + +
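+If you want to adjust a schedule or batch size before launching one of the commands above, the config can also be inspected and modified programmatically with MMEngine. The snippet below is only a minimal sketch: it assumes `mmengine` is installed and that the SwinIR configs expose `train_dataloader.batch_size` and `train_cfg.max_iters` (consistent with the `8xb4` / `500k` naming, but worth verifying on the loaded config).
+
+```python
+from mmengine.config import Config
+
+# Load one of the SwinIR configs from this folder.
+cfg = Config.fromfile('configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py')
+
+# Assumed fields: 4 samples per GPU ("8xb4") and 500k iterations ("500k").
+print(cfg.train_dataloader.batch_size)
+print(cfg.train_cfg.max_iters)
+
+# Override a value and dump the edited config for a custom run.
+cfg.train_dataloader.batch_size = 2
+cfg.dump('swinir_x2_bs2.py')
+```
+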
+ +**Test** + +
+Test Instructions + +You can use the following commands to test a model with cpu or single/multiple GPUs. + +```shell +# cpu test +# 001 Classical Image Super-Resolution (middle size) +# (setting1: when model is trained on DIV2K and with training_patch_size=48) +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth + +# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64) +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth + + +# 002 Lightweight Image Super-Resolution (small size) +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth + +# 003 Real-World Image Super-Resolution +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth + +CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth + +CUDA_VISIBLE_DEVICES=-1 python 
tools/test.py configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-25f1722a.pth
+
+# 004 Grayscale Image Denoising (middle size)
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth
+
+# 005 Color Image Denoising (middle size)
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth
+
+
+# color
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py 
configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth
+
+
+
+# single-gpu test
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+python tools/test.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth
+
+python tools/test.py configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth
+
+python tools/test.py configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+python tools/test.py configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth
+
+python tools/test.py configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth
+
+python tools/test.py configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth
+
+
+# 002 Lightweight Image Super-Resolution (small size)
+python tools/test.py configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth
+
+python tools/test.py configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth
+
+python tools/test.py configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth
+
+
+# 003 Real-World Image Super-Resolution
+python tools/test.py configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth
+
+python tools/test.py configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth
+
+python tools/test.py configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth
+
+python tools/test.py configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth
+
+python tools/test.py configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth
+
+python tools/test.py configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-25f1722a.pth
+
+
+# 004 Grayscale Image Denoising (middle size)
+python tools/test.py 
configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth
+
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth
+
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth
+
+
+# 005 Color Image Denoising (middle size)
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth
+
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth
+
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth
+
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth
+
+
+# color
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth
+
+
+
+# multi-gpu test
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+./tools/dist_test.sh configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py 
https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+./tools/dist_test.sh configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth
+
+# 002 Lightweight Image Super-Resolution (small size)
+./tools/dist_test.sh configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth
+
+# 003 Real-World Image Super-Resolution
+./tools/dist_test.sh configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth
+
+./tools/dist_test.sh configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth
+
+./tools/dist_test.sh configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth
+
+./tools/dist_test.sh configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth
+
+./tools/dist_test.sh configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth
+
+./tools/dist_test.sh configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-25f1722a.pth
+
+# 004 Grayscale Image Denoising (middle size)
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth
+
+# 005 Color Image Denoising (middle size)
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth
+
+# color
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth
+
+```
+
+For more details, you can refer to **Test a pre-trained model** part in [train_test.md](/docs/en/user_guides/train_test.md#Test-a-pre-trained-model-in-MMEditing).
+
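+For reference, the super-resolution metrics in the tables above follow the convention stated at the top of the results section: PSNR/SSIM are computed on the Y channel after cropping `scale` pixels from each border. The NumPy sketch below illustrates that convention for PSNR only; it is a simplified illustration, not the exact evaluator implementation used by MMEditing.
+
+```python
+import numpy as np
+
+
+def rgb_to_y(img):
+    """Convert a uint8 RGB image (H, W, 3) to the BT.601 Y channel."""
+    img = img.astype(np.float64)
+    return 16. + (65.481 * img[..., 0] + 128.553 * img[..., 1] +
+                  24.966 * img[..., 2]) / 255.
+
+
+def psnr_y(sr, gt, crop_border):
+    """PSNR on the Y channel, cropping `crop_border` pixels on each side."""
+    sr_y, gt_y = rgb_to_y(sr), rgb_to_y(gt)
+    if crop_border > 0:
+        sr_y = sr_y[crop_border:-crop_border, crop_border:-crop_border]
+        gt_y = gt_y[crop_border:-crop_border, crop_border:-crop_border]
+    mse = np.mean((sr_y - gt_y) ** 2)
+    return float('inf') if mse == 0 else 20. * np.log10(255. / np.sqrt(mse))
+
+
+# For the x4 models above, crop_border would be 4 (the upscaling factor).
+```
+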
+ +## Citation + +```bibtex +@inproceedings{liang2021swinir, + title={Swinir: Image restoration using swin transformer}, + author={Liang, Jingyun and Cao, Jiezhang and Sun, Guolei and Zhang, Kai and Van Gool, Luc and Timofte, Radu}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={1833--1844}, + year={2021} +} +``` diff --git a/configs/swinir/README_zh-CN.md b/configs/swinir/README_zh-CN.md new file mode 100644 index 0000000000..4fccfcbaac --- /dev/null +++ b/configs/swinir/README_zh-CN.md @@ -0,0 +1,503 @@ +# SwinIR (ICCVW'2021) + +> **任务**: 图像超分辨率, 图像去噪, JPEG压缩伪影移除 + + + +
+SwinIR (ICCVW'2021) + +```bibtex +@inproceedings{liang2021swinir, + title={Swinir: Image restoration using swin transformer}, + author={Liang, Jingyun and Cao, Jiezhang and Sun, Guolei and Zhang, Kai and Van Gool, Luc and Timofte, Radu}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={1833--1844}, + year={2021} +} +``` + +
+ +
+ +### **Classical Image Super-Resolution** + +在 Y 通道上进行评估,在评估之前裁剪每个边界中的 `scale` 像素。 +我们使用 `PSNR` 和 `SSIM` 作为指标。 + +| 算法 | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU 信息 | 下载 | +| :-----------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :------: | :-----------------------------------------------------------------: | +| [swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py) | 38.3240 | 34.1174 | 37.8921 | 0.9626 | 0.9230 | 0.9481 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth) \| log | +| [swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py) | 34.8640 | 30.7669 | 34.1397 | 0.9317 | 0.8508 | 0.8917 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth) \| log | +| [swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py) | 32.7315 | 28.9065 | 32.0953 | 0.9029 | 0.7915 | 0.8418 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth) \| log | +| [swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k](/configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py) | 38.3971 | 34.4149 | 37.9473 | 0.9629 | 0.9252 | 0.9488 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth) \| log | +| [swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k](/configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py) | 34.9335 | 30.9258 | 34.2830 | 0.9323 | 0.8540 | 0.8939 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth) \| log | +| [swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k](/configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py) | 32.9214 | 29.0792 | 32.3021 | 0.9053 | 0.7953 | 0.8451 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth) \| log | + +### **Lightweight Image Super-Resolution** + +在 Y 通道上进行评估,在评估之前裁剪每个边界中的 `scale` 像素。 +我们使用 `PSNR` 和 `SSIM` 作为指标。 + +| 算法 | Set5 PSNR | Set14 PSNR | DIV2K PSNR | Set5 SSIM | Set14 SSIM | DIV2K SSIM | GPU 信息 | 下载 | +| :-----------------------------------------------------------------: | :-------: | :--------: | :--------: | :-------: | :--------: | :--------: | :------: | :-----------------------------------------------------------------: | +| [swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py) | 38.1289 | 33.8404 | 37.5844 | 0.9617 | 0.9207 | 0.9459 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth) \| log | +| [swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py) | 34.6037 | 30.5340 | 33.8394 | 0.9293 | 0.8468 | 0.8867 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth) \| log | +| [swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k](/configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py) | 32.4343 | 28.7441 | 31.8636 | 0.8984 | 0.7861 | 0.8353 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth) \| 
log |
+
+### **Real-World Image Super-Resolution**
+
+在 Y 通道上进行评估。
+我们使用 NIQE 作为指标。
+
+| 算法 | RealSRSet+5images NIQE | GPU 信息 | 下载 |
+| :-----------------------------------------------------------------------------------: | :--------------------: | :------: | :-----------------------------------------------------------------------------------: |
+| [swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py) | 5.7975 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth) \| log |
+| [swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py) | 7.2738 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth) \| log |
+| [swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py) | 5.2329 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth) \| log |
+| [swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py) | 7.7460 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth) \| log |
+| [swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py) | 5.1464 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth) \| log |
+| [swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost](/configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py) | 7.6378 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-25f1722a.pth) \| log |
+
+### **Grayscale Image Denoising**
+
+在灰度图上进行评估。
+我们使用 PSNR 作为指标。
+
+| 算法 | Set12 PSNR | BSD68 PSNR | Urban100 PSNR | GPU 信息 | 下载 |
+| :-----------------------------------------------------------------------------: | :--------: | :--------: | :-----------: | :------: | :------------------------------------------------------------------------------: |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py) | 33.9731 | 32.5203 | 34.3424 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth) \| log |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py) | 31.6434 | 30.1377 | 31.9493 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth) \| log |
+| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py) | 28.5651 | 27.3157 | 28.6626 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth) \| log |
+
+### **Color Image Denoising**
+
+在 RGB 通道上进行评估。
+我们使用 PSNR 作为指标。
+
+| 算法 | CBSD68 PSNR | Kodak24 PSNR | McMaster PSNR | Urban100 PSNR | GPU 信息 | 下载 |
+| :---------------------------------------------------------------------: | :---------: | :----------: | :-----------: | :-----------: | :------: | :----------------------------------------------------------------------: |
+| 
[swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py) | 34.4136 | 35.3555 | 35.6205 | 35.1836 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth) \| log | +| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py) | 31.7626 | 32.9003 | 33.3198 | 32.9458 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth) \| log | +| [swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50](/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py) | 28.5346 | 29.8058 | 30.2027 | 29.8832 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth) \| log | + +### **JPEG Compression Artifact Reduction (grayscale)** + +在灰度图上进行评估。 +我们使用 PSNR 和 SSIM 作为指标。 + +| 算法 | Classic5 PSNR | Classic5 SSIM | LIVE1 PSNR | LIVE1 SSIM | GPU 信息 | 下载 | +| :-----------------------------------------------------------------------: | :-----------: | :-----------: | :--------: | :--------: | :------: | :-----------------------------------------------------------------------: | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py) | 30.2746 | 0.8254 | 29.8611 | 0.8292 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py) | 32.5331 | 0.8753 | 32.2667 | 0.8914 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py) | 33.7504 | 0.8966 | 33.7001 | 0.9179 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py) | 34.5377 | 0.9087 | 34.6846 | 0.9322 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth) \| log | + +### **JPEG Compression Artifact Reduction (color)** + +在 RGB 通道上进行评估。 +我们使用 PSNR 和 SSIM 作为指标。 + +| 算法 | Classic5 PSNR | Classic5 SSIM | LIVE1 PSNR | LIVE1 SSIM | GPU 信息 | 下载 | +| :-----------------------------------------------------------------------: | :-----------: | :-----------: | :--------: | :--------: | :------: | :-----------------------------------------------------------------------: | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py) | 30.1019 | 0.8217 | 28.0676 | 0.8094 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py) | 32.3489 | 0.8727 | 30.3489 | 0.8745 | 8 | 
[model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py) | 33.6028 | 0.8949 | 31.8235 | 0.9023 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth) \| log | +| [swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40](/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py) | 34.4344 | 0.9076 | 32.7610 | 0.9179 | 8 | [model](https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth) \| log | + +## 快速开始 + +**训练** + +
+训练说明
+
+您可以使用以下命令来训练模型。
+
+```shell
+# CPU上训练
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+
+# 002 Lightweight Image Super-Resolution (small size)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+
+# 003 Real-World Image Super-Resolution
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+
+# 004 Grayscale Image Denoising (middle size)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py
+
+# 005 Color Image Denoising (middle size)
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py
+
+# color
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py 
configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py
+CUDA_VISIBLE_DEVICES=-1 python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py
+
+
+
+# 单个GPU上训练
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+python tools/train.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+python tools/train.py configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+python tools/train.py configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+python tools/train.py configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+python tools/train.py configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+python tools/train.py configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py
+
+# 002 Lightweight Image Super-Resolution (small size)
+python tools/train.py configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+python tools/train.py configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+python tools/train.py configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py
+
+# 003 Real-World Image Super-Resolution
+python tools/train.py configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+python tools/train.py configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+
+# 004 Grayscale Image Denoising (middle size)
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py
+
+# 005 Color Image Denoising (middle size)
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py
+python tools/train.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py
+
+# color
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py
+python tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py
+python 
tools/train.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py
+
+
+
+# 多个GPU上训练
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+./tools/dist_train.sh configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py 8
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+./tools/dist_train.sh configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py 8
+
+# 002 Lightweight Image Super-Resolution (small size)
+./tools/dist_train.sh configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py 8
+./tools/dist_train.sh configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py 8
+
+# 003 Real-World Image Super-Resolution
+./tools/dist_train.sh configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py 8
+./tools/dist_train.sh configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py 8
+
+# 004 Grayscale Image Denoising (middle size)
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py 8
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py 8
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py 8
+
+# 005 Color Image Denoising (middle size)
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py 8
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py 8
+./tools/dist_train.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py 8
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py 8
+
+# color
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py 8
+./tools/dist_train.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py 8
+```
+
+更多细节可以参考 [train_test.md](/docs/zh_cn/user_guides/train_test.md) 中的 **Train a model** 部分。
+
+
+**Test**
+
+Test instructions
+
+You can use the following commands to test a model.
+
+```shell
+# Test on CPU
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth
+
+
+# 002 Lightweight Image Super-Resolution (small size)
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth
+
+# 003 Real-World Image Super-Resolution
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-25f1722a.pth
+
+# 004 Grayscale Image Denoising (middle size)
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth
+
+# 005 Color Image Denoising (middle size)
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth
+
+
+# color
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth
+
+CUDA_VISIBLE_DEVICES=-1 python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth
+
+
+
+# Test on a single GPU
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+python tools/test.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth
+
+python tools/test.py configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth
+
+python tools/test.py configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+python tools/test.py configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth
+
+python tools/test.py configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth
+
+python tools/test.py configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth
+
+
+# 002 Lightweight Image Super-Resolution (small size)
+python tools/test.py configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth
+
+python tools/test.py configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth
+
+python tools/test.py configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth
+
+
+# 003 Real-World Image Super-Resolution
+python tools/test.py configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth
+
+python tools/test.py configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth
+
+python tools/test.py configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth
+
+python tools/test.py configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth
+
+python tools/test.py configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth
+
+python tools/test.py configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-25f1722a.pth
+
+
+# 004 Grayscale Image Denoising (middle size)
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth
+
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth
+
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth
+
+
+# 005 Color Image Denoising (middle size)
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth
+
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth
+
+python tools/test.py configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth
+
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth
+
+
+# color
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth
+
+python tools/test.py configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth
+
+
+
+# Test with multiple GPUs
+# 001 Classical Image Super-Resolution (middle size)
+# (setting1: when model is trained on DIV2K and with training_patch_size=48)
+./tools/dist_test.sh configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth
+
+# (setting2: when model is trained on DIV2K+Flickr2K and with training_patch_size=64)
+./tools/dist_test.sh configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth
+
+# 002 Lightweight Image Super-Resolution (small size)
+./tools/dist_test.sh configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth
+
+./tools/dist_test.sh configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth
+
+# 003 Real-World Image Super-Resolution
+./tools/dist_test.sh configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth
+
+./tools/dist_test.sh configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth
+
+./tools/dist_test.sh configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth
+
+./tools/dist_test.sh configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth
+
+./tools/dist_test.sh configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth
+
+./tools/dist_test.sh configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-25f1722a.pth
+
+# 004 Grayscale Image Denoising (middle size)
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth
+
+# 005 Color Image Denoising (middle size)
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth
+
+# 006 JPEG Compression Artifact Reduction (middle size, using window_size=7 because JPEG encoding uses 8x8 blocks)
+# grayscale
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth
+
+# color
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth
+
+./tools/dist_test.sh configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth
+
+```
+
+For more details, you can refer to the **Test a pre-trained model** part in [train_test.md](/docs/zh_cn/user_guides/train_test.md).
+
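+The checkpoint argument of `tools/test.py` accepts a local file as well as a URL. A small sketch (assuming `wget` is available and a `./checkpoints` directory is used) that downloads one checkpoint first and then runs the test against the local copy:
+
+```shell
+# download the classical x2 SR checkpoint, then test with the local path
+wget -P ./checkpoints https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth
+python tools/test.py configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py ./checkpoints/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth
+```
+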
diff --git a/configs/swinir/metafile.yml b/configs/swinir/metafile.yml new file mode 100644 index 0000000000..f6a4a58617 --- /dev/null +++ b/configs/swinir/metafile.yml @@ -0,0 +1,446 @@ +Collections: +- Metadata: + Architecture: + - SwinIR + Name: SwinIR + Paper: + - https://arxiv.org/abs/2108.10257 + README: configs/swinir/README.md + Task: + - image super-resolution + - image denoising + - jpeg compression artifact reduction + Year: 2021 +Models: +- Config: configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: DIV2K + Name: swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k + Results: + - Dataset: DIV2K + Metrics: + DIV2K PSNR: 37.8921 + DIV2K SSIM: 0.9481 + Set14 PSNR: 34.1174 + Set14 SSIM: 0.923 + Set5 PSNR: 38.324 + Set5 SSIM: 0.9626 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k-ed2d419e.pth +- Config: configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: DIV2K + Name: swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k + Results: + - Dataset: DIV2K + Metrics: + DIV2K PSNR: 34.1397 + DIV2K SSIM: 0.8917 + Set14 PSNR: 30.7669 + Set14 SSIM: 0.8508 + Set5 PSNR: 34.864 + Set5 SSIM: 0.9317 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k-926950f1.pth +- Config: configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: DIV2K + Name: swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k + Results: + - Dataset: DIV2K + Metrics: + DIV2K PSNR: 32.0953 + DIV2K SSIM: 0.8418 + Set14 PSNR: 28.9065 + Set14 SSIM: 0.7915 + Set5 PSNR: 32.7315 + Set5 SSIM: 0.9029 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k-88e4903d.pth +- Config: configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k + Results: + - Dataset: Others + Metrics: + DIV2K PSNR: 37.9473 + DIV2K SSIM: 0.9488 + Set14 PSNR: 34.4149 + Set14 SSIM: 0.9252 + Set5 PSNR: 38.3971 + Set5 SSIM: 0.9629 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k-69e15fb6.pth +- Config: configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k + Results: + - Dataset: Others + Metrics: + DIV2K PSNR: 34.283 + DIV2K SSIM: 0.8939 + Set14 PSNR: 30.9258 + Set14 SSIM: 0.854 + Set5 PSNR: 34.9335 + Set5 SSIM: 0.9323 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k-d6982f7b.pth +- Config: configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k + Results: + - Dataset: Others + Metrics: + DIV2K PSNR: 32.3021 + DIV2K SSIM: 0.8451 + 
Set14 PSNR: 29.0792 + Set14 SSIM: 0.7953 + Set5 PSNR: 32.9214 + Set5 SSIM: 0.9053 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k-0502d775.pth +- Config: configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: DIV2K + Name: swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k + Results: + - Dataset: DIV2K + Metrics: + DIV2K PSNR: 37.5844 + DIV2K SSIM: 0.9459 + Set14 PSNR: 33.8404 + Set14 SSIM: 0.9207 + Set5 PSNR: 38.1289 + Set5 SSIM: 0.9617 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k-131d3f64.pth +- Config: configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: DIV2K + Name: swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k + Results: + - Dataset: DIV2K + Metrics: + DIV2K PSNR: 33.8394 + DIV2K SSIM: 0.8867 + Set14 PSNR: 30.534 + Set14 SSIM: 0.8468 + Set5 PSNR: 34.6037 + Set5 SSIM: 0.9293 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k-309cb239.pth +- Config: configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: DIV2K + Name: swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k + Results: + - Dataset: DIV2K + Metrics: + DIV2K PSNR: 31.8636 + DIV2K SSIM: 0.8353 + Set14 PSNR: 28.7441 + Set14 SSIM: 0.7861 + Set5 PSNR: 32.4343 + Set5 SSIM: 0.8984 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k-d6622d03.pth +- Config: configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost + Results: + - Dataset: Others + Metrics: + RealSRSet+5images NIQE: 5.7975 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-c6425057.pth +- Config: configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost + Results: + - Dataset: Others + Metrics: + RealSRSet+5images NIQE: 7.2738 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-6f0c425f.pth +- Config: configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost + Results: + - Dataset: Others + Metrics: + RealSRSet+5images NIQE: 5.2329 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-36960d18.pth +- Config: configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py + In Collection: SwinIR 
+  Metadata:
+    GPUs: '8'
+    Training Data: Others
+  Name: swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost
+  Results:
+  - Dataset: Others
+    Metrics:
+      RealSRSet+5images NIQE: 7.746
+    Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction
+  Weights: https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-os-a016a72f.pth
+- Config: configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+  In Collection: SwinIR
+  Metadata:
+    GPUs: '8'
+    Training Data: Others
+  Name: swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost
+  Results:
+  - Dataset: Others
+    Metrics:
+      RealSRSet+5images NIQE: 5.1464
+    Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction
+  Weights: https://download.openmmlab.com/mmediting/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-9f1599b5.pth
+- Config: configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py
+  In Collection: SwinIR
+  Metadata:
+    GPUs: '8'
+    Training Data: Others
+  Name: swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost
+  Results:
+  - Dataset: Others
+    Metrics:
+      RealSRSet+5images NIQE: 7.6378
+    Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction
+  Weights: https://download.openmmlab.com/mmediting/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-os-25f1722a.pth
+- Config: configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py
+  In Collection: SwinIR
+  Metadata:
+    GPUs: '8'
+    Training Data: Others
+  Name: swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15
+  Results:
+  - Dataset: Others
+    Metrics:
+      BSD68 PSNR: 32.5203
+      Set12 PSNR: 33.9731
+      Urban100 PSNR: 34.3424
+    Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction
+  Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15-6782691b.pth
+- Config: configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py
+  In Collection: SwinIR
+  Metadata:
+    GPUs: '8'
+    Training Data: Others
+  Name: swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25
+  Results:
+  - Dataset: Others
+    Metrics:
+      BSD68 PSNR: 30.1377
+      Set12 PSNR: 31.6434
+      Urban100 PSNR: 31.9493
+    Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction
+  Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25-d0d8d4da.pth
+- Config: configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py
+  In Collection: SwinIR
+  Metadata:
+    GPUs: '8'
+    Training Data: Others
+  Name: swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50
+  Results:
+  - Dataset: Others
+    Metrics:
+      BSD68 PSNR: 27.3157
+      Set12 PSNR: 28.5651
+      Urban100 PSNR: 28.6626
+    Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction
+  Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50-54c9968a.pth
+- Config: configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py
+  In Collection: SwinIR
+  Metadata:
+    GPUs: '8'
+    Training Data: Others
+  Name: swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15
+  Results:
+  - Dataset: Others
+    Metrics:
+      CBSD68 PSNR: 34.4136
+      Kodak24 PSNR: 35.3555
+      McMaster PSNR: 35.6205
+      Urban100 PSNR: 35.1836
+    Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction
+  Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15-c74a2cee.pth
+- Config:
configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25 + Results: + - Dataset: Others + Metrics: + CBSD68 PSNR: 31.7626 + Kodak24 PSNR: 32.9003 + McMaster PSNR: 33.3198 + Urban100 PSNR: 32.9458 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25-df2b1c0c.pth +- Config: configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50 + Results: + - Dataset: Others + Metrics: + CBSD68 PSNR: 28.5346 + Kodak24 PSNR: 29.8058 + McMaster PSNR: 30.2027 + Urban100 PSNR: 29.8832 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50-e369874c.pth +- Config: configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10 + Results: + - Dataset: Others + Metrics: + Classic5 PSNR: 30.2746 + Classic5 SSIM: 0.8254 + LIVE1 PSNR: 29.8611 + LIVE1 SSIM: 0.8292 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10-da93c8e9.pth +- Config: configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20 + Results: + - Dataset: Others + Metrics: + Classic5 PSNR: 32.5331 + Classic5 SSIM: 0.8753 + LIVE1 PSNR: 32.2667 + LIVE1 SSIM: 0.8914 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20-d47367b1.pth +- Config: configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30 + Results: + - Dataset: Others + Metrics: + Classic5 PSNR: 33.7504 + Classic5 SSIM: 0.8966 + LIVE1 PSNR: 33.7001 + LIVE1 SSIM: 0.9179 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30-52c083cf.pth +- Config: configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40 + Results: + - Dataset: Others + Metrics: + Classic5 PSNR: 34.5377 + Classic5 SSIM: 0.9087 + LIVE1 PSNR: 34.6846 + LIVE1 SSIM: 0.9322 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40-803e8d9b.pth +- Config: configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: 
swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10 + Results: + - Dataset: Others + Metrics: + Classic5 PSNR: 30.1019 + Classic5 SSIM: 0.8217 + LIVE1 PSNR: 28.0676 + LIVE1 SSIM: 0.8094 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10-09aafadc.pth +- Config: configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20 + Results: + - Dataset: Others + Metrics: + Classic5 PSNR: 32.3489 + Classic5 SSIM: 0.8727 + LIVE1 PSNR: 30.4514 + LIVE1 SSIM: 0.8745 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20-b8a42b5e.pth +- Config: configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30 + Results: + - Dataset: Others + Metrics: + Classic5 PSNR: 33.6028 + Classic5 SSIM: 0.8949 + LIVE1 PSNR: 31.8235 + LIVE1 SSIM: 0.9023 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30-e9fe6859.pth +- Config: configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py + In Collection: SwinIR + Metadata: + GPUs: '8' + Training Data: Others + Name: swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40 + Results: + - Dataset: Others + Metrics: + Classic5 PSNR: 34.4344 + Classic5 SSIM: 0.9076 + LIVE1 PSNR: 32.761 + LIVE1 SSIM: 0.9179 + Task: Image Super-Resolution, Image denoising, JPEG compression artifact reduction + Weights: https://download.openmmlab.com/mmediting/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40-5b77a6e6.pth diff --git a/configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py b/configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py new file mode 100644 index 0000000000..b527c7b70a --- /dev/null +++ b/configs/swinir/swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py @@ -0,0 +1,5 @@ +_base_ = ['swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py'] + +experiment_name = 'swinir_gan-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' diff --git a/configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py b/configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py new file mode 100644 index 0000000000..fff5e6b4a5 --- /dev/null +++ b/configs/swinir/swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py @@ -0,0 +1,5 @@ +_base_ = ['swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py'] + +experiment_name = 'swinir_gan-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' diff --git a/configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py b/configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py new file mode 100644 index 0000000000..86bd2a9175 --- /dev/null +++ b/configs/swinir/swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py @@ -0,0 +1,5 @@ +_base_ = ['swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py'] + +experiment_name = 
'swinir_gan-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' diff --git a/configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py b/configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py new file mode 100644 index 0000000000..ee1dc77bfb --- /dev/null +++ b/configs/swinir/swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py @@ -0,0 +1,61 @@ +_base_ = ['../_base_/default_runtime.py'] + +experiment_name = 'swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 2 +img_size = 64 + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='SwinIRNet', + upscale=scale, + in_chans=3, + img_size=img_size, + window_size=8, + img_range=1.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='nearest+conv', + resi_connection='1conv'), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', mean=[0., 0., 0.], std=[255., 255., + 255.])) + +test_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='PackEditInputs') +] + +test_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type='BasicImageDataset', + metainfo=dict(dataset_type='realsrset', task_name='realsr'), + data_root='data/RealSRSet+5images', + data_prefix=dict(img='', gt=''), + pipeline=test_pipeline)) + +test_evaluator = [dict(type='NIQE', input_order='CHW', convert_to='Y')] + +test_cfg = dict(type='TestLoop') diff --git a/configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py b/configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py new file mode 100644 index 0000000000..2f8e0451ec --- /dev/null +++ b/configs/swinir/swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py @@ -0,0 +1,10 @@ +_base_ = ['swinir_psnr-x2s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py'] + +experiment_name = 'swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 4 + +# model settings +model = dict(generator=dict(upscale=scale)) diff --git a/configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py b/configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py new file mode 100644 index 0000000000..d84a851aa3 --- /dev/null +++ b/configs/swinir/swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost.py @@ -0,0 +1,13 @@ +_base_ = ['swinir_psnr-x4s64w8d6e180_8xb4-lr1e-4-600k_df2k-ost.py'] + +experiment_name = 'swinir_psnr-x4s64w8d9e240_8xb4-lr1e-4-600k_df2k-ost' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# model settings +model = dict( + generator=dict( + depths=[6, 6, 6, 6, 6, 6, 6, 6, 6], + embed_dim=240, + num_heads=[8, 8, 8, 8, 8, 8, 8, 8, 8], + resi_connection='3conv')) diff --git a/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py new file mode 100644 index 0000000000..7eea2d978c --- /dev/null +++ b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py @@ -0,0 +1,134 @@ +_base_ 
= [ + '../_base_/default_runtime.py', + '../_base_/datasets/decompression_test_config.py' +] + +experiment_name = 'swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +quality = 10 + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='SwinIRNet', + upscale=1, + in_chans=3, + img_size=126, + window_size=7, + img_range=255.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='', + resi_connection='1conv'), + pixel_loss=dict(type='CharbonnierLoss', eps=1e-9), + data_preprocessor=dict( + type='EditDataPreprocessor', mean=[0., 0., 0.], std=[255., 255., + 255.])) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='SetValues', dictionary=dict(scale=1)), + dict(type='PairedRandomCrop', gt_patch_size=126), + dict( + type='Flip', + keys=['img', 'gt'], + flip_ratio=0.5, + direction='horizontal'), + dict( + type='Flip', keys=['img', 'gt'], flip_ratio=0.5, direction='vertical'), + dict(type='RandomTransposeHW', keys=['img', 'gt'], transpose_ratio=0.5), + dict( + type='RandomJPEGCompression', + params=dict(quality=[quality, quality], color_type='color'), + keys=['img']), + dict(type='PackEditInputs') +] + +val_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='RandomJPEGCompression', + params=dict(quality=[quality, quality], color_type='color'), + keys=['img']), + dict(type='PackEditInputs') +] + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=2, + batch_size=1, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DFWB8550sub_GT.txt', + metainfo=dict(dataset_type='dfwb', task_name='CAR'), + data_root=data_root + '/DFWB', + data_prefix=dict(img='', gt=''), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) + +val_dataloader = dict( + num_workers=2, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + metainfo=dict(dataset_type='live1', task_name='CAR'), + data_root=data_root + '/LIVE1', + data_prefix=dict(img='', gt=''), + pipeline=val_pipeline)) + +val_evaluator = [ + dict(type='PSNR', prefix='LIVE1'), + dict(type='SSIM', prefix='LIVE1'), +] + +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=1_600_000, val_interval=5000) +val_cfg = dict(type='ValLoop') + +# optimizer +optim_wrapper = dict( + constructor='DefaultOptimWrapperConstructor', + type='OptimWrapper', + optimizer=dict(type='Adam', lr=2e-4, betas=(0.9, 0.999))) + +# learning policy +param_scheduler = dict( + type='MultiStepLR', + by_epoch=False, + milestones=[800000, 1200000, 1400000, 1500000, 1600000], + gamma=0.5) diff --git a/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py new file mode 100644 index 0000000000..f83071f0d0 --- /dev/null +++ 
b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py'] + +experiment_name = 'swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR20' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify JPEG quality factor of RandomJPEGCompression +quality = 20 +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['quality'] = [quality, quality] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['quality'] = [quality, quality] + +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['quality'] = [quality, quality] diff --git a/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py new file mode 100644 index 0000000000..44872b9de8 --- /dev/null +++ b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py'] + +experiment_name = 'swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR30' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify JPEG quality factor of RandomJPEGCompression +quality = 30 +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['quality'] = [quality, quality] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['quality'] = [quality, quality] + +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['quality'] = [quality, quality] diff --git a/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py new file mode 100644 index 0000000000..69ae48385a --- /dev/null +++ b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR10.py'] + +experiment_name = 'swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-colorCAR40' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify JPEG quality factor of RandomJPEGCompression +quality = 40 +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['quality'] = [quality, quality] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['quality'] = [quality, quality] + +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['quality'] = [quality, quality] diff --git a/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py new file mode 100644 index 0000000000..d4206be1c5 --- /dev/null +++ b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py @@ -0,0 +1,139 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/decompression_test_config.py' +] + 
+experiment_name = 'swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +quality = 10 + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='SwinIRNet', + upscale=1, + in_chans=1, + img_size=126, + window_size=7, + img_range=255.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='', + resi_connection='1conv'), + pixel_loss=dict(type='CharbonnierLoss', eps=1e-9), + data_preprocessor=dict(type='EditDataPreprocessor', mean=[0.], std=[255.])) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='grayscale', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='grayscale', + imdecode_backend='cv2'), + dict(type='SetValues', dictionary=dict(scale=1)), + dict(type='PairedRandomCrop', gt_patch_size=126), + dict( + type='Flip', + keys=['img', 'gt'], + flip_ratio=0.5, + direction='horizontal'), + dict( + type='Flip', keys=['img', 'gt'], flip_ratio=0.5, direction='vertical'), + dict(type='RandomTransposeHW', keys=['img', 'gt'], transpose_ratio=0.5), + dict( + type='RandomJPEGCompression', + params=dict(quality=[quality, quality], color_type='grayscale'), + keys=['img']), + dict(type='PackEditInputs') +] + +val_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='grayscale', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='grayscale', + imdecode_backend='cv2'), + dict( + type='RandomJPEGCompression', + params=dict(quality=[quality, quality], color_type='grayscale'), + keys=['img']), + dict(type='PackEditInputs') +] + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=1, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DFWB8550sub_GT.txt', + metainfo=dict(dataset_type='dfwb', task_name='CAR'), + data_root=data_root + '/DFWB', + data_prefix=dict(img='', gt=''), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) + +val_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + metainfo=dict(dataset_type='classic5', task_name='CAR'), + data_root=data_root + '/classic5', + data_prefix=dict(img='', gt=''), + pipeline=val_pipeline)) + +val_evaluator = [ + dict(type='PSNR', prefix='classic5'), + dict(type='SSIM', prefix='classic5'), +] + +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=1_600_000, val_interval=5000) +val_cfg = dict(type='ValLoop') + +test_dataloader = _base_.test_dataloader +for idx in range(len(test_dataloader)): + test_pipeline = test_dataloader[idx]['dataset']['pipeline'] + if idx > 0: + test_pipeline[0]['to_y_channel'] = True + test_pipeline[1]['to_y_channel'] = True + else: + test_pipeline[0]['color_type'] = 'grayscale' + test_pipeline[1]['color_type'] = 'grayscale' + test_pipeline[2]['params']['color_type'] = 'grayscale' + +# optimizer +optim_wrapper = dict( + constructor='DefaultOptimWrapperConstructor', + type='OptimWrapper', + optimizer=dict(type='Adam', lr=2e-4, betas=(0.9, 0.999))) + +# learning policy +param_scheduler = dict( + type='MultiStepLR', + by_epoch=False, + milestones=[800000, 1200000, 1400000, 1500000, 1600000], + gamma=0.5) diff --git 
a/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py new file mode 100644 index 0000000000..f3ea47b95f --- /dev/null +++ b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py'] + +experiment_name = 'swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR20' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify JPEG quality factor of RandomJPEGCompression +quality = 20 +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['quality'] = [quality, quality] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['quality'] = [quality, quality] + +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['quality'] = [quality, quality] diff --git a/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py new file mode 100644 index 0000000000..6054939a18 --- /dev/null +++ b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py'] + +experiment_name = 'swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR30' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify JPEG quality factor of RandomJPEGCompression +quality = 30 +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['quality'] = [quality, quality] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['quality'] = [quality, quality] + +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['quality'] = [quality, quality] diff --git a/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py new file mode 100644 index 0000000000..4b0d6d6f02 --- /dev/null +++ b/configs/swinir/swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR10.py'] + +experiment_name = 'swinir_s126w7d6e180_8xb1-lr2e-4-1600k_dfwb-grayCAR40' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify JPEG quality factor of RandomJPEGCompression +quality = 40 +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['quality'] = [quality, quality] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['quality'] = [quality, quality] + +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['quality'] = [quality, quality] diff --git a/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py new file mode 100644 index 0000000000..469e4c6e8b --- 
/dev/null +++ b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py @@ -0,0 +1,147 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-gaussian_color_test_config.py' +] + +experiment_name = 'swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 15 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='SwinIRNet', + upscale=1, + in_chans=3, + img_size=128, + window_size=8, + img_range=1.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='', + resi_connection='1conv'), + pixel_loss=dict(type='CharbonnierLoss', eps=1e-9), + data_preprocessor=dict( + type='EditDataPreprocessor', mean=[0., 0., 0.], std=[255., 255., + 255.])) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='SetValues', dictionary=dict(scale=1)), + dict(type='PairedRandomCrop', gt_patch_size=128), + dict( + type='Flip', + keys=['img', 'gt'], + flip_ratio=0.5, + direction='horizontal'), + dict( + type='Flip', keys=['img', 'gt'], flip_ratio=0.5, direction='vertical'), + dict(type='RandomTransposeHW', keys=['img', 'gt'], transpose_ratio=0.5), + dict( + type='RandomNoise', + params=dict( + noise_type=['gaussian'], + noise_prob=[1], + gaussian_sigma=[sigma * 255, sigma * 255], + gaussian_gray_noise_prob=0), + keys=['img']), + dict(type='PackEditInputs') +] + +val_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='RandomNoise', + params=dict( + noise_type=['gaussian'], + noise_prob=[1], + gaussian_sigma=[sigma * 255, sigma * 255], + gaussian_gray_noise_prob=0), + keys=['img']), + dict(type='PackEditInputs') +] + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=1, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DFWB8550sub_GT.txt', + metainfo=dict(dataset_type='dfwb', task_name='denoising'), + data_root=data_root + '/DFWB', + data_prefix=dict(img='', gt=''), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) + +val_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + metainfo=dict(dataset_type='mcmaster', task_name='denoising'), + data_root=data_root + '/McMaster', + data_prefix=dict(img='', gt=''), + pipeline=val_pipeline)) + +val_evaluator = [ + dict(type='PSNR', prefix='McMaster'), + dict(type='SSIM', prefix='McMaster'), +] + +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=1_600_000, val_interval=5000) +val_cfg = dict(type='ValLoop') + +# optimizer +optim_wrapper = dict( + constructor='DefaultOptimWrapperConstructor', + 
type='OptimWrapper', + optimizer=dict(type='Adam', lr=2e-4, betas=(0.9, 0.999))) + +# learning policy +param_scheduler = dict( + type='MultiStepLR', + by_epoch=False, + milestones=[800000, 1200000, 1400000, 1500000, 1600000], + gamma=0.5) diff --git a/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py new file mode 100644 index 0000000000..1b11cb3c80 --- /dev/null +++ b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py'] + +experiment_name = 'swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN25' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 25 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] + +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['gaussian_sigma'] = [sigma, sigma] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] diff --git a/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py new file mode 100644 index 0000000000..c57d636a71 --- /dev/null +++ b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN15.py'] + +experiment_name = 'swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-colorDN50' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 50 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] + +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['gaussian_sigma'] = [sigma, sigma] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] diff --git a/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py new file mode 100644 index 0000000000..2f765c3c3a --- /dev/null +++ b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py @@ -0,0 +1,141 @@ +_base_ = [ + '../_base_/default_runtime.py', + '../_base_/datasets/denoising-gaussian_gray_test_config.py' +] + +experiment_name = 'swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 15 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='SwinIRNet', + upscale=1, + in_chans=1, + img_size=128, + window_size=8, + img_range=1.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='', + 
resi_connection='1conv'), + pixel_loss=dict(type='CharbonnierLoss', eps=1e-9), + data_preprocessor=dict(type='EditDataPreprocessor', mean=[0.], std=[255.])) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='grayscale', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='grayscale', + imdecode_backend='cv2'), + dict(type='SetValues', dictionary=dict(scale=1)), + dict(type='PairedRandomCrop', gt_patch_size=128), + dict( + type='Flip', + keys=['img', 'gt'], + flip_ratio=0.5, + direction='horizontal'), + dict( + type='Flip', keys=['img', 'gt'], flip_ratio=0.5, direction='vertical'), + dict(type='RandomTransposeHW', keys=['img', 'gt'], transpose_ratio=0.5), + dict( + type='RandomNoise', + params=dict( + noise_type=['gaussian'], + noise_prob=[1], + gaussian_sigma=[sigma, sigma], + gaussian_gray_noise_prob=0), + keys=['img']), + dict(type='PackEditInputs') +] + +val_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='grayscale', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='grayscale', + imdecode_backend='cv2'), + dict( + type='RandomNoise', + params=dict( + noise_type=['gaussian'], + noise_prob=[1], + gaussian_sigma=[sigma, sigma], + gaussian_gray_noise_prob=0), + keys=['img']), + dict(type='PackEditInputs') +] + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=1, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DFWB8550sub_GT.txt', + metainfo=dict(dataset_type='dfwb', task_name='denoising'), + data_root=data_root + '/DFWB', + data_prefix=dict(img='', gt=''), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) + +val_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + metainfo=dict(dataset_type='set12', task_name='denoising'), + data_root=data_root + '/Set12', + data_prefix=dict(img='', gt=''), + pipeline=val_pipeline)) + +val_evaluator = [ + dict(type='PSNR', prefix='Set12'), + dict(type='SSIM', prefix='Set12'), +] + +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=1_600_000, val_interval=5000) +val_cfg = dict(type='ValLoop') + +# optimizer +optim_wrapper = dict( + constructor='DefaultOptimWrapperConstructor', + type='OptimWrapper', + optimizer=dict(type='Adam', lr=2e-4, betas=(0.9, 0.999))) + +# learning policy +param_scheduler = dict( + type='MultiStepLR', + by_epoch=False, + milestones=[800000, 1200000, 1400000, 1500000, 1600000], + gamma=0.5) diff --git a/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py new file mode 100644 index 0000000000..34f364400b --- /dev/null +++ b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py'] + +experiment_name = 'swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN25' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 25 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] + +train_dataloader = 
_base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['gaussian_sigma'] = [sigma, sigma] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] diff --git a/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py new file mode 100644 index 0000000000..5f96a5ae6d --- /dev/null +++ b/configs/swinir/swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50.py @@ -0,0 +1,20 @@ +_base_ = ['swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN15.py'] + +experiment_name = 'swinir_s128w8d6e180_8xb1-lr2e-4-1600k_dfwb-grayDN50' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +# modify sigma of RandomNoise +sigma = 50 +test_dataloader = _base_.test_dataloader +for dataloader in test_dataloader: + test_pipeline = dataloader['dataset']['pipeline'] + test_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] + +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[-2]['params']['gaussian_sigma'] = [sigma, sigma] + +val_dataloader = _base_.val_dataloader +val_pipeline = val_dataloader['dataset']['pipeline'] +val_pipeline[2]['params']['gaussian_sigma'] = [sigma, sigma] diff --git a/configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py b/configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py new file mode 100644 index 0000000000..7abface52f --- /dev/null +++ b/configs/swinir/swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py @@ -0,0 +1,133 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/sisr_x2_test_config.py' +] + +experiment_name = 'swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 2 +img_size = 48 + +# evaluated on Y channels +test_evaluator = _base_.test_evaluator +for evaluator in test_evaluator: + for metric in evaluator: + metric['convert_to'] = 'Y' + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='SwinIRNet', + upscale=scale, + in_chans=3, + img_size=img_size, + window_size=8, + img_range=1.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='pixelshuffle', + resi_connection='1conv'), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', mean=[0., 0., 0.], std=[255., 255., + 255.])) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='SetValues', dictionary=dict(scale=scale)), + dict(type='PairedRandomCrop', gt_patch_size=img_size * scale), + dict( + type='Flip', + keys=['img', 'gt'], + flip_ratio=0.5, + direction='horizontal'), + dict( + type='Flip', keys=['img', 'gt'], flip_ratio=0.5, direction='vertical'), + dict(type='RandomTransposeHW', keys=['img', 'gt'], transpose_ratio=0.5), + dict(type='PackEditInputs') +] + +val_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + 
dict(type='PackEditInputs') +] + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=4, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DIV2K800sub_GT.txt', + metainfo=dict(dataset_type='div2k', task_name='sisr'), + data_root=data_root + '/DIV2K', + data_prefix=dict( + img='DIV2K_train_LR_bicubic/X2_sub', gt='DIV2K_train_HR_sub'), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) + +val_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + metainfo=dict(dataset_type='set5', task_name='sisr'), + data_root=data_root + '/Set5', + data_prefix=dict(img='LRbicx2', gt='GTmod12'), + pipeline=val_pipeline)) + +val_evaluator = [ + dict(type='PSNR', crop_border=scale), + dict(type='SSIM', crop_border=scale), +] + +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=500_000, val_interval=5000) +val_cfg = dict(type='ValLoop') + +# optimizer +optim_wrapper = dict( + constructor='DefaultOptimWrapperConstructor', + type='OptimWrapper', + optimizer=dict(type='Adam', lr=2e-4, betas=(0.9, 0.999))) + +# learning policy +param_scheduler = dict( + type='MultiStepLR', + by_epoch=False, + milestones=[250000, 400000, 450000, 475000], + gamma=0.5) diff --git a/configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py b/configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py new file mode 100644 index 0000000000..88f5d001bf --- /dev/null +++ b/configs/swinir/swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k.py @@ -0,0 +1,22 @@ +_base_ = ['swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py'] + +experiment_name = 'swinir_x2s64w8d4e60_8xb4-lr2e-4-500k_div2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 2 +img_size = 64 + +# model settings +model = dict( + generator=dict( + img_size=img_size, + depths=[6, 6, 6, 6], + embed_dim=60, + num_heads=[6, 6, 6, 6], + upsampler='pixelshuffledirect')) + +# modify patch size of train_dataloader +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[3]['gt_patch_size'] = img_size * scale diff --git a/configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py b/configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py new file mode 100644 index 0000000000..475e42eef5 --- /dev/null +++ b/configs/swinir/swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k.py @@ -0,0 +1,35 @@ +_base_ = ['swinir_x2s48w8d6e180_8xb4-lr2e-4-500k_div2k.py'] + +experiment_name = 'swinir_x2s64w8d6e180_8xb4-lr2e-4-500k_df2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 2 +img_size = 64 + +# model settings +model = dict(generator=dict(img_size=img_size)) + +# modify patch size of train_pipeline +train_pipeline = _base_.train_pipeline +train_pipeline[3]['gt_patch_size'] = img_size * scale + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=4, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DF2K3450sub_GT.txt', + metainfo=dict(dataset_type='div2k', task_name='sisr'), + data_root=data_root + '/DF2K', + data_prefix=dict( + img='DF2K_train_LR_bicubic/X2_sub', 
gt='DF2K_train_HR_sub'), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) diff --git a/configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py b/configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py new file mode 100644 index 0000000000..20f8e1c533 --- /dev/null +++ b/configs/swinir/swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py @@ -0,0 +1,133 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/sisr_x3_test_config.py' +] + +experiment_name = 'swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 3 +img_size = 48 + +# evaluated on Y channels +test_evaluator = _base_.test_evaluator +for evaluator in test_evaluator: + for metric in evaluator: + metric['convert_to'] = 'Y' + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='SwinIRNet', + upscale=scale, + in_chans=3, + img_size=img_size, + window_size=8, + img_range=1.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='pixelshuffle', + resi_connection='1conv'), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', mean=[0., 0., 0.], std=[255., 255., + 255.])) + +train_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='SetValues', dictionary=dict(scale=scale)), + dict(type='PairedRandomCrop', gt_patch_size=img_size * scale), + dict( + type='Flip', + keys=['img', 'gt'], + flip_ratio=0.5, + direction='horizontal'), + dict( + type='Flip', keys=['img', 'gt'], flip_ratio=0.5, direction='vertical'), + dict(type='RandomTransposeHW', keys=['img', 'gt'], transpose_ratio=0.5), + dict(type='PackEditInputs') +] + +val_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='PackEditInputs') +] + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=4, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DIV2K800sub_GT.txt', + metainfo=dict(dataset_type='div2k', task_name='sisr'), + data_root=data_root + '/DIV2K', + data_prefix=dict( + img='DIV2K_train_LR_bicubic/X3_sub', gt='DIV2K_train_HR_sub'), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) + +val_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + metainfo=dict(dataset_type='set5', task_name='sisr'), + data_root=data_root + '/Set5', + data_prefix=dict(img='LRbicx3', gt='GTmod12'), + pipeline=val_pipeline)) + +val_evaluator = [ + dict(type='PSNR', crop_border=scale), + dict(type='SSIM', crop_border=scale), +] + +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=500_000, val_interval=5000) +val_cfg = dict(type='ValLoop') + +# optimizer +optim_wrapper = dict( + constructor='DefaultOptimWrapperConstructor', + type='OptimWrapper', + optimizer=dict(type='Adam', lr=2e-4, 
betas=(0.9, 0.999))) + +# learning policy +param_scheduler = dict( + type='MultiStepLR', + by_epoch=False, + milestones=[250000, 400000, 450000, 475000], + gamma=0.5) diff --git a/configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py b/configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py new file mode 100644 index 0000000000..6f929ec8d7 --- /dev/null +++ b/configs/swinir/swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k.py @@ -0,0 +1,22 @@ +_base_ = ['swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py'] + +experiment_name = 'swinir_x3s64w8d4e60_8xb4-lr2e-4-500k_div2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 3 +img_size = 64 + +# model settings +model = dict( + generator=dict( + img_size=img_size, + depths=[6, 6, 6, 6], + embed_dim=60, + num_heads=[6, 6, 6, 6], + upsampler='pixelshuffledirect')) + +# modify patch size of train_dataloader +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[3]['gt_patch_size'] = img_size * scale diff --git a/configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py b/configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py new file mode 100644 index 0000000000..656046e9d1 --- /dev/null +++ b/configs/swinir/swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k.py @@ -0,0 +1,35 @@ +_base_ = ['swinir_x3s48w8d6e180_8xb4-lr2e-4-500k_div2k.py'] + +experiment_name = 'swinir_x3s64w8d6e180_8xb4-lr2e-4-500k_df2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 3 +img_size = 64 + +# model settings +model = dict(generator=dict(img_size=img_size)) + +# modify patch size of train_pipeline +train_pipeline = _base_.train_pipeline +train_pipeline[3]['gt_patch_size'] = img_size * scale + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=4, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DF2K3450sub_GT.txt', + metainfo=dict(dataset_type='div2k', task_name='sisr'), + data_root=data_root + '/DF2K', + data_prefix=dict( + img='DF2K_train_LR_bicubic/X3_sub', gt='DF2K_train_HR_sub'), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) diff --git a/configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py b/configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py new file mode 100644 index 0000000000..7409802acd --- /dev/null +++ b/configs/swinir/swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py @@ -0,0 +1,133 @@ +_base_ = [ + '../_base_/default_runtime.py', '../_base_/datasets/sisr_x4_test_config.py' +] + +experiment_name = 'swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 4 +img_size = 48 + +# evaluated on Y channels +test_evaluator = _base_.test_evaluator +for evaluator in test_evaluator: + for metric in evaluator: + metric['convert_to'] = 'Y' + +# model settings +model = dict( + type='BaseEditModel', + generator=dict( + type='SwinIRNet', + upscale=scale, + in_chans=3, + img_size=img_size, + window_size=8, + img_range=1.0, + depths=[6, 6, 6, 6, 6, 6], + embed_dim=180, + num_heads=[6, 6, 6, 6, 6, 6], + mlp_ratio=2, + upsampler='pixelshuffle', + resi_connection='1conv'), + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean'), + data_preprocessor=dict( + type='EditDataPreprocessor', mean=[0., 0., 0.], std=[255., 255., + 255.])) + 
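+# With mean=[0., 0., 0.] and std=[255., 255., 255.] the data preprocessor
+# rescales 8-bit RGB inputs to roughly [0, 1], the range implied by
+# img_range=1.0 in SwinIRNet; this mirrors the x2/x3 DIV2K configs above.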
+train_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='SetValues', dictionary=dict(scale=scale)), + dict(type='PairedRandomCrop', gt_patch_size=img_size * scale), + dict( + type='Flip', + keys=['img', 'gt'], + flip_ratio=0.5, + direction='horizontal'), + dict( + type='Flip', keys=['img', 'gt'], flip_ratio=0.5, direction='vertical'), + dict(type='RandomTransposeHW', keys=['img', 'gt'], transpose_ratio=0.5), + dict(type='PackEditInputs') +] + +val_pipeline = [ + dict( + type='LoadImageFromFile', + key='img', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict( + type='LoadImageFromFile', + key='gt', + color_type='color', + channel_order='rgb', + imdecode_backend='cv2'), + dict(type='PackEditInputs') +] + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=4, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DIV2K800sub_GT.txt', + metainfo=dict(dataset_type='div2k', task_name='sisr'), + data_root=data_root + '/DIV2K', + data_prefix=dict( + img='DIV2K_train_LR_bicubic/X4_sub', gt='DIV2K_train_HR_sub'), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) + +val_dataloader = dict( + num_workers=4, + persistent_workers=False, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + metainfo=dict(dataset_type='set5', task_name='sisr'), + data_root=data_root + '/Set5', + data_prefix=dict(img='LRbicx4', gt='GTmod12'), + pipeline=val_pipeline)) + +val_evaluator = [ + dict(type='PSNR', crop_border=scale), + dict(type='SSIM', crop_border=scale), +] + +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=500_000, val_interval=5000) +val_cfg = dict(type='ValLoop') + +# optimizer +optim_wrapper = dict( + constructor='DefaultOptimWrapperConstructor', + type='OptimWrapper', + optimizer=dict(type='Adam', lr=2e-4, betas=(0.9, 0.999))) + +# learning policy +param_scheduler = dict( + type='MultiStepLR', + by_epoch=False, + milestones=[250000, 400000, 450000, 475000], + gamma=0.5) diff --git a/configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py b/configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py new file mode 100644 index 0000000000..09a08ed95a --- /dev/null +++ b/configs/swinir/swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k.py @@ -0,0 +1,22 @@ +_base_ = ['swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py'] + +experiment_name = 'swinir_x4s64w8d4e60_8xb4-lr2e-4-500k_div2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 4 +img_size = 64 + +# model settings +model = dict( + generator=dict( + img_size=img_size, + depths=[6, 6, 6, 6], + embed_dim=60, + num_heads=[6, 6, 6, 6], + upsampler='pixelshuffledirect')) + +# modify patch size of train_dataloader +train_dataloader = _base_.train_dataloader +train_pipeline = train_dataloader['dataset']['pipeline'] +train_pipeline[3]['gt_patch_size'] = img_size * scale diff --git a/configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py b/configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py new file mode 100644 index 0000000000..87cdf5e609 --- /dev/null +++ 
b/configs/swinir/swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k.py @@ -0,0 +1,35 @@ +_base_ = ['swinir_x4s48w8d6e180_8xb4-lr2e-4-500k_div2k.py'] + +experiment_name = 'swinir_x4s64w8d6e180_8xb4-lr2e-4-500k_df2k' +work_dir = f'./work_dirs/{experiment_name}' +save_dir = './work_dirs/' + +scale = 4 +img_size = 64 + +# model settings +model = dict(generator=dict(img_size=img_size)) + +# modify patch size of train_pipeline +train_pipeline = _base_.train_pipeline +train_pipeline[3]['gt_patch_size'] = img_size * scale + +# dataset settings +dataset_type = 'BasicImageDataset' +data_root = 'data' + +train_dataloader = dict( + num_workers=4, + batch_size=4, + drop_last=True, + persistent_workers=False, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + ann_file='meta_info_DF2K3450sub_GT.txt', + metainfo=dict(dataset_type='div2k', task_name='sisr'), + data_root=data_root + '/DF2K', + data_prefix=dict( + img='DF2K_train_LR_bicubic/X4_sub', gt='DF2K_train_HR_sub'), + filename_tmpl=dict(img='{}', gt='{}'), + pipeline=train_pipeline)) diff --git a/configs/tdan/README.md b/configs/tdan/README.md index 58d56e83f4..21a846bfa3 100644 --- a/configs/tdan/README.md +++ b/configs/tdan/README.md @@ -25,17 +25,17 @@ The metrics are `PSNR / SSIM` . | Method | Vid4 (BIx4) PSNR (Y) | SPMCS-30 (BIx4) PSNR (Y) | Vid4 (BDx4) PSNR (Y) | SPMCS-30 (BDx4) PSNR (Y) | GPU Info | Download | | :-----------------------------------------: | :------------------: | :----------------------: | :------------------: | :----------------------: | :----------------------: | :-------------------------------------------: | -| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi](/configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | -| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd](/configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | -| [tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi](/configs/tdan/tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi.py) | **26.49** | **30.42** | 25.93 | 29.69 | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528-739979d9.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528_135616.log.json) | -| [tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd](/configs/tdan/tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd.py) | 25.80 | 29.56 | **26.87** | **30.77** | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528-c53ab844.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528_122401.log.json) | +| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi](./tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | +| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd](./tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | +| [tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi](./tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py) | **26.49** | **30.42** | 25.93 | 29.69 | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528-739979d9.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528_135616.log.json) | +| [tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd](./tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py) | 25.80 | 29.56 | **26.87** | **30.77** | 8 (Tesla V100-SXM2-32GB) | 
[model](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528-c53ab844.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528_122401.log.json) | | Method | Vid4 (BIx4) SSIM (Y) | SPMCS-30 (BIx4) SSIM (Y) | Vid4 (BDx4) SSIM (Y) | SPMCS-30 (BDx4) SSIM (Y) | GPU Info | Download | | :-----------------------------------------: | :------------------: | :----------------------: | :------------------: | :----------------------: | :----------------------: | :-------------------------------------------: | -| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi](/configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | -| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd](/configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | -| [tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi](/configs/tdan/tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi.py) | **0.792** | **0.856** | 0.772 | 0.842 | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528-739979d9.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528_135616.log.json) | -| [tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd](/configs/tdan/tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd.py) | 0.784 | 0.851 | **0.815** | **0.868** | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528-c53ab844.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528_122401.log.json) | +| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi](./tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | +| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd](./tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | +| [tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi](./tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py) | **0.792** | **0.856** | 0.772 | 0.842 | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528-739979d9.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528_135616.log.json) | +| [tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd](./tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py) | 0.784 | 0.851 | **0.815** | **0.868** | 8 (Tesla V100-SXM2-32GB) | [model](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528-c53ab844.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528_122401.log.json) | ## Quick Start diff --git a/configs/tdan/README_zh-CN.md b/configs/tdan/README_zh-CN.md index bbee9ff5ec..e960d82e15 100644 --- a/configs/tdan/README_zh-CN.md +++ b/configs/tdan/README_zh-CN.md @@ -25,10 +25,10 @@ | 算法 | Vid4 (BIx4) | SPMCS-30 (BIx4) | Vid4 (BDx4) | SPMCS-30 (BDx4) | GPU 信息 | 下载 | | :--------------------------------------------------------: | :-------------: | :-------------: | :-------------: | :-------------: | :----------------------: | :--------------------------------------------------------: | -| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi](/configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | -| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd](/configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | -| 
[tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi](/configs/tdan/tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi.py) | **26.49/0.792** | **30.42/0.856** | 25.93/0.772 | 29.69/0.842 | 8 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528-739979d9.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528_135616.log.json) | -| [tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd](/configs/tdan/tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd.py) | 25.80/0.784 | 29.56/0.851 | **26.87/0.815** | **30.77/0.868** | 8 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528-c53ab844.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528_122401.log.json) | +| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi](./tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | +| [tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd](./tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd.py) | - | - | - | - | 8 (Tesla V100-SXM2-32GB) | - | +| [tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi](./tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py) | **26.49/0.792** | **30.42/0.856** | 25.93/0.772 | 29.69/0.842 | 8 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528-739979d9.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528_135616.log.json) | +| [tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd](./tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py) | 25.80/0.784 | 29.56/0.851 | **26.87/0.815** | **30.77/0.868** | 8 (Tesla V100-SXM2-32GB) | [模型](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528-c53ab844.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528_122401.log.json) | ## 快速开始 diff --git a/configs/tdan/metafile.yml b/configs/tdan/metafile.yml index b47fd6ccd8..5bb95de354 100644 --- a/configs/tdan/metafile.yml +++ b/configs/tdan/metafile.yml @@ -6,35 +6,38 @@ Collections: Paper: - https://arxiv.org/abs/1812.02898 README: configs/tdan/README.md + Task: + - video super-resolution + Year: 2020 Models: -- Config: configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi.py +- Config: configs/tdan/tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py In Collection: TDAN Metadata: GPUs: 8 (Tesla V100-SXM2-32GB) Training Data: VIMEO90K - Name: tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi + Name: tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi Results: - Dataset: VIMEO90K Metrics: {} Task: Video Super-Resolution Weights: '' -- Config: configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd.py +- Config: configs/tdan/tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd.py In Collection: TDAN Metadata: GPUs: 8 (Tesla V100-SXM2-32GB) Training Data: VIMEO90K - Name: tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd + Name: tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd Results: - Dataset: VIMEO90K Metrics: {} Task: Video Super-Resolution Weights: '' -- Config: configs/tdan/tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi.py +- Config: configs/tdan/tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py In Collection: TDAN Metadata: GPUs: 8 (Tesla V100-SXM2-32GB) Training Data: VIMEO90K - Name: tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi + Name: tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi Results: - Dataset: VIMEO90K Metrics: @@ -44,12 +47,12 @@ Models: Vid4 (BIx4) PSNR (Y): 26.49 Task: Video Super-Resolution Weights: https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528-739979d9.pth -- Config: 
configs/tdan/tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd.py +- Config: configs/tdan/tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py In Collection: TDAN Metadata: GPUs: 8 (Tesla V100-SXM2-32GB) Training Data: VIMEO90K - Name: tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd + Name: tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd Results: - Dataset: VIMEO90K Metrics: @@ -59,34 +62,34 @@ Models: Vid4 (BIx4) PSNR (Y): 25.8 Task: Video Super-Resolution Weights: https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bdx4_20210528-c53ab844.pth -- Config: configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi.py +- Config: configs/tdan/tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py In Collection: TDAN Metadata: GPUs: 8 (Tesla V100-SXM2-32GB) Training Data: VIMEO90K - Name: tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi + Name: tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi Results: - Dataset: VIMEO90K Metrics: {} Task: Video Super-Resolution Weights: '' -- Config: configs/tdan/tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd.py +- Config: configs/tdan/tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd.py In Collection: TDAN Metadata: GPUs: 8 (Tesla V100-SXM2-32GB) Training Data: VIMEO90K - Name: tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd + Name: tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd Results: - Dataset: VIMEO90K Metrics: {} Task: Video Super-Resolution Weights: '' -- Config: configs/tdan/tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi.py +- Config: configs/tdan/tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py In Collection: TDAN Metadata: GPUs: 8 (Tesla V100-SXM2-32GB) Training Data: VIMEO90K - Name: tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi + Name: tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi Results: - Dataset: VIMEO90K Metrics: @@ -96,12 +99,12 @@ Models: Vid4 (BIx4) SSIM (Y): 0.792 Task: Video Super-Resolution Weights: https://download.openmmlab.com/mmediting/restorers/tdan/tdan_vimeo90k_bix4_20210528-739979d9.pth -- Config: configs/tdan/tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd.py +- Config: configs/tdan/tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py In Collection: TDAN Metadata: GPUs: 8 (Tesla V100-SXM2-32GB) Training Data: VIMEO90K - Name: tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd + Name: tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd Results: - Dataset: VIMEO90K Metrics: diff --git a/configs/tdan/tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py b/configs/tdan/tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py index e6af5d8922..8e7dbc0e5a 100644 --- a/configs/tdan/tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py +++ b/configs/tdan/tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py @@ -1,4 +1,4 @@ -_base_ = './tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd.py' +_base_ = './tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd.py' experiment_name = 'tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi' work_dir = f'./work_dirs/{experiment_name}' diff --git a/configs/tdan/tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py b/configs/tdan/tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py index 4da1fd0974..4a510b8e59 100644 --- a/configs/tdan/tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py +++ b/configs/tdan/tdan_x4ft_8xb16-lr5e-5-400k_vimeo90k-bi.py @@ -1,4 +1,4 @@ -_base_ = './tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bi.py' +_base_ = './tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bi.py' experiment_name = 'tdan_x4ft_1xb16-lr5e-5-400k_vimeo90k-bi' work_dir = f'./work_dirs/{experiment_name}' diff --git a/configs/tdan/tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py b/configs/tdan/tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py index 6f80adf061..ab96135d87 100644 --- a/configs/tdan/tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py +++ b/configs/tdan/tdan_x4ft_8xb16-lr5e-5-800k_vimeo90k-bd.py @@ -1,4 +1,4 @@ -_base_ = 
'./tdan_x4_1xb16-lr1e-4-400k_vimeo90k-bd.py' +_base_ = './tdan_x4_8xb16-lr1e-4-400k_vimeo90k-bd.py' experiment_name = 'tdan_x4ft_1xb16-lr5e-5-800k_vimeo90k-bd' work_dir = f'./work_dirs/{experiment_name}' diff --git a/configs/tof/README.md b/configs/tof/README.md index f623808251..aecf07bae2 100644 --- a/configs/tof/README.md +++ b/configs/tof/README.md @@ -25,28 +25,28 @@ The metrics are `PSNR / SSIM` . | Method | Pretrained SPyNet | PSNR | GPU Info | Download | | :---------------------------------------------------: | :---------------------------------------------------------------: | :-----: | :-----------------: | :------------------------------------------------------: | -| [tof_vfi_spynet_chair_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-chair-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_chair_20220321-4d82e91b.pth) | 33.3294 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.log.json) | -| [tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-kitti-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_kitti_20220321-dbcc1cc1.pth) | 33.3339 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.log.json) | -| [tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-sintel-wobn-clean_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_clean_20220321-0756630b.pth) | 33.3170 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.log.json) | -| [tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-sintel-wobn-final_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_final_20220321-5e89dcec.pth) | 33.3237 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.log.json) | -| [tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-pytoflow-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_pytoflow_20220321-5bab842d.pth) | 33.3426 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.pth) \| 
[log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.log.json) | +| [tof_vfi_spynet_chair_nobn_1xb1_vimeo90k](./tof_spynet-chair-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_chair_20220321-4d82e91b.pth) | 33.3294 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.log.json) | +| [tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k](./tof_spynet-kitti-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_kitti_20220321-dbcc1cc1.pth) | 33.3339 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.log.json) | +| [tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k](./tof_spynet-sintel-wobn-clean_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_clean_20220321-0756630b.pth) | 33.3170 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.log.json) | +| [tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k](./tof_spynet-sintel-wobn-final_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_final_20220321-5e89dcec.pth) | 33.3237 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.log.json) | +| [tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k](./tof_spynet-pytoflow-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_pytoflow_20220321-5bab842d.pth) | 33.3426 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.log.json) | | Method | Pretrained SPyNet | SSIM | GPU Info | Download | | :----------------------------------------------------: | :---------------------------------------------------------------: | :----: | :-----------------: | :------------------------------------------------------: | -| [tof_vfi_spynet_chair_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-chair-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_chair_20220321-4d82e91b.pth) | 0.9465 | 1 (Tesla PG503-216) 
| [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.log.json) | -| [tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-kitti-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_kitti_20220321-dbcc1cc1.pth) | 0.9466 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.log.json) | -| [tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-sintel-wobn-clean_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_clean_20220321-0756630b.pth) | 0.9464 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.log.json) | -| [tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-sintel-wobn-final_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_final_20220321-5e89dcec.pth) | 0.9465 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.log.json) | -| [tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-pytoflow-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_pytoflow_20220321-5bab842d.pth) | 0.9467 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.log.json) | +| [tof_vfi_spynet_chair_nobn_1xb1_vimeo90k](./tof_spynet-chair-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_chair_20220321-4d82e91b.pth) | 0.9465 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.log.json) | +| [tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k](./tof_spynet-kitti-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_kitti_20220321-dbcc1cc1.pth) | 0.9466 | 1 (Tesla PG503-216) | 
[model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.log.json) | +| [tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k](./tof_spynet-sintel-wobn-clean_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_clean_20220321-0756630b.pth) | 0.9464 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.log.json) | +| [tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k](./tof_spynet-sintel-wobn-final_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_final_20220321-5e89dcec.pth) | 0.9465 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.log.json) | +| [tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k](./tof_spynet-pytoflow-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_pytoflow_20220321-5bab842d.pth) | 0.9467 | 1 (Tesla PG503-216) | [model](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.pth) \| [log](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.log.json) | Note: These pretrained SPyNets don't contain BN layer since `batch_size=1`, which is consistent with `https://github.com/Coldog2333/pytoflow`. Evaluated on RGB channels. The metrics are `PSNR / SSIM` . 
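For reference, the released checkpoint in the table below can be evaluated with the standard test entry point; this is only a sketch and assumes the usual `python tools/test.py CONFIG CHECKPOINT` invocation applies to this config:

```shell
python tools/test.py configs/tof/tof_x4_official_vimeo90k.py \
    https://download.openmmlab.com/mmediting/restorers/tof/tof_x4_vimeo90k_official-a569ff50.pth
```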
-| Method | Vid4 | GPU Info | Download | -| :------------------------------------------------------------------: | :--------------: | :------: | :---------------------------------------------------------------------------------------------------: | -| [tof_x4_vimeo90k_official](/configs/tof/tof_x4_official_vimeo90k.py) | 24.4377 / 0.7433 | - | [model](https://download.openmmlab.com/mmediting/restorers/tof/tof_x4_vimeo90k_official-a569ff50.pth) | +| Method | Vid4 | GPU Info | Download | +| :-------------------------------------------------------: | :--------------: | :------: | :---------------------------------------------------------------------------------------------------: | +| [tof_x4_vimeo90k_official](./tof_x4_official_vimeo90k.py) | 24.4377 / 0.7433 | - | [model](https://download.openmmlab.com/mmediting/restorers/tof/tof_x4_vimeo90k_official-a569ff50.pth) | ## Quick Start diff --git a/configs/tof/README_zh-CN.md b/configs/tof/README_zh-CN.md index 6901e9c8b3..86d04b0793 100644 --- a/configs/tof/README_zh-CN.md +++ b/configs/tof/README_zh-CN.md @@ -13,11 +13,11 @@ | 算法 | 预训练 SPyNet | Vimeo90k-triplet | GPU 信息 | 下载 | | :--------------------------------------------------: | :----------------------------------------------------------: | :--------------: | :-----------------: | :---------------------------------------------------: | -| [tof_vfi_spynet_chair_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-chair-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_chair_20220321-4d82e91b.pth) | 33.3294 / 0.9465 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.log.json) | -| [tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-kitti-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_kitti_20220321-dbcc1cc1.pth) | 33.3339 / 0.9466 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.log.json) | -| [tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-sintel-wobn-clean_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_clean_20220321-0756630b.pth) | 33.3170 / 0.9464 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.log.json) | -| [tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-sintel-wobn-final_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_final_20220321-5e89dcec.pth) | 33.3237 / 0.9465 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.pth) 
\| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.log.json) | -| [tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k](/configs/tof/tof_spynet-pytoflow-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_pytoflow_20220321-5bab842d.pth) | 33.3426 / 0.9467 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.log.json) | +| [tof_vfi_spynet_chair_nobn_1xb1_vimeo90k](./tof_spynet-chair-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_chair_20220321-4d82e91b.pth) | 33.3294 / 0.9465 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_chair_nobn_1xb1_vimeo90k_20220321-2fc9e258.log.json) | +| [tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k](./tof_spynet-kitti-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_kitti_20220321-dbcc1cc1.pth) | 33.3339 / 0.9466 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_kitti_nobn_1xb1_vimeo90k_20220321-3f7ca4cd.log.json) | +| [tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k](./tof_spynet-sintel-wobn-clean_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_clean_20220321-0756630b.pth) | 33.3170 / 0.9464 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_clean_nobn_1xb1_vimeo90k_20220321-6e52a6fd.log.json) | +| [tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k](./tof_spynet-sintel-wobn-final_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_sintel_final_20220321-5e89dcec.pth) | 33.3237 / 0.9465 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.pth) \| [日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_sintel_final_nobn_1xb1_vimeo90k_20220321-8ab70dbb.log.json) | +| [tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k](./tof_spynet-pytoflow-wobn_1xb1_vimeo90k-triplet.py) | [spynet_chairs_final](https://download.openmmlab.com/mmediting/video_interpolators/toflow/pretrained_spynet_pytoflow_20220321-5bab842d.pth) | 33.3426 / 0.9467 | 1 (Tesla PG503-216) | [模型](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.pth) \| 
[日志](https://download.openmmlab.com/mmediting/video_interpolators/toflow/tof_vfi_spynet_pytoflow_nobn_1xb1_vimeo90k_20220321-5f4b243e.log.json) | 注: 由于 `batch_size=1` 预训练的 SPyNet 不包含 BN 层,这与 `https://github.com/Coldog2333/pytoflow` 一致. diff --git a/configs/tof/metafile.yml b/configs/tof/metafile.yml index 4f4bfff379..517e60ae56 100644 --- a/configs/tof/metafile.yml +++ b/configs/tof/metafile.yml @@ -6,6 +6,10 @@ Collections: Paper: - https://arxiv.org/abs/1711.09078 README: configs/tof/README.md + Task: + - video interpolation + - video super-resolution + Year: 2019 Models: - Config: configs/tof/tof_spynet-chair-wobn_1xb1_vimeo90k-triplet.py In Collection: TOFlow diff --git a/configs/ttsr/README.md b/configs/ttsr/README.md index bf9c63bd8f..bba437c4ef 100644 --- a/configs/ttsr/README.md +++ b/configs/ttsr/README.md @@ -23,10 +23,10 @@ We study on image super-resolution (SR), which aims to recover realistic texture Evaluated on CUFED dataset (RGB channels), `scale` pixels in each border are cropped before evaluation. The metrics are `PSNR and SSIM` . -| Method | scale | PSNR | SSIM | GPU Info | Download | -| :----------------------------------------------------------------------------------: | :---: | :-----: | :----: | :----------: | :------------------------------------------------------------------------------------: | -| [ttsr-rec_x4_c64b16_g1_200k_CUFED](/configs/ttsr/ttsr-rec_x4c64b16_1xb9-200k_CUFED.py) | x4 | 25.2433 | 0.7491 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-rec_x4_c64b16_g1_200k_CUFED_20210525-b0dba584.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-rec_x4_c64b16_g1_200k_CUFED_20210525-b0dba584.log.json) | -| [ttsr-gan_x4_c64b16_g1_500k_CUFED](/configs/ttsr/ttsr-gan_x4c64b16_1xb9-500k_CUFED.py) | x4 | 24.6075 | 0.7234 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.log.json) | +| Method | scale | PSNR | SSIM | GPU Info | Download | +| :------------------------------------------------------------------------: | :---: | :-----: | :----: | :----------: | :----------------------------------------------------------------------------------------------: | +| [ttsr-rec_x4_c64b16_g1_200k_CUFED](./ttsr-rec_x4c64b16_1xb9-200k_CUFED.py) | x4 | 25.2433 | 0.7491 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-rec_x4_c64b16_g1_200k_CUFED_20210525-b0dba584.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-rec_x4_c64b16_g1_200k_CUFED_20210525-b0dba584.log.json) | +| [ttsr-gan_x4_c64b16_g1_500k_CUFED](./ttsr-gan_x4c64b16_1xb9-500k_CUFED.py) | x4 | 24.6075 | 0.7234 | 1 (TITAN Xp) | [model](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.pth) \| [log](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.log.json) | ## Quick Start diff --git a/configs/ttsr/README_zh-CN.md b/configs/ttsr/README_zh-CN.md index 18007679c0..3f8de2dc35 100644 --- a/configs/ttsr/README_zh-CN.md +++ b/configs/ttsr/README_zh-CN.md @@ -24,10 +24,10 @@ 在 RGB 通道上进行评估,在评估之前裁剪每个边界中的 `scale` 像素。 我们使用 `PSNR` 和 `SSIM` 作为指标。 -| 算法 | scale | CUFED | GPU 信息 | 下载 | -| :---------------------------------------------------------------------------------: | :---: | :--------------: | 
:----------: | :----------------------------------------------------------------------------------: | -| [ttsr-rec_x4_c64b16_g1_200k_CUFED](/configs/ttsr/ttsr-rec_x4c64b16_1xb9-200k_CUFED.py) | x4 | 25.2433 / 0.7491 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-rec_x4_c64b16_g1_200k_CUFED_20210525-b0dba584.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-rec_x4_c64b16_g1_200k_CUFED_20210525-b0dba584.log.json) | -| [ttsr-gan_x4_c64b16_g1_500k_CUFED](/configs/ttsr/ttsr-gan_x4c64b16_1xb9-500k_CUFED.py) | x4 | 24.6075 / 0.7234 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.log.json) | +| 算法 | scale | CUFED | GPU 信息 | 下载 | +| :------------------------------------------------------------------------: | :---: | :--------------: | :----------: | :-------------------------------------------------------------------------------------------: | +| [ttsr-rec_x4_c64b16_g1_200k_CUFED](./ttsr-rec_x4c64b16_1xb9-200k_CUFED.py) | x4 | 25.2433 / 0.7491 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-rec_x4_c64b16_g1_200k_CUFED_20210525-b0dba584.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-rec_x4_c64b16_g1_200k_CUFED_20210525-b0dba584.log.json) | +| [ttsr-gan_x4_c64b16_g1_500k_CUFED](./ttsr-gan_x4c64b16_1xb9-500k_CUFED.py) | x4 | 24.6075 / 0.7234 | 1 (TITAN Xp) | [模型](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.pth) \| [日志](https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.log.json) | ## 快速开始 diff --git a/configs/ttsr/metafile.yml b/configs/ttsr/metafile.yml index 559b4fe754..b48de2a895 100644 --- a/configs/ttsr/metafile.yml +++ b/configs/ttsr/metafile.yml @@ -6,6 +6,9 @@ Collections: Paper: - https://arxiv.org/abs/2006.04139 README: configs/ttsr/README.md + Task: + - image super-resolution + Year: 2020 Models: - Config: configs/ttsr/ttsr-rec_x4c64b16_1xb9-200k_CUFED.py In Collection: TTSR diff --git a/configs/wgan-gp/README.md b/configs/wgan-gp/README.md index b88d74875c..9d9ae9c436 100644 --- a/configs/wgan-gp/README.md +++ b/configs/wgan-gp/README.md @@ -28,8 +28,8 @@ Generative Adversarial Networks (GANs) are powerful generative models, but suffe | Models | Dataset | Details | SWD | MS-SSIM | Config | Download | | :---------: | :------------: | :----------------: | :---------------------------: | :-----: | :---------------------------------------------------------: | :------------------------------------------------------------: | -| WGAN-GP 128 | CelebA-Cropped | GN | 5.87, 9.76, 9.43, 18.84/10.97 | 0.2601 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/wgan-gp/wgangp_GN_1xb64-160kiters_celeba-cropped-128x128.py) | [model](https://download.openmmlab.com/mmgen/wgangp/wgangp_GN_celeba-cropped_128_b64x1_160k_20210408_170611-f8a99336.pth) | -| WGAN-GP 128 | LSUN-Bedroom | GN, GP-lambda = 50 | 11.7, 7.87, 9.82, 25.36/13.69 | 0.059 | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/wgan-gp/wgangp_GN-GP-50_1xb64-160kiters_lsun-bedroom-128x128.py) | [model](https://download.openmmlab.com/mmgen/wgangp/wgangp_GN_GP-50_lsun-bedroom_128_b64x1_130k_20210408_170509-56f2a37c.pth) | +| WGAN-GP 128 | CelebA-Cropped | GN | 5.87, 9.76, 9.43, 
18.84/10.97 | 0.2601 | [config](./wgangp_GN_1xb64-160kiters_celeba-cropped-128x128.py) | [model](https://download.openmmlab.com/mmediting/wgangp/wgangp_GN_celeba-cropped_128_b64x1_160k_20210408_170611-f8a99336.pth) | +| WGAN-GP 128 | LSUN-Bedroom | GN, GP-lambda = 50 | 11.7, 7.87, 9.82, 25.36/13.69 | 0.059 | [config](./wgangp_GN-GP-50_1xb64-160kiters_lsun-bedroom-128x128.py) | [model](https://download.openmmlab.com/mmediting/wgangp/wgangp_GN_GP-50_lsun-bedroom_128_b64x1_130k_20210408_170509-56f2a37c.pth) | ## Citation diff --git a/configs/wgan-gp/metafile.yml b/configs/wgan-gp/metafile.yml index 0b088dc713..c2cfc69cbb 100644 --- a/configs/wgan-gp/metafile.yml +++ b/configs/wgan-gp/metafile.yml @@ -6,8 +6,11 @@ Collections: Paper: - https://arxiv.org/abs/1704.00028 README: configs/wgan-gp/README.md + Task: + - unconditional gans + Year: 2017 Models: -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/wgan-gp/wgangp_GN_1xb64-160kiters_celeba-cropped-128x128.py +- Config: configs/wgan-gp/wgangp_GN_1xb64-160kiters_celeba-cropped-128x128.py In Collection: WGAN-GP Metadata: Training Data: CELEBA @@ -17,8 +20,8 @@ Models: Metrics: MS-SSIM: 0.2601 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/wgangp/wgangp_GN_celeba-cropped_128_b64x1_160k_20210408_170611-f8a99336.pth -- Config: https://github.com/open-mmlab/mmediting/tree/master/configs/wgan-gp/wgangp_GN-GP-50_1xb64-160kiters_lsun-bedroom-128x128.py + Weights: https://download.openmmlab.com/mmediting/wgangp/wgangp_GN_celeba-cropped_128_b64x1_160k_20210408_170611-f8a99336.pth +- Config: configs/wgan-gp/wgangp_GN-GP-50_1xb64-160kiters_lsun-bedroom-128x128.py In Collection: WGAN-GP Metadata: Training Data: Others @@ -28,4 +31,4 @@ Models: Metrics: MS-SSIM: 0.059 Task: Unconditional GANs - Weights: https://download.openmmlab.com/mmgen/wgangp/wgangp_GN_GP-50_lsun-bedroom_128_b64x1_130k_20210408_170509-56f2a37c.pth + Weights: https://download.openmmlab.com/mmediting/wgangp/wgangp_GN_GP-50_lsun-bedroom_128_b64x1_130k_20210408_170509-56f2a37c.pth diff --git a/demo/README.md b/demo/README.md index 469e232e7e..6ce948fd45 100644 --- a/demo/README.md +++ b/demo/README.md @@ -71,10 +71,10 @@ print all supported tasks for inference. python mmediting_inference_demo.py --print-supported-tasks ``` -print all supported models for one task, take 'Image2Image Translation' for example. +print all supported models for one task, take 'Image2Image' for example. 
```shell -python mmediting_inference_demo.py --print-task-supported-models 'Image2Image Translation' +python mmediting_inference_demo.py --print-task-supported-models 'Image2Image' ``` ### 2.2 Perform inference with command line diff --git a/demo/mmediting_inference_tutorial.ipynb b/demo/mmediting_inference_tutorial.ipynb index 738b6ff9af..96c2fdfee4 100644 --- a/demo/mmediting_inference_tutorial.ipynb +++ b/demo/mmediting_inference_tutorial.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -34,7 +35,7 @@ "\n", "  [4.4 Inference of super resolution models](#44-inference-of-image-super-resolution-models)\n", "\n", - "  [4.5 Inference of image2image translation models](#45-inference-of-image-translation-models)\n", + "  [4.5 Inference of image2image models](#45-inference-of-image-translation-models)\n", "\n", "  [4.6 Inference of unconditional GANs models](#46-inference-of-unconditional-gan-models)\n", "\n", @@ -261,12 +262,13 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Check inference supported tasks and models\n", "\n", - "There are multiple task types in MMEditing: Matting, Inpainting, Video Super-Resolution, Image Super-Resolution, Image2Image Translation, Unconditional GANs, Conditional GANs, Video Interpolation. \n", + "There are multiple task types in MMEditing: Matting, Inpainting, Video Super-Resolution, Image Super-Resolution, Image2Image, Unconditional GANs, Conditional GANs, Video Interpolation. \n", "\n", "We provide some models for each task. All available models and tasks could be printed out like this." ] @@ -303,7 +305,7 @@ "print(supported_tasks)\n", "\n", "# print all supported models for one task, take image translation for example.\n", - "task_supported_models = MMEdit.get_task_supported_models('Image2Image Translation')\n", + "task_supported_models = MMEdit.get_task_supported_models('Image2Image')\n", "print('translation models:')\n", "print(task_supported_models)" ] @@ -478,7 +480,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "http loads checkpoint from path: https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210902_170442-c0958d50.pth\n" + "http loads checkpoint from path: https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210902_170442-c0958d50.pth\n" ] } ], @@ -545,7 +547,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "http loads checkpoint from path: https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth\n" + "http loads checkpoint from path: https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth\n" ] }, { @@ -595,7 +597,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "http loads checkpoint from path: https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth\n" + "http loads checkpoint from path: https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth\n" ] }, { @@ -641,7 +643,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "http loads checkpoint from path: https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth\n", + "http loads checkpoint from path: 
https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth\n", "['num_batches', 'sample_model']\n" ] } @@ -689,7 +691,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "http loads checkpoint from path: https://download.openmmlab.com/mmgen/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth\n" + "http loads checkpoint from path: https://download.openmmlab.com/mmediting/biggan/biggan_imagenet1k_128x128_b32x8_best_fid_iter_1232000_20211111_122548-5315b13d.pth\n" ] }, { @@ -966,7 +968,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "http loads checkpoint from path: https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210902_170442-c0958d50.pth\n" + "http loads checkpoint from path: https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_1x1_80k_facades_20210902_170442-c0958d50.pth\n" ] }, { @@ -1021,7 +1023,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "http loads checkpoint from path: https://download.openmmlab.com/mmgen/styleganv1/styleganv1_ffhq_256_g8_25Mimg_20210407_161748-0094da86.pth\n", + "http loads checkpoint from path: https://download.openmmlab.com/mmediting/styleganv1/styleganv1_ffhq_256_g8_25Mimg_20210407_161748-0094da86.pth\n", "Switch to evaluation style mode: single\n", "Switch to evaluation style mode: single\n" ] diff --git a/docs/en/.dev_scripts/update_dataset_zoo.py b/docs/en/.dev_scripts/update_dataset_zoo.py new file mode 100644 index 0000000000..07060dab11 --- /dev/null +++ b/docs/en/.dev_scripts/update_dataset_zoo.py @@ -0,0 +1,51 @@ +import os + +from tqdm import tqdm + + +def update_dataset_zoo(): + + target_dir = 'dataset_zoo' + source_dir = '../../tools/dataset_converters' + os.makedirs(target_dir, exist_ok=True) + + # generate overview + overviewmsg = """ +# Overview + +""" + + # generate index.rst + rstmsg = """ +.. 
toctree:: + :maxdepth: 1 + :caption: Dataset Zoo + + overview.md +""" + + subfolders = os.listdir(source_dir) + for subf in tqdm(subfolders, desc='update dataset zoo'): + + target_subf = subf.replace('-', '_').lower() + target_readme = os.path.join(target_dir, target_subf + '.md') + source_readme = os.path.join(source_dir, subf, 'README.md') + if not os.path.exists(source_readme): + continue + + overviewmsg += f'\n- [{subf}]({target_subf}.md)' + rstmsg += f'\n {target_subf}.md' + + # generate all tasks dataset_zoo + command = f'cat {source_readme} > {target_readme}' + os.popen(command) + + with open(os.path.join(target_dir, 'overview.md'), 'w') as f: + f.write(overviewmsg) + + with open(os.path.join(target_dir, 'index.rst'), 'w') as f: + f.write(rstmsg) + + +if __name__ == '__main__': + update_dataset_zoo() diff --git a/docs/en/.dev_scripts/update_dataset_zoo.sh b/docs/en/.dev_scripts/update_dataset_zoo.sh deleted file mode 100644 index 6ac06a6126..0000000000 --- a/docs/en/.dev_scripts/update_dataset_zoo.sh +++ /dev/null @@ -1,39 +0,0 @@ -# generate all tasks dataset_zoo -cat ../../tools/dataset_converters/super-resolution/README.md > dataset_zoo/1_super_resolution_datasets.md -cat ../../tools/dataset_converters/inpainting/README.md > dataset_zoo/2_inpainting_datasets.md -cat ../../tools/dataset_converters/matting/README.md > dataset_zoo/3_matting_datasets.md -cat ../../tools/dataset_converters/video-interpolation/README.md > dataset_zoo/4_video_interpolation_datasets.md -cat ../../tools/dataset_converters/unconditional_gans/README.md > dataset_zoo/5_unconditional_gans_datasets.md -cat ../../tools/dataset_converters/image_translation/README.md > dataset_zoo/6_image_translation_datasets.md - -# generate markdown TOC -sed -i -e 's/](comp1k\(\/README.md)\)/](composition-1k\1/g' dataset_zoo/3_matting_datasets.md - -sed -i -e 's/](\(.*\)\/README.md)/](#\1-dataset)/g' dataset_zoo/1_super_resolution_datasets.md -sed -i -e 's/](\(.*\)\/README.md)/](#\1-dataset)/g' dataset_zoo/2_inpainting_datasets.md -sed -i -e 's/](\(.*\)\/README.md)/](#\1-dataset)/g' dataset_zoo/3_matting_datasets.md -sed -i -e 's/](\(.*\)\/README.md)/](#\1-dataset)/g' dataset_zoo/4_video_interpolation_datasets.md -sed -i -e 's/](\(.*\)\/README.md)/](#\1-dataset)/g' dataset_zoo/5_unconditional_gans_datasets.md -sed -i -e 's/](\(.*\)\/README.md)/](#\1-dataset)/g' dataset_zoo/6_image_translation_datasets.md - -# gather all datasets -cat ../../tools/dataset_converters/super-resolution/*/README.md | sed 's/# Preparing /\n# /g' | sed "s/#/#&/" >> dataset_zoo/1_super_resolution_datasets.md -cat ../../tools/dataset_converters/inpainting/*/README.md | sed 's/# Preparing /\n# /g' | sed "s/#/#&/" >> dataset_zoo/2_inpainting_datasets.md -cat ../../tools/dataset_converters/matting/*/README.md | sed 's/# Preparing /\n# /g' | sed "s/#/#&/" >> dataset_zoo/3_matting_datasets.md -cat ../../tools/dataset_converters/video-interpolation/*/README.md | sed 's/# Preparing /\n# /g' | sed "s/#/#&/" >> dataset_zoo/4_video_interpolation_datasets.md -cat ../../tools/dataset_converters/unconditional_gans/*/README.md | sed 's/# Preparing /\n# /g' | sed "s/#/#&/" >> dataset_zoo/5_unconditional_gans_datasets.md -cat ../../tools/dataset_converters/image_translation/*/README.md | sed 's/# Preparing /\n# /g' | sed "s/#/#&/" >> dataset_zoo/6_image_translation_datasets.md - -echo '# Overview' > dataset_zoo/0_overview.md -echo "- [Prepare Super-Resolution Datasets](./1_super_resolution_datasets.md)" >> dataset_zoo/0_overview.md -cat 
dataset_zoo/1_super_resolution_datasets.md | grep -oP '(- \[.*-dataset.*)' | sed 's/- \[/ - \[/g' | sed 's/(#/(.\/1_super_resolution_datasets.md#/g' >> dataset_zoo/0_overview.md -echo "- [Prepare Inpainting Datasets](./2_inpainting_datasets.md)" >> dataset_zoo/0_overview.md -cat dataset_zoo/2_inpainting_datasets.md | grep -oP '(- \[.*-dataset.*)' | sed 's/- \[/ - \[/g' | sed 's/(#/(.\/2_inpainting_datasets.md#/g' >> dataset_zoo/0_overview.md -echo "- [Prepare Matting Datasets](./3_matting_datasets.md)" >> dataset_zoo/0_overview.md -cat dataset_zoo/3_matting_datasets.md | grep -oP '(- \[.*-dataset.*)' | sed 's/- \[/ - \[/g' | sed 's/(#/(.\/3_matting_datasets.md#/g' >> dataset_zoo/0_overview.md -echo "- [Prepare Video Frame Interpolation Datasets](./4_video_interpolation_datasets.md)" >> dataset_zoo/0_overview.md -cat dataset_zoo/4_video_interpolation_datasets.md | grep -oP '(- \[.*-dataset.*)' | sed 's/- \[/ - \[/g' | sed 's/(#/(.\/4_video_interpolation_datasets.md#/g' >> dataset_zoo/0_overview.md -echo "- [Prepare Unconditional GANs Datasets](./5_unconditional_gans_datasets.md)" >> dataset_zoo/0_overview.md -cat dataset_zoo/5_unconditional_gans_datasets.md | grep -oP '(- \[.*-dataset.*)' | sed 's/- \[/ - \[/g' | sed 's/(#/(.\/5_unconditional_gans_datasets.md#/g' >> dataset_zoo/0_overview.md -echo "- [Prepare Image Translation Datasets](./6_image_translation_datasets.md)" >> dataset_zoo/0_overview.md -cat dataset_zoo/6_image_translation_datasets.md | grep -oP '(- \[.*-dataset.*)' | sed '$a\n' |sed 's/- \[/ - \[/g' | sed 's/(#/(.\/6_image_translation_datasets.md#/g' >> dataset_zoo/0_overview.md diff --git a/docs/en/.dev_scripts/update_model_zoo.py b/docs/en/.dev_scripts/update_model_zoo.py index e24c664a3b..ce8f16651e 100755 --- a/docs/en/.dev_scripts/update_model_zoo.py +++ b/docs/en/.dev_scripts/update_model_zoo.py @@ -1,134 +1,116 @@ -#!/usr/bin/env python -# Copyright (c) OpenMMLab. All rights reserved. 
+import os +from glob import glob +from os import path as osp +from pathlib import Path -import functools as func -import glob -import os.path as osp -import re -from os.path import basename, dirname - -import numpy as np -import titlecase +from modelindex.load_model_index import load from tqdm import tqdm -github_link = 'https://github.com/open-mmlab/mmediting/blob/1.x/' +MMEDIT_ROOT = Path(__file__).absolute().parents[3] +TARGET_ROOT = Path(__file__).absolute().parents[1] / 'model_zoo' -def anchor(name): - return re.sub(r'-+', '-', - re.sub(r'[^a-zA-Z0-9\+]', '-', - name.strip().lower())).strip('-') +def write_file(file, content): + os.makedirs(osp.dirname(file), exist_ok=True) + with open(file, 'w') as f: + f.write(content) -# Count algorithms def update_model_zoo(): - - root_dir = dirname(dirname(dirname(dirname(osp.abspath(__file__))))) - files = sorted(glob.glob(osp.join(root_dir, 'configs/*/README.md'))) - stats = [] - - for f in tqdm(files, desc='update model zoo'): - with open(f, 'r') as content_file: - content = content_file.read() - - # title - title = content.split('\n')[0].replace('#', '') - - # count papers - papers = set( - (papertype, - titlecase.titlecase(paper.lower().strip()).replace('+', r'\+')) - for (papertype, paper) in re.findall( - r'\s*\n.*?\btitle\s*=\s*{(.*?)}', - content, re.DOTALL)) - - # paper links - revcontent = '\n'.join(list(reversed(content.splitlines()))) - paperlinks = {} - for _, p in papers: - paper_link = osp.join(github_link, 'configs', basename(dirname(f)), - 'README.md') - # print(p, paper_link) - paperlinks[p] = ' '.join( - (f'[⇨]({paper_link}#{anchor(paperlink)})' - for paperlink in re.findall( - rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n', - revcontent, re.DOTALL | re.IGNORECASE))) - # print(' ', paperlinks[p]) - paperlist = '\n'.join( - sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers)) - - # count configs - configs = set(x.lower().strip() - for x in re.findall(r'/configs/.*?\.py', content)) - - # count ckpts - ckpts = list( - x.lower().strip() - for x in re.findall(r'\[model\]\(https\:\/\/.*\.pth', content)) - ckpts.extend( - x.lower().strip() - for x in re.findall(r'\[ckpt\]\(https\:\/\/.*\.pth', content)) - ckpts = set(ckpts) - - # count tasks - task_desc = list( - set(x.lower().strip() - for x in re.findall(r'\*\*Task\*\*: .*', content))) - tasks = set() - if len(task_desc) > 0: - tasks = set(task_desc[0].split('**task**: ')[1].split(', ')) - - statsmsg = f"""## {title}""" - if len(tasks) > 0: - statsmsg += f"\n* Tasks: {','.join(list(tasks))}" - statsmsg += f""" -* Number of checkpoints: {len(ckpts)} -* Number of configs: {len(configs)} -* Number of papers: {len(papers)} -{paperlist} - -""" - - # * We should have: {len(glob.glob(osp.join(dirname(f), '*.py')))} - stats.append((papers, configs, ckpts, tasks, statsmsg)) - - allpapers = func.reduce(lambda a, b: a.union(b), - [p for p, _, _, _, _ in stats]) - allconfigs = func.reduce(lambda a, b: a.union(b), - [c for _, c, _, _, _ in stats]) - allckpts = func.reduce(lambda a, b: a.union(b), - [c for _, _, c, _, _ in stats]) - alltasks = func.reduce(lambda a, b: a.union(b), - [t for _, _, _, t, _ in stats]) - task_desc = '\n - '.join(list(alltasks)) - - # Summarize - - msglist = '\n'.join(x for _, _, _, _, x in stats) - papertypes, papercounts = np.unique([t for t, _ in allpapers], - return_counts=True) - countstr = '\n'.join( - [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)]) - countstr = '\n'.join([f' - ALGORITHM: {len(stats)}']) - - modelzoo = f"""# 
Overview - -* Number of checkpoints: {len(allckpts)} -* Number of configs: {len(allconfigs)} -* Number of papers: {len(allpapers)} -{countstr} -* Tasks: - - {task_desc} - -For supported datasets, see [datasets overview](dataset_zoo/0_overview.md). - -{msglist} - - """ - - with open('3_model_zoo.md', 'w') as f: - f.write(modelzoo) + """load collections and models from model index, return summary, + collections and models.""" + model_index_file = MMEDIT_ROOT / 'model-index.yml' + model_index = load(str(model_index_file)) + model_index.build_models_with_collections() + + # parse model_index according to task + tasks = {} + full_models = set() + for model in model_index.models: + full_models.add(model.full_model) + for r in model.results: + _task = r.task.lower().split(', ') + for t in _task: + if t not in tasks: + tasks[t] = set() + tasks[t].add(model.full_model) + + # assert the number of configs with the number of files + collections = set([m.in_collection for m in full_models]) + assert len(collections) == len(os.listdir(MMEDIT_ROOT / 'configs')) - 1 + + configs = set([str(MMEDIT_ROOT / m.config) for m in full_models]) + base_configs = glob( + str(MMEDIT_ROOT / 'configs/_base_/**/*.py'), recursive=True) + all_configs = glob(str(MMEDIT_ROOT / 'configs/**/*.py'), recursive=True) + valid_configs = set(all_configs) - set(base_configs) + untrackable_configs = valid_configs - configs + assert len(untrackable_configs) == 0, '/n'.join( + list(untrackable_configs)) + ' are not trackable.' + + # write for overview.md + papers = set() + for m in full_models: + papers.update(m.paper) + checkpoints = set( + [m.weights for m in full_models if m.weights.startswith('https:')]) + task_desc = '\n'.join([ + f" - [{t}]({t.replace('-', '_').replace(' ', '_')}.md)" + for t in list(tasks.keys()) + ]) + + # write overview.md + overview = (f'# Overview\n\n' + f'* Number of checkpoints: {len(checkpoints)}\n' + f'* Number of configs: {len(configs)}\n' + f'* Number of papers: {len(papers)}\n' + f' - ALGORITHM: {len(collections)}\n\n' + f'* Tasks:\n{task_desc}') + write_file(TARGET_ROOT / 'overview.md', overview) + + # write for index.rst + task_desc = '\n'.join([ + f" {t.replace('-', '_').replace(' ', '_')}.md" + for t in list(tasks.keys()) + ]) + overview = (f'.. 
toctree::\n' + f' :maxdepth: 1\n' + f' :caption: Model Zoo\n\n' + f' overview.md\n' + f'{task_desc}') + write_file(TARGET_ROOT / 'index.rst', overview) + + # write for all the tasks + for task, models in tqdm(tasks.items(), desc='create markdown files'): + target_md = f"{task.replace('-', '_').replace(' ', '_')}.md" + target_md = TARGET_ROOT / target_md + models = sorted(models, key=lambda x: -x.data['Year']) + + checkpoints = set( + [m.weights for m in models if m.weights.startswith('https:')]) + collections = set([m.in_collection for m in models]) + + papers = set() + for m in models: + papers.update(m.paper) + + content = '' + readme = set() + for m in models: + if m.readme not in readme: + readme.add(m.readme) + with open(MMEDIT_ROOT / m.readme, 'r') as f: + c = f.read() + content += c.replace('# ', '## ') + overview = (f'# {task}\n\n' + f'## Summary\n' + f'* Number of checkpoints: {len(checkpoints)}\n' + f'* Number of configs: {len(models)}\n' + f'* Number of papers: {len(papers)}\n' + f' - ALGORITHM: {len(collections)}\n\n' + f'{content}') + + write_file(target_md, overview) if __name__ == '__main__': diff --git a/docs/en/.gitignore b/docs/en/.gitignore new file mode 100644 index 0000000000..db69732497 --- /dev/null +++ b/docs/en/.gitignore @@ -0,0 +1,3 @@ +model_zoo +dataset_zoo +autoapi diff --git a/docs/en/3_model_zoo.md b/docs/en/3_model_zoo.md deleted file mode 100644 index d3eff263c7..0000000000 --- a/docs/en/3_model_zoo.md +++ /dev/null @@ -1,362 +0,0 @@ -# Overview - -- Number of checkpoints: 169 -- Number of configs: 169 -- Number of papers: 42 - - ALGORITHM: 43 -- Tasks: - - image2image translation - - video interpolation - - unconditional gans - - image super-resolution - - internal learning - - conditional gans - - inpainting - - video super-resolution - - colorization - - matting - -For supported datasets, see [datasets overview](dataset_zoo/0_overview.md). 
- -## AOT-GAN (TVCG'2021) - -- Tasks: inpainting -- Number of checkpoints: 1 -- Number of configs: 1 -- Number of papers: 1 - - \[ALGORITHM\] Aggregated Contextual Transformations for High-Resolution Image Inpainting ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/aot_gan/README.md#citation)) - -## BasicVSR (CVPR'2021) - -- Tasks: video super-resolution -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Basicvsr: The Search for Essential Components in Video Super-Resolution and Beyond ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/basicvsr/README.md#citation)) - -## BasicVSR++ (CVPR'2022) - -- Tasks: video super-resolution -- Number of checkpoints: 7 -- Number of configs: 7 -- Number of papers: 1 - - \[ALGORITHM\] Basicvsr++: Improving Video Super-Resolution With Enhanced Propagation and Alignment ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/basicvsr_pp/README.md#citation)) - -## BigGAN (ICLR'2019) - -- Tasks: conditional gans -- Number of checkpoints: 7 -- Number of configs: 6 -- Number of papers: 1 - - \[ALGORITHM\] Large Scale {Gan ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/biggan/README.md#citation)) - -## CAIN (AAAI'2020) - -- Tasks: video interpolation -- Number of checkpoints: 1 -- Number of configs: 1 -- Number of papers: 1 - - \[ALGORITHM\] Channel Attention Is All You Need for Video Frame Interpolation ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/cain/README.md#citation)) - -## CycleGAN: Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks (ICCV'2017) - -- Tasks: image2image translation -- Number of checkpoints: 6 -- Number of configs: 6 -- Number of papers: 1 - - \[ALGORITHM\] Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/cyclegan/README.md#citation)) - -## Unsupervised Representation Learning with Deep Convolutional Generative Adversarial Networks (ICLR'2016) - -- Tasks: unconditional gans -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Unsupervised Representation Learning With Deep Convolutional Generative Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/dcgan/README.md#citation)) - -## DeepFillv1 (CVPR'2018) - -- Tasks: inpainting -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Generative Image Inpainting With Contextual Attention ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/deepfillv1/README.md#citation)) - -## DeepFillv2 (CVPR'2019) - -- Tasks: inpainting -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Free-Form Image Inpainting With Gated Convolution ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/deepfillv2/README.md#citation)) - -## DIC (CVPR'2020) - -- Tasks: image super-resolution -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Deep Face Super-Resolution With Iterative Collaboration Between Attentive Recovery and Landmark Estimation ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/dic/README.md#citation)) - -## DIM (CVPR'2017) - -- Tasks: matting -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Deep Image Matting ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/dim/README.md#citation)) - -## EDSR (CVPR'2017) - -- 
Tasks: image super-resolution -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Enhanced Deep Residual Networks for Single Image Super-Resolution ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/edsr/README.md#citation)) - -## EDVR (CVPRW'2019) - -- Tasks: video super-resolution -- Number of checkpoints: 4 -- Number of configs: 4 -- Number of papers: 1 - - \[ALGORITHM\] Edvr: Video Restoration With Enhanced Deformable Convolutional Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/edvr/README.md#citation)) - -## ESRGAN (ECCVW'2018) - -- Tasks: image super-resolution -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Esrgan: Enhanced Super-Resolution Generative Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/esrgan/README.md#citation)) - -## FLAVR (arXiv'2020) - -- Tasks: video interpolation -- Number of checkpoints: 1 -- Number of configs: 1 -- Number of papers: 1 - - \[ALGORITHM\] Flavr: Flow-Agnostic Video Representations for Fast Frame Interpolation ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/flavr/README.md#citation)) - -## GCA (AAAI'2020) - -- Tasks: matting -- Number of checkpoints: 4 -- Number of configs: 4 -- Number of papers: 1 - - \[ALGORITHM\] Natural Image Matting via Guided Contextual Attention ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/gca/README.md#citation)) - -## GGAN (ArXiv'2017) - -- Tasks: unconditional gans -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Geometric Gan ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/ggan/README.md#citation)) - -## GLEAN (CVPR'2021) - -- Tasks: image super-resolution -- Number of checkpoints: 4 -- Number of configs: 7 -- Number of papers: 1 - - \[ALGORITHM\] Glean: Generative Latent Bank for Large-Factor Image Super-Resolution ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/glean/README.md#citation)) - -## Global&Local (ToG'2017) - -- Tasks: inpainting -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Globally and Locally Consistent Image Completion ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/global_local/README.md#citation)) - -## IconVSR (CVPR'2021) - -- Tasks: video super-resolution -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Basicvsr: The Search for Essential Components in Video Super-Resolution and Beyond ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/iconvsr/README.md#citation)) - -## IndexNet (ICCV'2019) - -- Tasks: matting -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Indices Matter: Learning to Index for Deep Image Matting ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/indexnet/README.md#citation)) - -## LIIF (CVPR'2021) - -- Tasks: image super-resolution -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Learning Continuous Image Representation With Local Implicit Image Function ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/liif/README.md#citation)) - -## Instance-aware Image Colorization (CVPR'2020) - -- Tasks: colorization -- Number of checkpoints: 1 -- Number of configs: 1 -- Number of papers: 1 - - \[ALGORITHM\] Instance-Aware Image Colorization 
([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/inst_colorization/README.md#quick-start)) - -## LSGAN (ICCV'2017) - -- Tasks: unconditional gans -- Number of checkpoints: 4 -- Number of configs: 4 -- Number of papers: 1 - - \[ALGORITHM\] Least Squares Generative Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/lsgan/README.md#citation)) - -## PConv (ECCV'2018) - -- Tasks: inpainting -- Number of checkpoints: 2 -- Number of configs: 4 -- Number of papers: 1 - - \[ALGORITHM\] Image Inpainting for Irregular Holes Using Partial Convolutions ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/partial_conv/README.md#citation)) - -## PGGAN (ICLR'2018) - -- Tasks: unconditional gans -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Progressive Growing of Gans for Improved Quality, Stability, and Variation ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/pggan/README.md#citation)) - -## Pix2Pix (CVPR'2017) - -- Tasks: image2image translation -- Number of checkpoints: 4 -- Number of configs: 4 -- Number of papers: 1 - - \[ALGORITHM\] Image-to-Image Translation With Conditional Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/pix2pix/README.md#citation)) - -## Positional Encoding in GANs - -- Number of checkpoints: 21 -- Number of configs: 21 -- Number of papers: 1 - - \[ALGORITHM\] Positional Encoding as Spatial Inductive Bias in Gans ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/positional_encoding_in_gans/README.md#citation)) - -## RDN (CVPR'2018) - -- Tasks: image super-resolution -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Residual Dense Network for Image Super-Resolution ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/rdn/README.md#citation)) - -## RealBasicVSR (CVPR'2022) - -- Tasks: video super-resolution -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Realbasicvsr: Investigating Tradeoffs in Real-World Video Super-Resolution ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/real_basicvsr/README.md#citation)) - -## Real-ESRGAN (ICCVW'2021) - -- Tasks: image super-resolution -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Real-Esrgan: Training Real-World Blind Super-Resolution With Pure Synthetic Data ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/real_esrgan/README.md#citation)) - -## SAGAN (ICML'2019) - -- Tasks: conditional gans -- Number of checkpoints: 9 -- Number of configs: 6 -- Number of papers: 1 - - \[ALGORITHM\] Self-Attention Generative Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/sagan/README.md#citation)) - -## SinGAN (ICCV'2019) - -- Tasks: internal learning -- Number of checkpoints: 3 -- Number of configs: 3 -- Number of papers: 1 - - \[ALGORITHM\] Singan: Learning a Generative Model From a Single Natural Image ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/singan/README.md#citation)) - -## SNGAN (ICLR'2018) - -- Tasks: conditional gans -- Number of checkpoints: 10 -- Number of configs: 6 -- Number of papers: 1 - - \[ALGORITHM\] Spectral Normalization for Generative Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/sngan_proj/README.md#citation)) - -## SRCNN (TPAMI'2015) - -- Tasks: image super-resolution -- Number of checkpoints: 1 -- Number of configs: 1 
-- Number of papers: 1 - - \[ALGORITHM\] Image Super-Resolution Using Deep Convolutional Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/srcnn/README.md#citation)) - -## SRGAN (CVPR'2016) - -- Tasks: image super-resolution -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/srgan_resnet/README.md#citation)) - -## StyleGANv1 (CVPR'2019) - -- Tasks: unconditional gans -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] A Style-Based Generator Architecture for Generative Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/styleganv1/README.md#citation)) - -## StyleGANv2 (CVPR'2020) - -- Tasks: unconditional gans -- Number of checkpoints: 12 -- Number of configs: 12 -- Number of papers: 1 - - \[ALGORITHM\] Analyzing and Improving the Image Quality of Stylegan ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/styleganv2/README.md#citation)) - -## StyleGANv3 (NeurIPS'2021) - -- Tasks: unconditional gans -- Number of checkpoints: 9 -- Number of configs: 10 -- Number of papers: 1 - - \[ALGORITHM\] Alias-Free Generative Adversarial Networks ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/styleganv3/README.md#citation)) - -## TDAN (CVPR'2020) - -- Tasks: video super-resolution -- Number of checkpoints: 2 -- Number of configs: 4 -- Number of papers: 1 - - \[ALGORITHM\] Tdan: Temporally-Deformable Alignment Network for Video Super-Resolution ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/tdan/README.md#citation)) - -## TOFlow (IJCV'2019) - -- Tasks: video interpolation, video super-resolution -- Number of checkpoints: 6 -- Number of configs: 6 -- Number of papers: 1 - - \[ALGORITHM\] Video Enhancement With Task-Oriented Flow ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/tof/README.md#citation)) - -## TTSR (CVPR'2020) - -- Tasks: image super-resolution -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Learning Texture Transformer Network for Image Super-Resolution ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/ttsr/README.md#citation)) - -## WGAN-GP (NeurIPS'2017) - -- Tasks: unconditional gans -- Number of checkpoints: 2 -- Number of configs: 2 -- Number of papers: 1 - - \[ALGORITHM\] Improved Training of Wasserstein Gans ([⇨](https://github.com/open-mmlab/mmediting/blob/1.x/configs/wgan-gp/README.md#citation)) diff --git a/docs/en/Makefile b/docs/en/Makefile index d4bb2cbb9e..56ae5906ce 100644 --- a/docs/en/Makefile +++ b/docs/en/Makefile @@ -17,4 +17,7 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile + rm -rf _build + rm -rf model_zoo + rm -rf dataset_zoo @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/en/_template/classtemplate.rst b/docs/en/_template/classtemplate.rst deleted file mode 100644 index 4f74842394..0000000000 --- a/docs/en/_template/classtemplate.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. role:: hidden - :class: hidden-section -.. currentmodule:: {{ module }} - - -{{ name | underline}} - -.. autoclass:: {{ name }} - :members: - - -.. 
- autogenerated from source/_templates/classtemplate.rst - note it does not have :inherited-members: diff --git a/docs/en/_templates/404.html b/docs/en/_templates/404.html new file mode 100644 index 0000000000..1cd18b7310 --- /dev/null +++ b/docs/en/_templates/404.html @@ -0,0 +1,16 @@ +{% extends "layout.html" %} + +{% block body %} + +

+<h1>Page Not Found</h1>
+<p>
+  Oops! The page you are looking for cannot be found.
+</p>
+<p>
+  This is likely to happen when you are switching document versions and the page you are reading is moved to another location in the new version. You can look for it in the content table left, or go to the homepage.
+</p>
+<p>
+  If you cannot find documentation you want, please open an issue to tell us!
+</p>
+ +{% endblock %} diff --git a/docs/en/_templates/python/attribute.rst b/docs/en/_templates/python/attribute.rst new file mode 100644 index 0000000000..ebaba555ad --- /dev/null +++ b/docs/en/_templates/python/attribute.rst @@ -0,0 +1 @@ +{% extends "python/data.rst" %} diff --git a/docs/en/_templates/python/class.rst b/docs/en/_templates/python/class.rst new file mode 100644 index 0000000000..df5edffb62 --- /dev/null +++ b/docs/en/_templates/python/class.rst @@ -0,0 +1,58 @@ +{% if obj.display %} +.. py:{{ obj.type }}:: {{ obj.short_name }}{% if obj.args %}({{ obj.args }}){% endif %} +{% for (args, return_annotation) in obj.overloads %} + {{ " " * (obj.type | length) }} {{ obj.short_name }}{% if args %}({{ args }}){% endif %} +{% endfor %} + + + {% if obj.bases %} + {% if "show-inheritance" in autoapi_options %} + Bases: {% for base in obj.bases %}{{ base|link_objs }}{% if not loop.last %}, {% endif %}{% endfor %} + {% endif %} + + + {% if "show-inheritance-diagram" in autoapi_options and obj.bases != ["object"] %} + .. autoapi-inheritance-diagram:: {{ obj.obj["full_name"] }} + :parts: 1 + {% if "private-members" in autoapi_options %} + :private-bases: + {% endif %} + + {% endif %} + {% endif %} + {% if obj.docstring %} + {{ obj.docstring|indent(3) }} + {% endif %} + {% if "inherited-members" in autoapi_options %} + {% set visible_classes = obj.classes|selectattr("display")|list %} + {% else %} + {% set visible_classes = obj.classes|rejectattr("inherited")|selectattr("display")|list %} + {% endif %} + {% for klass in visible_classes %} + {{ klass.render()|indent(3) }} + {% endfor %} + {% if "inherited-members" in autoapi_options %} + {% set visible_properties = obj.properties|selectattr("display")|list %} + {% else %} + {% set visible_properties = obj.properties|rejectattr("inherited")|selectattr("display")|list %} + {% endif %} + {% for property in visible_properties %} + {{ property.render()|indent(3) }} + {% endfor %} + {% if "inherited-members" in autoapi_options %} + {% set visible_attributes = obj.attributes|selectattr("display")|list %} + {% else %} + {% set visible_attributes = obj.attributes|rejectattr("inherited")|selectattr("display")|list %} + {% endif %} + {% for attribute in visible_attributes %} + {{ attribute.render()|indent(3) }} + {% endfor %} + {% if "inherited-members" in autoapi_options %} + {% set visible_methods = obj.methods|selectattr("display")|list %} + {% else %} + {% set visible_methods = obj.methods|rejectattr("inherited")|selectattr("display")|list %} + {% endif %} + {% for method in visible_methods %} + {{ method.render()|indent(3) }} + {% endfor %} +{% endif %} diff --git a/docs/en/_templates/python/data.rst b/docs/en/_templates/python/data.rst new file mode 100644 index 0000000000..89417f1e15 --- /dev/null +++ b/docs/en/_templates/python/data.rst @@ -0,0 +1,32 @@ +{% if obj.display %} +.. py:{{ obj.type }}:: {{ obj.name }} + {%+ if obj.value is not none or obj.annotation is not none -%} + :annotation: + {%- if obj.annotation %} :{{ obj.annotation }} + {%- endif %} + {%- if obj.value is not none %} = {% + if obj.value is string and obj.value.splitlines()|count > 1 -%} + Multiline-String + + .. raw:: html + +
+                <details><summary>Show Value</summary>
+
+            .. code-block:: text
+                :linenos:
+
+                {{ obj.value|indent(width=8) }}
+
+            .. raw:: html
+
+                </details>
+ + {%- else -%} + {{ obj.value|string|truncate(100) }} + {%- endif %} + {%- endif %} + {% endif %} + + + {{ obj.docstring|indent(3) }} +{% endif %} diff --git a/docs/en/_templates/python/exception.rst b/docs/en/_templates/python/exception.rst new file mode 100644 index 0000000000..92f3d38fd5 --- /dev/null +++ b/docs/en/_templates/python/exception.rst @@ -0,0 +1 @@ +{% extends "python/class.rst" %} diff --git a/docs/en/_templates/python/function.rst b/docs/en/_templates/python/function.rst new file mode 100644 index 0000000000..b00d5c2445 --- /dev/null +++ b/docs/en/_templates/python/function.rst @@ -0,0 +1,15 @@ +{% if obj.display %} +.. py:function:: {{ obj.short_name }}({{ obj.args }}){% if obj.return_annotation is not none %} -> {{ obj.return_annotation }}{% endif %} + +{% for (args, return_annotation) in obj.overloads %} + {{ obj.short_name }}({{ args }}){% if return_annotation is not none %} -> {{ return_annotation }}{% endif %} + +{% endfor %} + {% for property in obj.properties %} + :{{ property }}: + {% endfor %} + + {% if obj.docstring %} + {{ obj.docstring|indent(3) }} + {% endif %} +{% endif %} diff --git a/docs/en/_templates/python/method.rst b/docs/en/_templates/python/method.rst new file mode 100644 index 0000000000..723cb7bbe5 --- /dev/null +++ b/docs/en/_templates/python/method.rst @@ -0,0 +1,19 @@ +{%- if obj.display %} +.. py:method:: {{ obj.short_name }}({{ obj.args }}){% if obj.return_annotation is not none %} -> {{ obj.return_annotation }}{% endif %} + +{% for (args, return_annotation) in obj.overloads %} + {{ obj.short_name }}({{ args }}){% if return_annotation is not none %} -> {{ return_annotation }}{% endif %} + +{% endfor %} + {% if obj.properties %} + {% for property in obj.properties %} + :{{ property }}: + {% endfor %} + + {% else %} + + {% endif %} + {% if obj.docstring %} + {{ obj.docstring|indent(3) }} + {% endif %} +{% endif %} diff --git a/docs/en/_templates/python/module.rst b/docs/en/_templates/python/module.rst new file mode 100644 index 0000000000..d2714f6c9d --- /dev/null +++ b/docs/en/_templates/python/module.rst @@ -0,0 +1,114 @@ +{% if not obj.display %} +:orphan: + +{% endif %} +:py:mod:`{{ obj.name }}` +=========={{ "=" * obj.name|length }} + +.. py:module:: {{ obj.name }} + +{% if obj.docstring %} +.. autoapi-nested-parse:: + + {{ obj.docstring|indent(3) }} + +{% endif %} + +{% block subpackages %} +{% set visible_subpackages = obj.subpackages|selectattr("display")|list %} +{% if visible_subpackages %} +Subpackages +----------- +.. toctree:: + :titlesonly: + :maxdepth: 3 + +{% for subpackage in visible_subpackages %} + {{ subpackage.short_name }}/index.rst +{% endfor %} + + +{% endif %} +{% endblock %} +{% block submodules %} +{% set visible_submodules = obj.submodules|selectattr("display")|list %} +{% if visible_submodules %} +Submodules +---------- +.. 
toctree:: + :titlesonly: + :maxdepth: 1 + +{% for submodule in visible_submodules %} + {{ submodule.short_name }}/index.rst +{% endfor %} + + +{% endif %} +{% endblock %} +{% block content %} +{% if obj.all is not none %} +{% set visible_children = obj.children|selectattr("short_name", "in", obj.all)|list %} +{% elif obj.type is equalto("package") %} +{% set visible_children = obj.children|selectattr("display")|list %} +{% else %} +{% set visible_children = obj.children|selectattr("display")|rejectattr("imported")|list %} +{% endif %} +{% if visible_children %} +{{ obj.type|title }} Contents +{{ "-" * obj.type|length }}--------- + +{% set visible_classes = visible_children|selectattr("type", "equalto", "class")|list %} +{% set visible_functions = visible_children|selectattr("type", "equalto", "function")|list %} +{% set visible_attributes = visible_children|selectattr("type", "equalto", "data")|list %} +{% if "show-module-summary" in autoapi_options and (visible_classes or visible_functions) %} +{% block classes scoped %} +{% if visible_classes %} +Classes +~~~~~~~ + +.. autoapisummary:: + +{% for klass in visible_classes %} + {{ klass.id }} +{% endfor %} + + +{% endif %} +{% endblock %} + +{% block functions scoped %} +{% if visible_functions %} +Functions +~~~~~~~~~ + +.. autoapisummary:: + +{% for function in visible_functions %} + {{ function.id }} +{% endfor %} + + +{% endif %} +{% endblock %} + +{% block attributes scoped %} +{% if visible_attributes %} +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + +{% for attribute in visible_attributes %} + {{ attribute.id }} +{% endfor %} + + +{% endif %} +{% endblock %} +{% endif %} +{% for obj_item in visible_children %} +{{ obj_item.render()|indent(0) }} +{% endfor %} +{% endif %} +{% endblock %} diff --git a/docs/en/_templates/python/package.rst b/docs/en/_templates/python/package.rst new file mode 100644 index 0000000000..fb9a64965e --- /dev/null +++ b/docs/en/_templates/python/package.rst @@ -0,0 +1 @@ +{% extends "python/module.rst" %} diff --git a/docs/en/_templates/python/property.rst b/docs/en/_templates/python/property.rst new file mode 100644 index 0000000000..70af24236f --- /dev/null +++ b/docs/en/_templates/python/property.rst @@ -0,0 +1,15 @@ +{%- if obj.display %} +.. py:property:: {{ obj.short_name }} + {% if obj.annotation %} + :type: {{ obj.annotation }} + {% endif %} + {% if obj.properties %} + {% for property in obj.properties %} + :{{ property }}: + {% endfor %} + {% endif %} + + {% if obj.docstring %} + {{ obj.docstring|indent(3) }} + {% endif %} +{% endif %} diff --git a/docs/en/advanced_guides/data_flow.md b/docs/en/advanced_guides/data_flow.md new file mode 100644 index 0000000000..5fb38c7fd8 --- /dev/null +++ b/docs/en/advanced_guides/data_flow.md @@ -0,0 +1,5 @@ +# Data flow in MMEditing \[Coming Soon!\] + +We're improving this documentation. Don't hesitate to join us! + +[Make a pull request](https://github.com/open-mmlab/mmediting/compare) or [discuss with us](https://github.com/open-mmlab/mmediting/discussions/1429)! diff --git a/docs/en/advanced_guides/data_preprocessor.md b/docs/en/advanced_guides/data_preprocessor.md new file mode 100644 index 0000000000..70431b77b1 --- /dev/null +++ b/docs/en/advanced_guides/data_preprocessor.md @@ -0,0 +1,5 @@ +# Data pre-processor \[Coming Soon!\] + +We're improving this documentation. Don't hesitate to join us! + +[Make a pull request](https://github.com/open-mmlab/mmediting/compare) or [discuss with us](https://github.com/open-mmlab/mmediting/discussions/1429)! 
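The `docs/en/_templates/python/*.rst` files added above are Jinja templates for sphinx-autoapi, which generates the Python API reference (the resulting `autoapi` directory is listed in the new `docs/en/.gitignore`). Below is a minimal, illustrative sketch of how such custom templates are typically wired up in `docs/en/conf.py`; the paths and option values are assumptions for illustration, not taken from this change.

```python
# Illustrative sketch only: wiring sphinx-autoapi to the custom templates
# under docs/en/_templates/python/. Paths and option values are assumed.
extensions = [
    'autoapi.extension',  # sphinx-autoapi
]

autoapi_type = 'python'
autoapi_dirs = ['../../mmedit']        # package to document (assumed path)
autoapi_template_dir = '_templates'    # picks up python/class.rst, python/module.rst, ...
autoapi_add_toctree_entry = False      # API pages are referenced from the docs' own toctree
autoapi_options = [
    'members',
    'undoc-members',
    'show-inheritance',
    'show-module-summary',
]
```

With `autoapi_template_dir` pointing at `_templates`, any template placed under `_templates/python/` (such as the `class.rst` and `module.rst` files above) overrides the default shipped with the extension.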
diff --git a/docs/en/advanced_guides/evaluator.md b/docs/en/advanced_guides/evaluator.md new file mode 100644 index 0000000000..ce5e922ccf --- /dev/null +++ b/docs/en/advanced_guides/evaluator.md @@ -0,0 +1,5 @@ +# Evaluator \[Coming Soon!\] + +We're improving this documentation. Don't hesitate to join us! + +[Make a pull request](https://github.com/open-mmlab/mmediting/compare) or [discuss with us](https://github.com/open-mmlab/mmediting/discussions/1429)! diff --git a/docs/en/advanced_guides/structures.md b/docs/en/advanced_guides/structures.md new file mode 100644 index 0000000000..1bd386f146 --- /dev/null +++ b/docs/en/advanced_guides/structures.md @@ -0,0 +1,5 @@ +# Data structure in MMEditing \[Coming Soon!\] + +We're improving this documentation. Don't hesitate to join us! + +[Make a pull request](https://github.com/open-mmlab/mmediting/compare) or [discuss with us](https://github.com/open-mmlab/mmediting/discussions/1429)! diff --git a/docs/en/api/apis.rst b/docs/en/api/apis.rst deleted file mode 100644 index 0e92c02b67..0000000000 --- a/docs/en/api/apis.rst +++ /dev/null @@ -1,27 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.apis -=================================== - -.. currentmodule:: mmedit.apis - -APIS ----------------- -.. autosummary:: - :toctree: generated - :nosignatures: - - matting_inference - inpainting_inference - restoration_inference - restoration_video_inference - restoration_face_inference - video_interpolation_inference - - init_model - delete_cfg - set_random_seed - sample_conditional_model - sample_unconditional_model - sample_img2img_model diff --git a/docs/en/api/datasets.rst b/docs/en/api/datasets.rst deleted file mode 100644 index e0cfc25858..0000000000 --- a/docs/en/api/datasets.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.datasets -=================================== - -.. currentmodule:: mmedit.datasets - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - AdobeComp1kDataset - BasicImageDataset - BasicFramesDataset - BasicConditionalDataset - UnpairedImageDataset - PairedImageDataset - ImageNet - CIFAR10 - GrowScaleImgDataset diff --git a/docs/en/api/datasets.transforms.rst b/docs/en/api/datasets.transforms.rst deleted file mode 100644 index b3796c3b2a..0000000000 --- a/docs/en/api/datasets.transforms.rst +++ /dev/null @@ -1,72 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.datasets.transforms -=================================== - -.. currentmodule:: mmedit.datasets.transforms - -.. 
autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - BinarizeImage - Clip - ColorJitter - CopyValues - Crop - CropLike - DegradationsWithShuffle - LoadImageFromFile - LoadMask - Flip - FixedCrop - GenerateCoordinateAndCell - GenerateFacialHeatmap - GenerateFrameIndices - GenerateFrameIndiceswithPadding - GenerateSegmentIndices - GetMaskedImage - GetSpatialDiscountMask - MATLABLikeResize - MirrorSequence - ModCrop - Normalize - PackEditInputs - PairedRandomCrop - RandomAffine - RandomBlur - RandomDownSampling - RandomJPEGCompression - RandomMaskDilation - RandomNoise - RandomResize - RandomResizedCrop - RandomRotation - RandomTransposeHW - RandomVideoCompression - RescaleToZeroOne - Resize - SetValues - TemporalReverse - ToTensor - UnsharpMasking - CropAroundCenter - CropAroundFg - GenerateSeg - CropAroundUnknown - GenerateSoftSeg - FormatTrimap - TransformTrimap - GenerateTrimap - GenerateTrimapWithDistTransform - CompositeFg - RandomLoadResizeBg - MergeFgAndBg - PerturbBg - RandomJitter - LoadPairedImageFromFile - CenterCropLongEdge - RandomCropLongEdge - NumpyPad diff --git a/docs/en/api/engine.hooks.rst b/docs/en/api/engine.hooks.rst deleted file mode 100644 index 7f86d27383..0000000000 --- a/docs/en/api/engine.hooks.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.engine.hooks -=================================== - -.. currentmodule:: mmedit.engine.hooks - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - ReduceLRSchedulerHook - BasicVisualizationHook - GenVisualizationHook - ExponentialMovingAverageHook - GenIterTimerHook - PGGANFetchDataHook - PickleDataHook diff --git a/docs/en/api/engine.optimizers.rst b/docs/en/api/engine.optimizers.rst deleted file mode 100644 index 4425aeadef..0000000000 --- a/docs/en/api/engine.optimizers.rst +++ /dev/null @@ -1,16 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.engine.optimizers -=================================== - -.. currentmodule:: mmedit.engine.optimizers - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - MultiOptimWrapperConstructor - PGGANOptimWrapperConstructor - SinGANOptimWrapperConstructor diff --git a/docs/en/api/engine.runner.rst b/docs/en/api/engine.runner.rst deleted file mode 100644 index e8b71920f8..0000000000 --- a/docs/en/api/engine.runner.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.engine.runner -=================================== - -.. currentmodule:: mmedit.engine.runner - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - MultiValLoop - MultiTestLoop - GenTestLoop - GenValLoop - GenLogProcessor diff --git a/docs/en/api/engine.schedulers.rst b/docs/en/api/engine.schedulers.rst deleted file mode 100644 index 87b43e6873..0000000000 --- a/docs/en/api/engine.schedulers.rst +++ /dev/null @@ -1,15 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.engine.schedulers -=================================== - -.. currentmodule:: mmedit.engine.schedulers - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - LinearLrInterval - ReduceLR diff --git a/docs/en/api/evaluation.functional.rst b/docs/en/api/evaluation.functional.rst deleted file mode 100644 index 45ba9adb6b..0000000000 --- a/docs/en/api/evaluation.functional.rst +++ /dev/null @@ -1,24 +0,0 @@ -.. 
role:: hidden - :class: hidden-section - -mmedit.evaluation.functional -=================================== - -.. currentmodule:: mmedit.evaluation.functional - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - InceptionV3 - -.. autosummary:: - :toctree: generated - :nosignatures: - - gauss_gradient - disable_gpu_fuser_on_pt19 - load_inception - prepare_vgg_feat - prepare_inception_feat diff --git a/docs/en/api/evaluation.metrics.rst b/docs/en/api/evaluation.metrics.rst deleted file mode 100644 index b2756a557a..0000000000 --- a/docs/en/api/evaluation.metrics.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.evaluation.metrics -=================================== - -.. currentmodule:: mmedit.evaluation.metrics - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - MAE - MSE - PSNR - SNR - SSIM - MultiScaleStructureSimilarity - FrechetInceptionDistance - TransFID - InceptionScore - TransIS - SAD - MattingMSE - ConnectivityError - GradientError - PerceptualPathLength - PrecisionAndRecall - SlicedWassersteinDistance - NIQE - Equivariance - psnr - snr - ssim - niqe diff --git a/docs/en/api/models.base_models.rst b/docs/en/api/models.base_models.rst deleted file mode 100644 index bbd7e74a02..0000000000 --- a/docs/en/api/models.base_models.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.models.base_models -=================================== - -.. currentmodule:: mmedit.models.base_models - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - BaseEditModel - BaseGAN - BaseConditionalGAN - BaseMattor - BasicInterpolator - BaseTranslationModel - OneStageInpaintor - TwoStageInpaintor - ExponentialMovingAverage - RampUpEMA diff --git a/docs/en/api/models.data_preprocessors.rst b/docs/en/api/models.data_preprocessors.rst deleted file mode 100644 index f3f333faf3..0000000000 --- a/docs/en/api/models.data_preprocessors.rst +++ /dev/null @@ -1,18 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.models.data_preprocessors -=================================== - -.. currentmodule:: mmedit.models.data_preprocessors - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - EditDataPreprocessor - MattorPreprocessor - split_batch - stack_batch - GenDataPreprocessor diff --git a/docs/en/api/models.editors.rst b/docs/en/api/models.editors.rst deleted file mode 100644 index e029ba8d48..0000000000 --- a/docs/en/api/models.editors.rst +++ /dev/null @@ -1,108 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.models.editors -=================================== - -.. currentmodule:: mmedit.models.editors - -.. 
autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - AOTEncoderDecoder - AOTBlockNeck - AOTInpaintor - ContextualAttentionNeck - ContextualAttentionModule - CAIN - CAINNet - DIM - DIC - DICNet - LightCNN - FeedbackBlock - FeedbackBlockHeatmapAttention - FeedbackBlockCustom - MaxFeature - FLAVR - FLAVRNet - ToFResBlock - TOFlowVFINet - TOFlowVSRNet - DeepFillEncoder - DeepFillEncoderDecoder - DeepFillDecoder - DeepFillRefiner - DeepFillv1Inpaintor - DeepFillv1Discriminators - EDSRNet - ESRGAN - DepthwiseIndexBlock - HolisticIndexBlock - IndexNet - IndexNetEncoder - IndexedUpsample - IndexNetDecoder - GCA - GLEncoderDecoder - GLEncoder - GLDecoder - GLDilationNeck - PartialConv2d - PConvEncoderDecoder - PConvEncoder - PConvDecoder - PConvInpaintor - MaskConvModule - RRDBNet - SRCNNNet - RRDBNet - RealESRGAN - UNetDiscriminatorWithSpectralNorm - EDVR - EDVRNet - TDAN - TDANNet - BasicVSR - BasicVSRNet - BasicVSRPlusPlusNet - IconVSRNet - RealBasicVSR - RealBasicVSRNet - SRGAN - MaxFeature - ModifiedVGG - MSRResNet - RDNNet - LTE - TTSR - TTSRNet - TTSRDiscriminator - TTSRNet - SearchTransformer - GLEANStyleGANv2 - LIIF - MLPRefiner - PlainRefiner - PlainDecoder - FBAResnetDilated - FBADecoder - WGANGP - CycleGAN - SAGAN - LSGAN - GGAN - Pix2Pix - StyleGAN1 - StyleGAN2 - StyleGAN3 - BigGAN - DCGAN - ProgressiveGrowingGAN - SinGAN - IDLossModel - PESinGAN - MSPIEStyleGAN2 - StyleGAN3Generator diff --git a/docs/en/api/models.losses.rst b/docs/en/api/models.losses.rst deleted file mode 100644 index 292d9eb924..0000000000 --- a/docs/en/api/models.losses.rst +++ /dev/null @@ -1,44 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.models.losses -=================================== - -.. currentmodule:: mmedit.models.losses - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - L1Loss - MSELoss - CharbonnierLoss - L1CompositionLoss - MSECompositionLoss - CharbonnierCompLoss - GANLoss - GaussianBlur - GradientPenaltyLoss - PerceptualLoss - PerceptualVGG - reduce_loss - mask_reduce_loss - DiscShiftLoss - MaskedTVLoss - GradientLoss - TransferalPerceptualLoss - LightCNNFeatureLoss - gradient_penalty_loss - r1_gradient_penalty_loss - gen_path_regularizer - FaceIdLoss - CLIPLoss - CLIPLossComps - DiscShiftLossComps - FaceIdLossComps - GANLossComps - GeneratorPathRegularizerComps - GradientPenaltyLossComps - R1GradientPenaltyComps - disc_shift_loss diff --git a/docs/en/api/models.utils.rst b/docs/en/api/models.utils.rst deleted file mode 100644 index 6b075c2838..0000000000 --- a/docs/en/api/models.utils.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.models.utils -=================================== - -.. currentmodule:: mmedit.models.utils - -.. autosummary:: - :toctree: generated - :nosignatures: - - default_init_weights - make_layer - flow_warp - generation_init_weights - set_requires_grad - extract_bbox_patch - extract_around_bbox - get_unknown_tensor - noise_sample_fn - label_sample_fn - get_valid_num_batches - get_valid_noise_size - get_module_device diff --git a/docs/en/api/structures.rst b/docs/en/api/structures.rst deleted file mode 100644 index dec2284c8b..0000000000 --- a/docs/en/api/structures.rst +++ /dev/null @@ -1,15 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.structures -=================================== - -.. currentmodule:: mmedit.structures - -.. 
autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - EditDataSample - PixelData diff --git a/docs/en/api/utils.rst b/docs/en/api/utils.rst deleted file mode 100644 index 448f3e70ec..0000000000 --- a/docs/en/api/utils.rst +++ /dev/null @@ -1,28 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.utils -=================================== - -.. currentmodule:: mmedit.utils - -.. autosummary:: - :toctree: generated - :nosignatures: - - modify_args - print_colored_log - register_all_modules - download_from_url - get_sampler - tensor2img - random_choose_unknown - add_gaussian_noise - adjust_gamma - make_coord - bbox2mask - brush_stroke_mask - get_irregular_mask - random_bbox - reorder_image - to_numpy diff --git a/docs/en/api/visualization.rst b/docs/en/api/visualization.rst deleted file mode 100644 index 50cf94eb04..0000000000 --- a/docs/en/api/visualization.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmedit.visualization -=================================== - -.. currentmodule:: mmedit.visualization - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - ConcatImageVisualizer - GenVisualizer - GenVisBackend - PaviGenVisBackend - TensorboardGenVisBackend - WandbGenVisBackend diff --git a/docs/en/notes/3_changelog.md b/docs/en/changelog.md similarity index 95% rename from docs/en/notes/3_changelog.md rename to docs/en/changelog.md index a48d8d67a1..84f38a932f 100644 --- a/docs/en/notes/3_changelog.md +++ b/docs/en/changelog.md @@ -1,5 +1,52 @@ # Changelog +## v1.0.0rc5 (04/01/2023) + +**Highlights** +We are excited to announce the release of MMEditing 1.0.0rc5. This release supports 49+ models, 180+ configs and 177+ checkpoints in MMGeneration and MMEditing. We highlight the following new features + +- Support Restormer +- Support GLIDE +- Support SwinIR +- Support Stable Diffusion + +**New Features & Improvements** + +- Disco notebook.(#1507) +- Revise test requirements and CI.(#1514) +- Recursive generate summary and docstring.(#1517) +- Enable projects.(#1526) +- Support mscoco dataset.(#1520) +- Improve Chinese documents.(#1532) +- Type hints.(#1481) +- Update download link.(#1554) +- Update deployment guide.(#1551) + +**Bug Fixes** + +- Fix documentation link checker.(#1522) +- Fix ssim first channel bug.(#1515) +- Fix restormer ut.(#1550) +- Fix extract_gt_data of realesrgan.(#1542) +- Fix model index.(#1559) +- Fix config path in disco-diffusion.(#1553) +- Fix text2image inferencer.(#1523) + +**Contributors** +A total of 16 developers contributed to this release. 
+Thanks @plyfager, @LeoXing1996, @Z-Fran, @zengyh1900, @VongolaWu, @liuwenran, @AlexZou14, @lvhan028, @xiaomile, @ldr426, @austin273, @whu-lee, @willaty, @curiosity654, @Zdafeng, @Taited
+
+**New Contributors**
+
+- @xiaomile made their first contribution in https://github.com/open-mmlab/mmediting/pull/1481
+- @ldr426 made their first contribution in https://github.com/open-mmlab/mmediting/pull/1542
+- @austin273 made their first contribution in https://github.com/open-mmlab/mmediting/pull/1553
+- @whu-lee made their first contribution in https://github.com/open-mmlab/mmediting/pull/1539
+- @willaty made their first contribution in https://github.com/open-mmlab/mmediting/pull/1541
+- @curiosity654 made their first contribution in https://github.com/open-mmlab/mmediting/pull/1556
+- @Zdafeng made their first contribution in https://github.com/open-mmlab/mmediting/pull/1476
+- @Taited made their first contribution in https://github.com/open-mmlab/mmediting/pull/1534
+
 ## v1.0.0rc4 (05/12/2022)

 **Highlights**
diff --git a/docs/en/community/contributing.md b/docs/en/community/contributing.md
new file mode 100644
index 0000000000..7a6f5b72b8
--- /dev/null
+++ b/docs/en/community/contributing.md
@@ -0,0 +1,275 @@
+# Contributing guidance
+
+Welcome to the MMEditing community! We are committed to building a toolbox for cutting-edge image, video and 3D content generation, editing and processing techniques.
+
+This section introduces the following contents:
+
+- [Pull Request Workflow](#pull-request-workflow)
+  - [1. fork and clone](#1-fork-and-clone)
+  - [2. configure pre-commit](#2-configure-pre-commit)
+  - [3. create a development branch](#3-create-a-development-branch)
+  - [4. commit the code and pass the unit test](#4-commit-the-code-and-pass-the-unit-test)
+  - [5. push the code to remote](#5-push-the-code-to-remote)
+  - [6. create a pull request](#6-create-a-pull-request)
+  - [7. resolve conflicts](#7-resolve-conflicts)
+- [Guidance](#guidance)
+  - [unit test](#unit-test)
+  - [document rendering](#document-rendering)
+- [Code Style](#code-style)
+  - [Python](#python)
+  - [C++ and CUDA](#c-and-cuda)
+  - [PR Specs](#pr-specs)
+
+All kinds of contributions are welcome, including but not limited to the following.
+
+**Fix bug**
+
+You can directly post a Pull Request to fix typos in code or documents.
+
+The steps to fix a bug in the code implementation are as follows.
+
+1. If the modification involves significant changes, you should create an issue first and describe the error information and how to trigger the bug. Other developers will discuss it with you and propose a proper solution.
+
+2. Post a pull request after fixing the bug and adding the corresponding unit tests.
+
+**New Feature or Enhancement**
+
+1. If the modification involves significant changes, you should create an issue to discuss with our developers and propose a proper design.
+2. Post a Pull Request after implementing the new feature or enhancement and add the corresponding unit tests.
+
+**Document**
+
+You can directly post a pull request to fix documents. If you want to add a document, you should first create an issue to check if it is reasonable.
+
+### Pull Request Workflow
+
+If you're not familiar with Pull Requests, don't worry! The following guidance will tell you how to create a Pull Request step by step. If you want to dive into the development mode of Pull Requests, you can refer to the [official documents](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests).
+
+#### 1. Fork and clone
+
+If you are posting a pull request for the first time, you should fork the OpenMMLab repositories by clicking the **Fork** button in the top right corner of the GitHub page, and the forked repositories will appear under your GitHub profile.
+
+Then, you can clone the repositories to local:
+
+```shell
+git clone git@github.com:{username}/mmediting.git
+```
+
+After that, you should add the official repository as the upstream repository:
+
+```bash
+git remote add upstream git@github.com:open-mmlab/mmediting
+```
+
+Check whether the remote repository has been added successfully by `git remote -v`:
+
+```bash
+origin	git@github.com:{username}/mmediting.git (fetch)
+origin	git@github.com:{username}/mmediting.git (push)
+upstream	git@github.com:open-mmlab/mmediting (fetch)
+upstream	git@github.com:open-mmlab/mmediting (push)
+```
+
+```{note}
+Here's a brief introduction to origin and upstream. When we use "git clone", we create an "origin" remote by default, which points to the repository cloned from. As for "upstream", we add it ourselves to point to the target repository. Of course, if you don't like the name "upstream", you could name it as you wish. Usually, we'll push the code to "origin". If the pushed code conflicts with the latest code in the official repository ("upstream"), we should pull the latest code from upstream to resolve the conflicts, and then push to "origin" again. The posted Pull Request will be updated automatically.
+```
+
+#### 2. Configure pre-commit
+
+You should configure [pre-commit](https://pre-commit.com/#intro) in the local development environment to make sure the code style matches that of OpenMMLab. **Note**: The following code should be executed under the mmediting directory.
+
+```shell
+pip install -U pre-commit
+pre-commit install
+```
+
+Check that pre-commit is configured successfully, and install the hooks defined in `.pre-commit-config.yaml`.
+
+```shell
+pre-commit run --all-files
+```
+
+```{note}
+Chinese users may fail to download the pre-commit hooks due to network issues. In this case, you could download these hooks from Gitee by using the .pre-commit-config-zh-cn.yaml configuration:
+
+pre-commit install -c .pre-commit-config-zh-cn.yaml
+pre-commit run --all-files -c .pre-commit-config-zh-cn.yaml
+```
+
+If the installation process is interrupted, you can repeatedly run `pre-commit run ...` to continue the installation.
+
+If the code does not conform to the code style specification, pre-commit will raise a warning and fix some of the errors automatically.
+
+If we want to commit our code bypassing the pre-commit hook, we can use the `--no-verify` option (**only for temporary commits**).
+
+```shell
+git commit -m "xxx" --no-verify
+```
+
+#### 3. Create a development branch
+
+After configuring pre-commit, we should create a branch based on the master branch to develop the new feature or fix the bug. The proposed branch name is `username/pr_name`.
+
+```shell
+git checkout -b yhc/refactor_contributing_doc
+```
+
+In subsequent development, if the master branch of the local repository is behind the master branch of "upstream", we need to pull the upstream branch for synchronization, and then execute the above command again:
+
+```shell
+git pull upstream master
+```
+
+#### 4. Commit the code and pass the unit test
+
+- MMEditing introduces mypy to do static type checking to increase the robustness of the code. Therefore, we need to add Type Hints to our code and pass the mypy check (a short sketch is given below).
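+
+  As a purely illustrative, hypothetical example (the function below is not an actual MMEditing API, just a sketch of what type-hinted code looks like):
+
+  ```python
+  from typing import List, Optional
+
+  import torch
+
+
+  def stack_images(images: List[torch.Tensor],
+                   device: Optional[str] = None) -> torch.Tensor:
+      """Stack a list of CHW image tensors into a single NCHW batch."""
+      batch = torch.stack(images, dim=0)  # shape: (N, C, H, W)
+      if device is not None:
+          # Optionally move the batch to the requested device, e.g. 'cuda:0'.
+          batch = batch.to(device)
+      return batch
+  ```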
+  If you are not familiar with Type Hints, you can refer to [this tutorial](https://docs.python.org/3/library/typing.html).
+
+- The committed code should pass the unit tests:
+
+  ```shell
+  # Pass all unit tests
+  pytest tests
+
+  # Pass the unit test of runner
+  pytest tests/test_runner/test_runner.py
+  ```
+
+  If the unit test fails for lack of dependencies, you can install the dependencies referring to the [guidance](#unit-test).
+
+- If the documents are modified/added, we should check the rendering results referring to the [guidance](#document-rendering).
+
+#### 5. Push the code to remote
+
+We can push the local commits to the remote repository after passing the unit tests and the pre-commit check. You can associate the local branch with the remote branch by adding the `-u` option.
+
+```shell
+git push -u origin {branch_name}
+```
+
+This will allow you to use the `git push` command to push code directly next time, without having to specify a branch or the remote repository.
+
+#### 6. Create a Pull Request
+
+(1) Create a pull request in GitHub's Pull request interface.
+
+(2) Modify the PR description according to the guidelines so that other developers can better understand your changes.
+
+Find more details about the Pull Request description in the [pull request guidelines](#pr-specs).
+
+**Note**
+
+(a) The Pull Request description should contain the reason for the change, the content of the change, and the impact of the change, and be associated with the relevant Issue (see the [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)).
+
+(b) If it is your first contribution, please sign the CLA.
+
+(c) Check whether the Pull Request passes the CI.
+
+MMEditing will run unit tests for the posted Pull Request on different platforms (Linux, Windows, Mac), based on different versions of Python, PyTorch and CUDA, to make sure the code is correct. We can see the specific test information by clicking `Details` in the above image so that we can modify the code.
+
+(3) If the Pull Request passes the CI, then you can wait for the review from other developers. You'll modify the code based on the reviewers' comments, and repeat the steps [4](#4-commit-the-code-and-pass-the-unit-test)-[5](#5-push-the-code-to-remote) until all reviewers approve it. Then, we will merge it ASAP.
+
+#### 7. Resolve conflicts
+
+If your local branch conflicts with the latest master branch of "upstream", you'll need to resolve the conflicts. There are two ways to do this:
+
+```shell
+git fetch --all --prune
+git rebase upstream/master
+```
+
+or
+
+```shell
+git fetch --all --prune
+git merge upstream/master
+```
+
+If you are very good at handling conflicts, then you can use rebase to resolve conflicts, as this will keep your commit logs tidy. If you are not familiar with `rebase`, then you can use `merge` to resolve conflicts.
+
+### Guidance
+
+#### Unit test
+
+We should make sure that the committed code does not decrease the coverage of the unit tests. We can run the following command to check the unit test coverage:
+
+```shell
+python -m coverage run -m pytest /path/to/test_file
+python -m coverage html
+# check file in htmlcov/index.html
+```
+
+#### Document rendering
+
+If the documents are modified/added, we should check the rendering result.
+We can install the dependencies and run the following command to render the documents and check the results:
+
+```shell
+pip install -r requirements/docs.txt
+cd docs/zh_cn/
+# or docs/en
+make html
+# check file in ./docs/zh_cn/_build/html/index.html
+```
+
+### Code style
+
+#### Python
+
+We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
+
+We use the following tools for linting and formatting:
+
+- [flake8](https://github.com/PyCQA/flake8): A wrapper around some linter tools.
+- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports.
+- [yapf](https://github.com/google/yapf): A formatter for Python files.
+- [codespell](https://github.com/codespell-project/codespell): A Python utility to fix common misspellings in text files.
+- [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files.
+- [docformatter](https://github.com/myint/docformatter): A formatter for docstrings.
+
+Style configurations of yapf and isort can be found in [setup.cfg](../../../setup.cfg).
+
+We use a [pre-commit hook](https://pre-commit.com/) that checks and formats `flake8`, `yapf`, `isort`, `trailing whitespaces` and `markdown files`,
+fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma` and `mixed-line-ending`, and sorts `requirements.txt` automatically on every commit.
+The config for a pre-commit hook is stored in [.pre-commit-config](../../../.pre-commit-config.yaml).
+
+#### C++ and CUDA
+
+We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
+
+### PR Specs
+
+1. Use the [pre-commit](https://pre-commit.com) hook to avoid issues of code style
+
+2. One short-time branch should be matched with only one PR
+
+3. Accomplish a detailed change in one PR. Avoid large PRs
+
+   - Bad: Support Faster R-CNN
+   - Acceptable: Add a box head to Faster R-CNN
+   - Good: Add a parameter to box head to support custom conv-layer number
+
+4. Provide clear and significant commit messages
+
+5. Provide clear and meaningful PR descriptions
+
+   - Task name should be clarified in the title. The general format is: \[Prefix\] Short description of the PR (Suffix)
+   - Prefix: add new feature \[Feature\], fix bug \[Fix\], related to documents \[Docs\], in developing \[WIP\] (which will not be reviewed temporarily)
+   - Introduce main changes, results and influences on other modules in the short description
+   - Associate related issues and pull requests with a milestone
diff --git a/docs/en/community/projects.md b/docs/en/community/projects.md
new file mode 100644
index 0000000000..50e25c903f
--- /dev/null
+++ b/docs/en/community/projects.md
@@ -0,0 +1,67 @@
+# MMEditing projects
+
+Welcome to the MMEditing community!
+The MMEditing ecosystem consists of tutorials, libraries, and projects from a broad set of researchers in academia and industry, ML and application engineers.
+The goal of this ecosystem is to support, accelerate, and aid in your exploration with MMEditing for image, video, 3D content generation, editing and processing.
+
+Here are a few projects that are built upon MMEditing. They are examples of how to use MMEditing as a library, to make your projects more maintainable.
+Please find more projects in [MMEditing Ecosystem](https://openmmlab.com/ecosystem).
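+
+As a rough, minimal sketch of what using MMEditing as a library can look like, the snippet below restores an image with a pre-trained model through the high-level APIs in `mmedit.apis`. The config, checkpoint and image paths are placeholders, and exact signatures and return types may differ between versions, so please double-check the API reference before reusing it.
+
+```python
+import mmcv
+
+from mmedit.apis import init_model, restoration_inference
+from mmedit.utils import tensor2img
+
+# Placeholder paths -- replace them with a real config, checkpoint and image.
+config = 'path/to/restoration_config.py'
+checkpoint = 'path/to/checkpoint.pth'
+
+model = init_model(config, checkpoint, device='cuda:0')            # build the model and load weights
+output = restoration_inference(model, 'path/to/low_quality.png')   # run inference on one image
+mmcv.imwrite(tensor2img(output), 'restored.png')                   # convert the output tensor and save it
+```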
+ +## Show your projects on OpenMMLab Ecosystem + +You can submit your project so that it can be shown on the homepage of [OpenMMLab](https://openmmlab.com/ecosystem). + +## Add example projects to MMEditing + +Here is an [example project](../../../projects/example_project) about how to add your projects to MMEditing. +You can copy and create your own project from the [example project](../../../projects/example_project). + +We also provide some documentation listed below for your reference: + +- [Contribution Guide](https://mmediting.readthedocs.io/en/dev-1.x/community/contributing.html) + + The guides for new contributors about how to add your projects to MMEditing. + +- [New Model Guide](https://mmediting.readthedocs.io/en/dev-1.x/howto/models.html) + + The documentation of adding new models. + +- [Discussions](https://github.com/open-mmlab/mmediting/discussions) + + Welcome to start a discussion! + +## Projects of libraries and toolboxes + +- [PowerVQE](https://github.com/ryanxingql/powervqe): Open framework for quality enhancement of compressed videos based on PyTorch and MMEditing. + +- [VR-Baseline](https://github.com/linjing7/VR-Baseline): Video Restoration Toolbox. + +- [Derain-Toolbox](https://github.com/biubiubiiu/derain-toolbox): Single Image Deraining Toolbox and Benchmark + +## Projects of research papers + +- [Towards Interpretable Video Super-Resolution via Alternating Optimization, ECCV 2022](https://arxiv.org/abs/2207.10765)[\[github\]](https://github.com/caojiezhang/DAVSR) + +- [SepLUT:Separable Image-adaptive Lookup Tables for Real-time Image Enhancement, ECCV 2022](https://arxiv.org/abs/2207.08351)[\[github\]](https://github.com/ImCharlesY/SepLUT) + +- [TTVSR: Learning Trajectory-Aware Transformer for Video Super-Resolution, CVPR 2022](https://arxiv.org/abs/2204.04216)[\[github\]](https://github.com/researchmm/TTVSR) + +- [Arbitrary-Scale Image Synthesis, CVPR 2022](https://arxiv.org/pdf/2204.02273.pdf)[\[github\]](https://github.com/vglsd/ScaleParty) + +- [Investigating Tradeoffs in Real-World Video Super-Resolution(RealBasicVSR), CVPR 2022](https://arxiv.org/abs/2111.12704)[\[github\]](https://github.com/ckkelvinchan/RealBasicVSR) + +- [BasicVSR++: Improving Video Super-Resolution with Enhanced Propagation and Alignment, CVPR 2022](https://arxiv.org/abs/2104.13371)[\[github\]](https://github.com/ckkelvinchan/BasicVSR_PlusPlus) + +- [Multi-Scale Memory-Based Video Deblurring, CVPR 2022](https://arxiv.org/abs/2204.02977)[\[github\]](https://github.com/jibo27/MemDeblur) + +- [AdaInt:Learning Adaptive Intervals for 3D Lookup Tables on Real-time Image Enhancement, CVPR 2022](https://arxiv.org/abs/2204.13983)[\[github\]](https://github.com/ImCharlesY/AdaInt) + +- [A New Dataset and Transformer for Stereoscopic Video Super-Resolution, CVPRW 2022](https://openaccess.thecvf.com/content/CVPR2022W/NTIRE/papers/Imani_A_New_Dataset_and_Transformer_for_Stereoscopic_Video_Super-Resolution_CVPRW_2022_paper.pdf)[\[github\]](https://github.com/H-deep/Trans-SVSR) + +- [Liquid warping GAN with attention: A unified framework for human image synthesis, TPAMI 2021](https://arxiv.org/pdf/2011.09055.pdf)[\[github\]](https://github.com/iPERDance/iPERCore) + +- [BasicVSR:The Search for Essential Components in Video Super-Resolution and Beyond, CVPR 2021](https://arxiv.org/abs/2012.02181)[\[github\]](https://github.com/ckkelvinchan/BasicVSR-IconVSR) + +- [GLEAN:Generative Latent Bank for Large-Factor Image Super-Resolution, CVPR 
2021](https://arxiv.org/abs/2012.00739)[\[github\]](https://github.com/ckkelvinchan/GLEAN) + +- [DAN:Unfolding the Alternating Optimization for Blind Super Resolution, NeurIPS 2020](https://arxiv.org/abs/2010.02631v4)[\[github\]](https://github.com/AlexZou14/DAN-Basd-on-Openmmlab) diff --git a/docs/en/conf.py b/docs/en/conf.py index 81909842d5..d6aed67aad 100644 --- a/docs/en/conf.py +++ b/docs/en/conf.py @@ -31,21 +31,44 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', 'sphinx.ext.autosectionlabel', 'sphinx_markdown_tables', - 'myst_parser', 'sphinx_copybutton', - 'sphinx.ext.autodoc.typehints', + 'sphinx_tabs.tabs', + 'myst_parser', ] +extensions.append('notfound.extension') # enable customizing not-found page + +extensions.append('autoapi.extension') +autoapi_type = 'python' +autoapi_dirs = ['../../mmedit'] +autoapi_add_toctree_entry = False +autoapi_template_dir = '_templates' +# autoapi_options = ['members', 'undoc-members', 'show-module-summary'] + +# # Core library for html generation from docstrings +# extensions.append('sphinx.ext.autodoc') +# extensions.append('sphinx.ext.autodoc.typehints') +# # Enable 'expensive' imports for sphinx_autodoc_typehints +# set_type_checking_flag = True +# # Sphinx-native method. Not as good as sphinx_autodoc_typehints +# autodoc_typehints = "description" + +# extensions.append('sphinx.ext.autosummary') # Create neat summary tables +# autosummary_generate = True # Turn on sphinx.ext.autosummary +# # Add __init__ doc (ie. params) to class summaries +# autoclass_content = 'both' +# autodoc_skip_member = [] +# # If no docstring, inherit from base class +# autodoc_inherit_docstrings = True + autodoc_mock_imports = [ - 'mmedit.version', 'mmcv.ops.ModulatedDeformConv2d', - 'mmcv.ops.modulated_deform_conv2d', 'mmcv._ext' + 'mmedit.version', 'mmcv._ext', 'mmcv.ops.ModulatedDeformConv2d', + 'mmcv.ops.modulated_deform_conv2d', 'clip', 'resize_right', 'pandas' ] source_suffix = { @@ -53,6 +76,11 @@ '.md': 'markdown', } +# # Remove 'view source code' from top of page (for html, not python) +# html_show_sourcelink = False +# nbsphinx_allow_errors = True # Continue through Jupyter errors +# add_module_names = False # Remove namespaces from class/method signatures + # Ignore >>> when copying code copybutton_prompt_text = r'>>> |\.\.\. ' copybutton_prompt_is_regexp = True @@ -94,11 +122,6 @@ 'url': 'https://mmediting.readthedocs.io/en/1.x/', 'description': '1.x branch', }, - { - 'name': 'MMEditing 1.x', - 'url': 'https://mmediting.readthedocs.io/en/dev-1.x/', - 'description': 'docs at 1.x branch' - }, ], 'active': True, @@ -106,16 +129,6 @@ ], 'menu_lang': 'en', - 'header_note': { - 'content': - 'You are reading the documentation for MMEditing 0.x, which ' - 'will soon be deprecated by the end of 2022. We recommend you upgrade ' - 'to MMEditing 1.0 to enjoy fruitful new features and better performance ' # noqa - ' brought by OpenMMLab 2.0. Check out the ' - 'changelog, ' # noqa - 'code ' # noqa - 'and documentation of MMEditing 1.0 for more details.', # noqa - } } # Add any paths that contain custom static files (such as style sheets) here, @@ -130,13 +143,21 @@ language = 'en' # The master toctree document. 
-master_doc = 'index' +root_doc = 'index' +notfound_template = '404.html' def builder_inited_handler(app): - subprocess.run(['bash', './.dev_scripts/update_dataset_zoo.sh']) subprocess.run(['python', './.dev_scripts/update_model_zoo.py']) + subprocess.run(['python', './.dev_scripts/update_dataset_zoo.py']) + + +def skip_member(app, what, name, obj, skip, options): + if what == 'package' or what == 'module': + skip = True + return skip def setup(app): app.connect('builder-inited', builder_inited_handler) + app.connect('autoapi-skip-member', skip_member) diff --git a/docs/en/dataset_zoo/0_overview.md b/docs/en/dataset_zoo/0_overview.md deleted file mode 100644 index d0a2da7f40..0000000000 --- a/docs/en/dataset_zoo/0_overview.md +++ /dev/null @@ -1,18 +0,0 @@ -# Overview - -- [Prepare Super-Resolution Datasets](./1_super_resolution_datasets.md) - - [DF2K_OST](./1_super_resolution_datasets.md#df2k_ost-dataset) \[ [Homepage](https://github.com/xinntao/Real-ESRGAN/blob/master/docs/Training.md) \] - - [DIV2K](./1_super_resolution_datasets.md#div2k-dataset) \[ [Homepage](https://data.vision.ee.ethz.ch/cvl/DIV2K/) \] - - [REDS](./1_super_resolution_datasets.md#reds-dataset) \[ [Homepage](https://seungjunnah.github.io/Datasets/reds.html) \] - - [Vid4](./1_super_resolution_datasets.md#vid4-dataset) \[ [Homepage](https://drive.google.com/file/d/1ZuvNNLgR85TV_whJoHM7uVb-XW1y70DW/view) \] - - [Vimeo90K](./1_super_resolution_datasets.md#vimeo90k-dataset) \[ [Homepage](http://toflow.csail.mit.edu) \] -- [Prepare Inpainting Datasets](./2_inpainting_datasets.md) - - [CelebA-HQ](./2_inpainting_datasets.md#celeba-hq-dataset) \[ [Homepage](https://github.com/tkarras/progressive_growing_of_gans#preparing-datasets-for-training) \] - - [Paris Street View](./2_inpainting_datasets.md#paris-street-view-dataset) \[ [Homepage](https://github.com/pathak22/context-encoder/issues/24) \] - - [Places365](./2_inpainting_datasets.md#places365-dataset) \[ [Homepage](http://places2.csail.mit.edu/) \] -- [Prepare Matting Datasets](./3_matting_datasets.md) - - [Composition-1k](./3_matting_datasets.md#composition-1k-dataset) \[ [Homepage](https://sites.google.com/view/deepimagematting) \] -- [Prepare Video Frame Interpolation Datasets](./4_video_interpolation_datasets.md) - - [Vimeo90K-triplet](./4_video_interpolation_datasets.md#vimeo90k-triplet-dataset) \[ [Homepage](http://toflow.csail.mit.edu) \] -- [Prepare Unconditional GANs Datasets](./5_unconditional_gans_datasets.md) -- [Prepare Image Translation Datasets](./6_image_translation_datasets.md) diff --git a/docs/en/dataset_zoo/1_super_resolution_datasets.md b/docs/en/dataset_zoo/1_super_resolution_datasets.md deleted file mode 100644 index e6afa8ff6d..0000000000 --- a/docs/en/dataset_zoo/1_super_resolution_datasets.md +++ /dev/null @@ -1,356 +0,0 @@ -# Super-Resolution Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. If your folder structure is different, you may need to change the corresponding paths in config files. 
- -MMEditing supported super-resolution datasets: - -- Image Super-Resolution - - [DF2K_OST](#df2k_ost-dataset) \[ [Homepage](https://github.com/xinntao/Real-ESRGAN/blob/master/docs/Training.md) \] - - [DIV2K](#div2k-dataset) \[ [Homepage](https://data.vision.ee.ethz.ch/cvl/DIV2K/) \] -- Video Super-Resolution - - [REDS](#reds-dataset) \[ [Homepage](https://seungjunnah.github.io/Datasets/reds.html) \] - - [Vid4](#vid4-dataset) \[ [Homepage](https://drive.google.com/file/d/1ZuvNNLgR85TV_whJoHM7uVb-XW1y70DW/view) \] - - [Vimeo90K](#vimeo90k-dataset) \[ [Homepage](http://toflow.csail.mit.edu) \] - -## DF2K_OST Dataset - - - -```bibtex -@inproceedings{wang2021real, - title={Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data}, - author={Wang, Xintao and Xie, Liangbin and Dong, Chao and Shan, Ying}, - booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, - pages={1905--1914}, - year={2021} -} -``` - -- The DIV2K dataset can be downloaded from [here](https://data.vision.ee.ethz.ch/cvl/DIV2K/) (We use the training set only). -- The Flickr2K dataset can be downloaded [here](https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar) (We use the training set only). -- The OST dataset can be downloaded [here](https://openmmlab.oss-cn-hangzhou.aliyuncs.com/datasets/OST_dataset.zip) (We use the training set only). - -Please first put all the images into the `GT` folder (naming does not need to be in order): - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── df2k_ost -│ │ ├── GT -│ │ │ ├── 0001.png -│ │ │ ├── 0002.png -│ │ │ ├── ... -... -``` - -### Crop sub-images - -For faster IO, we recommend to crop the images to sub-images. We provide such a script: - -```shell -python tools/dataset_converters/super-resolution/df2k_ost/preprocess_df2k_ost_dataset.py --data-root ./data/df2k_ost -``` - -The generated data is stored under `df2k_ost` and the data structure is as follows, where `_sub` indicates the sub-images. - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── df2k_ost -│ │ ├── GT -│ │ ├── GT_sub -... -``` - -### Prepare LMDB dataset for DF2K_OST - -If you want to use LMDB datasets for faster IO speed, you can make LMDB files by: - -```shell -python tools/dataset_converters/super-resolution/df2k_ost/preprocess_df2k_ost_dataset.py --data-root ./data/df2k_ost --make-lmdb -``` - -## DIV2K Dataset - - - -```bibtex -@InProceedings{Agustsson_2017_CVPR_Workshops, - author = {Agustsson, Eirikur and Timofte, Radu}, - title = {NTIRE 2017 Challenge on Single Image Super-Resolution: Dataset and Study}, - booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops}, - month = {July}, - year = {2017} -} -``` - -- Training dataset: [DIV2K dataset](https://data.vision.ee.ethz.ch/cvl/DIV2K/). -- Validation dataset: Set5 and Set14. - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── DIV2K -│ │ ├── DIV2K_train_HR -│ │ ├── DIV2K_train_LR_bicubic -│ │ │ ├── X2 -│ │ │ ├── X3 -│ │ │ ├── X4 -│ │ ├── DIV2K_valid_HR -│ │ ├── DIV2K_valid_LR_bicubic -│ │ │ ├── X2 -│ │ │ ├── X3 -│ │ │ ├── X4 -│ ├── Set5 -│ │ ├── GTmod12 -│ │ ├── LRbicx2 -│ │ ├── LRbicx3 -│ │ ├── LRbicx4 -│ ├── Set14 -│ │ ├── GTmod12 -│ │ ├── LRbicx2 -│ │ ├── LRbicx3 -│ │ ├── LRbicx4 -``` - -### Crop sub-images - -For faster IO, we recommend to crop the DIV2K images to sub-images. 
We provide such a script: - -```shell -python tools/dataset_converters/super-resolution/div2k/preprocess_div2k_dataset.py --data-root ./data/DIV2K -``` - -The generated data is stored under `DIV2K` and the data structure is as follows, where `_sub` indicates the sub-images. - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── DIV2K -│ │ ├── DIV2K_train_HR -│ │ ├── DIV2K_train_HR_sub -│ │ ├── DIV2K_train_LR_bicubic -│ │ │ ├── X2 -│ │ │ ├── X3 -│ │ │ ├── X4 -│ │ │ ├── X2_sub -│ │ │ ├── X3_sub -│ │ │ ├── X4_sub -│ │ ├── DIV2K_valid_HR -│ │ ├── ... -... -``` - -### Prepare annotation list - -If you use the annotation mode for the dataset, you first need to prepare a specific `txt` file. - -Each line in the annotation file contains the image names and image shape (usually for the ground-truth images), separated by a white space. - -Example of an annotation file: - -```text -0001_s001.png (480,480,3) -0001_s002.png (480,480,3) -``` - -### Prepare LMDB dataset for DIV2K - -If you want to use LMDB datasets for faster IO speed, you can make LMDB files by: - -```shell -python tools/dataset_converters/super-resolution/div2k/preprocess_div2k_dataset.py --data-root ./data/DIV2K --make-lmdb -``` - -## REDS Dataset - - - -```bibtex -@InProceedings{Nah_2019_CVPR_Workshops_REDS, - author = {Nah, Seungjun and Baik, Sungyong and Hong, Seokil and Moon, Gyeongsik and Son, Sanghyun and Timofte, Radu and Lee, Kyoung Mu}, - title = {NTIRE 2019 Challenge on Video Deblurring and Super-Resolution: Dataset and Study}, - booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops}, - month = {June}, - year = {2019} -} -``` - -- Training dataset: [REDS dataset](https://seungjunnah.github.io/Datasets/reds.html). -- Validation dataset: [REDS dataset](https://seungjunnah.github.io/Datasets/reds.html) and Vid4. - -Note that we merge train and val datasets in REDS for easy switching between REDS4 partition (used in EDVR) and the official validation partition. -The original val dataset (clip names from 000 to 029) are modified to avoid conflicts with training dataset (total 240 clips). Specifically, the clip names are changed to 240, 241, ... 269. - -You can prepare the REDS dataset by running: - -```shell -python tools/dataset_converters/super-resolution/reds/preprocess_reds_dataset.py --root-path ./data/REDS -``` - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── REDS -│ │ ├── train_sharp -│ │ │ ├── 000 -│ │ │ ├── 001 -│ │ │ ├── ... -│ │ ├── train_sharp_bicubic -│ │ │ ├── 000 -│ │ │ ├── 001 -│ │ │ ├── ... -│ ├── REDS4 -│ │ ├── GT -│ │ ├── sharp_bicubic -``` - -### Prepare LMDB dataset for REDS - -If you want to use LMDB datasets for faster IO speed, you can make LMDB files by: - -```shell -python tools/dataset_converters/super-resolution/reds/preprocess_reds_dataset.py --root-path ./data/REDS --make-lmdb -``` - -### Crop to sub-images - -MMEditing also support cropping REDS images to sub-images for faster IO. We provide such a script: - -```shell -python tools/dataset_converters/super-resolution/reds/crop_sub_images.py --data-root ./data/REDS -scales 4 -``` - -The generated data is stored under `REDS` and the data structure is as follows, where `_sub` indicates the sub-images. - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── REDS -│ │ ├── train_sharp -│ │ │ ├── 000 -│ │ │ ├── 001 -│ │ │ ├── ... -│ │ ├── train_sharp_sub -│ │ │ ├── 000_s001 -│ │ │ ├── 000_s002 -│ │ │ ├── ... -│ │ │ ├── 001_s001 -│ │ │ ├── ... 
-│ │ ├── train_sharp_bicubic -│ │ │ ├── X4 -│ │ │ │ ├── 000 -│ │ │ │ ├── 001 -│ │ │ │ ├── ... -│ │ │ ├── X4_sub -│ │ │ ├── 000_s001 -│ │ │ ├── 000_s002 -│ │ │ ├── ... -│ │ │ ├── 001_s001 -│ │ │ ├── ... -``` - -Note that by default `preprocess_reds_dataset.py` does not make lmdb and annotation file for the cropped dataset. You may need to modify the scripts a little bit for such operations. - -## Vid4 Dataset - - - -```bibtex -@article{xue2019video, - title={On Bayesian adaptive video super resolution}, - author={Liu, Ce and Sun, Deqing}, - journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, - volume={36}, - number={2}, - pages={346--360}, - year={2013}, - publisher={IEEE} -} -``` - -The Vid4 dataset can be downloaded from [here](https://drive.google.com/file/d/1ZuvNNLgR85TV_whJoHM7uVb-XW1y70DW/view?usp=sharing). There are two degradations in the dataset. - -1. BIx4 contains images downsampled by bicubic interpolation -2. BDx4 contains images blurred by Gaussian kernel with σ=1.6, followed by a subsampling every four pixels. - -## Vimeo90K Dataset - - - -```bibtex -@article{xue2019video, - title={Video Enhancement with Task-Oriented Flow}, - author={Xue, Tianfan and Chen, Baian and Wu, Jiajun and Wei, Donglai and Freeman, William T}, - journal={International Journal of Computer Vision (IJCV)}, - volume={127}, - number={8}, - pages={1106--1125}, - year={2019}, - publisher={Springer} -} -``` - -The training and test datasets can be download from [here](http://toflow.csail.mit.edu/). - -The Vimeo90K dataset has a `clip/sequence/img` folder structure: - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── vimeo_triplet -│ │ ├── BDx4 -│ │ │ ├── 00001 -│ │ │ │ ├── 0001 -│ │ │ │ │ ├── im1.png -│ │ │ │ │ ├── im2.png -│ │ │ │ │ ├── ... -│ │ │ │ ├── 0002 -│ │ │ │ ├── 0003 -│ │ │ │ ├── ... -│ │ │ ├── 00002 -│ │ │ ├── ... -│ │ ├── BIx4 -│ │ ├── GT -│ │ ├── meta_info_Vimeo90K_test_GT.txt -│ │ ├── meta_info_Vimeo90K_train_GT.txt -``` - -### Prepare the annotation files for Vimeo90K dataset - -To prepare the annotation file for training, you need to download the official training list path for Vimeo90K from the official website, and run the following command: - -```shell -python tools/dataset_converters/super-resolution/vimeo90k/preprocess_vimeo90k_dataset.py ./data/Vimeo90K/official_train_list.txt -``` - -The annotation file for test is generated similarly. - -### Prepare LMDB dataset for Vimeo90K - -If you want to use LMDB datasets for faster IO speed, you can make LMDB files by: - -```shell -python tools/dataset_converters/super-resolution/vimeo90k/preprocess_vimeo90k_dataset.py ./data/Vimeo90K/official_train_list.txt --gt-path ./data/Vimeo90K/GT --lq-path ./data/Vimeo90K/LQ --make-lmdb -``` diff --git a/docs/en/dataset_zoo/2_inpainting_datasets.md b/docs/en/dataset_zoo/2_inpainting_datasets.md deleted file mode 100644 index f5dac5abac..0000000000 --- a/docs/en/dataset_zoo/2_inpainting_datasets.md +++ /dev/null @@ -1,114 +0,0 @@ -# Inpainting Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. If your folder structure is different, you may need to change the corresponding paths in config files. 
- -MMEditing supported inpainting datasets: - -- [CelebA-HQ](#celeba-hq-dataset) \[ [Homepage](https://github.com/tkarras/progressive_growing_of_gans#preparing-datasets-for-training) \] -- [Paris Street View](#paris-street-view-dataset) \[ [Homepage](https://github.com/pathak22/context-encoder/issues/24) \] -- [Places365](#places365-dataset) \[ [Homepage](http://places2.csail.mit.edu/) \] - -As we only need images for inpainting task, further preparation is not necessary and the folder structure can be different from the example. You can utilize the information provided by the original dataset like `Place365` (e.g. `meta`). Also, you can easily scan the data set and list all of the images to a specific `txt` file. Here is an example for the `Places365_val.txt` from Places365 and we will only use the image name information in inpainting. - -``` -Places365_val_00000001.jpg 165 -Places365_val_00000002.jpg 358 -Places365_val_00000003.jpg 93 -Places365_val_00000004.jpg 164 -Places365_val_00000005.jpg 289 -Places365_val_00000006.jpg 106 -Places365_val_00000007.jpg 81 -Places365_val_00000008.jpg 121 -Places365_val_00000009.jpg 150 -Places365_val_00000010.jpg 302 -Places365_val_00000011.jpg 42 -``` - -## CelebA-HQ Dataset - - - -```bibtex -@article{karras2017progressive, - title={Progressive growing of gans for improved quality, stability, and variation}, - author={Karras, Tero and Aila, Timo and Laine, Samuli and Lehtinen, Jaakko}, - journal={arXiv preprint arXiv:1710.10196}, - year={2017} -} -``` - -Follow the instructions [here](https://github.com/tkarras/progressive_growing_of_gans##preparing-datasets-for-training) to prepare the dataset. - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── CelebA-HQ -│ │ ├── train_256 -| | ├── test_256 -| | ├── train_celeba_img_list.txt -| | ├── val_celeba_img_list.txt - -``` - -## Paris Street View Dataset - - - -```bibtex -@inproceedings{pathak2016context, - title={Context encoders: Feature learning by inpainting}, - author={Pathak, Deepak and Krahenbuhl, Philipp and Donahue, Jeff and Darrell, Trevor and Efros, Alexei A}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={2536--2544}, - year={2016} -} -``` - -Obtain the dataset [here](https://github.com/pathak22/context-encoder/issues/24). - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── paris_street_view -│ │ ├── train -| | ├── val - -``` - -## Places365 Dataset - - - -```bibtex - @article{zhou2017places, - title={Places: A 10 million Image Database for Scene Recognition}, - author={Zhou, Bolei and Lapedriza, Agata and Khosla, Aditya and Oliva, Aude and Torralba, Antonio}, - journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, - year={2017}, - publisher={IEEE} - } - -``` - -Prepare the data from [Places365](http://places2.csail.mit.edu/download.html). - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── Places -│ │ ├── data_large -│ │ ├── val_large -| | ├── meta -| | | ├── places365_train_challenge.txt -| | | ├── places365_val.txt - -``` diff --git a/docs/en/dataset_zoo/3_matting_datasets.md b/docs/en/dataset_zoo/3_matting_datasets.md deleted file mode 100644 index cc9c95f924..0000000000 --- a/docs/en/dataset_zoo/3_matting_datasets.md +++ /dev/null @@ -1,147 +0,0 @@ -# Matting Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. If your folder structure is different, you may need to change the corresponding paths in config files. 
- -MMEditing supported matting datasets: - -- [Composition-1k](#composition-1k-dataset) \[ [Homepage](https://sites.google.com/view/deepimagematting) \] - -## Composition-1k Dataset - -### Introduction - - - -```bibtex -@inproceedings{xu2017deep, - title={Deep image matting}, - author={Xu, Ning and Price, Brian and Cohen, Scott and Huang, Thomas}, - booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, - pages={2970--2979}, - year={2017} -} -``` - -The Adobe Composition-1k dataset consists of foreground images and their corresponding alpha images. -To get the full dataset, one need to composite the foregrounds with selected backgrounds from the COCO dataset and the Pascal VOC dataset. - -### Obtain and Extract - -Please follow the instructions of [paper authors](https://sites.google.com/view/deepimagematting) to obtain the Composition-1k (comp1k) dataset. - -### Composite the full dataset - -The Adobe composition-1k dataset contains only `alpha` and `fg` (and `trimap` in test set). -It is needed to merge `fg` with COCO data (training) or VOC data (test) before training or evaluation. -Use the following script to perform image composition and generate annotation files for training or testing: - -```shell -## The script is run under the root folder of MMEditing -python tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --composite -``` - -The generated data is stored under `adobe_composition-1k/Training_set` and `adobe_composition-1k/Test_set` respectively. -If you only want to composite test data (since compositing training data is time-consuming), you can skip compositing the training set by removing the `--composite` option: - -```shell -## skip compositing training set -python tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit -``` - -If you only want to preprocess test data, i.e. for FBA, you can skip the train set by adding the `--skip-train` option: - -```shell -## skip preprocessing training set -python tools/data/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --skip-train -``` - -> Currently, `GCA` and `FBA` support online composition of training data. But you can modify the data pipeline of other models to perform online composition instead of loading composited images (we called it `merged` in our data pipeline). 
- -### Check Directory Structure for DIM - -The result folder structure should look like: - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── adobe_composition-1k -│ │ ├── Test_set -│ │ │ ├── Adobe-licensed images -│ │ │ │ ├── alpha -│ │ │ │ ├── fg -│ │ │ │ ├── trimaps -│ │ │ ├── merged (generated by tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py) -│ │ │ ├── bg (generated by tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py) -│ │ ├── Training_set -│ │ │ ├── Adobe-licensed images -│ │ │ │ ├── alpha -│ │ │ │ ├── fg -│ │ │ ├── Other -│ │ │ │ ├── alpha -│ │ │ │ ├── fg -│ │ │ ├── merged (generated by tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py) -│ │ │ ├── bg (generated by tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py) -│ │ ├── test_list.json (generated by tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py) -│ │ ├── training_list.json (generated by tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py) -│ ├── coco -│ │ ├── train2014 (or train2017) -│ ├── VOCdevkit -│ │ ├── VOC2012 -``` - -### Prepare the dataset for FBA - -FBA adopts dynamic dataset augmentation proposed in [Learning-base Sampling for Natural Image Matting](https://openaccess.thecvf.com/content_CVPR_2019/papers/Tang_Learning-Based_Sampling_for_Natural_Image_Matting_CVPR_2019_paper.pdf). -In addition, to reduce artifacts during augmentation, it uses the extended version of foreground as foreground. -We provide scripts to estimate foregrounds. - -Prepare the test set as follows: - -```shell -## skip preprocessing training set, as it composites online during training -python tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py data/adobe_composition-1k data/coco data/VOCdevkit --skip-train -``` - -Extend the foreground of training set as follows: - -```shell -python tools/dataset_converters/matting/comp1k/extend_fg.py data/adobe_composition-1k -``` - -### Check Directory Structure for DIM - -The final folder structure should look like: - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── adobe_composition-1k -│ │ ├── Test_set -│ │ │ ├── Adobe-licensed images -│ │ │ │ ├── alpha -│ │ │ │ ├── fg -│ │ │ │ ├── trimaps -│ │ │ ├── merged (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py) -│ │ │ ├── bg (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py) -│ │ ├── Training_set -│ │ │ ├── Adobe-licensed images -│ │ │ │ ├── alpha -│ │ │ │ ├── fg -│ │ │ │ ├── fg_extended (generated by tools/data/matting/comp1k/extend_fg.py) -│ │ │ ├── Other -│ │ │ │ ├── alpha -│ │ │ │ ├── fg -│ │ │ │ ├── fg_extended (generated by tools/data/matting/comp1k/extend_fg.py) -│ │ ├── test_list.json (generated by tools/data/matting/comp1k/preprocess_comp1k_dataset.py) -│ │ ├── training_list_fba.json (generated by tools/data/matting/comp1k/extend_fg.py) -│ ├── coco -│ │ ├── train2014 (or train2017) -│ ├── VOCdevkit -│ │ ├── VOC2012 -``` diff --git a/docs/en/dataset_zoo/4_video_interpolation_datasets.md b/docs/en/dataset_zoo/4_video_interpolation_datasets.md deleted file mode 100644 index 9889939c91..0000000000 --- a/docs/en/dataset_zoo/4_video_interpolation_datasets.md +++ /dev/null @@ -1,50 +0,0 @@ -# Video Frame Interpolation Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. If your folder structure is different, you may need to change the corresponding paths in config files. 
- -MMEditing supported video frame interpolation datasets: - -- [Vimeo90K-triplet](#vimeo90k-triplet-dataset) \[ [Homepage](http://toflow.csail.mit.edu) \] - -## Vimeo90K-triplet Dataset - - - -```bibtex -@article{xue2019video, - title={Video Enhancement with Task-Oriented Flow}, - author={Xue, Tianfan and Chen, Baian and Wu, Jiajun and Wei, Donglai and Freeman, William T}, - journal={International Journal of Computer Vision (IJCV)}, - volume={127}, - number={8}, - pages={1106--1125}, - year={2019}, - publisher={Springer} -} -``` - -The training and test datasets can be download from [here](http://toflow.csail.mit.edu/). - -The Vimeo90K-triplet dataset has a `clip/sequence/img` folder structure: - -```text -mmediting -├── mmedit -├── tools -├── configs -├── data -│ ├── vimeo_triplet -│ │ ├── tri_testlist.txt -│ │ ├── tri_trainlist.txt -│ │ ├── sequences -│ │ │ ├── 00001 -│ │ │ │ ├── 0001 -│ │ │ │ │ ├── im1.png -│ │ │ │ │ ├── im2.png -│ │ │ │ │ └── im3.png -│ │ │ │ ├── 0002 -│ │ │ │ ├── 0003 -│ │ │ │ ├── ... -│ │ │ ├── 00002 -│ │ │ ├── ... -``` diff --git a/docs/en/dataset_zoo/5_unconditional_gans_datasets.md b/docs/en/dataset_zoo/5_unconditional_gans_datasets.md deleted file mode 100644 index a0f4d67164..0000000000 --- a/docs/en/dataset_zoo/5_unconditional_gans_datasets.md +++ /dev/null @@ -1,91 +0,0 @@ -# Unconditional GANs Datasets - -**Data preparation for unconditional model** is simple. What you need to do is downloading the images and put them into a directory. Next, you should set a symlink in the `data` directory. For standard unconditional gans with static architectures, like DCGAN and StyleGAN2, `UnconditionalImageDataset` is designed to train such unconditional models. Here is an example config for FFHQ dataset: - -```python -dataset_type = 'BasicImageDataset' - -train_pipeline = [ - dict(type='LoadImageFromFile', key='img'), - dict(type='Flip', keys=['img'], direction='horizontal'), - dict(type='PackGenInputs', keys=['img'], meta_keys=['img_path']) -] - -# `batch_size` and `data_root` need to be set. -train_dataloader = dict( - batch_size=4, - num_workers=8, - persistent_workers=True, - sampler=dict(type='InfiniteSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=None, # set by user - pipeline=train_pipeline)) -``` - -Here, we adopt `InfinitySampler` to avoid frequent dataloader reloading, which will accelerate the training procedure. As shown in the example, `pipeline` provides important data pipeline to process images, including loading from file system, resizing, cropping, transferring to `torch.Tensor` and packing to `GenDataSample`. All of supported data pipelines can be found in `mmedit/datasets/transforms`. - -For unconditional GANs with dynamic architectures like PGGAN and StyleGANv1, `GrowScaleImgDataset` is recommended to use for training. Since such dynamic architectures need real images in different scales, directly adopting `UnconditionalImageDataset` will bring heavy I/O cost for loading multiple high-resolution images. Here is an example we use for training PGGAN in CelebA-HQ dataset: - -```python -dataset_type = 'GrowScaleImgDataset' - -pipeline = [ - dict(type='LoadImageFromFile', key='img'), - dict(type='Flip', keys=['img'], direction='horizontal'), - dict(type='PackGenInputs') -] - -# `samples_per_gpu` and `imgs_root` need to be set. 
-train_dataloader = dict( - num_workers=4, - batch_size=64, - dataset=dict( - type='GrowScaleImgDataset', - data_roots={ - '1024': './data/ffhq/images', - '256': './data/ffhq/ffhq_imgs/ffhq_256', - '64': './data/ffhq/ffhq_imgs/ffhq_64' - }, - gpu_samples_base=4, - # note that this should be changed with total gpu number - gpu_samples_per_scale={ - '4': 64, - '8': 32, - '16': 16, - '32': 8, - '64': 4, - '128': 4, - '256': 4, - '512': 4, - '1024': 4 - }, - len_per_stage=300000, - pipeline=pipeline), - sampler=dict(type='InfiniteSampler', shuffle=True)) -``` - -In this dataset, you should provide a dictionary of image paths to the `data_roots`. Thus, you should resize the images in the dataset in advance. -For the resizing methods in the data pre-processing, we adopt bilinear interpolation methods in all of the experiments studied in MMEditing. - -Note that this dataset should be used with `PGGANFetchDataHook`. In this config file, this hook should be added in the customized hooks, as shown below. - -```python -custom_hooks = [ - dict( - type='GenVisualizationHook', - interval=5000, - fixed_input=True, - # vis ema and orig at the same time - vis_kwargs_list=dict( - type='Noise', - name='fake_img', - sample_model='ema/orig', - target_keys=['ema', 'orig'])), - dict(type='PGGANFetchDataHook') -] -``` - -This fetching data hook helps the dataloader update the status of dataset to change the data source and batch size during training. - -Here, we provide several download links of datasets frequently used in unconditional models: [LSUN](http://dl.yf.io/lsun/), [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html), [CelebA-HQ](https://drive.google.com/drive/folders/11Vz0fqHS2rXDb5pprgTjpD7S2BAJhi1P), [FFHQ](https://drive.google.com/drive/folders/1u2xu7bSrWxrbUxk-dT-UvEJq8IjdmNTP). diff --git a/docs/en/dataset_zoo/6_image_translation_datasets.md b/docs/en/dataset_zoo/6_image_translation_datasets.md deleted file mode 100644 index 85cec47c16..0000000000 --- a/docs/en/dataset_zoo/6_image_translation_datasets.md +++ /dev/null @@ -1,149 +0,0 @@ -# Image Translation Datasets - -**Data preparation for translation model** needs a little attention. You should organize the files in the way we told you in `quick_run.md`. Fortunately, for most official datasets like facades and summer2winter_yosemite, they already have the right format. Also, you should set a symlink in the `data` directory. For paired-data trained translation model like Pix2Pix , `PairedImageDataset` is designed to train such translation models. 
Here is an example config for facades dataset: - -```python -train_dataset_type = 'PairedImageDataset' -val_dataset_type = 'PairedImageDataset' -img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) -train_pipeline = [ - dict( - type='LoadPairedImageFromFile', - io_backend='disk', - key='pair', - domain_a=domain_a, - domain_b=domain_b, - flag='color'), - dict( - type='Resize', - keys=[f'img_{domain_a}', f'img_{domain_b}'], - scale=(286, 286), - interpolation='bicubic') -] -test_pipeline = [ - dict( - type='LoadPairedImageFromFile', - io_backend='disk', - key='image', - domain_a=domain_a, - domain_b=domain_b, - flag='color'), - dict( - type='Resize', - keys=[f'img_{domain_a}', f'img_{domain_b}'], - scale=(256, 256), - interpolation='bicubic') -] -dataroot = 'data/paired/facades' -train_dataloader = dict( - batch_size=1, - num_workers=4, - persistent_workers=True, - sampler=dict(type='InfiniteSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=dataroot, # set by user - pipeline=train_pipeline)) - -val_dataloader = dict( - batch_size=1, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=dataroot, # set by user - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True) - -test_dataloader = dict( - batch_size=1, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=dataroot, # set by user - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True) -``` - -Here, we adopt `LoadPairedImageFromFile` to load a paired image as the common loader does and crops -it into two images with the same shape in different domains. As shown in the example, `pipeline` provides important data pipeline to process images, including loading from file system, resizing, cropping, flipping, transferring to `torch.Tensor` and packing to `GenDataSample`. All of supported data pipelines can be found in `mmedit/datasets/transforms`. - -For unpaired-data trained translation model like CycleGAN , `UnpairedImageDataset` is designed to train such translation models. 
Here is an example config for horse2zebra dataset: - -```python -train_dataset_type = 'UnpairedImageDataset' -val_dataset_type = 'UnpairedImageDataset' -img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) -domain_a, domain_b = 'horse', 'zebra' -train_pipeline = [ - dict( - type='LoadImageFromFile', - io_backend='disk', - key=f'img_{domain_a}', - flag='color'), - dict( - type='LoadImageFromFile', - io_backend='disk', - key=f'img_{domain_b}', - flag='color'), - dict( - type='TransformBroadcaster', - mapping={'img': [f'img_{domain_a}', f'img_{domain_b}']}, - auto_remap=True, - share_random_params=True, - transforms=[ - dict(type='Resize', scale=(286, 286), interpolation='bicubic'), - dict(type='Crop', crop_size=(256, 256), random_crop=True), - ]), - dict(type='Flip', keys=[f'img_{domain_a}'], direction='horizontal'), - dict(type='Flip', keys=[f'img_{domain_b}'], direction='horizontal'), - dict( - type='PackGenInputs', - keys=[f'img_{domain_a}', f'img_{domain_b}'], - meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path']) -] -test_pipeline = [ - dict(type='LoadImageFromFile', io_backend='disk', key='img', flag='color'), - dict(type='Resize', scale=(256, 256), interpolation='bicubic'), - dict( - type='PackGenInputs', - keys=[f'img_{domain_a}', f'img_{domain_b}'], - meta_keys=[f'img_{domain_a}_path', f'img_{domain_b}_path']) -] -data_root = './data/horse2zebra/' -# `batch_size` and `data_root` need to be set. -train_dataloader = dict( - batch_size=1, - num_workers=4, - persistent_workers=True, - sampler=dict(type='InfiniteSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=data_root, # set by user - pipeline=train_pipeline)) - -val_dataloader = dict( - batch_size=None, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=data_root, # set by user - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True) - -test_dataloader = dict( - batch_size=None, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=data_root, # set by user - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True) -``` - -`UnpairedImageDataset` will load both images (domain A and B) from different paths and transform them at the same time. - -Here, we provide download links of datasets used in [Pix2Pix](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/) and [CycleGAN](https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/). diff --git a/docs/en/notes/4_faq.md b/docs/en/faq.md similarity index 91% rename from docs/en/notes/4_faq.md rename to docs/en/faq.md index 4edec727c2..7205a47449 100644 --- a/docs/en/notes/4_faq.md +++ b/docs/en/faq.md @@ -1,4 +1,4 @@ -# Frequently Asked Questions +# Frequently asked questions We list some common troubles faced by many users and their corresponding solutions here. Feel free to enrich the list if you find any frequent issues @@ -15,11 +15,11 @@ and make sure you fill in all required information in the template. **Q2**: What's the folder structure of xxx dataset? -**A2**: You can make sure the folder structure is correct following tutorials of [dataset preparation](../user_guides/2_dataset_prepare.md). +**A2**: You can make sure the folder structure is correct following tutorials of [dataset preparation](user_guides/dataset_prepare.md). **Q3**: How to use LMDB data to train the model? -**A3**: You can use scripts in `tools/data` to make LMDB files. 
More details are shown in tutorials of [dataset preparation](../user_guides/2_dataset_prepare.md). +**A3**: You can use scripts in `tools/data` to make LMDB files. More details are shown in tutorials of [dataset preparation](user_guides/dataset_prepare.md). **Q4**: Why `MMCV==xxx is used but incompatible` is raised when import I try to import `mmgen`? diff --git a/docs/en/2_get_started.md b/docs/en/get_started/install.md similarity index 75% rename from docs/en/2_get_started.md rename to docs/en/get_started/install.md index 3c7edcc92e..78fc8d1be3 100644 --- a/docs/en/2_get_started.md +++ b/docs/en/get_started/install.md @@ -1,4 +1,4 @@ -# Get Started: Install and Run MMEditing +# Installation In this section, you will know about: @@ -7,7 +7,6 @@ In this section, you will know about: - [Best practices](#best-practices) - [Customize installation](#customize-installation) - [Developing with multiple MMEditing versions](#developing-with-multiple-mmediting-versions) -- [Quick run](#quick-run) ## Installation @@ -156,7 +155,7 @@ docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmediting/data mmediting #### Trouble shooting -If you have some issues during the installation, please first view the [FAQ](notes/4_faq.md) page. +If you have some issues during the installation, please first view the [FAQ](../faq.md) page. You may [open an issue](https://github.com/open-mmlab/mmediting/issues/new/choose) on GitHub if no solution is found. ### Developing with multiple MMEditing versions @@ -168,52 +167,3 @@ To use the default MMEditing installed in the environment rather than that you a ```shell PYTHONPATH="$(dirname $0)/..":$PYTHONPATH ``` - -## Quick run - -After installing MMEditing successfully, now you are able to play with MMEditing! - -To synthesize an image of a church, you only need several lines of codes by MMEditing! - -```python -from mmedit.apis import init_model, sample_unconditional_model - -config_file = 'configs/styleganv2/stylegan2_c2_8xb4-800kiters_lsun-church-256x256.py' -# you can download this checkpoint in advance and use a local file path. -checkpoint_file = 'https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth' -device = 'cuda:0' -# init a generative model -model = init_model(config_file, checkpoint_file, device=device) -# sample images -fake_imgs = sample_unconditional_model(model, 4) -``` - -Or you can just run the following command. - -```bash -python demo/unconditional_demo.py \ -configs/styleganv2/stylegan2_c2_lsun-church_256_b4x8_800k.py \ -https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth - -``` - -You will see a new image `unconditional_samples.png` in folder `work_dirs/demos/`, which contained generated samples. - -What's more, if you want to make these photos much more clear, -you only need several lines of codes for image super-resolution by MMEditing! 
- -```python -import mmcv -from mmedit.apis import init_model, restoration_inference -from mmedit.engine.misc import tensor2img - -config = 'configs/esrgan/esrgan_x4c64b23g32_1xb16-400k_div2k.py' -checkpoint = 'https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508-f8ccaf3b.pth' -img_path = 'tests/data/image/lq/baboon_x4.png' -model = init_model(config, checkpoint) -output = restoration_inference(model, img_path) -output = tensor2img(output) -mmcv.imwrite(output, 'output.png') -``` - -Now, you can check your fancy photos in `output.png`. diff --git a/docs/en/1_overview.md b/docs/en/get_started/overview.md similarity index 92% rename from docs/en/1_overview.md rename to docs/en/get_started/overview.md index b1c7f51472..62af697b74 100644 --- a/docs/en/1_overview.md +++ b/docs/en/get_started/overview.md @@ -78,7 +78,7 @@ MMEditing supports various applications, including: - **New Modular Design for Flexible Combination:** - We decompose the editing framework into different modules and one can easily construct a customized editor framework by combining different modules. Specifically, a new design for complex loss modules is proposed for customizing the links between modules, which can achieve flexible combinations among different modules.(Tutorial for [losses](advanced_guides/4_losses.md)) + We decompose the editing framework into different modules and one can easily construct a customized editor framework by combining different modules. Specifically, a new design for complex loss modules is proposed for customizing the links between modules, which can achieve flexible combinations among different modules.(Tutorial for [losses](../howto/losses.md)) - **Efficient Distributed Training:** @@ -86,12 +86,16 @@ MMEditing supports various applications, including: ## Get started -For installation instructions, please see [get_started](2_get_started.md). +For installation instructions, please see [Installation](install.md). ## User guides -For beginners, we suggest learning the basic usage of MMEditing from [user_guides](user_guides/1_config.md). +For beginners, we suggest learning the basic usage of MMEditing from [user_guides](../user_guides/config.md). ### Advanced guides -For users who are familiar with MMEditing, you may want to learn the design of MMEditing, as well as how to extend the repo, how to use multiple repos and other advanced usages, please refer to [advanced_guides](advanced_guides/1_models.md). +For users who are familiar with MMEditing, you may want to learn the design of MMEditing, as well as how to extend the repo, how to use multiple repos and other advanced usages, please refer to [advanced_guides](../advanced_guides/evaluator.md). + +### How to + +For users who want to use MMEditing to do something, please refer to [How to](../howto/models.md). diff --git a/docs/en/get_started/quick_run.md b/docs/en/get_started/quick_run.md new file mode 100644 index 0000000000..b243e5263a --- /dev/null +++ b/docs/en/get_started/quick_run.md @@ -0,0 +1,48 @@ +# Quick run + +After installing MMEditing successfully, now you are able to play with MMEditing! + +To synthesize an image of a church, you only need several lines of codes by MMEditing! + +```python +from mmedit.apis import init_model, sample_unconditional_model + +config_file = 'configs/styleganv2/stylegan2_c2_8xb4-800kiters_lsun-church-256x256.py' +# you can download this checkpoint in advance and use a local file path. 
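+# e.g. checkpoint_file = 'work_dirs/stylegan2_church.pth'  # hypothetical local path, shown for illustration only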
+checkpoint_file = 'https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth' +device = 'cuda:0' +# init a generative model +model = init_model(config_file, checkpoint_file, device=device) +# sample images +fake_imgs = sample_unconditional_model(model, 4) +``` + +Or you can just run the following command. + +```bash +python demo/unconditional_demo.py \ +configs/styleganv2/stylegan2_c2_lsun-church_256_b4x8_800k.py \ +https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth + +``` + +You will see a new image `unconditional_samples.png` in folder `work_dirs/demos/`, which contained generated samples. + +What's more, if you want to make these photos much more clear, +you only need several lines of codes for image super-resolution by MMEditing! + +```python +import mmcv +from mmedit.apis import init_model, restoration_inference +from mmedit.engine.misc import tensor2img + +config = 'configs/esrgan/esrgan_x4c64b23g32_1xb16-400k_div2k.py' +checkpoint = 'https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508-f8ccaf3b.pth' +img_path = 'tests/data/image/lq/baboon_x4.png' +model = init_model(config, checkpoint) +output = restoration_inference(model, img_path) +output = tensor2img(output) +mmcv.imwrite(output, 'output.png') +``` + +Now, you can check your fancy photos in `output.png`. diff --git a/docs/en/advanced_guides/2_dataset.md b/docs/en/howto/dataset.md similarity index 79% rename from docs/en/advanced_guides/2_dataset.md rename to docs/en/howto/dataset.md index ab0c380ee5..11086661fe 100644 --- a/docs/en/advanced_guides/2_dataset.md +++ b/docs/en/howto/dataset.md @@ -1,4 +1,4 @@ -# Prepare Your Own Datasets +# How to prepare your own datasets In this document, we will introduce the design of each datasets in MMEditing and how users can design their own dataset. @@ -198,6 +198,144 @@ dataset = BasicFramesDataset( img=['img1.png', 'img3.png'], gt=['img2.png'])) ``` +### BasicConditonalDataset + +**BasicConditonalDataset** `mmedit.datasets.BasicConditonalDataset` is designed for conditional GANs (e.g., SAGAN, BigGAN). This dataset support load label for the annotation file. `BasicConditonalDataset` support three kinds of annotation as follow: + +#### 1. Annotation file read by line (e.g., txt) + +Sample files structure: + +``` + data_prefix/ + ├── folder_1 + │ ├── xxx.png + │ ├── xxy.png + │ └── ... + └── folder_2 + ├── 123.png + ├── nsdf3.png + └── ... +``` + +Sample annotation file (the first column is the image path and the second column is the index of category): + +``` + folder_1/xxx.png 0 + folder_1/xxy.png 1 + folder_2/123.png 5 + folder_2/nsdf3.png 3 + ... +``` + +Config example for ImageNet dataset: + +```python +dataset=dict( + type='BasicConditionalDataset, + data_root='./data/imagenet/', + ann_file='meta/train.txt', + data_prefix='train', + pipeline=train_pipeline), +``` + +#### 2. Dict-based annotation file (e.g., json): + +Sample files structure: + +``` + data_prefix/ + ├── folder_1 + │ ├── xxx.png + │ ├── xxy.png + │ └── ... + └── folder_2 + ├── 123.png + ├── nsdf3.png + └── ... +``` + +Sample annotation file (the key is the image path and the value column +is the label): + +``` + { + "folder_1/xxx.png": [1, 2, 3, 4], + "folder_1/xxy.png": [2, 4, 1, 0], + "folder_2/123.png": [0, 9, 8, 1], + "folder_2/nsdf3.png", [1, 0, 0, 2], + ... 
+ } +``` + +Config example for EG3D (shapenet-car) dataset: + +```python +dataset = dict( + type='BasicConditionalDataset', + data_root='./data/eg3d/shapenet-car', + ann_file='annotation.json', + pipeline=train_pipeline) +``` + +In this kind of annotation, labels can be any type and not restricted to an index. + +#### 3. Folder-based annotation (no annotation file need): + +Sample files structure: + +``` + data_prefix/ + ├── class_x + │ ├── xxx.png + │ ├── xxy.png + │ └── ... + │ └── xxz.png + └── class_y + ├── 123.png + ├── nsdf3.png + ├── ... + └── asd932_.png +``` + +If the annotation file is specified, the dataset will be generated by the first two ways, otherwise, try the third way. + +### ImageNet Dataset and CIFAR10 Dataset + +**ImageNet Dataset**`mmedit.datasets.ImageNet` and **CIFAR10 Dataset**`mmedit.datasets.CIFAR10` are datasets specific designed for ImageNet and CIFAR10 datasets. Both two datasets are encapsulation of `BasicConditionalDataset`. You can used them to load data from ImageNet dataset and CIFAR10 dataset easily. + +Config example for ImageNet: + +```python +pipeline = [ + dict(type='LoadImageFromFile', key='img'), + dict(type='RandomCropLongEdge', keys=['img']), + dict(type='Resize', scale=(128, 128), keys=['img'], backend='pillow'), + dict(type='Flip', keys=['img'], flip_ratio=0.5, direction='horizontal'), + dict(type='PackEditInputs') +] + +dataset=dict( + type='ImageNet', + data_root='./data/imagenet/', + ann_file='meta/train.txt', + data_prefix='train', + pipeline=pipeline), +``` + +Config example for CIFAR10: + +```python +pipeline = [dict(type='PackEditInputs')] + +dataset = dict( + type='CIFAR10', + data_root='./data', + data_prefix='cifar10', + test_mode=False, + pipeline=pipeline) +``` + ### AdobeComp1kDataset **AdobeComp1kDataset** `mmedit.datasets.AdobeComp1kDataset` diff --git a/docs/en/advanced_guides/4_losses.md b/docs/en/howto/losses.md similarity index 99% rename from docs/en/advanced_guides/4_losses.md rename to docs/en/howto/losses.md index 22ea7a2590..3485a02ed9 100644 --- a/docs/en/advanced_guides/4_losses.md +++ b/docs/en/howto/losses.md @@ -1,4 +1,4 @@ -# Design Your Own Loss Functions +# How to design your own loss functions `losses` are registered as `LOSSES` in `MMEditing`. Customizing losses is similar to customizing any other model. diff --git a/docs/en/advanced_guides/1_models.md b/docs/en/howto/models.md similarity index 99% rename from docs/en/advanced_guides/1_models.md rename to docs/en/howto/models.md index 0d5e9cefdb..d7e93eb4e5 100644 --- a/docs/en/advanced_guides/1_models.md +++ b/docs/en/howto/models.md @@ -1,4 +1,4 @@ -# Design Your Own Models +# How to design your own models MMEditing is built upon MMEngine and MMCV, which enables users to design new models quickly, train and evaluate them easily. In this section, you will learn how to design your own models. @@ -406,7 +406,7 @@ After implementing the network architecture and the forward loop of SRCNN, now we can create a new file `configs/srcnn/srcnn_x4k915_g1_1000k_div2k.py` to set the configurations needed by training SRCNN. -In the configuration file, we need to specify the parameters of our model, `class BaseEditModel`, including the generator network architecture, loss function, additional training and testing configuration, and data preprocessor of input tensors. Please refer to the [Introduction to the loss in MMEditing](./4_losses.md) for more details of losses in MMEditing. 
+In the configuration file, we need to specify the parameters of our model, `class BaseEditModel`, including the generator network architecture, loss function, additional training and testing configuration, and data preprocessor of input tensors. Please refer to the [Introduction to the loss in MMEditing](./losses.md) for more details of losses in MMEditing. ```python # model settings @@ -425,7 +425,7 @@ model = dict( )) ``` -We also need to specify the training dataloader and testing dataloader according to [create your own dataloader](../dataset_zoo/0_overview.md). +We also need to specify the training dataloader and testing dataloader according to create your own dataloader. Finally we can start training our own model by: ```python @@ -733,7 +733,7 @@ model = dict( out_channels=1)) ``` -We also need to specify the training dataloader and testing dataloader according to [create your own dataloader](2_dataset.md). +We also need to specify the training dataloader and testing dataloader according to [create your own dataloader](dataset.md). Finally we can start training our own model by: ```python diff --git a/docs/en/advanced_guides/3_transforms.md b/docs/en/howto/transforms.md similarity index 99% rename from docs/en/advanced_guides/3_transforms.md rename to docs/en/howto/transforms.md index 565a3499ab..46719f58c9 100644 --- a/docs/en/advanced_guides/3_transforms.md +++ b/docs/en/howto/transforms.md @@ -1,4 +1,4 @@ -# Design Your Own Data Pipelines +# How to design your own data transforms In this tutorial, we introduce the design of transforms pipeline in MMEditing. diff --git a/docs/en/index.rst b/docs/en/index.rst index e60fbaefc0..73cc96cb0b 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -34,108 +34,128 @@ Codes are available on `GitHub `_. Documentation ============= +.. toctree:: + :maxdepth: 1 + :caption: Community + + community/contributing.md + community/projects.md + + .. toctree:: :maxdepth: 1 :caption: Get Started - 1_overview.md - 2_get_started.md + get_started/overview.md + get_started/install.md + get_started/quick_run.md + .. toctree:: :maxdepth: 1 :caption: User Guides - user_guides/1_config.md - user_guides/2_dataset_prepare.md - user_guides/3_inference.md - user_guides/4_train_test.md - user_guides/5_visualization.md - user_guides/6_useful_tools.md - user_guides/7_deploy.md - user_guides/8_metrics.md + user_guides/config.md + user_guides/dataset_prepare.md + user_guides/inference.md + user_guides/train_test.md + user_guides/metrics.md + user_guides/visualization.md + user_guides/useful_tools.md + user_guides/deploy.md + .. toctree:: :maxdepth: 2 :caption: Advanced Guides - advanced_guides/1_models.md - advanced_guides/2_dataset.md - advanced_guides/3_transforms.md - advanced_guides/4_losses.md + advanced_guides/models.md + advanced_guides/dataset.md + advanced_guides/transforms.md + advanced_guides/losses.md + advanced_guides/evaluator.md + advanced_guides/structures.md + advanced_guides/data_preprocessor.md + advanced_guides/data_flow.md +.. toctree:: + :maxdepth: 2 + :caption: How To + + howto/models.md + howto/dataset.md + howto/transforms.md + howto/losses.md + .. toctree:: :maxdepth: 1 + :caption: FAQ + + faq.md + +.. toctree:: + :maxdepth: 2 :caption: Model Zoo - 3_model_zoo.md + model_zoo/index.rst .. 
toctree:: :maxdepth: 1 :caption: Dataset Zoo - dataset_zoo/0_overview.md - dataset_zoo/1_super_resolution_datasets.md - dataset_zoo/2_inpainting_datasets.md - dataset_zoo/3_matting_datasets.md - dataset_zoo/4_video_interpolation_datasets.md - dataset_zoo/5_unconditional_gans_datasets.md - dataset_zoo/6_image_translation_datasets.md - + dataset_zoo/index.rst .. toctree:: :maxdepth: 1 - :caption: Migration from MMEdit 0.x + :caption: Changelog - migration/1_overview.md - migration/2_runtime.md - migration/3_models.md - migration/4_eval_test.md - migration/5_schedule.md - migration/6_data.md - migration/7_distributed_train.md - migration/8_optimizers.md - migration/9_visualization.md - migration/10_amp.md + changelog.md .. toctree:: :maxdepth: 2 :caption: API Reference - mmedit.apis - mmedit.datasets - mmedit.datasets.transforms - mmedit.engine.hooks - mmedit.engine.optimizers - mmedit.engine.runner - mmedit.engine.schedulers - mmedit.evaluation.metrics - mmedit.evaluation.functional - mmedit.models.base_models - mmedit.models.data_preprocessors - mmedit.models.losses - mmedit.models.utils - mmedit.models.editors - mmedit.structures - mmedit.visualization - mmedit.utils + mmedit.apis.inferencers + mmedit.structures + mmedit.datasets + mmedit.datasets.transforms + mmedit.evaluation + mmedit.visualization + mmedit.engine.hooks + mmedit.engine.logging + mmedit.engine.optimizers + mmedit.engine.runner + mmedit.engine.schedulers + mmedit.models.base_archs + mmedit.models.base_models + mmedit.models.losses + mmedit.models.data_preprocessors + mmedit.models.utils + mmedit.models.editors + mmedit.utils .. toctree:: :maxdepth: 1 - :caption: Notes - - notes/1_contribution_guide.md - notes/2_projects.md - notes/3_changelog.md - notes/4_faq.md + :caption: Migration from MMEdit 0.x + migration/overview.md + migration/runtime.md + migration/models.md + migration/eval_test.md + migration/schedule.md + migration/data.md + migration/distributed_train.md + migration/optimizers.md + migration/visualization.md + migration/amp.md .. toctree:: :caption: Switch Language - 5_switch_language.md + switch_language.md diff --git a/docs/en/migration/10_amp.md b/docs/en/migration/amp.md similarity index 100% rename from docs/en/migration/10_amp.md rename to docs/en/migration/amp.md diff --git a/docs/en/migration/6_data.md b/docs/en/migration/data.md similarity index 99% rename from docs/en/migration/6_data.md rename to docs/en/migration/data.md index 5b00eba624..b91758d1c8 100644 --- a/docs/en/migration/6_data.md +++ b/docs/en/migration/data.md @@ -12,7 +12,7 @@ We update data pipelines settings in MMEdit 1.x. Important modifications are as - Remove normalization and color space transforms operations. They are moved from datasets transforms pipelines to data_preprocessor. - The original formatting transforms pipelines `Collect` and `ToTensor` are combined as `PackEditInputs`. - More details of data pipelines are shown in [transform guides](../advanced_guides/3_transforms.md). + More details of data pipelines are shown in [transform guides](../howto/transforms.md). 
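As a rough sketch of the migrated style described above (the transform names follow examples used elsewhere in these docs, while the preprocessor class name and the `mean`/`std` values are illustrative assumptions rather than a definitive configuration):

```python
# MMEdit 1.x style: `Collect`/`ToTensor` are merged into `PackEditInputs`,
# and normalization moves out of the pipeline into the data preprocessor.
train_pipeline = [
    dict(type='LoadImageFromFile', key='img'),
    dict(type='Flip', keys=['img'], direction='horizontal'),
    dict(type='PackEditInputs'),  # packs images and meta info for the model
]

model = dict(
    # ... generator, losses, and other fields omitted ...
    data_preprocessor=dict(
        type='EditDataPreprocessor',  # assumed class name, for illustration only
        mean=[127.5, 127.5, 127.5],   # illustrative values
        std=[127.5, 127.5, 127.5]))
```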
diff --git a/docs/en/migration/7_distributed_train.md b/docs/en/migration/distributed_train.md similarity index 100% rename from docs/en/migration/7_distributed_train.md rename to docs/en/migration/distributed_train.md diff --git a/docs/en/migration/4_eval_test.md b/docs/en/migration/eval_test.md similarity index 97% rename from docs/en/migration/4_eval_test.md rename to docs/en/migration/eval_test.md index 87c2290d69..5341329d20 100644 --- a/docs/en/migration/4_eval_test.md +++ b/docs/en/migration/eval_test.md @@ -50,7 +50,7 @@ test_cfg = dict(type='TestLoop') # The name of test loop type We have merged [MMGeneration 1.x](https://github.com/open-mmlab/mmgeneration/tree/1.x) into MMEditing. Here is migration of Evaluation and Testing Settings about MMGeneration.

-The evaluation field is splited to `val_evaluator` and `test_evaluator`. And it won't support `interval` and `save_best` arguments. The `interval` is moved to `train_cfg.val_interval`, see [the schedule settings](./5_schedule.md) and the `save_best` is moved to `default_hooks.checkpoint.save_best`.
+The evaluation field is split into `val_evaluator` and `test_evaluator`, and it no longer supports the `interval` and `save_best` arguments. The `interval` is moved to `train_cfg.val_interval` (see [the schedule settings](./schedule.md)), and the `save_best` is moved to `default_hooks.checkpoint.save_best`.
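A minimal sketch of how these fields fit together in a 1.x config (the metric list, iteration counts, and `save_best` key are placeholders, not recommended values):

```python
# Evaluation is configured through separate evaluators in MMEdit 1.x.
val_evaluator = dict(type='GenEvaluator', metrics=[dict(type='PSNR')])  # placeholder metric
test_evaluator = val_evaluator

# The old `interval` argument now lives in the training loop config ...
train_cfg = dict(type='IterBasedTrainLoop', max_iters=1000000, val_interval=10000)

# ... and `save_best` moves to the checkpoint hook.
default_hooks = dict(
    checkpoint=dict(type='CheckpointHook', interval=10000, save_best='PSNR'))
```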
diff --git a/docs/en/migration/3_models.md b/docs/en/migration/models.md similarity index 97% rename from docs/en/migration/3_models.md rename to docs/en/migration/models.md index 07c6fb9d47..f467755eb8 100644 --- a/docs/en/migration/3_models.md +++ b/docs/en/migration/models.md @@ -68,4 +68,4 @@ We refactor models in MMEdit 1.x. Important modifications are as following. - The `models` in MMedit 1.x is refactored to five parts: `base_models`, `data_preprocessors`, `editors`, `layers` and `losses`. - Add `data_preprocessor` module in `models`. Normalization and color space transforms operations are moved from datasets transforms pipelines to data_preprocessor. The data out from the data pipeline is transformed by this module and then fed into the model. -More details of models are shown in [model guides](../advanced_guides/1_models.md). +More details of models are shown in [model guides](../howto/models.md). diff --git a/docs/en/migration/8_optimizers.md b/docs/en/migration/optimizers.md similarity index 100% rename from docs/en/migration/8_optimizers.md rename to docs/en/migration/optimizers.md diff --git a/docs/en/migration/1_overview.md b/docs/en/migration/overview.md similarity index 94% rename from docs/en/migration/1_overview.md rename to docs/en/migration/overview.md index 7302148cf3..b4e71473f3 100644 --- a/docs/en/migration/1_overview.md +++ b/docs/en/migration/overview.md @@ -7,7 +7,7 @@ This section introduce the following contents in terms of migration from MMEditi ## New dependencies -MMEdit 1.x depends on some new packages, you can prepare a new clean environment and install again according to the [install tutorial](../2_get_started.md). Or install the below packages manually. +MMEdit 1.x depends on some new packages, you can prepare a new clean environment and install again according to the [install tutorial](../get_started/install.md). Or install the below packages manually. 1. [MMEngine](https://github.com/open-mmlab/mmengine): MMEngine is the core the OpenMMLab 2.0 architecture, and we splited many compentents unrelated to computer vision from MMCV to MMEngine. 2. [MMCV](https://github.com/open-mmlab/mmcv/tree/dev-2.x): The computer vision package of OpenMMLab. This is not a new dependency, but you need to upgrade it to above 2.0.0rc0 version. @@ -25,4 +25,4 @@ We refactor overall structures in MMEdit 1.x as following. We rename config file to new template: `{model_settings}_{module_setting}_{training_setting}_{datasets_info}`. -More details of config are shown in [config guides](../user_guides/1_config.md). +More details of config are shown in [config guides](../user_guides/config.md). diff --git a/docs/en/migration/2_runtime.md b/docs/en/migration/runtime.md similarity index 100% rename from docs/en/migration/2_runtime.md rename to docs/en/migration/runtime.md diff --git a/docs/en/migration/5_schedule.md b/docs/en/migration/schedule.md similarity index 100% rename from docs/en/migration/5_schedule.md rename to docs/en/migration/schedule.md diff --git a/docs/en/migration/9_visualization.md b/docs/en/migration/visualization.md similarity index 95% rename from docs/en/migration/9_visualization.md rename to docs/en/migration/visualization.md index 0f782531b7..6c9e3329c8 100644 --- a/docs/en/migration/9_visualization.md +++ b/docs/en/migration/visualization.md @@ -41,4 +41,4 @@ custom_hooks = [dict(type='BasicVisualizationHook', interval=1)]
-To learn more about the visualization function, please refers to [this tutorial](../user_guides/5_visualization.md). +To learn more about the visualization function, please refers to [this tutorial](../user_guides/visualization.md). diff --git a/docs/en/notes/1_contribution_guide.md b/docs/en/notes/1_contribution_guide.md deleted file mode 100644 index a7669e771b..0000000000 --- a/docs/en/notes/1_contribution_guide.md +++ /dev/null @@ -1,68 +0,0 @@ -# Contributing to MMEditing - -This section introduces following contents: - -- [Workflow](#workflow) -- [Code style](#code-style) - - [Python](#python) - - [C++ and CUDA](#c-and-cuda) - -All kinds of contributions are welcome, including but not limited to the following. - -- Fix typo or bugs -- Add documentation or translate the documentation into other languages -- Add new features and components - -## Workflow - -1. fork and pull the latest MMEditing repository (MMEditing) -2. checkout a new branch (do not use master branch for PRs) -3. commit your changes -4. create a PR - -```{note} -If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first. -``` - -## Code style - -### Python - -We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. - -We use the following tools for linting and formatting: - -- [flake8](https://github.com/PyCQA/flake8): A wrapper around some linter tools. -- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports. -- [yapf](https://github.com/google/yapf): A formatter for Python files. -- [codespell](https://github.com/codespell-project/codespell): A Python utility to fix common misspellings in text files. -- [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files. -- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring. - -Style configurations can be found in [setup.cfg](https://github.com/open-mmlab/mmediting/blob/1.x/setup.cfg). - -We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`, -fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirments.txt` automatically on every commit. -The config for a pre-commit hook is stored in [.pre-commit-config](https://github.com/open-mmlab/mmediting/blob/1.x/.pre-commit-config.yaml). - -After you clone the repository, you will need to install initialize pre-commit hook. - -```shell -pip install -U pre-commit -``` - -From the repository folder - -```shell -pre-commit install -``` - -After this on every commit check code linters and formatter will be enforced. - -```{important} -Before you create a PR, make sure that your code lints and is formatted by yapf. -``` - -### C++ and CUDA - -We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). diff --git a/docs/en/notes/2_projects.md b/docs/en/notes/2_projects.md deleted file mode 100644 index 9adf606d79..0000000000 --- a/docs/en/notes/2_projects.md +++ /dev/null @@ -1,46 +0,0 @@ -# Projects based on MMEditing - -There are many projects built upon MMEditing. -We list some of them as examples of how to extend MMEditing for your own projects. -As the page might not be completed, please feel free to create a PR to update this page. 
- -## Research papers - -There are also projects released with papers. -Some of the papers are published in top-tier conferences (CVPR, ECCV, and NeurIPS). -Methods already supported and maintained by MMEditing are not listed. - -- Towards Interpretable Video Super-Resolution via Alternating Optimization, ECCV 2022 [\[paper\]](https://arxiv.org/abs/2207.10765)[\[github\]](https://github.com/caojiezhang/DAVSR) - -- SepLUT:Separable Image-adaptive Lookup Tables for Real-time Image Enhancement, ECCV 2022 [\[paper\]](https://arxiv.org/abs/2207.08351)[\[github\]](https://github.com/ImCharlesY/SepLUT) - -- Investigating Tradeoffs in Real-World Video Super-Resolution(RealBasicVSR), CVPR 2022 [\[paper\]](https://arxiv.org/abs/2111.12704)[\[github\]](https://github.com/ckkelvinchan/RealBasicVSR) - -- BasicVSR++: Improving Video Super-Resolution with Enhanced Propagation and Alignment, CVPR 2022 [\[paper\]](https://arxiv.org/abs/2104.13371)[\[github\]](https://github.com/ckkelvinchan/BasicVSR_PlusPlus) - -- Multi-Scale Memory-Based Video Deblurring, CVPR 2022 [\[paper\]](https://arxiv.org/abs/2204.02977)[\[github\]](https://github.com/jibo27/MemDeblur) - -- AdaInt:Learning Adaptive Intervals for 3D Lookup Tables on Real-time Image Enhancement, CVPR 2022 [\[paper\]](https://arxiv.org/abs/2204.13983)[\[github\]](https://github.com/ImCharlesY/AdaInt) - -- A New Dataset and Transformer for Stereoscopic Video Super-Resolution, CVPRW 2022 [\[paper\]](https://openaccess.thecvf.com/content/CVPR2022W/NTIRE/papers/Imani_A_New_Dataset_and_Transformer_for_Stereoscopic_Video_Super-Resolution_CVPRW_2022_paper.pdf)[\[github\]](https://github.com/H-deep/Trans-SVSR) - -- BasicVSR:The Search for Essential Components in Video Super-Resolution and Beyond, CVPR 2021 [\[paper\]](https://arxiv.org/abs/2012.02181)[\[github\]](https://github.com/ckkelvinchan/BasicVSR-IconVSR) - -- GLEAN:Generative Latent Bank for Large-Factor Image Super-Resolution, CVPR 2021 [\[paper\]](https://arxiv.org/abs/2012.00739)[\[github\]](https://github.com/ckkelvinchan/GLEAN) - -- DAN:Unfolding the Alternating Optimization for Blind Super Resolution, NeurIPS 2020 [\[paper\]](https://arxiv.org/abs/2010.02631v4)[\[github\]](https://github.com/AlexZou14/DAN-Basd-on-Openmmlab) - -- Positional Encoding as Spatial Inductive Bias in GANs, CVPR 2021[\[paper\]](https://openaccess.thecvf.com/content/CVPR2021/papers/Xu_Positional_Encoding_As_Spatial_Inductive_Bias_in_GANs_CVPR_2021_paper.pdf)[\[github\]](https://github.com/open-mmlab/mmgeneration/tree/master/configs/positional_encoding_in_gans) - -- A Multi-Modality Ovarian Tumor Ultrasound Image Dataset for Unsupervised Cross-Domain Semantic Segmentation, arXiv 2022 [\[paper\]](https://arxiv.org/pdf/2207.06799.pdf)[\[github\]](https://github.com/cv516buaa/mmotu_ds2net) - -- Arbitrary-Scale Image Synthesis, CVPR 2022 [\[paper\]](https://arxiv.org/pdf/2204.02273.pdf)[\[github\]](https://github.com/vglsd/ScaleParty) - -## Open-source projects - -Some open-source projects extend MMEditing for more functions and fields. -They reveal the potential of what MMEditing can do. We list several of them as below. - -- [PowerVQE](https://github.com/ryanxingql/powervqe): Open framework for quality enhancement of compressed videos based on PyTorch and MMEditing. -- [VR-Baseline](https://github.com/linjing7/VR-Baseline): Video Restoration Toolbox. 
-- [Manga-Colorization-with-CycleGAN](https://github.com/chandlerbing65nm/Manga-Colorization-with-CycleGAN): Colorizing Black&White Japanese Manga using Generative Adversarial Network. diff --git a/docs/en/5_switch_language.md b/docs/en/switch_language.md similarity index 100% rename from docs/en/5_switch_language.md rename to docs/en/switch_language.md diff --git a/docs/en/user_guides/7_deploy.md b/docs/en/user_guides/7_deploy.md deleted file mode 100644 index d7a688f344..0000000000 --- a/docs/en/user_guides/7_deploy.md +++ /dev/null @@ -1,4 +0,0 @@ -# Tutorial 7: Deploy Models in MMEditing - -[MMDeploy](https://github.com/open-mmlab/mmdeploy) is an open-source deep learning model deployment toolset. -MMDeploy supports deploying models in MMEditing. Please refer to [MMDeploy](https://github.com/open-mmlab/mmdeploy) for more information. diff --git a/docs/en/user_guides/1_config.md b/docs/en/user_guides/config.md similarity index 100% rename from docs/en/user_guides/1_config.md rename to docs/en/user_guides/config.md diff --git a/docs/en/user_guides/2_dataset_prepare.md b/docs/en/user_guides/dataset_prepare.md similarity index 89% rename from docs/en/user_guides/2_dataset_prepare.md rename to docs/en/user_guides/dataset_prepare.md index c58fe4a8a9..b26ae391cf 100644 --- a/docs/en/user_guides/2_dataset_prepare.md +++ b/docs/en/user_guides/dataset_prepare.md @@ -1,4 +1,4 @@ -# Tutorial 2: Prepare Datasets +# Tutorial 2: Prepare datasets In this section, we will detail how to prepare data and adopt the proper dataset in our repo for different methods. @@ -34,6 +34,6 @@ python tools/dataset_converters/super-resolution/div2k/preprocess_div2k_dataset. We support detailed tutorials and split them according to different tasks. -Please check our [dataset zoo](../dataset_zoo/0_overview.md) for data preparation of different tasks. +Please check our dataset zoo for data preparation of different tasks. -If you're interested in more details of datasets in MMEditing, please check the [advanced guides](../advanced_guides/2_dataset.md). +If you're interested in more details of datasets in MMEditing, please check the [advanced guides](../howto/dataset.md). diff --git a/docs/en/user_guides/deploy.md b/docs/en/user_guides/deploy.md new file mode 100644 index 0000000000..ce2ff00274 --- /dev/null +++ b/docs/en/user_guides/deploy.md @@ -0,0 +1,158 @@ +# Tutorial 8: Deploy models in MMEditing + +The deployment of OpenMMLab codebases, including MMClassification, MMDetection, MMEditing and so on are supported by [MMDeploy](https://github.com/open-mmlab/mmdeploy). +The latest deployment guide for MMEditing can be found from [here](https://mmdeploy.readthedocs.io/en/1.x/04-supported-codebases/mmedit.html). + +This tutorial is organized as follows: + +- [Installation](#installation) +- [Convert model](#convert-model) +- [Model specification](#model-specification) +- [Model inference](#model-inference) + - [Backend model inference](#backend-model-inference) + - [SDK model inference](#sdk-model-inference) +- [Supported models](#supported-models) + +## Installation + +Please follow the [guide](../get_started/install.md) to install mmedit. And then install mmdeploy from source by following [this](https://mmdeploy.readthedocs.io/en/1.x/get_started.html#installation) guide. + +```{note} +If you install mmdeploy prebuilt package, please also clone its repository by 'git clone https://github.com/open-mmlab/mmdeploy.git --depth=1' to get the deployment config files. 
+``` + +## Convert model + +Suppose mmediting and mmdeploy repositories are in the same directory, and the working directory is the root path of mmediting. + +Take [ESRGAN](../../../configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py) model as an example. +You can download its checkpoint from [here](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420-bf5c993c.pth), and then convert it to onnx model as follows: + +```python +from mmdeploy.apis import torch2onnx +from mmdeploy.backend.sdk.export_info import export2SDK + +img = 'tests/data/image/face/000001.png' +work_dir = 'mmdeploy_models/mmedit/onnx' +save_file = 'end2end.onnx' +deploy_cfg = '../mmdeploy/configs/mmedit/super-resolution/super-resolution_onnxruntime_dynamic.py' +model_cfg = 'configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py' +model_checkpoint = 'esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420-bf5c993c.pth' +device = 'cpu' + +# 1. convert model to onnx +torch2onnx(img, work_dir, save_file, deploy_cfg, model_cfg, + model_checkpoint, device) + +# 2. extract pipeline info for inference by MMDeploy SDK +export2SDK(deploy_cfg, model_cfg, work_dir, pth=model_checkpoint, device=device) +``` + +It is crucial to specify the correct deployment config during model conversion.MMDeploy has already provided builtin deployment config [files](https://github.com/open-mmlab/mmdeploy/tree/1.x/configs/mmedit) of all supported backends for mmedit, under which the config file path follows the pattern: + +``` +{task}/{task}_{backend}-{precision}_{static | dynamic}_{shape}.py +``` + +- **{task}:** task in mmedit. + +- **{backend}:** inference backend, such as onnxruntime, tensorrt, pplnn, ncnn, openvino, coreml etc. + +- **{precision}:** fp16, int8. When it's empty, it means fp32 + +- **{static | dynamic}:** static shape or dynamic shape + +- **{shape}:** input shape or shape range of a model + +Therefore, in the above example, you can also convert `ESRGAN` to other backend models by changing the deployment config file, e.g., converting to tensorrt-fp16 model by `super-resolution_tensorrt-fp16_dynamic-32x32-512x512.py`. + +```{tip} +When converting mmedit models to tensorrt models, --device should be set to "cuda" +``` + +## Model specification + +Before moving on to model inference chapter, let's know more about the converted model structure which is very important for model inference. + +The converted model locates in the working directory like `mmdeploy_models/mmedit/onnx` in the previous example. It includes: + +``` +mmdeploy_models/mmedit/onnx +├── deploy.json +├── detail.json +├── end2end.onnx +└── pipeline.json +``` + +in which, + +- **end2end.onnx**: backend model which can be inferred by ONNX Runtime +- ***xxx*.json**: the necessary information for mmdeploy SDK + +The whole package **mmdeploy_models/mmedit/onnx** is defined as **mmdeploy SDK model**, i.e., **mmdeploy SDK model** includes both backend model and inference meta information. + +## Model inference + +### Backend model inference + +Take the previous converted `end2end.onnx` model as an example, you can use the following code to inference the model. 
+ +```python +from mmdeploy.apis.utils import build_task_processor +from mmdeploy.utils import get_input_shape, load_config +import torch + +deploy_cfg = '../mmdeploy/configs/mmedit/super-resolution/super-resolution_onnxruntime_dynamic.py' +model_cfg = 'configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py' +device = 'cpu' +backend_model = ['mmdeploy_models/mmedit/onnx/end2end.onnx'] +image = 'tests/data/image/lq/baboon_x4.png' + +# read deploy_cfg and model_cfg +deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + +# build task and backend model +task_processor = build_task_processor(model_cfg, deploy_cfg, device) +model = task_processor.build_backend_model(backend_model) + +# process input image +input_shape = get_input_shape(deploy_cfg) +model_inputs, _ = task_processor.create_input(image, input_shape) + +# do model inference +with torch.no_grad(): + result = model.test_step(model_inputs) + +# visualize results +task_processor.visualize( + image=image, + model=model, + result=result[0], + window_name='visualize', + output_file='output_restorer.bmp') +``` + +### SDK model inference + +You can also perform SDK model inference like following, + +```python +from mmdeploy_python import Restorer +import cv2 + +img = cv2.imread('tests/data/image/lq/baboon_x4.png') + +# create a predictor +restorer = Restorer(model_path='mmdeploy_models/mmedit/onnx', device_name='cpu', device_id=0) +# perform inference +result = restorer(img) + +# visualize inference result +cv2.imwrite('output_restorer.bmp', result) +``` + +Besides python API, MMDeploy SDK also provides other FFI (Foreign Function Interface), such as C, C++, C#, Java and so on. You can learn their usage from [demos](https://github.com/open-mmlab/mmdeploy/tree/1.x/demo). + +## Supported models + +Please refer to [here](https://mmdeploy.readthedocs.io/en/1.x/04-supported-codebases/mmedit.html#supported-models) for the supported model list. diff --git a/docs/en/user_guides/3_inference.md b/docs/en/user_guides/inference.md similarity index 94% rename from docs/en/user_guides/3_inference.md rename to docs/en/user_guides/inference.md index 22e88566cd..cc71aaf608 100644 --- a/docs/en/user_guides/3_inference.md +++ b/docs/en/user_guides/inference.md @@ -1,4 +1,4 @@ -# Tutorial 3: Inference with Pre-trained Models +# Tutorial 3: Inference with pre-trained models MMEditing provides APIs for you to easily play with state-of-the-art models on your own images or videos. Specifically, MMEditing supports various fundamental generative models, including: @@ -30,7 +30,7 @@ from mmedit.apis import init_model, sample_unconditional_model # Specify the path to model config and checkpoint file config_file = 'configs/styleganv2/stylegan2_c2_8xb4_ffhq-1024x1024.py' # you can download this checkpoint in advance and use a local file path. -checkpoint_file = 'https://download.openmmlab.com/mmgen/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth' +checkpoint_file = 'https://download.openmmlab.com/mmediting/stylegan2/stylegan2_c2_ffhq_1024_b4x8_20210407_150045-618c9024.pth' device = 'cuda:0' # init a generative model @@ -61,7 +61,7 @@ from mmedit.apis import init_model, sample_conditional_model # Specify the path to model config and checkpoint file config_file = 'configs/sagan/sagan_woReLUinplace-Glr1e-4_Dlr4e-4_noaug-ndisc1-8xb32-bigGAN-sch_imagenet1k-128x128.py' # you can download this checkpoint in advance and use a local file path. 
-checkpoint_file = 'https://download.openmmlab.com/mmgen/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth' +checkpoint_file = 'https://download.openmmlab.com/mmediting/sagan/sagan_128_woReLUinplace_noaug_bigGAN_imagenet1k_b32x8_Glr1e-4_Dlr-4e-4_ndisc1_20210818_210232-3f5686af.pth' device = 'cuda:0' # init a generative model @@ -105,7 +105,7 @@ from mmedit.apis import init_model, sample_ddpm_model # Specify the path to model config and checkpoint file config_file = 'configs/improved_ddpm/ddpm_cosine-hybird-timestep-4k_16xb8-1500kiters_imagenet1k-64x64.py' # you can download this checkpoint in advance and use a local file path. -checkpoint_file = 'https://download.openmmlab.com/mmgen/improved_ddpm/ddpm_cosine_hybird_timestep-4k_imagenet1k_64x64_b8x16_1500k_20220103_223919-b8f1a310.pth' +checkpoint_file = 'https://download.openmmlab.com/mmediting/improved_ddpm/ddpm_cosine_hybird_timestep-4k_imagenet1k_64x64_b8x16_1500k_20220103_223919-b8f1a310.pth' device = 'cuda:0' # init a generative model model = init_model(config_file, checkpoint_file, device=device) @@ -332,7 +332,7 @@ from mmedit.apis import init_model, sample_img2img_model # Specify the path to model config and checkpoint file config_file = 'configs/pix2pix/pix2pix_vanilla-unet-bn_wo-jitter-flip-4xb1-190kiters_edges2shoes.py' # you can download this checkpoint in advance and use a local file path. -checkpoint_file = 'https://download.openmmlab.com/mmgen/pix2pix/refactor/pix2pix_vanilla_unet_bn_wo_jitter_flip_1x4_186840_edges2shoes_convert-bgr_20210902_170902-0c828552.pth' +checkpoint_file = 'https://download.openmmlab.com/mmediting/pix2pix/refactor/pix2pix_vanilla_unet_bn_wo_jitter_flip_1x4_186840_edges2shoes_convert-bgr_20210902_170902-0c828552.pth' # Specify the path to image you want to translate image_path = 'tests/data/paired/test/33_AB.jpg' device = 'cuda:0' diff --git a/docs/en/user_guides/8_metrics.md b/docs/en/user_guides/metrics.md similarity index 98% rename from docs/en/user_guides/8_metrics.md rename to docs/en/user_guides/metrics.md index 9c501a566f..500bd4f07e 100644 --- a/docs/en/user_guides/8_metrics.md +++ b/docs/en/user_guides/metrics.md @@ -1,8 +1,8 @@ -# Tutorial 8: Using Metrics in MMEditing +# Tutorial 5: Using metrics in MMEditing MMEditing supports **17 metrics** to assess the quality of models. -Please refer to [Train and Test in MMEditing](../user_guides/4_train_test.md) for usages. +Please refer to [Train and Test in MMEditing](../user_guides/train_test.md) for usages. Here, we will specify the details of different metrics one by one. @@ -176,7 +176,7 @@ metrics = [ ``` `TransFID` has same usage as `FID`, but it's designed for translation models like `Pix2Pix` and `CycleGAN`, which is adapted for our evaluator. You can refer -to [evaluation](../user_guides/4_train_test.md) for details. +to [evaluation](../user_guides/train_test.md) for details. ## IS and TransIS @@ -223,7 +223,7 @@ We also perform a survey on the influence of data loading pipeline and the versi `TransIS` has same usage as `IS`, but it's designed for translation models like `Pix2Pix` and `CycleGAN`, which is adapted for our evaluator. You can refer -to [evaluation](../user_guides/4_train_test.md) for details. +to [evaluation](../user_guides/train_test.md) for details. 
## Precision and Recall diff --git a/docs/en/user_guides/4_train_test.md b/docs/en/user_guides/train_test.md similarity index 96% rename from docs/en/user_guides/4_train_test.md rename to docs/en/user_guides/train_test.md index f4894de795..72ad958772 100644 --- a/docs/en/user_guides/4_train_test.md +++ b/docs/en/user_guides/train_test.md @@ -1,4 +1,4 @@ -# Tutorial 4: Train and Test in MMEditing +# Tutorial 4: Train and test in MMEditing In this section, we introduce how to test and train models in MMEditing. @@ -19,7 +19,7 @@ In this section, we provide the following guides: ## Prerequisite -Users need to [prepare dataset](../user_guides/2_dataset_prepare.md) first to enable training and testing models in MMEditing. +Users need to [prepare dataset](../user_guides/dataset_prepare.md) first to enable training and testing models in MMEditing. ## Test a model in MMEditing @@ -211,4 +211,4 @@ val_evaluator = dict(type='GenEvaluator', metrics=metrics) You can set `val_begin` and `val_interval` to adjust when to begin validation and interval of validation. -For details of metrics, refer to [metrics' guide](./8_metrics.md). +For details of metrics, refer to [metrics' guide](./metrics.md). diff --git a/docs/en/user_guides/6_useful_tools.md b/docs/en/user_guides/useful_tools.md similarity index 89% rename from docs/en/user_guides/6_useful_tools.md rename to docs/en/user_guides/useful_tools.md index a5b0f95d53..9e62ceece6 100644 --- a/docs/en/user_guides/6_useful_tools.md +++ b/docs/en/user_guides/useful_tools.md @@ -1,4 +1,4 @@ -# Tutorial 6: Useful Tools +# Tutorial 7: Useful tools We provide lots of useful tools under `tools/` directory. @@ -61,7 +61,7 @@ The final output filename will be `stylegan2_c2_8xb4_ffhq-1024x1024_{time}-{hash ## Print full config -MMGeneration incorporates config mechanism to set parameters used for training and testing models. With our [config](../user_guides/1_config.md) mechanism, users can easily conduct extensive experiments without hard coding. If you wish to inspect the config file, you may run `python tools/misc/print_config.py /PATH/TO/CONFIG` to see the complete config. +MMGeneration incorporates config mechanism to set parameters used for training and testing models. With our [config](../user_guides/config.md) mechanism, users can easily conduct extensive experiments without hard coding. If you wish to inspect the config file, you may run `python tools/misc/print_config.py /PATH/TO/CONFIG` to see the complete config. An Example: diff --git a/docs/en/user_guides/5_visualization.md b/docs/en/user_guides/visualization.md similarity index 99% rename from docs/en/user_guides/5_visualization.md rename to docs/en/user_guides/visualization.md index 0edf3bea05..31a0326cff 100644 --- a/docs/en/user_guides/5_visualization.md +++ b/docs/en/user_guides/visualization.md @@ -1,4 +1,4 @@ -# Tutorial 5: Visualization +# Tutorial 6: Visualization The visualization of images is an important way to measure the quality of image processing, editing and synthesis. Using `visualizer` in config file can save visual results when training or testing. You can follow [MMEngine Documents](https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/visualization.md) to learn the usage of visualization. MMEditing provides a rich set of visualization functions. 
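For reference, the kind of `visualizer` this tutorial refers to is wired into a config roughly as in the sketch below; the visualizer class and key names are illustrative assumptions, while the hook line matches the example shown earlier in this diff:

```python
# Save visual results locally during training/testing (sketch).
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='ConcatImageVisualizer',  # assumed visualizer class
    vis_backends=vis_backends,
    fn_key='gt_path',              # assumed key used to name the saved images
    img_keys=['gt_img', 'input', 'pred_img'],  # assumed keys to concatenate
    bgr2rgb=True)
custom_hooks = [dict(type='BasicVisualizationHook', interval=1)]
```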
diff --git a/docs/zh_cn/.dev_scripts/update_dataset_zoo.py b/docs/zh_cn/.dev_scripts/update_dataset_zoo.py new file mode 100644 index 0000000000..3383a0187c --- /dev/null +++ b/docs/zh_cn/.dev_scripts/update_dataset_zoo.py @@ -0,0 +1,51 @@ +import os + +from tqdm import tqdm + + +def update_dataset_zoo(): + + target_dir = 'dataset_zoo' + source_dir = '../../tools/dataset_converters' + os.makedirs(target_dir, exist_ok=True) + + # generate overview + overviewmsg = """ +# 概览 + +""" + + # generate index.rst + rstmsg = """ +.. toctree:: + :maxdepth: 1 + :caption: Dataset Zoo + + overview.md +""" + + subfolders = os.listdir(source_dir) + for subf in tqdm(subfolders, desc='update dataset zoo'): + + target_subf = subf.replace('-', '_').lower() + target_readme = os.path.join(target_dir, target_subf + '.md') + source_readme = os.path.join(source_dir, subf, 'README_zh-CN.md') + if not os.path.exists(source_readme): + continue + + overviewmsg += f'\n- [{subf}]({target_subf}.md)' + rstmsg += f'\n {target_subf}.md' + + # generate all tasks dataset_zoo + command = f'cat {source_readme} > {target_readme}' + os.popen(command) + + with open(os.path.join(target_dir, 'overview.md'), 'w') as f: + f.write(overviewmsg) + + with open(os.path.join(target_dir, 'index.rst'), 'w') as f: + f.write(rstmsg) + + +if __name__ == '__main__': + update_dataset_zoo() diff --git a/docs/zh_cn/.dev_scripts/update_model_zoo.py b/docs/zh_cn/.dev_scripts/update_model_zoo.py new file mode 100755 index 0000000000..c780bb314e --- /dev/null +++ b/docs/zh_cn/.dev_scripts/update_model_zoo.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python +# Copyright (c) OpenMMLab. All rights reserved. + +import functools as func +import glob +import os +import os.path as osp +import re +from os.path import basename, dirname + +import numpy as np +import titlecase +from tqdm import tqdm + +github_link = 'https://github.com/open-mmlab/mmediting/blob/1.x/' + + +def anchor(name): + return re.sub(r'-+', '-', + re.sub(r'[^a-zA-Z0-9\+]', '-', + name.strip().lower())).strip('-') + + +def summarize(stats, name): + allpapers = func.reduce(lambda a, b: a.union(b), + [p for p, _, _, _, _, _, _ in stats]) + allconfigs = func.reduce(lambda a, b: a.union(b), + [c for _, c, _, _, _, _, _ in stats]) + allckpts = func.reduce(lambda a, b: a.union(b), + [c for _, _, c, _, _, _, _ in stats]) + alltasks = func.reduce(lambda a, b: a.union(b), + [t for _, _, _, t, _, _, _ in stats]) + task_desc = '\n'.join([ + f" - [{task}]({task.replace('-', '_').replace(' ', '_').lower()}.md)" # noqa + for task in list(alltasks) + ]) + + # Overview + papertypes, papercounts = np.unique([t for t, _ in allpapers], + return_counts=True) + countstr = '\n'.join( + [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)]) + countstr = '\n'.join([f' - ALGORITHM: {len(stats)}']) + + summary = f"""# {name} +""" + + if name != 'Overview': + summary += '\n## 概览' + + summary += f""" +* 预训练权重个数: {len(allckpts)} +* 配置文件个数: {len(allconfigs)} +* 论文个数: {len(allpapers)} +{countstr} + """ + + if name == 'Overview': + summary += f""" +* 任务: +{task_desc} + +""" + + return summary + + +# Count algorithms +def update_model_zoo(): + + target_dir = 'model_zoo' + + os.makedirs(target_dir, exist_ok=True) + + root_dir = dirname(dirname(dirname(dirname(osp.abspath(__file__))))) + files = sorted(glob.glob(osp.join(root_dir, 'configs/*/README_zh-CN.md'))) + stats = [] + + for f in tqdm(files, desc='update model zoo'): + with open(f, 'r') as content_file: + content = content_file.read() + + # title + title = 
content.split('\n')[0].replace('#', '') + year = title.split('\'')[-1].split(')')[0] + + # count papers + papers = set( + (papertype, + titlecase.titlecase(paper.lower().strip()).replace('+', r'\+')) + for (papertype, paper) in re.findall( + r'\s*\n.*?\btitle\s*=\s*{(.*?)}', + content, re.DOTALL)) + + # paper links + revcontent = '\n'.join(list(reversed(content.splitlines()))) + paperlinks = {} + for _, p in papers: + paper_link = osp.join(github_link, 'configs', basename(dirname(f)), + 'README_zh-CN.md') + # print(p, paper_link) + paperlinks[p] = ' '.join( + (f'[⇨]({paper_link}#{anchor(paperlink)})' + for paperlink in re.findall( + rf'\btitle\s*=\s*{{\s*{p}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n', + revcontent, re.DOTALL | re.IGNORECASE))) + # print(' ', paperlinks[p]) + paperlist = '\n'.join( + sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers)) + + # count configs + configs = set(x.lower().strip() + for x in re.findall(r'/configs/.*?\.py', content)) + + # count ckpts + ckpts = list( + x.lower().strip() + for x in re.findall(r'\[model\]\(https\:\/\/.*\.pth', content)) + ckpts.extend( + x.lower().strip() + for x in re.findall(r'\[ckpt\]\(https\:\/\/.*\.pth', content)) + ckpts.extend( + x.lower().strip() + for x in re.findall(r'\[模型\]\(https\:\/\/.*\.pth', content)) + ckpts.extend( + x.lower().strip() + for x in re.findall(r'\[权重\]\(https\:\/\/.*\.pth', content)) + ckpts = set(ckpts) + + # count tasks + task_desc = list( + set(x.lower().strip() + for x in re.findall(r'\*\*任务\*\*: .*', content))) + tasks = set() + if len(task_desc) > 0: + tasks = set(task_desc[0].split('**任务**: ')[1].split(', ')) + + statsmsg = f"""## {title}""" + if len(tasks) > 0: + statsmsg += f"\n* Tasks: {','.join(list(tasks))}" + statsmsg += f""" + +* 预训练权重个数: {len(ckpts)} +* 配置文件个数: {len(configs)} +* 论文个数: {len(papers)} +{paperlist} + +""" + # * We should have: {len(glob.glob(osp.join(dirname(f), '*.py')))} + content = content.replace('# ', '## ') + stats.append((papers, configs, ckpts, tasks, year, statsmsg, content)) + + # overview + overview = summarize(stats, '概览') + with open(osp.join(target_dir, 'overview.md'), 'w') as f: + f.write(overview) + + alltasks = func.reduce(lambda a, b: a.union(b), + [t for _, _, _, t, _, _, _ in stats]) + + # index.rst + indexmsg = """ +.. 
toctree:: + :maxdepth: 1 + :caption: 模型库 + + overview.md +""" + + for task in alltasks: + task = task.replace(' ', '_').replace('-', '_').lower() + indexmsg += f' {task}.md\n' + + with open(osp.join(target_dir, 'index.rst'), 'w') as f: + f.write(indexmsg) + + # task-specific + for task in alltasks: + filtered_model = [ + (paper, config, ckpt, tasks, year, x, content) + for paper, config, ckpt, tasks, year, x, content in stats + if task in tasks + ] + filtered_model = sorted(filtered_model, key=lambda x: x[-3])[::-1] + overview = summarize(filtered_model, task) + + msglist = '\n'.join(x for _, _, _, _, _, _, x in filtered_model) + task = task.replace(' ', '_').replace('-', '_').lower() + with open(osp.join(target_dir, f'{task}.md'), 'w') as f: + f.write(overview + '\n' + msglist) + + +if __name__ == '__main__': + update_model_zoo() diff --git a/docs/zh_cn/.gitignore b/docs/zh_cn/.gitignore new file mode 100644 index 0000000000..825514f423 --- /dev/null +++ b/docs/zh_cn/.gitignore @@ -0,0 +1,2 @@ +model_zoo +dataset_zoo diff --git a/docs/zh_cn/Makefile b/docs/zh_cn/Makefile index d4bb2cbb9e..56ae5906ce 100644 --- a/docs/zh_cn/Makefile +++ b/docs/zh_cn/Makefile @@ -17,4 +17,7 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile + rm -rf _build + rm -rf model_zoo + rm -rf dataset_zoo @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/zh_cn/_templates/404.html b/docs/zh_cn/_templates/404.html new file mode 100644 index 0000000000..3ead60b47a --- /dev/null +++ b/docs/zh_cn/_templates/404.html @@ -0,0 +1,16 @@ +{% extends "layout.html" %} + +{% block body %} + +
+<h1>未找到页面</h1>
+
+<p>
+  未找到你要打开的页面。
+</p>
+
+<p>
+  如果你是从旧版本文档跳转至此,可能是对应的页面被移动了。请从左侧的目录中寻找新版本文档,或者跳转至首页。
+</p>
+
+<p>
+  如果你找不到希望打开的文档,欢迎在 Issue 中告诉我们!
+</p>
+ +{% endblock %} diff --git a/docs/zh_cn/advanced_guides/data_flow.md b/docs/zh_cn/advanced_guides/data_flow.md new file mode 100644 index 0000000000..ccc734f77f --- /dev/null +++ b/docs/zh_cn/advanced_guides/data_flow.md @@ -0,0 +1 @@ +# 数据流(待更新) diff --git a/docs/zh_cn/advanced_guides/data_preprocessor.md b/docs/zh_cn/advanced_guides/data_preprocessor.md new file mode 100644 index 0000000000..b944a828f3 --- /dev/null +++ b/docs/zh_cn/advanced_guides/data_preprocessor.md @@ -0,0 +1 @@ +# 数据预处理器(待更新) diff --git a/docs/zh_cn/advanced_guides/evaluator.md b/docs/zh_cn/advanced_guides/evaluator.md new file mode 100644 index 0000000000..87a5701991 --- /dev/null +++ b/docs/zh_cn/advanced_guides/evaluator.md @@ -0,0 +1 @@ +# 评估器(待更新) diff --git a/docs/zh_cn/advanced_guides/index.rst b/docs/zh_cn/advanced_guides/index.rst deleted file mode 100644 index dab3050979..0000000000 --- a/docs/zh_cn/advanced_guides/index.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. toctree:: - :maxdepth: 2 - - models/customize_models.md - dataset.md - transforms.md - losses.md diff --git a/docs/zh_cn/advanced_guides/losses.md b/docs/zh_cn/advanced_guides/losses.md deleted file mode 100644 index 260855c734..0000000000 --- a/docs/zh_cn/advanced_guides/losses.md +++ /dev/null @@ -1 +0,0 @@ -# 自定义损失函数(待更新) diff --git a/docs/zh_cn/advanced_guides/structures.md b/docs/zh_cn/advanced_guides/structures.md new file mode 100644 index 0000000000..c2118c34a3 --- /dev/null +++ b/docs/zh_cn/advanced_guides/structures.md @@ -0,0 +1 @@ +# 数据结构(待更新) diff --git a/docs/zh_cn/api.rst b/docs/zh_cn/api.rst deleted file mode 100644 index 8fe6a7c0c6..0000000000 --- a/docs/zh_cn/api.rst +++ /dev/null @@ -1,99 +0,0 @@ -mmedit.apis ---------------- -.. automodule:: mmedit.apis - :members: - - -mmedit.datasets -------------------------- - -datasets -^^^^^^^^^^ -.. automodule:: mmedit.datasets - :members: - -transforms -^^^^^^^^^^ -.. automodule:: mmedit.datasets.transforms - :members: - - -mmedit.engine --------------- - -hooks -^^^^^^^^^^ -.. automodule:: mmedit.engine.hooks - :members: - -optimizers -^^^^^^^^^^ -.. automodule:: mmedit.engine.optimizers - :members: - -schedulers -^^^^^^^^^^ -.. automodule:: mmedit.engine.schedulers - :members: - - -mmedit.evaluation ------------------- - -metrics -^^^^^^^^^^ -.. automodule:: mmedit.evaluation.metrics - :members: - - -functional -^^^^^^^^^^ -.. automodule:: mmedit.evaluation.functional - :members: - - -mmedit.models --------------- - -base_models -^^^^^^^ -.. automodule:: mmedit.models.base_models - :members: - -data_preprocessors -^^^^^^^ -.. automodule:: mmedit.models.data_preprocessors - :members: - -layers -^^^^^^^^^^^^ -.. automodule:: mmedit.models.layers - :members: - -losses -^^^^^^^^^^^^ -.. automodule:: mmedit.models.losses - :members: - -utils -^^^^^^^^^^^^ -.. automodule:: mmedit.models.utils - :members: - -editors -^^^^^^^^^^^^ -.. automodule:: mmedit.models.editors - :members: - - -mmedit.visualization --------------------- - -.. automodule:: mmedit.visualization - :members: - -mmedit.utils --------------------- - -.. 
automodule:: mmedit.utils - :members: diff --git a/docs/zh_cn/notes/changelog.md b/docs/zh_cn/changelog.md similarity index 100% rename from docs/zh_cn/notes/changelog.md rename to docs/zh_cn/changelog.md diff --git a/docs/zh_cn/community/contributing.md b/docs/zh_cn/community/contributing.md new file mode 100644 index 0000000000..028e00a4a9 --- /dev/null +++ b/docs/zh_cn/community/contributing.md @@ -0,0 +1,875 @@ +# 贡献代码 + +欢迎加入 MMEditing 社区,我们致力于打造最前沿的计算机视觉基础库,我们欢迎任何类型的贡献,包括但不限于 + +**修复错误** + +修复代码实现错误的步骤如下: + +1. 如果提交的代码改动较大,建议先提交 issue,并正确描述 issue 的现象、原因和复现方式,讨论后确认修复方案。 +2. 修复错误并补充相应的单元测试,提交拉取请求。 + +**新增功能或组件** + +1. 如果新功能或模块涉及较大的代码改动,建议先提交 issue,确认功能的必要性。 +2. 实现新增功能并添单元测试,提交拉取请求。 + +**文档补充** + +修复文档可以直接提交拉取请求 + +添加文档或将文档翻译成其他语言步骤如下 + +1. 提交 issue,确认添加文档的必要性。 +2. 添加文档,提交拉取请求。 + +### 拉取请求工作流 + +如果你对拉取请求不了解,没关系,接下来的内容将会从零开始,一步一步地指引你如何创建一个拉取请求。如果你想深入了解拉取请求的开发模式,可以参考 github [官方文档](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) + +#### 1. 复刻仓库 + +当你第一次提交拉取请求时,先复刻 OpenMMLab 原代码库,点击 GitHub 页面右上角的 **Fork** 按钮,复刻后的代码库将会出现在你的 GitHub 个人主页下。 + + + +将代码克隆到本地 + +```shell +git clone git@github.com:{username}/mmediting.git +``` + +添加原代码库为上游代码库 + +```bash +git remote add upstream git@github.com:open-mmlab/mmediting +``` + +检查 remote 是否添加成功,在终端输入 `git remote -v` + +```bash +origin git@github.com:{username}/mmediting.git (fetch) +origin git@github.com:{username}/mmediting.git (push) +upstream git@github.com:open-mmlab/mmediting (fetch) +upstream git@github.com:open-mmlab/mmediting (push) +``` + +```{note} +这里对 origin 和 upstream 进行一个简单的介绍,当我们使用 git clone 来克隆代码时,会默认创建一个 origin 的 remote,它指向我们克隆的代码库地址,而 upstream 则是我们自己添加的,用来指向原始代码库地址。当然如果你不喜欢他叫 upstream,也可以自己修改,比如叫 open-mmlab。我们通常向 origin 提交代码(即 fork 下来的远程仓库),然后向 upstream 提交一个 pull request。如果提交的代码和最新的代码发生冲突,再从 upstream 拉取最新的代码,和本地分支解决冲突,再提交到 origin。 +``` + +#### 2. 配置 pre-commit + +在本地开发环境中,我们使用 [pre-commit](https://pre-commit.com/#intro) 来检查代码风格,以确保代码风格的统一。在提交代码,需要先安装 pre-commit(需要在 mmediting 目录下执行): + +```shell +pip install -U pre-commit +pre-commit install +``` + +检查 pre-commit 是否配置成功,并安装 `.pre-commit-config.yaml` 中的钩子: + +```shell +pre-commit run --all-files +``` + + + + + +```{note} +如果你是中国用户,由于网络原因,可能会出现安装失败的情况,这时可以使用国内源 + +pre-commit install -c .pre-commit-config-zh-cn.yaml + +pre-commit run --all-files -c .pre-commit-config-zh-cn.yaml +``` + +如果安装过程被中断,可以重复执行 `pre-commit run ...` 继续安装。 + +如果提交的代码不符合代码风格规范,pre-commit 会发出警告,并自动修复部分错误。 + + + +如果我们想临时绕开 pre-commit 的检查提交一次代码,可以在 `git commit` 时加上 `--no-verify`(需要保证最后推送至远程仓库的代码能够通过 pre-commit 检查)。 + +```shell +git commit -m "xxx" --no-verify +``` + +#### 3. 创建开发分支 + +安装完 pre-commit 之后,我们需要基于 master 创建开发分支,建议的分支命名规则为 `username/pr_name`。 + +```shell +git checkout -b yhc/refactor_contributing_doc +``` + +在后续的开发中,如果本地仓库的 master 分支落后于 upstream 的 master 分支,我们需要先拉取 upstream 的代码进行同步,再执行上面的命令 + +```shell +git pull upstream master +``` + +#### 4. 提交代码并在本地通过单元测试 + +- mmediting 引入了 mypy 来做静态类型检查,以增加代码的鲁棒性。因此我们在提交代码时,需要补充 Type Hints。具体规则可以参考[教程](https://zhuanlan.zhihu.com/p/519335398)。 + +- 提交的代码同样需要通过单元测试 + + ```shell + # 通过全量单元测试 + pytest tests + + # 我们需要保证提交的代码能够通过修改模块的单元测试,以 runner 为例 + pytest tests/test_runner/test_runner.py + ``` + + 如果你由于缺少依赖无法运行修改模块的单元测试,可以参考[指引-单元测试](#单元测试) + +- 如果修改/添加了文档,参考[指引](#文档渲染)确认文档渲染正常。 + +#### 5. 推送代码到远程 + +代码通过单元测试和 pre-commit 检查后,将代码推送到远程仓库,如果是第一次推送,可以在 `git push` 后加上 `-u` 参数以关联远程分支 + +```shell +git push -u origin {branch_name} +``` + +这样下次就可以直接使用 `git push` 命令推送代码了,而无需指定分支和远程仓库。 + +#### 6. 
提交拉取请求(PR) + +(1) 在 GitHub 的 Pull request 界面创建拉取请求 + + +(2) 根据指引修改 PR 描述,以便于其他开发者更好地理解你的修改 + + + +描述规范详见[拉取请求规范](#拉取请求规范) + +  + +**注意事项** + +(a) PR 描述应该包含修改理由、修改内容以及修改后带来的影响,并关联相关 Issue(具体方式见[文档](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)) + +(b) 如果是第一次为 OpenMMLab 做贡献,需要签署 CLA + + + +(c) 检查提交的 PR 是否通过 CI(集成测试) + + + +mmediting 会在不同的平台(Linux、Window、Mac),基于不同版本的 Python、PyTorch、CUDA 对提交的代码进行单元测试,以保证代码的正确性,如果有任何一个没有通过,我们可点击上图中的 `Details` 来查看具体的测试信息,以便于我们修改代码。 + +(3) 如果 PR 通过了 CI,那么就可以等待其他开发者的 review,并根据 reviewer 的意见,修改代码,并重复 [4](#4-提交代码并本地通过单元测试)-[5](#5-推送代码到远程) 步骤,直到 reviewer 同意合入 PR。 + + + +所有 reviewer 同意合入 PR 后,我们会尽快将 PR 合并到主分支。 + +#### 7. 解决冲突 + +随着时间的推移,我们的代码库会不断更新,这时候,如果你的 PR 与主分支存在冲突,你需要解决冲突,解决冲突的方式有两种: + +```shell +git fetch --all --prune +git rebase upstream/master +``` + +或者 + +```shell +git fetch --all --prune +git merge upstream/master +``` + +如果你非常善于处理冲突,那么可以使用 rebase 的方式来解决冲突,因为这能够保证你的 commit log 的整洁。如果你不太熟悉 `rebase` 的使用,那么可以使用 `merge` 的方式来解决冲突。 + +### 指引 + +#### 单元测试 + +在提交修复代码错误或新增特性的拉取请求时,我们应该尽可能的让单元测试覆盖所有提交的代码,计算单元测试覆盖率的方法如下 + +```shell +python -m coverage run -m pytest /path/to/test_file +python -m coverage html +# check file in htmlcov/index.html +``` + +#### 文档渲染 + +在提交修复代码错误或新增特性的拉取请求时,可能会需要修改/新增模块的 docstring。我们需要确认渲染后的文档样式是正确的。 +本地生成渲染后的文档的方法如下 + +```shell +pip install -r requirements/docs.txt +cd docs/zh_cn/ +# or docs/en +make html +# check file in ./docs/zh_cn/_build/html/index.html +``` + +### 代码风格 + +#### Python + +[PEP8](https://www.python.org/dev/peps/pep-0008/) 作为 OpenMMLab 算法库首选的代码规范,我们使用以下工具检查和格式化代码 + +- [flake8](https://github.com/PyCQA/flake8): Python 官方发布的代码规范检查工具,是多个检查工具的封装 +- [isort](https://github.com/timothycrosley/isort): 自动调整模块导入顺序的工具 +- [yapf](https://github.com/google/yapf): Google 发布的代码规范检查工具 +- [codespell](https://github.com/codespell-project/codespell): 检查单词拼写是否有误 +- [mdformat](https://github.com/executablebooks/mdformat): 检查 markdown 文件的工具 +- [docformatter](https://github.com/myint/docformatter): 格式化 docstring 的工具 + +yapf 和 isort 的配置可以在 [setup.cfg](../../../setup.cfg) 找到 + +通过配置 [pre-commit hook](https://pre-commit.com/) ,我们可以在提交代码时自动检查和格式化 `flake8`、`yapf`、`isort`、`trailing whitespaces`、`markdown files`,修复 `end-of-files`、`double-quoted-strings`、`python-encoding-pragma`、`mixed-line-ending`,调整 `requirments.txt` 的包顺序。 +pre-commit 钩子的配置可以在 [.pre-commit-config](../../../.pre-commit-config.yaml) 找到。 + +pre-commit 具体的安装使用方式见[拉取请求](#2-配置-pre-commit)。 + +更具体的规范请参考 [OpenMMLab 代码规范](contributing.md#代码风格)。 + +#### C++ and CUDA + +C++ 和 CUDA 的代码规范遵从 [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) + +### 拉取请求规范 + +1. 使用 [pre-commit hook](https://pre-commit.com),尽量减少代码风格相关问题 + +2. 一个`拉取请求`对应一个短期分支 + +3. 粒度要细,一个`拉取请求`只做一件事情,避免超大的`拉取请求` + + - Bad:实现 Faster R-CNN + - Acceptable:给 Faster R-CNN 添加一个 box head + - Good:给 box head 增加一个参数来支持自定义的 conv 层数 + +4. 每次 Commit 时需要提供清晰且有意义 commit 信息 + +5. 提供清晰且有意义的`拉取请求`描述 + + - 标题写明白任务名称,一般格式:\[Prefix\] Short description of the pull request (Suffix) + - prefix: 新增功能 \[Feature\], 修 bug \[Fix\], 文档相关 \[Docs\], 开发中 \[WIP\] (暂时不会被review) + - 描述里介绍`拉取请求`的主要修改内容,结果,以及对其他部分的影响, 参考`拉取请求`模板 + - 关联相关的`议题` (issue) 和其他`拉取请求` + +6. 如果引入了其他三方库,或借鉴了三方库的代码,请确认他们的许可证和 mmediting 兼容,并在借鉴的代码上补充 `This code is inspired from http://` + +## 代码规范 + +### 代码规范标准 + +#### PEP 8 —— Python 官方代码规范 + +[Python 官方的代码风格指南](https://www.python.org/dev/peps/pep-0008/),包含了以下几个方面的内容: + +- 代码布局,介绍了 Python 中空行、断行以及导入相关的代码风格规范。比如一个常见的问题:当我的代码较长,无法在一行写下时,何处可以断行? 
+ +- 表达式,介绍了 Python 中表达式空格相关的一些风格规范。 + +- 尾随逗号相关的规范。当列表较长,无法一行写下而写成如下逐行列表时,推荐在末项后加逗号,从而便于追加选项、版本控制等。 + + ```python + # Correct: + FILES = ['setup.cfg', 'tox.ini'] + # Correct: + FILES = [ + 'setup.cfg', + 'tox.ini', + ] + # Wrong: + FILES = ['setup.cfg', 'tox.ini',] + # Wrong: + FILES = [ + 'setup.cfg', + 'tox.ini' + ] + ``` + +- 命名相关规范、注释相关规范、类型注解相关规范,我们将在后续章节中做详细介绍。 + + "A style guide is about consistency. Consistency with this style guide is important. Consistency within a project is more important. Consistency within one module or function is the most important." PEP 8 -- Style Guide for Python Code + +:::{note} +PEP 8 的代码规范并不是绝对的,项目内的一致性要优先于 PEP 8 的规范。OpenMMLab 各个项目都在 setup.cfg 设定了一些代码规范的设置,请遵照这些设置。一个例子是在 PEP 8 中有如下一个例子: + +```python +# Correct: +hypot2 = x*x + y*y +# Wrong: +hypot2 = x * x + y * y +``` + +这一规范是为了指示不同优先级,但 OpenMMLab 的设置中通常没有启用 yapf 的 `ARITHMETIC_PRECEDENCE_INDICATION` 选项,因而格式规范工具不会按照推荐样式格式化,以设置为准。 +::: + +#### Google 开源项目风格指南 + +[Google 使用的编程风格指南](https://google.github.io/styleguide/pyguide.html),包括了 Python 相关的章节。相较于 PEP 8,该指南提供了更为详尽的代码指南。该指南包括了语言规范和风格规范两个部分。 + +其中,语言规范对 Python 中很多语言特性进行了优缺点的分析,并给出了使用指导意见,如异常、Lambda 表达式、列表推导式、metaclass 等。 + +风格规范的内容与 PEP 8 较为接近,大部分约定建立在 PEP 8 的基础上,也有一些更为详细的约定,如函数长度、TODO 注释、文件与 socket 对象的访问等。 + +推荐将该指南作为参考进行开发,但不必严格遵照,一来该指南存在一些 Python 2 兼容需求,例如指南中要求所有无基类的类应当显式地继承 Object, 而在仅使用 Python 3 的环境中,这一要求是不必要的,依本项目中的惯例即可。二来 OpenMMLab 的项目作为框架级的开源软件,不必对一些高级技巧过于避讳,尤其是 MMCV。但尝试使用这些技巧前应当认真考虑是否真的有必要,并寻求其他开发人员的广泛评估。 + +另外需要注意的一处规范是关于包的导入,在该指南中,要求导入本地包时必须使用路径全称,且导入的每一个模块都应当单独成行,通常这是不必要的,而且也不符合目前项目的开发惯例,此处进行如下约定: + +```python +# Correct +from mmedit.cnn.bricks import (Conv2d, build_norm_layer, DropPath, MaxPool2d, + Linear) +from ..utils import ext_loader + +# Wrong +from mmedit.cnn.bricks import Conv2d, build_norm_layer, DropPath, MaxPool2d, \ + Linear # 使用括号进行连接,而不是反斜杠 +from ...utils import is_str # 最多向上回溯一层,过多的回溯容易导致结构混乱 +``` + +OpenMMLab 项目使用 pre-commit 工具自动格式化代码,详情见[贡献代码](contributing.md#代码风格)。 + +### 命名规范 + +#### 命名规范的重要性 + +优秀的命名是良好代码可读的基础。基础的命名规范对各类变量的命名做了要求,使读者可以方便地根据代码名了解变量是一个类 / 局部变量 / 全局变量等。而优秀的命名则需要代码作者对于变量的功能有清晰的认识,以及良好的表达能力,从而使读者根据名称就能了解其含义,甚至帮助了解该段代码的功能。 + +#### 基础命名规范 + +| 类型 | 公有 | 私有 | +| --------------- | ---------------- | ------------------ | +| 模块 | lower_with_under | \_lower_with_under | +| 包 | lower_with_under | | +| 类 | CapWords | \_CapWords | +| 异常 | CapWordsError | | +| 函数(方法) | lower_with_under | \_lower_with_under | +| 函数 / 方法参数 | lower_with_under | | +| 全局 / 类内常量 | CAPS_WITH_UNDER | \_CAPS_WITH_UNDER | +| 全局 / 类内变量 | lower_with_under | \_lower_with_under | +| 变量 | lower_with_under | \_lower_with_under | +| 局部变量 | lower_with_under | | + +注意: + +- 尽量避免变量名与保留字冲突,特殊情况下如不可避免,可使用一个后置下划线,如 class\_ +- 尽量不要使用过于简单的命名,除了约定俗成的循环变量 i,文件变量 f,错误变量 e 等。 +- 不会被用到的变量可以命名为 \_,逻辑检查器会将其忽略。 + +#### 命名技巧 + +良好的变量命名需要保证三点: + +1. 含义准确,没有歧义 +2. 长短适中 +3. 前后统一 + +```python +# Wrong +class Masks(metaclass=ABCMeta): # 命名无法表现基类;Instance or Semantic? 
+ pass + +# Correct +class BaseInstanceMasks(metaclass=ABCMeta): + pass + +# Wrong,不同地方含义相同的变量尽量用统一的命名 +def __init__(self, inplanes, planes): + pass + +def __init__(self, in_channels, out_channels): + pass +``` + +常见的函数命名方法: + +- 动宾命名法:crop_img, init_weights +- 动宾倒置命名法:imread, bbox_flip + +注意函数命名与参数的顺序,保证主语在前,符合语言习惯: + +- check_keys_exist(key, container) +- check_keys_contain(container, key) + +注意避免非常规或统一约定的缩写,如 nb -> num_blocks,in_nc -> in_channels + +### docstring 规范 + +#### 为什么要写 docstring + +docstring 是对一个类、一个函数功能与 API 接口的详细描述,有两个功能,一是帮助其他开发者了解代码功能,方便 debug 和复用代码;二是在 Readthedocs 文档中自动生成相关的 API reference 文档,帮助不了解源代码的社区用户使用相关功能。 + +#### 如何写 docstring + +与注释不同,一份规范的 docstring 有着严格的格式要求,以便于 Python 解释器以及 sphinx 进行文档解析,详细的 docstring 约定参见 [PEP 257](https://www.python.org/dev/peps/pep-0257/)。此处以例子的形式介绍各种文档的标准格式,参考格式为 [Google 风格](https://zh-google-styleguide.readthedocs.io/en/latest/google-python-styleguide/python_style_rules/#comments)。 + +1. 模块文档 + + 代码风格规范推荐为每一个模块(即 Python 文件)编写一个 docstring,但目前 OpenMMLab 项目大部分没有此类 docstring,因此不做硬性要求。 + + ```python + """A one line summary of the module or program, terminated by a period. + + Leave one blank line. The rest of this docstring should contain an + overall description of the module or program. Optionally, it may also + contain a brief description of exported classes and functions and/or usage + examples. + + Typical usage example: + + foo = ClassFoo() + bar = foo.FunctionBar() + """ + ``` + +2. 类文档 + + 类文档是我们最常需要编写的,此处,按照 OpenMMLab 的惯例,我们使用了与 Google 风格不同的写法。如下例所示,文档中没有使用 Attributes 描述类属性,而是使用 Args 描述 __init__ 函数的参数。 + + 在 Args 中,遵照 `parameter (type): Description.` 的格式,描述每一个参数类型和功能。其中,多种类型可使用 `(float or str)` 的写法,可以为 None 的参数可以写为 `(int, optional)`。 + + ```python + class BaseRunner(metaclass=ABCMeta): + """The base class of Runner, a training helper for PyTorch. + + All subclasses should implement the following APIs: + + - ``run()`` + - ``train()`` + - ``val()`` + - ``save_checkpoint()`` + + Args: + model (:obj:`torch.nn.Module`): The model to be run. + batch_processor (callable, optional): A callable method that process + a data batch. The interface of this method should be + ``batch_processor(model, data, train_mode) -> dict``. + Defaults to None. + optimizer (dict or :obj:`torch.optim.Optimizer`, optional): It can be + either an optimizer (in most cases) or a dict of optimizers + (in models that requires more than one optimizer, e.g., GAN). + Defaults to None. + work_dir (str, optional): The working directory to save checkpoints + and logs. Defaults to None. + logger (:obj:`logging.Logger`): Logger used during training. + Defaults to None. (The default value is just for backward + compatibility) + meta (dict, optional): A dict records some import information such as + environment info and seed, which will be logged in logger hook. + Defaults to None. + max_epochs (int, optional): Total training epochs. Defaults to None. + max_iters (int, optional): Total training iterations. Defaults to None. + """ + + def __init__(self, + model, + batch_processor=None, + optimizer=None, + work_dir=None, + logger=None, + meta=None, + max_iters=None, + max_epochs=None): + ... + ``` + + 另外,在一些算法实现的主体类中,建议加入原论文的链接;如果参考了其他开源代码的实现,则应加入 modified from,而如果是直接复制了其他代码库的实现,则应加入 copied from ,并注意源码的 License。如有必要,也可以通过 .. math:: 来加入数学公式 + + ```python + # 参考实现 + # This func is modified from `detectron2 + # `_. + + # 复制代码 + # This code was copied from the `ubelt + # library`_. 
+ + # 引用论文 & 添加公式 + class LabelSmoothLoss(nn.Module): + r"""Initializer for the label smoothed cross entropy loss. + + Refers to `Rethinking the Inception Architecture for Computer Vision + `_. + + This decreases gap between output scores and encourages generalization. + Labels provided to forward can be one-hot like vectors (NxC) or class + indices (Nx1). + And this accepts linear combination of one-hot like labels from mixup or + cutmix except multi-label task. + + Args: + label_smooth_val (float): The degree of label smoothing. + num_classes (int, optional): Number of classes. Defaults to None. + mode (str): Refers to notes, Options are "original", "classy_vision", + "multi_label". Defaults to "classy_vision". + reduction (str): The method used to reduce the loss. + Options are "none", "mean" and "sum". Defaults to 'mean'. + loss_weight (float): Weight of the loss. Defaults to 1.0. + + Note: + if the ``mode`` is "original", this will use the same label smooth + method as the original paper as: + + .. math:: + (1-\epsilon)\delta_{k, y} + \frac{\epsilon}{K} + + where :math:`\epsilon` is the ``label_smooth_val``, :math:`K` is + the ``num_classes`` and :math:`\delta_{k,y}` is Dirac delta, + which equals 1 for k=y and 0 otherwise. + + if the ``mode`` is "classy_vision", this will use the same label + smooth method as the `facebookresearch/ClassyVision + `_ repo as: + + .. math:: + \frac{\delta_{k, y} + \epsilon/K}{1+\epsilon} + + if the ``mode`` is "multi_label", this will accept labels from + multi-label task and smoothing them as: + + .. math:: + (1-2\epsilon)\delta_{k, y} + \epsilon + ``` + +```{note} +注意 \`\`here\`\`、\`here\`、"here" 三种引号功能是不同。 + +在 reStructured 语法中,\`\`here\`\` 表示一段代码;\`here\` 表示斜体;"here" 无特殊含义,一般可用来表示字符串。其中 \`here\` 的用法与 Markdown 中不同,需要多加留意。 +另外还有 :obj:\`type\` 这种更规范的表示类的写法,但鉴于长度,不做特别要求,一般仅用于表示非常用类型。 +``` + +3. 方法(函数)文档 + + 函数文档与类文档的结构基本一致,但需要加入返回值文档。对于较为复杂的函数和类,可以使用 Examples 字段加入示例;如果需要对参数加入一些较长的备注,可以加入 Note 字段进行说明。 + + 对于使用较为复杂的类或函数,比起看大段大段的说明文字和参数文档,添加合适的示例更能帮助用户迅速了解其用法。需要注意的是,这些示例最好是能够直接在 Python 交互式环境中运行的,并给出一些相对应的结果。如果存在多个示例,可以使用注释简单说明每段示例,也能起到分隔作用。 + + ```python + def import_modules_from_strings(imports, allow_failed_imports=False): + """Import modules from the given list of strings. + + Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raise. Defaults to False. + + Returns: + List[module] | module | None: The imported modules. + All these three lines in docstring will be compiled into the same + line in readthedocs. + + Examples: + >>> osp, sys = import_modules_from_strings( + ... ['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + ... + ``` + + 如果函数接口在某个版本发生了变化,需要在 docstring 中加入相关的说明,必要时添加 Note 或者 Warning 进行说明,例如: + + ```python + class CheckpointHook(Hook): + """Save checkpoints periodically. + + Args: + out_dir (str, optional): The root directory to save checkpoints. If + not specified, ``runner.work_dir`` will be used by default. If + specified, the ``out_dir`` will be the concatenation of + ``out_dir`` and the last level directory of ``runner.work_dir``. + Defaults to None. `Changed in version 1.3.15.` + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmedit.fileio.FileClient` for details. + Defaults to None. 
`New in version 1.3.15.` + + Warning: + Before v1.3.15, the ``out_dir`` argument indicates the path where the + checkpoint is stored. However, in v1.3.15 and later, ``out_dir`` + indicates the root directory and the final path to save checkpoint is + the concatenation of out_dir and the last level directory of + ``runner.work_dir``. Suppose the value of ``out_dir`` is + "/path/of/A" and the value of ``runner.work_dir`` is "/path/of/B", + then the final path will be "/path/of/A/B". + ``` + + 如果参数或返回值里带有需要展开描述字段的 dict,则应该采用如下格式: + + ```python + def func(x): + r""" + Args: + x (None): A dict with 2 keys, ``padded_targets``, and ``targets``. + + - ``targets`` (list[Tensor]): A list of tensors. + Each tensor has the shape of :math:`(T_i)`. Each + element is the index of a character. + - ``padded_targets`` (Tensor): A tensor of shape :math:`(N)`. + Each item is the length of a word. + + Returns: + dict: A dict with 2 keys, ``padded_targets``, and ``targets``. + + - ``targets`` (list[Tensor]): A list of tensors. + Each tensor has the shape of :math:`(T_i)`. Each + element is the index of a character. + - ``padded_targets`` (Tensor): A tensor of shape :math:`(N)`. + Each item is the length of a word. + """ + return x + ``` + +```{important} +为了生成 readthedocs 文档,文档的编写需要按照 ReStructrued 文档格式,否则会产生文档渲染错误,在提交 PR 前,最好生成并预览一下文档效果。 +语法规范参考: + +- [reStructuredText Primer - Sphinx documentation](https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#) +- [Example Google Style Python Docstrings ‒ napoleon 0.7 documentation](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html#example-google) +``` + +### 注释规范 + +#### 为什么要写注释 + +对于一个开源项目,团队合作以及社区之间的合作是必不可少的,因而尤其要重视合理的注释。不写注释的代码,很有可能过几个月自己也难以理解,造成额外的阅读和修改成本。 + +#### 如何写注释 + +最需要写注释的是代码中那些技巧性的部分。如果你在下次代码审查的时候必须解释一下,那么你应该现在就给它写注释。对于复杂的操作,应该在其操作开始前写上若干行注释。对于不是一目了然的代码,应在其行尾添加注释。 +—— Google 开源项目风格指南 + +```python +# We use a weighted dictionary search to find out where i is in +# the array. We extrapolate position based on the largest num +# in the array and the array size and then do binary search to +# get the exact number. +if i & (i-1) == 0: # True if i is 0 or a power of 2. +``` + +为了提高可读性, 注释应该至少离开代码2个空格. +另一方面, 绝不要描述代码. 假设阅读代码的人比你更懂Python, 他只是不知道你的代码要做什么. +—— Google 开源项目风格指南 + +```python +# Wrong: +# Now go through the b array and make sure whenever i occurs +# the next element is i+1 + +# Wrong: +if i & (i-1) == 0: # True if i bitwise and i-1 is 0. +``` + +在注释中,可以使用 Markdown 语法,因为开发人员通常熟悉 Markdown 语法,这样可以便于交流理解,如可使用单反引号表示代码和变量(注意不要和 docstring 中的 ReStructured 语法混淆) + +```python +# `_reversed_padding_repeated_twice` is the padding to be passed to +# `F.pad` if needed (e.g., for non-zero padding types that are +# implemented as two ops: padding + conv). `F.pad` accepts paddings in +# reverse order than the dimension. +self._reversed_padding_repeated_twice = _reverse_repeat_tuple(self.padding, 2) +``` + +#### 注释示例 + +1. 出自 `mmcv/utils/registry.py`,对于较为复杂的逻辑结构,通过注释,明确了优先级关系。 + + ```python + # self.build_func will be set with the following priority: + # 1. build_func + # 2. parent.build_func + # 3. build_from_cfg + if build_func is None: + if parent is not None: + self.build_func = parent.build_func + else: + self.build_func = build_from_cfg + else: + self.build_func = build_func + ``` + +2. 出自 `mmcv/runner/checkpoint.py`,对于 bug 修复中的一些特殊处理,可以附带相关的 issue 链接,帮助其他人了解 bug 背景。 + + ```python + def _save_ckpt(checkpoint, file): + # The 1.6 release of PyTorch switched torch.save to use a new + # zipfile-based file format. 
It will cause RuntimeError when a + # checkpoint was saved in high version (PyTorch version>=1.6.0) but + # loaded in low version (PyTorch version<1.6.0). More details at + # https://github.com/open-mmlab/mmpose/issues/904 + if digit_version(TORCH_VERSION) >= digit_version('1.6.0'): + torch.save(checkpoint, file, _use_new_zipfile_serialization=False) + else: + torch.save(checkpoint, file) + ``` + +### 类型注解 + +#### 为什么要写类型注解 + +类型注解是对函数中变量的类型做限定或提示,为代码的安全性提供保障、增强代码的可读性、避免出现类型相关的错误。 +Python 没有对类型做强制限制,类型注解只起到一个提示作用,通常你的 IDE 会解析这些类型注解,然后在你调用相关代码时对类型做提示。另外也有类型注解检查工具,这些工具会根据类型注解,对代码中可能出现的问题进行检查,减少 bug 的出现。 +需要注意的是,通常我们不需要注释模块中的所有函数: + +1. 公共的 API 需要注释 +2. 在代码的安全性,清晰性和灵活性上进行权衡是否注释 +3. 对于容易出现类型相关的错误的代码进行注释 +4. 难以理解的代码请进行注释 +5. 若代码中的类型已经稳定,可以进行注释. 对于一份成熟的代码,多数情况下,即使注释了所有的函数,也不会丧失太多的灵活性. + +#### 如何写类型注解 + +1. 函数 / 方法类型注解,通常不对 self 和 cls 注释。 + + ```python + from typing import Optional, List, Tuple + + # 全部位于一行 + def my_method(self, first_var: int) -> int: + pass + + # 另起一行 + def my_method( + self, first_var: int, + second_var: float) -> Tuple[MyLongType1, MyLongType1, MyLongType1]: + pass + + # 单独成行(具体的应用场合与行宽有关,建议结合 yapf 自动化格式使用) + def my_method( + self, first_var: int, second_var: float + ) -> Tuple[MyLongType1, MyLongType1, MyLongType1]: + pass + + # 引用尚未被定义的类型 + class MyClass: + def __init__(self, + stack: List["MyClass"]) -> None: + pass + ``` + + 注:类型注解中的类型可以是 Python 内置类型,也可以是自定义类,还可以使用 Python 提供的 wrapper 类对类型注解进行装饰,一些常见的注解如下: + + ```python + # 数值类型 + from numbers import Number + + # 可选类型,指参数可以为 None + from typing import Optional + def foo(var: Optional[int] = None): + pass + + # 联合类型,指同时接受多种类型 + from typing import Union + def foo(var: Union[float, str]): + pass + + from typing import Sequence # 序列类型 + from typing import Iterable # 可迭代类型 + from typing import Any # 任意类型 + from typing import Callable # 可调用类型 + + from typing import List, Dict # 列表和字典的泛型类型 + from typing import Tuple # 元组的特殊格式 + # 虽然在 Python 3.9 中,list, tuple 和 dict 本身已支持泛型,但为了支持之前的版本 + # 我们在进行类型注解时还是需要使用 List, Tuple, Dict 类型 + # 另外,在对参数类型进行注解时,尽量使用 Sequence & Iterable & Mapping + # List, Tuple, Dict 主要用于返回值类型注解 + # 参见 https://docs.python.org/3/library/typing.html#typing.List + ``` + +2. 变量类型注解,一般用于难以直接推断其类型时 + + ```python + # Recommend: 带类型注解的赋值 + a: Foo = SomeUndecoratedFunction() + a: List[int]: [1, 2, 3] # List 只支持单一类型泛型,可使用 Union + b: Tuple[int, int] = (1, 2) # 长度固定为 2 + c: Tuple[int, ...] = (1, 2, 3) # 变长 + d: Dict[str, int] = {'a': 1, 'b': 2} + + # Not Recommend:行尾类型注释 + # 虽然这种方式被写在了 Google 开源指南中,但这是一种为了支持 Python 2.7 版本 + # 而补充的注释方式,鉴于我们只支持 Python 3, 为了风格统一,不推荐使用这种方式。 + a = SomeUndecoratedFunction() # type: Foo + a = [1, 2, 3] # type: List[int] + b = (1, 2, 3) # type: Tuple[int, ...] + c = (1, "2", 3.5) # type: Tuple[int, Text, float] + ``` + +3. 泛型 + + 上文中我们知道,typing 中提供了 list 和 dict 的泛型类型,那么我们自己是否可以定义类似的泛型呢? 
+ + ```python + from typing import TypeVar, Generic + + KT = TypeVar('KT') + VT = TypeVar('VT') + + class Mapping(Generic[KT, VT]): + def __init__(self, data: Dict[KT, VT]): + self._data = data + + def __getitem__(self, key: KT) -> VT: + return self._data[key] + ``` + + 使用上述方法,我们定义了一个拥有泛型能力的映射类,实际用法如下: + + ```python + mapping = Mapping[str, float]({'a': 0.5}) + value: float = example['a'] + ``` + + 另外,我们也可以利用 TypeVar 在函数签名中指定联动的多个类型: + + ```python + from typing import TypeVar, List + + T = TypeVar('T') # Can be anything + A = TypeVar('A', str, bytes) # Must be str or bytes + + + def repeat(x: T, n: int) -> List[T]: + """Return a list containing n references to x.""" + return [x]*n + + + def longest(x: A, y: A) -> A: + """Return the longest of two strings.""" + return x if len(x) >= len(y) else y + ``` + +更多关于类型注解的写法请参考 [typing](https://docs.python.org/3/library/typing.html)。 + +#### 类型注解检查工具 + +[mypy](https://mypy.readthedocs.io/en/stable/) 是一个 Python 静态类型检查工具。根据你的类型注解,mypy 会检查传参、赋值等操作是否符合类型注解,从而避免可能出现的 bug。 + +例如如下的一个 Python 脚本文件 test.py: + +```python +def foo(var: int) -> float: + return float(var) + +a: str = foo('2.0') +b: int = foo('3.0') # type: ignore +``` + +运行 mypy test.py 可以得到如下检查结果,分别指出了第 4 行在函数调用和返回值赋值两处类型错误。而第 5 行同样存在两个类型错误,由于使用了 type: ignore 而被忽略了,只有部分特殊情况可能需要此类忽略。 + +``` +test.py:4: error: Incompatible types in assignment (expression has type "float", variable has type "int") +test.py:4: error: Argument 1 to "foo" has incompatible type "str"; expected "int" +Found 2 errors in 1 file (checked 1 source file) +``` diff --git a/docs/zh_cn/notes/projects.md b/docs/zh_cn/community/projects.md similarity index 100% rename from docs/zh_cn/notes/projects.md rename to docs/zh_cn/community/projects.md diff --git a/docs/zh_cn/conf.py b/docs/zh_cn/conf.py index 9d397fccb2..beb21b8c3d 100644 --- a/docs/zh_cn/conf.py +++ b/docs/zh_cn/conf.py @@ -31,17 +31,44 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', + 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', + 'sphinx.ext.autosectionlabel', 'sphinx_markdown_tables', 'sphinx_copybutton', + 'sphinx_tabs.tabs', 'myst_parser', ] +extensions.append('notfound.extension') # enable customizing not-found page + +extensions.append('autoapi.extension') +autoapi_type = 'python' +autoapi_dirs = ['../../mmedit'] +autoapi_add_toctree_entry = False +autoapi_template_dir = '_templates' +# autoapi_options = ['members', 'undoc-members', 'show-module-summary'] + +# # Core library for html generation from docstrings +# extensions.append('sphinx.ext.autodoc') +# extensions.append('sphinx.ext.autodoc.typehints') +# # Enable 'expensive' imports for sphinx_autodoc_typehints +# set_type_checking_flag = True +# # Sphinx-native method. Not as good as sphinx_autodoc_typehints +# autodoc_typehints = "description" + +# extensions.append('sphinx.ext.autosummary') # Create neat summary tables +# autosummary_generate = True # Turn on sphinx.ext.autosummary +# # Add __init__ doc (ie. 
params) to class summaries +# autoclass_content = 'both' +# autodoc_skip_member = [] +# # If no docstring, inherit from base class +# autodoc_inherit_docstrings = True + autodoc_mock_imports = [ - 'mmedit.version', 'mmcv.ops.ModulatedDeformConv2d', - 'mmcv.ops.modulated_deform_conv2d', 'mmcv._ext' + 'mmedit.version', 'mmcv._ext', 'mmcv.ops.ModulatedDeformConv2d', + 'mmcv.ops.modulated_deform_conv2d', 'clip', 'resize_right', 'pandas' ] source_suffix = { @@ -54,7 +81,7 @@ copybutton_prompt_is_regexp = True # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ['../en/_templates'] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -107,17 +134,26 @@ html_css_files = ['css/readthedocs.css'] myst_enable_extensions = ['colon_fence'] +myst_heading_anchors = 3 language = 'zh_CN' # The master toctree document. -master_doc = 'index' +root_doc = 'index' +notfound_template = '404.html' def builder_inited_handler(app): - # subprocess.run(['./merge_docs.sh']) - subprocess.run(['./stat.py']) + subprocess.run(['python', './.dev_scripts/update_model_zoo.py']) + subprocess.run(['python', './.dev_scripts/update_dataset_zoo.py']) + + +def skip_member(app, what, name, obj, skip, options): + if what == 'package' or what == 'module': + skip = True + return skip def setup(app): app.connect('builder-inited', builder_inited_handler) + app.connect('autoapi-skip-member', skip_member) diff --git a/docs/zh_cn/config.md b/docs/zh_cn/config.md deleted file mode 100644 index 6be88c4c47..0000000000 --- a/docs/zh_cn/config.md +++ /dev/null @@ -1,23 +0,0 @@ -# 教程 1: 了解配置文件 - -mmedit 采用基于 python 文件的配置系统,您可以在 `$MMEditing/configs` 下查看预置的配置文件。 - -## 配置文件命名风格 - -配置文件按照下面的风格命名。我们建议社区贡献者使用同样的风格。 - -```bash -{model}_[model setting]_{backbone}_[refiner]_[norm setting]_[misc]_[gpu x batch_per_gpu]_{schedule}_{dataset} -``` - -`{xxx}` 是必填字段,`[yyy]` 是可选的。 - -- `{model}`: 模型种类,例如 `srcnn`, `dim` 等等。 -- `[model setting]`: 特定设置一些模型,例如,输入图像 `resolution` , 训练 `stage name`。 -- `{backbone}`: 主干网络种类,例如 `r50` (ResNet-50)、`x101` (ResNeXt-101)。 -- `{refiner}`: 精炼器种类,例如 `pln` 简单精炼器模型 -- `[norm_setting]`: 指定归一化设置,默认为批归一化,其他归一化可以设为: `bn`(批归一化), `gn` (组归一化), `syncbn` (同步批归一化)。 -- `[misc]`: 模型中各式各样的设置/插件,例如 `dconv`, `gcb`, `attention`, `mstrain`。 -- `[gpu x batch_per_gpu]`: GPU数目 和每个 GPU 的样本数, 默认为 `8x2 `。 -- `{schedule}`: 训练策略,如 `20k`, `100k` 等,意思是 `20k` 或 `100k` 迭代轮数。 -- `{dataset}`: 数据集,如 `places`(图像补全)、`comp1k`(抠图)、`div2k`(图像恢复)和 `paired`(图像生成)。 diff --git a/docs/zh_cn/demo.md b/docs/zh_cn/demo.md deleted file mode 100644 index 8c00b9282f..0000000000 --- a/docs/zh_cn/demo.md +++ /dev/null @@ -1,237 +0,0 @@ -### 演示 - -我们针对特定任务提供了一些脚本,可以对单张图像进行推理。 - -#### 图像补全 - -您可以使用以下命令,输入一张测试图像以及缺损部位的遮罩图像,实现对测试图像的补全。 - -```shell -python demo/inpainting_demo.py \ - ${CONFIG_FILE} \ - ${CHECKPOINT_FILE} \ - ${MASKED_IMAGE_FILE} \ - ${MASK_FILE} \ - ${SAVE_FILE} \ - [--imshow] \ - [--device ${GPU_ID}] -``` - -如果指定了 --imshow ,演示程序将使用 opencv 显示图像。例子: - -```shell -python demo/inpainting_demo.py \ - configs/global_local/gl_8xb12_celeba-256x256.py \ - https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.pth \ - tests/data/inpainting/celeba_test.png \ - tests/data/inpainting/bbox_mask.png \ - tests/data/inpainting/inpainting_celeba.png -``` - -补全结果将保存在 `tests/data/inpainting/inpainting_celeba.png` 中。 - -#### 抠图 - -您可以使用以下命令,输入一张测试图像以及对应的三元图(trimap),实现对测试图像的抠图。 - -```shell 
-python demo/matting_demo.py \ - ${CONFIG_FILE} \ - ${CHECKPOINT_FILE} \ - ${IMAGE_FILE} \ - ${TRIMAP_FILE} \ - ${SAVE_FILE} \ - [--imshow] \ - [--device ${GPU_ID}] -``` - -如果指定了 --imshow ,演示程序将使用 opencv 显示图像。例子: - -```shell -python demo/matting_demo.py \ - configs/dim/dim_stage3-v16-pln_1000k-1xb1_comp1k.py \ - https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_SAD-50.6_20200609_111851-647f24b6.pth \ - tests/data/matting_dataset/merged/GT05.jpg \ - tests/data/matting_dataset/trimap/GT05.png \ - tests/data/matting_dataset/pred/GT05.png -``` - -预测的 alpha 遮罩将保存在 `tests/data/matting_dataset/pred/GT05.png` 中。 - -#### 图像超分辨率 - -您可以使用以下命令来测试要恢复的图像。 - -```shell -python demo/restoration_demo.py \ - ${CONFIG_FILE} \ - ${CHECKPOINT_FILE} \ - ${IMAGE_FILE} \ - ${SAVE_FILE} \ - [--imshow] \ - [--device ${GPU_ID}] \ - [--ref-path ${REF_PATH}] -``` - -如果指定了 `--imshow` ,演示程序将使用 opencv 显示图像。例子: - -```shell -python demo/restoration_demo.py \ - configs/esrgan/esrgan_x4c64b23g32_400k-1xb16_div2k.py \ - https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508-f8ccaf3b.pth \ - tests/data/image/lq/baboon_x4.png \ - demo/demo_out_baboon.png -``` - -您可以通过提供 `--ref-path` 参数来测试基于参考的超分辨率算法。例子: - -```shell -python demo/restoration_demo.py \ - configs/ttsr/ttsr-gan_x4c64b16_500k-1xb9_CUFED.py \ - https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.pth \ - tests/data/frames/sequence/gt/sequence_1/00000000.png \ - demo/demo_out.png \ - --ref-path tests/data/frames/sequence/gt/sequence_1/00000001.png -``` - -#### 人脸图像超分辨率 - -您可以使用以下命令来测试要恢复的人脸图像。 - -```shell -python demo/restoration_face_demo.py \ - ${CONFIG_FILE} \ - ${CHECKPOINT_FILE} \ - ${IMAGE_FILE} \ - ${SAVE_FILE} \ - [--upscale-factor] \ - [--face-size] \ - [--imshow] \ - [--device ${GPU_ID}] -``` - -如果指定了 --imshow ,演示程序将使用 opencv 显示图像。例子: - -```shell -python demo/restoration_face_demo.py \ - configs/glean/glean_in128out1024_300k-4xb2_ffhq-celeba-hq.py \ - https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812-acbcb04f.pth \ - tests/data/image/face/000001.png \ - tests/data/image/face/pred.png \ - --upscale-factor 4 -``` - -#### 视频超分辨率 - -您可以使用以下命令来测试视频以进行恢复。 - -```shell -python demo/restoration_video_demo.py \ - ${CONFIG_FILE} \ - ${CHECKPOINT_FILE} \ - ${INPUT_DIR} \ - ${OUTPUT_DIR} \ - [--window-size=${WINDOW_SIZE}] \ - [--device ${GPU_ID}] -``` - -它同时支持滑动窗口框架和循环框架。 例子: - -EDVR: - -```shell -python demo/restoration_video_demo.py \ - configs/edvr/edvrm_wotsa_reds_600k-8xb8.py \ - https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522-0570e567.pth \ - data/Vid4/BIx4/calendar/ \ - demo/output \ - --window-size=5 -``` - -BasicVSR: - -```shell -python demo/restoration_video_demo.py \ - configs/basicvsr/basicvsr_2xb4_reds4.py \ - https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20120409-0e599677.pth \ - data/Vid4/BIx4/calendar/ \ - demo/output -``` - -复原的视频将保存在 ` demo/output/` 中。 - -#### 视频插帧 - -您可以使用以下命令来测试视频插帧。 - -```shell -python demo/video_interpolation_demo.py \ - ${CONFIG_FILE} \ - ${CHECKPOINT_FILE} \ - ${INPUT_DIR} \ - ${OUTPUT_DIR} \ - [--fps-multiplier ${FPS_MULTIPLIER}] \ - [--fps ${FPS}] -``` - -`${INPUT_DIR}` 和 `${OUTPUT_DIR}` 可以是视频文件路径或存放一系列有序图像的文件夹。 -若 `${OUTPUT_DIR}` 是视频文件地址,其帧率可由输入视频帧率和 `fps_multiplier` 共同决定,也可由 `fps` 直接给定(其中前者优先级更高)。例子: - -由输入视频帧率和 `fps_multiplier` 共同决定输出视频的帧率: - -```shell -python 
demo/video_interpolation_demo.py \ - configs/cain/cain_g1b32_1xb5_vimeo90k-triplet.py \ - https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_320k_vimeo-triple_20220117-647f3de2.pth \ - tests/data/frames/test_inference.mp4 \ - tests/data/frames/test_inference_vfi_out.mp4 \ - --fps-multiplier 2.0 -``` - -由 `fps` 直接给定输出视频的帧率: - -```shell -python demo/video_interpolation_demo.py \ - configs/cain/cain_g1b32_1xb5_vimeo90k-triplet.py \ - https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_320k_vimeo-triple_20220117-647f3de2.pth \ - tests/data/frames/test_inference.mp4 \ - tests/data/frames/test_inference_vfi_out.mp4 \ - --fps 60.0 -``` - -#### 图像生成 - -```shell -python demo/generation_demo.py \ - ${CONFIG_FILE} \ - ${CHECKPOINT_FILE} \ - ${IMAGE_FILE} \ - ${SAVE_FILE} \ - [--unpaired-path ${UNPAIRED_IMAGE_FILE}] \ - [--imshow] \ - [--device ${GPU_ID}] -``` - -如果指定了 `--unpaired-path` (用于 CycleGAN),模型将执行未配对的图像到图像的转换。 如果指定了 `--imshow` ,演示也将使用opencv显示图像。 例子: - -针对配对数据: - -```shell -python demo/generation_demo.py \ - configs/example_config.py \ - work_dirs/example_exp/example_model_20200202.pth \ - demo/demo.jpg \ - demo/demo_out.jpg -``` - -针对未配对数据(用 opencv 显示图像): - -```shell -python demo/generation_demo.py 、 - configs/example_config.py \ - work_dirs/example_exp/example_model_20200202.pth \ - demo/demo.jpg \ - demo/demo_out.jpg \ - --unpaired-path demo/demo_unpaired.jpg \ - --imshow -``` diff --git a/docs/zh_cn/notes/faq.md b/docs/zh_cn/faq.md similarity index 100% rename from docs/zh_cn/notes/faq.md rename to docs/zh_cn/faq.md diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md deleted file mode 100644 index 4572c593f1..0000000000 --- a/docs/zh_cn/get_started.md +++ /dev/null @@ -1 +0,0 @@ -# 入门指引(待更新) diff --git a/docs/zh_cn/install.md b/docs/zh_cn/get_started/install.md similarity index 99% rename from docs/zh_cn/install.md rename to docs/zh_cn/get_started/install.md index a213d7b7cc..fb02528651 100644 --- a/docs/zh_cn/install.md +++ b/docs/zh_cn/get_started/install.md @@ -1,3 +1,5 @@ +# 安装 (待更新) + ```{warning} 中文版本的安装文档过于陈旧,请参考英文版本。 如果您希望帮助翻译英文版安装文档,请通过issue联系我们 diff --git a/docs/zh_cn/get_started/overview.md b/docs/zh_cn/get_started/overview.md new file mode 100644 index 0000000000..25ae662d32 --- /dev/null +++ b/docs/zh_cn/get_started/overview.md @@ -0,0 +1,103 @@ +# 概述(待更新) + +# 概述 + +欢迎来到 MMEditing! 在本节中,您将了解 + +- [MMEditing是什么?](#mmediting-是什么) +- [为什么要使用 MMEditing?](#为什么要使用-mmediting) +- [新手入门](#新手入门) +- [基础教程](#基础教程) +- [进阶教程](#进阶教程) + +## MMEditing 是什么? + +MMEditing 是一个供专业人工智能研究人员和机器学习工程师去处理、编辑和合成图像与视频的开源工具箱。 + +MMEditing 允许研究人员和工程师使用最先进的预训练模型,并且可以轻松训练和开发新的定制模型。 + +MMEditing 支持各种基础生成模型,包括: + +- 无条件生成对抗网络 (GANs) +- 条件生成对抗网络 (GANs) +- 内部学习 +- 扩散模型 +- 还有许多其他生成模型即将推出! + +MMEditing 支持各种应用程序,包括: + +- 图像超分辨率 +- 视频超分辨率 +- 视频帧插值 +- 图像修复 +- 图像抠图 +- 图像到图像的翻译 +- 还有许多其他应用程序即将推出! + +
+<div align="center">StyleGAN3 Images</div>
+
+<div align="center">BigGAN Images</div>
+ +## 为什么要使用 MMEditing? + +- **最先进的性能** + + MMEditing 提供最先进的生成模型来处理、编辑和合成图像和视频。 + +- **强大而流行的应用** + + MMEditing 支持流行的*修复*、*抠图*、*超分辨率* 和*生成* 等应用。 具体来说,MMEditing 支持 GAN 插值、GAN 投影、GAN 编辑和许多其他流行的 GAN 的应用。 是时候玩转你的 GAN 了! + +- **全新模块化设计,灵活组合:** + + 我们将 MMEditing 分解为不同的模块,通过组合不同的模块可以轻松构建定制的模型。 具体来说,提出了一种新的复杂损失模块设计,用于自定义模块之间的链接,可以实现不同模块之间的灵活组合。([损失函数](../howto/losses.md)) + +- **高效的分布式训练:** + + 在[MMSeparateDistributedDataParallel](https://github.com/open-mmlab/mmengine/blob/main/mmengine/model/wrappers/seperate_distributed.py)的支持下,可以轻松实现动态架构的分布式训练。 + +## 新手入门 + +安装说明见[安装](install.md)。 + +## 基础教程 + +对于初学者,我们建议从 [基础教程](../user_guides/config.md) 学习 MMEditing 的基本用法。 + +## 进阶教程 + +对于熟悉 MMEditing 的用户,可能想了解 MMEditing 的进阶实用,以及如何扩展算法库,如何使用多个算法库框架等高级用法,请参考[进阶教程](../advanced_guides/evaluator.md)。 + +## 开发指南 + +想要使用 MMEditing 进行深度开发的用户,可以参考[开发指南](../howto/models.md)。 diff --git a/docs/zh_cn/quick_run.md b/docs/zh_cn/get_started/quick_run.md similarity index 98% rename from docs/zh_cn/quick_run.md rename to docs/zh_cn/get_started/quick_run.md index eef7d7543b..0552ef67ef 100644 --- a/docs/zh_cn/quick_run.md +++ b/docs/zh_cn/get_started/quick_run.md @@ -1,3 +1,5 @@ +# 快速运行 (待更新) + ## 使用预训练模型进行推理 我们提供用于在完整数据集上进行预训练模型评估和特定任务图像演示的测试脚本。 @@ -52,7 +54,7 @@ GPUS=8 ./tools/slurm_test.sh dev test configs/example_config.py work_dirs/exampl - `--deterministic`: 与 `--seed` 相关,此参数决定是否为 CUDNN 后端设置确定性的选项。如果指定该参数,会将 `torch.backends.cudnn.deterministic` 设置为 `True`,将 `torch.backends.cudnn.benchmark` 设置为 `False`。 - `--cfg-options`: 如果指明,这里的键值对将会被合并到配置文件中。 -注:目前,我们不使用像 [MMDetection](https://github.com/open-mmlab/mmdetection) 那样的 `--eval` 参数来指定评估指标。 评估指标在配置文件中给出(参见 [config.md](config.md))。 +注:目前,我们不使用像 [MMDetection](https://github.com/open-mmlab/mmdetection) 那样的 `--eval` 参数来指定评估指标。 评估指标在配置文件中给出(参见 [config.md](../user_guides/config.md))。 ## 训练一个模型 diff --git a/docs/zh_cn/advanced_guides/dataset.md b/docs/zh_cn/howto/dataset.md similarity index 100% rename from docs/zh_cn/advanced_guides/dataset.md rename to docs/zh_cn/howto/dataset.md diff --git a/docs/zh_cn/howto/losses.md b/docs/zh_cn/howto/losses.md new file mode 100644 index 0000000000..bf04fde426 --- /dev/null +++ b/docs/zh_cn/howto/losses.md @@ -0,0 +1,525 @@ +# 如何设计自己的损失函数 + +`losses` 在 `MMEditing` 中注册为 `LOSSES`。 +在 MMEditing 中设计自己的损失函数,步骤和在 MMEditing 中自定义任何其他模型类似。 +本节主要具体介绍了如何在 MMEditing 中实现自定义的损失函数。 +本教程建议您在实现自定义的损失函数时,应该遵循本教程相同的设计,这样在我们的框架中使用您新定义的损失函数,就不需要额外的工作。 + +本指南包括: + +- [设计你自己的损失函数](#如何设计自己的损失函数) + - [支持损失函数介绍](#支持的损失函数介绍) + - [设计一个新的损失函数](#设计一个新的损失函数) + - [MSELoss 的一个例子](#MSELoss-的一个例子) + - [DiscShiftLoss 的一个例子](#DiscShiftLoss-的一个例子) + - [GANWithCustomizedLoss 的一个例子](#GANWithCustomizedLoss-的一个例子) + - [可用损失函数](#可用损失函数) + - [常规损失函数](#常规损失函数) + - [损失函数组件](#损失函数组件) + +## 支持的损失函数介绍 + +为了方便使用,您可以直接使用我们为具体算法设置的默认损失计算过程,如lsgan、biggan、styleganv2等。 +以`stylegan2`为例,我们使用R1梯度惩罚和生成器路径长度正则化作为可配置损失,用户可以调整相关参数,如 `r1_loss_weight` 和 `g_reg_weight`。 + +```python +# stylegan2_base.py +loss_config =dict( + r1_loss_weight=10。 / 2. 
* d_reg_interval, + r1_interval=d_reg_interval, + norm_mode='HWC', + g_reg_interval=g_reg_interval, + g_reg_weight=2。 * g_reg_interval, + pl_batch_shrink=2) + +model=dict( + type='StyleGAN2', + xxx, + loss_config=loss_config) +``` + +## 设计一个新的损失函数 + +### MSELoss 的一个例子 + +一般来说,要实现一个损失模块,我们会编写一个函数实现,然后用类实现包装它。 以MSELoss为例: + +```python +@masked_loss +def mse_loss(pred,target): + return F.mse_loss(pred,target,reduction='none') + +@LOSSES.register_module() +Class MSELoss(nn.Module): + + def __init__(self, loss_weight=1.0, reduction='mean', sample_wise=False): + # 代码可以在``mmedit/models/losses/pixelwise_loss.py``中找到 + + def forward(self, pred, target, weight=None, **kwargs): + # 代码可以在``mmedit/models/losses/pixelwise_loss.py``中找到 +``` + +根据这个损失函数的定义,我们现在可以简单地通过在配置文件中定义它来使用: + +```python +pixel_loss=dict(type='MSELoss', loss_weight=1.0, reduction='mean') +``` + +请注意,上面的`pixel_loss`必须在模型中定义。 详情请参考[自定义模型](./models.md)。 与自定义模型类似,为了使用您自己实现的损失函数,您需要在编写后在`mmedit/models/losses/__init__.py`中导入该损失函数。 + +### DiscShiftLoss 的一个例子 + +一般来说,要实现一个损失模块,我们会编写一个函数实现,然后用类实现包装它。 +但是,在 MMEditing 中,我们提供了另一个统一的接口 data_info 供用户定义输入参数和数据项之间的映射。 + +```python +@weighted_loss +def disc_shift_loss(pred): + return pred**2 + +@MODULES.register_module() +Class DiscShiftLoss(nn.Module): + + def __init__(self, loss_weight=1.0, data_info=None): + super(DiscShiftLoss,self).__init__() + # 代码可以在``mmgen/models/losses/disc_auxiliary_loss.py``中找到 + + def forward(self, *args, **kwargs): + # 代码可以在``mmgen/models/losses/disc_auxiliary_loss.py``中找到 +``` + +这种损失模块设计的目标是允许在生成模型(`MODELS`)中自动使用它,而无需其他复杂代码来定义数据和关键字参数之间的映射。 因此,与 OpenMMLab 中的其他框架不同,我们的损失模块包含一个特殊的关键字 data_info,它是一个定义输入参数与生成模型数据之间映射的字典。 以`DiscShiftLoss`为例,用户在编写配置文件时,可能会用到这个loss,如下: + +```python +dict(type='DiscShiftLoss', + loss_weight=0.001 * 0.5, + data_info=dict(pred='disc_pred_real')) +``` + +`data_info` 中的信息告诉模块使用 `disc_pred_real` 数据作为 `pred` 参数的输入张量。 一旦 `data_info` 不为 `None`,我们的损失模块将自动构建计算图。 + +```python +@MODULES.register_module() +class DiscShiftLoss(nn.Module): + + def __init__(self, loss_weight=1.0, data_info=None): + super(DiscShiftLoss, self).__init__() + self.loss_weight = loss_weight + self.data_info = data_info + + def forward(self, *args, **kwargs): + # use data_info to build computational path + if self.data_info is not None: + # parse the args and kwargs + if len(args) == 1: + assert isinstance(args[0], dict), ( + 'You should offer a dictionary containing network outputs ' + 'for building up computational graph of this loss module.') + outputs_dict = args[0] + elif 'outputs_dict' in kwargs: + assert len(args) == 0, ( + 'If the outputs dict is given in keyworded arguments, no' + ' further non-keyworded arguments should be offered.') + outputs_dict = kwargs.pop('outputs_dict') + else: + raise NotImplementedError( + 'Cannot parsing your arguments passed to this loss module.' + ' Please check the usage of this module') + # link the outputs with loss input args according to self.data_info + loss_input_dict = { + k: outputs_dict[v] + for k, v in self.data_info.items() + } + kwargs.update(loss_input_dict) + kwargs.update(dict(weight=self.loss_weight)) + return disc_shift_loss(**kwargs) + else: + # if you have not define how to build computational graph, this + # module will just directly return the loss as usual. 
+ return disc_shift_loss(*args, weight=self.loss_weight, **kwargs) + + @staticmethod + def loss_name(): + return 'loss_disc_shift' + +``` + +如这部分代码所示,一旦用户设置了“data_info”,损失模块将收到一个包含所有必要数据和模块的字典,该字典由训练过程中的“MODELS”提供。 如果此字典作为非关键字参数给出,则应将其作为第一个参数提供。 如果您使用关键字参数,请将其命名为 `outputs_dict`。 + +### GANWithCustomizedLoss 的一个例子 + +为了构建计算图,生成模型必须提供包含各种数据的字典。 仔细观察任何生成模型,你会发现我们将各种特征和模块收集到字典中。 我们在这里提供了一个自定义的`GANWithCustomizedLoss`来展示这个过程。 + +```python +class GANWithCustomizedLoss(BaseModel): + + def __init__(self, gan_loss, disc_auxiliary_loss, gen_auxiliary_loss, + *args, **kwargs): + # ... + if gan_loss is not None: + self.gan_loss = MODULES.build(gan_loss) + else: + self.gan_loss = None + + if disc_auxiliary_loss: + self.disc_auxiliary_losses = MODULES.build(disc_auxiliary_loss) + if not isinstance(self.disc_auxiliary_losses, nn.ModuleList): + self.disc_auxiliary_losses = nn.ModuleList( + [self.disc_auxiliary_losses]) + else: + self.disc_auxiliary_loss = None + + if gen_auxiliary_loss: + self.gen_auxiliary_losses = MODULES.build(gen_auxiliary_loss) + if not isinstance(self.gen_auxiliary_losses, nn.ModuleList): + self.gen_auxiliary_losses = nn.ModuleList( + [self.gen_auxiliary_losses]) + else: + self.gen_auxiliary_losses = None + + def train_step(self, data: dict, + optim_wrapper: OptimWrapperDict) -> Dict[str, Tensor]: + # ... + + # get data dict to compute losses for disc + data_dict_ = dict( + iteration=curr_iter, + gen=self.generator, + disc=self.discriminator, + disc_pred_fake=disc_pred_fake, + disc_pred_real=disc_pred_real, + fake_imgs=fake_imgs, + real_imgs=real_imgs) + + loss_disc, log_vars_disc = self._get_disc_loss(data_dict_) + + # ... + + def _get_disc_loss(self, outputs_dict): + # Construct losses dict. If you hope some items to be included in the + # computational graph, you have to add 'loss' in its name. Otherwise, + # items without 'loss' in their name will just be used to print + # information. + losses_dict = {} + # gan loss + losses_dict['loss_disc_fake'] = self.gan_loss( + outputs_dict['disc_pred_fake'], target_is_real=False, is_disc=True) + losses_dict['loss_disc_real'] = self.gan_loss( + outputs_dict['disc_pred_real'], target_is_real=True, is_disc=True) + + # disc auxiliary loss + if self.with_disc_auxiliary_loss: + for loss_module in self.disc_auxiliary_losses: + loss_ = loss_module(outputs_dict) + if loss_ is None: + continue + + # the `loss_name()` function return name as 'loss_xxx' + if loss_module.loss_name() in losses_dict: + losses_dict[loss_module.loss_name( + )] = losses_dict[loss_module.loss_name()] + loss_ + else: + losses_dict[loss_module.loss_name()] = loss_ + loss, log_var = self.parse_losses(losses_dict) + + return loss, log_var + +``` + +在这里,`_get_disc_loss` 将帮助自动组合各种损失函数。 + +因此,只要用户设计相同规则的损失模块,就可以在生成模型的训练中插入任何一种损失,无需对模型代码进行其他修改。 您只需要在配置文件中定义 `data_info` 即可。 + +## 可用损失函数 + +我们在配置中列出了可用的损失示例,如下所示。 + +### 常规损失函数 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+<table>
+<thead>
+  <tr>
+    <th>Method</th>
+    <th>class</th>
+    <th>Example</th>
+  </tr>
+</thead>
+<tbody>
+
+<tr>
+<td>vanilla gan loss</td>
+<td>mmedit.models.GANLoss</td>
+<td>
+
+```python
+# dic gan
+loss_gan=dict(
+    type='GANLoss',
+    gan_type='vanilla',
+    loss_weight=0.001,
+)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>lsgan loss</td>
+<td>mmedit.models.GANLoss</td>
+<td></td>
+</tr>
+
+<tr>
+<td>wgan loss</td>
+<td>mmedit.models.GANLoss</td>
+<td>
+
+```python
+# deepfillv1
+loss_gan=dict(
+    type='GANLoss',
+    gan_type='wgan',
+    loss_weight=0.0001,
+)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>hinge loss</td>
+<td>mmedit.models.GANLoss</td>
+<td>
+
+```python
+# deepfillv2
+loss_gan=dict(
+    type='GANLoss',
+    gan_type='hinge',
+    loss_weight=0.1,
+)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>smgan loss</td>
+<td>mmedit.models.GANLoss</td>
+<td>
+
+```python
+# aot-gan
+loss_gan=dict(
+    type='GANLoss',
+    gan_type='smgan',
+    loss_weight=0.01,
+)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>gradient penalty</td>
+<td>mmedit.models.GradientPenaltyLoss</td>
+<td>
+
+```python
+# deepfillv1
+loss_gp=dict(type='GradientPenaltyLoss', loss_weight=10.)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>discriminator shift loss</td>
+<td>mmedit.models.DiscShiftLoss</td>
+<td>
+
+```python
+# deepfillv1
+loss_disc_shift=dict(type='DiscShiftLoss', loss_weight=0.001)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>clip loss</td>
+<td>mmedit.models.CLIPLoss</td>
+<td></td>
+</tr>
+
+<tr>
+<td>L1 composition loss</td>
+<td>mmedit.models.L1CompositionLoss</td>
+<td></td>
+</tr>
+
+<tr>
+<td>MSE composition loss</td>
+<td>mmedit.models.MSECompositionLoss</td>
+<td></td>
+</tr>
+
+<tr>
+<td>charbonnier composition loss</td>
+<td>mmedit.models.CharbonnierCompLoss</td>
+<td>
+
+```python
+# dim
+loss_comp=dict(type='CharbonnierCompLoss', loss_weight=0.5)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>face id loss</td>
+<td>mmedit.models.FaceIdLoss</td>
+<td></td>
+</tr>
+
+<tr>
+<td>light cnn feature loss</td>
+<td>mmedit.models.LightCNNFeatureLoss</td>
+<td>
+
+```python
+# dic gan
+feature_loss=dict(
+    type='LightCNNFeatureLoss',
+    pretrained=pretrained_light_cnn,
+    loss_weight=0.1,
+    criterion='l1')
+```
+
+</td>
+</tr>
+
+<tr>
+<td>gradient loss</td>
+<td>mmedit.models.GradientLoss</td>
+<td></td>
+</tr>
+
+<tr>
+<td>l1 loss</td>
+<td>mmedit.models.L1Loss</td>
+<td>
+
+```python
+# dic gan
+pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean')
+```
+
+</td>
+</tr>
+
+<tr>
+<td>mse loss</td>
+<td>mmedit.models.MSELoss</td>
+<td>
+
+```python
+# dic gan
+align_loss=dict(type='MSELoss', loss_weight=0.1, reduction='mean')
+```
+
+</td>
+</tr>
+
+<tr>
+<td>charbonnier loss</td>
+<td>mmedit.models.CharbonnierLoss</td>
+<td>
+
+```python
+# dim
+loss_alpha=dict(type='CharbonnierLoss', loss_weight=0.5)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>masked total variation loss</td>
+<td>mmedit.models.MaskedTVLoss</td>
+<td>
+
+```python
+# partial conv
+loss_tv=dict(
+    type='MaskedTVLoss',
+    loss_weight=0.1
+)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>perceptual loss</td>
+<td>mmedit.models.PerceptualLoss</td>
+<td>
+
+```python
+# real_basicvsr
+perceptual_loss=dict(
+    type='PerceptualLoss',
+    layer_weights={
+        '2': 0.1,
+        '7': 0.1,
+        '16': 1.0,
+        '25': 1.0,
+        '34': 1.0,
+    },
+    vgg_type='vgg19',
+    perceptual_weight=1.0,
+    style_weight=0,
+    norm_img=False)
+```
+
+</td>
+</tr>
+
+<tr>
+<td>transferal perceptual loss</td>
+<td>mmedit.models.TransferalPerceptualLoss</td>
+<td>
+
+```python
+# ttsr
+transferal_perceptual_loss=dict(
+    type='TransferalPerceptualLoss',
+    loss_weight=1e-2,
+    use_attention=False,
+    criterion='mse')
+```
+
+</td>
+</tr>
+
+</tbody>
+</table>
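+上表中的损失函数通常作为模型配置中的一个字段传入,由具体算法在训练时调用。下面给出一个仅作示意的超分辨率 GAN 模型配置片段(生成器、判别器及各项权重的取值均为假设,实际请以对应算法的官方配置为准),展示像素损失、感知损失与对抗损失的组合方式:
+
+```python
+# 仅为示意:在模型配置中组合多种损失函数(字段取值均为假设)
+model = dict(
+    type='SRGAN',
+    generator=dict(type='MSRResNet', in_channels=3, out_channels=3),
+    discriminator=dict(type='ModifiedVGG', in_channels=3, mid_channels=64),
+    pixel_loss=dict(type='L1Loss', loss_weight=1e-2, reduction='mean'),
+    perceptual_loss=dict(
+        type='PerceptualLoss',
+        layer_weights={'34': 1.0},
+        vgg_type='vgg19',
+        perceptual_weight=1.0,
+        style_weight=0,
+        norm_img=False),
+    gan_loss=dict(type='GANLoss', gan_type='vanilla', loss_weight=5e-3))
+```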
+ +### 损失函数组件 + +对于“GANWithCustomizedLoss”,我们提供了几个组件来构建自定义损失。 + +| Method | class | +| ------------------------------------ | ------------------------------------------- | +| clip loss component | mmedit.models.CLIPLossComps | +| discriminator shift loss component | mmedit.models. DiscShiftLossComps | +| gradient penalty loss component | mmedit.models. GradientPenaltyLossComps | +| r1 gradient penalty component | mmedit.models. R1GradientPenaltyComps | +| face Id loss component | mmedit.models. FaceIdLossComps | +| gan loss component | mmedit.models. GANLossComps | +| generator path regularizer component | mmedit.models.GeneratorPathRegularizerComps | diff --git a/docs/zh_cn/advanced_guides/models/customize_models.md b/docs/zh_cn/howto/models.md similarity index 100% rename from docs/zh_cn/advanced_guides/models/customize_models.md rename to docs/zh_cn/howto/models.md diff --git a/docs/zh_cn/advanced_guides/transforms.md b/docs/zh_cn/howto/transforms.md similarity index 100% rename from docs/zh_cn/advanced_guides/transforms.md rename to docs/zh_cn/howto/transforms.md diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 528bdf98c5..7a8e14da6e 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -4,53 +4,124 @@ 您可以在页面左下角切换中英文文档。 .. note:: - 目前英文版有更多的内容,如果您希望帮助我们翻译一部分文档,可以通过issue联系我们。 + 目前英文版有更多的内容,欢迎加入我们一起提升中文文档! + 您可以通过 issue,discussion 或者我们的社区群来联系我们! + .. toctree:: - :maxdepth: 3 - :caption: 开始你的第一步 + :maxdepth: 1 + :caption: MMEditing 社区 + + community/contributing.md + community/projects.md - 概述 - 入门指引 .. toctree:: - :maxdepth: 2 - :caption: 使用指南 + :maxdepth: 1 + :caption: 新手入门 + + 概述 + 安装 + 快速运行 - user_guides/index.rst + +.. toctree:: + :maxdepth: 1 + :caption: 基础教程 + + user_guides/config.md + user_guides/dataset_prepare.md + user_guides/inference.md + user_guides/train_test.md + user_guides/metrics.md + user_guides/visualization.md + user_guides/useful_tools.md + user_guides/deploy.md .. toctree:: :maxdepth: 2 :caption: 进阶教程 - advanced_guides/index.rst + advanced_guides/evaluator.md + advanced_guides/structures.md + advanced_guides/data_preprocessor.md + advanced_guides/data_flow.md .. toctree:: :maxdepth: 1 - :caption: 迁移 + :caption: 开发指南 + + howto/models.md + howto/dataset.md + howto/transforms.md + howto/losses.md - migration.md .. toctree:: :maxdepth: 1 - :caption: 接口文档(英文) + :caption: 常见问题 + + faq.md - api.rst .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :caption: 模型库 - model_zoo.md + model_zoo/index.rst + + +.. toctree:: + :maxdepth: 1 + :caption: 数据集库 + + dataset_zoo/index.rst + +.. toctree:: + :maxdepth: 1 + :caption: 变更日志 + + changelog.md + + +.. toctree:: + :maxdepth: 2 + :caption: 接口文档(英文) + + mmedit.apis.inferencers + mmedit.structures + mmedit.datasets + mmedit.datasets.transforms + mmedit.evaluation + mmedit.visualization + mmedit.engine.hooks + mmedit.engine.logging + mmedit.engine.optimizers + mmedit.engine.runner + mmedit.engine.schedulers + mmedit.models.base_archs + mmedit.models.base_models + mmedit.models.losses + mmedit.models.data_preprocessors + mmedit.models.utils + mmedit.models.editors + mmedit.utils .. toctree:: :maxdepth: 1 - :caption: 说明 + :caption: 迁移指南 + + migration/overview.md + migration/runtime.md + migration/models.md + migration/eval_test.md + migration/schedule.md + migration/data.md + migration/distributed_train.md + migration/optimizers.md + migration/visualization.md + migration/amp.md - notes/contribution_guide.md - notes/projects.md - notes/changelog.md - notes/faq.md .. 
toctree:: :caption: 语言切换 diff --git a/docs/zh_cn/merge_docs.sh b/docs/zh_cn/merge_docs.sh deleted file mode 100755 index 7477f7e88c..0000000000 --- a/docs/zh_cn/merge_docs.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -sed -i '$a\\n

\n' ../../configs/inpainting/*/README_zh-CN.md -sed -i '$a\\n

\n' ../../configs/*/README_zh-CN.md -sed -i '$a\\n

\n' ../../configs/restorers/*/README_zh-CN.md -sed -i '$a\\n

\n' ../../configs/synthesizers/*/README_zh-CN.md - -# gather models -cat ../../configs/inpainting/*/README_zh-CN.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# 补全模型' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmediting/tree/master/=g' >inpainting_models.md -cat ../../configs/*/README_zh-CN.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# 抠图模型' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmediting/tree/master/=g' >mattors_models.md -cat ../../configs/restorers/*/README_zh-CN.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# 超分辨率模型' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmediting/tree/master/=g' >restorers_models.md -cat ../../configs/synthesizers/*/README_zh-CN.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# 生成模型' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmediting/tree/master/=g' >synthesizers_models.md - -# gather datasets -cat ../../tools/data/generation/README_zh-CN.md > generation_datasets.md -cat ../../tools/data/inpainting/README_zh-CN.md > inpainting_datasets.md -cat ../../tools/data/matting/README_zh-CN.md > matting_datasets.md -cat ../../tools/data/super-resolution/README_zh-CN.md > sr_datasets.md -cat ../../tools/data/video-interpolation/README.md > vfi_datasets.md - -sed -i 's=(paired-pix2pix/README_zh-CN.md)=(#paired-dataset-for-pix2pix)=g' generation_datasets.md -sed -i 's=(unpaired-cyclegan/README_zh-CN.md)=(#unpaired-dataset-for-cyclegan)=g' generation_datasets.md -sed -i 's=(paris-street-view/README_zh-CN.md)=(#paris-street-view-dataset)=g' inpainting_datasets.md -sed -i 's=(celeba-hq/README_zh-CN.md)=(#celeba-hq-dataset)=g' inpainting_datasets.md -sed -i 's=(places365/README_zh-CN.md)=(#places365-dataset)=g' inpainting_datasets.md -sed -i 's=(comp1k/README_zh-CN.md)=(#composition-1k-dataset)=g' matting_datasets.md -sed -i 's=(div2k/README_zh-CN.md)=(#div2k-dataset)=g' sr_datasets.md -sed -i 's=(reds/README_zh-CN.md)=(#reds-dataset)=g' sr_datasets.md -sed -i 's=(vimeo90k/README_zh-CN.md)=(#vimeo90k-dataset)=g' sr_datasets.md -sed -i 's=(vimeo90k-triplet/README.md)=(#vimeo90k-triplet-dataset)=g' vfi_datasets.md - -cat ../../tools/data/generation/*/README_zh-CN.md | sed 's/# Preparing/# /g' | sed "s/#/#&/" | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmediting/tree/master/=g' >> generation_datasets.md -cat ../../tools/data/inpainting/*/README_zh-CN.md | sed 's/# Preparing/# /g' | sed "s/#/#&/" | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmediting/tree/master/=g' >> inpainting_datasets.md -cat ../../tools/data/matting/*/README_zh-CN.md | sed 's/# Preparing/# /g' | sed "s/#/#&/" | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmediting/tree/master/=g' >> matting_datasets.md -cat ../../tools/data/super-resolution/*/README_zh-CN.md | sed 's/# Preparing/# /g' | sed "s/#/#&/" | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmediting/tree/master/=g' >> sr_datasets.md - -# merge configs -cat configs/config_*.md | sed "s/#/#&/" >> config.md diff --git a/docs/zh_cn/migration.md b/docs/zh_cn/migration.md deleted file mode 100644 index 46892c7bd9..0000000000 --- a/docs/zh_cn/migration.md +++ /dev/null @@ -1 +0,0 @@ -# 迁移(待更新) diff --git a/docs/zh_cn/migration/amp.md b/docs/zh_cn/migration/amp.md new file mode 100644 index 0000000000..b1f944aff7 --- /dev/null +++ b/docs/zh_cn/migration/amp.md @@ -0,0 +1 @@ +# 混合精度训练的迁移(待更新) diff --git 
a/docs/zh_cn/migration/data.md b/docs/zh_cn/migration/data.md new file mode 100644 index 0000000000..e73a39f1cc --- /dev/null +++ b/docs/zh_cn/migration/data.md @@ -0,0 +1 @@ +# 数据的迁移(待更新) diff --git a/docs/zh_cn/migration/distributed_train.md b/docs/zh_cn/migration/distributed_train.md new file mode 100644 index 0000000000..2cde55e005 --- /dev/null +++ b/docs/zh_cn/migration/distributed_train.md @@ -0,0 +1 @@ +# 分布式训练的迁移(待更新) diff --git a/docs/zh_cn/migration/eval_test.md b/docs/zh_cn/migration/eval_test.md new file mode 100644 index 0000000000..e403806d33 --- /dev/null +++ b/docs/zh_cn/migration/eval_test.md @@ -0,0 +1 @@ +# 评测与测试的迁移(待更新) diff --git a/docs/zh_cn/migration/models.md b/docs/zh_cn/migration/models.md new file mode 100644 index 0000000000..8704a85160 --- /dev/null +++ b/docs/zh_cn/migration/models.md @@ -0,0 +1 @@ +# 模型的迁移(待更新) diff --git a/docs/zh_cn/migration/optimizers.md b/docs/zh_cn/migration/optimizers.md new file mode 100644 index 0000000000..36f4737860 --- /dev/null +++ b/docs/zh_cn/migration/optimizers.md @@ -0,0 +1 @@ +# 优化器的迁移(待更新) diff --git a/docs/zh_cn/migration/overview.md b/docs/zh_cn/migration/overview.md new file mode 100644 index 0000000000..1d98fbb249 --- /dev/null +++ b/docs/zh_cn/migration/overview.md @@ -0,0 +1 @@ +# 概览(待更新) diff --git a/docs/zh_cn/migration/runtime.md b/docs/zh_cn/migration/runtime.md new file mode 100644 index 0000000000..f9a370a95a --- /dev/null +++ b/docs/zh_cn/migration/runtime.md @@ -0,0 +1 @@ +# 运行设置的迁移(待更新) diff --git a/docs/zh_cn/migration/schedule.md b/docs/zh_cn/migration/schedule.md new file mode 100644 index 0000000000..1c51265be7 --- /dev/null +++ b/docs/zh_cn/migration/schedule.md @@ -0,0 +1 @@ +# 调度器的迁移(待更新) diff --git a/docs/zh_cn/migration/visualization.md b/docs/zh_cn/migration/visualization.md new file mode 100644 index 0000000000..a2035e155c --- /dev/null +++ b/docs/zh_cn/migration/visualization.md @@ -0,0 +1 @@ +# 可视化的迁移(待更新) diff --git a/docs/zh_cn/model_zoo.md b/docs/zh_cn/model_zoo.md deleted file mode 100644 index 6314b361a2..0000000000 --- a/docs/zh_cn/model_zoo.md +++ /dev/null @@ -1 +0,0 @@ -# 总览(待更新) diff --git a/docs/zh_cn/notes.md b/docs/zh_cn/notes.md deleted file mode 100644 index b1459db891..0000000000 --- a/docs/zh_cn/notes.md +++ /dev/null @@ -1,3 +0,0 @@ -## 基准 - -详情请参阅 [model_zoo](https://mmediting.readthedocs.io/en/latest/modelzoo.html)。 diff --git a/docs/zh_cn/notes/contribution_guide.md b/docs/zh_cn/notes/contribution_guide.md deleted file mode 100644 index d1cf43e8e1..0000000000 --- a/docs/zh_cn/notes/contribution_guide.md +++ /dev/null @@ -1 +0,0 @@ -# 贡献指南(待更新) diff --git a/docs/zh_cn/overview.md b/docs/zh_cn/overview.md deleted file mode 100644 index e525587c02..0000000000 --- a/docs/zh_cn/overview.md +++ /dev/null @@ -1 +0,0 @@ -# 概述(待更新) diff --git a/docs/zh_cn/tools_scripts.md b/docs/zh_cn/tools_scripts.md deleted file mode 100644 index 8be45280fe..0000000000 --- a/docs/zh_cn/tools_scripts.md +++ /dev/null @@ -1,351 +0,0 @@ -## 实用工具 - -我们在 `tools/` 目录下提供了很多有用的工具。 - -### 获取 FLOP 和参数量(实验性) - -我们提供了一个改编自 [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) 的脚本来计算模型的 FLOP 和参数量。 - -```shell -python tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}] -``` - -例如, - -```shell -python tools/get_flops.py configs/resotorer/srresnet.py --shape 40 40 -``` - -你会得到以下的结果。 - -``` -============================== -Input shape: (3, 40, 40) -Flops: 4.07 GMac -Params: 1.52 M -============================== -``` - -**注**:此工具仍处于实验阶段,我们不保证数字正确。 您可以将结果用于简单的比较,但在技术报告或论文中采用它之前,请仔细检查它。 - 
-(1) FLOPs 与输入形状有关,而参数量与输入形状无关。默认输入形状为 (1, 3, 250, 250)。 -(2) 一些运算符不计入 FLOP,如 GN 和自定义运算符。 -你可以通过修改 [`mmcv/cnn/utils/flops_counter.py`](https://github.com/open-mmlab/mmcv/blob/master/mmcv/cnn/utils/flops_counter.py) 来添加对新运算符的支持。 - -### 发布模型 - -在将模型上传到 AWS 之前,您可能需要 -(1) 将模型权重转换为 CPU tensors, (2) 删除优化器状态,和 -(3) 计算模型权重文件的哈希并将哈希 ID 附加到文件名。 - -```shell -python tools/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME} -``` - -例如, - -```shell -python tools/publish_model.py work_dirs/example_exp/latest.pth example_model_20200202.pth -``` - -最终输出文件名将是 `example_model_20200202-{hash id}.pth`. - -### 转换为 ONNX(实验性) - -我们提供了一个脚本将模型转换为 [ONNX](https://github.com/onnx/onnx) 格式。 转换后的模型可以通过 [Netron](https://github.com/lutzroeder/netron) 等工具进行可视化。此外,我们还支持比较 Pytorch 和 ONNX 模型之间的输出结果。 - -```bash -python tools/pytorch2onnx.py - ${CFG_PATH} \ - ${CHECKPOINT_PATH} \ - ${MODEL_TYPE} \ - ${IMAGE_PATH} \ - --trimap-path ${TRIMAP_PATH} \ - --output-file ${OUTPUT_ONNX} \ - --show \ - --verify \ - --dynamic-export -``` - -参数说明: - -- `config` : 模型配置文件的路径。 -- `checkpoint` : 模型模型权重文件的路径。 -- `model_type` : 配置文件的模型类型,选项: `inpainting`, `mattor`, `restorer`, `synthesizer`。 -- `image_path` : 输入图像文件的路径。 -- `--trimap-path` : 输入三元图文件的路径,用于 mattor 模型。 -- `--output-file`: 输出 ONNX 模型的路径。默认为 `tmp.onnx`。 -- `--opset-version` : ONNX opset 版本。默认为 11。 -- `--show`: 确定是否打印导出模型的架构。默认为 `False`。 -- `--verify`: 确定是否验证导出模型的正确性。默认为 `False`。 -- `--dynamic-export`: 确定是否导出具有动态输入和输出形状的 ONNX 模型。默认为 `False`。 - -**注**:此工具仍处于试验阶段。目前不支持某些自定义运算符。我们现在只支持 `mattor` 和 `restorer`。 - -#### 支持导出到 ONNX 的模型列表 - -下表列出了保证可导出到 ONNX 并可在 ONNX Runtime 中运行的模型。 - -| 模型 | 配置 | 动态形状 | 批量推理 | 备注 | -| :------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------: | :------: | :--: | -| ESRGAN | [esrgan_x4c64b23g32_g1_400k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/esrgan/esrgan_x4c64b23g32_g1_400k_div2k.py) | Y | Y | | -| ESRGAN | [esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/esrgan/esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py) | Y | Y | | -| SRCNN | [srcnn_x4k915_g1_1000k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/srcnn/srcnn_x4k915_g1_1000k_div2k.py) | Y | Y | | -| DIM | [dim_stage3_v16_pln_1x1_1000k_comp1k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/dim/dim_stage3_v16_pln_1x1_1000k_comp1k.py) | Y | Y | | -| GCA | [gca_r34_4x10_200k_comp1k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/gca/gca_r34_4x10_200k_comp1k.py) | N | Y | | -| IndexNet | [indexnet_mobv2_1x16_78k_comp1k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/indexnet/indexnet_mobv2_1x16_78k_comp1k.py) | Y | Y | | - -**注**: - -- *以上所有模型均使用 Pytorch==1.6.0 和 onnxruntime==1.5.1* -- 如果您遇到上面列出的模型的任何问题,请创建一个 issue,我们会尽快处理。对于列表中未包含的型号,请尝试自行解决。 -- 由于此功能是实验性的并且可能会快速更改,请始终尝试使用最新的 `mmcv` 和 `mmedit`。 - -### 将 ONNX 转换为 TensorRT(实验性) - -我们还提供了将 [ONNX](https://github.com/onnx/onnx) 模型转换为 [TensorRT](https://github.com/NVIDIA/TensorRT) 格式的脚本。 此外,我们支持比较 ONNX 和 TensorRT 模型之间的输出结果。 - -```bash -python tools/onnx2tensorrt.py - ${CFG_PATH} \ - ${MODEL_TYPE} \ - ${IMAGE_PATH} \ - ${INPUT_ONNX} \ - --trt-file ${OUT_TENSORRT} \ - --max-shape INT INT INT INT \ - --min-shape INT INT INT INT \ - --workspace-size INT \ - --fp16 \ - --show \ - --verify \ - --verbose -``` - -参数说明: - -- `config` : 模型配置文件的路径。 -- `model_type` 
:配置文件的模型类型,选项: `inpainting`, `mattor`, `restorer`, `synthesizer`。 -- `img_path` : 输入图像文件的路径。 -- `onnx_file` : 输入 ONNX 文件的路径。 -- `--trt-file` : 输出 TensorRT 模型的路径。默认为 `tmp.trt`。 -- `--max-shape` : 模型输入的最大形状。 -- `--min-shape` : 模型输入的最小形状。 -- `--workspace-size`: 以 GiB 为单位的最大工作空间大小。默认为 1 GiB。 -- `--fp16`: 确定是否以 fp16 模式导出 TensorRT。默认为 `False`。 -- `--show`: 确定是否显示 ONNX 和 TensorRT 的输出。默认为 `False`。 -- `--verify`: 确定是否验证导出模型的正确性。默认为 `False`。 -- `--verbose`: 确定在创建 TensorRT 引擎时是否详细记录日志消息。默认为 `False`。 - -**注**:此工具仍处于试验阶段。 目前不支持某些自定义运算符。 我们现在只支持 `restorer`。 在生成 SRCNN 的 ONNX 文件时,将 SCRNN 模型中的 'bicubic' 替换为 'bilinear' \[此处\](https://github.com/open-mmlab/mmediting/blob/764e6065e315b7d0033762038fcbf0bb1c570d4d/mmedit.bones/modelsrnn py#L40)。 因为 TensorRT 目前不支持 bicubic 插值,最终性能将下降约 4%。 - -#### 支持导出到 TensorRT 的模型列表 - -下表列出了保证可导出到 TensorRT 引擎并可在 TensorRT 中运行的模型。 - -| 模型 | 配置 | 动态形状 | 批量推理 | 备注 | -| :----: | :-------------------------------------------------------------------------------------------------------------------------------------------: | :------: | :------: | :-----------------------------------: | -| ESRGAN | [esrgan_x4c64b23g32_g1_400k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/esrgan/esrgan_x4c64b23g32_g1_400k_div2k.py) | Y | Y | | -| ESRGAN | [esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/esrgan/esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py) | Y | Y | | -| SRCNN | [srcnn_x4k915_g1_1000k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/srcnn/srcnn_x4k915_g1_1000k_div2k.py) | Y | Y | 'bicubic' 上采样必须替换为 'bilinear' | - -**注**: - -- *以上所有模型均使用 Pytorch==1.8.1、onnxruntime==1.7.0 和 tensorrt==7.2.3.4 进行测试* -- 如果您遇到上面列出的模型的任何问题,请创建一个问题,我们会尽快处理。 对于列表中未包含的型号,请尝试自行解决。 -- 由于此功能是实验性的并且可能会快速更改,因此请始终尝试使用最新的 `mmcv` 和 `mmedit`。 - -### 评估 ONNX 和 TensorRT 模型(实验性) - -我们在 `tools/deploy_test.py` 中提供了评估 TensorRT 和 ONNX 模型的方法。 - -#### 先决条件 - -要评估 ONNX 和 TensorRT 模型,应先安装 onnx、onnxruntime 和 TensorRT。遵循 [mmcv 中的 ONNXRuntime](https://mmcv.readthedocs.io/en/latest/onnxruntime_op.html) 和 \[mmcv 中的 TensorRT 插件\](https://github.com/open-mmlab/mmcv/blob/master/docs/tensorrt_plugin.md%EF%BC%89%E4%BD%BF%E7%94%A8 ONNXRuntime 自定义操作和 TensorRT 插件安装 `mmcv-full`。 - -#### 用法 - -```bash -python tools/deploy_test.py \ - ${CONFIG_FILE} \ - ${MODEL_PATH} \ - ${BACKEND} \ - --out ${OUTPUT_FILE} \ - --save-path ${SAVE_PATH} \ - ----cfg-options ${CFG_OPTIONS} \ -``` - -#### 参数说明: - -- `config`: 模型配置文件的路径。 -- `model`: TensorRT 或 ONNX 模型文件的路径。 -- `backend`: 用于测试的后端,选择 tensorrt 或 onnxruntime。 -- `--out`: pickle 格式的输出结果文件的路径。 -- `--save-path`: 存储图像的路径,如果没有给出,则不会保存图像。 -- `--cfg-options`: 覆盖使用的配置文件中的一些设置,`xxx=yyy` 格式的键值对将被合并到配置文件中。 - -#### 结果和模型 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Model | Config | Dataset | Metric | PyTorch | ONNX Runtime | TensorRT FP32 | TensorRT FP16 |
| :----: | :----------------------------------------: | :----: | :----: | :-----: | :----------: | :-----------: | :-----------: |
| ESRGAN | esrgan_x4c64b23g32_g1_400k_div2k.py | Set5 | PSNR | 28.2700 | 28.2619 | 28.2619 | 28.2616 |
| | | Set5 | SSIM | 0.7778 | 0.7784 | 0.7784 | 0.7783 |
| | | Set14 | PSNR | 24.6328 | 24.6290 | 24.6290 | 24.6274 |
| | | Set14 | SSIM | 0.6491 | 0.6494 | 0.6494 | 0.6494 |
| | | DIV2K | PSNR | 26.6531 | 26.6532 | 26.6532 | 26.6532 |
| | | DIV2K | SSIM | 0.7340 | 0.7340 | 0.7340 | 0.7340 |
| ESRGAN | esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py | Set5 | PSNR | 30.6428 | 30.6307 | 30.6307 | 30.6305 |
| | | Set5 | SSIM | 0.8559 | 0.8565 | 0.8565 | 0.8566 |
| | | Set14 | PSNR | 27.0543 | 27.0422 | 27.0422 | 27.0411 |
| | | Set14 | SSIM | 0.7447 | 0.7450 | 0.7450 | 0.7449 |
| | | DIV2K | PSNR | 29.3354 | 29.3354 | 29.3354 | 29.3339 |
| | | DIV2K | SSIM | 0.8263 | 0.8263 | 0.8263 | 0.8263 |
| SRCNN | srcnn_x4k915_g1_1000k_div2k.py | Set5 | PSNR | 28.4316 | 28.4120 | 27.2144 | 27.2127 |
| | | Set5 | SSIM | 0.8099 | 0.8106 | 0.7782 | 0.7781 |
| | | Set14 | PSNR | 25.6486 | 25.6367 | 24.8613 | 24.8599 |
| | | Set14 | SSIM | 0.7014 | 0.7015 | 0.6674 | 0.6673 |
| | | DIV2K | PSNR | 27.7460 | 27.7460 | 26.9891 | 26.9862 |
| | | DIV2K | SSIM | 0.7854 | 0.78543 | 0.7605 | 0.7604 |
- -**注**: - -- 所有 ONNX 和 TensorRT 模型都使用数据集上的动态形状进行评估,图像根据原始配置文件进行预处理。 -- 此工具仍处于试验阶段,我们目前仅支持 `restorer`。 diff --git a/docs/zh_cn/user_guides/config.md b/docs/zh_cn/user_guides/config.md index c4aa1e3768..d147aa5c29 100644 --- a/docs/zh_cn/user_guides/config.md +++ b/docs/zh_cn/user_guides/config.md @@ -1 +1,675 @@ -# 配置文件(待更新) +# 教程 1: 了解配置文件(待更新) + +mmedit 采用基于 python 文件的配置系统,您可以在 `$MMEditing/configs` 下查看预置的配置文件。 + +## 配置文件命名风格 + +配置文件按照下面的风格命名。我们建议社区贡献者使用同样的风格。 + +```bash +{model}_[model setting]_{backbone}_[refiner]_[norm setting]_[misc]_[gpu x batch_per_gpu]_{schedule}_{dataset} +``` + +`{xxx}` 是必填字段,`[yyy]` 是可选的。 + +- `{model}`: 模型种类,例如 `srcnn`, `dim` 等等。 +- `[model setting]`: 特定设置一些模型,例如,输入图像 `resolution` , 训练 `stage name`。 +- `{backbone}`: 主干网络种类,例如 `r50` (ResNet-50)、`x101` (ResNeXt-101)。 +- `{refiner}`: 精炼器种类,例如 `pln` 简单精炼器模型 +- `[norm_setting]`: 指定归一化设置,默认为批归一化,其他归一化可以设为: `bn`(批归一化), `gn` (组归一化), `syncbn` (同步批归一化)。 +- `[misc]`: 模型中各式各样的设置/插件,例如 `dconv`, `gcb`, `attention`, `mstrain`。 +- `[gpu x batch_per_gpu]`: GPU数目 和每个 GPU 的样本数, 默认为 `8x2 `。 +- `{schedule}`: 训练策略,如 `20k`, `100k` 等,意思是 `20k` 或 `100k` 迭代轮数。 +- `{dataset}`: 数据集,如 `places`(图像补全)、`comp1k`(抠图)、`div2k`(图像恢复)和 `paired`(图像生成)。 + +## 配置文件 - 生成 + +与 [MMDetection](https://github.com/open-mmlab/mmdetection) 一样,我们将模块化和继承设计融入我们的配置系统,以方便进行各种实验。 + +## 示例 - pix2pix + +为了帮助用户对完整的配置和生成系统中的模块有一个基本的了解,我们对 pix2pix 的配置做如下简要说明。 +更详细的用法和各个模块对应的替代方案,请参考 API 文档。 + +```python +# 模型设置 +model = dict( + type='Pix2Pix', # 合成器名称 + generator=dict( + type='UnetGenerator', # 生成器名称 + in_channels=3, # 生成器的输入通道数 + out_channels=3, # 生成器的输出通道数 + num_down=8, # # 生成器中下采样的次数 + base_channels=64, # 生成器最后卷积层的通道数 + norm_cfg=dict(type='BN'), # 归一化层的配置 + use_dropout=True, # 是否在生成器中使用 dropout + init_cfg=dict(type='normal', gain=0.02)), # 初始化配置 + discriminator=dict( + type='PatchDiscriminator', # 判别器的名称 + in_channels=6, # 判别器的输入通道数 + base_channels=64, # 判别器第一卷积层的通道数 + num_conv=3, # 判别器中堆叠的中间卷积层(不包括输入和输出卷积层)的数量 + norm_cfg=dict(type='BN'), # 归一化层的配置 + init_cfg=dict(type='normal', gain=0.02)), # 初始化配置 + gan_loss=dict( + type='GANLoss', # GAN 损失的名称 + gan_type='vanilla', # GAN 损失的类型 + real_label_val=1.0, # GAN 损失函数中真实标签的值 + fake_label_val=0.0, # GAN 损失函数中伪造标签的值 + loss_weight=1.0), # GAN 损失函数的权重 + pixel_loss=dict(type='L1Loss', loss_weight=100.0, reduction='mean')) +# 模型训练和测试设置 +train_cfg = dict( + direction='b2a') # pix2pix 的图像到图像的转换方向 (模型训练的方向,和测试方向一致)。模型默认: a2b +test_cfg = dict( + direction='b2a', # pix2pix 的图像到图像的转换方向 (模型测试的方向,和训练方向一致)。模型默认: a2b + show_input=True) # 保存 pix2pix 的测试图像时是否显示输入的真实图像 + +# 数据设置 +train_dataset_type = 'GenerationPairedDataset' # 训练数据集的类型 +val_dataset_type = 'GenerationPairedDataset' # 验证/测试数据集类型 +img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) # 输入图像归一化配置 +train_pipeline = [ + dict( + type='LoadPairedImageFromFile', # 从文件路径加载图像对 + io_backend='disk', # 存储图像的 IO 后端 + key='pair', # 查找对应路径的关键词 + flag='color'), # 加载图像标志 + dict( + type='Resize', # 图像大小调整 + keys=['img_a', 'img_b'], # 要调整大小的图像的关键词 + scale=(286, 286), # 调整图像大小的比例 + interpolation='bicubic'), # 调整图像大小时用于插值的算法 + dict( + type='FixedCrop', # 固定裁剪,在特定位置将配对图像裁剪为特定大小以训练 pix2pix + keys=['img_a', 'img_b'], # 要裁剪的图像的关键词 + crop_size=(256, 256)), # 裁剪图像的大小 + dict( + type='Flip', # 翻转图像 + keys=['img_a', 'img_b'], # 要翻转的图像的关键词 + direction='horizontal'), # 水平或垂直翻转图像 + dict( + type='RescaleToZeroOne', # 将图像从 [0, 255] 缩放到 [0, 1] + keys=['img_a', 'img_b']), # 要重新缩放的图像的关键词 + dict( + type='Normalize', # 图像归一化 + keys=['img_a', 'img_b'], # 要归一化的图像的关键词 + to_rgb=True, # 是否将图像通道从 BGR 转换为 RGB + 
**img_norm_cfg), # 图像归一化配置(`img_norm_cfg` 的定义见上文) + dict( + type='ToTensor', # 将图像转化为 Tensor + keys=['img_a', 'img_b']), # 要从图像转换为 Tensor 的图像的关键词 + dict( + type='Collect', # 决定数据中哪些键应该传递给合成器 + keys=['img_a', 'img_b'], # 图像的关键词 + meta_keys=['img_a_path', 'img_b_path']) # 图片的元关键词 +] +test_pipeline = [ + dict( + type='LoadPairedImageFromFile', # 从文件路径加载图像对 + io_backend='disk', # 存储图像的 IO 后端 + key='pair', # 查找对应路径的关键词 + flag='color'), # 加载图像标志 + dict( + type='Resize', # 图像大小调整 + keys=['img_a', 'img_b'], # 要调整大小的图像的关键词 + scale=(256, 256), # 调整图像大小的比例 + interpolation='bicubic'), # 调整图像大小时用于插值的算法 + dict( + type='RescaleToZeroOne', # 将图像从 [0, 255] 缩放到 [0, 1] + keys=['img_a', 'img_b']), # 要重新缩放的图像的关键词 + dict( + type='Normalize', # 图像归一化 + keys=['img_a', 'img_b'], # 要归一化的图像的关键词 + to_rgb=True, # 是否将图像通道从 BGR 转换为 RGB + **img_norm_cfg), # 图像归一化配置(`img_norm_cfg` 的定义见上文) + dict( + type='ToTensor', # 将图像转化为 Tensor + keys=['img_a', 'img_b']), # 要从图像转换为 Tensor 的图像的关键词 + dict( + type='Collect', # 决定数据中哪些键应该传递给合成器 + keys=['img_a', 'img_b'], # 图像的关键词 + meta_keys=['img_a_path', 'img_b_path']) # 图片的元关键词 +] +data_root = 'data/pix2pix/facades' # 数据的根路径 +data = dict( + samples_per_gpu=1, # 单个 GPU 的批量大小 + workers_per_gpu=4, # 为每个 GPU 预取数据的 Worker 数 + drop_last=True, # 是否丢弃训练中的最后一批数据 + val_samples_per_gpu=1, # 验证中单个 GPU 的批量大小 + val_workers_per_gpu=0, # 在验证中为每个 GPU 预取数据的 Worker 数 + train=dict( # 训练数据集配置 + type=train_dataset_type, + dataroot=data_root, + pipeline=train_pipeline, + test_mode=False), + val=dict( # 验证数据集配置 + type=val_dataset_type, + dataroot=data_root, + pipeline=test_pipeline, + test_mode=True), + test=dict( # 测试数据集配置 + type=val_dataset_type, + dataroot=data_root, + pipeline=test_pipeline, + test_mode=True)) + +# 优化器 +optimizers = dict( # 用于构建优化器的配置,支持 PyTorch 中所有优化器,且参数与 PyTorch 中对应优化器相同 + generator=dict(type='Adam', lr=2e-4, betas=(0.5, 0.999)), + discriminator=dict(type='Adam', lr=2e-4, betas=(0.5, 0.999))) + +# 学习策略 +lr_config = dict(policy='Fixed', by_epoch=False) # 用于注册 LrUpdater 钩子的学习率调度程序配置 + +# 检查点保存 +checkpoint_config = dict(interval=4000, save_optimizer=True, by_epoch=False) # 配置检查点钩子,实现参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py +evaluation = dict( # 构建验证钩子的配置 + interval=4000, # 验证区间 + save_image=True) # 是否保存图片 +log_config = dict( # 配置注册记录器钩子 + interval=100, # 打印日志的时间间隔 + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), # 用于记录训练过程的记录器 + # dict(type='TensorboardLoggerHook') # 还支持 Tensorboard 记录器 + ]) +visual_config = None # 构建可视化钩子的配置 + +# 运行设置 +total_iters = 80000 # 训练模型的总迭代次数 +cudnn_benchmark = True # 设置 cudnn_benchmark +dist_params = dict(backend='nccl') # 设置分布式训练的参数,端口也可以设置 +log_level = 'INFO' # 日志级别 +load_from = None # 从给定路径加载模型作为预训练模型。 这不会恢复训练 +resume_from = None # 从给定路径恢复检查点,当检查点被保存时,训练将从该 epoch 恢复 +workflow = [('train', 1)] # runner 的工作流程。 [('train', 1)] 表示只有一个工作流程,名为 'train' 的工作流程执行一次。 训练当前生成模型时保持不变 +exp_name = 'pix2pix_facades' # 实验名称 +work_dir = f'./work_dirs/{exp_name}' # 保存当前实验的模型检查点和日志的目录 +``` + +## 配置文件 - 补全 + +## 配置名称样式 + +与 [MMDetection](https://github.com/open-mmlab/mmdetection) 一样,我们将模块化和继承设计融入我们的配置系统,以方便进行各种实验。 + +## 配置字段说明 + +为了帮助用户对完整的配置和修复系统中的模块有一个基本的了解,我们对 Global&Local 的配置作如下简要说明。更详细的用法和各个模块对应的替代方案,请参考 API 文档。 + +```python +model = dict( + type='GLInpaintor', # 补全器的名称 + encdec=dict( + type='GLEncoderDecoder', # 编码器-解码器的名称 + encoder=dict(type='GLEncoder', norm_cfg=dict(type='SyncBN')), # 编码器的配置 + decoder=dict(type='GLDecoder', norm_cfg=dict(type='SyncBN')), # 解码器的配置 + dilation_neck=dict( + type='GLDilationNeck', 
norm_cfg=dict(type='SyncBN'))), # 扩颈的配置 + disc=dict( + type='GLDiscs', # 判别器的名称 + global_disc_cfg=dict( + in_channels=3, # 判别器的输入通道数 + max_channels=512, # 判别器中的最大通道数 + fc_in_channels=512 * 4 * 4, # 最后一个全连接层的输入通道 + fc_out_channels=1024, # 最后一个全连接层的输出通道 + num_convs=6, # 判别器中使用的卷积数量 + norm_cfg=dict(type='SyncBN') # 归一化层的配置 + ), + local_disc_cfg=dict( + in_channels=3, # 判别器的输入通道数 + max_channels=512, # 判别器中的最大通道数 + fc_in_channels=512 * 4 * 4, # 最后一个全连接层的输入通道 + fc_out_channels=1024, # 最后一个全连接层的输出通道 + num_convs=5, # 判别器中使用的卷积数量 + norm_cfg=dict(type='SyncBN') # 归一化层的配置 + ), + ), + loss_gan=dict( + type='GANLoss', # GAN 损失的名称 + gan_type='vanilla', # GAN 损失的类型 + loss_weight=0.001 # GAN 损失函数的权重 + ), + loss_l1_hole=dict( + type='L1Loss', # L1 损失的类型 + loss_weight=1.0 # L1 损失函数的权重 + ), + pretrained=None) # 预训练权重的路径 + +train_cfg = dict( + disc_step=1, # 训练生成器之前训练判别器的迭代次数 + iter_tc=90000, # 预热生成器的迭代次数 + iter_td=100000, # 预热判别器的迭代次数 + start_iter=0, # 开始的迭代 + local_size=(128, 128)) # 图像块的大小 +test_cfg = dict(metrics=['l1']) # 测试的配置 + +dataset_type = 'ImgInpaintingDataset' # 数据集类型 +input_shape = (256, 256) # 输入图像的形状 + +train_pipeline = [ + dict(type='LoadImageFromFile', key='gt_img'), # 加载图片的配置 + dict( + type='LoadMask', # 加载掩码 + mask_mode='bbox', # 掩码的类型 + mask_config=dict( + max_bbox_shape=(128, 128), # 检测框的形状 + max_bbox_delta=40, # 检测框高宽的变化 + min_margin=20, # 检测框到图片边界的最小距离 + img_shape=input_shape)), # 输入图像的形状 + dict( + type='Crop', # 裁剪 + keys=['gt_img'], # 要裁剪的图像的关键词 + crop_size=(384, 384), # 裁剪图像块的大小 + random_crop=True, # 是否使用随机裁剪 + ), + dict( + type='Resize', # 图像大小调整 + keys=['gt_img'], # 要调整大小的图像的关键词 + scale=input_shape, # 调整图像大小的比例 + keep_ratio=False, # 调整大小时是否保持比例 + ), + dict( + type='Normalize', # 图像归一化 + keys=['gt_img'], # 要归一化的图像的关键词 + mean=[127.5] * 3, # 归一化中使用的均值 + std=[127.5] * 3, # 归一化中使用的标准差 + to_rgb=False), # 是否将图像通道从 BGR 转换为 RGB + dict(type='GetMaskedImage'), # 获取被掩盖的图像 + dict( + type='Collect', # 决定数据中哪些键应该传递给合成器 + keys=['gt_img', 'masked_img', 'mask', 'mask_bbox'], # 要收集的数据的关键词 + meta_keys=['gt_img_path']), # 要收集的数据的元关键词 + dict(type='ToTensor', keys=['gt_img', 'masked_img', 'mask']), # 将图像转化为 Tensor + dict(type='ToTensor', keys=['mask_bbox']) # 转化为 Tensor +] + +test_pipeline = train_pipeline # 构建测试/验证流程 + +data_root = 'data/places365' # 数据根目录 + +data = dict( + samples_per_gpu=12, # 单个 GPU 的批量大小 + workers_per_gpu=8, # 为每个 GPU 预取数据的 Worker 数 + val_samples_per_gpu=1, # 验证中单个 GPU 的批量大小 + val_workers_per_gpu=8, # 在验证中为每个 GPU 预取数据的 Worker 数 + drop_last=True, # 是否丢弃训练中的最后一批数据 + train=dict( # 训练数据集配置 + type=dataset_type, + ann_file=f'{data_root}/train_places_img_list_total.txt', + data_prefix=data_root, + pipeline=train_pipeline, + test_mode=False), + val=dict( # 验证数据集配置 + type=dataset_type, + ann_file=f'{data_root}/val_places_img_list.txt', + data_prefix=data_root, + pipeline=test_pipeline, + test_mode=True)) + +optimizers = dict( # 用于构建优化器的配置,支持 PyTorch 中所有优化器,且参数与 PyTorch 中对应优化器相同 + generator=dict(type='Adam', lr=0.0004), disc=dict(type='Adam', lr=0.0004)) + +lr_config = dict(policy='Fixed', by_epoch=False) # 用于注册 LrUpdater 钩子的学习率调度程序配置 + +checkpoint_config = dict(by_epoch=False, interval=50000) # 配置检查点钩子,实现参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py +log_config = dict( # 配置注册记录器钩子 + interval=100, # 打印日志的时间间隔 + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), + # dict(type='TensorboardLoggerHook'), # 支持 Tensorboard 记录器 + # dict(type='PaviLoggerHook', init_kwargs=dict(project='mmedit')) + ]) # 用于记录训练过程的记录器 + +visual_config = dict( # 构建可视化钩子的配置 + 
type='VisualizationHook', + output_dir='visual', + interval=1000, + res_name_list=[ + 'gt_img', 'masked_img', 'fake_res', 'fake_img', 'fake_gt_local' + ], +) # 用于可视化训练过程的记录器。 + +evaluation = dict(interval=50000) # 构建验证钩子的配置 + +total_iters = 500002 +dist_params = dict(backend='nccl') # 设置分布式训练的参数,端口也可以设置 +log_level = 'INFO' # 日志级别 +work_dir = None # 保存当前实验的模型检查点和日志的目录 +load_from = None # 从给定路径加载模型作为预训练模型。 这不会恢复训练 +resume_from = None # 从给定路径恢复检查点,当检查点被保存时,训练将从该 epoch 恢复 +workflow = [('train', 10000)] # runner 的工作流程。 [('train', 1)] 表示只有一个工作流程,名为 'train' 的工作流程执行一次。 训练当前生成模型时保持不变 +exp_name = 'gl_places' # 实验名称 +find_unused_parameters = False # 是否在分布式训练中查找未使用的参数 +``` + +## 配置文件 - 抠图 + +与 [MMDetection](https://github.com/open-mmlab/mmdetection) 一样,我们将模块化和继承设计融入我们的配置系统,以方便进行各种实验。 + +## 例子 - Deep Image Matting Model + +为了帮助用户对一个完整的配置有一个基本的了解,我们对我们实现的原始 DIM 模型的配置做一个简短的评论,如下所示。 更详细的用法和各个模块对应的替代方案,请参考 API 文档。 + +```python +# 模型配置 +model = dict( + type='DIM', # 模型的名称(我们称之为抠图器) + backbone=dict( # 主干网络的配置 + type='SimpleEncoderDecoder', # 主干网络的类型 + encoder=dict( # 编码器的配置 + type='VGG16'), # 编码器的类型 + decoder=dict( # 解码器的配置 + type='PlainDecoder')), # 解码器的类型 + pretrained='./weights/vgg_state_dict.pth', # 编码器的预训练权重 + loss_alpha=dict( # alpha 损失的配置 + type='CharbonnierLoss', # 预测的 alpha 遮罩的损失类型 + loss_weight=0.5), # alpha 损失的权重 + loss_comp=dict( # 组合损失的配置 + type='CharbonnierCompLoss', # 组合损失的类型 + loss_weight=0.5)) # 组合损失的权重 +train_cfg = dict( # 训练 DIM 模型的配置 + train_backbone=True, # 在 DIM stage 1 中,会对主干网络进行训练 + train_refiner=False) # 在 DIM stage 1 中,不会对精炼器进行训练 +test_cfg = dict( # 测试 DIM 模型的配置 + refine=False, # 是否使用精炼器输出作为输出,在 stage 1 中,我们不使用它 + metrics=['SAD', 'MSE', 'GRAD', 'CONN']) # 测试时使用的指标 + +# 数据配置 +dataset_type = 'AdobeComp1kDataset' # 数据集类型,这将用于定义数据集 +data_root = 'data/adobe_composition-1k' # 数据的根目录 +img_norm_cfg = dict( # 归一化输入图像的配置 + mean=[0.485, 0.456, 0.406], # 归一化中使用的均值 + std=[0.229, 0.224, 0.225], # 归一化中使用的标准差 + to_rgb=True) # 是否将图像通道从 BGR 转换为 RGB +train_pipeline = [ # 训练数据处理流程 + dict( + type='LoadImageFromFile', # 从文件加载 alpha 遮罩 + key='alpha', # 注释文件中 alpha 遮罩的键关键词。流程将从路径 “alpha_path” 中读取 alpha 遮罩 + flag='grayscale'), # 加载灰度图像,形状为(高度、宽度) + dict( + type='LoadImageFromFile', # 从文件中加载图像 + key='fg'), # 要加载的图像的关键词。流程将从路径 “fg_path” 读取 fg + dict( + type='LoadImageFromFile', # 从文件中加载图像 + key='bg'), # 要加载的图像的关键词。流程将从路径 “bg_path” 读取 bg + dict( + type='LoadImageFromFile', # 从文件中加载图像 + key='merged'), # 要加载的图像的关键词。流程将从路径 “merged_path” 读取并合并 + dict( + type='CropAroundUnknown', # 在未知区域(半透明区域)周围裁剪图像 + keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg'], # 要裁剪的图像 + crop_sizes=[320, 480, 640]), # 裁剪大小 + dict( + type='Flip', # 翻转图像 + keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg']), # 要翻转的图像 + dict( + type='Resize', # 图像大小调整 + keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg'], # 图像调整大小的图像 + scale=(320, 320), # 目标大小 + keep_ratio=False), # 是否保持高宽比例 + dict( + type='GenerateTrimap', # 从 alpha 遮罩生成三元图。 + kernel_size=(1, 30)), # 腐蚀/扩张内核大小的范围 + dict( + type='RescaleToZeroOne', # 将图像从 [0, 255] 缩放到 [0, 1] + keys=['merged', 'alpha', 'ori_merged', 'fg', 'bg']), # 要重新缩放的图像 + dict( + type='Normalize', # 图像归一化 + keys=['merged'], # 要归一化的图像 + **img_norm_cfg), # 图像归一化配置(`img_norm_cfg` 的定义见上文) + dict( + type='Collect', # 决定数据中哪些键应该传递给合成器 + keys=['merged', 'alpha', 'trimap', 'ori_merged', 'fg', 'bg'], # 图像的关键词 + meta_keys=[]), # 图片的元关键词,这里不需要元信息。 + dict( + type='ToTensor', # 将图像转化为 Tensor + keys=['merged', 'alpha', 'trimap', 'ori_merged', 'fg', 'bg']), # 要转换为 Tensor 的图像 +] +test_pipeline = [ + dict( + type='LoadImageFromFile', # 
从文件加载 alpha 遮罩 + key='alpha', # 注释文件中 alpha 遮罩的键关键词。流程将从路径 “alpha_path” 中读取 alpha 遮罩 + flag='grayscale', + save_original_img=True), + dict( + type='LoadImageFromFile', # 从文件中加载图像 + key='trimap', # 要加载的图像的关键词。流程将从路径 “trimap_path” 读取三元图 + flag='grayscale', # 加载灰度图像,形状为(高度、宽度) + save_original_img=True), # 保存三元图用于计算指标。 它将与 “ori_trimap” 一起保存 + dict( + type='LoadImageFromFile', # 从文件中加载图像 + key='merged'), # 要加载的图像的关键词。流程将从路径 “merged_path” 读取并合并 + dict( + type='Pad', # 填充图像以与模型的下采样因子对齐 + keys=['trimap', 'merged'], # 要填充的图像 + mode='reflect'), # 填充模式 + dict( + type='RescaleToZeroOne', # 与 train_pipeline 相同 + keys=['merged', 'ori_alpha']), # 要缩放的图像 + dict( + type='Normalize', # 与 train_pipeline 相同 + keys=['merged'], + **img_norm_cfg), + dict( + type='Collect', # 与 train_pipeline 相同 + keys=['merged', 'trimap'], + meta_keys=[ + 'merged_path', 'pad', 'merged_ori_shape', 'ori_alpha', + 'ori_trimap' + ]), + dict( + type='ToTensor', # 与 train_pipeline 相同 + keys=['merged', 'trimap']), +] +data = dict( + samples_per_gpu=1, #单个 GPU 的批量大小 + workers_per_gpu=4, # 为每个 GPU 预取数据的 Worker 数 + drop_last=True, # 是否丢弃训练中的最后一批数据 + train=dict( # 训练数据集配置 + type=dataset_type, # 数据集的类型 + ann_file=f'{data_root}/training_list.json', # 注解文件路径 + data_prefix=data_root, # 图像路径的前缀 + pipeline=train_pipeline), # 见上文 train_pipeline + val=dict( # 验证数据集配置 + type=dataset_type, + ann_file=f'{data_root}/test_list.json', + data_prefix=data_root, + pipeline=test_pipeline), # 见上文 test_pipeline + test=dict( # 测试数据集配置 + type=dataset_type, + ann_file=f'{data_root}/test_list.json', + data_prefix=data_root, + pipeline=test_pipeline)) # 见上文 test_pipeline + +# 优化器 +optimizers = dict(type='Adam', lr=0.00001) # 用于构建优化器的配置,支持 PyTorch 中所有优化器,且参数与 PyTorch 中对应优化器相同 +# 学习策略 +lr_config = dict( # 用于注册 LrUpdater 钩子的学习率调度程序配置 + policy='Fixed') # 调度器的策略,支持 CosineAnnealing、Cyclic 等。支持的 LrUpdater 详情请参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9。 + +# 检查点保存 +checkpoint_config = dict( # 配置检查点钩子,实现参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py + interval=40000, # 保存间隔为 40000 次迭代 + by_epoch=False) # 按迭代计数 +evaluation = dict( # # 构建验证钩子的配置 + interval=40000) # 验证区间 +log_config = dict( # 配置注册记录器钩子 + interval=10, # 打印日志的时间间隔 + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), # 用于记录训练过程的记录器 + # dict(type='TensorboardLoggerHook') # 支持 Tensorboard 记录器 + ]) + +# runtime settings +total_iters = 1000000 # 训练模型的总迭代次数 +dist_params = dict(backend='nccl') # 设置分布式训练的参数,端口也可以设置 +log_level = 'INFO' # 日志级别 +work_dir = './work_dirs/dim_stage1' # 保存当前实验的模型检查点和日志的目录 +load_from = None # 从给定路径加载模型作为预训练模型。 这不会恢复训练 +resume_from = None # 从给定路径恢复检查点,当检查点被保存时,训练将从该 epoch 恢复 +workflow = [('train', 1)] # runner 的工作流程。 [('train', 1)] 表示只有一个工作流程,名为 'train' 的工作流程执行一次。 训练当前抠图模型时保持不变 +``` + +## 配置文件 - 复原 + +## 示例-EDSR + +为了帮助用户理解 mmediting 的配置文件结构,这里以 EDSR 为例,给出其配置文件的注释。对于每个模块的详细用法以及对应参数的选择,请参照 API 文档。 + +```python +exp_name = 'edsr_x2c64b16_1x16_300k_div2k' # 实验名称 + +scale = 2 # 上采样放大因子 + +# 模型设置 +model = dict( + type='BasicRestorer', # 图像恢复模型类型 + generator=dict( # 生成器配置 + type='EDSR', # 生成器类型 + in_channels=3, # 输入通道数 + out_channels=3, # 输出通道数 + mid_channels=64, # 中间特征通道数 + num_blocks=16, # 残差块数目 + upscale_factor=scale, # 上采样因子 + res_scale=1, # 残差缩放因子 + rgb_mean=(0.4488, 0.4371, 0.4040), # 输入图像 RGB 通道的平均值 + rgb_std=(1.0, 1.0, 1.0)), # 输入图像 RGB 通道的方差 + pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean')) # 像素损失函数的配置 + +# 模型训练和测试设置 +train_cfg = None # 训练的配置 +test_cfg = dict( # 测试的配置 + metrics=['PSNR'], # 
测试时使用的评价指标 + crop_border=scale) # 测试时裁剪的边界尺寸 + +# 数据集设置 +train_dataset_type = 'SRAnnotationDataset' # 用于训练的数据集类型 +val_dataset_type = 'SRFolderDataset' # 用于验证的数据集类型 +train_pipeline = [ # 训练数据前处理流水线步骤组成的列表 + dict(type='LoadImageFromFile', # 从文件加载图像 + io_backend='disk', # 读取图像时使用的 io 类型 + key='lq', # 设置LR图像的键来找到相应的路径 + flag='unchanged'), # 读取图像的标识 + dict(type='LoadImageFromFile', # 从文件加载图像 + io_backend='disk', # 读取图像时使用的io类型 + key='gt', # 设置HR图像的键来找到相应的路径 + flag='unchanged'), # 读取图像的标识 + dict(type='RescaleToZeroOne', keys=['lq', 'gt']), # 将图像从[0,255]重缩放到[0,1] + dict(type='Normalize', # 正则化图像 + keys=['lq', 'gt'], # 执行正则化图像的键 + mean=[0, 0, 0], # 平均值 + std=[1, 1, 1], # 标准差 + to_rgb=True), # 更改为 RGB 通道 + dict(type='PairedRandomCrop', gt_patch_size=96), # LR 和 HR 成对随机裁剪 + dict(type='Flip', # 图像翻转 + keys=['lq', 'gt'], # 执行翻转图像的键 + flip_ratio=0.5, # 执行翻转的几率 + direction='horizontal'), # 翻转方向 + dict(type='Flip', # 图像翻转 + keys=['lq', 'gt'], # 执行翻转图像的键 + flip_ratio=0.5, # 执行翻转几率 + direction='vertical'), # 翻转方向 + dict(type='RandomTransposeHW', # 图像的随机的转置 + keys=['lq', 'gt'], # 执行转置图像的键 + transpose_ratio=0.5 # 执行转置的几率 + ), + dict(type='Collect', # Collect 类决定哪些键会被传递到生成器中 + keys=['lq', 'gt'], # 传入模型的键 + meta_keys=['lq_path', 'gt_path']), # 元信息键。在训练中,不需要元信息 + dict(type='ToTensor', # 将图像转换为张量 + keys=['lq', 'gt']) # 执行图像转换为张量的键 +] +test_pipeline = [ # 测试数据前处理流水线步骤组成的列表 + dict( + type='LoadImageFromFile', # 从文件加载图像 + io_backend='disk', # 读取图像时使用的io类型 + key='lq', # 设置LR图像的键来找到相应的路径 + flag='unchanged'), # 读取图像的标识 + dict( + type='LoadImageFromFile', # 从文件加载图像 + io_backend='disk', # 读取图像时使用的io类型 + key='gt', # 设置HR图像的键来找到相应的路径 + flag='unchanged'), # 读取图像的标识 + dict(type='RescaleToZeroOne', keys=['lq', 'gt']), # 将图像从[0,255]重缩放到[0,1] + dict( + type='Normalize', # 正则化图像 + keys=['lq', 'gt'], # 执行正则化图像的键 + mean=[0, 0, 0], # 平均值 + std=[1, 1, 1], # 标准差 + to_rgb=True), # 更改为RGB通道 + dict(type='Collect', # Collect类决定哪些键会被传递到生成器中 + keys=['lq', 'gt'], # 传入模型的键 + meta_keys=['lq_path', 'gt_path']), # 元信息键 + dict(type='ToTensor', # 将图像转换为张量 + keys=['lq', 'gt']) # 执行图像转换为张量的键 +] + +data = dict( + # 训练 + samples_per_gpu=16, # 单个 GPU 的批大小 + workers_per_gpu=6, # 单个 GPU 的 dataloader 的进程 + drop_last=True, # 在训练过程中丢弃最后一个批次 + train=dict( # 训练数据集的设置 + type='RepeatDataset', # 基于迭代的重复数据集 + times=1000, # 重复数据集的重复次数 + dataset=dict( + type=train_dataset_type, # 数据集类型 + lq_folder='data/DIV2K/DIV2K_train_LR_bicubic/X2_sub', # lq文件夹的路径 + gt_folder='data/DIV2K/DIV2K_train_HR_sub', # gt文件夹的路径 + ann_file='data/DIV2K/meta_info_DIV2K800sub_GT.txt', # 批注文件的路径 + pipeline=train_pipeline, # 训练流水线,如上所示 + scale=scale)), # 上采样放大因子 + + # 验证 + val_samples_per_gpu=1, # 验证时单个 GPU 的批大小 + val_workers_per_gpu=1, # 验证时单个 GPU 的 dataloader 的进程 + val=dict( + type=val_dataset_type, # 数据集类型 + lq_folder='data/val_set5/Set5_bicLRx2', # lq 文件夹的路径 + gt_folder='data/val_set5/Set5_mod12', # gt 文件夹的路径 + pipeline=test_pipeline, # 测试流水线,如上所示 + scale=scale, # 上采样放大因子 + filename_tmpl='{}'), # 文件名模板 + + # 测试 + test=dict( + type=val_dataset_type, # 数据集类型 + lq_folder='data/val_set5/Set5_bicLRx2', # lq 文件夹的路径 + gt_folder='data/val_set5/Set5_mod12', # gt 文件夹的路径 + pipeline=test_pipeline, # 测试流水线,如上所示 + scale=scale, # 上采样放大因子 + filename_tmpl='{}')) # 文件名模板 + +# 优化器设置 +optimizers = dict(generator=dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))) # 用于构建优化器的设置,支持PyTorch中所有参数与PyTorch中参数相同的优化器 + +# 学习策略 +total_iters = 300000 # 训练模型的总迭代数 +lr_config = dict( # 用于注册LrUpdater钩子的学习率调度程序配置 + policy='Step', by_epoch=False, step=[200000], gamma=0.5) # 调度器的策略,还支持余弦、循环等 + +checkpoint_config = dict( # 
模型权重钩子设置,更多细节可参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py + interval=5000, # 模型权重文件保存间隔为5000次迭代 + save_optimizer=True, # 保存优化器 + by_epoch=False) # 按迭代次数计数 +evaluation = dict( # 构建验证钩子的配置 + interval=5000, # 执行验证的间隔为5000次迭代 + save_image=True, # 验证期间保存图像 + gpu_collect=True) # 使用gpu收集 +log_config = dict( # 注册日志钩子的设置 + interval=100, # 打印日志间隔 + hooks=[ + dict(type='TextLoggerHook', by_epoch=False), # 记录训练过程信息的日志 + dict(type='TensorboardLoggerHook'), # 同时支持 Tensorboard 日志 + ]) +visual_config = None # 可视化的设置 + +# 运行设置 +dist_params = dict(backend='nccl') # 建立分布式训练的设置,其中端口号也可以设置 +log_level = 'INFO' # 日志等级 +work_dir = f'./work_dirs/{exp_name}' # 记录当前实验日志和模型权重文件的文件夹 +load_from = None # 从给定路径加载模型作为预训练模型. 这个选项不会用于断点恢复训练 +resume_from = None # 加载给定路径的模型权重文件作为断点续连的模型, 训练将从该时间点保存的周期点继续进行 +workflow = [('train', 1)] # runner 的执行流. [('train', 1)] 代表只有一个执行流,并且这个名为 train 的执行流只执行一次 +``` diff --git a/docs/zh_cn/user_guides/configs/config_generation.md b/docs/zh_cn/user_guides/configs/config_generation.md deleted file mode 100644 index 75088f5ef4..0000000000 --- a/docs/zh_cn/user_guides/configs/config_generation.md +++ /dev/null @@ -1,164 +0,0 @@ -# 配置文件 - 生成 - -与 [MMDetection](https://github.com/open-mmlab/mmdetection) 一样,我们将模块化和继承设计融入我们的配置系统,以方便进行各种实验。 - -## 示例 - pix2pix - -为了帮助用户对完整的配置和生成系统中的模块有一个基本的了解,我们对 pix2pix 的配置做如下简要说明。 -更详细的用法和各个模块对应的替代方案,请参考 API 文档。 - -```python -# 模型设置 -model = dict( - type='Pix2Pix', # 合成器名称 - generator=dict( - type='UnetGenerator', # 生成器名称 - in_channels=3, # 生成器的输入通道数 - out_channels=3, # 生成器的输出通道数 - num_down=8, # # 生成器中下采样的次数 - base_channels=64, # 生成器最后卷积层的通道数 - norm_cfg=dict(type='BN'), # 归一化层的配置 - use_dropout=True, # 是否在生成器中使用 dropout - init_cfg=dict(type='normal', gain=0.02)), # 初始化配置 - discriminator=dict( - type='PatchDiscriminator', # 判别器的名称 - in_channels=6, # 判别器的输入通道数 - base_channels=64, # 判别器第一卷积层的通道数 - num_conv=3, # 判别器中堆叠的中间卷积层(不包括输入和输出卷积层)的数量 - norm_cfg=dict(type='BN'), # 归一化层的配置 - init_cfg=dict(type='normal', gain=0.02)), # 初始化配置 - gan_loss=dict( - type='GANLoss', # GAN 损失的名称 - gan_type='vanilla', # GAN 损失的类型 - real_label_val=1.0, # GAN 损失函数中真实标签的值 - fake_label_val=0.0, # GAN 损失函数中伪造标签的值 - loss_weight=1.0), # GAN 损失函数的权重 - pixel_loss=dict(type='L1Loss', loss_weight=100.0, reduction='mean')) -# 模型训练和测试设置 -train_cfg = dict( - direction='b2a') # pix2pix 的图像到图像的转换方向 (模型训练的方向,和测试方向一致)。模型默认: a2b -test_cfg = dict( - direction='b2a', # pix2pix 的图像到图像的转换方向 (模型测试的方向,和训练方向一致)。模型默认: a2b - show_input=True) # 保存 pix2pix 的测试图像时是否显示输入的真实图像 - -# 数据设置 -train_dataset_type = 'GenerationPairedDataset' # 训练数据集的类型 -val_dataset_type = 'GenerationPairedDataset' # 验证/测试数据集类型 -img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) # 输入图像归一化配置 -train_pipeline = [ - dict( - type='LoadPairedImageFromFile', # 从文件路径加载图像对 - io_backend='disk', # 存储图像的 IO 后端 - key='pair', # 查找对应路径的关键词 - flag='color'), # 加载图像标志 - dict( - type='Resize', # 图像大小调整 - keys=['img_a', 'img_b'], # 要调整大小的图像的关键词 - scale=(286, 286), # 调整图像大小的比例 - interpolation='bicubic'), # 调整图像大小时用于插值的算法 - dict( - type='FixedCrop', # 固定裁剪,在特定位置将配对图像裁剪为特定大小以训练 pix2pix - keys=['img_a', 'img_b'], # 要裁剪的图像的关键词 - crop_size=(256, 256)), # 裁剪图像的大小 - dict( - type='Flip', # 翻转图像 - keys=['img_a', 'img_b'], # 要翻转的图像的关键词 - direction='horizontal'), # 水平或垂直翻转图像 - dict( - type='RescaleToZeroOne', # 将图像从 [0, 255] 缩放到 [0, 1] - keys=['img_a', 'img_b']), # 要重新缩放的图像的关键词 - dict( - type='Normalize', # 图像归一化 - keys=['img_a', 'img_b'], # 要归一化的图像的关键词 - to_rgb=True, # 是否将图像通道从 BGR 转换为 RGB - **img_norm_cfg), # 图像归一化配置(`img_norm_cfg` 
的定义见上文) - dict( - type='ToTensor', # 将图像转化为 Tensor - keys=['img_a', 'img_b']), # 要从图像转换为 Tensor 的图像的关键词 - dict( - type='Collect', # 决定数据中哪些键应该传递给合成器 - keys=['img_a', 'img_b'], # 图像的关键词 - meta_keys=['img_a_path', 'img_b_path']) # 图片的元关键词 -] -test_pipeline = [ - dict( - type='LoadPairedImageFromFile', # 从文件路径加载图像对 - io_backend='disk', # 存储图像的 IO 后端 - key='pair', # 查找对应路径的关键词 - flag='color'), # 加载图像标志 - dict( - type='Resize', # 图像大小调整 - keys=['img_a', 'img_b'], # 要调整大小的图像的关键词 - scale=(256, 256), # 调整图像大小的比例 - interpolation='bicubic'), # 调整图像大小时用于插值的算法 - dict( - type='RescaleToZeroOne', # 将图像从 [0, 255] 缩放到 [0, 1] - keys=['img_a', 'img_b']), # 要重新缩放的图像的关键词 - dict( - type='Normalize', # 图像归一化 - keys=['img_a', 'img_b'], # 要归一化的图像的关键词 - to_rgb=True, # 是否将图像通道从 BGR 转换为 RGB - **img_norm_cfg), # 图像归一化配置(`img_norm_cfg` 的定义见上文) - dict( - type='ToTensor', # 将图像转化为 Tensor - keys=['img_a', 'img_b']), # 要从图像转换为 Tensor 的图像的关键词 - dict( - type='Collect', # 决定数据中哪些键应该传递给合成器 - keys=['img_a', 'img_b'], # 图像的关键词 - meta_keys=['img_a_path', 'img_b_path']) # 图片的元关键词 -] -data_root = 'data/pix2pix/facades' # 数据的根路径 -data = dict( - samples_per_gpu=1, # 单个 GPU 的批量大小 - workers_per_gpu=4, # 为每个 GPU 预取数据的 Worker 数 - drop_last=True, # 是否丢弃训练中的最后一批数据 - val_samples_per_gpu=1, # 验证中单个 GPU 的批量大小 - val_workers_per_gpu=0, # 在验证中为每个 GPU 预取数据的 Worker 数 - train=dict( # 训练数据集配置 - type=train_dataset_type, - dataroot=data_root, - pipeline=train_pipeline, - test_mode=False), - val=dict( # 验证数据集配置 - type=val_dataset_type, - dataroot=data_root, - pipeline=test_pipeline, - test_mode=True), - test=dict( # 测试数据集配置 - type=val_dataset_type, - dataroot=data_root, - pipeline=test_pipeline, - test_mode=True)) - -# 优化器 -optimizers = dict( # 用于构建优化器的配置,支持 PyTorch 中所有优化器,且参数与 PyTorch 中对应优化器相同 - generator=dict(type='Adam', lr=2e-4, betas=(0.5, 0.999)), - discriminator=dict(type='Adam', lr=2e-4, betas=(0.5, 0.999))) - -# 学习策略 -lr_config = dict(policy='Fixed', by_epoch=False) # 用于注册 LrUpdater 钩子的学习率调度程序配置 - -# 检查点保存 -checkpoint_config = dict(interval=4000, save_optimizer=True, by_epoch=False) # 配置检查点钩子,实现参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py -evaluation = dict( # 构建验证钩子的配置 - interval=4000, # 验证区间 - save_image=True) # 是否保存图片 -log_config = dict( # 配置注册记录器钩子 - interval=100, # 打印日志的时间间隔 - hooks=[ - dict(type='TextLoggerHook', by_epoch=False), # 用于记录训练过程的记录器 - # dict(type='TensorboardLoggerHook') # 还支持 Tensorboard 记录器 - ]) -visual_config = None # 构建可视化钩子的配置 - -# 运行设置 -total_iters = 80000 # 训练模型的总迭代次数 -cudnn_benchmark = True # 设置 cudnn_benchmark -dist_params = dict(backend='nccl') # 设置分布式训练的参数,端口也可以设置 -log_level = 'INFO' # 日志级别 -load_from = None # 从给定路径加载模型作为预训练模型。 这不会恢复训练 -resume_from = None # 从给定路径恢复检查点,当检查点被保存时,训练将从该 epoch 恢复 -workflow = [('train', 1)] # runner 的工作流程。 [('train', 1)] 表示只有一个工作流程,名为 'train' 的工作流程执行一次。 训练当前生成模型时保持不变 -exp_name = 'pix2pix_facades' # 实验名称 -work_dir = f'./work_dirs/{exp_name}' # 保存当前实验的模型检查点和日志的目录 -``` diff --git a/docs/zh_cn/user_guides/configs/config_inpainting.md b/docs/zh_cn/user_guides/configs/config_inpainting.md deleted file mode 100644 index 12836fd3a3..0000000000 --- a/docs/zh_cn/user_guides/configs/config_inpainting.md +++ /dev/null @@ -1,155 +0,0 @@ -# 配置文件 - 补全 - -## 配置名称样式 - -与 [MMDetection](https://github.com/open-mmlab/mmdetection) 一样,我们将模块化和继承设计融入我们的配置系统,以方便进行各种实验。 - -## 配置字段说明 - -为了帮助用户对完整的配置和修复系统中的模块有一个基本的了解,我们对 Global&Local 的配置作如下简要说明。更详细的用法和各个模块对应的替代方案,请参考 API 文档。 - -```python -model = dict( - type='GLInpaintor', # 补全器的名称 - encdec=dict( - type='GLEncoderDecoder', # 
编码器-解码器的名称 - encoder=dict(type='GLEncoder', norm_cfg=dict(type='SyncBN')), # 编码器的配置 - decoder=dict(type='GLDecoder', norm_cfg=dict(type='SyncBN')), # 解码器的配置 - dilation_neck=dict( - type='GLDilationNeck', norm_cfg=dict(type='SyncBN'))), # 扩颈的配置 - disc=dict( - type='GLDiscs', # 判别器的名称 - global_disc_cfg=dict( - in_channels=3, # 判别器的输入通道数 - max_channels=512, # 判别器中的最大通道数 - fc_in_channels=512 * 4 * 4, # 最后一个全连接层的输入通道 - fc_out_channels=1024, # 最后一个全连接层的输出通道 - num_convs=6, # 判别器中使用的卷积数量 - norm_cfg=dict(type='SyncBN') # 归一化层的配置 - ), - local_disc_cfg=dict( - in_channels=3, # 判别器的输入通道数 - max_channels=512, # 判别器中的最大通道数 - fc_in_channels=512 * 4 * 4, # 最后一个全连接层的输入通道 - fc_out_channels=1024, # 最后一个全连接层的输出通道 - num_convs=5, # 判别器中使用的卷积数量 - norm_cfg=dict(type='SyncBN') # 归一化层的配置 - ), - ), - loss_gan=dict( - type='GANLoss', # GAN 损失的名称 - gan_type='vanilla', # GAN 损失的类型 - loss_weight=0.001 # GAN 损失函数的权重 - ), - loss_l1_hole=dict( - type='L1Loss', # L1 损失的类型 - loss_weight=1.0 # L1 损失函数的权重 - ), - pretrained=None) # 预训练权重的路径 - -train_cfg = dict( - disc_step=1, # 训练生成器之前训练判别器的迭代次数 - iter_tc=90000, # 预热生成器的迭代次数 - iter_td=100000, # 预热判别器的迭代次数 - start_iter=0, # 开始的迭代 - local_size=(128, 128)) # 图像块的大小 -test_cfg = dict(metrics=['l1']) # 测试的配置 - -dataset_type = 'ImgInpaintingDataset' # 数据集类型 -input_shape = (256, 256) # 输入图像的形状 - -train_pipeline = [ - dict(type='LoadImageFromFile', key='gt_img'), # 加载图片的配置 - dict( - type='LoadMask', # 加载掩码 - mask_mode='bbox', # 掩码的类型 - mask_config=dict( - max_bbox_shape=(128, 128), # 检测框的形状 - max_bbox_delta=40, # 检测框高宽的变化 - min_margin=20, # 检测框到图片边界的最小距离 - img_shape=input_shape)), # 输入图像的形状 - dict( - type='Crop', # 裁剪 - keys=['gt_img'], # 要裁剪的图像的关键词 - crop_size=(384, 384), # 裁剪图像块的大小 - random_crop=True, # 是否使用随机裁剪 - ), - dict( - type='Resize', # 图像大小调整 - keys=['gt_img'], # 要调整大小的图像的关键词 - scale=input_shape, # 调整图像大小的比例 - keep_ratio=False, # 调整大小时是否保持比例 - ), - dict( - type='Normalize', # 图像归一化 - keys=['gt_img'], # 要归一化的图像的关键词 - mean=[127.5] * 3, # 归一化中使用的均值 - std=[127.5] * 3, # 归一化中使用的标准差 - to_rgb=False), # 是否将图像通道从 BGR 转换为 RGB - dict(type='GetMaskedImage'), # 获取被掩盖的图像 - dict( - type='Collect', # 决定数据中哪些键应该传递给合成器 - keys=['gt_img', 'masked_img', 'mask', 'mask_bbox'], # 要收集的数据的关键词 - meta_keys=['gt_img_path']), # 要收集的数据的元关键词 - dict(type='ToTensor', keys=['gt_img', 'masked_img', 'mask']), # 将图像转化为 Tensor - dict(type='ToTensor', keys=['mask_bbox']) # 转化为 Tensor -] - -test_pipeline = train_pipeline # 构建测试/验证流程 - -data_root = 'data/places365' # 数据根目录 - -data = dict( - samples_per_gpu=12, # 单个 GPU 的批量大小 - workers_per_gpu=8, # 为每个 GPU 预取数据的 Worker 数 - val_samples_per_gpu=1, # 验证中单个 GPU 的批量大小 - val_workers_per_gpu=8, # 在验证中为每个 GPU 预取数据的 Worker 数 - drop_last=True, # 是否丢弃训练中的最后一批数据 - train=dict( # 训练数据集配置 - type=dataset_type, - ann_file=f'{data_root}/train_places_img_list_total.txt', - data_prefix=data_root, - pipeline=train_pipeline, - test_mode=False), - val=dict( # 验证数据集配置 - type=dataset_type, - ann_file=f'{data_root}/val_places_img_list.txt', - data_prefix=data_root, - pipeline=test_pipeline, - test_mode=True)) - -optimizers = dict( # 用于构建优化器的配置,支持 PyTorch 中所有优化器,且参数与 PyTorch 中对应优化器相同 - generator=dict(type='Adam', lr=0.0004), disc=dict(type='Adam', lr=0.0004)) - -lr_config = dict(policy='Fixed', by_epoch=False) # 用于注册 LrUpdater 钩子的学习率调度程序配置 - -checkpoint_config = dict(by_epoch=False, interval=50000) # 配置检查点钩子,实现参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py -log_config = dict( # 配置注册记录器钩子 - interval=100, # 打印日志的时间间隔 - hooks=[ - dict(type='TextLoggerHook', 
by_epoch=False), - # dict(type='TensorboardLoggerHook'), # 支持 Tensorboard 记录器 - # dict(type='PaviLoggerHook', init_kwargs=dict(project='mmedit')) - ]) # 用于记录训练过程的记录器 - -visual_config = dict( # 构建可视化钩子的配置 - type='VisualizationHook', - output_dir='visual', - interval=1000, - res_name_list=[ - 'gt_img', 'masked_img', 'fake_res', 'fake_img', 'fake_gt_local' - ], -) # 用于可视化训练过程的记录器。 - -evaluation = dict(interval=50000) # 构建验证钩子的配置 - -total_iters = 500002 -dist_params = dict(backend='nccl') # 设置分布式训练的参数,端口也可以设置 -log_level = 'INFO' # 日志级别 -work_dir = None # 保存当前实验的模型检查点和日志的目录 -load_from = None # 从给定路径加载模型作为预训练模型。 这不会恢复训练 -resume_from = None # 从给定路径恢复检查点,当检查点被保存时,训练将从该 epoch 恢复 -workflow = [('train', 10000)] # runner 的工作流程。 [('train', 1)] 表示只有一个工作流程,名为 'train' 的工作流程执行一次。 训练当前生成模型时保持不变 -exp_name = 'gl_places' # 实验名称 -find_unused_parameters = False # 是否在分布式训练中查找未使用的参数 -``` diff --git a/docs/zh_cn/user_guides/configs/config_matting.md b/docs/zh_cn/user_guides/configs/config_matting.md deleted file mode 100644 index 41058bff29..0000000000 --- a/docs/zh_cn/user_guides/configs/config_matting.md +++ /dev/null @@ -1,167 +0,0 @@ -# 配置文件 - 抠图 - -与 [MMDetection](https://github.com/open-mmlab/mmdetection) 一样,我们将模块化和继承设计融入我们的配置系统,以方便进行各种实验。 - -## 例子 - Deep Image Matting Model - -为了帮助用户对一个完整的配置有一个基本的了解,我们对我们实现的原始 DIM 模型的配置做一个简短的评论,如下所示。 更详细的用法和各个模块对应的替代方案,请参考 API 文档。 - -```python -# 模型配置 -model = dict( - type='DIM', # 模型的名称(我们称之为抠图器) - backbone=dict( # 主干网络的配置 - type='SimpleEncoderDecoder', # 主干网络的类型 - encoder=dict( # 编码器的配置 - type='VGG16'), # 编码器的类型 - decoder=dict( # 解码器的配置 - type='PlainDecoder')), # 解码器的类型 - pretrained='./weights/vgg_state_dict.pth', # 编码器的预训练权重 - loss_alpha=dict( # alpha 损失的配置 - type='CharbonnierLoss', # 预测的 alpha 遮罩的损失类型 - loss_weight=0.5), # alpha 损失的权重 - loss_comp=dict( # 组合损失的配置 - type='CharbonnierCompLoss', # 组合损失的类型 - loss_weight=0.5)) # 组合损失的权重 -train_cfg = dict( # 训练 DIM 模型的配置 - train_backbone=True, # 在 DIM stage 1 中,会对主干网络进行训练 - train_refiner=False) # 在 DIM stage 1 中,不会对精炼器进行训练 -test_cfg = dict( # 测试 DIM 模型的配置 - refine=False, # 是否使用精炼器输出作为输出,在 stage 1 中,我们不使用它 - metrics=['SAD', 'MSE', 'GRAD', 'CONN']) # 测试时使用的指标 - -# 数据配置 -dataset_type = 'AdobeComp1kDataset' # 数据集类型,这将用于定义数据集 -data_root = 'data/adobe_composition-1k' # 数据的根目录 -img_norm_cfg = dict( # 归一化输入图像的配置 - mean=[0.485, 0.456, 0.406], # 归一化中使用的均值 - std=[0.229, 0.224, 0.225], # 归一化中使用的标准差 - to_rgb=True) # 是否将图像通道从 BGR 转换为 RGB -train_pipeline = [ # 训练数据处理流程 - dict( - type='LoadImageFromFile', # 从文件加载 alpha 遮罩 - key='alpha', # 注释文件中 alpha 遮罩的键关键词。流程将从路径 “alpha_path” 中读取 alpha 遮罩 - flag='grayscale'), # 加载灰度图像,形状为(高度、宽度) - dict( - type='LoadImageFromFile', # 从文件中加载图像 - key='fg'), # 要加载的图像的关键词。流程将从路径 “fg_path” 读取 fg - dict( - type='LoadImageFromFile', # 从文件中加载图像 - key='bg'), # 要加载的图像的关键词。流程将从路径 “bg_path” 读取 bg - dict( - type='LoadImageFromFile', # 从文件中加载图像 - key='merged'), # 要加载的图像的关键词。流程将从路径 “merged_path” 读取并合并 - dict( - type='CropAroundUnknown', # 在未知区域(半透明区域)周围裁剪图像 - keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg'], # 要裁剪的图像 - crop_sizes=[320, 480, 640]), # 裁剪大小 - dict( - type='Flip', # 翻转图像 - keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg']), # 要翻转的图像 - dict( - type='Resize', # 图像大小调整 - keys=['alpha', 'merged', 'ori_merged', 'fg', 'bg'], # 图像调整大小的图像 - scale=(320, 320), # 目标大小 - keep_ratio=False), # 是否保持高宽比例 - dict( - type='GenerateTrimap', # 从 alpha 遮罩生成三元图。 - kernel_size=(1, 30)), # 腐蚀/扩张内核大小的范围 - dict( - type='RescaleToZeroOne', # 将图像从 [0, 255] 缩放到 [0, 1] - keys=['merged', 'alpha', 'ori_merged', 'fg', 'bg']), # 要重新缩放的图像 - 
dict( - type='Normalize', # 图像归一化 - keys=['merged'], # 要归一化的图像 - **img_norm_cfg), # 图像归一化配置(`img_norm_cfg` 的定义见上文) - dict( - type='Collect', # 决定数据中哪些键应该传递给合成器 - keys=['merged', 'alpha', 'trimap', 'ori_merged', 'fg', 'bg'], # 图像的关键词 - meta_keys=[]), # 图片的元关键词,这里不需要元信息。 - dict( - type='ToTensor', # 将图像转化为 Tensor - keys=['merged', 'alpha', 'trimap', 'ori_merged', 'fg', 'bg']), # 要转换为 Tensor 的图像 -] -test_pipeline = [ - dict( - type='LoadImageFromFile', # 从文件加载 alpha 遮罩 - key='alpha', # 注释文件中 alpha 遮罩的键关键词。流程将从路径 “alpha_path” 中读取 alpha 遮罩 - flag='grayscale', - save_original_img=True), - dict( - type='LoadImageFromFile', # 从文件中加载图像 - key='trimap', # 要加载的图像的关键词。流程将从路径 “trimap_path” 读取三元图 - flag='grayscale', # 加载灰度图像,形状为(高度、宽度) - save_original_img=True), # 保存三元图用于计算指标。 它将与 “ori_trimap” 一起保存 - dict( - type='LoadImageFromFile', # 从文件中加载图像 - key='merged'), # 要加载的图像的关键词。流程将从路径 “merged_path” 读取并合并 - dict( - type='Pad', # 填充图像以与模型的下采样因子对齐 - keys=['trimap', 'merged'], # 要填充的图像 - mode='reflect'), # 填充模式 - dict( - type='RescaleToZeroOne', # 与 train_pipeline 相同 - keys=['merged', 'ori_alpha']), # 要缩放的图像 - dict( - type='Normalize', # 与 train_pipeline 相同 - keys=['merged'], - **img_norm_cfg), - dict( - type='Collect', # 与 train_pipeline 相同 - keys=['merged', 'trimap'], - meta_keys=[ - 'merged_path', 'pad', 'merged_ori_shape', 'ori_alpha', - 'ori_trimap' - ]), - dict( - type='ToTensor', # 与 train_pipeline 相同 - keys=['merged', 'trimap']), -] -data = dict( - samples_per_gpu=1, #单个 GPU 的批量大小 - workers_per_gpu=4, # 为每个 GPU 预取数据的 Worker 数 - drop_last=True, # 是否丢弃训练中的最后一批数据 - train=dict( # 训练数据集配置 - type=dataset_type, # 数据集的类型 - ann_file=f'{data_root}/training_list.json', # 注解文件路径 - data_prefix=data_root, # 图像路径的前缀 - pipeline=train_pipeline), # 见上文 train_pipeline - val=dict( # 验证数据集配置 - type=dataset_type, - ann_file=f'{data_root}/test_list.json', - data_prefix=data_root, - pipeline=test_pipeline), # 见上文 test_pipeline - test=dict( # 测试数据集配置 - type=dataset_type, - ann_file=f'{data_root}/test_list.json', - data_prefix=data_root, - pipeline=test_pipeline)) # 见上文 test_pipeline - -# 优化器 -optimizers = dict(type='Adam', lr=0.00001) # 用于构建优化器的配置,支持 PyTorch 中所有优化器,且参数与 PyTorch 中对应优化器相同 -# 学习策略 -lr_config = dict( # 用于注册 LrUpdater 钩子的学习率调度程序配置 - policy='Fixed') # 调度器的策略,支持 CosineAnnealing、Cyclic 等。支持的 LrUpdater 详情请参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9。 - -# 检查点保存 -checkpoint_config = dict( # 配置检查点钩子,实现参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py - interval=40000, # 保存间隔为 40000 次迭代 - by_epoch=False) # 按迭代计数 -evaluation = dict( # # 构建验证钩子的配置 - interval=40000) # 验证区间 -log_config = dict( # 配置注册记录器钩子 - interval=10, # 打印日志的时间间隔 - hooks=[ - dict(type='TextLoggerHook', by_epoch=False), # 用于记录训练过程的记录器 - # dict(type='TensorboardLoggerHook') # 支持 Tensorboard 记录器 - ]) - -# runtime settings -total_iters = 1000000 # 训练模型的总迭代次数 -dist_params = dict(backend='nccl') # 设置分布式训练的参数,端口也可以设置 -log_level = 'INFO' # 日志级别 -work_dir = './work_dirs/dim_stage1' # 保存当前实验的模型检查点和日志的目录 -load_from = None # 从给定路径加载模型作为预训练模型。 这不会恢复训练 -resume_from = None # 从给定路径恢复检查点,当检查点被保存时,训练将从该 epoch 恢复 -workflow = [('train', 1)] # runner 的工作流程。 [('train', 1)] 表示只有一个工作流程,名为 'train' 的工作流程执行一次。 训练当前抠图模型时保持不变 -``` diff --git a/docs/zh_cn/user_guides/configs/config_restoration.md b/docs/zh_cn/user_guides/configs/config_restoration.md deleted file mode 100644 index 68d6589ef4..0000000000 --- a/docs/zh_cn/user_guides/configs/config_restoration.md +++ /dev/null @@ -1,162 +0,0 @@ -# 配置文件 - 复原 - -## 
示例-EDSR - -为了帮助用户理解 mmediting 的配置文件结构,这里以 EDSR 为例,给出其配置文件的注释。对于每个模块的详细用法以及对应参数的选择,请参照 API 文档。 - -```python -exp_name = 'edsr_x2c64b16_1x16_300k_div2k' # 实验名称 - -scale = 2 # 上采样放大因子 - -# 模型设置 -model = dict( - type='BasicRestorer', # 图像恢复模型类型 - generator=dict( # 生成器配置 - type='EDSR', # 生成器类型 - in_channels=3, # 输入通道数 - out_channels=3, # 输出通道数 - mid_channels=64, # 中间特征通道数 - num_blocks=16, # 残差块数目 - upscale_factor=scale, # 上采样因子 - res_scale=1, # 残差缩放因子 - rgb_mean=(0.4488, 0.4371, 0.4040), # 输入图像 RGB 通道的平均值 - rgb_std=(1.0, 1.0, 1.0)), # 输入图像 RGB 通道的方差 - pixel_loss=dict(type='L1Loss', loss_weight=1.0, reduction='mean')) # 像素损失函数的配置 - -# 模型训练和测试设置 -train_cfg = None # 训练的配置 -test_cfg = dict( # 测试的配置 - metrics=['PSNR'], # 测试时使用的评价指标 - crop_border=scale) # 测试时裁剪的边界尺寸 - -# 数据集设置 -train_dataset_type = 'SRAnnotationDataset' # 用于训练的数据集类型 -val_dataset_type = 'SRFolderDataset' # 用于验证的数据集类型 -train_pipeline = [ # 训练数据前处理流水线步骤组成的列表 - dict(type='LoadImageFromFile', # 从文件加载图像 - io_backend='disk', # 读取图像时使用的 io 类型 - key='lq', # 设置LR图像的键来找到相应的路径 - flag='unchanged'), # 读取图像的标识 - dict(type='LoadImageFromFile', # 从文件加载图像 - io_backend='disk', # 读取图像时使用的io类型 - key='gt', # 设置HR图像的键来找到相应的路径 - flag='unchanged'), # 读取图像的标识 - dict(type='RescaleToZeroOne', keys=['lq', 'gt']), # 将图像从[0,255]重缩放到[0,1] - dict(type='Normalize', # 正则化图像 - keys=['lq', 'gt'], # 执行正则化图像的键 - mean=[0, 0, 0], # 平均值 - std=[1, 1, 1], # 标准差 - to_rgb=True), # 更改为 RGB 通道 - dict(type='PairedRandomCrop', gt_patch_size=96), # LR 和 HR 成对随机裁剪 - dict(type='Flip', # 图像翻转 - keys=['lq', 'gt'], # 执行翻转图像的键 - flip_ratio=0.5, # 执行翻转的几率 - direction='horizontal'), # 翻转方向 - dict(type='Flip', # 图像翻转 - keys=['lq', 'gt'], # 执行翻转图像的键 - flip_ratio=0.5, # 执行翻转几率 - direction='vertical'), # 翻转方向 - dict(type='RandomTransposeHW', # 图像的随机的转置 - keys=['lq', 'gt'], # 执行转置图像的键 - transpose_ratio=0.5 # 执行转置的几率 - ), - dict(type='Collect', # Collect 类决定哪些键会被传递到生成器中 - keys=['lq', 'gt'], # 传入模型的键 - meta_keys=['lq_path', 'gt_path']), # 元信息键。在训练中,不需要元信息 - dict(type='ToTensor', # 将图像转换为张量 - keys=['lq', 'gt']) # 执行图像转换为张量的键 -] -test_pipeline = [ # 测试数据前处理流水线步骤组成的列表 - dict( - type='LoadImageFromFile', # 从文件加载图像 - io_backend='disk', # 读取图像时使用的io类型 - key='lq', # 设置LR图像的键来找到相应的路径 - flag='unchanged'), # 读取图像的标识 - dict( - type='LoadImageFromFile', # 从文件加载图像 - io_backend='disk', # 读取图像时使用的io类型 - key='gt', # 设置HR图像的键来找到相应的路径 - flag='unchanged'), # 读取图像的标识 - dict(type='RescaleToZeroOne', keys=['lq', 'gt']), # 将图像从[0,255]重缩放到[0,1] - dict( - type='Normalize', # 正则化图像 - keys=['lq', 'gt'], # 执行正则化图像的键 - mean=[0, 0, 0], # 平均值 - std=[1, 1, 1], # 标准差 - to_rgb=True), # 更改为RGB通道 - dict(type='Collect', # Collect类决定哪些键会被传递到生成器中 - keys=['lq', 'gt'], # 传入模型的键 - meta_keys=['lq_path', 'gt_path']), # 元信息键 - dict(type='ToTensor', # 将图像转换为张量 - keys=['lq', 'gt']) # 执行图像转换为张量的键 -] - -data = dict( - # 训练 - samples_per_gpu=16, # 单个 GPU 的批大小 - workers_per_gpu=6, # 单个 GPU 的 dataloader 的进程 - drop_last=True, # 在训练过程中丢弃最后一个批次 - train=dict( # 训练数据集的设置 - type='RepeatDataset', # 基于迭代的重复数据集 - times=1000, # 重复数据集的重复次数 - dataset=dict( - type=train_dataset_type, # 数据集类型 - lq_folder='data/DIV2K/DIV2K_train_LR_bicubic/X2_sub', # lq文件夹的路径 - gt_folder='data/DIV2K/DIV2K_train_HR_sub', # gt文件夹的路径 - ann_file='data/DIV2K/meta_info_DIV2K800sub_GT.txt', # 批注文件的路径 - pipeline=train_pipeline, # 训练流水线,如上所示 - scale=scale)), # 上采样放大因子 - - # 验证 - val_samples_per_gpu=1, # 验证时单个 GPU 的批大小 - val_workers_per_gpu=1, # 验证时单个 GPU 的 dataloader 的进程 - val=dict( - type=val_dataset_type, # 数据集类型 - lq_folder='data/val_set5/Set5_bicLRx2', # lq 文件夹的路径 - 
gt_folder='data/val_set5/Set5_mod12', # gt 文件夹的路径 - pipeline=test_pipeline, # 测试流水线,如上所示 - scale=scale, # 上采样放大因子 - filename_tmpl='{}'), # 文件名模板 - - # 测试 - test=dict( - type=val_dataset_type, # 数据集类型 - lq_folder='data/val_set5/Set5_bicLRx2', # lq 文件夹的路径 - gt_folder='data/val_set5/Set5_mod12', # gt 文件夹的路径 - pipeline=test_pipeline, # 测试流水线,如上所示 - scale=scale, # 上采样放大因子 - filename_tmpl='{}')) # 文件名模板 - -# 优化器设置 -optimizers = dict(generator=dict(type='Adam', lr=1e-4, betas=(0.9, 0.999))) # 用于构建优化器的设置,支持PyTorch中所有参数与PyTorch中参数相同的优化器 - -# 学习策略 -total_iters = 300000 # 训练模型的总迭代数 -lr_config = dict( # 用于注册LrUpdater钩子的学习率调度程序配置 - policy='Step', by_epoch=False, step=[200000], gamma=0.5) # 调度器的策略,还支持余弦、循环等 - -checkpoint_config = dict( # 模型权重钩子设置,更多细节可参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py - interval=5000, # 模型权重文件保存间隔为5000次迭代 - save_optimizer=True, # 保存优化器 - by_epoch=False) # 按迭代次数计数 -evaluation = dict( # 构建验证钩子的配置 - interval=5000, # 执行验证的间隔为5000次迭代 - save_image=True, # 验证期间保存图像 - gpu_collect=True) # 使用gpu收集 -log_config = dict( # 注册日志钩子的设置 - interval=100, # 打印日志间隔 - hooks=[ - dict(type='TextLoggerHook', by_epoch=False), # 记录训练过程信息的日志 - dict(type='TensorboardLoggerHook'), # 同时支持 Tensorboard 日志 - ]) -visual_config = None # 可视化的设置 - -# 运行设置 -dist_params = dict(backend='nccl') # 建立分布式训练的设置,其中端口号也可以设置 -log_level = 'INFO' # 日志等级 -work_dir = f'./work_dirs/{exp_name}' # 记录当前实验日志和模型权重文件的文件夹 -load_from = None # 从给定路径加载模型作为预训练模型. 这个选项不会用于断点恢复训练 -resume_from = None # 加载给定路径的模型权重文件作为断点续连的模型, 训练将从该时间点保存的周期点继续进行 -workflow = [('train', 1)] # runner 的执行流. [('train', 1)] 代表只有一个执行流,并且这个名为 train 的执行流只执行一次 -``` diff --git a/docs/zh_cn/user_guides/dataset_prepare.md b/docs/zh_cn/user_guides/dataset_prepare.md new file mode 100644 index 0000000000..6d16267fb5 --- /dev/null +++ b/docs/zh_cn/user_guides/dataset_prepare.md @@ -0,0 +1 @@ +# 教程 2:准备数据集(待更新) diff --git a/docs/zh_cn/user_guides/datasets/dataset_prepare.md b/docs/zh_cn/user_guides/datasets/dataset_prepare.md deleted file mode 100644 index 16cc9bfc17..0000000000 --- a/docs/zh_cn/user_guides/datasets/dataset_prepare.md +++ /dev/null @@ -1 +0,0 @@ -# 数据集(待更新) diff --git a/docs/zh_cn/user_guides/deploy.md b/docs/zh_cn/user_guides/deploy.md new file mode 100644 index 0000000000..d5ac375884 --- /dev/null +++ b/docs/zh_cn/user_guides/deploy.md @@ -0,0 +1,155 @@ +# 教程 8:模型部署指南 + +[MMDeploy](https://github.com/open-mmlab/mmdeploy) 是 OpenMMLab 的部署仓库,负责包括 MMClassification、MMDetection、MMEditing 等在内的各算法库的部署工作。 +你可以从[这里](https://mmdeploy.readthedocs.io/zh_CN/1.x/04-supported-codebases/mmedit.html)获取 MMDeploy 对 MMClassification 部署支持的最新文档。 + +本文的结构如下: + +- [安装](#安装) +- [模型转换](#模型转换) +- [模型规范](#模型规范) +- [模型推理](#模型推理) + - [后端模型推理](#后端模型推理) + - [SDK 模型推理](#sdk-模型推理) +- [模型支持列表](#模型支持列表) + +## 安装 + +请参考[此处](../get_started/install.md)安装 mmedit。然后,按照[说明](https://mmdeploy.readthedocs.io/zh_CN/1.x/get_started.html#mmdeploy)安装 mmdeploy。 + +```{note} +如果安装的是 mmdeploy 预编译包,那么也请通过 'git clone https://github.com/open-mmlab/mmdeploy.git --depth=1' 下载 mmdeploy 源码。因为它包含了部署时要用到的配置文件 +``` + +## 模型转换 + +假设在安装步骤中,mmediting 和 mmdeploy 代码库在同级目录下,并且当前的工作目录为 mmediting 的根目录,那么以 [ESRGAN](../../../configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py) 模型为例,你可以从[此处](https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420-bf5c993c.pth)下载对应的 checkpoint,并使用以下代码将之转换为 onnx 模型: + +```python +from mmdeploy.apis import torch2onnx +from mmdeploy.backend.sdk.export_info import export2SDK + +img = 
'tests/data/image/face/000001.png' +work_dir = 'mmdeploy_models/mmedit/onnx' +save_file = 'end2end.onnx' +deploy_cfg = '../mmdeploy/configs/mmedit/super-resolution/super-resolution_onnxruntime_dynamic.py' +model_cfg = 'configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py' +model_checkpoint = 'esrgan_psnr_x4c64b23g32_1x16_1000k_div2k_20200420-bf5c993c.pth' +device = 'cpu' + +# 1. convert model to onnx +torch2onnx(img, work_dir, save_file, deploy_cfg, model_cfg, + model_checkpoint, device) + +# 2. extract pipeline info for inference by MMDeploy SDK +export2SDK(deploy_cfg, model_cfg, work_dir, pth=model_checkpoint, device=device) +``` + +转换的关键之一是使用正确的配置文件。项目中已内置了各后端部署[配置文件](https://github.com/open-mmlab/mmdeploy/tree/1.x/configs/mmedit)。 +文件的命名模式是: + +``` +{task}/{task}_{backend}-{precision}_{static | dynamic}_{shape}.py +``` + +其中: + +- **{task}:** mmedit 中的任务 +- **{backend}:** 推理后端名称。比如,onnxruntime、tensorrt、pplnn、ncnn、openvino、coreml 等等 +- **{precision}:** 推理精度。比如,fp16、int8。不填表示 fp32 +- **{static | dynamic}:** 动态、静态 shape +- **{shape}:** 模型输入的 shape 或者 shape 范围 + +在上例中,你也可以把 `ESRGAN` 转为其他后端模型。比如使用`super-resolution_tensorrt-fp16_dynamic-32x32-512x512.py`,把模型转为 tensorrt-fp16 模型。 + +```{tip} +当转 tensorrt 模型时, --device 需要被设置为 "cuda" +``` + +## 模型规范 + +在使用转换后的模型进行推理之前,有必要了解转换结果的结构。 它存放在 `--work-dir` 指定的路路径下。 + +上例中的`mmdeploy_models/mmedit/onnx`,结构如下: + +``` +mmdeploy_models/mmedit/onnx +├── deploy.json +├── detail.json +├── end2end.onnx +└── pipeline.json +``` + +重要的是: + +- **end2end.onnx**: 推理引擎文件。可用 ONNX Runtime 推理 +- ***xxx*.json**: mmdeploy SDK 推理所需的 meta 信息 + +整个文件夹被定义为**mmdeploy SDK model**。换言之,**mmdeploy SDK model**既包括推理引擎,也包括推理 meta 信息。 + +## 模型推理 + +### 后端模型推理 + +以上述模型转换后的 `end2end.onnx` 为例,你可以使用如下代码进行推理: + +```python +from mmdeploy.apis.utils import build_task_processor +from mmdeploy.utils import get_input_shape, load_config +import torch + +deploy_cfg = '../mmdeploy/configs/mmedit/super-resolution/super-resolution_onnxruntime_dynamic.py' +model_cfg = 'configs/esrgan/esrgan_psnr-x4c64b23g32_1xb16-1000k_div2k.py' +device = 'cpu' +backend_model = ['mmdeploy_models/mmedit/onnx/end2end.onnx'] +image = 'tests/data/image/lq/baboon_x4.png' + +# read deploy_cfg and model_cfg +deploy_cfg, model_cfg = load_config(deploy_cfg, model_cfg) + +# build task and backend model +task_processor = build_task_processor(model_cfg, deploy_cfg, device) +model = task_processor.build_backend_model(backend_model) + +# process input image +input_shape = get_input_shape(deploy_cfg) +model_inputs, _ = task_processor.create_input(image, input_shape) + +# do model inference +with torch.no_grad(): + result = model.test_step(model_inputs) + +# visualize results +task_processor.visualize( + image=image, + model=model, + result=result[0], + window_name='visualize', + output_file='output_restorer.bmp') +``` + +### SDK 模型推理 + +你也可以参考如下代码,对 SDK model 进行推理: + +```python +from mmdeploy_python import Restorer +import cv2 + +img = cv2.imread('tests/data/image/lq/baboon_x4.png') + +# create a predictor +restorer = Restorer(model_path='mmdeploy_models/mmedit/onnx', device_name='cpu', device_id=0) +# perform inference +result = restorer(img) + +# visualize inference result +cv2.imwrite('output_restorer.bmp', result) +``` + +除了python API,mmdeploy SDK 还提供了诸如 C、C++、C#、Java等多语言接口。 +你可以参考[样例](https://github.com/open-mmlab/mmdeploy/tree/1.x/demo)学习其他语言接口的使用方法。 + +## 模型支持列表 + +请参考[这里](https://mmdeploy.readthedocs.io/zh_CN/1.x/04-supported-codebases/mmedit.html#id7) diff --git a/docs/zh_cn/user_guides/inference.md 
b/docs/zh_cn/user_guides/inference.md index ae954498e1..7d56f7b4ae 100644 --- a/docs/zh_cn/user_guides/inference.md +++ b/docs/zh_cn/user_guides/inference.md @@ -1 +1,237 @@ -# 推理(待更新) +# 教程 3:预训练权重推理(待更新) + +我们针对特定任务提供了一些脚本,可以对单张图像进行推理。 + +#### 图像补全 + +您可以使用以下命令,输入一张测试图像以及缺损部位的遮罩图像,实现对测试图像的补全。 + +```shell +python demo/inpainting_demo.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + ${MASKED_IMAGE_FILE} \ + ${MASK_FILE} \ + ${SAVE_FILE} \ + [--imshow] \ + [--device ${GPU_ID}] +``` + +如果指定了 --imshow ,演示程序将使用 opencv 显示图像。例子: + +```shell +python demo/inpainting_demo.py \ + configs/global_local/gl_8xb12_celeba-256x256.py \ + https://download.openmmlab.com/mmediting/inpainting/global_local/gl_256x256_8x12_celeba_20200619-5af0493f.pth \ + tests/data/inpainting/celeba_test.png \ + tests/data/inpainting/bbox_mask.png \ + tests/data/inpainting/inpainting_celeba.png +``` + +补全结果将保存在 `tests/data/inpainting/inpainting_celeba.png` 中。 + +#### 抠图 + +您可以使用以下命令,输入一张测试图像以及对应的三元图(trimap),实现对测试图像的抠图。 + +```shell +python demo/matting_demo.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + ${IMAGE_FILE} \ + ${TRIMAP_FILE} \ + ${SAVE_FILE} \ + [--imshow] \ + [--device ${GPU_ID}] +``` + +如果指定了 --imshow ,演示程序将使用 opencv 显示图像。例子: + +```shell +python demo/matting_demo.py \ + configs/dim/dim_stage3-v16-pln_1000k-1xb1_comp1k.py \ + https://download.openmmlab.com/mmediting/mattors/dim/dim_stage3_v16_pln_1x1_1000k_comp1k_SAD-50.6_20200609_111851-647f24b6.pth \ + tests/data/matting_dataset/merged/GT05.jpg \ + tests/data/matting_dataset/trimap/GT05.png \ + tests/data/matting_dataset/pred/GT05.png +``` + +预测的 alpha 遮罩将保存在 `tests/data/matting_dataset/pred/GT05.png` 中。 + +#### 图像超分辨率 + +您可以使用以下命令来测试要恢复的图像。 + +```shell +python demo/restoration_demo.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + ${IMAGE_FILE} \ + ${SAVE_FILE} \ + [--imshow] \ + [--device ${GPU_ID}] \ + [--ref-path ${REF_PATH}] +``` + +如果指定了 `--imshow` ,演示程序将使用 opencv 显示图像。例子: + +```shell +python demo/restoration_demo.py \ + configs/esrgan/esrgan_x4c64b23g32_400k-1xb16_div2k.py \ + https://download.openmmlab.com/mmediting/restorers/esrgan/esrgan_x4c64b23g32_1x16_400k_div2k_20200508-f8ccaf3b.pth \ + tests/data/image/lq/baboon_x4.png \ + demo/demo_out_baboon.png +``` + +您可以通过提供 `--ref-path` 参数来测试基于参考的超分辨率算法。例子: + +```shell +python demo/restoration_demo.py \ + configs/ttsr/ttsr-gan_x4c64b16_500k-1xb9_CUFED.py \ + https://download.openmmlab.com/mmediting/restorers/ttsr/ttsr-gan_x4_c64b16_g1_500k_CUFED_20210626-2ab28ca0.pth \ + tests/data/frames/sequence/gt/sequence_1/00000000.png \ + demo/demo_out.png \ + --ref-path tests/data/frames/sequence/gt/sequence_1/00000001.png +``` + +#### 人脸图像超分辨率 + +您可以使用以下命令来测试要恢复的人脸图像。 + +```shell +python demo/restoration_face_demo.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + ${IMAGE_FILE} \ + ${SAVE_FILE} \ + [--upscale-factor] \ + [--face-size] \ + [--imshow] \ + [--device ${GPU_ID}] +``` + +如果指定了 --imshow ,演示程序将使用 opencv 显示图像。例子: + +```shell +python demo/restoration_face_demo.py \ + configs/glean/glean_in128out1024_300k-4xb2_ffhq-celeba-hq.py \ + https://download.openmmlab.com/mmediting/restorers/glean/glean_in128out1024_4x2_300k_ffhq_celebahq_20210812-acbcb04f.pth \ + tests/data/image/face/000001.png \ + tests/data/image/face/pred.png \ + --upscale-factor 4 +``` + +#### 视频超分辨率 + +您可以使用以下命令来测试视频以进行恢复。 + +```shell +python demo/restoration_video_demo.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + ${INPUT_DIR} \ + ${OUTPUT_DIR} \ + [--window-size=${WINDOW_SIZE}] \ + [--device ${GPU_ID}] +``` + +它同时支持滑动窗口框架和循环框架。 例子: + +EDVR: + 
+```shell +python demo/restoration_video_demo.py \ + configs/edvr/edvrm_wotsa_reds_600k-8xb8.py \ + https://download.openmmlab.com/mmediting/restorers/edvr/edvrm_wotsa_x4_8x4_600k_reds_20200522-0570e567.pth \ + data/Vid4/BIx4/calendar/ \ + demo/output \ + --window-size=5 +``` + +BasicVSR: + +```shell +python demo/restoration_video_demo.py \ + configs/basicvsr/basicvsr_2xb4_reds4.py \ + https://download.openmmlab.com/mmediting/restorers/basicvsr/basicvsr_reds4_20120409-0e599677.pth \ + data/Vid4/BIx4/calendar/ \ + demo/output +``` + +复原的视频将保存在 ` demo/output/` 中。 + +#### 视频插帧 + +您可以使用以下命令来测试视频插帧。 + +```shell +python demo/video_interpolation_demo.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + ${INPUT_DIR} \ + ${OUTPUT_DIR} \ + [--fps-multiplier ${FPS_MULTIPLIER}] \ + [--fps ${FPS}] +``` + +`${INPUT_DIR}` 和 `${OUTPUT_DIR}` 可以是视频文件路径或存放一系列有序图像的文件夹。 +若 `${OUTPUT_DIR}` 是视频文件地址,其帧率可由输入视频帧率和 `fps_multiplier` 共同决定,也可由 `fps` 直接给定(其中前者优先级更高)。例子: + +由输入视频帧率和 `fps_multiplier` 共同决定输出视频的帧率: + +```shell +python demo/video_interpolation_demo.py \ + configs/cain/cain_g1b32_1xb5_vimeo90k-triplet.py \ + https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_320k_vimeo-triple_20220117-647f3de2.pth \ + tests/data/frames/test_inference.mp4 \ + tests/data/frames/test_inference_vfi_out.mp4 \ + --fps-multiplier 2.0 +``` + +由 `fps` 直接给定输出视频的帧率: + +```shell +python demo/video_interpolation_demo.py \ + configs/cain/cain_g1b32_1xb5_vimeo90k-triplet.py \ + https://download.openmmlab.com/mmediting/video_interpolators/cain/cain_b5_320k_vimeo-triple_20220117-647f3de2.pth \ + tests/data/frames/test_inference.mp4 \ + tests/data/frames/test_inference_vfi_out.mp4 \ + --fps 60.0 +``` + +#### 图像生成 + +```shell +python demo/generation_demo.py \ + ${CONFIG_FILE} \ + ${CHECKPOINT_FILE} \ + ${IMAGE_FILE} \ + ${SAVE_FILE} \ + [--unpaired-path ${UNPAIRED_IMAGE_FILE}] \ + [--imshow] \ + [--device ${GPU_ID}] +``` + +如果指定了 `--unpaired-path` (用于 CycleGAN),模型将执行未配对的图像到图像的转换。 如果指定了 `--imshow` ,演示也将使用opencv显示图像。 例子: + +针对配对数据: + +```shell +python demo/generation_demo.py \ + configs/example_config.py \ + work_dirs/example_exp/example_model_20200202.pth \ + demo/demo.jpg \ + demo/demo_out.jpg +``` + +针对未配对数据(用 opencv 显示图像): + +```shell +python demo/generation_demo.py 、 + configs/example_config.py \ + work_dirs/example_exp/example_model_20200202.pth \ + demo/demo.jpg \ + demo/demo_out.jpg \ + --unpaired-path demo/demo_unpaired.jpg \ + --imshow +``` diff --git a/docs/zh_cn/user_guides/metrics.md b/docs/zh_cn/user_guides/metrics.md new file mode 100644 index 0000000000..6df8c50bea --- /dev/null +++ b/docs/zh_cn/user_guides/metrics.md @@ -0,0 +1 @@ +# 教程 5:使用评价指标 diff --git a/docs/zh_cn/user_guides/train_test.md b/docs/zh_cn/user_guides/train_test.md index 35571fc9e9..7ee71b6bb6 100644 --- a/docs/zh_cn/user_guides/train_test.md +++ b/docs/zh_cn/user_guides/train_test.md @@ -1 +1 @@ -# 训练 & 测试(待更新) +# 教程 4:训练与测试(待更新) diff --git a/docs/zh_cn/user_guides/useful_tools.md b/docs/zh_cn/user_guides/useful_tools.md index eea60c83a6..429e2a6f77 100644 --- a/docs/zh_cn/user_guides/useful_tools.md +++ b/docs/zh_cn/user_guides/useful_tools.md @@ -1 +1,351 @@ -# 实用工具(待更新) +# 教程 7:实用工具(待更新) + +我们在 `tools/` 目录下提供了很多有用的工具。 + +### 获取 FLOP 和参数量(实验性) + +我们提供了一个改编自 [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) 的脚本来计算模型的 FLOP 和参数量。 + +```shell +python tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}] +``` + +例如, + +```shell +python tools/get_flops.py configs/resotorer/srresnet.py --shape 40 40 +``` + +你会得到以下的结果。 
+ +``` +============================== +Input shape: (3, 40, 40) +Flops: 4.07 GMac +Params: 1.52 M +============================== +``` + +**注**:此工具仍处于实验阶段,我们不保证数字正确。 您可以将结果用于简单的比较,但在技术报告或论文中采用它之前,请仔细检查它。 + +(1) FLOPs 与输入形状有关,而参数量与输入形状无关。默认输入形状为 (1, 3, 250, 250)。 +(2) 一些运算符不计入 FLOP,如 GN 和自定义运算符。 +你可以通过修改 [`mmcv/cnn/utils/flops_counter.py`](https://github.com/open-mmlab/mmcv/blob/master/mmcv/cnn/utils/flops_counter.py) 来添加对新运算符的支持。 + +### 发布模型 + +在将模型上传到 AWS 之前,您可能需要 +(1) 将模型权重转换为 CPU tensors, (2) 删除优化器状态,和 +(3) 计算模型权重文件的哈希并将哈希 ID 附加到文件名。 + +```shell +python tools/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME} +``` + +例如, + +```shell +python tools/publish_model.py work_dirs/example_exp/latest.pth example_model_20200202.pth +``` + +最终输出文件名将是 `example_model_20200202-{hash id}.pth`. + +### 转换为 ONNX(实验性) + +我们提供了一个脚本将模型转换为 [ONNX](https://github.com/onnx/onnx) 格式。 转换后的模型可以通过 [Netron](https://github.com/lutzroeder/netron) 等工具进行可视化。此外,我们还支持比较 Pytorch 和 ONNX 模型之间的输出结果。 + +```bash +python tools/pytorch2onnx.py + ${CFG_PATH} \ + ${CHECKPOINT_PATH} \ + ${MODEL_TYPE} \ + ${IMAGE_PATH} \ + --trimap-path ${TRIMAP_PATH} \ + --output-file ${OUTPUT_ONNX} \ + --show \ + --verify \ + --dynamic-export +``` + +参数说明: + +- `config` : 模型配置文件的路径。 +- `checkpoint` : 模型模型权重文件的路径。 +- `model_type` : 配置文件的模型类型,选项: `inpainting`, `mattor`, `restorer`, `synthesizer`。 +- `image_path` : 输入图像文件的路径。 +- `--trimap-path` : 输入三元图文件的路径,用于 mattor 模型。 +- `--output-file`: 输出 ONNX 模型的路径。默认为 `tmp.onnx`。 +- `--opset-version` : ONNX opset 版本。默认为 11。 +- `--show`: 确定是否打印导出模型的架构。默认为 `False`。 +- `--verify`: 确定是否验证导出模型的正确性。默认为 `False`。 +- `--dynamic-export`: 确定是否导出具有动态输入和输出形状的 ONNX 模型。默认为 `False`。 + +**注**:此工具仍处于试验阶段。目前不支持某些自定义运算符。我们现在只支持 `mattor` 和 `restorer`。 + +#### 支持导出到 ONNX 的模型列表 + +下表列出了保证可导出到 ONNX 并可在 ONNX Runtime 中运行的模型。 + +| 模型 | 配置 | 动态形状 | 批量推理 | 备注 | +| :------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------: | :------: | :--: | +| ESRGAN | [esrgan_x4c64b23g32_g1_400k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/esrgan/esrgan_x4c64b23g32_g1_400k_div2k.py) | Y | Y | | +| ESRGAN | [esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/esrgan/esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py) | Y | Y | | +| SRCNN | [srcnn_x4k915_g1_1000k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/srcnn/srcnn_x4k915_g1_1000k_div2k.py) | Y | Y | | +| DIM | [dim_stage3_v16_pln_1x1_1000k_comp1k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/dim/dim_stage3_v16_pln_1x1_1000k_comp1k.py) | Y | Y | | +| GCA | [gca_r34_4x10_200k_comp1k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/gca/gca_r34_4x10_200k_comp1k.py) | N | Y | | +| IndexNet | [indexnet_mobv2_1x16_78k_comp1k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/indexnet/indexnet_mobv2_1x16_78k_comp1k.py) | Y | Y | | + +**注**: + +- *以上所有模型均使用 Pytorch==1.6.0 和 onnxruntime==1.5.1* +- 如果您遇到上面列出的模型的任何问题,请创建一个 issue,我们会尽快处理。对于列表中未包含的型号,请尝试自行解决。 +- 由于此功能是实验性的并且可能会快速更改,请始终尝试使用最新的 `mmcv` 和 `mmedit`。 + +### 将 ONNX 转换为 TensorRT(实验性) + +我们还提供了将 [ONNX](https://github.com/onnx/onnx) 模型转换为 [TensorRT](https://github.com/NVIDIA/TensorRT) 格式的脚本。 此外,我们支持比较 ONNX 和 TensorRT 模型之间的输出结果。 + +```bash +python tools/onnx2tensorrt.py + ${CFG_PATH} \ + ${MODEL_TYPE} \ + ${IMAGE_PATH} \ + ${INPUT_ONNX} \ + --trt-file 
${OUT_TENSORRT} \ + --max-shape INT INT INT INT \ + --min-shape INT INT INT INT \ + --workspace-size INT \ + --fp16 \ + --show \ + --verify \ + --verbose +``` + +参数说明: + +- `config` : 模型配置文件的路径。 +- `model_type` :配置文件的模型类型,选项: `inpainting`, `mattor`, `restorer`, `synthesizer`。 +- `img_path` : 输入图像文件的路径。 +- `onnx_file` : 输入 ONNX 文件的路径。 +- `--trt-file` : 输出 TensorRT 模型的路径。默认为 `tmp.trt`。 +- `--max-shape` : 模型输入的最大形状。 +- `--min-shape` : 模型输入的最小形状。 +- `--workspace-size`: 以 GiB 为单位的最大工作空间大小。默认为 1 GiB。 +- `--fp16`: 确定是否以 fp16 模式导出 TensorRT。默认为 `False`。 +- `--show`: 确定是否显示 ONNX 和 TensorRT 的输出。默认为 `False`。 +- `--verify`: 确定是否验证导出模型的正确性。默认为 `False`。 +- `--verbose`: 确定在创建 TensorRT 引擎时是否详细记录日志消息。默认为 `False`。 + +**注**:此工具仍处于试验阶段。 目前不支持某些自定义运算符。 我们现在只支持 `restorer`。 在生成 SRCNN 的 ONNX 文件时,将 SCRNN 模型中的 'bicubic' 替换为 'bilinear' \[此处\](https://github.com/open-mmlab/mmediting/blob/764e6065e315b7d0033762038fcbf0bb1c570d4d/mmedit.bones/modelsrnn py#L40)。 因为 TensorRT 目前不支持 bicubic 插值,最终性能将下降约 4%。 + +#### 支持导出到 TensorRT 的模型列表 + +下表列出了保证可导出到 TensorRT 引擎并可在 TensorRT 中运行的模型。 + +| 模型 | 配置 | 动态形状 | 批量推理 | 备注 | +| :----: | :-------------------------------------------------------------------------------------------------------------------------------------------: | :------: | :------: | :-----------------------------------: | +| ESRGAN | [esrgan_x4c64b23g32_g1_400k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/esrgan/esrgan_x4c64b23g32_g1_400k_div2k.py) | Y | Y | | +| ESRGAN | [esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/esrgan/esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py) | Y | Y | | +| SRCNN | [srcnn_x4k915_g1_1000k_div2k.py](https://github.com/open-mmlab/mmediting/blob/master/configs/restorers/srcnn/srcnn_x4k915_g1_1000k_div2k.py) | Y | Y | 'bicubic' 上采样必须替换为 'bilinear' | + +**注**: + +- *以上所有模型均使用 Pytorch==1.8.1、onnxruntime==1.7.0 和 tensorrt==7.2.3.4 进行测试* +- 如果您遇到上面列出的模型的任何问题,请创建一个问题,我们会尽快处理。 对于列表中未包含的型号,请尝试自行解决。 +- 由于此功能是实验性的并且可能会快速更改,因此请始终尝试使用最新的 `mmcv` 和 `mmedit`。 + +### 评估 ONNX 和 TensorRT 模型(实验性) + +我们在 `tools/deploy_test.py` 中提供了评估 TensorRT 和 ONNX 模型的方法。 + +#### 先决条件 + +要评估 ONNX 和 TensorRT 模型,应先安装 onnx、onnxruntime 和 TensorRT。遵循 [mmcv 中的 ONNXRuntime](https://mmcv.readthedocs.io/en/latest/onnxruntime_op.html) 和 \[mmcv 中的 TensorRT 插件\](https://github.com/open-mmlab/mmcv/blob/master/docs/tensorrt_plugin.md%EF%BC%89%E4%BD%BF%E7%94%A8 ONNXRuntime 自定义操作和 TensorRT 插件安装 `mmcv-full`。 + +#### 用法 + +```bash +python tools/deploy_test.py \ + ${CONFIG_FILE} \ + ${MODEL_PATH} \ + ${BACKEND} \ + --out ${OUTPUT_FILE} \ + --save-path ${SAVE_PATH} \ + ----cfg-options ${CFG_OPTIONS} \ +``` + +#### 参数说明: + +- `config`: 模型配置文件的路径。 +- `model`: TensorRT 或 ONNX 模型文件的路径。 +- `backend`: 用于测试的后端,选择 tensorrt 或 onnxruntime。 +- `--out`: pickle 格式的输出结果文件的路径。 +- `--save-path`: 存储图像的路径,如果没有给出,则不会保存图像。 +- `--cfg-options`: 覆盖使用的配置文件中的一些设置,`xxx=yyy` 格式的键值对将被合并到配置文件中。 + +#### 结果和模型 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Model | Config | Dataset | Metric | PyTorch | ONNX Runtime | TensorRT FP32 | TensorRT FP16 |
+| :----: | :---------------------------------------: | :---: | :----: | :-----: | :----------: | :-----------: | :-----------: |
+| ESRGAN | esrgan_x4c64b23g32_g1_400k_div2k.py | Set5 | PSNR | 28.2700 | 28.2619 | 28.2619 | 28.2616 |
+| | | | SSIM | 0.7778 | 0.7784 | 0.7784 | 0.7783 |
+| | | Set14 | PSNR | 24.6328 | 24.6290 | 24.6290 | 24.6274 |
+| | | | SSIM | 0.6491 | 0.6494 | 0.6494 | 0.6494 |
+| | | DIV2K | PSNR | 26.6531 | 26.6532 | 26.6532 | 26.6532 |
+| | | | SSIM | 0.7340 | 0.7340 | 0.7340 | 0.7340 |
+| ESRGAN | esrgan_psnr_x4c64b23g32_g1_1000k_div2k.py | Set5 | PSNR | 30.6428 | 30.6307 | 30.6307 | 30.6305 |
+| | | | SSIM | 0.8559 | 0.8565 | 0.8565 | 0.8566 |
+| | | Set14 | PSNR | 27.0543 | 27.0422 | 27.0422 | 27.0411 |
+| | | | SSIM | 0.7447 | 0.7450 | 0.7450 | 0.7449 |
+| | | DIV2K | PSNR | 29.3354 | 29.3354 | 29.3354 | 29.3339 |
+| | | | SSIM | 0.8263 | 0.8263 | 0.8263 | 0.8263 |
+| SRCNN | srcnn_x4k915_g1_1000k_div2k.py | Set5 | PSNR | 28.4316 | 28.4120 | 27.2144 | 27.2127 |
+| | | | SSIM | 0.8099 | 0.8106 | 0.7782 | 0.7781 |
+| | | Set14 | PSNR | 25.6486 | 25.6367 | 24.8613 | 24.8599 |
+| | | | SSIM | 0.7014 | 0.7015 | 0.6674 | 0.6673 |
+| | | DIV2K | PSNR | 27.7460 | 27.7460 | 26.9891 | 26.9862 |
+| | | | SSIM | 0.7854 | 0.7854 | 0.7605 | 0.7604 |
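+
+下面是一个假设的调用示例,演示如何用 `tools/deploy_test.py` 复现上表中某一行的结果;其中的 ONNX 模型路径与输出路径仅为占位符,请替换为你自己的实际路径:
+
+```shell
+# 假设示例:用 onnxruntime 后端评估导出的 ESRGAN ONNX 模型
+python tools/deploy_test.py \
+    configs/restorers/esrgan/esrgan_x4c64b23g32_g1_400k_div2k.py \
+    work_dirs/onnx/esrgan.onnx \
+    onnxruntime \
+    --out work_dirs/onnx/results.pkl \
+    --save-path work_dirs/onnx/images/
+```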
+ +**注**: + +- 所有 ONNX 和 TensorRT 模型都使用数据集上的动态形状进行评估,图像根据原始配置文件进行预处理。 +- 此工具仍处于试验阶段,我们目前仅支持 `restorer`。 diff --git a/docs/zh_cn/user_guides/visualization.md b/docs/zh_cn/user_guides/visualization.md index 04aa43c3ed..b42ece1d09 100644 --- a/docs/zh_cn/user_guides/visualization.md +++ b/docs/zh_cn/user_guides/visualization.md @@ -1 +1 @@ -# 可视化(待更新) +# 教程 6:可视化(待更新) diff --git a/mmedit/datasets/__init__.py b/mmedit/datasets/__init__.py index dd8a74e5da..91de4a8ac9 100644 --- a/mmedit/datasets/__init__.py +++ b/mmedit/datasets/__init__.py @@ -6,19 +6,14 @@ from .comp1k_dataset import AdobeComp1kDataset from .grow_scale_image_dataset import GrowScaleImgDataset from .imagenet_dataset import ImageNet +from .mscoco_dataset import MSCoCoDataset from .paired_image_dataset import PairedImageDataset from .singan_dataset import SinGANDataset from .unpaired_image_dataset import UnpairedImageDataset __all__ = [ - 'AdobeComp1kDataset', - 'BasicImageDataset', - 'BasicFramesDataset', - 'BasicConditionalDataset', - 'UnpairedImageDataset', - 'PairedImageDataset', - 'ImageNet', - 'CIFAR10', - 'GrowScaleImgDataset', - 'SinGANDataset', + 'AdobeComp1kDataset', 'BasicImageDataset', 'BasicFramesDataset', + 'BasicConditionalDataset', 'UnpairedImageDataset', 'PairedImageDataset', + 'ImageNet', 'CIFAR10', 'GrowScaleImgDataset', 'SinGANDataset', + 'MSCoCoDataset' ] diff --git a/mmedit/datasets/basic_conditional_dataset.py b/mmedit/datasets/basic_conditional_dataset.py index df0c371d73..6c2732a50f 100644 --- a/mmedit/datasets/basic_conditional_dataset.py +++ b/mmedit/datasets/basic_conditional_dataset.py @@ -13,15 +13,15 @@ @DATASETS.register_module() class BasicConditionalDataset(BaseDataset): - """Custom dataset for conditional GAN. This class is the combination of - `BaseDataset` (https://github.com/open- + """Custom dataset for conditional GAN. This class is based on the + combination of `BaseDataset` (https://github.com/open- mmlab/mmclassification/blob/1.x/mmcls/datasets/base_dataset.py) # noqa and `CustomDataset` (https://github.com/open- mmlab/mmclassification/blob/1.x/mmcls/datasets/custom.py). # noqa. The dataset supports two kinds of annotation format. - 1. An annotation file is provided, and each line indicates a sample: + 1. A annotation file read by line (e.g., txt) is provided, and each line indicates a sample: The sample files: :: @@ -47,7 +47,35 @@ class BasicConditionalDataset(BaseDataset): Please specify the name of categories by the argument ``classes`` or ``metainfo``. - 2. The samples are arranged in the specific way: :: + 2. A dict-based annotation file (e.g., json) is provided, key and value + indicate the path and label of the sample: + + The sample files: :: + + data_prefix/ + ├── folder_1 + │ ├── xxx.png + │ ├── xxy.png + │ └── ... + └── folder_2 + ├── 123.png + ├── nsdf3.png + └── ... + + The annotation file (the key is the image path and the value column + is the label): :: + + { + "folder_1/xxx.png": [1, 2, 3, 4], + "folder_1/xxy.png": [2, 4, 1, 0], + "folder_2/123.png": [0, 9, 8, 1], + "folder_2/nsdf3.png", [1, 0, 0, 2], + ... + } + + In this kind of annotation, labels can be any type and not restricted to an index. + + 3. The samples are arranged in the specific way: :: data_prefix/ ├── class_x @@ -62,7 +90,7 @@ class BasicConditionalDataset(BaseDataset): └── asd932_.png If the ``ann_file`` is specified, the dataset will be generated by the - first way, otherwise, try the second way. + first two ways, otherwise, try the third way. Args: ann_file (str): Annotation file path. 
Defaults to ''. @@ -156,9 +184,14 @@ def load_data_list(self): if not self.ann_file: samples = self._find_samples(file_client) - else: + elif self.ann_file.endswith('json'): + samples = mmengine.fileio.io.load(self.ann_file) + samples = [[name, label] for name, label in samples.items()] + elif self.ann_file.endswith('txt'): lines = mmengine.list_from_file(self.ann_file) samples = [x.strip().rsplit(' ', 1) for x in lines] + else: + raise TypeError('Only support \'json\' and \'txt\' as annotation.') def add_prefix(filename, prefix=''): if not prefix: @@ -169,7 +202,10 @@ def add_prefix(filename, prefix=''): data_list = [] for filename, gt_label in samples: img_path = add_prefix(filename, self.img_prefix) - info = {'img_path': img_path, 'gt_label': int(gt_label)} + # convert digit label to int + if isinstance(gt_label, str): + gt_label = int(gt_label) if gt_label.isdigit() else gt_label + info = {'img_path': img_path, 'gt_label': gt_label} data_list.append(info) return data_list diff --git a/mmedit/datasets/mscoco_dataset.py b/mmedit/datasets/mscoco_dataset.py new file mode 100644 index 0000000000..0ef12cd1c0 --- /dev/null +++ b/mmedit/datasets/mscoco_dataset.py @@ -0,0 +1,101 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import random +from typing import Optional, Sequence, Union + +import mmengine +from mmengine import FileClient + +from mmedit.registry import DATASETS +from .basic_conditional_dataset import BasicConditionalDataset + + +@DATASETS.register_module() +@DATASETS.register_module('MSCOCO') +class MSCoCoDataset(BasicConditionalDataset): + """MSCoCo 2014 dataset. + + Args: + ann_file (str): Annotation file path. Defaults to ''. + metainfo (dict, optional): Meta information for dataset, such as class + information. Defaults to None. + data_root (str): The root directory for ``data_prefix`` and + ``ann_file``. Defaults to ''. + drop_caption_rate (float, optional): Rate of dropping caption, + used for training. Defaults to 0.0. + phase (str, optional): Subdataset used for certain phase, can be set + to `train`, `test` and `val`. Defaults to 'train'. + year (int, optional): Version of CoCo dataset, can be set to 2014 + and 2017. Defaults to 2014. + data_prefix (str | dict): Prefix for the data. Defaults to ''. + extensions (Sequence[str]): A sequence of allowed extensions. Defaults + to ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif'). + lazy_init (bool): Whether to load annotation during instantiation. + In some cases, such as visualization, only the meta information of + the dataset is needed, which is not necessary to load annotation + file. ``Basedataset`` can skip load annotations to save time by set + ``lazy_init=False``. Defaults to False. + **kwargs: Other keyword arguments in :class:`BaseDataset`. + """ + METAINFO = dict(dataset_type='text_image_dataset', task_name='editing') + + def __init__(self, + ann_file: str = '', + metainfo: Optional[dict] = None, + data_root: str = '', + drop_caption_rate=0.0, + phase='train', + year=2014, + data_prefix: Union[str, dict] = '', + extensions: Sequence[str] = ('.jpg', '.jpeg', '.png', '.ppm', + '.bmp', '.pgm', '.tif'), + lazy_init: bool = False, + classes: Union[str, Sequence[str], None] = None, + **kwargs): + ann_file = os.path.join('annotations', 'captions_' + phase + + f'{year}.json') if ann_file == '' else ann_file + self.image_prename = 'COCO_' + phase + f'{year}_' + self.phase = phase + self.drop_rate = drop_caption_rate + self.year = year + assert self.year == 2014, 'We only support CoCo2014 now.' 
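+        # Note: by default `ann_file` resolves to the official COCO caption
+        # annotation 'annotations/captions_{phase}{year}.json', and image files
+        # are expected to be named 'COCO_{phase}{year}_{image_id zero-padded to
+        # 12 digits}.jpg'; `load_data_list` below joins image paths by this rule.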
+ + super().__init__( + ann_file=ann_file, + metainfo=metainfo, + data_root=data_root, + data_prefix=data_prefix, + extensions=extensions, + lazy_init=lazy_init, + classes=classes, + **kwargs) + + def load_data_list(self): + """Load image paths and gt_labels.""" + if self.img_prefix: + file_client = FileClient.infer_client(uri=self.img_prefix) + json_file = mmengine.fileio.io.load(self.ann_file) + + def add_prefix(filename, prefix=''): + if not prefix: + return filename + else: + return file_client.join_path(prefix, filename) + + data_list = [] + for item in json_file['annotations']: + image_name = self.image_prename + str( + item['image_id']).zfill(12) + '.jpg' + img_path = add_prefix( + os.path.join(self.phase + str(self.year), image_name), + self.img_prefix) + caption = item['caption'].lower() + info = { + 'img_path': + img_path, + 'gt_label': + caption if (self.phase != 'train' or self.drop_rate < 1e-6 + or random.random() >= self.drop_rate) else '' + } + data_list.append(info) + return data_list diff --git a/mmedit/datasets/transforms/random_degradations.py b/mmedit/datasets/transforms/random_degradations.py index 65e7fb7849..1c202125aa 100644 --- a/mmedit/datasets/transforms/random_degradations.py +++ b/mmedit/datasets/transforms/random_degradations.py @@ -162,11 +162,13 @@ class RandomJPEGCompression: params (dict): A dictionary specifying the degradation settings. keys (list[str]): A list specifying the keys whose values are modified. + bgr2rgb (str): Whether change channel order. Default: False. """ - def __init__(self, params, keys): + def __init__(self, params, keys, bgr2rgb=False): self.keys = keys self.params = params + self.bgr2rgb = bgr2rgb def _apply_random_compression(self, imgs): is_single_image = False @@ -176,6 +178,7 @@ def _apply_random_compression(self, imgs): # determine initial compression level and the step size quality = self.params['quality'] + color_type = self.params['color_type'] quality_step = self.params.get('quality_step', 0) jpeg_param = round(np.random.uniform(quality[0], quality[1])) @@ -183,8 +186,17 @@ def _apply_random_compression(self, imgs): outputs = [] for img in imgs: encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_param] - _, img_encoded = cv2.imencode('.jpg', img * 255., encode_param) - outputs.append(np.float32(cv2.imdecode(img_encoded, 1)) / 255.) + if self.bgr2rgb and color_type == 'color': + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + _, img_encoded = cv2.imencode('.jpg', img, encode_param) + + if color_type == 'color': + img_encoded = cv2.imdecode(img_encoded, 1) + if self.bgr2rgb: + img_encoded = cv2.cvtColor(img_encoded, cv2.COLOR_BGR2RGB) + outputs.append(img_encoded) + else: + outputs.append(cv2.imdecode(img_encoded, 0)) # update compression level jpeg_param += np.random.uniform(-quality_step, quality_step) @@ -238,7 +250,7 @@ def _apply_gaussian_noise(self, imgs): Tensor: images applied gaussian noise """ sigma_range = self.params['gaussian_sigma'] - sigma = np.random.uniform(sigma_range[0], sigma_range[1]) / 255. + sigma = np.random.uniform(sigma_range[0], sigma_range[1]) sigma_step = self.params.get('gaussian_sigma_step', 0) @@ -253,9 +265,8 @@ def _apply_gaussian_noise(self, imgs): outputs.append(img + noise) # update noise level - sigma += np.random.uniform(-sigma_step, sigma_step) / 255. - sigma = np.clip(sigma, sigma_range[0] / 255., - sigma_range[1] / 255.) 
+ sigma += np.random.uniform(-sigma_step, sigma_step) + sigma = np.clip(sigma, sigma_range[0], sigma_range[1]) return outputs @@ -274,7 +285,7 @@ def _apply_poisson_noise(self, imgs): if is_gray_noise: noise = cv2.cvtColor(noise[..., [2, 1, 0]], cv2.COLOR_BGR2GRAY) noise = noise[..., np.newaxis] - noise = np.clip((noise * 255.0).round(), 0, 255) / 255. + noise = np.clip((noise).round(), 0, 255) unique_val = 2**np.ceil(np.log2(len(np.unique(noise)))) noise = np.random.poisson(noise * unique_val) / unique_val - noise diff --git a/mmedit/evaluation/metrics/base_gen_metric.py b/mmedit/evaluation/metrics/base_gen_metric.py index c5916cdc4c..eb8424b9ca 100644 --- a/mmedit/evaluation/metrics/base_gen_metric.py +++ b/mmedit/evaluation/metrics/base_gen_metric.py @@ -155,10 +155,14 @@ def get_metric_sampler(self, model: nn.Module, dataloader: DataLoader, DataLoader: Default sampler for normal metrics. """ batch_size = dataloader.batch_size - + dataset_length = len(dataloader.dataset) rank, num_gpus = get_dist_info() - item_subset = [(i * num_gpus + rank) % self.real_nums - for i in range((self.real_nums - 1) // num_gpus + 1)] + assert self.real_nums <= dataset_length, ( + f'\'real_nums\'({self.real_nums}) can not larger than length of ' + f'dataset ({dataset_length}).') + nums = dataset_length if self.real_nums == -1 else self.real_nums + item_subset = [(i * num_gpus + rank) % nums + for i in range((nums - 1) // num_gpus + 1)] metric_dataloader = DataLoader( dataloader.dataset, diff --git a/mmedit/evaluation/metrics/metrics_utils.py b/mmedit/evaluation/metrics/metrics_utils.py index 4ed5602a64..64630454f4 100644 --- a/mmedit/evaluation/metrics/metrics_utils.py +++ b/mmedit/evaluation/metrics/metrics_utils.py @@ -121,7 +121,7 @@ def img_transform(img, '"Y" and None.') if crop_border != 0: - img = img[crop_border:-crop_border, crop_border:-crop_border, None] + img = img[crop_border:-crop_border, crop_border:-crop_border, ...] return img diff --git a/mmedit/evaluation/metrics/swd.py b/mmedit/evaluation/metrics/swd.py index ecbb9a282c..ba8a46bc18 100644 --- a/mmedit/evaluation/metrics/swd.py +++ b/mmedit/evaluation/metrics/swd.py @@ -255,7 +255,8 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: data_batch (dict): A batch of data from the dataloader. data_samples (Sequence[dict]): A batch of outputs from the model. """ - if self._num_processed >= self.fake_nums_per_device: + if self.fake_nums != -1 and (self._num_processed >= + self.fake_nums_per_device): return real_imgs, fake_imgs = [], [] @@ -279,6 +280,8 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: # real images assert real_imgs.shape[1:] == self.image_shape + if real_imgs.shape[1] == 1: + real_imgs = real_imgs.repeat(1, 3, 1, 1) real_pyramid = laplacian_pyramid(real_imgs, self.n_pyramids - 1, self.gaussian_k) # lod: layer_of_descriptors @@ -291,6 +294,8 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: # fake images assert fake_imgs.shape[1:] == self.image_shape + if fake_imgs.shape[1] == 1: + fake_imgs = fake_imgs.repeat(1, 3, 1, 1) fake_pyramid = laplacian_pyramid(fake_imgs, self.n_pyramids - 1, self.gaussian_k) # lod: layer_of_descriptors diff --git a/mmedit/models/base_archs/aspp.py b/mmedit/models/base_archs/aspp.py index 9b3bbc4665..43f9341984 100644 --- a/mmedit/models/base_archs/aspp.py +++ b/mmedit/models/base_archs/aspp.py @@ -1,7 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Optional, Sequence + import torch from mmcv.cnn import ConvModule -from torch import nn +from torch import Tensor, nn from torch.nn import functional as F from .separable_conv_module import DepthwiseSeparableConvModule @@ -23,7 +25,9 @@ class ASPPPooling(nn.Sequential): act_cfg (dict): Config dict for activation layer. """ - def __init__(self, in_channels, out_channels, conv_cfg, norm_cfg, act_cfg): + def __init__(self, in_channels: int, out_channels: int, + conv_cfg: Optional[dict], norm_cfg: Optional[dict], + act_cfg: Optional[dict]): super().__init__( nn.AdaptiveAvgPool2d(1), ConvModule( @@ -34,7 +38,7 @@ def __init__(self, in_channels, out_channels, conv_cfg, norm_cfg, act_cfg): norm_cfg=norm_cfg, act_cfg=act_cfg)) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward function for ASPP Pooling module. Args: @@ -79,14 +83,14 @@ class ASPP(nn.Module): """ def __init__(self, - in_channels, - out_channels=256, - mid_channels=256, - dilations=(12, 24, 36), - conv_cfg=None, - norm_cfg=dict(type='BN'), - act_cfg=dict(type='ReLU'), - separable_conv=False): + in_channels: int, + out_channels: int = 256, + mid_channels: int = 256, + dilations: Sequence[int] = (12, 24, 36), + conv_cfg: Optional[dict] = None, + norm_cfg: Optional[dict] = dict(type='BN'), + act_cfg: Optional[dict] = dict(type='ReLU'), + separable_conv: bool = False): super().__init__() if separable_conv: @@ -131,7 +135,7 @@ def __init__(self, norm_cfg=norm_cfg, act_cfg=act_cfg), nn.Dropout(0.5)) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward function for ASPP module. Args: diff --git a/mmedit/models/base_archs/downsample.py b/mmedit/models/base_archs/downsample.py index 7d51a5b554..d6ad980a3e 100644 --- a/mmedit/models/base_archs/downsample.py +++ b/mmedit/models/base_archs/downsample.py @@ -1,5 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. -def pixel_unshuffle(x, scale): +from torch import Tensor + + +def pixel_unshuffle(x: Tensor, scale: int) -> Tensor: """Down-sample by pixel unshuffle. Args: diff --git a/mmedit/models/base_archs/ensemble.py b/mmedit/models/base_archs/ensemble.py index 54ec380616..e34f2536cd 100644 --- a/mmedit/models/base_archs/ensemble.py +++ b/mmedit/models/base_archs/ensemble.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional + import torch import torch.nn as nn @@ -13,13 +15,13 @@ class SpatialTemporalEnsemble(nn.Module): Default: False. """ - def __init__(self, is_temporal_ensemble=False): + def __init__(self, is_temporal_ensemble: Optional[bool] = False): super().__init__() self.is_temporal_ensemble = is_temporal_ensemble - def _transform(self, imgs, mode): + def _transform(self, imgs: torch.Tensor, mode: str) -> torch.Tensor: """Apply spatial transform (flip, rotate) to the images. Args: @@ -52,7 +54,8 @@ def _transform(self, imgs, mode): return imgs - def spatial_ensemble(self, imgs, model): + def spatial_ensemble(self, imgs: torch.Tensor, + model: nn.Module) -> torch.Tensor: """Apply spatial ensemble. Args: @@ -82,7 +85,7 @@ def spatial_ensemble(self, imgs, model): return outputs.to(imgs.device) - def forward(self, imgs, model): + def forward(self, imgs: torch.Tensor, model: nn.Module) -> torch.Tensor: """Apply spatial and temporal ensemble. 
Args: diff --git a/mmedit/models/base_archs/gated_conv_module.py b/mmedit/models/base_archs/gated_conv_module.py index 7cfa29fea3..a5a0e006b7 100644 --- a/mmedit/models/base_archs/gated_conv_module.py +++ b/mmedit/models/base_archs/gated_conv_module.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy +from typing import Optional, Tuple, Union import torch import torch.nn as nn @@ -32,11 +33,11 @@ class SimpleGatedConvModule(nn.Module): """ def __init__(self, - in_channels, - out_channels, - kernel_size, - feat_act_cfg=dict(type='ELU'), - gate_act_cfg=dict(type='Sigmoid'), + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int]], + feat_act_cfg: Optional[dict] = dict(type='ELU'), + gate_act_cfg: Optional[dict] = dict(type='Sigmoid'), **kwargs): super().__init__() # the activation function should specified outside conv module @@ -54,7 +55,7 @@ def __init__(self, if self.with_gate_act: self.gate_act = build_activation_layer(gate_act_cfg) - def forward(self, x): + def forward(self, x: torch.Tensor) -> torch.Tensor: """Forward Function. Args: diff --git a/mmedit/models/base_archs/img_normalize.py b/mmedit/models/base_archs/img_normalize.py index 1bd8f76aa6..a845479e28 100644 --- a/mmedit/models/base_archs/img_normalize.py +++ b/mmedit/models/base_archs/img_normalize.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import Tuple + import torch import torch.nn as nn @@ -15,7 +17,11 @@ class ImgNormalize(nn.Conv2d): sign (int): Sign of bias. Default -1. """ - def __init__(self, pixel_range, img_mean, img_std, sign=-1): + def __init__(self, + pixel_range: float, + img_mean: Tuple[float, float, float], + img_std: Tuple[float, float, float], + sign: int = -1): assert len(img_mean) == len(img_std) num_channels = len(img_mean) diff --git a/mmedit/models/base_archs/linear_module.py b/mmedit/models/base_archs/linear_module.py index 6d5e1152b6..0c3a268d40 100644 --- a/mmedit/models/base_archs/linear_module.py +++ b/mmedit/models/base_archs/linear_module.py @@ -1,7 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional, Tuple + import torch.nn as nn from mmcv.cnn import build_activation_layer from mmengine.model.weight_init import kaiming_init +from torch import Tensor class LinearModule(nn.Module): @@ -24,13 +27,13 @@ class LinearModule(nn.Module): """ def __init__(self, - in_features, - out_features, - bias=True, - act_cfg=dict(type='ReLU'), - inplace=True, - with_spectral_norm=False, - order=('linear', 'act')): + in_features: int, + out_features: int, + bias: bool = True, + act_cfg: Optional[dict] = dict(type='ReLU'), + inplace: bool = True, + with_spectral_norm: bool = False, + order: Tuple[str, str] = ('linear', 'act')): super().__init__() assert act_cfg is None or isinstance(act_cfg, dict) self.act_cfg = act_cfg @@ -62,7 +65,7 @@ def __init__(self, # Use msra init by default self.init_weights() - def init_weights(self): + def init_weights(self) -> None: """Init weights for the model.""" if self.with_activation and self.act_cfg['type'] == 'LeakyReLU': nonlinearity = 'leaky_relu' @@ -73,7 +76,7 @@ def init_weights(self): kaiming_init(self.linear, a=a, nonlinearity=nonlinearity) - def forward(self, x, activate=True): + def forward(self, x: Tensor, activate: Optional[bool] = True) -> Tensor: """Forward Function. 
Args: diff --git a/mmedit/models/base_archs/multi_layer_disc.py b/mmedit/models/base_archs/multi_layer_disc.py index 20f3c55660..3d225f727c 100644 --- a/mmedit/models/base_archs/multi_layer_disc.py +++ b/mmedit/models/base_archs/multi_layer_disc.py @@ -1,8 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional + import torch.nn as nn from mmcv.cnn import ConvModule from mmengine import MMLogger from mmengine.runner import load_checkpoint +from torch import Tensor from mmedit.models.base_archs import LinearModule from mmedit.registry import COMPONENTS @@ -44,19 +47,19 @@ class MultiLayerDiscriminator(nn.Module): """ def __init__(self, - in_channels, - max_channels, - num_convs=5, - fc_in_channels=None, - fc_out_channels=1024, - kernel_size=5, - conv_cfg=None, - norm_cfg=None, - act_cfg=dict(type='ReLU'), - out_act_cfg=dict(type='ReLU'), - with_input_norm=True, - with_out_convs=False, - with_spectral_norm=False, + in_channels: int, + max_channels: int, + num_convs: int = 5, + fc_in_channels: Optional[int] = None, + fc_out_channels: int = 1024, + kernel_size: int = 5, + conv_cfg: Optional[dict] = None, + norm_cfg: Optional[dict] = None, + act_cfg: Optional[dict] = dict(type='ReLU'), + out_act_cfg: Optional[dict] = dict(type='ReLU'), + with_input_norm: bool = True, + with_out_convs: bool = False, + with_spectral_norm: bool = False, **kwargs): super().__init__() if fc_in_channels is not None: @@ -128,7 +131,7 @@ def __init__(self, act_cfg=out_act_cfg, with_spectral_norm=with_spectral_norm) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward Function. Args: @@ -149,7 +152,7 @@ def forward(self, x): return x - def init_weights(self, pretrained=None): + def init_weights(self, pretrained: Optional[str] = None) -> None: """Init weights for models. Args: diff --git a/mmedit/models/base_archs/patch_disc.py b/mmedit/models/base_archs/patch_disc.py index e1bcaad08a..b25eef77c1 100644 --- a/mmedit/models/base_archs/patch_disc.py +++ b/mmedit/models/base_archs/patch_disc.py @@ -1,8 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional + import torch.nn as nn from mmcv.cnn import ConvModule, build_conv_layer from mmengine import MMLogger from mmengine.runner import load_checkpoint +from torch import Tensor from mmedit.models.utils import generation_init_weights from mmedit.registry import MODULES @@ -27,11 +30,11 @@ class PatchDiscriminator(nn.Module): """ def __init__(self, - in_channels, - base_channels=64, - num_conv=3, - norm_cfg=dict(type='BN'), - init_cfg=dict(type='normal', gain=0.02)): + in_channels: int, + base_channels: int = 64, + num_conv: int = 3, + norm_cfg: dict = dict(type='BN'), + init_cfg: Optional[dict] = dict(type='normal', gain=0.02)): super().__init__() assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but" f'got {type(norm_cfg)}') @@ -105,7 +108,7 @@ def __init__(self, self.init_gain = 0.02 if init_cfg is None else init_cfg.get( 'gain', 0.02) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward function. Args: @@ -116,7 +119,7 @@ def forward(self, x): """ return self.model(x) - def init_weights(self, pretrained=None): + def init_weights(self, pretrained: Optional[str] = None) -> None: """Initialize weights for the model. 
Args: diff --git a/mmedit/models/base_archs/resnet.py b/mmedit/models/base_archs/resnet.py index 1a0caf80da..90eedf08ae 100644 --- a/mmedit/models/base_archs/resnet.py +++ b/mmedit/models/base_archs/resnet.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Optional, Sequence + import torch.nn as nn import torch.utils.checkpoint as cp from mmcv.cnn import build_activation_layer, build_conv_layer, build_norm_layer @@ -6,6 +8,7 @@ from mmengine.model.weight_init import constant_init, kaiming_init from mmengine.runner import load_checkpoint from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm +from torch import Tensor class BasicBlock(nn.Module): @@ -30,15 +33,15 @@ class BasicBlock(nn.Module): expansion = 1 def __init__(self, - inplanes, - planes, - stride=1, - dilation=1, - downsample=None, - act_cfg=dict(type='ReLU'), - conv_cfg=None, - norm_cfg=dict(type='BN'), - with_cp=False): + inplanes: int, + planes: int, + stride: int = 1, + dilation: int = 1, + downsample: Optional[nn.Module] = None, + act_cfg: dict = dict(type='ReLU'), + conv_cfg: Optional[dict] = None, + norm_cfg: dict = dict(type='BN'), + with_cp: bool = False): super(BasicBlock, self).__init__() self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) @@ -65,19 +68,19 @@ def __init__(self, self.with_cp = with_cp @property - def norm1(self): + def norm1(self) -> nn.Module: """nn.Module: normalization layer after the first convolution layer""" return getattr(self, self.norm1_name) @property - def norm2(self): + def norm2(self) -> nn.Module: """nn.Module: normalization layer after the second convolution layer""" return getattr(self, self.norm2_name) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward function.""" - def _inner_forward(x): + def _inner_forward(x: Tensor) -> Tensor: identity = x out = self.conv1(x) @@ -126,15 +129,15 @@ class Bottleneck(nn.Module): expansion = 4 def __init__(self, - inplanes, - planes, - stride=1, - dilation=1, - downsample=None, - act_cfg=dict(type='ReLU'), - conv_cfg=None, - norm_cfg=dict(type='BN'), - with_cp=False): + inplanes: int, + planes: int, + stride: int = 1, + dilation: int = 1, + downsample: Optional[nn.Module] = None, + act_cfg: dict = dict(type='ReLU'), + conv_cfg: Optional[dict] = None, + norm_cfg: dict = dict(type='BN'), + with_cp: bool = False): super(Bottleneck, self).__init__() self.inplanes = inplanes @@ -185,21 +188,21 @@ def __init__(self, self.downsample = downsample @property - def norm1(self): + def norm1(self) -> nn.Module: """nn.Module: normalization layer after the first convolution layer""" return getattr(self, self.norm1_name) @property - def norm2(self): + def norm2(self) -> nn.Module: """nn.Module: normalization layer after the second convolution layer""" return getattr(self, self.norm2_name) @property - def norm3(self): + def norm3(self) -> nn.Module: """nn.Module: normalization layer after the second convolution layer""" return getattr(self, self.norm3_name) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: identity = x out = self.conv1(x) @@ -269,23 +272,23 @@ class ResNet(nn.Module): } def __init__(self, - depth, - in_channels, - stem_channels, - base_channels, - num_stages=4, - strides=(1, 2, 2, 2), - dilations=(1, 1, 2, 4), - deep_stem=False, - avg_down=False, - frozen_stages=-1, - act_cfg=dict(type='ReLU'), - conv_cfg=None, - norm_cfg=dict(type='BN'), - with_cp=False, - multi_grid=None, - contract_dilation=False, - zero_init_residual=True): + depth: int, + 
in_channels: int = 3, + stem_channels: int = 64, + base_channels: int = 64, + num_stages: int = 4, + strides: Sequence[int] = (1, 2, 2, 2), + dilations: Sequence[int] = (1, 1, 2, 4), + deep_stem: bool = False, + avg_down: bool = False, + frozen_stages: int = -1, + act_cfg: dict = dict(type='ReLU'), + conv_cfg: Optional[dict] = None, + norm_cfg: dict = dict(type='BN'), + with_cp: bool = False, + multi_grid: Optional[Sequence[int]] = None, + contract_dilation: bool = False, + zero_init_residual: bool = True): super(ResNet, self).__init__() from functools import partial @@ -334,7 +337,7 @@ def __init__(self, self._freeze_stages() - def _make_stem_layer(self, in_channels, stem_channels): + def _make_stem_layer(self, in_channels: int, stem_channels: int) -> None: """Make stem layer for ResNet.""" if self.deep_stem: self.stem = nn.Sequential( @@ -384,11 +387,16 @@ def _make_stem_layer(self, in_channels, stem_channels): self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) @property - def norm1(self): + def norm1(self) -> nn.Module: """nn.Module: normalization layer after the second convolution layer""" return getattr(self, self.norm1_name) - def _make_layer(self, block, planes, blocks, stride=1, dilation=1): + def _make_layer(self, + block: BasicBlock, + planes: int, + blocks: int, + stride: int = 1, + dilation: int = 1) -> nn.Module: downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( @@ -424,7 +432,7 @@ def _make_layer(self, block, planes, blocks, stride=1, dilation=1): return nn.Sequential(*layers) - def _nostride_dilate(self, m, dilate): + def _nostride_dilate(self, m: nn.Module, dilate: int) -> None: classname = m.__class__.__name__ if classname.find('Conv') != -1 and dilate > 1: # the convolution with stride @@ -440,7 +448,7 @@ def _nostride_dilate(self, m, dilate): m.dilation = (dilate, dilate) m.padding = (dilate, dilate) - def init_weights(self, pretrained=None): + def init_weights(self, pretrained: Optional[str] = None) -> None: """Init weights for the model. Args: @@ -466,7 +474,7 @@ def init_weights(self, pretrained=None): else: raise TypeError('pretrained must be a str or None') - def _freeze_stages(self): + def _freeze_stages(self) -> None: """Freeze stages param and norm stats.""" if self.frozen_stages >= 0: if self.deep_stem: @@ -485,7 +493,7 @@ def _freeze_stages(self): for param in m.parameters(): param.requires_grad = False - def forward(self, x): + def forward(self, x: Tensor) -> List[Tensor]: """Forward function. Args: diff --git a/mmedit/models/base_archs/separable_conv_module.py b/mmedit/models/base_archs/separable_conv_module.py index 139a54e58c..0d807ccbf5 100644 --- a/mmedit/models/base_archs/separable_conv_module.py +++ b/mmedit/models/base_archs/separable_conv_module.py @@ -1,6 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Optional, Tuple, Union + import torch.nn as nn from mmcv.cnn import ConvModule +from torch import Tensor class DepthwiseSeparableConvModule(nn.Module): @@ -39,18 +42,18 @@ class DepthwiseSeparableConvModule(nn.Module): """ def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - norm_cfg=None, - act_cfg=dict(type='ReLU'), - dw_norm_cfg='default', - dw_act_cfg='default', - pw_norm_cfg='default', - pw_act_cfg='default', + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int]], + stride: Union[int, Tuple[int, int]] = 1, + padding: Union[int, Tuple[int, int]] = 0, + dilation: Union[int, Tuple[int, int]] = 1, + norm_cfg: Optional[dict] = None, + act_cfg: Optional[dict] = dict(type='ReLU'), + dw_norm_cfg: Union[dict, str] = 'default', + dw_act_cfg: Union[dict, str] = 'default', + pw_norm_cfg: Union[dict, str] = 'default', + pw_act_cfg: Union[dict, str] = 'default', **kwargs): super().__init__() assert 'groups' not in kwargs, 'groups should not be specified' @@ -83,7 +86,7 @@ def __init__(self, act_cfg=pw_act_cfg, **kwargs) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward function. Args: diff --git a/mmedit/models/base_archs/simple_encoder_decoder.py b/mmedit/models/base_archs/simple_encoder_decoder.py index 964fdf6626..76f542377c 100644 --- a/mmedit/models/base_archs/simple_encoder_decoder.py +++ b/mmedit/models/base_archs/simple_encoder_decoder.py @@ -2,6 +2,7 @@ from typing import Optional from mmengine.model import BaseModule +from torch import Tensor from mmedit.registry import MODELS @@ -27,7 +28,7 @@ def __init__(self, decoder['in_channels'] = self.encoder.out_channels self.decoder = MODELS.build(decoder) - def forward(self, *args, **kwargs): + def forward(self, *args, **kwargs) -> Tensor: """Forward function. Returns: diff --git a/mmedit/models/base_archs/smpatch_disc.py b/mmedit/models/base_archs/smpatch_disc.py index b121a1a641..96446cac27 100644 --- a/mmedit/models/base_archs/smpatch_disc.py +++ b/mmedit/models/base_archs/smpatch_disc.py @@ -1,8 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional + import torch.nn as nn from mmcv.cnn import ConvModule from mmengine import MMLogger from mmengine.runner import load_checkpoint +from torch import Tensor from mmedit.models.utils import generation_init_weights from mmedit.registry import COMPONENTS @@ -29,12 +32,12 @@ class SoftMaskPatchDiscriminator(nn.Module): """ def __init__(self, - in_channels, - base_channels=64, - num_conv=3, - norm_cfg=None, - init_cfg=dict(type='normal', gain=0.02), - with_spectral_norm=False): + in_channels: int, + base_channels: Optional[int] = 64, + num_conv: Optional[int] = 3, + norm_cfg: Optional[dict] = None, + init_cfg: Optional[dict] = dict(type='normal', gain=0.02), + with_spectral_norm: Optional[bool] = False): super().__init__() kernel_size = 4 @@ -104,7 +107,7 @@ def __init__(self, self.init_gain = 0.02 if init_cfg is None else init_cfg.get( 'gain', 0.02) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward function. Args: @@ -115,7 +118,7 @@ def forward(self, x): """ return self.model(x) - def init_weights(self, pretrained=None): + def init_weights(self, pretrained: Optional[str] = None) -> None: """Initialize weights for the model. 
Args: diff --git a/mmedit/models/base_archs/sr_backbone.py b/mmedit/models/base_archs/sr_backbone.py index eef6d3c983..cab5c22b44 100644 --- a/mmedit/models/base_archs/sr_backbone.py +++ b/mmedit/models/base_archs/sr_backbone.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import torch.nn as nn +from torch import Tensor from ..utils import default_init_weights @@ -54,7 +55,7 @@ class ResidualBlockNoBN(nn.Module): Default: 1.0. """ - def __init__(self, mid_channels=64, res_scale=1.0): + def __init__(self, mid_channels: int = 64, res_scale: float = 1.0): super().__init__() self.res_scale = res_scale self.conv1 = nn.Conv2d(mid_channels, mid_channels, 3, 1, 1, bias=True) @@ -67,7 +68,7 @@ def __init__(self, mid_channels=64, res_scale=1.0): if res_scale == 1.0: self.init_weights() - def init_weights(self): + def init_weights(self) -> None: """Initialize weights for ResidualBlockNoBN. Initialization methods like `kaiming_init` are for VGG-style modules. @@ -79,7 +80,7 @@ def init_weights(self): for m in [self.conv1, self.conv2]: default_init_weights(m, 0.1) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward function. Args: diff --git a/mmedit/models/base_archs/upsample.py b/mmedit/models/base_archs/upsample.py index 67b6befc3b..17516df5e2 100644 --- a/mmedit/models/base_archs/upsample.py +++ b/mmedit/models/base_archs/upsample.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. import torch.nn as nn import torch.nn.functional as F +from torch import Tensor from .sr_backbone import default_init_weights @@ -18,8 +19,8 @@ class PixelShufflePack(nn.Module): Upsampled feature map. """ - def __init__(self, in_channels, out_channels, scale_factor, - upsample_kernel): + def __init__(self, in_channels: int, out_channels: int, scale_factor: int, + upsample_kernel: int): super().__init__() self.in_channels = in_channels self.out_channels = out_channels @@ -32,11 +33,11 @@ def __init__(self, in_channels, out_channels, scale_factor, padding=(self.upsample_kernel - 1) // 2) self.init_weights() - def init_weights(self): + def init_weights(self) -> None: """Initialize weights for PixelShufflePack.""" default_init_weights(self, 1) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: """Forward function for PixelShufflePack. Args: diff --git a/mmedit/models/base_archs/vgg.py b/mmedit/models/base_archs/vgg.py index 53af8e96c6..3dd0161d11 100644 --- a/mmedit/models/base_archs/vgg.py +++ b/mmedit/models/base_archs/vgg.py @@ -1,9 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Optional +from typing import Dict, List, Optional import torch.nn as nn from mmengine.model import BaseModule from mmengine.model.weight_init import constant_init, xavier_init +from torch import Tensor from mmedit.registry import MODELS from ..base_archs.aspp import ASPP @@ -28,10 +29,10 @@ class VGG16(BaseModule): """ def __init__(self, - in_channels, - batch_norm=False, - aspp=False, - dilations=None, + in_channels: int, + batch_norm: Optional[bool] = False, + aspp: Optional[bool] = False, + dilations: Optional[List[int]] = None, init_cfg: Optional[dict] = None): super().__init__(init_cfg=init_cfg) self.batch_norm = batch_norm @@ -55,7 +56,8 @@ def __init__(self, else: self.out_channels = 512 - def _make_layer(self, inplanes, planes, convs_layers): + def _make_layer(self, inplanes: int, planes: int, + convs_layers: int) -> nn.Module: layers = [] for _ in range(convs_layers): conv2d = nn.Conv2d(inplanes, planes, kernel_size=3, padding=1) @@ -68,7 +70,7 @@ def _make_layer(self, inplanes, planes, convs_layers): layers += [nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)] return nn.Sequential(*layers) - def init_weights(self): + def init_weights(self) -> None: """Init weights for the model.""" if self.init_cfg is not None: super().init_weights() @@ -80,7 +82,7 @@ def init_weights(self): elif isinstance(m, nn.BatchNorm2d): constant_init(m, 1) - def forward(self, x): + def forward(self, x: Tensor) -> Dict[str, Tensor]: """Forward function for ASPP module. Args: diff --git a/mmedit/models/base_models/base_edit_model.py b/mmedit/models/base_models/base_edit_model.py index 08234d4ed1..4d0c0afbc4 100644 --- a/mmedit/models/base_models/base_edit_model.py +++ b/mmedit/models/base_models/base_edit_model.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import List, Optional +from typing import Dict, List, Optional, Union import torch from mmengine.model import BaseModel @@ -33,12 +33,12 @@ class BaseEditModel(BaseModel): """ def __init__(self, - generator, - pixel_loss, - train_cfg=None, - test_cfg=None, - init_cfg=None, - data_preprocessor=None): + generator: dict, + pixel_loss: dict, + train_cfg: Optional[dict] = None, + test_cfg: Optional[dict] = None, + init_cfg: Optional[dict] = None, + data_preprocessor: Optional[dict] = None): super().__init__( init_cfg=init_cfg, data_preprocessor=data_preprocessor) @@ -55,7 +55,7 @@ def forward(self, inputs: torch.Tensor, data_samples: Optional[List[EditDataSample]] = None, mode: str = 'tensor', - **kwargs): + **kwargs) -> Union[torch.Tensor, List[EditDataSample], dict]: """Returns losses or predictions of training, validation, testing, and simple inference process. @@ -116,12 +116,17 @@ def forward(self, elif mode == 'loss': return self.forward_train(inputs, data_samples, **kwargs) - def convert_to_datasample(self, inputs, data_samples): + def convert_to_datasample(self, inputs: List[EditDataSample], + data_samples: List[EditDataSample] + ) -> List[EditDataSample]: for data_sample, output in zip(inputs, data_samples): data_sample.output = output return inputs - def forward_tensor(self, inputs, data_samples=None, **kwargs): + def forward_tensor(self, + inputs: torch.Tensor, + data_samples: Optional[List[EditDataSample]] = None, + **kwargs) -> torch.Tensor: """Forward tensor. Returns result of simple forward. 
Args: @@ -138,7 +143,10 @@ def forward_tensor(self, inputs, data_samples=None, **kwargs): return feats - def forward_inference(self, inputs, data_samples=None, **kwargs): + def forward_inference(self, + inputs: torch.Tensor, + data_samples: Optional[List[EditDataSample]] = None, + **kwargs) -> List[EditDataSample]: """Forward inference. Returns predictions of validation, testing, and simple inference. @@ -163,7 +171,10 @@ def forward_inference(self, inputs, data_samples=None, **kwargs): return predictions - def forward_train(self, inputs, data_samples=None, **kwargs): + def forward_train(self, + inputs: torch.Tensor, + data_samples: Optional[List[EditDataSample]] = None, + **kwargs) -> Dict[str, torch.Tensor]: """Forward training. Returns dict of losses of training. Args: diff --git a/mmedit/models/base_models/base_mattor.py b/mmedit/models/base_models/base_mattor.py index ee48535c21..65b331cc4a 100644 --- a/mmedit/models/base_models/base_mattor.py +++ b/mmedit/models/base_models/base_mattor.py @@ -15,7 +15,9 @@ Tuple[torch.Tensor], torch.Tensor] -def _pad(batch_image, ds_factor, mode='reflect'): +def _pad(batch_image: torch.Tensor, + ds_factor: int, + mode: str = 'reflect') -> Tuple[torch.Tensor, Tuple[int, int]]: """Pad image to a multiple of give down-sampling factor.""" h, w = batch_image.shape[-2:] # NCHW @@ -33,7 +35,10 @@ def _pad(batch_image, ds_factor, mode='reflect'): return batch_image, pad -def _interpolate(batch_image, ds_factor, mode='bicubic'): +def _interpolate(batch_image: torch.Tensor, + ds_factor: int, + mode: str = 'bicubic' + ) -> Tuple[torch.Tensor, Tuple[int, int]]: """Resize image to multiple of give down-sampling factor.""" h, w = batch_image.shape[-2:] # NCHW @@ -97,7 +102,7 @@ def __init__(self, self.backbone = MODELS.build(backbone) - def resize_inputs(self, batch_inputs): + def resize_inputs(self, batch_inputs: torch.Tensor) -> torch.Tensor: """Pad or interpolate images and trimaps to multiple of given factor.""" @@ -121,7 +126,8 @@ def resize_inputs(self, batch_inputs): return torch.cat((batch_images, batch_trimaps), dim=1) - def restore_size(self, pred_alpha, data_sample): + def restore_size(self, pred_alpha: torch.Tensor, + data_sample: EditDataSample) -> torch.Tensor: """Restore the predicted alpha to the original shape. The shape of the predicted alpha may not be the same as the shape of @@ -243,7 +249,9 @@ def forward(self, else: raise ValueError('Invalid forward mode.') - def convert_to_datasample(self, inputs, data_samples): + def convert_to_datasample(self, inputs: DataSamples, + data_samples: List[EditDataSample] + ) -> List[EditDataSample]: for data_sample, output in zip(inputs, data_samples): data_sample.output = output return inputs diff --git a/mmedit/models/base_models/basic_interpolator.py b/mmedit/models/base_models/basic_interpolator.py index 1de2d36207..7a88fc34eb 100644 --- a/mmedit/models/base_models/basic_interpolator.py +++ b/mmedit/models/base_models/basic_interpolator.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Optional + import torch from mmedit.registry import MODELS @@ -35,14 +37,14 @@ class BasicInterpolator(BaseEditModel): """ def __init__(self, - generator, - pixel_loss, - train_cfg=None, - test_cfg=None, - required_frames=2, - step_frames=1, - init_cfg=None, - data_preprocessor=None): + generator: dict, + pixel_loss: dict, + train_cfg: Optional[dict] = None, + test_cfg: Optional[dict] = None, + required_frames: int = 2, + step_frames: int = 1, + init_cfg: Optional[dict] = None, + data_preprocessor: Optional[dict] = None): super().__init__( generator=generator, @@ -57,7 +59,7 @@ def __init__(self, # Step size of video frame interpolation self.step_frames = step_frames - def split_frames(self, input_tensors): + def split_frames(self, input_tensors: torch.Tensor) -> torch.Tensor: """split input tensors for inference. Args: @@ -80,7 +82,8 @@ def split_frames(self, input_tensors): return result @staticmethod - def merge_frames(input_tensors, output_tensors): + def merge_frames(input_tensors: torch.Tensor, + output_tensors: torch.Tensor) -> list: """merge input frames and output frames. Interpolate a frame between the given two frames. diff --git a/mmedit/models/base_models/one_stage.py b/mmedit/models/base_models/one_stage.py index 8de25730cd..265e4b0cbe 100644 --- a/mmedit/models/base_models/one_stage.py +++ b/mmedit/models/base_models/one_stage.py @@ -1,14 +1,19 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import List, Optional, Union +from typing import List, Optional, Tuple, Union import torch from mmengine.config import Config from mmengine.model import BaseModel +from mmengine.optim import OptimWrapperDict from mmedit.registry import MODELS from mmedit.structures import EditDataSample, PixelData +from mmedit.utils import SampleList from ..utils import set_requires_grad +FORWARD_RETURN_TYPE = Union[dict, torch.Tensor, + Tuple[torch.Tensor, torch.Tensor], SampleList] + @MODELS.register_module() class OneStageInpaintor(BaseModel): @@ -45,18 +50,18 @@ class OneStageInpaintor(BaseModel): def __init__(self, data_preprocessor: Union[dict, Config], - encdec, - disc=None, - loss_gan=None, - loss_gp=None, - loss_disc_shift=None, - loss_composed_percep=None, - loss_out_percep=False, - loss_l1_hole=None, - loss_l1_valid=None, - loss_tv=None, - train_cfg=None, - test_cfg=None, + encdec: dict, + disc: Optional[dict] = None, + loss_gan: Optional[dict] = None, + loss_gp: Optional[dict] = None, + loss_disc_shift: Optional[dict] = None, + loss_composed_percep: Optional[dict] = None, + loss_out_percep: bool = False, + loss_l1_hole: Optional[dict] = None, + loss_l1_valid: Optional[dict] = None, + loss_tv: Optional[dict] = None, + train_cfg: Optional[dict] = None, + test_cfg: Optional[dict] = None, init_cfg: Optional[dict] = None): super().__init__( data_preprocessor=data_preprocessor, init_cfg=init_cfg) @@ -99,7 +104,10 @@ def __init__(self, self.disc_step_count = 0 - def forward(self, inputs, data_samples, mode='tensor'): + def forward(self, + inputs: torch.Tensor, + data_samples: Optional[SampleList], + mode: str = 'tensor') -> FORWARD_RETURN_TYPE: """Forward function. Args: @@ -144,7 +152,8 @@ def forward(self, inputs, data_samples, mode='tensor'): else: raise ValueError('Invalid forward mode.') - def train_step(self, data: List[dict], optim_wrapper): + def train_step(self, data: List[dict], + optim_wrapper: OptimWrapperDict) -> dict: """Train step function. 
In this function, the inpaintor will finish the train step following @@ -244,7 +253,7 @@ def train_step(self, data: List[dict], optim_wrapper): return log_vars - def forward_train(self, *args, **kwargs): + def forward_train(self, *args, **kwargs) -> None: """Forward function for training. In this version, we do not use this interface. @@ -253,7 +262,8 @@ def forward_train(self, *args, **kwargs): 'current training schedule. Please use ' '`train_step` for training.') - def forward_train_d(self, data_batch, is_real, is_disc): + def forward_train_d(self, data_batch: torch.Tensor, is_real: bool, + is_disc: bool) -> dict: """Forward function in discriminator training step. In this function, we compute the prediction for each data batch (real @@ -285,7 +295,9 @@ def forward_train_d(self, data_batch, is_real, is_disc): return loss - def generator_loss(self, fake_res, fake_img, gt, mask, masked_img): + def generator_loss(self, fake_res: torch.Tensor, fake_img: torch.Tensor, + gt: torch.Tensor, mask: torch.Tensor, + masked_img: torch.Tensor) -> Tuple[dict, dict]: """Forward function in generator training step. In this function, we mainly compute the loss items for generator with @@ -349,7 +361,8 @@ def generator_loss(self, fake_res, fake_img, gt, mask, masked_img): return res, loss - def forward_tensor(self, inputs, data_samples): + def forward_tensor(self, inputs: torch.Tensor, data_samples: SampleList + ) -> Tuple[torch.Tensor, torch.Tensor]: """Forward function in tensor mode. Args: @@ -370,7 +383,8 @@ def forward_tensor(self, inputs, data_samples): fake_imgs = fake_reses * masks + masked_imgs * (1. - masks) return fake_reses, fake_imgs - def forward_test(self, inputs, data_samples): + def forward_test(self, inputs: torch.Tensor, + data_samples: SampleList) -> SampleList: """Forward function for testing. Args: @@ -392,12 +406,13 @@ def forward_test(self, inputs, data_samples): predictions.append(pred) return predictions - def convert_to_datasample(self, inputs, data_samples): + def convert_to_datasample(self, inputs: SampleList, + data_samples: SampleList) -> SampleList: for data_sample, output in zip(inputs, data_samples): data_sample.output = output return inputs - def forward_dummy(self, x): + def forward_dummy(self, x: torch.Tensor) -> torch.Tensor: """Forward dummy function for getting flops. Args: diff --git a/mmedit/models/base_models/two_stage.py b/mmedit/models/base_models/two_stage.py index aabf0d8bba..fffae0ba7c 100644 --- a/mmedit/models/base_models/two_stage.py +++ b/mmedit/models/base_models/two_stage.py @@ -1,10 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import List, Optional, Union +from typing import List, Optional, Sequence, Tuple, Union import torch from mmengine.config import Config +from mmengine.optim import OptimWrapperDict from mmedit.registry import MODELS +from mmedit.utils import SampleList from ..utils import set_requires_grad from .one_stage import OneStageInpaintor @@ -50,25 +52,27 @@ class TwoStageInpaintor(OneStageInpaintor): discriminator. Default: False. 
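
        Example:
            A minimal, illustrative construction; the ``type`` names in the
            sub-config dicts are placeholders for whatever registered modules
            a project actually uses, not defaults of this class:

            >>> inpaintor = TwoStageInpaintor(
            ...     data_preprocessor=dict(type='EditDataPreprocessor'),
            ...     encdec=dict(type='DeepFillEncoderDecoder'),
            ...     loss_l1_hole=dict(type='L1Loss'),
            ...     stage1_loss_type=('loss_l1_hole', ),
            ...     stage2_loss_type=('loss_l1_hole', ),
            ...     input_with_ones=True,
            ...     disc_input_with_mask=False)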
""" - def __init__(self, - data_preprocessor: Union[dict, Config], - encdec: dict, - disc=None, - loss_gan=None, - loss_gp=None, - loss_disc_shift=None, - loss_composed_percep=None, - loss_out_percep=False, - loss_l1_hole=None, - loss_l1_valid=None, - loss_tv=None, - train_cfg=None, - test_cfg=None, - init_cfg: Optional[dict] = None, - stage1_loss_type=('loss_l1_hole', ), - stage2_loss_type=('loss_l1_hole', 'loss_gan'), - input_with_ones=True, - disc_input_with_mask=False): + def __init__( + self, + data_preprocessor: Union[dict, Config], + encdec: dict, + disc: Optional[dict] = None, + loss_gan: Optional[dict] = None, + loss_gp: Optional[dict] = None, + loss_disc_shift: Optional[dict] = None, + loss_composed_percep: Optional[dict] = None, + loss_out_percep: bool = False, + loss_l1_hole: Optional[dict] = None, + loss_l1_valid: Optional[dict] = None, + loss_tv: Optional[dict] = None, + train_cfg: Optional[dict] = None, + test_cfg: Optional[dict] = None, + init_cfg: Optional[dict] = None, + stage1_loss_type: Optional[Sequence[str]] = ('loss_l1_hole', ), + stage2_loss_type: Optional[Sequence[str]] = ('loss_l1_hole', + 'loss_gan'), + input_with_ones: bool = True, + disc_input_with_mask: bool = False): super().__init__( data_preprocessor=data_preprocessor, encdec=encdec, @@ -93,7 +97,8 @@ def __init__(self, if self.train_cfg is not None: self.cur_iter = self.train_cfg.start_iter - def forward_tensor(self, inputs, data_samples): + def forward_tensor(self, inputs: torch.Tensor, data_samples: SampleList + ) -> Tuple[torch.Tensor, torch.Tensor]: """Forward function in tensor mode. Args: @@ -116,7 +121,9 @@ def forward_tensor(self, inputs, data_samples): fake_imgs = stage2_fake_res * masks + masked_imgs * (1. - masks) return stage2_fake_res, fake_imgs - def two_stage_loss(self, stage1_data, stage2_data, gt, mask, masked_img): + def two_stage_loss(self, stage1_data: dict, stage2_data: dict, + gt: torch.Tensor, mask: torch.Tensor, + masked_img: torch.Tensor) -> Tuple[dict, dict]: """Calculate two-stage loss. Args: @@ -164,12 +171,12 @@ def two_stage_loss(self, stage1_data, stage2_data, gt, mask, masked_img): return results, loss def calculate_loss_with_type(self, - loss_type, - fake_res, - fake_img, - gt, - mask, - prefix='stage1_'): + loss_type: str, + fake_res: torch.Tensor, + fake_img: torch.Tensor, + gt: torch.Tensor, + mask: torch.Tensor, + prefix: Optional[str] = 'stage1_') -> dict: """Calculate multiple types of losses. Args: @@ -213,7 +220,8 @@ def calculate_loss_with_type(self, return loss_dict - def train_step(self, data: List[dict], optim_wrapper): + def train_step(self, data: List[dict], + optim_wrapper: OptimWrapperDict) -> dict: """Train step function. In this function, the inpaintor will finish the train step following diff --git a/mmedit/models/data_preprocessors/gen_preprocessor.py b/mmedit/models/data_preprocessors/gen_preprocessor.py index ff8206a70d..8229589fa5 100644 --- a/mmedit/models/data_preprocessors/gen_preprocessor.py +++ b/mmedit/models/data_preprocessors/gen_preprocessor.py @@ -188,3 +188,20 @@ def forward(self, data: dict, training: bool = False) -> dict: data['inputs'] = _batch_inputs data.setdefault('data_samples', None) return data + + def destructor(self, batch_tensor: torch.Tensor): + """Destructor of data processor. Destruct padding, normalization and + dissolve batch. + + Args: + batch_tensor (Tensor): Batched output. + + Returns: + Tensor: Destructed output. 
+ """ + + # De-normalization + batch_tensor = batch_tensor * self.std + self.mean + batch_tensor = batch_tensor.clamp_(0, 255) + + return batch_tensor diff --git a/mmedit/models/editors/__init__.py b/mmedit/models/editors/__init__.py index 36feaad845..b1add6f756 100644 --- a/mmedit/models/editors/__init__.py +++ b/mmedit/models/editors/__init__.py @@ -46,13 +46,16 @@ from .rdn import RDNNet from .real_basicvsr import RealBasicVSR, RealBasicVSRNet from .real_esrgan import RealESRGAN, UNetDiscriminatorWithSpectralNorm +from .restormer import Restormer from .sagan import SAGAN from .singan import SinGAN from .srcnn import SRCNNNet from .srgan import SRGAN, ModifiedVGG, MSRResNet +from .stable_diffusion import StableDiffusion from .stylegan1 import StyleGAN1 from .stylegan2 import StyleGAN2 from .stylegan3 import StyleGAN3, StyleGAN3Generator +from .swinir import SwinIRNet from .tdan import TDAN, TDANNet from .tof import TOFlowVFINet, TOFlowVSRNet, ToFResBlock from .ttsr import LTE, TTSR, SearchTransformer, TTSRDiscriminator, TTSRNet @@ -83,5 +86,6 @@ 'DiscoDiffusion', 'IDLossModel', 'PESinGAN', 'MSPIEStyleGAN2', 'StyleGAN3Generator', 'InstColorization', 'NAFBaseline', 'NAFBaselineLocal', 'NAFNet', 'NAFNetLocal', 'DDIMScheduler', - 'DDPMScheduler', 'DenoisingUnet', 'ClipWrapper', 'EG3D' + 'DDPMScheduler', 'DenoisingUnet', 'ClipWrapper', 'EG3D', 'Restormer', + 'SwinIRNet', 'StableDiffusion' ] diff --git a/mmedit/models/editors/ddim/ddim_scheduler.py b/mmedit/models/editors/ddim/ddim_scheduler.py index 104501d9e5..607afd2e61 100644 --- a/mmedit/models/editors/ddim/ddim_scheduler.py +++ b/mmedit/models/editors/ddim/ddim_scheduler.py @@ -4,8 +4,8 @@ import numpy as np import torch +from mmedit.models.utils.diffusion_utils import betas_for_alpha_bar from mmedit.registry import DIFFUSION_SCHEDULERS -from ...utils.diffusion_utils import betas_for_alpha_bar @DIFFUSION_SCHEDULERS.register_module() @@ -82,6 +82,8 @@ def __init__( self.timesteps = np.arange(0, num_train_timesteps)[::-1].copy() def set_timesteps(self, num_inference_steps, offset=0): + """set time steps.""" + self.num_inference_steps = num_inference_steps self.timesteps = np.arange( 0, self.num_train_timesteps, @@ -89,6 +91,8 @@ def set_timesteps(self, num_inference_steps, offset=0): self.timesteps += offset def _get_variance(self, timestep, prev_timestep): + """get variance.""" + alpha_prod_t = self.alphas_cumprod[timestep] alpha_prod_t_prev = self.alphas_cumprod[ prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod @@ -109,6 +113,8 @@ def step( use_clipped_model_output: bool = False, generator=None, ): + """step forward.""" + output = {} if self.num_inference_steps is None: raise ValueError("Number of inference steps is 'None', '\ @@ -123,7 +129,8 @@ def step( 1] * 2 and self.variance_type in ['learned', 'learned_range']: model_output, _ = torch.split(model_output, sample.shape[1], dim=1) else: - raise TypeError + if not model_output.shape == sample.shape: + raise TypeError # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf # noqa # Ideally, read DDIM paper in-detail understanding @@ -209,6 +216,8 @@ def step( return output def add_noise(self, original_samples, noise, timesteps): + """add noise.""" + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 noisy_samples = ( diff --git a/mmedit/models/editors/ddpm/attention.py b/mmedit/models/editors/ddpm/attention.py new file mode 100644 index 0000000000..b941272844 --- /dev/null 
+++ b/mmedit/models/editors/ddpm/attention.py @@ -0,0 +1,560 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional + +import torch +import torch.nn.functional as F +from addict import Dict +from torch import nn + + +class Transformer2DModel(nn.Module): + """Transformer model for image-like data. Takes either discrete (classes of + vector embeddings) or continuous (actual embeddings) inputs. + + When input is continuous: First, project the input + (aka embedding) and reshape to b, t, d. Then apply standard + transformer action. Finally, reshape to image. + + When input is discrete: First, input (classes of latent pixels) + is converted to embeddings and has positional + embeddings applied, see `ImagePositionalEmbeddings`. + Then apply standard transformer action. Finally, predict + classes of unnoised image. + + Note that it is assumed one of the input classes is + the masked latent pixel. The predicted classes of the unnoised + image do not contain a prediction for the masked pixel as + the unnoised image cannot be masked. + + Args: + num_attention_heads (`int`, *optional*, defaults to 16): + The number of heads to use for multi-head attention. + attention_head_dim (`int`, *optional*, defaults to 88): + The number of channels in each head. + in_channels (`int`, *optional*): + Pass if the input is continuous. + The number of channels in the input and output. + num_layers (`int`, *optional*, defaults to 1): + The number of layers of Transformer blocks to use. + dropout (`float`, *optional*, defaults to 0.1): + The dropout probability to use. + norm_num_groups (int): + Norm group num, defaults to 32. + cross_attention_dim (`int`, *optional*): + The number of context dimensions to use. + attention_bias (`bool`, *optional*): + Configure if the TransformerBlocks' attention should contain + a bias parameter. + sample_size (`int`, *optional*): + Pass if the input is discrete. The width of the latent images. + Note that this is fixed at training time as it is used for + learning a number of position embeddings. See + `ImagePositionalEmbeddings`. + num_vector_embeds (`int`, *optional*): + Pass if the input is discrete. The number of classes of + the vector embeddings of the latent pixels. + Includes the class for the masked latent pixel. + activation_fn (`str`, *optional*, defaults to `"geglu"`): + Activation function to be used in feed-forward. + use_linear_projection (bool): + Whether to use linear projection, defaults to False. + only_cross_attention (bool): + whether only use cross attention, defaults to False. + """ + + def __init__( + self, + num_attention_heads: int = 16, + attention_head_dim: int = 88, + in_channels: Optional[int] = None, + num_layers: int = 1, + dropout: float = 0.0, + norm_num_groups: int = 32, + cross_attention_dim: Optional[int] = None, + attention_bias: bool = False, + sample_size: Optional[int] = None, + num_vector_embeds: Optional[int] = None, + activation_fn: str = 'geglu', + use_linear_projection: bool = False, + only_cross_attention: bool = False, + ): + super().__init__() + self.use_linear_projection = use_linear_projection + self.num_attention_heads = num_attention_heads + self.attention_head_dim = attention_head_dim + inner_dim = num_attention_heads * attention_head_dim + + # 1. 
Transformer2DModel can process both standard continuous + # images of shape `(batch_size, num_channels, width, height)` + # as well as quantized image embeddings of shape + # `(batch_size, num_image_vectors)` + # Define whether input is continuous or discrete + # depending on configuration + self.is_input_continuous = in_channels is not None + self.is_input_vectorized = num_vector_embeds is not None + + if self.is_input_continuous and self.is_input_vectorized: + raise ValueError( + f'Cannot define both `in_channels`: {in_channels} ' + f'and `num_vector_embeds`: {num_vector_embeds}. Make' + f' sure that either `in_channels` or `num_vector_embeds` ' + 'is None.') + elif not self.is_input_continuous and not self.is_input_vectorized: + raise ValueError( + f'Has to define either `in_channels`: {in_channels} or' + f' `num_vector_embeds`: {num_vector_embeds}. Make' + f' sure that either `in_channels` or ' + '`num_vector_embeds` is not None.') + + # 2. Define input layers + if self.is_input_continuous: + self.in_channels = in_channels + + self.norm = torch.nn.GroupNorm( + num_groups=norm_num_groups, + num_channels=in_channels, + eps=1e-6, + affine=True) + if use_linear_projection: + self.proj_in = nn.Linear(in_channels, inner_dim) + else: + self.proj_in = nn.Conv2d( + in_channels, inner_dim, kernel_size=1, stride=1, padding=0) + else: + raise ValueError('input_vectorized not supported now.') + + # 3. Define transformers blocks + self.transformer_blocks = nn.ModuleList([ + BasicTransformerBlock( + inner_dim, + num_attention_heads, + attention_head_dim, + dropout=dropout, + cross_attention_dim=cross_attention_dim, + activation_fn=activation_fn, + attention_bias=attention_bias, + only_cross_attention=only_cross_attention, + ) for d in range(num_layers) + ]) + + # 4. Define output layers + if use_linear_projection: + self.proj_out = nn.Linear(in_channels, inner_dim) + else: + self.proj_out = nn.Conv2d( + inner_dim, in_channels, kernel_size=1, stride=1, padding=0) + + def _set_attention_slice(self, slice_size): + """set attention slice.""" + + for block in self.transformer_blocks: + block._set_attention_slice(slice_size) + + def forward(self, + hidden_states, + encoder_hidden_states=None, + timestep=None, + return_dict: bool = True): + """forward function. + + Args: + hidden_states ( When discrete, `torch.LongTensor` + of shape `(batch size, num latent pixels)`. + When continuous, `torch.FloatTensor` of shape ` + (batch size, channel, height, width)`): Input + hidden_states + encoder_hidden_states ( `torch.LongTensor` of shape + `(batch size, context dim)`, *optional*): + Conditional embeddings for cross attention layer. + If not given, cross-attention defaults to + self-attention. + timestep ( `torch.long`, *optional*): + Optional timestep to be applied as an embedding + in AdaLayerNorm's. Used to indicate denoising step. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a + [`models.unet_2d_condition.UNet2DConditionOutput`] + instead of a plain tuple. + + Returns: + Dict if `return_dict` is True, otherwise a `tuple`. + When returning a tuple, the first element is the sample + tensor. + """ + # 1. 
Input + if self.is_input_continuous: + batch, channel, height, weight = hidden_states.shape + residual = hidden_states + + hidden_states = self.norm(hidden_states) + if not self.use_linear_projection: + hidden_states = self.proj_in(hidden_states) + inner_dim = hidden_states.shape[1] + hidden_states = hidden_states.permute(0, 2, 3, 1).reshape( + batch, height * weight, inner_dim) + else: + inner_dim = hidden_states.shape[1] + hidden_states = hidden_states.permute(0, 2, 3, 1).reshape( + batch, height * weight, inner_dim) + hidden_states = self.proj_in(hidden_states) + else: + raise ValueError('input_vectorized not supported now.') + + # 2. Blocks + for block in self.transformer_blocks: + hidden_states = block( + hidden_states, + context=encoder_hidden_states, + timestep=timestep) + + # 3. Output + if not self.use_linear_projection: + hidden_states = ( + hidden_states.reshape(batch, height, weight, + inner_dim).permute(0, 3, 1, + 2).contiguous()) + hidden_states = self.proj_out(hidden_states) + else: + hidden_states = self.proj_out(hidden_states) + hidden_states = ( + hidden_states.reshape(batch, height, weight, + inner_dim).permute(0, 3, 1, + 2).contiguous()) + + output = hidden_states + residual + + if not return_dict: + return (output, ) + + return Dict(sample=output) + + +class BasicTransformerBlock(nn.Module): + """A basic Transformer block. + + Args: + dim (int): The number of channels in the input and output. + num_attention_heads (int): The number of heads to use for + multi-head attention. + attention_head_dim (int): The number of channels in each head. + dropout (float, *optional*, defaults to 0.0): + The dropout probability to use. + cross_attention_dim (int, *optional*): + The size of the context vector for cross attention. + activation_fn (`str`, *optional*, defaults to `"geglu"`): + Activation function to be used in feed-forward. + attention_bias (bool, *optional*, defaults to `False`): + Configure if the attentions should contain a bias parameter. + only_cross_attention (bool, defaults to False): + whether to use cross attention only. + """ + + def __init__( + self, + dim: int, + num_attention_heads: int, + attention_head_dim: int, + dropout=0.0, + cross_attention_dim: Optional[int] = None, + activation_fn: str = 'geglu', + attention_bias: bool = False, + only_cross_attention: bool = False, + ): + super().__init__() + self.only_cross_attention = only_cross_attention + self.attn1 = CrossAttention( + query_dim=dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + cross_attention_dim=cross_attention_dim + if only_cross_attention else None, + ) # is a self-attention + self.ff = FeedForward( + dim, dropout=dropout, activation_fn=activation_fn) + self.attn2 = CrossAttention( + query_dim=dim, + cross_attention_dim=cross_attention_dim, + heads=num_attention_heads, + dim_head=attention_head_dim, + dropout=dropout, + bias=attention_bias, + ) # is self-attn if context is none + + # layer norms + self.norm1 = nn.LayerNorm(dim) + self.norm2 = nn.LayerNorm(dim) + self.norm3 = nn.LayerNorm(dim) + + def _set_attention_slice(self, slice_size): + """set attention slice.""" + self.attn1._slice_size = slice_size + self.attn2._slice_size = slice_size + + def forward(self, hidden_states, context=None, timestep=None): + """forward with hidden states, context and timestep.""" + # 1. 
Self-Attention + norm_hidden_states = (self.norm1(hidden_states)) + + if self.only_cross_attention: + hidden_states = self.attn1(norm_hidden_states, + context) + hidden_states + else: + hidden_states = self.attn1(norm_hidden_states) + hidden_states + + # 2. Cross-Attention + norm_hidden_states = (self.norm2(hidden_states)) + hidden_states = self.attn2( + norm_hidden_states, context=context) + hidden_states + + # 3. Feed-forward + hidden_states = self.ff(self.norm3(hidden_states)) + hidden_states + + return hidden_states + + +class CrossAttention(nn.Module): + r""" + A cross attention layer. + + Args: + query_dim (`int`): The number of channels in the query. + cross_attention_dim (`int`, *optional*): + The number of channels in the context. + If not given, defaults to `query_dim`. + heads (`int`, *optional*, defaults to 8): + The number of heads to use for multi-head attention. + dim_head (`int`, *optional*, defaults to 64): + The number of channels in each head. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability to use. + bias (`bool`, *optional*, defaults to False): + Set to `True` for the query, key, + and value linear layers to contain a bias parameter. + """ + + def __init__( + self, + query_dim: int, + cross_attention_dim: Optional[int] = None, + heads: int = 8, + dim_head: int = 64, + dropout: float = 0.0, + bias=False, + ): + super().__init__() + inner_dim = dim_head * heads + cross_attention_dim = cross_attention_dim if cross_attention_dim is not None else query_dim # noqa + + self.scale = dim_head**-0.5 + self.heads = heads + # for slice_size > 0 the attention score computation + # is split across the batch axis to save memory + # You can set slice_size with `set_attention_slice` + self._slice_size = None + + self.to_q = nn.Linear(query_dim, inner_dim, bias=bias) + self.to_k = nn.Linear(cross_attention_dim, inner_dim, bias=bias) + self.to_v = nn.Linear(cross_attention_dim, inner_dim, bias=bias) + + self.to_out = nn.ModuleList([]) + self.to_out.append(nn.Linear(inner_dim, query_dim)) + self.to_out.append(nn.Dropout(dropout)) + + def reshape_heads_to_batch_dim(self, tensor): + """reshape heads num to batch dim.""" + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape(batch_size, seq_len, head_size, + dim // head_size) + tensor = tensor.permute(0, 2, 1, 3).reshape(batch_size * head_size, + seq_len, dim // head_size) + return tensor + + def reshape_batch_dim_to_heads(self, tensor): + """reshape batch dim to heads num.""" + batch_size, seq_len, dim = tensor.shape + head_size = self.heads + tensor = tensor.reshape(batch_size // head_size, head_size, seq_len, + dim) + tensor = tensor.permute(0, 2, 1, 3).reshape(batch_size // head_size, + seq_len, dim * head_size) + return tensor + + def forward(self, hidden_states, context=None, mask=None): + """forward with hidden states, context and mask.""" + batch_size, sequence_length, _ = hidden_states.shape + + query = self.to_q(hidden_states) + context = context if context is not None else hidden_states + key = self.to_k(context) + value = self.to_v(context) + + dim = query.shape[-1] + + query = self.reshape_heads_to_batch_dim(query) + key = self.reshape_heads_to_batch_dim(key) + value = self.reshape_heads_to_batch_dim(value) + + # TODO(PVP) - mask is currently never used. 
Remember + # to re-implement when used + + # attention, what we cannot get enough of + if self._slice_size is None or query.shape[0] // self._slice_size == 1: + hidden_states = self._attention(query, key, value) + else: + hidden_states = self._sliced_attention(query, key, value, + sequence_length, dim) + + # linear proj + hidden_states = self.to_out[0](hidden_states) + # dropout + hidden_states = self.to_out[1](hidden_states) + return hidden_states + + def _attention(self, query, key, value): + """attention calculation.""" + attention_scores = torch.baddbmm( + torch.empty( + query.shape[0], + query.shape[1], + key.shape[1], + dtype=query.dtype, + device=query.device), + query, + key.transpose(-1, -2), + beta=0, + alpha=self.scale, + ) + attention_probs = attention_scores.softmax(dim=-1) + # compute attention output + + hidden_states = torch.bmm(attention_probs, value) + + # reshape hidden_states + hidden_states = self.reshape_batch_dim_to_heads(hidden_states) + return hidden_states + + def _sliced_attention(self, query, key, value, sequence_length, dim): + """sliced attention calculation.""" + batch_size_attention = query.shape[0] + hidden_states = torch.zeros( + (batch_size_attention, sequence_length, dim // self.heads), + device=query.device, + dtype=query.dtype) + slice_size = self._slice_size if self._slice_size is not None \ + else hidden_states.shape[0] + for i in range(hidden_states.shape[0] // slice_size): + start_idx = i * slice_size + end_idx = (i + 1) * slice_size + attn_slice = torch.baddbmm( + torch.empty( + slice_size, + query.shape[1], + key.shape[1], + dtype=query.dtype, + device=query.device), + query[start_idx:end_idx], + key[start_idx:end_idx].transpose(-1, -2), + beta=0, + alpha=self.scale, + ) + attn_slice = attn_slice.softmax(dim=-1) + attn_slice = torch.bmm(attn_slice, value[start_idx:end_idx]) + + hidden_states[start_idx:end_idx] = attn_slice + + # reshape hidden_states + hidden_states = self.reshape_batch_dim_to_heads(hidden_states) + return hidden_states + + +class FeedForward(nn.Module): + r""" + A feed-forward layer. + + Args: + dim (int): The number of channels in the input. + dim_out (int, *optional*): + The number of channels in the output. + If not given, defaults to `dim`. + mult (int, *optional*, defaults to 4): + The multiplier to use for the hidden dimension. + dropout (`float`, *optional*, defaults to 0.0): + The dropout probability to use. + activation_fn (`str`, *optional*, defaults to `"geglu"`): + Activation function to be used in feed-forward. + """ + + def __init__( + self, + dim: int, + dim_out: Optional[int] = None, + mult: int = 4, + dropout: float = 0.0, + activation_fn: str = 'geglu', + ): + super().__init__() + inner_dim = int(dim * mult) + dim_out = dim_out if dim_out is not None else dim + + if activation_fn == 'geglu': + geglu = GEGLU(dim, inner_dim) + elif activation_fn == 'geglu-approximate': + geglu = ApproximateGELU(dim, inner_dim) + + self.net = nn.ModuleList([]) + # project in + self.net.append(geglu) + # project dropout + self.net.append(nn.Dropout(dropout)) + # project out + self.net.append(nn.Linear(inner_dim, dim_out)) + + def forward(self, hidden_states): + """forward with hidden states.""" + for module in self.net: + hidden_states = module(hidden_states) + return hidden_states + + +# feedforward +class GEGLU(nn.Module): + r""" + A variant of the gated linear unit activation function + from https://arxiv.org/abs/2002.05202. + + Args: + dim_in (`int`): The number of channels in the input. 
+ dim_out (`int`): The number of channels in the output. + """ + + def __init__(self, dim_in: int, dim_out: int): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out * 2) + + def gelu(self, gate): + """gelu activation.""" + return F.gelu(gate) + + def forward(self, hidden_states): + """forward with hidden states.""" + hidden_states, gate = self.proj(hidden_states).chunk(2, dim=-1) + return hidden_states * self.gelu(gate) + + +class ApproximateGELU(nn.Module): + """The approximate form of Gaussian Error Linear Unit (GELU) + + For more details, see section 2: https://arxiv.org/abs/1606.08415 + """ + + def __init__(self, dim_in: int, dim_out: int): + super().__init__() + self.proj = nn.Linear(dim_in, dim_out) + + def forward(self, x): + """forward function.""" + x = self.proj(x) + return x * torch.sigmoid(1.702 * x) diff --git a/mmedit/models/editors/ddpm/ddpm_scheduler.py b/mmedit/models/editors/ddpm/ddpm_scheduler.py index d06f2e85bb..92b1000309 100644 --- a/mmedit/models/editors/ddpm/ddpm_scheduler.py +++ b/mmedit/models/editors/ddpm/ddpm_scheduler.py @@ -1,22 +1,22 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Union +from typing import Optional, Union import numpy as np import torch +from mmedit.models.utils.diffusion_utils import betas_for_alpha_bar from mmedit.registry import DIFFUSION_SCHEDULERS -from ...utils.diffusion_utils import betas_for_alpha_bar @DIFFUSION_SCHEDULERS.register_module() class DDPMScheduler: def __init__(self, - num_train_timesteps=1000, - beta_start=0.0001, - beta_end=0.02, - beta_schedule='linear', - trained_betas=None, + num_train_timesteps: int = 1000, + beta_start: float = 0.0001, + beta_end: float = 0.02, + beta_schedule: str = 'linear', + trained_betas: Optional[Union[np.array, list]] = None, variance_type='fixed_small', clip_sample=True): """```DDPMScheduler``` support the diffusion and reverse process @@ -25,13 +25,25 @@ def __init__(self, The code is heavily influenced by https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_ddpm.py. # noqa Args: - num_train_timesteps (int, optional): _description_. Defaults to 1000. - beta_start (float, optional): _description_. Defaults to 0.0001. - beta_end (float, optional): _description_. Defaults to 0.02. - beta_schedule (str, optional): _description_. Defaults to 'linear'. - trained_betas (_type_, optional): _description_. Defaults to None. - variance_type (str, optional): _description_. Defaults to 'fixed_small'. - clip_sample (bool, optional): _description_. Defaults to True. + num_train_timesteps (int, optional): The timesteps for training + process. Defaults to 1000. + beta_start (float, optional): The beta value at start. The beta + values will be interpolated from beta_start to beta_end. + Defaults to 0.0001. + beta_end (float, optional): The beta value at the end. The beta + values will be interpolated from beta_start to beta_end. + Defaults to 0.02. + beta_schedule (str, optional): The interpolation schedule for beta + values. Supported choices are 'linear', 'scaled_linear', and + 'squaredcos_cap_v2'. Defaults to 'linear'. + trained_betas (list, np.array, optional): betas directly to the + constructor to bypass `beta_start`, `beta_end` etc. Defaults to None. + variance_type (str, optional): How denoising unet output variance + value. Supported choices are 'fixed_small', 'fixed_small_log', + 'fixed_large', 'fixed_large_log', 'learned', and 'leanred_range'. + Defaults to 'fixed_small'. 
+ clip_sample (bool, optional): Whether clip the value of predicted + original image (x0) to [-1, 1]. Defaults to True. """ self.num_train_timesteps = num_train_timesteps if trained_betas is not None: @@ -66,6 +78,8 @@ def __init__(self, self.clip_sample = clip_sample def set_timesteps(self, num_inference_steps): + """set timesteps.""" + num_inference_steps = min(self.num_train_timesteps, num_inference_steps) self.num_inference_steps = num_inference_steps @@ -74,6 +88,8 @@ def set_timesteps(self, num_inference_steps): self.num_train_timesteps // self.num_inference_steps)[::-1].copy() def _get_variance(self, t, predicted_variance=None, variance_type=None): + """get variance.""" + alpha_prod_t = self.alphas_cumprod[t] alpha_prod_t_prev = self.alphas_cumprod[t - 1] if t > 0 else self.one @@ -93,10 +109,10 @@ def _get_variance(self, t, predicted_variance=None, variance_type=None): # hacks - were probs added for training stability if variance_type == 'fixed_small': - variance = np.clip(variance, min_value=1e-20) + variance = np.clip(variance, a_min=1e-20, a_max=10000) # for rl-diffusion_scheduler https://arxiv.org/abs/2205.09991 elif variance_type == 'fixed_small_log': - variance = np.log(np.clip(variance, min_value=1e-20)) + variance = np.log(np.clip(variance, a_min=1e-20, a_max=10000)) elif variance_type == 'fixed_large': variance = self.betas[t] elif variance_type == 'fixed_large_log': @@ -114,12 +130,13 @@ def _get_variance(self, t, predicted_variance=None, variance_type=None): return variance def step(self, - model_output: Union[torch.FloatTensor], + model_output: torch.FloatTensor, timestep: int, - sample: Union[torch.FloatTensor], + sample: torch.FloatTensor, predict_epsilon=True, generator=None): t = timestep + """step forward""" if model_output.shape[1] == sample.shape[ 1] * 2 and self.variance_type in ['learned', 'learned_range']: @@ -177,6 +194,8 @@ def step(self, } def add_noise(self, original_samples, noise, timesteps): + """add noise.""" + sqrt_alpha_prod = self.alphas_cumprod[timesteps]**0.5 sqrt_alpha_prod = self.match_shape(sqrt_alpha_prod, original_samples) sqrt_one_minus_alpha_prod = (1 - self.alphas_cumprod[timesteps])**0.5 diff --git a/mmedit/models/editors/ddpm/denoising_unet.py b/mmedit/models/editors/ddpm/denoising_unet.py index 1154825077..3aad605392 100644 --- a/mmedit/models/editors/ddpm/denoising_unet.py +++ b/mmedit/models/editors/ddpm/denoising_unet.py @@ -2,6 +2,7 @@ import math from copy import deepcopy from functools import partial +from typing import Tuple import mmengine import numpy as np @@ -17,6 +18,10 @@ from mmengine.utils.version_utils import digit_version from mmedit.registry import MODELS, MODULES +from .embeddings import TimestepEmbedding, Timesteps +from .unet_blocks import UNetMidBlock2DCrossAttn, get_down_block, get_up_block + +logger = MMLogger.get_current_instance() class EmbedSequential(nn.Sequential): @@ -27,10 +32,14 @@ class EmbedSequential(nn.Sequential): https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/unet.py#L35 """ - def forward(self, x, y): + def forward(self, x, y, encoder_out=None): for layer in self: if isinstance(layer, DenoisingResBlock): x = layer(x, y) + elif isinstance( + layer, + MultiHeadAttentionBlock) and encoder_out is not None: + x = layer(x, encoder_out) else: x = layer(x) return x @@ -165,7 +174,8 @@ def __init__(self, num_heads=1, num_head_channels=-1, use_new_attention_order=False, - norm_cfg=dict(type='GN32', num_groups=32)): + norm_cfg=dict(type='GN32', num_groups=32), + 
encoder_channels=None): super().__init__() self.in_channels = in_channels if num_head_channels == -1: @@ -186,12 +196,18 @@ def __init__(self, self.attention = QKVAttentionLegacy(self.num_heads) self.proj_out = nn.Conv1d(in_channels, in_channels, 1) + if encoder_channels is not None: + self.encoder_kv = nn.Conv1d(encoder_channels, in_channels * 2, 1) - def forward(self, x): + def forward(self, x, encoder_out): b, c, *spatial = x.shape x = x.reshape(b, c, -1) qkv = self.qkv(self.norm(x)) - h = self.attention(qkv) + if encoder_out is not None: + encoder_out = self.encoder_kv(encoder_out) + h = self.attention(qkv, encoder_out) + else: + h = self.attention(qkv) h = self.proj_out(h) return (x + h).reshape(b, c, *spatial) @@ -207,7 +223,7 @@ def __init__(self, n_heads): super().__init__() self.n_heads = n_heads - def forward(self, qkv): + def forward(self, qkv, encoder_kv=None): """Apply QKV attention. :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. @@ -218,6 +234,12 @@ def forward(self, qkv): ch = width // (3 * self.n_heads) q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split( ch, dim=1) + if encoder_kv is not None: + assert encoder_kv.shape[1] == self.n_heads * ch * 2 + ek, ev = encoder_kv.reshape(bs * self.n_heads, ch * 2, -1).split( + ch, dim=1) + k = torch.cat([ek, k], dim=-1) + v = torch.cat([ev, v], dim=-1) scale = 1 / math.sqrt(math.sqrt(ch)) weight = torch.einsum( 'bct,bcs->bts', q * scale, @@ -579,6 +601,122 @@ def forward(self, x): return x +def build_down_block_resattn(resblocks_per_downsample, resblock_cfg, + in_channels_, out_channels_, attention_scale, + attention_cfg, in_channels_list, level, + channel_factor_list, embedding_channels, + use_scale_shift_norm, dropout, norm_cfg, + resblock_updown, downsample_cfg, scale): + """build unet down path blocks with resnet and attention.""" + + in_blocks = nn.ModuleList() + + for _ in range(resblocks_per_downsample): + layers = [ + MODULES.build( + resblock_cfg, + default_args={ + 'in_channels': in_channels_, + 'out_channels': out_channels_ + }) + ] + in_channels_ = out_channels_ + + if scale in attention_scale: + layers.append( + MODULES.build( + attention_cfg, default_args={'in_channels': in_channels_})) + + in_channels_list.append(in_channels_) + in_blocks.append(EmbedSequential(*layers)) + + if level != len(channel_factor_list) - 1: + in_blocks.append( + EmbedSequential( + DenoisingResBlock( + out_channels_, + embedding_channels, + use_scale_shift_norm, + dropout, + norm_cfg=norm_cfg, + out_channels=out_channels_, + down=True) if resblock_updown else MODULES.build( + downsample_cfg, + default_args={'in_channels': in_channels_}))) + in_channels_list.append(in_channels_) + scale *= 2 + return in_blocks, scale + + +def build_mid_blocks_resattn(resblock_cfg, attention_cfg, in_channels_): + """build unet mid blocks with resnet and attention.""" + + return EmbedSequential( + MODULES.build( + resblock_cfg, default_args={'in_channels': in_channels_}), + MODULES.build( + attention_cfg, default_args={'in_channels': in_channels_}), + MODULES.build( + resblock_cfg, default_args={'in_channels': in_channels_}), + ) + + +def build_up_blocks_resattn( + resblocks_per_downsample, + resblock_cfg, + in_channels_, + in_channels_list, + base_channels, + factor, + scale, + attention_scale, + attention_cfg, + channel_factor_list, + level, + embedding_channels, + use_scale_shift_norm, + dropout, + norm_cfg, + resblock_updown, + upsample_cfg, +): + """build up path blocks with resnet and attention.""" + + out_blocks = nn.ModuleList() + 
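+    # Note: each decoder level runs ``resblocks_per_downsample + 1`` blocks
+    # (one more than the encoder) because every block also concatenates a
+    # skip feature popped from ``in_channels_list``; attention is appended
+    # at the configured ``attention_scale`` levels, and an up-sampling layer
+    # is added after the last block of every level except the final one.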
for idx in range(resblocks_per_downsample + 1): + layers = [ + MODULES.build( + resblock_cfg, + default_args={ + 'in_channels': in_channels_ + in_channels_list.pop(), + 'out_channels': int(base_channels * factor) + }) + ] + in_channels_ = int(base_channels * factor) + if scale in attention_scale: + layers.append( + MODULES.build( + attention_cfg, default_args={'in_channels': in_channels_})) + if (level != len(channel_factor_list) - 1 + and idx == resblocks_per_downsample): + out_channels_ = in_channels_ + layers.append( + DenoisingResBlock( + in_channels_, + embedding_channels, + use_scale_shift_norm, + dropout, + norm_cfg=norm_cfg, + out_channels=out_channels_, + up=True) if resblock_updown else MODULES. + build( + upsample_cfg, default_args={'in_channels': in_channels_})) + scale //= 2 + out_blocks.append(EmbedSequential(*layers)) + + return out_blocks, in_channels_, scale + + @MODULES.register_module() class DenoisingUnet(BaseModule): """Denoising Unet. This network receives a diffused image ``x_t`` and @@ -721,17 +859,26 @@ def __init__(self, time_embedding_cfg=None, resblock_cfg=dict(type='DenoisingResBlock'), attention_cfg=dict(type='MultiHeadAttention'), + encoder_channels=None, downsample_conv=True, upsample_conv=True, downsample_cfg=dict(type='DenoisingDownsample'), upsample_cfg=dict(type='DenoisingUpsample'), attention_res=[16, 8], - pretrained=None): + pretrained=None, + unet_type='', + down_block_types: Tuple[str] = (), + up_block_types: Tuple[str] = (), + cross_attention_dim=768, + layers_per_block: int = 2): super().__init__() + self.unet_type = unet_type self.num_classes = num_classes self.num_timesteps = num_timesteps + self.base_channels = base_channels + self.encoder_channels = encoder_channels self.use_rescale_timesteps = use_rescale_timesteps self.dtype = torch.float16 if use_fp16 else torch.float32 @@ -774,12 +921,30 @@ def __init__(self, embedding_channels = base_channels * 4 \ if embedding_channels == -1 else embedding_channels - self.time_embedding = TimeEmbedding( - base_channels, - embedding_channels=embedding_channels, - embedding_mode=time_embedding_mode, - embedding_cfg=time_embedding_cfg, - act_cfg=act_cfg) + + # init the channel scale factor + scale = 1 + ch = int(base_channels * self.channel_factor_list[0]) + self.in_channels_list = [ch] + + if self.unet_type == 'stable': + # time + self.time_proj = Timesteps(ch) + self.time_embedding = TimestepEmbedding(base_channels, + embedding_channels) + + self.conv_in = nn.Conv2d( + in_channels, ch, kernel_size=3, padding=(1, 1)) + else: + self.time_embedding = TimeEmbedding( + base_channels, + embedding_channels=embedding_channels, + embedding_mode=time_embedding_mode, + embedding_cfg=time_embedding_cfg, + act_cfg=act_cfg) + + self.in_blocks = nn.ModuleList( + [EmbedSequential(nn.Conv2d(in_channels, ch, 3, 1, padding=1))]) if self.num_classes != 0: self.label_embedding = nn.Embedding(self.num_classes, @@ -806,115 +971,177 @@ def __init__(self, self.upsample_cfg = deepcopy(upsample_cfg) self.upsample_cfg.setdefault('with_conv', upsample_conv) - # init the channel scale factor - scale = 1 - ch = int(base_channels * self.channel_factor_list[0]) - self.in_blocks = nn.ModuleList( - [EmbedSequential(nn.Conv2d(in_channels, ch, 3, 1, padding=1))]) - self.in_channels_list = [ch] + self.down_blocks = nn.ModuleList([]) + self.mid_block = None + self.up_blocks = nn.ModuleList([]) + + attention_head_dim = (num_heads, ) * len(down_block_types) # construct the encoder part of Unet for level, factor in 
enumerate(self.channel_factor_list): in_channels_ = ch if level == 0 \ else int(base_channels * self.channel_factor_list[level - 1]) out_channels_ = int(base_channels * factor) - for _ in range(resblocks_per_downsample): - layers = [ - MODULES.build( - self.resblock_cfg, - default_args={ - 'in_channels': in_channels_, - 'out_channels': out_channels_ - }) - ] - in_channels_ = out_channels_ - - if scale in attention_scale: - layers.append( - MODULES.build( - self.attention_cfg, - default_args={'in_channels': in_channels_})) - - self.in_channels_list.append(in_channels_) - self.in_blocks.append(EmbedSequential(*layers)) - - if level != len(self.channel_factor_list) - 1: - self.in_blocks.append( - EmbedSequential( - DenoisingResBlock( - out_channels_, - embedding_channels, - use_scale_shift_norm, - dropout, - norm_cfg=norm_cfg, - out_channels=out_channels_, - down=True) if resblock_updown else MODULES.build( - self.downsample_cfg, - default_args={'in_channels': in_channels_}))) - self.in_channels_list.append(in_channels_) - scale *= 2 + + if self.unet_type == 'stable': + is_final_block = level == len(self.channel_factor_list) - 1 + down_block_type = down_block_types[level] + down_block = get_down_block( + down_block_type, + num_layers=layers_per_block, + in_channels=in_channels_, + out_channels=out_channels_, + temb_channels=embedding_channels, + cross_attention_dim=cross_attention_dim, + add_downsample=not is_final_block, + resnet_act_fn=act_cfg['type'], + resnet_groups=norm_cfg['num_groups'], + attn_num_head_channels=attention_head_dim[level], + ) + self.down_blocks.append(down_block) + + else: + in_blocks, scale = build_down_block_resattn( + resblocks_per_downsample=resblocks_per_downsample, + resblock_cfg=self.resblock_cfg, + in_channels_=in_channels_, + out_channels_=out_channels_, + attention_scale=attention_scale, + attention_cfg=self.attention_cfg, + in_channels_list=self.in_channels_list, + level=level, + channel_factor_list=self.channel_factor_list, + embedding_channels=embedding_channels, + use_scale_shift_norm=use_scale_shift_norm, + dropout=dropout, + norm_cfg=norm_cfg, + resblock_updown=resblock_updown, + downsample_cfg=self.downsample_cfg, + scale=scale) + self.in_blocks.extend(in_blocks) # construct the bottom part of Unet - self.mid_blocks = EmbedSequential( - MODULES.build( - self.resblock_cfg, default_args={'in_channels': in_channels_}), - MODULES.build( - self.attention_cfg, default_args={'in_channels': - in_channels_}), - MODULES.build( - self.resblock_cfg, default_args={'in_channels': in_channels_}), - ) + block_out_channels = [ + times * base_channels for times in self.channel_factor_list + ] + in_channels_ = self.in_channels_list[-1] + if self.unet_type == 'stable': + self.mid_block = UNetMidBlock2DCrossAttn( + in_channels=block_out_channels[-1], + temb_channels=embedding_channels, + cross_attention_dim=cross_attention_dim, + resnet_act_fn=act_cfg['type'], + resnet_time_scale_shift='default', + attn_num_head_channels=attention_head_dim[-1], + resnet_groups=norm_cfg['num_groups'], + ) + else: + self.mid_blocks = build_mid_blocks_resattn(self.resblock_cfg, + self.attention_cfg, + in_channels_) + + # stable up parameters + self.num_upsamplers = 0 + reversed_block_out_channels = list(reversed(block_out_channels)) + reversed_attention_head_dim = list(reversed(attention_head_dim)) + output_channel = reversed_block_out_channels[0] # construct the decoder part of Unet in_channels_list = deepcopy(self.in_channels_list) - self.out_blocks = nn.ModuleList() + if self.unet_type != 
'stable': + self.out_blocks = nn.ModuleList() for level, factor in enumerate(self.channel_factor_list[::-1]): - for idx in range(resblocks_per_downsample + 1): - layers = [ - MODULES.build( - self.resblock_cfg, - default_args={ - 'in_channels': - in_channels_ + in_channels_list.pop(), - 'out_channels': int(base_channels * factor) - }) - ] - in_channels_ = int(base_channels * factor) - if scale in attention_scale: - layers.append( - MODULES.build( - self.attention_cfg, - default_args={'in_channels': in_channels_})) - if (level != len(self.channel_factor_list) - 1 - and idx == resblocks_per_downsample): - out_channels_ = in_channels_ - layers.append( - DenoisingResBlock( - in_channels_, - embedding_channels, - use_scale_shift_norm, - dropout, - norm_cfg=norm_cfg, - out_channels=out_channels_, - up=True) if resblock_updown else MODULES.build( - self.upsample_cfg, - default_args={'in_channels': in_channels_})) - scale //= 2 - self.out_blocks.append(EmbedSequential(*layers)) - - self.out = ConvModule( - in_channels=in_channels_, - out_channels=out_channels, - kernel_size=3, - padding=1, - act_cfg=act_cfg, - norm_cfg=norm_cfg, - bias=True, - order=('norm', 'act', 'conv')) + + if self.unet_type == 'stable': + is_final_block = level == len(block_out_channels) - 1 + + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[level] + input_channel = reversed_block_out_channels[min( + level + 1, + len(block_out_channels) - 1)] + + # add upsample block for all BUT final layer + if not is_final_block: + add_upsample = True + self.num_upsamplers += 1 + else: + add_upsample = False + + up_block_type = up_block_types[level] + up_block = get_up_block( + up_block_type, + num_layers=layers_per_block + 1, + in_channels=input_channel, + out_channels=output_channel, + prev_output_channel=prev_output_channel, + temb_channels=embedding_channels, + cross_attention_dim=cross_attention_dim, + add_upsample=add_upsample, + resnet_act_fn=act_cfg['type'], + resnet_groups=norm_cfg['num_groups'], + attn_num_head_channels=reversed_attention_head_dim[level], + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + else: + out_blocks, in_channels_, scale = build_up_blocks_resattn( + resblocks_per_downsample, + self.resblock_cfg, + in_channels_, + in_channels_list, + base_channels, + factor, + scale, + attention_scale, + self.attention_cfg, + self.channel_factor_list, + level, + embedding_channels, + use_scale_shift_norm, + dropout, + norm_cfg, + resblock_updown, + self.upsample_cfg, + ) + self.out_blocks.extend(out_blocks) + + if self.unet_type == 'stable': + # out + self.conv_norm_out = nn.GroupNorm( + num_channels=block_out_channels[0], + num_groups=norm_cfg['num_groups']) + if digit_version(TORCH_VERSION) > digit_version('1.6.0'): + self.conv_act = nn.SiLU() + else: + mmengine.print_log('\'SiLU\' is not supported for ' + f'torch < 1.6.0, found \'{torch.version}\'.' + 'Use ReLu instead but result maybe wrong') + self.conv_act == nn.ReLU() + self.conv_out = nn.Conv2d( + block_out_channels[0], + self.out_channels, + kernel_size=3, + padding=1) + else: + self.out = ConvModule( + in_channels=in_channels_, + out_channels=out_channels, + kernel_size=3, + padding=1, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + bias=True, + order=('norm', 'act', 'conv')) self.init_weights(pretrained) - def forward(self, x_t, t, label=None, return_noise=False): + def forward(self, + x_t, + t, + encoder_hidden_states=None, + label=None, + return_noise=False): """Forward function. 
Args: x_t (torch.Tensor): Diffused image at timestep `t` to denoise. @@ -930,32 +1157,125 @@ def forward(self, x_t, t, label=None, return_noise=False): Returns: torch.Tensor | dict: If not ``return_noise`` """ + # By default samples have to be AT least a multiple of t + # he overall upsampling factor. + # The overall upsampling factor is equal + # to 2 ** (# num of upsampling layears). + # However, the upsampling interpolation output size + # can be forced to fit any upsampling size + # on the fly if necessary. + default_overall_up_factor = 2**self.num_upsamplers + + # upsample size should be forwarded when sample is not + # a multiple of `default_overall_up_factor` + forward_upsample_size = False + upsample_size = None + + if any(s % default_overall_up_factor != 0 for s in x_t.shape[-2:]): + logger.info( + 'Forward upsample size to force interpolation output size.') + forward_upsample_size = True + if not torch.is_tensor(t): t = torch.tensor([t], dtype=torch.long, device=x_t.device) elif torch.is_tensor(t) and len(t.shape) == 0: t = t[None].to(x_t.device) - embedding = self.time_embedding(t) + if self.unet_type == 'stable': + # broadcast to batch dimension in a way that's + # compatible with ONNX/Core ML + t = t.expand(x_t.shape[0]) + + t_emb = self.time_proj(t) + + # t does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. + # so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=self.dtype) + embedding = self.time_embedding(t_emb) + else: + embedding = self.time_embedding(t) if label is not None: assert hasattr(self, 'label_embedding') embedding = self.label_embedding(label) + embedding - h, hs = x_t, [] - h = h.type(self.dtype) - # forward downsample blocks - for block in self.in_blocks: - h = block(h, embedding) - hs.append(h) - - # forward middle blocks - h = self.mid_blocks(h, embedding) - - # forward upsample blocks - for block in self.out_blocks: - h = block(torch.cat([h, hs.pop()], dim=1), embedding) - h = h.type(x_t.dtype) - outputs = self.out(h) + if self.unet_type == 'stable': + # 2. pre-process + x_t = self.conv_in(x_t) + + # 3. down + down_block_res_samples = (x_t, ) + for downsample_block in self.down_blocks: + if hasattr(downsample_block, 'attentions' + ) and downsample_block.attentions is not None: + x_t, res_samples = downsample_block( + hidden_states=x_t, + temb=embedding, + encoder_hidden_states=encoder_hidden_states, + ) + else: + x_t, res_samples = downsample_block( + hidden_states=x_t, temb=embedding) + + down_block_res_samples += res_samples + + # 4. mid + x_t = self.mid_block( + x_t, embedding, encoder_hidden_states=encoder_hidden_states) + + # 5. up + for i, upsample_block in enumerate(self.up_blocks): + is_final_block = i == len(self.up_blocks) - 1 + + res_samples = down_block_res_samples[-len(upsample_block. 
+ resnets):] + down_block_res_samples = down_block_res_samples[:-len( + upsample_block.resnets)] + + # if we have not reached the final block + # and need to forward the upsample size, we do it here + if not is_final_block and forward_upsample_size: + upsample_size = down_block_res_samples[-1].shape[2:] + + if hasattr(upsample_block, 'attentions' + ) and upsample_block.attentions is not None: + x_t = upsample_block( + hidden_states=x_t, + temb=embedding, + res_hidden_states_tuple=res_samples, + encoder_hidden_states=encoder_hidden_states, + upsample_size=upsample_size, + ) + else: + x_t = upsample_block( + hidden_states=x_t, + temb=embedding, + res_hidden_states_tuple=res_samples, + upsample_size=upsample_size) + # 6. post-process + x_t = self.conv_norm_out(x_t) + x_t = self.conv_act(x_t) + x_t = self.conv_out(x_t) + + outputs = x_t + else: + h, hs = x_t, [] + h = h.type(self.dtype) + # forward downsample blocks + for block in self.in_blocks: + h = block(h, embedding) + hs.append(h) + + # forward middle blocks + h = self.mid_blocks(h, embedding) + + # forward upsample blocks + for block in self.out_blocks: + h = block(torch.cat([h, hs.pop()], dim=1), embedding) + h = h.type(x_t.dtype) + outputs = self.out(h) return {'outputs': outputs} diff --git a/mmedit/models/editors/ddpm/embeddings.py b/mmedit/models/editors/ddpm/embeddings.py new file mode 100644 index 0000000000..68fa5fa976 --- /dev/null +++ b/mmedit/models/editors/ddpm/embeddings.py @@ -0,0 +1,95 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import mmengine +import torch +from mmengine.utils.dl_utils import TORCH_VERSION +from mmengine.utils.version_utils import digit_version +from torch import nn + + +class TimestepEmbedding(nn.Module): + """Module which uses linear to embed timestep.""" + + def __init__(self, + in_channels: int, + time_embed_dim: int, + act_fn: str = 'silu', + out_dim: int = None): + super().__init__() + + self.linear_1 = nn.Linear(in_channels, time_embed_dim) + self.act = None + if act_fn == 'silu' and \ + digit_version(TORCH_VERSION) > digit_version('1.6.0'): + self.act = nn.SiLU() + else: + mmengine.print_log('\'SiLU\' is not supported for ' + f'torch < 1.6.0, found \'{torch.version}\'.' 
+ 'Use ReLu instead but result maybe wrong') + self.act = nn.ReLU() + + if out_dim is not None: + time_embed_dim_out = out_dim + else: + time_embed_dim_out = time_embed_dim + self.linear_2 = nn.Linear(time_embed_dim, time_embed_dim_out) + + def forward(self, sample): + """forward with sample.""" + + sample = self.linear_1(sample) + + if self.act is not None: + sample = self.act(sample) + + sample = self.linear_2(sample) + return sample + + +class Timesteps(nn.Module): + """A module which transforms timesteps to embedding.""" + + def __init__(self, + num_channels: int, + flip_sin_to_cos: bool = True, + downscale_freq_shift: float = 0): + super().__init__() + self.num_channels = num_channels + self.flip_sin_to_cos = flip_sin_to_cos + self.downscale_freq_shift = downscale_freq_shift + self.max_period = 10000 + self.scale = 1 + + def forward(self, timesteps): + """forward with timesteps.""" + + assert len(timesteps.shape) == 1, 'Timesteps should be a 1d-array' + + embedding_dim = self.num_channels + half_dim = embedding_dim // 2 + exponent = -math.log(self.max_period) * \ + torch.arange( + start=0, + end=half_dim, + dtype=torch.float32, + device=timesteps.device) + exponent = exponent / (half_dim - self.downscale_freq_shift) + + emb = torch.exp(exponent) + emb = timesteps[:, None].float() * emb[None, :] + + # scale embeddings + emb = self.scale * emb + + # concat sine and cosine embeddings + emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=-1) + + # flip sine and cosine embeddings + if self.flip_sin_to_cos: + emb = torch.cat([emb[:, half_dim:], emb[:, :half_dim]], dim=-1) + + # zero pad + if embedding_dim % 2 == 1: + emb = torch.nn.functional.pad(emb, (0, 1, 0, 0)) + return emb diff --git a/mmedit/models/editors/ddpm/res_blocks.py b/mmedit/models/editors/ddpm/res_blocks.py new file mode 100644 index 0000000000..7ccf5afef2 --- /dev/null +++ b/mmedit/models/editors/ddpm/res_blocks.py @@ -0,0 +1,247 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import mmengine +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmengine.utils.dl_utils import TORCH_VERSION +from mmengine.utils.version_utils import digit_version + + +class ResnetBlock2D(nn.Module): + """resnet block support down sample and up sample. + + Args: + in_channels (int): input channels. + out_channels (int): output channels. + conv_shortcut (bool): whether to use conv shortcut. + dropout (float): dropout rate. + temb_channels (int): time embedding channels. + groups (int): conv groups. + groups_out (int): conv out groups. + pre_norm (bool): whether to norm before conv. Todo: remove. + eps (float): eps for groupnorm. + non_linearity (str): non linearity type. + time_embedding_norm (str): time embedding norm type. + output_scale_factor (float): factor to scale input and output. + use_in_shortcut (bool): whether to use conv in shortcut. + up (bool): whether to upsample. + down (bool): whether to downsample. 
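+
+    Example (a minimal sketch; the shapes and arguments below are
+    illustrative only, not required values):
+
+        >>> import torch
+        >>> block = ResnetBlock2D(in_channels=64, temb_channels=512)
+        >>> x = torch.rand(2, 64, 32, 32)
+        >>> temb = torch.rand(2, 512)
+        >>> block(x, temb).shape
+        torch.Size([2, 64, 32, 32])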
+ """ + + def __init__( + self, + in_channels, + out_channels=None, + conv_shortcut=False, + dropout=0.0, + temb_channels=512, + groups=32, + groups_out=None, + pre_norm=True, + eps=1e-6, + non_linearity='silu', + time_embedding_norm='default', + output_scale_factor=1.0, + use_in_shortcut=None, + up=False, + down=False, + ): + super().__init__() + self.pre_norm = pre_norm + self.pre_norm = True + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.time_embedding_norm = time_embedding_norm + self.up = up + self.down = down + self.output_scale_factor = output_scale_factor + + if groups_out is None: + groups_out = groups + + self.norm1 = torch.nn.GroupNorm( + num_groups=groups, num_channels=in_channels, eps=eps, affine=True) + + self.conv1 = torch.nn.Conv2d( + in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if temb_channels is not None: + self.time_emb_proj = torch.nn.Linear(temb_channels, out_channels) + else: + self.time_emb_proj = None + + self.norm2 = torch.nn.GroupNorm( + num_groups=groups_out, + num_channels=out_channels, + eps=eps, + affine=True) + self.dropout = torch.nn.Dropout(dropout) + self.conv2 = torch.nn.Conv2d( + out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if non_linearity == 'silu' and \ + digit_version(TORCH_VERSION) > digit_version('1.6.0'): + self.nonlinearity = nn.SiLU() + else: + mmengine.print_log('\'SiLU\' is not supported for ' + f'torch < 1.6.0, found \'{torch.version}\'.' + 'Use ReLu instead but result maybe wrong') + self.nonlinearity = nn.ReLU() + + self.upsample = self.downsample = None + if self.up: + self.upsample = Upsample2D(in_channels, use_conv=False) + elif self.down: + self.downsample = Downsample2D( + in_channels, use_conv=False, padding=1, name='op') + + self.use_in_shortcut = self.in_channels != self.out_channels if use_in_shortcut is None else use_in_shortcut # noqa + + self.conv_shortcut = None + if self.use_in_shortcut: + self.conv_shortcut = torch.nn.Conv2d( + in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, input_tensor, temb): + """forward with hidden states and time embeddings.""" + hidden_states = input_tensor + + hidden_states = self.norm1(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + + if self.upsample is not None: + # upsample_nearest_nhwc fails with large batch sizes. + # see https://github.com/huggingface/diffusers/issues/984 + if hidden_states.shape[0] >= 64: + input_tensor = input_tensor.contiguous() + hidden_states = hidden_states.contiguous() + input_tensor = self.upsample(input_tensor) + hidden_states = self.upsample(hidden_states) + elif self.downsample is not None: + input_tensor = self.downsample(input_tensor) + hidden_states = self.downsample(hidden_states) + + hidden_states = self.conv1(hidden_states) + + if temb is not None: + temb = self.time_emb_proj(self.nonlinearity(temb))[:, :, None, + None] + hidden_states = hidden_states + temb + + hidden_states = self.norm2(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + + hidden_states = self.dropout(hidden_states) + hidden_states = self.conv2(hidden_states) + + if self.conv_shortcut is not None: + input_tensor = self.conv_shortcut(input_tensor) + + output_tensor = (input_tensor + + hidden_states) / self.output_scale_factor + + return output_tensor + + +class Upsample2D(nn.Module): + """An upsampling layer with an optional convolution. 
+ + Args: + channels (int): channels in the inputs and outputs. + use_conv (bool): a bool determining if a convolution is applied. + use_conv_transpose (bool): whether to use conv transpose. + out_channels (int): output channels. + """ + + def __init__(self, + channels, + use_conv=False, + use_conv_transpose=False, + out_channels=None, + name='conv'): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + + conv = None + if use_conv: + conv = nn.Conv2d(self.channels, self.out_channels, 3, padding=1) + else: + conv = nn.ConvTranspose2d(channels, self.out_channels, 4, 2, 1) + + self.conv = conv + + def forward(self, hidden_states, output_size=None): + """forward with hidden states.""" + assert hidden_states.shape[1] == self.channels + + if self.use_conv_transpose: + return self.conv(hidden_states) + + # if `output_size` is passed we force the interpolation output + # size and do not make use of `scale_factor=2` + if output_size is None: + hidden_states = F.interpolate( + hidden_states, scale_factor=2.0, mode='nearest') + else: + hidden_states = F.interpolate( + hidden_states, size=output_size, mode='nearest') + + hidden_states = self.conv(hidden_states) + + return hidden_states + + +class Downsample2D(nn.Module): + """A downsampling layer with an optional convolution. + + Args: + channels (int): channels in the inputs and outputs. + use_conv (bool): a bool determining if a convolution is applied. + out_channels (int): output channels + padding (int): padding num + """ + + def __init__(self, + channels, + use_conv=False, + out_channels=None, + padding=1, + name='conv'): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if use_conv: + conv = nn.Conv2d( + self.channels, + self.out_channels, + 3, + stride=stride, + padding=padding) + else: + assert self.channels == self.out_channels + conv = nn.AvgPool2d(kernel_size=stride, stride=stride) + + self.conv = conv + + def forward(self, hidden_states): + """forward with hidden states.""" + assert hidden_states.shape[1] == self.channels + if self.use_conv and self.padding == 0: + pad = (0, 1, 0, 1) + hidden_states = F.pad(hidden_states, pad, mode='constant', value=0) + + assert hidden_states.shape[1] == self.channels + hidden_states = self.conv(hidden_states) + + return hidden_states diff --git a/mmedit/models/editors/ddpm/unet_blocks.py b/mmedit/models/editors/ddpm/unet_blocks.py new file mode 100644 index 0000000000..159d2a7e10 --- /dev/null +++ b/mmedit/models/editors/ddpm/unet_blocks.py @@ -0,0 +1,594 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
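+# Blocks used to assemble the 'stable' UNet: get_down_block/get_up_block
+# dispatch to DownBlock2D, CrossAttnDownBlock2D, UpBlock2D and
+# CrossAttnUpBlock2D; UNetMidBlock2DCrossAttn builds the bottleneck.
+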
+import torch +from torch import nn + +from .attention import Transformer2DModel +from .res_blocks import Downsample2D, ResnetBlock2D, Upsample2D + + +def get_down_block( + down_block_type, + num_layers, + in_channels, + out_channels, + temb_channels, + add_downsample, + resnet_act_fn, + attn_num_head_channels, + resnet_eps=1e-5, + resnet_groups=32, + cross_attention_dim=1280, + downsample_padding=1, + dual_cross_attention=False, + use_linear_projection=False, + only_cross_attention=False, +): + """get unet down path block.""" + + down_block_type = down_block_type[7:] if down_block_type.startswith( + 'UNetRes') else down_block_type + if down_block_type == 'DownBlock2D': + return DownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + resnet_groups=resnet_groups, + downsample_padding=downsample_padding, + ) + elif down_block_type == 'CrossAttnDownBlock2D': + return CrossAttnDownBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + add_downsample=add_downsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + resnet_groups=resnet_groups, + downsample_padding=downsample_padding, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + dual_cross_attention=dual_cross_attention, + use_linear_projection=use_linear_projection, + only_cross_attention=only_cross_attention, + ) + raise ValueError(f'{down_block_type} does not exist.') + + +def get_up_block( + up_block_type, + num_layers, + in_channels, + out_channels, + prev_output_channel, + temb_channels, + add_upsample, + resnet_act_fn, + attn_num_head_channels, + resnet_eps=1e-5, + resnet_groups=32, + cross_attention_dim=1280, + dual_cross_attention=False, + use_linear_projection=False, + only_cross_attention=False, +): + """get unet up path block.""" + + up_block_type = up_block_type[7:] if up_block_type.startswith( + 'UNetRes') else up_block_type + if up_block_type == 'UpBlock2D': + return UpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + resnet_groups=resnet_groups, + ) + elif up_block_type == 'CrossAttnUpBlock2D': + return CrossAttnUpBlock2D( + num_layers=num_layers, + in_channels=in_channels, + out_channels=out_channels, + prev_output_channel=prev_output_channel, + temb_channels=temb_channels, + add_upsample=add_upsample, + resnet_eps=resnet_eps, + resnet_act_fn=resnet_act_fn, + resnet_groups=resnet_groups, + cross_attention_dim=cross_attention_dim, + attn_num_head_channels=attn_num_head_channels, + dual_cross_attention=dual_cross_attention, + use_linear_projection=use_linear_projection, + only_cross_attention=only_cross_attention, + ) + raise ValueError(f'{up_block_type} does not exist.') + + +class UNetMidBlock2DCrossAttn(nn.Module): + """unet mid block built by cross attention.""" + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-5, + resnet_time_scale_shift: str = 'default', + resnet_act_fn: str = 'swish', + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type='default', + output_scale_factor=1.0, + cross_attention_dim=1280, + 
dual_cross_attention=False, + use_linear_projection=False, + ): + super().__init__() + + self.attention_type = attention_type + self.attn_num_head_channels = attn_num_head_channels + resnet_groups = resnet_groups if resnet_groups is not None else min( + in_channels // 4, 32) + + # there is always at least one resnet + resnets = [ + ResnetBlock2D( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + Transformer2DModel( + attn_num_head_channels, + in_channels // attn_num_head_channels, + in_channels=in_channels, + num_layers=1, + cross_attention_dim=cross_attention_dim, + norm_num_groups=resnet_groups, + use_linear_projection=use_linear_projection, + )) + + resnets.append( + ResnetBlock2D( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.ModuleList(attentions) + self.resnets = nn.ModuleList(resnets) + + def set_attention_slice(self, slice_size): + """set attention slice.""" + + head_dims = self.attn_num_head_channels + head_dims = [head_dims] if isinstance(head_dims, int) else head_dims + if slice_size is not None and any(dim % slice_size != 0 + for dim in head_dims): + raise ValueError( + f'Make sure slice_size {slice_size} is a common divisor of ' + f'the number of heads used in cross_attention: {head_dims}') + + for attn in self.attentions: + attn._set_attention_slice(slice_size) + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + """forward with hidden states.""" + + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in zip(self.attentions, self.resnets[1:]): + hidden_states = attn(hidden_states, encoder_hidden_states).sample + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class CrossAttnDownBlock2D(nn.Module): + """Down block built by cross attention.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-5, + resnet_time_scale_shift: str = 'default', + resnet_act_fn: str = 'swish', + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type='default', + output_scale_factor=1.0, + downsample_padding=1, + add_downsample=True, + dual_cross_attention=False, + use_linear_projection=False, + only_cross_attention=False, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + self.attn_num_head_channels = attn_num_head_channels + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock2D( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + Transformer2DModel( + 
attn_num_head_channels, + out_channels // attn_num_head_channels, + in_channels=out_channels, + num_layers=1, + cross_attention_dim=cross_attention_dim, + norm_num_groups=resnet_groups, + use_linear_projection=use_linear_projection, + only_cross_attention=only_cross_attention, + )) + + self.attentions = nn.ModuleList(attentions) + self.resnets = nn.ModuleList(resnets) + + if add_downsample: + self.downsamplers = nn.ModuleList([ + Downsample2D( + out_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name='op') + ]) + else: + self.downsamplers = None + + self.gradient_checkpointing = False + + def set_attention_slice(self, slice_size): + """set attention slice.""" + + head_dims = self.attn_num_head_channels + head_dims = [head_dims] if isinstance(head_dims, int) else head_dims + if slice_size is not None and any(dim % slice_size != 0 + for dim in head_dims): + raise ValueError( + f'Make sure slice_size {slice_size} is a common divisor of ' + f'the number of heads used in cross_attention: {head_dims}') + + for attn in self.attentions: + attn._set_attention_slice(slice_size) + + def forward(self, hidden_states, temb=None, encoder_hidden_states=None): + """forward with hidden states.""" + + output_states = () + + for resnet, attn in zip(self.resnets, self.attentions): + hidden_states = resnet(hidden_states, temb) + hidden_states = attn( + hidden_states, + encoder_hidden_states=encoder_hidden_states).sample + + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class DownBlock2D(nn.Module): + """Down block built by resnet.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-5, + resnet_time_scale_shift: str = 'default', + resnet_act_fn: str = 'swish', + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock2D( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.ModuleList(resnets) + + if add_downsample: + self.downsamplers = nn.ModuleList([ + Downsample2D( + out_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name='op') + ]) + else: + self.downsamplers = None + + self.gradient_checkpointing = False + + def forward(self, hidden_states, temb=None): + """forward with hidden states.""" + + output_states = () + + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb) + + output_states += (hidden_states, ) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + output_states += (hidden_states, ) + + return hidden_states, output_states + + +class CrossAttnUpBlock2D(nn.Module): + """Up block built by cross attention.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + prev_output_channel: int, + temb_channels: int, + 
dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-5, + resnet_time_scale_shift: str = 'default', + resnet_act_fn: str = 'swish', + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + cross_attention_dim=1280, + attention_type='default', + output_scale_factor=1.0, + add_upsample=True, + dual_cross_attention=False, + use_linear_projection=False, + only_cross_attention=False, + ): + super().__init__() + resnets = [] + attentions = [] + + self.attention_type = attention_type + self.attn_num_head_channels = attn_num_head_channels + + for i in range(num_layers): + res_skip_channels = in_channels if (i == num_layers - + 1) else out_channels + resnet_in_channels = \ + prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock2D( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + attentions.append( + Transformer2DModel( + attn_num_head_channels, + out_channels // attn_num_head_channels, + in_channels=out_channels, + num_layers=1, + cross_attention_dim=cross_attention_dim, + norm_num_groups=resnet_groups, + use_linear_projection=use_linear_projection, + only_cross_attention=only_cross_attention, + )) + + self.attentions = nn.ModuleList(attentions) + self.resnets = nn.ModuleList(resnets) + + if add_upsample: + self.upsamplers = nn.ModuleList([ + Upsample2D( + out_channels, use_conv=True, out_channels=out_channels) + ]) + else: + self.upsamplers = None + + self.gradient_checkpointing = False + + def set_attention_slice(self, slice_size): + """set attention slice.""" + + head_dims = self.attn_num_head_channels + head_dims = [head_dims] if isinstance(head_dims, int) else head_dims + if slice_size is not None and any(dim % slice_size != 0 + for dim in head_dims): + raise ValueError( + f'Make sure slice_size {slice_size} is a common divisor of ' + f'the number of heads used in cross_attention: {head_dims}') + + for attn in self.attentions: + attn._set_attention_slice(slice_size) + + self.gradient_checkpointing = False + + def forward( + self, + hidden_states, + res_hidden_states_tuple, + temb=None, + encoder_hidden_states=None, + upsample_size=None, + ): + """forward with hidden states and res hidden states.""" + + for resnet, attn in zip(self.resnets, self.attentions): + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = torch.cat([hidden_states, res_hidden_states], + dim=1) + + hidden_states = resnet(hidden_states, temb) + hidden_states = attn( + hidden_states, + encoder_hidden_states=encoder_hidden_states).sample + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states, upsample_size) + + return hidden_states + + +class UpBlock2D(nn.Module): + """Up block built by resnet.""" + + def __init__( + self, + in_channels: int, + prev_output_channel: int, + out_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-5, + resnet_time_scale_shift: str = 'default', + resnet_act_fn: str = 'swish', + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in 
range(num_layers): + res_skip_channels = in_channels if (i == num_layers - + 1) else out_channels + resnet_in_channels = \ + prev_output_channel if i == 0 else out_channels + + resnets.append( + ResnetBlock2D( + in_channels=resnet_in_channels + res_skip_channels, + out_channels=out_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.ModuleList(resnets) + + if add_upsample: + self.upsamplers = nn.ModuleList([ + Upsample2D( + out_channels, use_conv=True, out_channels=out_channels) + ]) + else: + self.upsamplers = None + + self.gradient_checkpointing = False + + def forward(self, + hidden_states, + res_hidden_states_tuple, + temb=None, + upsample_size=None): + """forward with hidden states and res hidden states.""" + + for resnet in self.resnets: + # pop res hidden states + res_hidden_states = res_hidden_states_tuple[-1] + res_hidden_states_tuple = res_hidden_states_tuple[:-1] + hidden_states = torch.cat([hidden_states, res_hidden_states], + dim=1) + + hidden_states = resnet(hidden_states, temb) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states, upsample_size) + + return hidden_states diff --git a/mmedit/models/editors/disco_diffusion/guider.py b/mmedit/models/editors/disco_diffusion/guider.py index b1c425dd3b..03adda874c 100644 --- a/mmedit/models/editors/disco_diffusion/guider.py +++ b/mmedit/models/editors/disco_diffusion/guider.py @@ -10,7 +10,9 @@ import torch.nn.functional as F import torchvision.transforms as T import torchvision.transforms.functional as TF +from mmengine.utils import digit_version from resize_right import resize +from torchvision import __version__ as TORCHVISION_VERSION from mmedit.models.losses import tv_loss from .secondary_model import alpha_sigma_to_t @@ -206,13 +208,17 @@ def __init__(self, self.IC_Size_Pow = IC_Size_Pow self.IC_Grey_P = IC_Grey_P + random_affine_args = dict(degrees=10, translate=(0.05, 0.05)) + if digit_version(TORCHVISION_VERSION) >= digit_version('0.9.0'): + random_affine_args['interpolation'] = T.InterpolationMode.BILINEAR + else: + from PIL import Image + random_affine_args['resample'] = Image.NEAREST + self.augs = T.Compose([ T.RandomHorizontalFlip(p=0.5), T.Lambda(lambda x: x + torch.randn_like(x) * 0.01), - T.RandomAffine( - degrees=10, - translate=(0.05, 0.05), - interpolation=T.InterpolationMode.BILINEAR), + T.RandomAffine(**random_affine_args), T.Lambda(lambda x: x + torch.randn_like(x) * 0.01), T.RandomGrayscale(p=0.1), T.Lambda(lambda x: x + torch.randn_like(x) * 0.01), diff --git a/mmedit/models/editors/glean/glean_styleganv2.py b/mmedit/models/editors/glean/glean_styleganv2.py index 4a806ceff8..54aa2cae0b 100644 --- a/mmedit/models/editors/glean/glean_styleganv2.py +++ b/mmedit/models/editors/glean/glean_styleganv2.py @@ -29,11 +29,11 @@ class GLEANStyleGANv2(BaseModule): ``pretrained`` argument. 
We have already offered official weights as follows: - - styelgan2-ffhq-config-f: http://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth # noqa - - stylegan2-horse-config-f: http://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth # noqa - - stylegan2-car-config-f: http://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth # noqa - - styelgan2-cat-config-f: http://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth # noqa - - stylegan2-church-config-f: http://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth # noqa + - styelgan2-ffhq-config-f: http://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth # noqa + - stylegan2-horse-config-f: http://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth # noqa + - stylegan2-car-config-f: http://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth # noqa + - styelgan2-cat-config-f: http://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth # noqa + - stylegan2-church-config-f: http://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth # noqa If you want to load the ema model, you can just use following codes: diff --git a/mmedit/models/editors/real_esrgan/real_esrgan.py b/mmedit/models/editors/real_esrgan/real_esrgan.py index b16b31477a..685297876f 100644 --- a/mmedit/models/editors/real_esrgan/real_esrgan.py +++ b/mmedit/models/editors/real_esrgan/real_esrgan.py @@ -191,7 +191,9 @@ def extract_gt_data(self, data_samples): gt_imgs = [data_sample.gt_img.data for data_sample in data_samples] gt = torch.stack(gt_imgs) - gt_unsharp = [data_sample.gt_img.data for data_sample in data_samples] + gt_unsharp = [ + data_sample.gt_unsharp.data for data_sample in data_samples + ] gt_unsharp = torch.stack(gt_unsharp) gt_pixel, gt_percep, gt_gan = gt.clone(), gt.clone(), gt.clone() diff --git a/mmedit/models/editors/restormer/__init__.py b/mmedit/models/editors/restormer/__init__.py new file mode 100644 index 0000000000..e1a3c0d914 --- /dev/null +++ b/mmedit/models/editors/restormer/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .restormer_net import Restormer + +__all__ = ['Restormer'] diff --git a/mmedit/models/editors/restormer/restormer_net.py b/mmedit/models/editors/restormer/restormer_net.py new file mode 100644 index 0000000000..bc3c68bf09 --- /dev/null +++ b/mmedit/models/editors/restormer/restormer_net.py @@ -0,0 +1,564 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
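+# Network modules for Restormer ("Restormer: Efficient Transformer for
+# High-Resolution Image Restoration"). Reference repo:
+# https://github.com/swz30/Restormer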
+ +import numbers + +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange +from mmengine.model import BaseModule + +from mmedit.registry import MODELS + + +def to_3d(x): + """Reshape input tensor.""" + return rearrange(x, 'b c h w -> b (h w) c') + + +def to_4d(x, h, w): + """Reshape input tensor.""" + return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w) + + +class BiasFree_LayerNorm(BaseModule): + """Layer normalization without bias. + + Args: + normalized_shape (tuple): The shape of inputs. + """ + + def __init__(self, normalized_shape): + super(BiasFree_LayerNorm, self).__init__() + if isinstance(normalized_shape, numbers.Integral): + normalized_shape = (normalized_shape, ) + normalized_shape = torch.Size(normalized_shape) + + assert len(normalized_shape) == 1 + + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.normalized_shape = normalized_shape + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor: Forward results. + """ + sigma = x.var(-1, keepdim=True, unbiased=False) + return x / torch.sqrt(sigma + 1e-5) * self.weight + + +class WithBias_LayerNorm(BaseModule): + """Layer normalization with bias. The bias can be learned. + + Args: + normalized_shape (tuple): The shape of inputs. + """ + + def __init__(self, normalized_shape): + super(WithBias_LayerNorm, self).__init__() + if isinstance(normalized_shape, numbers.Integral): + normalized_shape = (normalized_shape, ) + normalized_shape = torch.Size(normalized_shape) + + assert len(normalized_shape) == 1 + + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.normalized_shape = normalized_shape + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor: Forward results. + """ + mu = x.mean(-1, keepdim=True) + sigma = x.var(-1, keepdim=True, unbiased=False) + return (x - mu) / torch.sqrt(sigma + 1e-5) * self.weight + self.bias + + +class LayerNorm(BaseModule): + """Layer normalization module. + + Note: This is different from the layernorm2d in pytorch. + The layer norm here can select Layer Normalization type. + Args: + dim (int): Channel number of inputs. + LayerNorm_type (str): Layer Normalization type. + """ + + def __init__(self, dim, LayerNorm_type): + super(LayerNorm, self).__init__() + if LayerNorm_type == 'BiasFree': + self.body = BiasFree_LayerNorm(dim) + else: + self.body = WithBias_LayerNorm(dim) + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor: Forward results. + """ + h, w = x.shape[-2:] + return to_4d(self.body(to_3d(x)), h, w) + + +class FeedForward(BaseModule): + """Gated-Dconv Feed-Forward Network (GDFN) + + The original version of GDFN in + "Restormer: Efficient Transformer for High-Resolution Image Restoration". + + Args: + dim (int): Channel number of inputs. + ffn_expansion_factor (float): channel expansion factor. Default: 2.66 + bias (bool): The bias of convolution. 
+ """ + + def __init__(self, dim, ffn_expansion_factor, bias): + super(FeedForward, self).__init__() + + hidden_features = int(dim * ffn_expansion_factor) + + self.project_in = nn.Conv2d( + dim, hidden_features * 2, kernel_size=1, bias=bias) + + self.dwconv = nn.Conv2d( + hidden_features * 2, + hidden_features * 2, + kernel_size=3, + stride=1, + padding=1, + groups=hidden_features * 2, + bias=bias) + + self.project_out = nn.Conv2d( + hidden_features, dim, kernel_size=1, bias=bias) + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor: Forward results. + """ + x = self.project_in(x) + x1, x2 = self.dwconv(x).chunk(2, dim=1) + x = F.gelu(x1) * x2 + x = self.project_out(x) + return x + + +class Attention(BaseModule): + """Multi-DConv Head Transposed Self-Attention (MDTA) + + The original version of MDTA in + "Restormer: Efficient Transformer for High-Resolution Image Restoration". + + Args: + dim (int): Channel number of inputs. + num_heads (int): Number of attention heads. + bias (bool): The bias of convolution. + """ + + def __init__(self, dim, num_heads, bias): + super(Attention, self).__init__() + self.num_heads = num_heads + self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1)) + + self.qkv = nn.Conv2d(dim, dim * 3, kernel_size=1, bias=bias) + self.qkv_dwconv = nn.Conv2d( + dim * 3, + dim * 3, + kernel_size=3, + stride=1, + padding=1, + groups=dim * 3, + bias=bias) + self.project_out = nn.Conv2d(dim, dim, kernel_size=1, bias=bias) + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor: Forward results. + """ + b, c, h, w = x.shape + + qkv = self.qkv_dwconv(self.qkv(x)) + q, k, v = qkv.chunk(3, dim=1) + + q = rearrange( + q, 'b (head c) h w -> b head c (h w)', head=self.num_heads) + k = rearrange( + k, 'b (head c) h w -> b head c (h w)', head=self.num_heads) + v = rearrange( + v, 'b (head c) h w -> b head c (h w)', head=self.num_heads) + + q = torch.nn.functional.normalize(q, dim=-1) + k = torch.nn.functional.normalize(k, dim=-1) + + attn = (q @ k.transpose(-2, -1)) * self.temperature + attn = attn.softmax(dim=-1) + + out = (attn @ v) + + out = rearrange( + out, + 'b head c (h w) -> b (head c) h w', + head=self.num_heads, + h=h, + w=w) + + out = self.project_out(out) + return out + + +class TransformerBlock(BaseModule): + """Transformer Block. + + The original version of Transformer Block in "Restormer: Efficient\ + Transformer for High-Resolution Image Restoration". + + Args: + dim (int): Channel number of inputs. + num_heads (int): Number of attention heads. + ffn_expansion_factor (float): channel expansion factor. Default: 2.66 + bias (bool): The bias of convolution. + LayerNorm_type (str): Layer Normalization type. + """ + + def __init__(self, dim, num_heads, ffn_expansion_factor, bias, + LayerNorm_type): + super(TransformerBlock, self).__init__() + + self.norm1 = LayerNorm(dim, LayerNorm_type) + self.attn = Attention(dim, num_heads, bias) + self.norm2 = LayerNorm(dim, LayerNorm_type) + self.ffn = FeedForward(dim, ffn_expansion_factor, bias) + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tesnor: Forward results. + """ + x = x + self.attn(self.norm1(x)) + x = x + self.ffn(self.norm2(x)) + + return x + + +class OverlapPatchEmbed(BaseModule): + """Overlapped image patch embedding with 3x3 Conv. 
+ + Args: + in_c (int, optional): Channel number of inputs. Default: 3 + embed_dim (int, optional): embedding dimension. Default: 48 + bias (bool, optional): The bias of convolution. Default: False + """ + + def __init__(self, in_c=3, embed_dim=48, bias=False): + super(OverlapPatchEmbed, self).__init__() + + self.proj = nn.Conv2d( + in_c, embed_dim, kernel_size=3, stride=1, padding=1, bias=bias) + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tesnor: Forward results. + """ + x = self.proj(x) + + return x + + +class Downsample(BaseModule): + """Downsample modules. + + Args: + n_feat(int): Channel number of features. + """ + + def __init__(self, n_feat): + super(Downsample, self).__init__() + + self.body = nn.Sequential( + nn.Conv2d( + n_feat, + n_feat // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), nn.PixelUnshuffle(2)) + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tesnor: Forward results. + """ + return self.body(x) + + +class Upsample(BaseModule): + """Upsample modules. + + Args: + n_feat(int): Channel number of features. + """ + + def __init__(self, n_feat): + super(Upsample, self).__init__() + + self.body = nn.Sequential( + nn.Conv2d( + n_feat, + n_feat * 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), nn.PixelShuffle(2)) + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tesnor: Forward results. + """ + return self.body(x) + + +@MODELS.register_module() +class Restormer(BaseModule): + """Restormer A PyTorch impl of: `Restormer: Efficient Transformer for High- + Resolution Image Restoration`. Ref repo: + https://github.com/swz30/Restormer. + + Args: + inp_channels (int): Number of input image channels. Default: 3. + out_channels (int): Number of output image channels: 3. + dim (int): Number of feature dimension. Default: 48. + num_blocks (List(int)): Depth of each Transformer layer. + Default: [4, 6, 6, 8]. + num_refinement_blocks (int): Number of refinement blocks. + Default: 4. + heads (List(int)): Number of attention heads in different layers. + Default: 7. + ffn_expansion_factor (float): Ratio of feed forward network expansion. + Default: 2.66. + bias (bool): The bias of convolution. Default: False + LayerNorm_type (str|optional): Select layer Normalization type. + Optional: 'WithBias','BiasFree' + Default: 'WithBias'. + dual_pixel_task (bool): True for dual-pixel defocus deblurring only. + Also set inp_channels=6. Default: False. + dual_keys (List): Keys of dual images in inputs. + Default: ['imgL', 'imgR']. 
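+
+    Example (a minimal usage sketch with the default arguments; the input
+    size is illustrative only):
+
+        >>> import torch
+        >>> net = Restormer()
+        >>> out = net(torch.rand(1, 3, 64, 64))
+        >>> out.shape
+        torch.Size([1, 3, 64, 64])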
+ """ + + def __init__(self, + inp_channels=3, + out_channels=3, + dim=48, + num_blocks=[4, 6, 6, 8], + num_refinement_blocks=4, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='WithBias', + dual_pixel_task=False, + dual_keys=['imgL', 'imgR']): + + super(Restormer, self).__init__() + + self.patch_embed = OverlapPatchEmbed(inp_channels, dim) + + self.encoder_level1 = nn.Sequential(*[ + TransformerBlock( + dim=dim, + num_heads=heads[0], + ffn_expansion_factor=ffn_expansion_factor, + bias=bias, + LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0]) + ]) + + self.down1_2 = Downsample(dim) + self.encoder_level2 = nn.Sequential(*[ + TransformerBlock( + dim=int(dim * 2**1), + num_heads=heads[1], + ffn_expansion_factor=ffn_expansion_factor, + bias=bias, + LayerNorm_type=LayerNorm_type) for i in range(num_blocks[1]) + ]) + + self.down2_3 = Downsample(int(dim * 2**1)) + self.encoder_level3 = nn.Sequential(*[ + TransformerBlock( + dim=int(dim * 2**2), + num_heads=heads[2], + ffn_expansion_factor=ffn_expansion_factor, + bias=bias, + LayerNorm_type=LayerNorm_type) for i in range(num_blocks[2]) + ]) + + self.down3_4 = Downsample(int(dim * 2**2)) + self.latent = nn.Sequential(*[ + TransformerBlock( + dim=int(dim * 2**3), + num_heads=heads[3], + ffn_expansion_factor=ffn_expansion_factor, + bias=bias, + LayerNorm_type=LayerNorm_type) for i in range(num_blocks[3]) + ]) + + self.up4_3 = Upsample(int(dim * 2**3)) + self.reduce_chan_level3 = nn.Conv2d( + int(dim * 2**3), int(dim * 2**2), kernel_size=1, bias=bias) + self.decoder_level3 = nn.Sequential(*[ + TransformerBlock( + dim=int(dim * 2**2), + num_heads=heads[2], + ffn_expansion_factor=ffn_expansion_factor, + bias=bias, + LayerNorm_type=LayerNorm_type) for i in range(num_blocks[2]) + ]) + + self.up3_2 = Upsample(int(dim * 2**2)) + self.reduce_chan_level2 = nn.Conv2d( + int(dim * 2**2), int(dim * 2**1), kernel_size=1, bias=bias) + self.decoder_level2 = nn.Sequential(*[ + TransformerBlock( + dim=int(dim * 2**1), + num_heads=heads[1], + ffn_expansion_factor=ffn_expansion_factor, + bias=bias, + LayerNorm_type=LayerNorm_type) for i in range(num_blocks[1]) + ]) + + self.up2_1 = Upsample(int(dim * 2**1)) + + self.decoder_level1 = nn.Sequential(*[ + TransformerBlock( + dim=int(dim * 2**1), + num_heads=heads[0], + ffn_expansion_factor=ffn_expansion_factor, + bias=bias, + LayerNorm_type=LayerNorm_type) for i in range(num_blocks[0]) + ]) + + self.refinement = nn.Sequential(*[ + TransformerBlock( + dim=int(dim * 2**1), + num_heads=heads[0], + ffn_expansion_factor=ffn_expansion_factor, + bias=bias, + LayerNorm_type=LayerNorm_type) + for i in range(num_refinement_blocks) + ]) + + self.dual_pixel_task = dual_pixel_task + self.dual_keys = dual_keys + if self.dual_pixel_task: + self.skip_conv = nn.Conv2d( + dim, int(dim * 2**1), kernel_size=1, bias=bias) + + self.output = nn.Conv2d( + int(dim * 2**1), + out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=bias) + + def forward(self, inp_img): + """Forward function. + + Args: + inp_img (Tensor): Input tensor with shape (B, C, H, W). + Returns: + Tensor: Forward results. 
+ """ + + if self.dual_pixel_task: + dual_images = [inp_img[key] for key in self.dual_keys] + inp_img = torch.cat(dual_images, dim=1) + + _, _, h, w = inp_img.shape + if h % 8 == 0: + padding_h = 0 + else: + padding_h = 8 - h % 8 + if w % 8 == 0: + padding_w = 0 + else: + padding_w = 8 - w % 8 + + inp_img = F.pad(inp_img, (0, padding_w, 0, padding_h), 'reflect') + inp_enc_level1 = self.patch_embed(inp_img) + out_enc_level1 = self.encoder_level1(inp_enc_level1) + + inp_enc_level2 = self.down1_2(out_enc_level1) + out_enc_level2 = self.encoder_level2(inp_enc_level2) + + inp_enc_level3 = self.down2_3(out_enc_level2) + out_enc_level3 = self.encoder_level3(inp_enc_level3) + + inp_enc_level4 = self.down3_4(out_enc_level3) + latent = self.latent(inp_enc_level4) + + inp_dec_level3 = self.up4_3(latent) + inp_dec_level3 = torch.cat([inp_dec_level3, out_enc_level3], 1) + inp_dec_level3 = self.reduce_chan_level3(inp_dec_level3) + out_dec_level3 = self.decoder_level3(inp_dec_level3) + + inp_dec_level2 = self.up3_2(out_dec_level3) + inp_dec_level2 = torch.cat([inp_dec_level2, out_enc_level2], 1) + inp_dec_level2 = self.reduce_chan_level2(inp_dec_level2) + out_dec_level2 = self.decoder_level2(inp_dec_level2) + + inp_dec_level1 = self.up2_1(out_dec_level2) + inp_dec_level1 = torch.cat([inp_dec_level1, out_enc_level1], 1) + out_dec_level1 = self.decoder_level1(inp_dec_level1) + + out_dec_level1 = self.refinement(out_dec_level1) + + if self.dual_pixel_task: + out_dec_level1 = out_dec_level1 + self.skip_conv(inp_enc_level1) + out_dec_level1 = self.output(out_dec_level1) + else: + out_dec_level1 = self.output(out_dec_level1) + inp_img + + return out_dec_level1[:, :, :h, :w] diff --git a/mmedit/models/editors/stable_diffusion/__init__.py b/mmedit/models/editors/stable_diffusion/__init__.py new file mode 100644 index 0000000000..93dac0131b --- /dev/null +++ b/mmedit/models/editors/stable_diffusion/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .stable_diffusion import StableDiffusion + +__all__ = ['StableDiffusion'] diff --git a/mmedit/models/editors/stable_diffusion/clip_wrapper.py b/mmedit/models/editors/stable_diffusion/clip_wrapper.py new file mode 100644 index 0000000000..dc867fe279 --- /dev/null +++ b/mmedit/models/editors/stable_diffusion/clip_wrapper.py @@ -0,0 +1,187 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os + +import torch +import torch.nn as nn +from mmengine.logging import MMLogger + +from mmedit.utils import try_import + +transformers = try_import('transformers') + +if transformers is not None: + from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel + from transformers.models.clip.feature_extraction_clip import \ + CLIPFeatureExtractor # noqa + from transformers.models.clip.modeling_clip import CLIPTextModel + from transformers.models.clip.tokenization_clip import CLIPTokenizer + + logger = MMLogger.get_current_instance() + + def cosine_distance(image_embeds, text_embeds): + """compute the cosine distance of image embeddings and text + embeddings.""" + normalized_image_embeds = nn.functional.normalize(image_embeds) + normalized_text_embeds = nn.functional.normalize(text_embeds) + return torch.mm(normalized_image_embeds, normalized_text_embeds.t()) + + class StableDiffusionSafetyChecker(PreTrainedModel): + config_class = CLIPConfig + _no_split_modules = ['CLIPEncoderLayer'] + + def __init__(self, config: CLIPConfig): + """check result image for stable diffsuion to prevent NSFW content + generated. 
+ + Args: + config(CLIPConfig): config for transformers clip. + """ + + super().__init__(config) + + self.vision_model = CLIPVisionModel(config.vision_config) + self.visual_projection = nn.Linear( + config.vision_config.hidden_size, + config.projection_dim, + bias=False) + + self.concept_embeds = nn.Parameter( + torch.ones(17, config.projection_dim), requires_grad=False) + self.special_care_embeds = nn.Parameter( + torch.ones(3, config.projection_dim), requires_grad=False) + + self.concept_embeds_weights = nn.Parameter( + torch.ones(17), requires_grad=False) + self.special_care_embeds_weights = nn.Parameter( + torch.ones(3), requires_grad=False) + + @torch.no_grad() + def forward(self, clip_input, images): + """return black image if input image has nsfw content. + + Args: + clip_input(torch.Tensor): + image feature extracted by clip feature extractor. + images(torch.Tensor): + image generated by stable diffusion. + + Returns: + images(torch.Tensor): + black images if input images have nsfw content, + otherwise return input images. + has_nsfw_concepts(list[bool]): + flag list to indicate whether input images have + nsfw content. + """ + pooled_output = self.vision_model(clip_input)[1] + image_embeds = self.visual_projection(pooled_output) + + # we always cast to float32 as this does not cause + # significant overhead and is compatible with bfloa16 + special_cos_dist = cosine_distance( + image_embeds, self.special_care_embeds).cpu().float().numpy() + cos_dist = cosine_distance( + image_embeds, self.concept_embeds).cpu().float().numpy() + + result = [] + batch_size = image_embeds.shape[0] + for i in range(batch_size): + result_img = { + 'special_scores': {}, + 'special_care': [], + 'concept_scores': {}, + 'bad_concepts': [] + } + + # increase this value to create a stronger `nfsw` filter + # at the cost of increasing the possibility of + # filtering benign images + adjustment = 0.0 + + for concept_idx in range(len(special_cos_dist[0])): + concept_cos = special_cos_dist[i][concept_idx] + concept_threshold = self.special_care_embeds_weights[ + concept_idx].item() + result_img['special_scores'][concept_idx] = round( + concept_cos - concept_threshold + adjustment, 3) + if result_img['special_scores'][concept_idx] > 0: + result_img['special_care'].append({ + concept_idx, + result_img['special_scores'][concept_idx] + }) + adjustment = 0.01 + + for concept_idx in range(len(cos_dist[0])): + concept_cos = cos_dist[i][concept_idx] + concept_threshold = self.concept_embeds_weights[ + concept_idx].item() + result_img['concept_scores'][concept_idx] = round( + concept_cos - concept_threshold + adjustment, 3) + if result_img['concept_scores'][concept_idx] > 0: + result_img['bad_concepts'].append(concept_idx) + + result.append(result_img) + + has_nsfw_concepts = [ + len(res['bad_concepts']) > 0 for res in result + ] + + for idx, has_nsfw_concept in enumerate(has_nsfw_concepts): + if has_nsfw_concept: + images[idx] = torch.zeros(images[idx].shape) # black image + + if any(has_nsfw_concepts): + logger.warning( + 'NSFW content was detected in one or more images.' + ' A black image will be returned instead.' + ' Try again with a different prompt and/or seed.') + + return images, has_nsfw_concepts + + def load_clip_submodels(init_cfg, submodels, requires_safety_checker): + """ + Args: + init_cfg (dict): + ckpt path of clip models. + submodels (List): + list of stable diffusion submodels. 
+ requires_safety_checker (bool): + whether to load safety checker + + Returns: + tokenizer(CLIPTokenizer): + tokenizer with ckpt loaded. + feature_extractor(CLIPFeatureExtractor): + feature_extractor with ckpt loaded. + text_encoder(CLIPTextModel): + text_encoder with ckpt loaded. + safety_checker(StableDiffusionSafetyChecker): + safety_checker with ckpt loaded. + + """ + pretrained_model_path = init_cfg.get('pretrained_model_path', None) + + tokenizer, feature_extractor, text_encoder, safety_checker = \ + None, None, None, None + if pretrained_model_path: + tokenizer = CLIPTokenizer.from_pretrained( + os.path.join(pretrained_model_path, 'tokenizer')) + + feature_extractor = CLIPFeatureExtractor.from_pretrained( + os.path.join(pretrained_model_path, 'feature_extractor')) + + text_encoder = CLIPTextModel.from_pretrained( + os.path.join(pretrained_model_path, 'text_encoder')) + + if requires_safety_checker: + submodels.append('safety_checker') + safety_checker = StableDiffusionSafetyChecker.from_pretrained( + os.path.join(pretrained_model_path, 'safety_checker')) + + return tokenizer, feature_extractor, text_encoder, safety_checker + +else: + + def load_clip_submodels(init_cfg, submodels, requires_safety_checker): + raise ImportError('Please install tranformers via ' + '\'pip install transformers\'') diff --git a/mmedit/models/editors/stable_diffusion/stable_diffusion.py b/mmedit/models/editors/stable_diffusion/stable_diffusion.py new file mode 100644 index 0000000000..1f70b0406e --- /dev/null +++ b/mmedit/models/editors/stable_diffusion/stable_diffusion.py @@ -0,0 +1,505 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import os.path as osp +from typing import Dict, List, Optional, Union + +import torch +from mmengine.logging import MMLogger +from mmengine.model import BaseModel +from mmengine.runner import set_random_seed +from mmengine.runner.checkpoint import _load_checkpoint +from tqdm.auto import tqdm + +from mmedit.registry import DIFFUSION_SCHEDULERS, MODELS +from .clip_wrapper import load_clip_submodels +from .vae import AutoencoderKL + +logger = MMLogger.get_current_instance() + + +@MODELS.register_module('sd') +@MODELS.register_module() +class StableDiffusion(BaseModel): + """class to run stable diffsuion pipeline. + + Args: + diffusion_scheduler(dict): Diffusion scheduler config. + unet_cfg(dict): Unet config. + vae_cfg(dict): Vae config. + pretrained_ckpt_path(dict): + Pretrained ckpt path for submodels in stable diffusion. + requires_safety_checker(bool): + whether to run safety checker after image generated. + unet_sample_size(int): sampel size for unet. 
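+
+    Note:
+        ``diffusion_scheduler`` and ``unet`` accept either a config dict
+        (built through the corresponding registry) or an already
+        constructed module, and ``vae`` accepts either an
+        ``AutoencoderKL`` instance or a dict of its constructor arguments.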
+ """ + + def __init__(self, + diffusion_scheduler, + unet, + vae, + requires_safety_checker=True, + unet_sample_size=64, + init_cfg=None): + super().__init__() + + self.device = torch.device('cpu') + self.submodels = [ + 'tokenizer', 'vae', 'scheduler', 'unet', 'feature_extractor', + 'text_encoder' + ] + self.requires_safety_checker = requires_safety_checker + + self.scheduler = DIFFUSION_SCHEDULERS.build( + diffusion_scheduler) if isinstance(diffusion_scheduler, + dict) else diffusion_scheduler + self.scheduler.order = 1 + self.scheduler.init_noise_sigma = 1.0 + + self.unet_sample_size = unet_sample_size + self.unet = MODELS.build(unet) if isinstance(unet, dict) else unet + + self.vae = AutoencoderKL(**vae) if isinstance(vae, dict) else vae + self.vae_scale_factor = 2**(len(self.vae.block_out_channels) - 1) + + self.init_cfg = init_cfg + self.init_weights() + + def init_weights(self): + """load pretrained ckpt for each submodel.""" + if self.init_cfg is not None and self.init_cfg['type'] == 'Pretrained': + map_location = self.init_cfg.get('map_location', 'cpu') + pretrained_model_path = self.init_cfg.get('pretrained_model_path', + None) + if pretrained_model_path: + unet_ckpt_path = osp.join(pretrained_model_path, 'unet', + 'diffusion_pytorch_model.bin') + if unet_ckpt_path: + state_dict = _load_checkpoint(unet_ckpt_path, map_location) + self.unet.load_state_dict(state_dict, strict=True) + + vae_ckpt_path = osp.join(pretrained_model_path, 'vae', + 'diffusion_pytorch_model.bin') + if vae_ckpt_path: + state_dict = _load_checkpoint(vae_ckpt_path, map_location) + self.vae.load_state_dict(state_dict, strict=True) + + self.tokenizer, self.feature_extractor, self.text_encoder, self.safety_checker = load_clip_submodels( # noqa + self.init_cfg, self.submodels, self.requires_safety_checker) + + def to(self, torch_device: Optional[Union[str, torch.device]] = None): + """put submodels to torch device. + + Args: + torch_device(Optional[Union[str, torch.device]]): + device to put, default to None. + + Returns: + self(StableDiffusion): + class instance itsself. + """ + if torch_device is None: + return self + + for name in self.submodels: + module = getattr(self, name) + if isinstance(module, torch.nn.Module): + module.to(torch_device) + self.device = torch.device(torch_device) + return self + + @torch.no_grad() + def infer(self, + prompt: Union[str, List[str]], + height: Optional[int] = None, + width: Optional[int] = None, + num_inference_steps: int = 50, + guidance_scale: float = 7.5, + negative_prompt: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[torch.Generator] = None, + latents: Optional[torch.FloatTensor] = None, + show_progress=True, + seed=1): + """Function invoked when calling the pipeline for generation. + + Args: + prompt (`str` or `List[str]`): + The prompt or prompts to guide the image generation. + height (`int`, *optional*, + defaults to self.unet_sample_size * self.vae_scale_factor): + The height in pixels of the generated image. + width (`int`, *optional*, + defaults to self.unet_sample_size * self.vae_scale_factor): + The width in pixels of the generated image. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. + More denoising steps usually lead to a higher + quality image at the expense of slower inference. 
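The default output resolution in `infer` follows directly from `unet_sample_size` and the `vae_scale_factor` computed in `__init__`. A quick sketch with illustrative values; the channel tuple below mirrors a common Stable Diffusion VAE and is an assumption, not a value read from any config in this repository:

```python
# vae_scale_factor = 2 ** (number of VAE downsampling stages)
block_out_channels = (128, 256, 512, 512)                # assumed VAE widths
vae_scale_factor = 2 ** (len(block_out_channels) - 1)    # -> 8

unet_sample_size = 64                                    # latent spatial size
default_hw = unet_sample_size * vae_scale_factor         # default 512 x 512
print(vae_scale_factor, default_hw)
```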
+ guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in + [Classifier-Free Diffusion Guidance] + (https://arxiv.org/abs/2207.12598). + negative_prompt (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation. + Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). + num_images_per_prompt (`int`, *optional*, defaults to 1): + The number of images to generate per prompt. + eta (`float`, *optional*, defaults to 0.0): + Corresponds to parameter eta (η) in the DDIM paper: + https://arxiv.org/abs/2010.02502. Only applies to + [`schedulers.DDIMScheduler`], will be ignored for others. + generator (`torch.Generator`, *optional*): + A [torch generator] to make generation deterministic. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, + sampled from a Gaussian distribution, + to be used as inputs for image generation. + Can be used to tweak the same generation + with different prompts. + If not provided, a latents tensor will be + generated by sampling using the supplied random `generator`. + + Returns: + dict:['samples', 'nsfw_content_detected']: + 'samples': image result samples + 'nsfw_content_detected': nsfw content flags for image samples. + """ + set_random_seed(seed=seed) + + # 0. Default height and width to unet + height = height or self.unet_sample_size * self.vae_scale_factor + width = width or self.unet_sample_size * self.vae_scale_factor + + # 1. Check inputs. Raise error if not correct + self.check_inputs(prompt, height, width) + + # 2. Define call parameters + batch_size = 1 if isinstance(prompt, str) else len(prompt) + device = self.device + # here `guidance_scale` is defined analog to the + # guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . + # `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + # 3. Encode input prompt + text_embeddings = self._encode_prompt(prompt, device, + num_images_per_prompt, + do_classifier_free_guidance, + negative_prompt) + + # 4. Prepare timesteps + # self.scheduler.set_timesteps(num_inference_steps, device=device) + self.scheduler.set_timesteps(num_inference_steps) + timesteps = self.scheduler.timesteps + + # 5. Prepare latent variables + num_channels_latents = self.unet.in_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + text_embeddings.dtype, + device, + generator, + latents, + ) + + # 6. Prepare extra step kwargs. + # TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 7. 
Denoising loop + if show_progress: + timesteps = tqdm(timesteps) + for i, t in enumerate(timesteps): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat( + [latents] * 2) if do_classifier_free_guidance else latents + # latent_model_input = \ + # self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet( + latent_model_input, t, + encoder_hidden_states=text_embeddings)['outputs'] + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * ( + noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step( + noise_pred, t, latents, **extra_step_kwargs)['prev_sample'] + + # 8. Post-processing + image = self.decode_latents(latents) + + # 9. Run safety checker + image, has_nsfw_concept = self.run_safety_checker( + image, device, text_embeddings.dtype) + image = image[0].permute([2, 0, 1]) + + return {'samples': image, 'nsfw_content_detected': has_nsfw_concept} + + def _encode_prompt(self, prompt, device, num_images_per_prompt, + do_classifier_free_guidance, negative_prompt): + """Encodes the prompt into text encoder hidden states. + + Args: + prompt (str or list(int)): + prompt to be encoded. + device: (torch.device): + torch device. + num_images_per_prompt (int): + number of images that should be generated per prompt. + do_classifier_free_guidance (`bool`): + whether to use classifier free guidance or not. + negative_prompt (str or List[str]): + The prompt or prompts not to guide the image generation. + Ignored when not using guidance (i.e., ignored + if `guidance_scale` is less than `1`). + + Returns: + text_embeddings (torch.Tensor): + text embeddings generated by clip text encoder. 
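The guidance step inside the denoising loop above is a single linear combination of the unconditional and text-conditioned noise predictions. A runnable sketch of just that step, with toy tensor sizes:

```python
import torch

guidance_scale = 7.5
# batched predictions for [unconditional, text-conditioned], toy sizes
noise_pred = torch.randn(2, 4, 64, 64)
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)

guided = noise_pred_uncond + guidance_scale * (
    noise_pred_text - noise_pred_uncond)
print(guided.shape)  # torch.Size([1, 4, 64, 64])
```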
+ """ + batch_size = len(prompt) if isinstance(prompt, list) else 1 + + text_inputs = self.tokenizer( + prompt, + padding='max_length', + max_length=self.tokenizer.model_max_length, + truncation=True, + return_tensors='pt', + ) + text_input_ids = text_inputs.input_ids + untruncated_ids = self.tokenizer( + prompt, padding='max_length', return_tensors='pt').input_ids + + if not torch.equal(text_input_ids, untruncated_ids): + removed_text = self.tokenizer.batch_decode( + untruncated_ids[:, self.tokenizer.model_max_length - 1:-1]) + logger.warning( + 'The following part of your input was truncated because CLIP' + ' can only handle sequences up to' + f' {self.tokenizer.model_max_length} tokens: {removed_text}') + + if hasattr(self.text_encoder.config, 'use_attention_mask' + ) and self.text_encoder.config.use_attention_mask: + attention_mask = text_inputs.attention_mask.to(device) + else: + attention_mask = None + + text_embeddings = self.text_encoder( + text_input_ids.to(device), + attention_mask=attention_mask, + ) + text_embeddings = text_embeddings[0] + + # duplicate text embeddings for each generation per prompt, + bs_embed, seq_len, _ = text_embeddings.shape + text_embeddings = text_embeddings.repeat(1, num_images_per_prompt, 1) + text_embeddings = text_embeddings.view( + bs_embed * num_images_per_prompt, seq_len, -1) + + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + uncond_tokens: List[str] + if negative_prompt is None: + uncond_tokens = [''] * batch_size + elif type(prompt) is not type(negative_prompt): + raise TypeError( + f'`negative_prompt` should be the same type to `prompt`,' + f'but got {type(negative_prompt)} !=' + f' {type(prompt)}.') + elif isinstance(negative_prompt, str): + uncond_tokens = [negative_prompt] + elif batch_size != len(negative_prompt): + raise ValueError( + f'`negative_prompt`: {negative_prompt} has ' + f'batch size {len(negative_prompt)}, but `prompt`:' + f' {prompt} has batch size {batch_size}.' + f' Please make sure that passed `negative_prompt` matches' + ' the batch size of `prompt`.') + else: + uncond_tokens = negative_prompt + + max_length = text_input_ids.shape[-1] + uncond_input = self.tokenizer( + uncond_tokens, + padding='max_length', + max_length=max_length, + truncation=True, + return_tensors='pt', + ) + + if hasattr(self.text_encoder.config, 'use_attention_mask' + ) and self.text_encoder.config.use_attention_mask: + attention_mask = uncond_input.attention_mask.to(device) + else: + attention_mask = None + + uncond_embeddings = self.text_encoder( + uncond_input.input_ids.to(device), + attention_mask=attention_mask, + ) + uncond_embeddings = uncond_embeddings[0] + + # duplicate unconditional embeddings for + # each generation per prompt, using mps friendly method + seq_len = uncond_embeddings.shape[1] + uncond_embeddings = uncond_embeddings.repeat( + 1, num_images_per_prompt, 1) + uncond_embeddings = uncond_embeddings.view( + batch_size * num_images_per_prompt, seq_len, -1) + + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional + # and text embeddings into a single batch + # to avoid doing two forward passes + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + + return text_embeddings + + def run_safety_checker(self, image, device, dtype): + """run safety checker to check whether image has nsfw content. + + Args: + image (numpy.ndarray): + image generated by stable diffusion. 
+ device (torch.device): + device to run safety checker. + dtype (torch.dtype): + float type to run. + + Returns: + image (numpy.ndarray): + black image if nsfw content detected else input image. + has_nsfw_concept (list[bool]): + flag list to indicate nsfw content detected. + """ + if self.safety_checker is not None: + safety_checker_input = self.feature_extractor( + image[0], return_tensors='pt').to(device) + image, has_nsfw_concept = self.safety_checker( + images=image, + clip_input=safety_checker_input.pixel_values.to(dtype)) + else: + has_nsfw_concept = None + return image, has_nsfw_concept + + def decode_latents(self, latents): + """use vae to decode latents. + + Args: + latents (torch.Tensor): latents to decode. + + Returns: + image (numpy.ndarray): image result. + """ + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + image = (image / 2 + 0.5).clamp(0, 1) + # we always cast to float32 as this does not cause + # significant overhead and is compatible with bfloa16 + image = image.cpu().permute(0, 2, 3, 1).float() + return image + + def prepare_extra_step_kwargs(self, generator, eta): + """prepare extra kwargs for the scheduler step. + + Args: + generator (torch.Generator): + generator for random functions. + eta (float): + eta (η) is only used with the DDIMScheduler, + it will be ignored for other schedulers. + eta corresponds to η in DDIM paper: + https://arxiv.org/abs/2010.02502 + and should be between [0, 1] + + Return: + extra_step_kwargs (dict): + dict contains 'generator' and 'eta' + """ + accepts_eta = 'eta' in set( + inspect.signature(self.scheduler.step).parameters.keys()) + extra_step_kwargs = {} + if accepts_eta: + extra_step_kwargs['eta'] = eta + + # check if the scheduler accepts generator + accepts_generator = 'generator' in set( + inspect.signature(self.scheduler.step).parameters.keys()) + if accepts_generator: + extra_step_kwargs['generator'] = generator + return extra_step_kwargs + + def check_inputs(self, prompt, height, width): + """check whether inputs are in suitable format or not.""" + + if not isinstance(prompt, str) and not isinstance(prompt, list): + raise ValueError(f'`prompt` has to be of ' + f'type `str` or `list` but is {type(prompt)}') + + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f'`height` and `width` have to be divisible ' + f'by 8 but are {height} and {width}.') + + def prepare_latents(self, + batch_size, + num_channels_latents, + height, + width, + dtype, + device, + generator, + latents=None): + """prepare latents for diffusion to run in latent space. + + Args: + batch_size (int): batch size. + num_channels_latents (int): latent channel nums. + height (int): image height. + width (int): image width. + dtype (torch.dtype): float type. + device (torch.device): torch device. + generator (torch.Generator): + generator for random functions, defaults to None. + latents (torch.Tensor): + Pre-generated noisy latents, defaults to None. + + Return: + latents (torch.Tensor): prepared latents. 
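`prepare_extra_step_kwargs` above forwards `eta` and `generator` only to schedulers whose `step()` signature actually accepts them. The same signature-inspection trick in isolation; the scheduler class here is a throwaway stand-in, not part of the codebase:

```python
import inspect


class DummyScheduler:

    def step(self, noise_pred, t, latents, eta=0.0):
        return {'prev_sample': latents}


scheduler = DummyScheduler()
accepted = set(inspect.signature(scheduler.step).parameters.keys())
candidates = {'eta': 0.0, 'generator': None}
extra_step_kwargs = {k: v for k, v in candidates.items() if k in accepted}
print(extra_step_kwargs)  # {'eta': 0.0}
```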
+ """ + shape = (batch_size, num_channels_latents, + height // self.vae_scale_factor, + width // self.vae_scale_factor) + if latents is None: + latents = torch.randn( + shape, generator=generator, device=device, dtype=dtype) + else: + if latents.shape != shape: + raise ValueError(f'Unexpected latents shape, ' + f'got {latents.shape}, expected {shape}') + latents = latents.to(device) + + # scale the initial noise by the standard + # deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + return latents + + def forward(self, + inputs: torch.Tensor, + data_samples: Optional[list] = None, + mode: str = 'tensor') -> Union[Dict[str, torch.Tensor], list]: + """forward is not implemented now.""" + raise NotImplementedError( + 'Forward is not implemented now, please use infer.') diff --git a/mmedit/models/editors/stable_diffusion/vae.py b/mmedit/models/editors/stable_diffusion/vae.py new file mode 100644 index 0000000000..38444ba766 --- /dev/null +++ b/mmedit/models/editors/stable_diffusion/vae.py @@ -0,0 +1,998 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +from typing import Optional, Tuple, Union + +import mmengine +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from addict import Dict +from mmengine.utils.dl_utils import TORCH_VERSION +from mmengine.utils.version_utils import digit_version + + +class Downsample2D(nn.Module): + """A downsampling layer with an optional convolution. + + Args: + channels (int): channels in the inputs and outputs. + use_conv (bool): a bool determining if a convolution is applied. + out_channels (int): output channels + padding (int): padding num + """ + + def __init__(self, + channels, + use_conv=False, + out_channels=None, + padding=1, + name='conv'): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.padding = padding + stride = 2 + self.name = name + + if use_conv: + conv = nn.Conv2d( + self.channels, + self.out_channels, + 3, + stride=stride, + padding=padding) + else: + assert self.channels == self.out_channels + conv = nn.AvgPool2d(kernel_size=stride, stride=stride) + + self.conv = conv + + def forward(self, hidden_states): + """forward hidden states.""" + assert hidden_states.shape[1] == self.channels + if self.use_conv and self.padding == 0: + pad = (0, 1, 0, 1) + hidden_states = F.pad(hidden_states, pad, mode='constant', value=0) + + assert hidden_states.shape[1] == self.channels + hidden_states = self.conv(hidden_states) + + return hidden_states + + +class Upsample2D(nn.Module): + """An upsampling layer with an optional convolution. + + Args: + channels (int): channels in the inputs and outputs. + use_conv (bool): a bool determining if a convolution is applied. + use_conv_transpose (bool): whether to use conv transpose. + out_channels (int): output channels. 
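`Downsample2D` above is essentially a stride-2 3x3 convolution (or a stride-2 average pool when `use_conv=False`); each call halves the spatial resolution. A one-line check with toy shapes:

```python
import torch
import torch.nn as nn

# the conv variant used by the VAE encoder blocks
conv = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
x = torch.randn(1, 64, 32, 32)
print(conv(x).shape)  # torch.Size([1, 64, 16, 16])
```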
+ """ + + def __init__(self, + channels, + use_conv=False, + use_conv_transpose=False, + out_channels=None, + name='conv'): + super().__init__() + self.channels = channels + self.out_channels = out_channels or channels + self.use_conv = use_conv + self.use_conv_transpose = use_conv_transpose + self.name = name + + conv = None + if use_conv: + conv = nn.Conv2d(self.channels, self.out_channels, 3, padding=1) + else: + conv = nn.ConvTranspose2d(channels, self.out_channels, 4, 2, 1) + + self.conv = conv + + def forward(self, hidden_states, output_size=None): + """forward with hidden states.""" + assert hidden_states.shape[1] == self.channels + + if self.use_conv_transpose: + return self.conv(hidden_states) + + # if `output_size` is passed we force the interpolation output + # size and do not make use of `scale_factor=2` + if output_size is None: + hidden_states = F.interpolate( + hidden_states, scale_factor=2.0, mode='nearest') + else: + hidden_states = F.interpolate( + hidden_states, size=output_size, mode='nearest') + + # TODO(Suraj, Patrick) + # - clean up after weight dicts are correctly renamed + hidden_states = self.conv(hidden_states) + + return hidden_states + + +class ResnetBlock2D(nn.Module): + """resnet block support down sample and up sample. + + Args: + in_channels (int): input channels. + out_channels (int): output channels. + conv_shortcut (bool): whether to use conv shortcut. + dropout (float): dropout rate. + temb_channels (int): time embedding channels. + groups (int): conv groups. + groups_out (int): conv out groups. + pre_norm (bool): whether to norm before conv. Todo: remove. + eps (float): eps for groupnorm. + non_linearity (str): non linearity type. + time_embedding_norm (str): time embedding norm type. + output_scale_factor (float): factor to scale input and output. + use_in_shortcut (bool): whether to use conv in shortcut. + up (bool): whether to upsample. + down (bool): whether to downsample. 
+ """ + + def __init__( + self, + in_channels, + out_channels=None, + conv_shortcut=False, + dropout=0.0, + temb_channels=512, + groups=32, + groups_out=None, + pre_norm=True, + eps=1e-6, + non_linearity='silu', + time_embedding_norm='default', + kernel=None, + output_scale_factor=1.0, + use_in_shortcut=None, + up=False, + down=False, + ): + super().__init__() + self.pre_norm = pre_norm + self.pre_norm = True + self.in_channels = in_channels + out_channels = in_channels if out_channels is None else out_channels + self.out_channels = out_channels + self.use_conv_shortcut = conv_shortcut + self.time_embedding_norm = time_embedding_norm + self.up = up + self.down = down + self.output_scale_factor = output_scale_factor + + if groups_out is None: + groups_out = groups + + self.norm1 = torch.nn.GroupNorm( + num_groups=groups, num_channels=in_channels, eps=eps, affine=True) + + self.conv1 = torch.nn.Conv2d( + in_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if temb_channels is not None: + self.time_emb_proj = torch.nn.Linear(temb_channels, out_channels) + else: + self.time_emb_proj = None + + self.norm2 = torch.nn.GroupNorm( + num_groups=groups_out, + num_channels=out_channels, + eps=eps, + affine=True) + self.dropout = torch.nn.Dropout(dropout) + self.conv2 = torch.nn.Conv2d( + out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + if non_linearity == 'silu' and \ + digit_version(TORCH_VERSION) > digit_version('1.6.0'): + self.nonlinearity = nn.SiLU() + else: + mmengine.print_log('\'SiLU\' is not supported for ' + f'torch < 1.6.0, found \'{torch.version}\'.' + 'Use ReLu instead but result maybe wrong') + self.nonlinearity = nn.ReLU() + + self.upsample = self.downsample = None + if self.up: + self.upsample = Upsample2D(in_channels, use_conv=False) + elif self.down: + self.downsample = \ + Downsample2D( + in_channels, use_conv=False, padding=1, name='op') + + self.use_in_shortcut = self.in_channels != self.out_channels if use_in_shortcut is None else use_in_shortcut # noqa + + self.conv_shortcut = None + if self.use_in_shortcut: + self.conv_shortcut = torch.nn.Conv2d( + in_channels, out_channels, kernel_size=1, stride=1, padding=0) + + def forward(self, input_tensor, temb): + """forward with hidden states and time embeddings.""" + hidden_states = input_tensor + + hidden_states = self.norm1(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + + if self.upsample is not None: + # upsample_nearest_nhwc fails with large batch sizes. 
+ # see https://github.com/huggingface/diffusers/issues/984 + if hidden_states.shape[0] >= 64: + input_tensor = input_tensor.contiguous() + hidden_states = hidden_states.contiguous() + input_tensor = self.upsample(input_tensor) + hidden_states = self.upsample(hidden_states) + elif self.downsample is not None: + input_tensor = self.downsample(input_tensor) + hidden_states = self.downsample(hidden_states) + + hidden_states = self.conv1(hidden_states) + + if temb is not None: + temb = \ + self.time_emb_proj(self.nonlinearity(temb))[:, :, None, None] + hidden_states = hidden_states + temb + + hidden_states = self.norm2(hidden_states) + hidden_states = self.nonlinearity(hidden_states) + + hidden_states = self.dropout(hidden_states) + hidden_states = self.conv2(hidden_states) + + if self.conv_shortcut is not None: + input_tensor = self.conv_shortcut(input_tensor) + + output_tensor = \ + (input_tensor + hidden_states) / self.output_scale_factor + + return output_tensor + + +class AttentionBlock(nn.Module): + """An attention block that allows spatial positions to attend to each + other. Originally ported from here, but adapted to the N-d case. + https://github.com/hojonathanho/diffusion/blob/ + 1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/models/unet.py#L66. + Uses three q, k, v linear layers to compute attention. + + Args: + channels (int): The number of channels in the input and output. + num_head_channels (int, *optional*): + The number of channels in each head. If None, then `num_heads` = 1. + norm_num_groups (int, *optional*, defaults to 32): + The number of groups to use for group norm. + rescale_output_factor (float, *optional*, defaults to 1.0): + The factor to rescale the output by. + eps (float, *optional*, defaults to 1e-5): + The epsilon value to use for group norm. 
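The time-conditioning in `ResnetBlock2D.forward` above boils down to projecting the embedding with a linear layer and broadcasting it over the spatial grid. A standalone sketch with illustrative sizes:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

time_emb_proj = nn.Linear(512, 256)
hidden_states = torch.randn(2, 256, 32, 32)
temb = torch.randn(2, 512)

# project, then broadcast over H and W via the two trailing singleton dims
temb = time_emb_proj(F.silu(temb))[:, :, None, None]    # (2, 256, 1, 1)
hidden_states = hidden_states + temb
print(hidden_states.shape)  # torch.Size([2, 256, 32, 32])
```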
+ """ + + def __init__( + self, + channels: int, + num_head_channels: Optional[int] = None, + norm_num_groups: int = 32, + rescale_output_factor: float = 1.0, + eps: float = 1e-5, + ): + super().__init__() + self.channels = channels + + self.num_heads = channels // num_head_channels if num_head_channels is not None else 1 # noqa + self.num_head_size = num_head_channels + self.group_norm = nn.GroupNorm( + num_channels=channels, + num_groups=norm_num_groups, + eps=eps, + affine=True) + + # define q,k,v as linear layers + self.query = nn.Linear(channels, channels) + self.key = nn.Linear(channels, channels) + self.value = nn.Linear(channels, channels) + + self.rescale_output_factor = rescale_output_factor + self.proj_attn = nn.Linear(channels, channels, 1) + + def transpose_for_scores(self, projection: torch.Tensor) -> torch.Tensor: + """transpose projection.""" + new_projection_shape = projection.size()[:-1] + (self.num_heads, -1) + # move heads to 2nd position (B, T, H * D) + # -> (B, T, H, D) -> (B, H, T, D) + new_projection = \ + projection.view(new_projection_shape).permute(0, 2, 1, 3) + return new_projection + + def forward(self, hidden_states): + """forward hidden states.""" + residual = hidden_states + batch, channel, height, width = hidden_states.shape + + # norm + hidden_states = self.group_norm(hidden_states) + + hidden_states = hidden_states.view(batch, channel, + height * width).transpose(1, 2) + + # proj to q, k, v + query_proj = self.query(hidden_states) + key_proj = self.key(hidden_states) + value_proj = self.value(hidden_states) + + scale = 1 / math.sqrt(self.channels / self.num_heads) + + # get scores + if self.num_heads > 1: + query_states = self.transpose_for_scores(query_proj) + key_states = self.transpose_for_scores(key_proj) + value_states = self.transpose_for_scores(value_proj) + + attention_scores = torch.matmul( + query_states, key_states.transpose(-1, -2)) * scale + else: + query_states, key_states, value_states = \ + query_proj, key_proj, value_proj + + attention_scores = torch.baddbmm( + torch.empty( + query_states.shape[0], + query_states.shape[1], + key_states.shape[1], + dtype=query_states.dtype, + device=query_states.device, + ), + query_states, + key_states.transpose(-1, -2), + beta=0, + alpha=scale, + ) + + attention_probs = torch.softmax( + attention_scores.float(), dim=-1).type(attention_scores.dtype) + + # compute attention output + if self.num_heads > 1: + hidden_states = torch.matmul(attention_probs, value_states) + hidden_states = hidden_states.permute(0, 2, 1, 3).contiguous() + new_hidden_states_shape = \ + hidden_states.size()[:-2] + (self.channels,) + hidden_states = hidden_states.view(new_hidden_states_shape) + else: + hidden_states = torch.bmm(attention_probs, value_states) + + # compute next hidden_states + hidden_states = self.proj_attn(hidden_states) + hidden_states = hidden_states.transpose(-1, -2).reshape( + batch, channel, height, width) + + # res connect and rescale + hidden_states = \ + (hidden_states + residual) / self.rescale_output_factor + return hidden_states + + +class UNetMidBlock2D(nn.Module): + """middle block in unet. + + Args: + in_channels (int): input channels. + temb_channels (int): time embedding channels. + dropout (float): dropout rate, defaults to 0.0. + num_layers (int): layer num. + resnet_eps (float): resnet eps, defaults to 1e-6. + resnet_time_scale_shift (str): + time scale shift, defaults to 'default'. + resnet_act_fn (str): + act function in resnet, defaults to 'silu'. 
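`AttentionBlock` above flattens the feature map into `H*W` tokens and runs standard scaled dot-product self-attention over them. The single-head case, stripped of the learned query/key/value projections for brevity:

```python
import math

import torch

b, c, h, w = 1, 32, 8, 8
x = torch.randn(b, c, h, w)
tokens = x.view(b, c, h * w).transpose(1, 2)      # (B, HW, C)

q = k = v = tokens                                # identity projections
scale = 1 / math.sqrt(c)                          # num_heads == 1 case
attn = torch.softmax(q @ k.transpose(-1, -2) * scale, dim=-1)
out = (attn @ v).transpose(1, 2).reshape(b, c, h, w)
print(out.shape)  # torch.Size([1, 32, 8, 8])
```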
+ resnet_groups (int): + conv groups in resnet, defaults to 32. + resnet_pre_norm (bool): + pre norm in resnet, defaults to True. + attn_num_head_channels (int): + attention head channels, defaults to 1. + attention_type (str): + attention type ,defaults to 'default'. + output_scale_factor (float): + output scale factor, defaults to 1.0. + """ + + def __init__( + self, + in_channels: int, + temb_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = 'default', + resnet_act_fn: str = 'silu', + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + attn_num_head_channels=1, + attention_type='default', + output_scale_factor=1.0, + ): + super().__init__() + + self.attention_type = attention_type + resnet_groups = resnet_groups if resnet_groups is not None else min( + in_channels // 4, 32) # noqa + + # there is always at least one resnet + resnets = [ + ResnetBlock2D( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + ) + ] + attentions = [] + + for _ in range(num_layers): + attentions.append( + AttentionBlock( + in_channels, + num_head_channels=attn_num_head_channels, + rescale_output_factor=output_scale_factor, + eps=resnet_eps, + norm_num_groups=resnet_groups, + )) + resnets.append( + ResnetBlock2D( + in_channels=in_channels, + out_channels=in_channels, + temb_channels=temb_channels, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.attentions = nn.ModuleList(attentions) + self.resnets = nn.ModuleList(resnets) + + def forward(self, hidden_states, temb=None, encoder_states=None): + """forward with hidden states, time embedding and encoder states.""" + hidden_states = self.resnets[0](hidden_states, temb) + for attn, resnet in zip(self.attentions, self.resnets[1:]): + if self.attention_type == 'default': + hidden_states = attn(hidden_states) + else: + hidden_states = attn(hidden_states, encoder_states) + hidden_states = resnet(hidden_states, temb) + + return hidden_states + + +class DownEncoderBlock2D(nn.Module): + """Down encoder block in vae. + + Args: + in_channels (int): input channels. + out_channels (int): output channels. + dropout (float): dropout rate, defaults to 0.0. + num_layers (int): layer nums, defaults to 1. + resnet_eps (float): resnet eps, defaults to 1e-6. + resnet_time_scale_shift (str): + time scale shift in resnet, defaults to 'default'. + resnet_act_fn (str): + act function in resnet, defaults to 'silu'. + resnet_groups (int): + group num in resnet, defaults to 32. + resnet_pre_norm (bool): + whether to pre norm in resnet, defaults to True. + output_scale_factor (float): + output scale factor, defaults to 1.0. + add_downsample (bool): + whether to add downsample, defaults to True, + downsample_padding (int): + downsample padding num, defaults to 1. 
+ """ + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = 'default', + resnet_act_fn: str = 'silu', + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_downsample=True, + downsample_padding=1, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + in_channels = in_channels if i == 0 else out_channels + resnets.append( + ResnetBlock2D( + in_channels=in_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.ModuleList(resnets) + + if add_downsample: + self.downsamplers = nn.ModuleList([ + Downsample2D( + out_channels, + use_conv=True, + out_channels=out_channels, + padding=downsample_padding, + name='op') + ]) + else: + self.downsamplers = None + + def forward(self, hidden_states): + """forward with hidden states.""" + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.downsamplers is not None: + for downsampler in self.downsamplers: + hidden_states = downsampler(hidden_states) + + return hidden_states + + +class Encoder(nn.Module): + """construct encoder in vae.""" + + def __init__( + self, + in_channels=3, + out_channels=3, + down_block_types=('DownEncoderBlock2D', ), + block_out_channels=(64, ), + layers_per_block=2, + norm_num_groups=32, + act_fn='silu', + double_z=True, + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = torch.nn.Conv2d( + in_channels, + block_out_channels[0], + kernel_size=3, + stride=1, + padding=1) + + self.mid_block = None + self.down_blocks = nn.ModuleList([]) + + # down + output_channel = block_out_channels[0] + for i, down_block_type in enumerate(down_block_types): + input_channel = output_channel + output_channel = block_out_channels[i] + is_final_block = i == len(block_out_channels) - 1 + + down_block = DownEncoderBlock2D( + num_layers=self.layers_per_block, + in_channels=input_channel, + out_channels=output_channel, + add_downsample=not is_final_block, + resnet_eps=1e-6, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + downsample_padding=0, + ) + self.down_blocks.append(down_block) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift='default', + attn_num_head_channels=None, + resnet_groups=norm_num_groups, + temb_channels=None, + ) + + # out + self.conv_norm_out = nn.GroupNorm( + num_channels=block_out_channels[-1], + num_groups=norm_num_groups, + eps=1e-6) + if digit_version(TORCH_VERSION) > digit_version('1.6.0'): + self.conv_act = nn.SiLU() + else: + mmengine.print_log('\'SiLU\' is not supported for ' + f'torch < 1.6.0, found \'{torch.version}\'.' 
+ 'Use ReLu instead but result maybe wrong') + self.conv_act = nn.ReLU() + + conv_out_channels = 2 * out_channels if double_z else out_channels + self.conv_out = nn.Conv2d( + block_out_channels[-1], conv_out_channels, 3, padding=1) + + def forward(self, x): + """encoder forward.""" + sample = x + sample = self.conv_in(sample) + + # down + for down_block in self.down_blocks: + sample = down_block(sample) + + # middle + sample = self.mid_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class UpDecoderBlock2D(nn.Module): + """construct up decoder block.""" + + def __init__( + self, + in_channels: int, + out_channels: int, + dropout: float = 0.0, + num_layers: int = 1, + resnet_eps: float = 1e-6, + resnet_time_scale_shift: str = 'default', + resnet_act_fn: str = 'swish', + resnet_groups: int = 32, + resnet_pre_norm: bool = True, + output_scale_factor=1.0, + add_upsample=True, + ): + super().__init__() + resnets = [] + + for i in range(num_layers): + input_channels = in_channels if i == 0 else out_channels + + resnets.append( + ResnetBlock2D( + in_channels=input_channels, + out_channels=out_channels, + temb_channels=None, + eps=resnet_eps, + groups=resnet_groups, + dropout=dropout, + time_embedding_norm=resnet_time_scale_shift, + non_linearity=resnet_act_fn, + output_scale_factor=output_scale_factor, + pre_norm=resnet_pre_norm, + )) + + self.resnets = nn.ModuleList(resnets) + + if add_upsample: + self.upsamplers = nn.ModuleList([ + Upsample2D( + out_channels, use_conv=True, out_channels=out_channels) + ]) + else: + self.upsamplers = None + + def forward(self, hidden_states): + """forward hidden states.""" + for resnet in self.resnets: + hidden_states = resnet(hidden_states, temb=None) + + if self.upsamplers is not None: + for upsampler in self.upsamplers: + hidden_states = upsampler(hidden_states) + + return hidden_states + + +class Decoder(nn.Module): + """construct decoder in vae.""" + + def __init__( + self, + in_channels=3, + out_channels=3, + up_block_types=('UpDecoderBlock2D', ), + block_out_channels=(64, ), + layers_per_block=2, + norm_num_groups=32, + act_fn='silu', + ): + super().__init__() + self.layers_per_block = layers_per_block + + self.conv_in = nn.Conv2d( + in_channels, + block_out_channels[-1], + kernel_size=3, + stride=1, + padding=1) + + self.mid_block = None + self.up_blocks = nn.ModuleList([]) + + # mid + self.mid_block = UNetMidBlock2D( + in_channels=block_out_channels[-1], + resnet_eps=1e-6, + resnet_act_fn=act_fn, + output_scale_factor=1, + resnet_time_scale_shift='default', + attn_num_head_channels=None, + resnet_groups=norm_num_groups, + temb_channels=None, + ) + + # up + reversed_block_out_channels = list(reversed(block_out_channels)) + output_channel = reversed_block_out_channels[0] + for i, up_block_type in enumerate(up_block_types): + prev_output_channel = output_channel + output_channel = reversed_block_out_channels[i] + + is_final_block = i == len(block_out_channels) - 1 + + up_block = UpDecoderBlock2D( + num_layers=self.layers_per_block + 1, + in_channels=prev_output_channel, + out_channels=output_channel, + add_upsample=not is_final_block, + resnet_eps=1e-6, + resnet_act_fn=act_fn, + resnet_groups=norm_num_groups, + ) + self.up_blocks.append(up_block) + prev_output_channel = output_channel + + # out + self.conv_norm_out = nn.GroupNorm( + num_channels=block_out_channels[0], + num_groups=norm_num_groups, + eps=1e-6) + if digit_version(TORCH_VERSION) > 
digit_version('1.6.0'): + self.conv_act = nn.SiLU() + else: + mmengine.print_log('\'SiLU\' is not supported for ' + f'torch < 1.6.0, found \'{torch.version}\'.' + 'Use ReLu instead but result maybe wrong') + self.conv_act = nn.ReLU() + self.conv_out = nn.Conv2d( + block_out_channels[0], out_channels, 3, padding=1) + + def forward(self, z): + """decoder forward.""" + sample = z + sample = self.conv_in(sample) + + # middle + sample = self.mid_block(sample) + + # up + for up_block in self.up_blocks: + sample = up_block(sample) + + # post-process + sample = self.conv_norm_out(sample) + sample = self.conv_act(sample) + sample = self.conv_out(sample) + + return sample + + +class DiagonalGaussianDistribution(object): + """Calculate diagonal gaussian distribution.""" + + def __init__(self, parameters, deterministic=False): + self.parameters = parameters + self.mean, self.logvar = torch.chunk(parameters, 2, dim=1) + self.logvar = torch.clamp(self.logvar, -30.0, 20.0) + self.deterministic = deterministic + self.std = torch.exp(0.5 * self.logvar) + self.var = torch.exp(self.logvar) + if self.deterministic: + self.var = self.std = torch.zeros_like( + self.mean, + device=self.parameters.device, + dtype=self.parameters.dtype) + + def sample(self, generator: Optional[torch.Generator] = None) \ + -> torch.FloatTensor: + """sample function.""" + device = self.parameters.device + sample_device = device + sample = torch.randn( + self.mean.shape, generator=generator, device=sample_device) + # make sure sample is on the same device + # as the parameters and has same dtype + sample = sample.to(device=device, dtype=self.parameters.dtype) + x = self.mean + self.std * sample + return x + + def kl(self, other=None): + """calculate kl divergence.""" + if self.deterministic: + return torch.Tensor([0.0]) + else: + if other is None: + return 0.5 * torch.sum( + torch.pow(self.mean, 2) + self.var - 1.0 - self.logvar, + dim=[1, 2, 3]) + else: + return 0.5 * torch.sum( + torch.pow(self.mean - other.mean, 2) / other.var + + self.var / other.var - 1.0 - self.logvar + other.logvar, + dim=[1, 2, 3], + ) + + def nll(self, sample, dims=[1, 2, 3]): + """calculate negative log likelihood.""" + if self.deterministic: + return torch.Tensor([0.0]) + logtwopi = np.log(2.0 * np.pi) + return 0.5 * torch.sum( + logtwopi + self.logvar + + torch.pow(sample - self.mean, 2) / self.var, + dim=dims) # noqa + + def mode(self): + """return self.mean.""" + return self.mean + + +class AutoencoderKL(nn.Module): + r"""Variational Autoencoder (VAE) model with KL loss + from the paper Auto-Encoding Variational Bayes by Diederik P. Kingma + and Max Welling. + + Args: + in_channels (int, *optional*, defaults to 3): + Number of channels in the input image. + out_channels (int, *optional*, defaults to 3): + Number of channels in the output. + down_block_types (`Tuple[str]`, *optional*, defaults to : + obj:`("DownEncoderBlock2D",)`): + Tuple of downsample block types. + up_block_types (`Tuple[str]`, *optional*, defaults to : + obj:`("UpDecoderBlock2D",)`): Tuple of upsample block types. + block_out_channels (`Tuple[int]`, *optional*, defaults to : + obj:`(64,)`): Tuple of block output channels. + act_fn (`str`, *optional*, defaults to `"silu"`): + The activation function to use. + latent_channels (`int`, *optional*, defaults to `4`): + Number of channels in the latent space. + sample_size (`int`, *optional*, defaults to `32`): + sample size is now not supported. 
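`DiagonalGaussianDistribution` above implements the usual reparameterisation: the encoder output is split channel-wise into mean and log-variance, and a sample is `mean + std * eps`. The same steps in isolation, with toy shapes:

```python
import torch

parameters = torch.randn(1, 8, 4, 4)               # 2 * latent_channels
mean, logvar = torch.chunk(parameters, 2, dim=1)   # split along channels
logvar = torch.clamp(logvar, -30.0, 20.0)
std = torch.exp(0.5 * logvar)

z = mean + std * torch.randn_like(std)             # differentiable sample
print(z.shape)  # torch.Size([1, 4, 4, 4])
```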
+ """ + + def __init__( + self, + in_channels: int = 3, + out_channels: int = 3, + down_block_types: Tuple[str] = ('DownEncoderBlock2D', ), + up_block_types: Tuple[str] = ('UpDecoderBlock2D', ), + block_out_channels: Tuple[int] = (64, ), + layers_per_block: int = 1, + act_fn: str = 'silu', + latent_channels: int = 4, + norm_num_groups: int = 32, + sample_size: int = 32, + ): + super().__init__() + + self.block_out_channels = block_out_channels + + # pass init params to Encoder + self.encoder = Encoder( + in_channels=in_channels, + out_channels=latent_channels, + down_block_types=down_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + act_fn=act_fn, + norm_num_groups=norm_num_groups, + double_z=True, + ) + + # pass init params to Decoder + self.decoder = Decoder( + in_channels=latent_channels, + out_channels=out_channels, + up_block_types=up_block_types, + block_out_channels=block_out_channels, + layers_per_block=layers_per_block, + norm_num_groups=norm_num_groups, + act_fn=act_fn, + ) + + self.quant_conv = torch.nn.Conv2d(2 * latent_channels, + 2 * latent_channels, 1) + self.post_quant_conv = torch.nn.Conv2d(latent_channels, + latent_channels, 1) + + def encode(self, x: torch.FloatTensor, return_dict: bool = True) -> Dict: + """encode input.""" + h = self.encoder(x) + moments = self.quant_conv(h) + posterior = DiagonalGaussianDistribution(moments) + + if not return_dict: + return (posterior, ) + + return Dict(latent_dist=posterior) + + def decode(self, z: torch.FloatTensor, return_dict: bool = True) \ + -> Union[Dict, torch.FloatTensor]: + """decode z.""" + z = self.post_quant_conv(z) + dec = self.decoder(z) + + if not return_dict: + return (dec, ) + + return Dict(sample=dec) + + def forward( + self, + sample: torch.FloatTensor, + sample_posterior: bool = False, + return_dict: bool = True, + generator: Optional[torch.Generator] = None, + ) -> Union[Dict, torch.FloatTensor]: + """ + Args: + sample (torch.FloatTensor): Input sample. + sample_posterior (bool): + Whether to sample from the posterior. + defaults to `False`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`Dict`] instead of a plain tuple. + Returns: + Dict(sample=dec): decode results. + """ + x = sample + posterior = self.encode(x).latent_dist + if sample_posterior: + z = posterior.sample(generator=generator) + else: + z = posterior.mode() + dec = self.decode(z).sample + + if not return_dict: + return (dec, ) + + return Dict(sample=dec) diff --git a/mmedit/models/editors/stylegan2/stylegan2_discriminator.py b/mmedit/models/editors/stylegan2/stylegan2_discriminator.py index 5c993b6540..f752fa4055 100644 --- a/mmedit/models/editors/stylegan2/stylegan2_discriminator.py +++ b/mmedit/models/editors/stylegan2/stylegan2_discriminator.py @@ -31,11 +31,11 @@ class StyleGAN2Discriminator(BaseModule): ``pretrained`` argument. 
We have already offered official weights as follows: - - stylegan2-ffhq-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth # noqa - - stylegan2-horse-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth # noqa - - stylegan2-car-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth # noqa - - stylegan2-cat-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth # noqa - - stylegan2-church-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth # noqa + - stylegan2-ffhq-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth # noqa + - stylegan2-horse-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth # noqa + - stylegan2-car-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth # noqa + - stylegan2-cat-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth # noqa + - stylegan2-church-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth # noqa If you want to load the ema model, you can just use following codes: diff --git a/mmedit/models/editors/stylegan2/stylegan2_generator.py b/mmedit/models/editors/stylegan2/stylegan2_generator.py index aec58d91e8..fd83c08c11 100644 --- a/mmedit/models/editors/stylegan2/stylegan2_generator.py +++ b/mmedit/models/editors/stylegan2/stylegan2_generator.py @@ -29,11 +29,11 @@ class StyleGAN2Generator(nn.Module): ``pretrained`` argument. 
We have already offered official weights as follows: - - stylegan2-ffhq-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth # noqa - - stylegan2-horse-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth # noqa - - stylegan2-car-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth # noqa - - stylegan2-cat-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth # noqa - - stylegan2-church-config-f: https://download.openmmlab.com/mmgen/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth # noqa + - stylegan2-ffhq-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-ffhq-config-f-official_20210327_171224-bce9310c.pth # noqa + - stylegan2-horse-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-horse-config-f-official_20210327_173203-ef3e69ca.pth # noqa + - stylegan2-car-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-car-config-f-official_20210327_172340-8cfe053c.pth # noqa + - stylegan2-cat-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-cat-config-f-official_20210327_172444-15bc485b.pth # noqa + - stylegan2-church-config-f: https://download.openmmlab.com/mmediting/stylegan2/official_weights/stylegan2-church-config-f-official_20210327_172657-1d42b7d1.pth # noqa If you want to load the ema model, you can just use following codes: diff --git a/mmedit/models/editors/swinir/__init__.py b/mmedit/models/editors/swinir/__init__.py new file mode 100644 index 0000000000..18c102944a --- /dev/null +++ b/mmedit/models/editors/swinir/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .swinir_net import SwinIRNet + +__all__ = ['SwinIRNet'] diff --git a/mmedit/models/editors/swinir/swinir_modules.py b/mmedit/models/editors/swinir/swinir_modules.py new file mode 100644 index 0000000000..42d6a9c6fc --- /dev/null +++ b/mmedit/models/editors/swinir/swinir_modules.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch.nn as nn + +from .swinir_utils import to_2tuple + + +class PatchEmbed(nn.Module): + r""" Image to Patch Embedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. + Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, + img_size=224, + patch_size=4, + in_chans=3, + embed_dim=96, + norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [ + img_size[0] // patch_size[0], img_size[1] // patch_size[1] + ] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + if norm_layer is not None: + self.norm = norm_layer(embed_dim) + else: + self.norm = None + + def forward(self, x): + """Forward function. 
+ + Args: + x (Tensor): Input tensor with shape (B, C, Ph, Pw). + + Returns: + Tensor: Forward results. + """ + x = x.flatten(2).transpose(1, 2) # B Ph*Pw C + if self.norm is not None: + x = self.norm(x) + return x + + +class PatchUnEmbed(nn.Module): + r""" Image to Patch Unembedding + Args: + img_size (int): Image size. Default: 224. + patch_size (int): Patch token size. Default: 4. + in_chans (int): Number of input image channels. Default: 3. + embed_dim (int): Number of linear projection output channels. + Default: 96. + norm_layer (nn.Module, optional): Normalization layer. Default: None + """ + + def __init__(self, + img_size=224, + patch_size=4, + in_chans=3, + embed_dim=96, + norm_layer=None): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + patches_resolution = [ + img_size[0] // patch_size[0], img_size[1] // patch_size[1] + ] + self.img_size = img_size + self.patch_size = patch_size + self.patches_resolution = patches_resolution + self.num_patches = patches_resolution[0] * patches_resolution[1] + + self.in_chans = in_chans + self.embed_dim = embed_dim + + def forward(self, x, x_size): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, L, C). + x_size (tuple[int]): Resolution of input feature. + + Returns: + Tensor: Forward results. + """ + B, HW, C = x.shape + x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], + x_size[1]) # B Ph*Pw C + return x + + +class Upsample(nn.Sequential): + """Upsample module. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + """ + + def __init__(self, scale, num_feat): + m = [] + if (scale & (scale - 1)) == 0: # scale = 2^n + for _ in range(int(math.log(scale, 2))): + m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(2)) + elif scale == 3: + m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1)) + m.append(nn.PixelShuffle(3)) + else: + raise ValueError(f'scale {scale} is not supported. ' + 'Supported scales: 2^n and 3.') + super(Upsample, self).__init__(*m) + + +class UpsampleOneStep(nn.Sequential): + """UpsampleOneStep module (the difference with Upsample is that it always + only has 1conv + 1pixelshuffle) Used in lightweight SR to save parameters. + + Args: + scale (int): Scale factor. Supported scales: 2^n and 3. + num_feat (int): Channel number of intermediate features. + num_out_ch (int): Channel number for PixelShuffle. + input_resolution (tuple[int], optional): Input resolution. + Default: None + """ + + def __init__(self, scale, num_feat, num_out_ch, input_resolution=None): + self.num_feat = num_feat + self.input_resolution = input_resolution + m = [] + m.append(nn.Conv2d(num_feat, (scale**2) * num_out_ch, 3, 1, 1)) + m.append(nn.PixelShuffle(scale)) + super(UpsampleOneStep, self).__init__(*m) diff --git a/mmedit/models/editors/swinir/swinir_net.py b/mmedit/models/editors/swinir/swinir_net.py new file mode 100644 index 0000000000..859cfa634b --- /dev/null +++ b/mmedit/models/editors/swinir/swinir_net.py @@ -0,0 +1,307 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
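The `Upsample` module defined above stacks a conv and a `PixelShuffle`: the 3x3 convolution expands channels by `scale**2`, and `PixelShuffle` rearranges those channels into spatial resolution. A minimal check of the shape arithmetic:

```python
import torch
import torch.nn as nn

scale, num_feat = 2, 64
up = nn.Sequential(
    nn.Conv2d(num_feat, (scale**2) * num_feat, 3, 1, 1),
    nn.PixelShuffle(scale))

x = torch.randn(1, num_feat, 24, 24)
print(up(x).shape)  # torch.Size([1, 64, 48, 48])
```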
+import torch +import torch.nn as nn +import torch.nn.functional as F +from mmengine.model import BaseModule +from mmengine.model.weight_init import trunc_normal_ + +from mmedit.registry import MODELS +from .swinir_modules import PatchEmbed, PatchUnEmbed, Upsample, UpsampleOneStep +from .swinir_rstb import RSTB + + +@MODELS.register_module() +class SwinIRNet(BaseModule): + r""" SwinIR + A PyTorch impl of: `SwinIR: Image Restoration Using Swin Transformer`, + based on Swin Transformer. + Ref repo: https://github.com/JingyunLiang/SwinIR + + Args: + img_size (int | tuple(int)): Input image size. Default 64 + patch_size (int | tuple(int)): Patch size. Default: 1 + in_chans (int): Number of input image channels. Default: 3 + embed_dim (int): Patch embedding dimension. Default: 96 + depths (tuple(int)): Depth of each Swin Transformer layer. + Default: [6, 6, 6, 6] + num_heads (tuple(int)): Number of attention heads in different layers. + Default: [6, 6, 6, 6] + window_size (int): Window size. Default: 7 + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 + qkv_bias (bool): If True, add a learnable bias to query, key, value. + Default: True + qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. + Default: None + drop_rate (float): Dropout rate. Default: 0 + attn_drop_rate (float): Attention dropout rate. Default: 0 + drop_path_rate (float): Stochastic depth rate. Default: 0.1 + norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. + ape (bool): If True, add absolute position embedding to the + patch embedding. Default: False + patch_norm (bool): If True, add normalization after patch embedding. + Default: True + use_checkpoint (bool): Whether to use checkpointing to save memory. + Default: False + upscale (int): Upscale factor. 2/3/4/8 for image SR, 1 for denoising + and compress artifact reduction. Default: 2 + img_range (float): Image range. 1. or 255. Default: 1.0 + upsampler (string, optional): The reconstruction module. + 'pixelshuffle' / 'pixelshuffledirect' /'nearest+conv'/None. + Default: '' + resi_connection (string): The convolutional block before residual + connection. '1conv'/'3conv'. 
Default: '1conv' + """ + + def __init__(self, + img_size=64, + patch_size=1, + in_chans=3, + embed_dim=96, + depths=[6, 6, 6, 6], + num_heads=[6, 6, 6, 6], + window_size=7, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1, + norm_layer=nn.LayerNorm, + ape=False, + patch_norm=True, + use_checkpoint=False, + upscale=2, + img_range=1., + upsampler='', + resi_connection='1conv', + **kwargs): + super(SwinIRNet, self).__init__() + num_in_ch = in_chans + num_out_ch = in_chans + num_feat = 64 + self.img_range = img_range + if in_chans == 3: + rgb_mean = (0.4488, 0.4371, 0.4040) + self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1) + else: + self.mean = torch.zeros(1, 1, 1, 1) + self.upscale = upscale + self.upsampler = upsampler + self.window_size = window_size + + # 1, shallow feature extraction + self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1) + + # 2, deep feature extraction + self.num_layers = len(depths) + self.embed_dim = embed_dim + self.ape = ape + self.patch_norm = patch_norm + self.num_features = embed_dim + self.mlp_ratio = mlp_ratio + + # split image into non-overlapping patches + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + num_patches = self.patch_embed.num_patches + patches_resolution = self.patch_embed.patches_resolution + self.patches_resolution = patches_resolution + + # merge non-overlapping patches into image + self.patch_unembed = PatchUnEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=embed_dim, + embed_dim=embed_dim, + norm_layer=norm_layer if self.patch_norm else None) + + # absolute position embedding + if self.ape: + self.absolute_pos_embed = nn.Parameter( + torch.zeros(1, num_patches, embed_dim)) + trunc_normal_(self.absolute_pos_embed, std=.02) + + self.pos_drop = nn.Dropout(p=drop_rate) + + # stochastic depth decay rule + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] + + # build Residual Swin Transformer blocks (RSTB) + self.layers = nn.ModuleList() + for i_layer in range(self.num_layers): + layer = RSTB( + dim=embed_dim, + input_resolution=(patches_resolution[0], + patches_resolution[1]), + depth=depths[i_layer], + num_heads=num_heads[i_layer], + window_size=window_size, + mlp_ratio=self.mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop_rate, + attn_drop=attn_drop_rate, + drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], + norm_layer=norm_layer, + downsample=None, + use_checkpoint=use_checkpoint, + img_size=img_size, + patch_size=patch_size, + resi_connection=resi_connection) + self.layers.append(layer) + self.norm = norm_layer(self.num_features) + + # build the last conv layer in deep feature extraction + if resi_connection == '1conv': + self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv_after_body = nn.Sequential( + nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1)) + + # 3, high quality image reconstruction + if self.upsampler == 'pixelshuffle': + # for classical SR + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.upsample = 
Upsample(upscale, num_feat) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR (to save parameters) + self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch, + (patches_resolution[0], + patches_resolution[1])) + elif self.upsampler == 'nearest+conv': + # for real-world SR (less artifacts) + self.conv_before_upsample = nn.Sequential( + nn.Conv2d(embed_dim, num_feat, 3, 1, 1), + nn.LeakyReLU(inplace=True)) + self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + if self.upscale == 4: + self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1) + self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1) + self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True) + else: + # for image denoising and JPEG compression artifact reduction + self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + @torch.jit.ignore + def no_weight_decay(self): + return {'absolute_pos_embed'} + + @torch.jit.ignore + def no_weight_decay_keywords(self): + return {'relative_position_bias_table'} + + def check_image_size(self, x): + """Check image size and pad images so that it has enough dimension do + window size. + + args: + x: input tensor image with (B, C, H, W) shape. + """ + _, _, h, w = x.size() + mod_pad_h = (self.window_size - + h % self.window_size) % self.window_size + mod_pad_w = (self.window_size - + w % self.window_size) % self.window_size + x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect') + return x + + def forward_features(self, x): + """Forward function of Deep Feature Extraction. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor: Forward results. + """ + x_size = (x.shape[2], x.shape[3]) + x = self.patch_embed(x) + if self.ape: + x = x + self.absolute_pos_embed + x = self.pos_drop(x) + + for layer in self.layers: + x = layer(x, x_size) + + x = self.norm(x) # B L C + x = self.patch_unembed(x, x_size) + + return x + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, C, H, W). + + Returns: + Tensor: Forward results. 
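+
+        A minimal smoke test is shown below; the hyper-parameters are
+        illustrative assumptions only, not a released SwinIR config.
+
+        Example::
+
+            >>> import torch
+            >>> net = SwinIRNet(upscale=2, img_size=48, window_size=8,
+            ...                 depths=[2, 2], num_heads=[2, 2],
+            ...                 embed_dim=60, upsampler='pixelshuffledirect')
+            >>> out = net(torch.randn(1, 3, 48, 48))
+            >>> out.shape
+            torch.Size([1, 3, 96, 96])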
+ """ + H, W = x.shape[2:] + x = self.check_image_size(x) + + self.mean = self.mean.type_as(x) + x = (x - self.mean) * self.img_range + + if self.upsampler == 'pixelshuffle': + # for classical SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.conv_last(self.upsample(x)) + elif self.upsampler == 'pixelshuffledirect': + # for lightweight SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.upsample(x) + elif self.upsampler == 'nearest+conv': + # for real-world SR + x = self.conv_first(x) + x = self.conv_after_body(self.forward_features(x)) + x + x = self.conv_before_upsample(x) + x = self.lrelu( + self.conv_up1( + torch.nn.functional.interpolate( + x, scale_factor=2, mode='nearest'))) + if self.upscale == 4: + x = self.lrelu( + self.conv_up2( + torch.nn.functional.interpolate( + x, scale_factor=2, mode='nearest'))) + x = self.conv_last(self.lrelu(self.conv_hr(x))) + else: + # for image denoising and JPEG compression artifact reduction + x_first = self.conv_first(x) + res = self.conv_after_body( + self.forward_features(x_first)) + x_first + x = x + self.conv_last(res) + + x = x / self.img_range + self.mean + + return x[:, :, :H * self.upscale, :W * self.upscale] diff --git a/mmedit/models/editors/swinir/swinir_rstb.py b/mmedit/models/editors/swinir/swinir_rstb.py new file mode 100644 index 0000000000..48b0b56d3b --- /dev/null +++ b/mmedit/models/editors/swinir/swinir_rstb.py @@ -0,0 +1,583 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +import torch +import torch.nn as nn +import torch.utils.checkpoint as checkpoint +from mmengine.model.weight_init import trunc_normal_ + +from .swinir_modules import PatchEmbed, PatchUnEmbed +from .swinir_utils import (drop_path, to_2tuple, window_partition, + window_reverse) + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks).""" + + def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, L, C). + + Returns: + Tensor: Forward results. + """ + return drop_path(x, self.drop_prob, self.training, self.scale_by_keep) + + def extra_repr(self): + return f'drop_prob={round(self.drop_prob, 3):0.3f}' + + +class Mlp(nn.Module): + """Multilayer Perceptron layer. + + Args: + in_features (int): Number of input channels. + hidden_features (int | None, optional): Number of hidden layer + channels. Default: None + out_features (int | None, optional): Number of output channels. + Default: None + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + """ + + def __init__(self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, L, C). + + Returns: + Tensor: Forward results. 
+ """ + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class WindowAttention(nn.Module): + r""" Window based multi-head self attention (W-MSA) + module with relative position bias. + It supports both of shifted and non-shifted window. + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to + query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale + of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. + Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + """ + + def __init__(self, + dim, + window_size, + num_heads, + qkv_bias=True, + qk_scale=None, + attn_drop=0., + proj_drop=0.): + + super().__init__() + self.dim = dim + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # define a parameter table of relative position bias + # 2*Wh-1 * 2*Ww-1, nH + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), + num_heads)) + + # get pair-wise relative position index + # for each token inside the window + coords_h = torch.arange(self.window_size[0]) + coords_w = torch.arange(self.window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = \ + coords_flatten[:, :, None] - coords_flatten[:, None, :] + # Wh*Ww, Wh*Ww, 2 + relative_coords = relative_coords.permute(1, 2, 0).contiguous() + # shift to start from 0 + relative_coords[:, :, 0] += self.window_size[0] - 1 + relative_coords[:, :, 1] += self.window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 + relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + self.register_buffer('relative_position_index', + relative_position_index) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + + self.proj_drop = nn.Dropout(proj_drop) + + trunc_normal_(self.relative_position_bias_table, std=.02) + self.softmax = nn.Softmax(dim=-1) + + def forward(self, x, mask=None): + """ + Args: + x: input features with shape of (num_windows*B, N, C) + mask: (0/-inf) mask with shape of + (num_windows, Wh*Ww, Wh*Ww) or None + """ + B_, N, C = x.shape + qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, + C // self.num_heads).permute(2, 0, 3, 1, 4) + # make torchscript happy (cannot use tensor as tuple) + q, k, v = qkv[0], qkv[1], qkv[2] + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], + self.window_size[0] * self.window_size[1], + -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B_ // nW, nW, self.num_heads, N, + N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + else: + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B_, N, 
C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, window_size={self.window_size}, ' \ + f'num_heads={self.num_heads}' + + +class SwinTransformerBlock(nn.Module): + r""" Swin Transformer Block. + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + num_heads (int): Number of attention heads. + window_size (int): Window size. + shift_size (int): Shift size for SW-MSA. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias + to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk + scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float, optional): Stochastic depth rate. Default: 0.0 + act_layer (nn.Module, optional): Activation layer. Default: nn.GELU + norm_layer (nn.Module, optional): Normalization layer. + Default: nn.LayerNorm + """ + + def __init__(self, + dim, + input_resolution, + num_heads, + window_size=7, + shift_size=0, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + act_layer=nn.GELU, + norm_layer=nn.LayerNorm): + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.num_heads = num_heads + self.window_size = window_size + self.shift_size = shift_size + self.mlp_ratio = mlp_ratio + if min(self.input_resolution) <= self.window_size: + # if window size is larger than input resolution, + # we don't partition windows + self.shift_size = 0 + self.window_size = min(self.input_resolution) + assert 0 <= self.shift_size < self.window_size, \ + 'shift_size must in 0-window_size' + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size=to_2tuple(self.window_size), + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=drop) + + self.drop_path = DropPath( + drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, + hidden_features=mlp_hidden_dim, + act_layer=act_layer, + drop=drop) + + if self.shift_size > 0: + attn_mask = self.calculate_mask(self.input_resolution) + else: + attn_mask = None + + self.register_buffer('attn_mask', attn_mask) + + def calculate_mask(self, x_size): + # calculate attention mask for SW-MSA + """Calculate attention mask for SW-MSA. + + Args: + x_size (tuple[int]): Resolution of input feature. + + Returns: + Tensor: Attention mask + """ + H, W = x_size + img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, + -self.shift_size), slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, + -self.shift_size), slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + mask_windows = window_partition( + img_mask, self.window_size) # nW, window_size, window_size, 1 + mask_windows = mask_windows.view(-1, + self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, + float(-100.0)).masked_fill( + attn_mask == 0, float(0.0)) + + return attn_mask + + def forward(self, x, x_size): + """Forward function. 
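+
+        Applies (shifted) window based multi-head self attention followed
+        by the MLP, each inside a residual branch with DropPath.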
+ + Args: + x (Tensor): Input tensor with shape (B, L, C). + x_size (tuple[int]): Resolution of input feature. + + Returns: + Tensor: Forward results. + """ + H, W = x_size + B, L, C = x.shape + # assert L == H * W, "input feature has wrong size" + + shortcut = x + x = self.norm1(x) + x = x.view(B, H, W, C) + + # cyclic shift + if self.shift_size > 0: + shifted_x = torch.roll( + x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) + else: + shifted_x = x + + # partition windows + x_windows = window_partition( + shifted_x, self.window_size) # nW*B, window_size, window_size, C + x_windows = x_windows.view(-1, self.window_size * self.window_size, + C) # nW*B, window_size*window_size, C + + # W-MSA/SW-MSA (to be compatible for testing on images + # whose shapes are the multiple of window size + if self.input_resolution == x_size: + attn_windows = self.attn( + x_windows, + mask=self.attn_mask) # nW*B, window_size*window_size, C + else: + attn_windows = self.attn( + x_windows, mask=self.calculate_mask(x_size).to(x.device)) + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, + self.window_size, C) + shifted_x = window_reverse(attn_windows, self.window_size, H, + W) # B H' W' C + + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll( + shifted_x, + shifts=(self.shift_size, self.shift_size), + dims=(1, 2)) + else: + x = shifted_x + x = x.view(B, H * W, C) + + # FFN + x = shortcut + self.drop_path(x) + x = x + self.drop_path(self.mlp(self.norm2(x))) + + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, ' \ + f'input_resolution={self.input_resolution}, ' \ + f'num_heads={self.num_heads}, ' \ + f'window_size={self.window_size}, ' \ + f'shift_size={self.shift_size}, ' \ + f'mlp_ratio={self.mlp_ratio}' + + +class BasicLayer(nn.Module): + """A basic Swin Transformer layer for one stage. + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias + to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk + scale of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. + Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. + Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the + end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. + Default: False. 
+ """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False): + + super().__init__() + self.dim = dim + self.input_resolution = input_resolution + self.depth = depth + self.use_checkpoint = use_checkpoint + + # build blocks + self.blocks = nn.ModuleList([ + SwinTransformerBlock( + dim=dim, + input_resolution=input_resolution, + num_heads=num_heads, + window_size=window_size, + shift_size=0 if (i % 2 == 0) else window_size // 2, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path[i] + if isinstance(drop_path, list) else drop_path, + norm_layer=norm_layer) for i in range(depth) + ]) + + # patch merging layer + if downsample is not None: + self.downsample = downsample( + input_resolution, dim=dim, norm_layer=norm_layer) + else: + self.downsample = None + + def forward(self, x, x_size): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, L, C). + x_size (tuple[int]): Resolution of input feature. + + Returns: + Tensor: Forward results. + """ + for blk in self.blocks: + if self.use_checkpoint: + x = checkpoint.checkpoint(blk, x, x_size) + else: + x = blk(x, x_size) + if self.downsample is not None: + x = self.downsample(x) + return x + + def extra_repr(self) -> str: + return f'dim={self.dim}, ' \ + f'input_resolution={self.input_resolution}, ' \ + f'depth={self.depth}' + + +class RSTB(nn.Module): + """Residual Swin Transformer Block (RSTB). + + Args: + dim (int): Number of input channels. + input_resolution (tuple[int]): Input resolution. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + Default: 4.0 + qkv_bias (bool, optional): If True, add a learnable bias to + query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale + of head_dim ** -0.5 if set. + drop (float, optional): Dropout rate. Default: 0.0 + attn_drop (float, optional): Attention dropout rate. Default: 0.0 + drop_path (float | tuple[float], optional): Stochastic depth rate. + Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. + Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the + end of the layer. Default: None + use_checkpoint (bool): Whether to use checkpointing to save memory. + Default: False. + img_size (int): Input image size. Default: 224 + patch_size (int): Patch size. Default: 4 + resi_connection (string): The convolutional block before + residual connection. 
Default: '1conv' + """ + + def __init__(self, + dim, + input_resolution, + depth, + num_heads, + window_size, + mlp_ratio=4., + qkv_bias=True, + qk_scale=None, + drop=0., + attn_drop=0., + drop_path=0., + norm_layer=nn.LayerNorm, + downsample=None, + use_checkpoint=False, + img_size=224, + patch_size=4, + resi_connection='1conv'): + super(RSTB, self).__init__() + + self.dim = dim + self.input_resolution = input_resolution + + self.residual_group = BasicLayer( + dim=dim, + input_resolution=input_resolution, + depth=depth, + num_heads=num_heads, + window_size=window_size, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop=drop, + attn_drop=attn_drop, + drop_path=drop_path, + norm_layer=norm_layer, + downsample=downsample, + use_checkpoint=use_checkpoint) + + if resi_connection == '1conv': + self.conv = nn.Conv2d(dim, dim, 3, 1, 1) + elif resi_connection == '3conv': + # to save parameters and memory + self.conv = nn.Sequential( + nn.Conv2d(dim, dim // 4, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim // 4, 1, 1, 0), + nn.LeakyReLU(negative_slope=0.2, inplace=True), + nn.Conv2d(dim // 4, dim, 3, 1, 1)) + + self.patch_embed = PatchEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None) + + self.patch_unembed = PatchUnEmbed( + img_size=img_size, + patch_size=patch_size, + in_chans=0, + embed_dim=dim, + norm_layer=None) + + def forward(self, x, x_size): + """Forward function. + + Args: + x (Tensor): Input tensor with shape (B, L, C). + x_size (tuple[int]): Resolution of input feature. + + Returns: + Tensor: Forward results. + """ + return self.patch_embed( + self.conv( + self.patch_unembed(self.residual_group(x, x_size), + x_size))) + x diff --git a/mmedit/models/editors/swinir/swinir_utils.py b/mmedit/models/editors/swinir/swinir_utils.py new file mode 100644 index 0000000000..d0f6e69051 --- /dev/null +++ b/mmedit/models/editors/swinir/swinir_utils.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import collections.abc +from itertools import repeat + + +# From PyTorch internals +def _ntuple(n): + """A `to_tuple` function generator. It returns a function, this function + will repeat the input to a tuple of length ``n`` if the input is not an + Iterable object, otherwise, return the input directly. + + Args: + n (int): The number of the target length. + """ + + def parse(x): + if isinstance(x, collections.abc.Iterable) and not isinstance(x, str): + return x + return tuple(repeat(x, n)) + + return parse + + +to_2tuple = _ntuple(2) + + +def drop_path(x, + drop_prob: float = 0., + training: bool = False, + scale_by_keep: bool = True): + """Drop paths (Stochastic Depth) per sample (when applied in main path of + residual blocks). + + This is the same as the DropConnect impl I created for + EfficientNet, etc networks, however, the original name is misleading + as 'Drop Connect' is a different form of dropout in a separate paper... + See discussion: + https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 + I've opted for changing the layer and argument names to 'drop path' + rather than mix DropConnect as a layer name and use + 'survival rate' as the argument. + """ + if drop_prob == 0. 
or not training: + return x + keep_prob = 1 - drop_prob + # work with diff dim tensors, not just 2D ConvNets + shape = (x.shape[0], ) + (1, ) * (x.ndim - 1) + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + +def window_partition(x, window_size): + """ + Args: + x: (B, H, W, C) + window_size (int): window size + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + x = x.view(B, H // window_size, window_size, W // window_size, window_size, + C) + windows = x.permute(0, 1, 3, 2, 4, + 5).contiguous().view(-1, window_size, window_size, C) + return windows + + +def window_reverse(windows, window_size, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + window_size (int): Window size + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + B = int(windows.shape[0] / (H * W / window_size / window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, + window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x diff --git a/mmedit/version.py b/mmedit/version.py index 962d0e4efe..3712b40685 100644 --- a/mmedit/version.py +++ b/mmedit/version.py @@ -1,6 +1,6 @@ # Copyright (c) Open-MMLab. All rights reserved. -__version__ = '1.0.0rc4' +__version__ = '1.0.0rc5' def parse_version_info(version_str): diff --git a/model-index.yml b/model-index.yml index 61919afa89..f1de04f77e 100644 --- a/model-index.yml +++ b/model-index.yml @@ -34,14 +34,17 @@ Import: - configs/rdn/metafile.yml - configs/real_basicvsr/metafile.yml - configs/real_esrgan/metafile.yml +- configs/restormer/metafile.yml - configs/sagan/metafile.yml - configs/singan/metafile.yml - configs/sngan_proj/metafile.yml - configs/srcnn/metafile.yml - configs/srgan_resnet/metafile.yml +- configs/stable_diffusion/metafile.yml - configs/styleganv1/metafile.yml - configs/styleganv2/metafile.yml - configs/styleganv3/metafile.yml +- configs/swinir/metafile.yml - configs/tdan/metafile.yml - configs/tof/metafile.yml - configs/ttsr/metafile.yml diff --git a/projects/README.md b/projects/README.md new file mode 100644 index 0000000000..f6e6c0ca9e --- /dev/null +++ b/projects/README.md @@ -0,0 +1,67 @@ +# Welcome to Projects of MMEditing + +Welcome to the MMEditing community! +The MMEditing ecosystem consists of tutorials, libraries, and projects from a broad set of researchers in academia and industry, ML and application engineers. +The goal of this ecosystem is to support, accelerate, and aid in your exploration with MMEditing for image, video, 3D content generation, editing and processing. + +Here are a few projects that are built upon MMEditing. They are examples of how to use MMEditing as a library, to make your projects more maintainable. +Please find more projects in [MMEditing Ecosystem](https://openmmlab.com/ecosystem). + +## Show your projects on OpenMMLab Ecosystem + +You can submit your project so that it can be shown on the homepage of [OpenMMLab](https://openmmlab.com/ecosystem). + +## Add example projects to MMEditing + +Here is an [example project](./example_project) about how to add your projects to MMEditing. +You can copy and create your own project from the [example project](./example_project). 
+ +We also provide some documentation listed below for your reference: + +- [Contribution Guide](https://mmediting.readthedocs.io/en/dev-1.x/community/contributing.html) + + The guides for new contributors about how to add your projects to MMEditing. + +- [New Model Guide](https://mmediting.readthedocs.io/en/dev-1.x/howto/models.html) + + The documentation of adding new models. + +- [Discussions](https://github.com/open-mmlab/mmediting/discussions) + + Welcome to start a discussion! + +## Projects of libraries and toolboxes + +- [PowerVQE](https://github.com/ryanxingql/powervqe): Open framework for quality enhancement of compressed videos based on PyTorch and MMEditing. + +- [VR-Baseline](https://github.com/linjing7/VR-Baseline): Video Restoration Toolbox. + +- [Derain-Toolbox](https://github.com/biubiubiiu/derain-toolbox): Single Image Deraining Toolbox and Benchmark + +## Projects of research papers + +- [Towards Interpretable Video Super-Resolution via Alternating Optimization, ECCV 2022](https://arxiv.org/abs/2207.10765)[\[github\]](https://github.com/caojiezhang/DAVSR) + +- [SepLUT:Separable Image-adaptive Lookup Tables for Real-time Image Enhancement, ECCV 2022](https://arxiv.org/abs/2207.08351)[\[github\]](https://github.com/ImCharlesY/SepLUT) + +- [TTVSR: Learning Trajectory-Aware Transformer for Video Super-Resolution, CVPR 2022](https://arxiv.org/abs/2204.04216)[\[github\]](https://github.com/researchmm/TTVSR) + +- [Arbitrary-Scale Image Synthesis, CVPR 2022](https://arxiv.org/pdf/2204.02273.pdf)[\[github\]](https://github.com/vglsd/ScaleParty) + +- [Investigating Tradeoffs in Real-World Video Super-Resolution(RealBasicVSR), CVPR 2022](https://arxiv.org/abs/2111.12704)[\[github\]](https://github.com/ckkelvinchan/RealBasicVSR) + +- [BasicVSR++: Improving Video Super-Resolution with Enhanced Propagation and Alignment, CVPR 2022](https://arxiv.org/abs/2104.13371)[\[github\]](https://github.com/ckkelvinchan/BasicVSR_PlusPlus) + +- [Multi-Scale Memory-Based Video Deblurring, CVPR 2022](https://arxiv.org/abs/2204.02977)[\[github\]](https://github.com/jibo27/MemDeblur) + +- [AdaInt:Learning Adaptive Intervals for 3D Lookup Tables on Real-time Image Enhancement, CVPR 2022](https://arxiv.org/abs/2204.13983)[\[github\]](https://github.com/ImCharlesY/AdaInt) + +- [A New Dataset and Transformer for Stereoscopic Video Super-Resolution, CVPRW 2022](https://openaccess.thecvf.com/content/CVPR2022W/NTIRE/papers/Imani_A_New_Dataset_and_Transformer_for_Stereoscopic_Video_Super-Resolution_CVPRW_2022_paper.pdf)[\[github\]](https://github.com/H-deep/Trans-SVSR) + +- [Liquid warping GAN with attention: A unified framework for human image synthesis, TPAMI 2021](https://arxiv.org/pdf/2011.09055.pdf)[\[github\]](https://github.com/iPERDance/iPERCore) + +- [BasicVSR:The Search for Essential Components in Video Super-Resolution and Beyond, CVPR 2021](https://arxiv.org/abs/2012.02181)[\[github\]](https://github.com/ckkelvinchan/BasicVSR-IconVSR) + +- [GLEAN:Generative Latent Bank for Large-Factor Image Super-Resolution, CVPR 2021](https://arxiv.org/abs/2012.00739)[\[github\]](https://github.com/ckkelvinchan/GLEAN) + +- [DAN:Unfolding the Alternating Optimization for Blind Super Resolution, NeurIPS 2020](https://arxiv.org/abs/2010.02631v4)[\[github\]](https://github.com/AlexZou14/DAN-Basd-on-Openmmlab) diff --git a/projects/example_project/README.md b/projects/example_project/README.md new file mode 100644 index 0000000000..7675bfb700 --- /dev/null +++ b/projects/example_project/README.md @@ -0,0 +1,140 @@ +# 
Example Project + +This is an example README for community `projects/`. You can write your README in your own project. Here are +some recommended parts of a README for others to understand and use your project, you can copy or modify them +according to your project. + +## Description \[required\] + +You can share any information you would like others to know. For example: + +``` +Author: @xxx. + +This is an implementation of \[XXX\]. +``` + +## Usage \[required\] + +### Setup Environment \[required\] + +Please refer to [Get Started](https://mmediting.readthedocs.io/en/1.x/get_started/I.html) to install +MMEditing. + +At first, add the current folder to `PYTHONPATH`, so that Python can find your code. Run command in the current directory to add it. + +> Please run it every time after you opened a new shell. + +```shell +export PYTHONPATH=`pwd`:$PYTHONPATH +``` + +### Data Preparation \[optional\] + +Prepare the ImageNet-2012 dataset according to the [instruction](https://mmediting.readthedocs.io/en/dev-1.x/user_guides/dataset_prepare.html#imagenet). + +### Training commands \[optional\] + +**To train with single GPU:** + +```bash +mim train mmedit configs/examplenet_8xb32_in1k.py +``` + +**To train with multiple GPUs:** + +```bash +mim train mmedit configs/examplenet_8xb32_in1k.py --launcher pytorch --gpus 8 +``` + +**To train with multiple GPUs by slurm:** + +```bash +mim train mmedit configs/examplenet_8xb32_in1k.py --launcher slurm \ + --gpus 16 --gpus-per-node 8 --partition $PARTITION +``` + +### Testing commands \[required\] + +**To test with single GPU:** + +```bash +mim test mmedit configs/examplenet_8xb32_in1k.py $CHECKPOINT +``` + +**To test with multiple GPUs:** + +```bash +mim test mmedit configs/examplenet_8xb32_in1k.py $CHECKPOINT --launcher pytorch --gpus 8 +``` + +**To test with multiple GPUs by slurm:** + +```bash +mim test mmedit configs/examplenet_8xb32_in1k.py $CHECKPOINT --launcher slurm \ + --gpus 16 --gpus-per-node 8 --partition $PARTITION +``` + +## Results \[required\] + +| Model | Pretrain | Top-1 (%) | Top-5 (%) | Config | Download | +| :----------------: | :----------: | :-------: | :-------: | :----------------------------------------: | :------------------------------------: | +| ExampleNet-tiny | From scratch | 82.33 | 96.15 | [config](configs/examplenet_8xb32_in1k.py) | [model](MODEL-LINK) \| [log](LOG-LINK) | +| ExampleNet-small\* | From scratch | 83.63 | 96.51 | [config](configs/examplenet_8xb32_in1k.py) | [model](MODEL-LINK) | +| ExampleNet-base\* | From scratch | 84.34 | 96.86 | [config](configs/examplenet_8xb32_in1k.py) | [model](MODEL-LINK) | + +*Models with * are converted from the [official repo](REPO-LINK). The config files of these models are only for inference. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.* + +You can also paste some visual results here if the model doesn't have quantitative results. + +## Citation \[required\] + + + +```bibtex +@misc{mmediting2022, + title = {{MMEditing}: {OpenMMLab} Image and Video Editing Toolbox}, + author = {{MMEditing Contributors}}, + howpublished = {\url{https://github.com/open-mmlab/mmediting}}, + year = {2022} +} +``` + +## Checklist \[required\] + +Here is a checklist of this project's progress. And you can ignore this part if you don't plan to contribute +to MMediting projects. + +- [ ] Milestone 1: PR-ready, and acceptable to be one of the `projects/`. 
+ + - [ ] Finish the code + + + + - [ ] Basic docstrings & proper citation + + + + - [ ] Converted checkpoint and results (Only for reproduction) + + + +- [ ] Milestone 2: Indicates a successful model implementation. + + - [ ] Training results + + + +- [ ] Milestone 3: Good to be a part of our core package! + + - [ ] Unit tests + + + + - [ ] Code style + + + + - [ ] `metafile.yml` and `README.md` + + diff --git a/projects/example_project/configs/examplenet_8xb32_in1k.py b/projects/example_project/configs/examplenet_8xb32_in1k.py new file mode 100644 index 0000000000..2a047ad943 --- /dev/null +++ b/projects/example_project/configs/examplenet_8xb32_in1k.py @@ -0,0 +1,7 @@ +# Directly inherit the entire recipe you want to use. +_base_ = 'mmediting::srcnn/srcnn_x4k915_1xb16-1000k_div2k.py' + +# This line is to import your own modules. +custom_imports = dict(imports='models') + +# Set your model, training, testing configurations. diff --git a/projects/example_project/models/__init__.py b/projects/example_project/models/__init__.py new file mode 100644 index 0000000000..e2d4f2f571 --- /dev/null +++ b/projects/example_project/models/__init__.py @@ -0,0 +1,3 @@ +from .example_net import ExampleNet + +__all__ = ['ExampleNet'] diff --git a/projects/example_project/models/example_net.py b/projects/example_project/models/example_net.py new file mode 100644 index 0000000000..374b761800 --- /dev/null +++ b/projects/example_project/models/example_net.py @@ -0,0 +1,31 @@ +from mmedit.models import ResNet +from mmedit.registry import MODELS + + +# Register your model to the `MODELS`. +@MODELS.register_module() +class ExampleNet(ResNet): + """Implements an example backbone. + + Implement the backbone network just like a normal pytorch network. + """ + + def __init__(self, **kwargs) -> None: + print('#############################\n' + '# Hello MMEditing! #\n' + '#############################') + super().__init__(**kwargs) + + def forward(self, x): + """The forward method of the network. + + Args: + x (torch.Tensor): A tensor of image batch with shape + ``(batch_size, num_channels, height, width)``. + + Returns: + Tuple[torch.Tensor]: Please return a tuple of tensors and every + tensor is a feature map of specified scale. If you only want the + final feature map, simply return a tuple with one item. + """ + return super().forward(x) diff --git a/projects/glide/configs/README.md b/projects/glide/configs/README.md new file mode 100644 index 0000000000..e9081da481 --- /dev/null +++ b/projects/glide/configs/README.md @@ -0,0 +1,77 @@ +# GLIDE (Arxiv'2021) + +> [GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models](https://papers.nips.cc/paper/2021/file/49ad23d1ec9fa4bd8d77d02681df5cfa-Paper.pdf) + +> **Task**: Text2Image, diffusion + + + +## Abstract + + + +Diffusion models have recently been shown to generate high-quality synthetic images, especially when paired with a guidance technique to trade off diversity for fidelity. We explore diffusion models for the problem of text-conditional image synthesis and compare two different guidance strategies: CLIP guidance and classifier-free guidance. We find that the latter is preferred by human evaluators for both photorealism and caption similarity, and often produces photorealistic samples. Samples from a 3.5 billion parameter text-conditional diffusion model using classifierfree guidance are favored by human evaluators to those from DALL-E, even when the latter uses expensive CLIP reranking. 
Additionally, we find that our models can be fine-tuned to perform image inpainting, enabling powerful text-driven image editing. We train a smaller model on a filtered dataset and release the code and weights at https://github.com/openai/glide-text2im. + + + +
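+To make the classifier-free guidance mentioned above concrete, the sketch
+below shows how the conditional and unconditional noise predictions are
+combined at each sampling step. The helper name and tensor shapes are
+illustrative assumptions; the actual implementation lives in
+`projects/glide/models/glide.py`.
+
+```python
+import torch
+
+
+def classifier_free_guidance(cond_eps, uncond_eps, guidance_scale):
+    """eps = uncond + s * (cond - uncond); s > 1 trades diversity for fidelity."""
+    return uncond_eps + guidance_scale * (cond_eps - uncond_eps)
+
+
+# Toy example with random "predictions" of shape (batch, channels, h, w).
+cond_eps = torch.randn(2, 3, 64, 64)
+uncond_eps = torch.randn(2, 3, 64, 64)
+eps = classifier_free_guidance(cond_eps, uncond_eps, guidance_scale=3.0)
+```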
+ +
+ +## Results and models + +
+**an oil painting of a corgi**
+
+**a cartoon painting of a cat**
+
+ +**Laion** + +| Method | Resolution | Config | Weights | +| ------ | ---------- | -------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | +| Glide | 64x64 | [config](projects/glide/configs/glide_ddim-classifier-free_laion-64x64.py) | [model](https://download.openmmlab.com/mmediting/glide/glide_laion-64x64-02afff47.pth) | + +## Quick Start + +You can run glide as follows: + +```python +import torch +from mmedit.apis import init_model +from mmedit.utils import register_all_modules +from projects.glide.models import * + +register_all_modules() + +config = 'projects/glide/configs/glide_ddim-classifier-free_laion-64x64.py' +ckpt = 'https://download.openmmlab.com/mmediting/glide/glide_laion-64x64-02afff47.pth' +model = init_model(config, ckpt).cuda().eval() +prompt = "an oil painting of a corgi" + +with torch.no_grad(): + samples = model.infer(init_image=None, + prompt=prompt, + batch_size=16, + guidance_scale=3., + num_inference_steps=100, + labels=None, + classifier_scale=0.0, + show_progress=True)['samples'] +``` + +## Citation + +```bibtex +@article{2021GLIDE, + title={GLIDE: Towards Photorealistic Image Generation and Editing with Text-Guided Diffusion Models}, + author={ Nichol, A. and Dhariwal, P. and Ramesh, A. and Shyam, P. and Mishkin, P. and Mcgrew, B. and Sutskever, I. and Chen, M. }, + year={2021}, +} +``` diff --git a/projects/glide/configs/glide_ddim-classifier-free_laion-64x64.py b/projects/glide/configs/glide_ddim-classifier-free_laion-64x64.py new file mode 100644 index 0000000000..5344540c3e --- /dev/null +++ b/projects/glide/configs/glide_ddim-classifier-free_laion-64x64.py @@ -0,0 +1,35 @@ +model = dict( + type='Glide', + data_preprocessor=dict( + type='EditDataPreprocessor', mean=[127.5], std=[127.5]), + unet=dict( + type='Text2ImUNet', + image_size=64, + base_channels=192, + in_channels=3, + resblocks_per_downsample=3, + attention_res=(32, 16, 8), + norm_cfg=dict(type='GN32', num_groups=32), + dropout=0.1, + num_classes=0, + use_fp16=False, + resblock_updown=True, + attention_cfg=dict( + type='MultiHeadAttentionBlock', + num_heads=1, + num_head_channels=64, + use_new_attention_order=False, + encoder_channels=512), + use_scale_shift_norm=True, + text_ctx=128, + xf_width=512, + xf_layers=16, + xf_heads=8, + xf_final_ln=True, + xf_padding=True, + ), + diffusion_scheduler=dict( + type='DDIMScheduler', + variance_type='learned_range', + beta_schedule='squaredcos_cap_v2'), + use_fp16=False) diff --git a/projects/glide/models/__init__.py b/projects/glide/models/__init__.py new file mode 100644 index 0000000000..6b40ebf542 --- /dev/null +++ b/projects/glide/models/__init__.py @@ -0,0 +1,4 @@ +from .glide import Glide +from .text2im_unet import Text2ImUNet + +__all__ = ['Text2ImUNet', 'Glide'] diff --git a/projects/glide/models/glide.py b/projects/glide/models/glide.py new file mode 100644 index 0000000000..eb1d96e861 --- /dev/null +++ b/projects/glide/models/glide.py @@ -0,0 +1,330 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
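+#
+# ``Glide`` wraps the GLIDE text-to-image diffusion model for MMEditing:
+# ``infer`` runs scheduler-driven sampling with classifier-free guidance
+# (plus optional classifier guidance), while ``forward``, ``val_step``,
+# ``test_step`` and ``train_step`` adapt the model to the MMEngine runner
+# interface.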
+from copy import deepcopy +from typing import Dict, List, Optional, Union + +import mmengine +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmengine import MessageHub +from mmengine.model import BaseModel, is_model_wrapper +from mmengine.optim import OptimWrapperDict +from mmengine.runner.checkpoint import _load_checkpoint_with_prefix +from tqdm import tqdm + +from mmedit.registry import DIFFUSION_SCHEDULERS, MODELS, MODULES +from mmedit.structures import EditDataSample, PixelData +from mmedit.utils.typing import ForwardInputs, SampleList + +# from .guider import ImageTextGuider + +ModelType = Union[Dict, nn.Module] + + +def classifier_grad(classifier, x, t, y=None, classifier_scale=1.0): + """compute classification gradient to x.""" + assert y is not None + with torch.enable_grad(): + x_in = x.detach().requires_grad_(True) + logits = classifier(x_in, t) + log_probs = F.log_softmax(logits, dim=-1) + selected = log_probs[range(len(logits)), y.view(-1)] + return torch.autograd.grad(selected.sum(), x_in)[0] * classifier_scale + + +@MODELS.register_module() +class Glide(BaseModel): + """Guided diffusion Model. + + Args: + data_preprocessor (dict, optional): The pre-process config of + :class:`BaseDataPreprocessor`. + unet (ModelType): Config of denoising Unet. + diffusion_scheduler (ModelType): Config of diffusion_scheduler + scheduler. + use_fp16 (bool): Whether to use fp16 for unet model. Defaults to False. + classifier (ModelType): Config of classifier. Defaults to None. + pretrained_cfgs (dict): Path Config for pretrained weights. Usually + this is a dict contains module name and the corresponding ckpt + path.Defaults to None. + """ + + def __init__(self, + data_preprocessor, + unet, + diffusion_scheduler, + use_fp16=False, + classifier=None, + classifier_scale=1.0, + pretrained_cfgs=None): + + super().__init__(data_preprocessor=data_preprocessor) + self.unet = MODULES.build(unet) + self.diffusion_scheduler = DIFFUSION_SCHEDULERS.build( + diffusion_scheduler) + if classifier: + self.classifier = MODULES.build(classifier) + else: + self.classifier = None + self.classifier_scale = classifier_scale + + if pretrained_cfgs: + self.load_pretrained_models(pretrained_cfgs) + if use_fp16: + mmengine.print_log('Convert unet modules to floatpoint16') + self.unet.convert_to_fp16() + + def load_pretrained_models(self, pretrained_cfgs): + """_summary_ + + Args: + pretrained_cfgs (_type_): _description_ + """ + for key, ckpt_cfg in pretrained_cfgs.items(): + prefix = ckpt_cfg.get('prefix', '') + map_location = ckpt_cfg.get('map_location', 'cpu') + strict = ckpt_cfg.get('strict', True) + ckpt_path = ckpt_cfg.get('ckpt_path') + state_dict = _load_checkpoint_with_prefix(prefix, ckpt_path, + map_location) + getattr(self, key).load_state_dict(state_dict, strict=strict) + mmengine.print_log(f'Load pretrained {key} from {ckpt_path}') + + @property + def device(self): + """Get current device of the model. + + Returns: + torch.device: The current device of the model. + """ + return next(self.parameters()).device + + @torch.no_grad() + def infer(self, + init_image=None, + prompt=None, + batch_size=1, + guidance_scale=3., + num_inference_steps=50, + labels=None, + classifier_scale=0.0, + show_progress=False): + """_summary_ + + Args: + init_image (_type_, optional): _description_. Defaults to None. + batch_size (int, optional): _description_. Defaults to 1. + num_inference_steps (int, optional): _description_. + Defaults to 1000. + labels (_type_, optional): _description_. 
Defaults to None. + show_progress (bool, optional): _description_. Defaults to False. + + Returns: + _type_: _description_ + """ + # Sample gaussian noise to begin loop + if init_image is None: + image = torch.randn((2 * batch_size, self.unet.in_channels, + self.unet.image_size, self.unet.image_size)) + image = image.to(self.device) + else: + image = init_image + + # set step values + if num_inference_steps > 0: + self.diffusion_scheduler.set_timesteps(num_inference_steps) + + timesteps = self.diffusion_scheduler.timesteps + + # text embedding + tokens = self.unet.tokenizer.encode(prompt) + tokens, mask = self.unet.tokenizer.padded_tokens_and_mask(tokens, 128) + + # Create the classifier-free guidance tokens (empty) + # full_batch_size = batch_size * 2 + uncond_tokens, uncond_mask = \ + self.unet.tokenizer.padded_tokens_and_mask( + [], 128) + + tokens = torch.tensor( + [tokens] * batch_size + [uncond_tokens] * batch_size, + device=self.device) + mask = torch.tensor( + [mask] * batch_size + [uncond_mask] * batch_size, + dtype=torch.bool, + device=self.device) + + if show_progress and mmengine.dist.is_main_process(): + timesteps = tqdm(timesteps) + + for t in timesteps: + # 1. predicted model_output + half = image[:len(image) // 2] + combined = torch.concat([half, half], dim=0) + model_output = self.unet(combined, t, tokens=tokens, mask=mask) + eps, rest = model_output[:, :3], model_output[:, 3:] + cond_eps, uncond_eps = torch.split(eps, len(eps) // 2, dim=0) + half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps) + eps = torch.cat([half_eps, half_eps], dim=0) + noise_pred = torch.concat([eps, rest], dim=1) + # noise_pred_text, noise_pred_uncond = model_output.chunk(2) + # noise_pred = noise_pred_uncond + guidance_scale * + # (noise_pred_text - noise_pred_uncond) + + # 2. compute previous image: x_t -> x_t-1 + diffusion_scheduler_output = self.diffusion_scheduler.step( + noise_pred, t, image) + + # 3. applying classifier guide + if self.classifier and classifier_scale != 0.0: + gradient = classifier_grad( + self.classifier, + image, + t, + labels, + classifier_scale=classifier_scale) + guided_mean = ( + diffusion_scheduler_output['mean'].float() + + diffusion_scheduler_output['sigma'] * gradient.float()) + image = guided_mean + diffusion_scheduler_output[ + 'sigma'] * diffusion_scheduler_output['noise'] + else: + image = diffusion_scheduler_output['prev_sample'] + + return {'samples': image} + + def forward(self, + inputs: ForwardInputs, + data_samples: Optional[list] = None, + mode: Optional[str] = None) -> List[EditDataSample]: + """_summary_ + + Args: + inputs (ForwardInputs): _description_ + data_samples (Optional[list], optional): _description_. + Defaults to None. + mode (Optional[str], optional): _description_. Defaults to None. 
+ + Returns: + List[EditDataSample]: _description_ + """ + init_image = inputs.get('init_image', None) + batch_size = inputs.get('batch_size', 1) + labels = data_samples.get('labels', None) + sample_kwargs = inputs.get('sample_kwargs', dict()) + + num_inference_steps = sample_kwargs.get( + 'num_inference_steps', + self.diffusion_scheduler.num_train_timesteps) + show_progress = sample_kwargs.get('show_progress', False) + classifier_scale = sample_kwargs.get('classifier_scale', + self.classifier_scale) + + outputs = self.infer( + init_image=init_image, + batch_size=batch_size, + num_inference_steps=num_inference_steps, + show_progress=show_progress, + classifier_scale=classifier_scale) + + batch_sample_list = [] + for idx in range(batch_size): + gen_sample = EditDataSample() + if data_samples: + gen_sample.update(data_samples[idx]) + if isinstance(outputs, dict): + gen_sample.ema = EditDataSample( + fake_img=PixelData(data=outputs['ema'][idx]), + sample_model='ema') + gen_sample.orig = EditDataSample( + fake_img=PixelData(data=outputs['orig'][idx]), + sample_model='orig') + gen_sample.sample_model = 'ema/orig' + gen_sample.set_gt_label(labels[idx]) + gen_sample.ema.set_gt_label(labels[idx]) + gen_sample.orig.set_gt_label(labels[idx]) + else: + gen_sample.fake_img = PixelData(data=outputs[idx]) + gen_sample.set_gt_label(labels[idx]) + + # Append input condition (noise and sample_kwargs) to + # batch_sample_list + if init_image is not None: + gen_sample.noise = init_image[idx] + gen_sample.sample_kwargs = deepcopy(sample_kwargs) + batch_sample_list.append(gen_sample) + return batch_sample_list + + def val_step(self, data: dict) -> SampleList: + """Gets the generated image of given data. + + Calls ``self.data_preprocessor(data)`` and + ``self(inputs, data_sample, mode=None)`` in order. Return the + generated results which will be passed to evaluator. + + Args: + data (dict): Data sampled from metric specific + sampler. More detials in `Metrics` and `Evaluator`. + + Returns: + SampleList: Generated image or image dict. + """ + data = self.data_preprocessor(data) + outputs = self(**data) + return outputs + + def test_step(self, data: dict) -> SampleList: + """Gets the generated image of given data. Same as :meth:`val_step`. + + Args: + data (dict): Data sampled from metric specific + sampler. More detials in `Metrics` and `Evaluator`. + + Returns: + List[EditDataSample]: Generated image or image dict. + """ + data = self.data_preprocessor(data) + outputs = self(**data) + return outputs + + def train_step(self, data: dict, optim_wrapper: OptimWrapperDict): + """_summary_ + + Args: + data (dict): _description_ + optim_wrapper (OptimWrapperDict): _description_ + + Returns: + _type_: _description_ + """ + message_hub = MessageHub.get_current_instance() + curr_iter = message_hub.get_info('iter') + + # sampling x0 and timestep + data = self.data_preprocessor(data) + real_imgs = data['inputs'] + timestep = self.diffusion_scheduler.sample_timestep() + + # calculating loss + loss_dict = self.diffusion_scheduler.training_loss( + self.unet, real_imgs, timestep) + loss, log_vars = self._parse_losses(loss_dict) + optim_wrapper['denoising'].update_params(loss) + + # update EMA + if self.with_ema_denoising and (curr_iter + 1) >= self.ema_start: + self.denoising_ema.update_parameters( + self.denoising_ema. 
+ module if is_model_wrapper(self.denoising) else self.denoising) + # if not update buffer, copy buffer from orig model + if not self.denoising_ema.update_buffers: + self.denoising_ema.sync_buffers( + self.denoising.module + if is_model_wrapper(self.denoising) else self.denoising) + elif self.with_ema_denoising: + # before ema, copy weights from orig + self.denoising_ema.sync_parameters( + self.denoising. + module if is_model_wrapper(self.denoising) else self.denoising) + + return log_vars diff --git a/projects/glide/models/glide_modules.py b/projects/glide/models/glide_modules.py new file mode 100644 index 0000000000..0c2d95203a --- /dev/null +++ b/projects/glide/models/glide_modules.py @@ -0,0 +1,111 @@ +import math + +import torch +import torch.nn as nn + + +class QKVMultiheadAttention(nn.Module): + + def __init__(self, n_heads: int, n_ctx: int): + super().__init__() + self.n_heads = n_heads + self.n_ctx = n_ctx + + def forward(self, qkv): + bs, n_ctx, width = qkv.shape + attn_ch = width // self.n_heads // 3 + scale = 1 / math.sqrt(math.sqrt(attn_ch)) + qkv = qkv.view(bs, n_ctx, self.n_heads, -1) + q, k, v = torch.split(qkv, attn_ch, dim=-1) + weight = torch.einsum( + 'bthc,bshc->bhts', q * scale, + k * scale) # More stable with f16 than dividing afterwards + wdtype = weight.dtype + weight = torch.softmax(weight.float(), dim=-1).type(wdtype) + return torch.einsum('bhts,bshc->bthc', weight, + v).reshape(bs, n_ctx, -1) + + +class MultiheadAttention(nn.Module): + + def __init__(self, n_ctx, width, heads): + super().__init__() + self.n_ctx = n_ctx + self.width = width + self.heads = heads + self.c_qkv = nn.Linear(width, width * 3) + self.c_proj = nn.Linear(width, width) + self.attention = QKVMultiheadAttention(heads, n_ctx) + + def forward(self, x): + x = self.c_qkv(x) + x = self.attention(x) + x = self.c_proj(x) + return x + + +class MLP(nn.Module): + + def __init__(self, width): + super().__init__() + self.width = width + self.c_fc = nn.Linear(width, width * 4) + self.c_proj = nn.Linear(width * 4, width) + self.gelu = nn.GELU() + + def forward(self, x): + return self.c_proj(self.gelu(self.c_fc(x))) + + +class ResidualAttentionBlock(nn.Module): + + def __init__( + self, + n_ctx: int, + width: int, + heads: int, + ): + super().__init__() + + self.attn = MultiheadAttention( + n_ctx, + width, + heads, + ) + # TODO should the LayerNorm support fp15 gain + # and bias but fp32 mean and std? 
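+        # (Presumably "fp15" above is a typo for fp16; a plain nn.LayerNorm
+        # is used here.)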
+ self.ln_1 = nn.LayerNorm(width) + self.mlp = MLP(width) + self.ln_2 = nn.LayerNorm(width) + + def forward(self, x: torch.Tensor): + x = x + self.attn(self.ln_1(x)) + x = x + self.mlp(self.ln_2(x)) + return x + + +class Transformer(nn.Module): + + def __init__( + self, + n_ctx: int, + width: int, + layers: int, + heads: int, + ): + super().__init__() + self.n_ctx = n_ctx + self.width = width + self.layers = layers + self.resblocks = nn.ModuleList([ + ResidualAttentionBlock( + n_ctx, + width, + heads, + ) for _ in range(layers) + ]) + + def forward(self, x: torch.Tensor): + for block in self.resblocks: + x = block(x) + return x diff --git a/projects/glide/models/glide_tokenizer/__init__.py b/projects/glide/models/glide_tokenizer/__init__.py new file mode 100644 index 0000000000..5813d9545e --- /dev/null +++ b/projects/glide/models/glide_tokenizer/__init__.py @@ -0,0 +1,3 @@ +from .bpe import get_encoder + +__all__ = ['get_encoder'] diff --git a/projects/glide/models/glide_tokenizer/bpe.py b/projects/glide/models/glide_tokenizer/bpe.py new file mode 100644 index 0000000000..6993c488d0 --- /dev/null +++ b/projects/glide/models/glide_tokenizer/bpe.py @@ -0,0 +1,163 @@ +"""Byte pair encoding utilities adapted from: + +https://github.com/openai/gpt-2/blob/master/src/encoder.py +""" + +import gzip +import json +import os +from functools import lru_cache +from typing import List, Tuple + +import regex as re + + +@lru_cache() +def bytes_to_unicode(): + """Returns list of utf-8 byte and a corresponding list of unicode strings. + + The reversible bpe codes work on unicode strings. This means you need a + large # of unicode characters in your vocab if you want to avoid UNKs. When + you're at something like a 10B token dataset you end up needing around 5K + for decent coverage. This is a significant percentage of your normal, say, + 32K bpe vocab. To avoid that, we want lookup tables between utf-8 bytes and + unicode strings. And avoids mapping to whitespace/control characters the + bpe code barfs on. + """ + bs = ( + list(range(ord('!'), + ord('~') + 1)) + list(range(ord('¡'), + ord('¬') + 1)) + + list(range(ord('®'), + ord('ÿ') + 1))) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + + Word is represented as tuple of symbols (symbols being variable-length + strings). 
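+
+    Example (illustrative)::
+
+        >>> sorted(get_pairs(('l', 'o', 'w')))
+        [('l', 'o'), ('o', 'w')]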
+ """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +class Encoder: + + def __init__(self, encoder, bpe_merges, errors='replace'): + self.encoder = encoder + self.decoder = {v: k for k, v in self.encoder.items()} + self.errors = errors # how to handle errors in decoding + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) + self.cache = {} + + # Should haved added re.IGNORECASE so BPE merges can happen for + # capitalized versions of contractions + self.pat = re.compile( + r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}] + +|\s+(?!\S)|\s+""") + + @property + def n_vocab(self) -> int: + return len(self.encoder) + + @property + def end_token(self) -> int: + return self.n_vocab - 1 + + def padded_tokens_and_mask(self, tokens: List[int], + text_ctx: int) -> Tuple[List[int], List[bool]]: + tokens = tokens[:text_ctx] + padding = text_ctx - len(tokens) + padded_tokens = tokens + [self.end_token] * padding + mask = [True] * len(tokens) + [False] * padding + return padded_tokens, mask + + def bpe(self, token): + if token in self.cache: + return self.cache[token] + word = tuple(token) + pairs = get_pairs(word) + + if not pairs: + return token + + while True: + bigram = min( + pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: # noqa: E722 + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[ + i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + text = text.lower() + bpe_tokens = [] + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] + for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] + for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = bytearray([self.byte_decoder[c] for c in text]).decode( + 'utf-8', errors=self.errors) + return text + + +def get_encoder(): + root_dir = os.path.dirname(os.path.abspath(__file__)) + with gzip.open(os.path.join(root_dir, 'encoder.json.gz'), 'r') as f: + encoder = json.load(f) + with gzip.open(os.path.join(root_dir, 'vocab.bpe.gz'), 'r') as f: + bpe_data = str(f.read(), 'utf-8') + bpe_merges = [ + tuple(merge_str.split()) for merge_str in bpe_data.split('\n')[1:-1] + ] + return Encoder( + encoder=encoder, + bpe_merges=bpe_merges, + ) diff --git a/projects/glide/models/glide_tokenizer/bpe_simple_vocab_16e6.txt.gz b/projects/glide/models/glide_tokenizer/bpe_simple_vocab_16e6.txt.gz new file mode 100644 index 0000000000..7b5088a527 Binary files /dev/null and b/projects/glide/models/glide_tokenizer/bpe_simple_vocab_16e6.txt.gz differ diff --git a/projects/glide/models/glide_tokenizer/encoder.json.gz b/projects/glide/models/glide_tokenizer/encoder.json.gz new file mode 100644 index 0000000000..87a7a6214a Binary files /dev/null and b/projects/glide/models/glide_tokenizer/encoder.json.gz differ diff 
--git a/projects/glide/models/glide_tokenizer/simple_tokenizer.py b/projects/glide/models/glide_tokenizer/simple_tokenizer.py new file mode 100644 index 0000000000..e9e49beb8d --- /dev/null +++ b/projects/glide/models/glide_tokenizer/simple_tokenizer.py @@ -0,0 +1,177 @@ +"""Copied from: https://github.com/openai/CLIP/blob/573315e83f07b53a61ff5098757 +e8fc885f1703e/clip/simple_tokenizer.py.""" + +import gzip +import html +import os +from functools import lru_cache +from typing import List, Tuple + +import ftfy +import regex as re + + +@lru_cache() +def default_bpe(): + return os.path.join( + os.path.dirname(os.path.abspath(__file__)), + 'bpe_simple_vocab_16e6.txt.gz') + + +@lru_cache() +def bytes_to_unicode(): + """Returns list of utf-8 byte and a corresponding list of unicode strings. + + The reversible bpe codes work on unicode strings. This means you need a + large # of unicode characters in your vocab if you want to avoid UNKs. When + you're at something like a 10B token dataset you end up needing around 5K + for decent coverage. This is a significant percentage of your normal, say, + 32K bpe vocab. To avoid that, we want lookup tables between utf-8 bytes and + unicode strings. And avoids mapping to whitespace/control characters the + bpe code barfs on. + """ + bs = ( + list(range(ord('!'), + ord('~') + 1)) + list(range(ord('¡'), + ord('¬') + 1)) + + list(range(ord('®'), + ord('ÿ') + 1))) + cs = bs[:] + n = 0 + for b in range(2**8): + if b not in bs: + bs.append(b) + cs.append(2**8 + n) + n += 1 + cs = [chr(n) for n in cs] + return dict(zip(bs, cs)) + + +def get_pairs(word): + """Return set of symbol pairs in a word. + + Word is represented as tuple of symbols (symbols being variable-length + strings). + """ + pairs = set() + prev_char = word[0] + for char in word[1:]: + pairs.add((prev_char, char)) + prev_char = char + return pairs + + +def basic_clean(text): + text = ftfy.fix_text(text) + text = html.unescape(html.unescape(text)) + return text.strip() + + +def whitespace_clean(text): + text = re.sub(r'\s+', ' ', text) + text = text.strip() + return text + + +class SimpleTokenizer(object): + + def __init__(self, bpe_path: str = default_bpe()): + self.byte_encoder = bytes_to_unicode() + self.byte_decoder = {v: k for k, v in self.byte_encoder.items()} + merges = gzip.open(bpe_path).read().decode('utf-8').split('\n') + merges = merges[1:49152 - 256 - 2 + 1] + merges = [tuple(merge.split()) for merge in merges] + vocab = list(bytes_to_unicode().values()) + vocab = vocab + [v + '</w>' for v in vocab] + for merge in merges: + vocab.append(''.join(merge)) + vocab.extend(['<|startoftext|>', '<|endoftext|>']) + self.encoder = dict(zip(vocab, range(len(vocab)))) + self.decoder = {v: k for k, v in self.encoder.items()} + self.bpe_ranks = dict(zip(merges, range(len(merges)))) + self.cache = { + '<|startoftext|>': '<|startoftext|>', + '<|endoftext|>': '<|endoftext|>' + } + self.pat = re.compile( + r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p + {L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", + re.IGNORECASE, + ) + + @property + def start_token(self): + return self.encoder['<|startoftext|>'] + + @property + def end_token(self): + return self.encoder['<|endoftext|>'] + + def padded_tokens_and_len(self, tokens: List[int], + text_ctx: int) -> Tuple[List[int], int]: + tokens = [self.start_token] + tokens[:text_ctx - 2] + [self.end_token] + text_len = len(tokens) + padding = text_ctx - len(tokens) + padded_tokens = tokens + [0] * padding + return padded_tokens, text_len + + def bpe(self, token): + if
token in self.cache: + return self.cache[token] + word = tuple(token[:-1]) + (token[-1] + '</w>', ) + pairs = get_pairs(word) + + if not pairs: + return token + '</w>' + + while True: + bigram = min( + pairs, key=lambda pair: self.bpe_ranks.get(pair, float('inf'))) + if bigram not in self.bpe_ranks: + break + first, second = bigram + new_word = [] + i = 0 + while i < len(word): + try: + j = word.index(first, i) + new_word.extend(word[i:j]) + i = j + except: # noqa: E722 + new_word.extend(word[i:]) + break + + if word[i] == first and i < len(word) - 1 and word[ + i + 1] == second: + new_word.append(first + second) + i += 2 + else: + new_word.append(word[i]) + i += 1 + new_word = tuple(new_word) + word = new_word + if len(word) == 1: + break + else: + pairs = get_pairs(word) + word = ' '.join(word) + self.cache[token] = word + return word + + def encode(self, text): + bpe_tokens = [] + text = whitespace_clean(basic_clean(text)).lower() + for token in re.findall(self.pat, text): + token = ''.join(self.byte_encoder[b] + for b in token.encode('utf-8')) + bpe_tokens.extend(self.encoder[bpe_token] + for bpe_token in self.bpe(token).split(' ')) + return bpe_tokens + + def decode(self, tokens): + text = ''.join([self.decoder[token] for token in tokens]) + text = ( + bytearray([self.byte_decoder[c] for c in text + ]).decode('utf-8', + errors='replace').replace('</w>', ' ')) + return text diff --git a/projects/glide/models/glide_tokenizer/vocab.bpe.gz b/projects/glide/models/glide_tokenizer/vocab.bpe.gz new file mode 100644 index 0000000000..14c1476f23 Binary files /dev/null and b/projects/glide/models/glide_tokenizer/vocab.bpe.gz differ diff --git a/projects/glide/models/text2im_unet.py b/projects/glide/models/text2im_unet.py new file mode 100644 index 0000000000..c14d65d527 --- /dev/null +++ b/projects/glide/models/text2im_unet.py @@ -0,0 +1,157 @@ +import torch +import torch.nn as nn + +from mmedit.models import DenoisingUnet +from mmedit.registry import MODELS +from .glide_modules import Transformer +from .glide_tokenizer import get_encoder + + +@MODELS.register_module() +class Text2ImUNet(DenoisingUnet): + """A UNetModel that conditions on text with an encoding transformer. + Expects an extra kwarg `tokens` of text. + + :param text_ctx: number of text tokens to expect. + :param xf_width: width of the transformer. + :param xf_layers: depth of the transformer. + :param xf_heads: heads in the transformer. + :param xf_final_ln: use a LayerNorm after the output layer. + :param tokenizer: the text tokenizer for sampling/vocab size.
+ """ + + def __init__( + self, + text_ctx, + xf_width, + xf_layers, + xf_heads, + xf_final_ln, + *args, + tokenizer=get_encoder(), + cache_text_emb=False, + xf_ar=0.0, + xf_padding=False, + share_unemb=False, + **kwargs, + ): + self.text_ctx = text_ctx + self.xf_width = xf_width + self.xf_ar = xf_ar + self.xf_padding = xf_padding + self.tokenizer = tokenizer + + if not xf_width: + super().__init__(*args, **kwargs, encoder_channels=None) + else: + super().__init__(*args, **kwargs, encoder_channels=xf_width) + + # del self.label_embedding + + if self.xf_width: + self.transformer = Transformer( + text_ctx, + xf_width, + xf_layers, + xf_heads, + ) + if xf_final_ln: + self.final_ln = nn.LayerNorm(xf_width) + else: + self.final_ln = None + + self.token_embedding = nn.Embedding(self.tokenizer.n_vocab, + xf_width) + self.positional_embedding = nn.Parameter( + torch.empty(text_ctx, xf_width, dtype=torch.float32)) + self.transformer_proj = nn.Linear(xf_width, self.base_channels * 4) + + if self.xf_padding: + self.padding_embedding = nn.Parameter( + torch.empty(text_ctx, xf_width, dtype=torch.float32)) + if self.xf_ar: + self.unemb = nn.Linear(xf_width, self.tokenizer.n_vocab) + if share_unemb: + self.unemb.weight = self.token_embedding.weight + + self.cache_text_emb = cache_text_emb + self.cache = None + + # def convert_to_fp16(self): + # super().convert_to_fp16() + # if self.xf_width: + # self.transformer.apply(convert_module_to_f16) + # self.transformer_proj.to(torch.float16) + # self.token_embedding.to(torch.float16) + # self.positional_embedding.to(torch.float16) + # if self.xf_padding: + # self.padding_embedding.to(torch.float16) + # if self.xf_ar: + # self.unemb.to(torch.float16) + + def get_text_emb(self, tokens, mask): + assert tokens is not None + + if self.cache_text_emb and self.cache is not None: + assert (tokens == self.cache['tokens']).all( + ), f"Tokens {tokens.cpu().numpy().tolist()} do not match \ + cache {self.cache['tokens'].cpu().numpy().tolist()}" + + return self.cache + + xf_in = self.token_embedding(tokens.long()) + xf_in = xf_in + self.positional_embedding[None] + if self.xf_padding: + assert mask is not None + xf_in = torch.where(mask[..., None], xf_in, + self.padding_embedding[None]) + xf_out = self.transformer(xf_in.to(self.dtype)) + if self.final_ln is not None: + xf_out = self.final_ln(xf_out) + xf_proj = self.transformer_proj(xf_out[:, -1]) + xf_out = xf_out.permute(0, 2, 1) # NLC -> NCL + + outputs = dict(xf_proj=xf_proj, xf_out=xf_out) + + if self.cache_text_emb: + self.cache = dict( + tokens=tokens, + xf_proj=xf_proj.detach(), + xf_out=xf_out.detach() if xf_out is not None else None, + ) + + return outputs + + def del_cache(self): + self.cache = None + + def forward(self, x, timesteps, tokens=None, mask=None): + hs = [] + if not torch.is_tensor(timesteps): + timesteps = torch.tensor([timesteps], + dtype=torch.long, + device=x.device) + elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None].to(x.device) + + # TODO not sure + if timesteps.shape[0] != x.shape[0]: + timesteps = timesteps.repeat(x.shape[0]) + emb = self.time_embedding(timesteps) + if self.xf_width: + text_outputs = self.get_text_emb(tokens, mask) + xf_proj, xf_out = text_outputs['xf_proj'], text_outputs['xf_out'] + emb = emb + xf_proj.to(emb) + else: + xf_out = None + h = x.type(self.dtype) + for module in self.in_blocks: + h = module(h, emb, xf_out) + hs.append(h) + h = self.mid_blocks(h, emb, xf_out) + for module in self.out_blocks: + h = torch.cat([h, hs.pop()], 
dim=1) + h = module(h, emb, xf_out) + h = h.type(x.dtype) + h = self.out(h) + return h diff --git a/requirements/docs.txt b/requirements/docs.txt index 64eb44b257..486729d482 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,6 +1,10 @@ docutils==0.16.0 +modelindex myst_parser -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme -sphinx==4.0.2 +sphinx==4.5.0 +sphinx-autoapi sphinx-copybutton +sphinx-notfound-page +sphinx-tabs sphinx_markdown_tables diff --git a/requirements/optional.txt b/requirements/optional.txt index d1d51047ce..dc41e8ab66 100644 --- a/requirements/optional.txt +++ b/requirements/optional.txt @@ -3,3 +3,4 @@ imageio-ffmpeg==0.4.4 mmdet >= 3.0.0rc2 open_clip_torch PyQt5 +transformers diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt index 3d5505bbf7..17e5a5c8ea 100644 --- a/requirements/readthedocs.txt +++ b/requirements/readthedocs.txt @@ -1,9 +1,13 @@ lmdb +lpips mmcv>=2.0.0rc1 +mmdet >= 3.0.0rc2 +mmengine prettytable Pygments regex scikit-image +tabulate titlecase torch torchvision diff --git a/requirements/runtime.txt b/requirements/runtime.txt index 1035119e93..d98b318732 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,5 +1,6 @@ av av==8.0.3; python_version < '3.7' +einops face-alignment facexlib lmdb @@ -16,5 +17,3 @@ opencv-python!=4.5.5.62,!=4.5.5.64 Pillow resize_right tensorboard -torch -torchvision diff --git a/requirements/tests.txt b/requirements/tests.txt index 44d49a9d9c..419a6c9ffb 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -1,3 +1,4 @@ +clip @ git+https://github.com/openai/CLIP.git@d50d76daa670286dd6cacf3bcd80b5e4823fc8e1 # codecov # flake8 # isort==5.10.1 @@ -5,6 +6,9 @@ # pytest # pytest-runner # yapf -coverage +coverage < 7.0.0 +imageio-ffmpeg==0.4.4 interrogate +mmdet >= 3.0.0rc2 pytest +transformers diff --git a/tests/data/coco/annotations/captions_train2014.json b/tests/data/coco/annotations/captions_train2014.json new file mode 100644 index 0000000000..466f72b6c5 --- /dev/null +++ b/tests/data/coco/annotations/captions_train2014.json @@ -0,0 +1,3 @@ +{ + "annotations": [{"image_id": 9, "caption": "a good meal"}] +} \ No newline at end of file diff --git a/tests/data/coco/annotations/captions_val2014.json b/tests/data/coco/annotations/captions_val2014.json new file mode 100644 index 0000000000..dba978c89f --- /dev/null +++ b/tests/data/coco/annotations/captions_val2014.json @@ -0,0 +1,3 @@ +{ + "annotations": [{"image_id": 42, "caption": "a pair of slippers"}] +} \ No newline at end of file diff --git a/tests/data/coco/train2014/COCO_train2014_000000000009.jpg b/tests/data/coco/train2014/COCO_train2014_000000000009.jpg new file mode 100644 index 0000000000..83d12e57f2 Binary files /dev/null and b/tests/data/coco/train2014/COCO_train2014_000000000009.jpg differ diff --git a/tests/data/coco/val2014/COCO_val2014_000000000042.jpg b/tests/data/coco/val2014/COCO_val2014_000000000042.jpg new file mode 100644 index 0000000000..8f03e8e170 Binary files /dev/null and b/tests/data/coco/val2014/COCO_val2014_000000000042.jpg differ diff --git a/tests/data/dataset/anno.json b/tests/data/dataset/anno.json new file mode 100644 index 0000000000..d1cdadedfa --- /dev/null +++ b/tests/data/dataset/anno.json @@ -0,0 +1,14 @@ +{ + "a/1.JPG": [ + 1, + 2, + 3, + 4 + ], + "b/2.jpeg": [ + 1, + 4, + 5, + 3 + ] +} \ No newline at end of file diff --git a/tests/data/dataset/wrong.yml b/tests/data/dataset/wrong.yml new file mode 100644 index 
0000000000..e69de29bb2 diff --git a/tests/test_apis/test_inferencers/test_inference_functions.py b/tests/test_apis/test_inferencers/test_inference_functions.py index 9eed091a74..21a0157456 100644 --- a/tests/test_apis/test_inferencers/test_inference_functions.py +++ b/tests/test_apis/test_inferencers/test_inference_functions.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. import os.path as osp import platform +import sys import unittest import pytest @@ -233,6 +234,8 @@ def test_restoration_inference(): assert output.detach().cpu().numpy().shape == (3, 480, 500) +@pytest.mark.skipif( + sys.version_info < (3, 7), reason='skip because python version is old.') def test_restoration_video_inference(): if torch.cuda.is_available(): device = torch.device('cuda', 0) diff --git a/tests/test_apis/test_inferencers/test_text2image_inferencers.py b/tests/test_apis/test_inferencers/test_text2image_inferencers.py index e70913c460..265238ed97 100644 --- a/tests/test_apis/test_inferencers/test_text2image_inferencers.py +++ b/tests/test_apis/test_inferencers/test_text2image_inferencers.py @@ -1,33 +1,107 @@ # Copyright (c) OpenMMLab. All rights reserved. import os.path as osp +import unittest +from unittest import TestCase +from unittest.mock import patch -import pytest import torch +import torch.nn as nn +from mmengine.utils import digit_version +from torchvision.version import __version__ as TV_VERSION from mmedit.apis.inferencers.text2image_inferencer import Text2ImageInferencer +from mmedit.models import DDIMScheduler, DenoisingUnet, DiscoDiffusion from mmedit.utils import register_all_modules register_all_modules() -@pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda') -def test_translation_inferencer(): - cfg = osp.join( - osp.dirname(__file__), '..', '..', '..', 'configs', 'disco_diffusion', - 'disco-diffusion_adm-u-finetuned_imagenet-512x512.py') - text = {0: ['sad']} - result_out_dir = osp.join( - osp.dirname(__file__), '..', '..', 'data', 'disco_result.png') +class clip_mock(nn.Module): - inferencer_instance = \ - Text2ImageInferencer( - cfg, None, extra_parameters={'num_inference_steps': 2}) - inferencer_instance(text=text) - inference_result = inferencer_instance( - text=text, result_out_dir=result_out_dir) - result_img = inference_result[1] - assert result_img[0].cpu().numpy().shape == (3, 512, 512) + def __init__(self, device='cuda'): + super().__init__() + self.register_buffer('tensor', torch.randn([1, 512])) + def encode_image(self, inputs): + return inputs.mean() * self.tensor.repeat(inputs.shape[0], 1).to( + inputs.device) -if __name__ == '__main__': - test_translation_inferencer() + def encode_text(self, inputs): + return self.tensor.repeat(inputs.shape[0], 1).to(inputs.device) + + def forward(self, x): + return x + + +class clip_mock_wrapper(nn.Module): + + def __init__(self): + super().__init__() + self.model = clip_mock() + + def forward(self, x): + return x + + +class TestTranslationInferencer(TestCase): + + def setUp(self): + self.unet32 = DenoisingUnet( + image_size=32, + in_channels=3, + base_channels=8, + resblocks_per_downsample=2, + attention_res=(8, ), + norm_cfg=dict(type='GN32', num_groups=8), + dropout=0.0, + num_classes=0, + use_fp16=True, + resblock_updown=True, + attention_cfg=dict( + type='MultiHeadAttentionBlock', + num_heads=2, + num_head_channels=8, + use_new_attention_order=False), + use_scale_shift_norm=True) + # mock clip + self.clip_models = [clip_mock_wrapper(), clip_mock_wrapper()] + # diffusion_scheduler + 
self.diffusion_scheduler = DDIMScheduler( + variance_type='learned_range', + beta_schedule='linear', + clip_sample=False) + + unet32 = self.unet32 + diffusion_scheduler = self.diffusion_scheduler + clip_models = self.clip_models + self.disco_diffusion = DiscoDiffusion( + unet=unet32, + diffusion_scheduler=diffusion_scheduler, + secondary_model=None, + clip_models=clip_models, + use_fp16=True).cuda() + + @unittest.skipIf( + digit_version(TV_VERSION) <= digit_version('0.7.0'), + reason='torchvision version limiation') + @unittest.skipIf(not torch.cuda.is_available(), reason='requires cuda') + def test_translation(self): + cfg_root = osp.join( + osp.dirname(__file__), '..', '..', '..', 'configs', + 'disco_diffusion') + cfg = osp.join(cfg_root, + 'disco-diffusion_adm-u-finetuned_imagenet-512x512.py') + text = {0: ['sad']} + result_out_dir = osp.join( + osp.dirname(__file__), '..', '..', 'data', 'disco_result.png') + + with patch.object(Text2ImageInferencer, '_init_model'): + inferencer_instance = Text2ImageInferencer( + cfg, None, extra_parameters={'num_inference_steps': 2}) + # mock model + inferencer_instance.model = self.disco_diffusion + inferencer_instance(text=text) + inference_result = inferencer_instance( + text=text, result_out_dir=result_out_dir) + result_img = inference_result[1] + assert result_img[0].cpu().numpy().shape == (3, 32, 32) diff --git a/tests/test_apis/test_inferencers/test_video_restoration_inferencer.py b/tests/test_apis/test_inferencers/test_video_restoration_inferencer.py index 09be036fb4..3cf61486af 100644 --- a/tests/test_apis/test_inferencers/test_video_restoration_inferencer.py +++ b/tests/test_apis/test_inferencers/test_video_restoration_inferencer.py @@ -1,5 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. import os.path as osp +import sys + +import pytest from mmedit.apis.inferencers.video_restoration_inferencer import \ VideoRestorationInferencer @@ -8,6 +11,8 @@ register_all_modules() +@pytest.mark.skipif( + sys.version_info < (3, 7), reason='skip because python version is old.') def test_video_restoration_inferencer(): cfg = osp.join( osp.dirname(__file__), '..', '..', '..', 'configs', 'basicvsr', @@ -27,6 +32,8 @@ def test_video_restoration_inferencer(): assert inference_result is None +@pytest.mark.skipif( + sys.version_info < (3, 7), reason='skip because python version is old.') def test_video_restoration_inferencer_input_dir(): cfg = osp.join( osp.dirname(__file__), '..', '..', '..', 'configs', 'basicvsr', @@ -47,6 +54,8 @@ def test_video_restoration_inferencer_input_dir(): assert inference_result is None +@pytest.mark.skipif( + sys.version_info < (3, 7), reason='skip because python version is old.') def test_video_restoration_inferencer_window_size(): cfg = osp.join( osp.dirname(__file__), '..', '..', '..', 'configs', 'basicvsr', @@ -69,6 +78,8 @@ def test_video_restoration_inferencer_window_size(): assert inference_result is None +@pytest.mark.skipif( + sys.version_info < (3, 7), reason='skip because python version is old.') def test_video_restoration_inferencer_max_seq_len(): cfg = osp.join( osp.dirname(__file__), '..', '..', '..', 'configs', 'basicvsr', diff --git a/tests/test_datasets/test_basic_conditional_dataset.py b/tests/test_datasets/test_basic_conditional_dataset.py index 16b6205d4c..85e34cdfac 100644 --- a/tests/test_datasets/test_basic_conditional_dataset.py +++ b/tests/test_datasets/test_basic_conditional_dataset.py @@ -32,6 +32,10 @@ def test_init(self): self.assertEqual(dataset.CLASSES, ('bus', 'car')) 
self.assertEqual(dataset.class_to_idx, {'bus': 0, 'car': 1}) + ann_file = osp.abspath(osp.join(DATA_DIR, 'wrong.yml')) + with self.assertRaises(TypeError): + BasicConditionalDataset(data_root=DATA_DIR, ann_file=ann_file) + gt_labels = dataset.get_gt_labels() print(type(gt_labels)) self.assertTrue((gt_labels == np.array([0, 1, 1])).all()) @@ -79,3 +83,15 @@ def test_init(self): self.assertFalse(dataset._fully_initialized) self.assertIn("Haven't been initialized", repr(dataset)) self.assertIn('With transforms:', repr(dataset)) + + # test load label from json file + ann_file = osp.abspath(osp.join(DATA_DIR, 'anno.json')) + dataset = BasicConditionalDataset( + data_root=DATA_DIR, + ann_file=ann_file, + lazy_init=True, + pipeline=[dict(type='PackEditInputs')]) + self.assertEqual(dataset[0]['data_samples'].gt_label.label.tolist(), + [1, 2, 3, 4]) + self.assertEqual(dataset[1]['data_samples'].gt_label.label.tolist(), + [1, 4, 5, 3]) diff --git a/tests/test_datasets/test_mscoco_dataset.py b/tests/test_datasets/test_mscoco_dataset.py new file mode 100644 index 0000000000..5ebc1a7b01 --- /dev/null +++ b/tests/test_datasets/test_mscoco_dataset.py @@ -0,0 +1,31 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +from pathlib import Path + +from mmedit.datasets import MSCoCoDataset + + +class TestMSCoCoDatasets: + + @classmethod + def setup_class(cls): + cls.data_root = Path(__file__).parent.parent / 'data' / 'coco' + + def test_mscoco(self): + + # test basic usage + dataset = MSCoCoDataset(data_root=self.data_root, pipeline=[]) + assert dataset[0] == dict( + gt_label='a good meal', + img_path=os.path.join(self.data_root, 'train2014', + 'COCO_train2014_000000000009.jpg'), + sample_idx=0) + + # test with different phase + dataset = MSCoCoDataset( + data_root=self.data_root, phase='val', pipeline=[]) + assert dataset[0] == dict( + gt_label='a pair of slippers', + img_path=os.path.join(self.data_root, 'val2014', + 'COCO_val2014_000000000042.jpg'), + sample_idx=0) diff --git a/tests/test_datasets/test_transforms/test_random_degradations.py b/tests/test_datasets/test_transforms/test_random_degradations.py index 2008033322..12546ac5eb 100644 --- a/tests/test_datasets/test_transforms/test_random_degradations.py +++ b/tests/test_datasets/test_transforms/test_random_degradations.py @@ -9,7 +9,7 @@ def test_random_noise(): results = {} - results['lq'] = np.ones((8, 8, 3)).astype(np.float32) + results['lq'] = np.ones((8, 8, 3)).astype(np.uint8) # Gaussian noise model = RandomNoise( @@ -49,17 +49,21 @@ def test_random_noise(): def test_random_jpeg_compression(): results = {} - results['lq'] = np.ones((8, 8, 3)).astype(np.float32) + results['lq'] = np.ones((8, 8, 3)).astype(np.uint8) - model = RandomJPEGCompression(params=dict(quality=[5, 50]), keys=['lq']) + model = RandomJPEGCompression( + params=dict(quality=[5, 50], color_type='color'), keys=['lq']) results = model(results) assert results['lq'].shape == (8, 8, 3) # skip degradations with prob < 1 - params = dict(quality=[5, 50], prob=0) + params = dict(quality=[5, 50], color_type='color', prob=0) model = RandomJPEGCompression(params=params, keys=['lq']) assert model(results) == results + model = RandomJPEGCompression(params=params, keys=['lq'], bgr2rgb=True) + assert model(results)['lq'].shape == results['lq'].shape + assert repr(model) == model.__class__.__name__ + f'(params={params}, ' \ + "keys=['lq'])" @@ -334,7 +338,7 @@ def test_random_blur(): def test_degradations_with_shuffle(): results = {} - results['lq'] = np.ones((8, 8, 
3)).astype(np.float32) + results['lq'] = np.ones((8, 8, 3)).astype(np.uint8) # shuffle all model = DegradationsWithShuffle( @@ -360,10 +364,10 @@ def test_degradations_with_shuffle(): [ dict( type='RandomJPEGCompression', - params=dict(quality=[5, 10])), + params=dict(quality=[5, 10], color_type='color')), dict( type='RandomJPEGCompression', - params=dict(quality=[15, 20])) + params=dict(quality=[15, 20], color_type='color')) ] ], keys=['lq'], @@ -391,8 +395,12 @@ def test_degradations_with_shuffle(): resize_prob=[1 / 3., 1 / 3., 1 / 3.], target_size=(16, 16))), [ - dict(type='RandomJPEGCompression', params=dict(quality=[5, 10])), - dict(type='RandomJPEGCompression', params=dict(quality=[15, 20])) + dict( + type='RandomJPEGCompression', + params=dict(quality=[5, 10], color_type='color')), + dict( + type='RandomJPEGCompression', + params=dict(quality=[15, 20], color_type='color')) ] ] model = DegradationsWithShuffle( diff --git a/tests/test_evaluation/test_metrics/test_base_gen_metric.py b/tests/test_evaluation/test_metrics/test_base_gen_metric.py index da0602fe60..699f188ac2 100644 --- a/tests/test_evaluation/test_metrics/test_base_gen_metric.py +++ b/tests/test_evaluation/test_metrics/test_base_gen_metric.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from unittest.mock import MagicMock, patch +import pytest import torch from mmengine.model import MMDistributedDataParallel @@ -43,6 +44,7 @@ def test_GenMetric(): # test get_metric_sampler model = MagicMock() dataset = MagicMock() + dataset.__len__.return_value = 10 dataloader = MagicMock() dataloader.batch_size = 4 dataloader.dataset = dataset @@ -57,6 +59,11 @@ def test_GenMetric(): metric.prepare(model, dataloader) assert metric.data_preprocessor == preprocessor + # test raise error with dataset is length than real_nums + dataset.__len__.return_value = 5 + with pytest.raises(AssertionError): + metric.get_metric_sampler(model, dataloader, [metric]) + def test_GenerativeMetric(): metric = ToyGenerativeMetric(11, need_cond_input=True) diff --git a/tests/test_evaluation/test_metrics/test_swd.py b/tests/test_evaluation/test_metrics/test_swd.py index 9e85310198..201772670f 100644 --- a/tests/test_evaluation/test_metrics/test_swd.py +++ b/tests/test_evaluation/test_metrics/test_swd.py @@ -13,8 +13,7 @@ class TestSWD(TestCase): def test_init(self): - swd = SlicedWassersteinDistance( - fake_nums=10, image_shape=(3, 32, 32)) # noqa + swd = SlicedWassersteinDistance(fake_nums=10, image_shape=(3, 32, 32)) self.assertEqual(len(swd.real_results), 2) def test_prosess(self): @@ -35,6 +34,13 @@ def test_prosess(self): ] swd.process(real_samples, fake_samples) + # 100 samples are passed in 1 batch, _num_processed should be 100 + self.assertEqual(swd._num_processed, 100) + # _num_processed(100) > fake_nums(4), _num_processed should be + # unchanged + swd.process(real_samples, fake_samples) + self.assertEqual(swd._num_processed, 100) + output = swd.evaluate() result = [16.495922580361366, 24.15413036942482, 20.325026474893093] output = [item / 100 for item in output.values()] @@ -48,3 +54,34 @@ def test_prosess(self): sample_model='orig', image_shape=(3, 32, 32)) swd.prepare(model, None) + + # test gray scale input + swd.image_shape = (1, 32, 32) + real_samples = [ + dict(inputs=torch.rand(1, 32, 32) * 255.) 
for _ in range(100) + ] + fake_samples = [ + EditDataSample( + fake_img=PixelData(data=torch.rand(1, 32, 32) * 2 - 1), + gt_img=PixelData(data=torch.rand(1, 32, 32) * 2 - + 1)).to_dict() for _ in range(100) + ] + swd.process(real_samples, fake_samples) + + # test fake_nums is -1 + swd = SlicedWassersteinDistance( + fake_nums=-1, + fake_key='fake', + real_key='img', + sample_model='orig', + image_shape=(3, 32, 32)) + fake_samples = [ + EditDataSample( + fake_img=PixelData(data=torch.rand(3, 32, 32) * 2 - 1), + gt_img=PixelData(data=torch.rand(3, 32, 32) * 2 - + 1)).to_dict() for _ in range(10) + ] + for _ in range(3): + swd.process(None, fake_samples) + # fake_nums is -1, all samples (10 * 3 = 30) is processed + self.assertEqual(swd._num_processed, 30) diff --git a/tests/test_models/test_editors/test_cyclegan/test_cyclegan_modules.py b/tests/test_models/test_editors/test_cyclegan/test_cyclegan_modules.py index 4f5d4cc16e..d92515d7e6 100644 --- a/tests/test_models/test_editors/test_cyclegan/test_cyclegan_modules.py +++ b/tests/test_models/test_editors/test_cyclegan/test_cyclegan_modules.py @@ -12,6 +12,9 @@ def test_residual_block_with_dropout(): output = block(input) assert output.detach().numpy().shape == (2, 16, 128, 128) + block = ResidualBlockWithDropout(16, 'zeros', use_dropout=False) + assert len(block.block) == 2 + def test_gan_image_buffer(): # test buffer size = 0 @@ -46,3 +49,28 @@ def test_gan_image_buffer(): and torch.equal(buffer.image_buffer[0], img_tensor_0)) or \ (torch.equal(img_tensor_return, img_tensor_00) and torch.equal(buffer.image_buffer[0], img_tensor_1)) + + # set buffer ratio as 1 and 0 to cover more lines + buffer = GANImageBuffer(buffer_size=1, buffer_ratio=1) + img_np = np.random.randn(2, 3, 256, 256) + img_tensor = torch.from_numpy(img_np) + img_tensor_0 = torch.unsqueeze(img_tensor[0], 0) + img_tensor_1 = torch.unsqueeze(img_tensor[1], 0) + img_tensor_00 = torch.cat([img_tensor_0, img_tensor_0], 0) + img_tensor_return = buffer.query(img_tensor) + assert (torch.equal(img_tensor_return, img_tensor) + and torch.equal(buffer.image_buffer[0], img_tensor_0)) or \ + (torch.equal(img_tensor_return, img_tensor_00) + and torch.equal(buffer.image_buffer[0], img_tensor_1)) + + buffer = GANImageBuffer(buffer_size=1, buffer_ratio=0) + img_np = np.random.randn(2, 3, 256, 256) + img_tensor = torch.from_numpy(img_np) + img_tensor_0 = torch.unsqueeze(img_tensor[0], 0) + img_tensor_1 = torch.unsqueeze(img_tensor[1], 0) + img_tensor_00 = torch.cat([img_tensor_0, img_tensor_0], 0) + img_tensor_return = buffer.query(img_tensor) + assert (torch.equal(img_tensor_return, img_tensor) + and torch.equal(buffer.image_buffer[0], img_tensor_0)) or \ + (torch.equal(img_tensor_return, img_tensor_00) + and torch.equal(buffer.image_buffer[0], img_tensor_1)) diff --git a/tests/test_models/test_editors/test_ddim/test_ddim_scheduler.py b/tests/test_models/test_editors/test_ddim/test_ddim_scheduler.py new file mode 100644 index 0000000000..ba8eb0663c --- /dev/null +++ b/tests/test_models/test_editors/test_ddim/test_ddim_scheduler.py @@ -0,0 +1,52 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
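+# The tests below exercise DDIMScheduler: step() and add_noise() output shapes,
+# __len__, the supported beta schedules, and the error raised when step() is
+# called before set_timesteps().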
+import pytest +import torch + +from mmedit.models.editors.ddim.ddim_scheduler import DDIMScheduler + + +def test_ddim(): + modelout = torch.rand((1, 8, 32, 32)) + sample = torch.rand((1, 4, 32, 32)) + ddim = DDIMScheduler( + num_train_timesteps=1000, variance_type='learned_range') + ddim.set_timesteps(10) + result = ddim.step(modelout, 980, sample) + assert result['prev_sample'].shape == (1, 4, 32, 32) + + noise = torch.rand((1, 4, 32, 32)) + result = ddim.add_noise(sample, noise, 10) + assert result.shape == (1, 4, 32, 32) + + assert len(ddim) == 1000 + + +def test_ddim_init(): + ddim = DDIMScheduler( + num_train_timesteps=1000, beta_schedule='scaled_linear') + + ddim = DDIMScheduler( + num_train_timesteps=1000, beta_schedule='squaredcos_cap_v2') + + assert isinstance(ddim, DDIMScheduler) + + with pytest.raises(Exception): + DDIMScheduler(num_train_timesteps=1000, beta_schedule='fake') + + +def test_ddim_step(): + modelout = torch.rand((1, 8, 32, 32)) + sample = torch.rand((1, 4, 32, 32)) + ddim = DDIMScheduler( + num_train_timesteps=1000, variance_type='learned_range') + with pytest.raises(Exception): + ddim.step(modelout, 980, sample) + + ddim.set_timesteps(10) + result = ddim.step( + modelout, 980, sample, eta=1, use_clipped_model_output=True) + assert result['prev_sample'].shape == (1, 4, 32, 32) + + +if __name__ == '__main__': + test_ddim_step() diff --git a/tests/test_models/test_editors/test_ddpm/test_attention.py b/tests/test_models/test_editors/test_ddpm/test_attention.py new file mode 100644 index 0000000000..7aae5e85ec --- /dev/null +++ b/tests/test_models/test_editors/test_ddpm/test_attention.py @@ -0,0 +1,39 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmedit.models.editors.ddpm.attention import (ApproximateGELU, + CrossAttention, FeedForward, + Transformer2DModel) + + +def test_ApproximateGELU(): + input = torch.rand((16, 16)) + gelu = ApproximateGELU(16, 24) + output = gelu.forward(input) + assert output.shape == (16, 24) + + +def test_crossattention(): + input = torch.rand((2, 64, 64)) + crossattention = CrossAttention(64) + crossattention._slice_size = 2 + output = crossattention.forward(input) + assert output.shape == (2, 64, 64) + + +def test_Transformer2DModel_init(): + with pytest.raises(Exception): + Transformer2DModel(in_channels=32, num_vector_embeds=4) + + with pytest.raises(Exception): + Transformer2DModel() + + Transformer2DModel(in_channels=32, use_linear_projection=True) + + +def test_FeedForward(): + input = torch.rand((2, 64, 64)) + feed_forward = FeedForward(64, 64, activation_fn='geglu-approximate') + output = feed_forward.forward(input) + assert output.shape == (2, 64, 64) diff --git a/tests/test_models/test_editors/test_ddpm/test_ddpm_scheduler.py b/tests/test_models/test_editors/test_ddpm/test_ddpm_scheduler.py new file mode 100644 index 0000000000..fe4c33126b --- /dev/null +++ b/tests/test_models/test_editors/test_ddpm/test_ddpm_scheduler.py @@ -0,0 +1,42 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
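+# The tests below exercise DDPMScheduler: step() output shape, the variance
+# types, __len__, and the errors raised for unsupported beta schedules.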
+import pytest +import torch + +from mmedit.models.editors.ddpm.ddpm_scheduler import DDPMScheduler + + +def test_ddpm(): + modelout = torch.rand((1, 8, 32, 32)) + sample = torch.rand((1, 4, 32, 32)) + ddpm = DDPMScheduler( + num_train_timesteps=1000, variance_type='learned_range') + result = ddpm.step(modelout, 980, sample) + assert result['prev_sample'].shape == (1, 4, 32, 32) + + ddpm.set_timesteps(100) + + predicted_variance = torch.tensor(1.0) + ddpm._get_variance(t=0, predicted_variance=predicted_variance) + ddpm._get_variance(t=1, variance_type='fixed_large') + ddpm._get_variance(t=1, variance_type='fixed_large_log') + ddpm._get_variance(t=1, variance_type='learned') + + with pytest.raises(Exception): + ddpm.training_loss(1, 2, 3) + + with pytest.raises(Exception): + ddpm.sample_timestep() + + steps = len(ddpm) + assert steps == 1000 + + +def test_ddpm_init(): + DDPMScheduler(trained_betas=1) + + DDPMScheduler(beta_schedule='scaled_linear') + + DDPMScheduler(beta_schedule='squaredcos_cap_v2') + + with pytest.raises(Exception): + DDPMScheduler(beta_schedule='tem') diff --git a/tests/test_models/test_editors/test_ddpm/test_denoising_unet.py b/tests/test_models/test_editors/test_ddpm/test_denoising_unet.py new file mode 100644 index 0000000000..02be8c5cb3 --- /dev/null +++ b/tests/test_models/test_editors/test_ddpm/test_denoising_unet.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmedit.models.editors.ddpm.denoising_unet import (DenoisingUnet, + NormWithEmbedding) + + +def test_DenoisingUnet(): + input = torch.rand((1, 3, 32, 32)) + unet = DenoisingUnet(32) + output = unet.forward(input, 10) + assert output['outputs'].shape == (1, 6, 32, 32) + + +def test_NormWithEmbedding(): + input = torch.rand((4, 32)) + emb = torch.rand((4, 32)) + ins = NormWithEmbedding(32, 32) + output = ins.forward(input, emb) + assert output.shape == (4, 32, 4, 32) diff --git a/tests/test_models/test_editors/test_ddpm/test_embeddings.py b/tests/test_models/test_editors/test_ddpm/test_embeddings.py new file mode 100644 index 0000000000..811ebb8962 --- /dev/null +++ b/tests/test_models/test_editors/test_ddpm/test_embeddings.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmedit.models.editors.ddpm.embeddings import TimestepEmbedding, Timesteps + + +def test_TimestepEmbedding(): + input = torch.rand((1, 64, 16)) + timestep_emb = TimestepEmbedding( + in_channels=16, time_embed_dim=16, act_fn='mish') + output = timestep_emb.forward(input) + assert output.shape == (1, 64, 16) + + timestep_emb = TimestepEmbedding( + in_channels=16, time_embed_dim=16, out_dim=96) + timestep_emb.act = None + output = timestep_emb.forward(input) + assert output.shape == (1, 64, 96) + + +def test_Timesteps(): + input = torch.tensor([4]) + timesteps = Timesteps(num_channels=9) + emb = timesteps.forward(input) + assert emb.shape == (1, 9) diff --git a/tests/test_models/test_editors/test_ddpm/test_res_blocks.py b/tests/test_models/test_editors/test_ddpm/test_res_blocks.py new file mode 100644 index 0000000000..808af63b2c --- /dev/null +++ b/tests/test_models/test_editors/test_ddpm/test_res_blocks.py @@ -0,0 +1,34 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
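+# The tests below check output shapes of the DDPM residual blocks:
+# ResnetBlock2D (up/down paths), Downsample2D and Upsample2D.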
+import torch + +from mmedit.models.editors.ddpm.res_blocks import (Downsample2D, ResnetBlock2D, + Upsample2D) + + +def test_resnetblock2d(): + input = torch.rand((1, 64, 16, 16)) + resblock = ResnetBlock2D(in_channels=64, up=True) + output = resblock.forward(input, None) + assert output.shape == (1, 64, 64, 64) + + resblock = ResnetBlock2D(in_channels=64, down=True) + output = resblock.forward(input, None) + assert output.shape == (1, 64, 8, 8) + + +def test_Downsample2D(): + input = torch.rand((1, 64, 16, 16)) + downsample = Downsample2D(channels=64, use_conv=True, padding=0) + output = downsample.forward(input) + assert output.shape == (1, 64, 8, 8) + + +def test_Upsample2D(): + input = torch.rand((1, 64, 16, 16)) + upsample = Upsample2D(channels=64, use_conv_transpose=True) + output = upsample.forward(input) + assert output.shape == (1, 64, 32, 32) + + upsample = Upsample2D(channels=64) + output = upsample.forward(input, output_size=(32, 32)) + assert output.shape == (1, 64, 64, 64) diff --git a/tests/test_models/test_editors/test_ddpm/test_unet_blocks.py b/tests/test_models/test_editors/test_ddpm/test_unet_blocks.py new file mode 100644 index 0000000000..e7ea584e5f --- /dev/null +++ b/tests/test_models/test_editors/test_ddpm/test_unet_blocks.py @@ -0,0 +1,46 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmedit.models.editors.ddpm.unet_blocks import (CrossAttnDownBlock2D, + CrossAttnUpBlock2D, + UNetMidBlock2DCrossAttn, + get_down_block, + get_up_block) + + +def test_UNetMidBlock2DCrossAttn(): + input = torch.rand((1, 64, 64, 64)) + midblock = UNetMidBlock2DCrossAttn(64, 64, cross_attention_dim=64) + midblock.set_attention_slice(1) + output = midblock.forward(input) + assert output.shape == (1, 64, 64, 64) + + with pytest.raises(Exception): + midblock.set_attention_slice(8) + + +def test_CrossAttnDownBlock2D(): + input = torch.rand((1, 64, 64, 64)) + downblock = CrossAttnDownBlock2D(64, 64, 64, cross_attention_dim=64) + downblock.set_attention_slice(1) + output, _ = downblock.forward(input) + assert output.shape == (1, 64, 32, 32) + + with pytest.raises(Exception): + downblock.set_attention_slice(8) + + +def test_CrossAttnUpBlock2D(): + downblock = CrossAttnUpBlock2D(64, 64, 64, 64, cross_attention_dim=64) + downblock.set_attention_slice(1) + + +def test_get_down_block(): + with pytest.raises(Exception): + get_down_block('tem', 1, 1, 1, 1, True, 'silu', 1) + + +def get_get_up_block(): + with pytest.raises(Exception): + get_up_block('tem', 1, 1, 1, 1, 1, True, 'silu', 1) diff --git a/tests/test_models/test_editors/test_disco_diffusion/test_disco_diffusion.py b/tests/test_models/test_editors/test_disco_diffusion/test_disco_diffusion.py index ffd350ecf9..7a56a6603a 100644 --- a/tests/test_models/test_editors/test_disco_diffusion/test_disco_diffusion.py +++ b/tests/test_models/test_editors/test_disco_diffusion/test_disco_diffusion.py @@ -1,9 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import unittest +from copy import deepcopy from unittest import TestCase +from unittest.mock import patch -import pytest import torch import torch.nn as nn +from mmengine.utils import digit_version +from torchvision.version import __version__ as TV_VERSION from mmedit.models import DDIMScheduler, DenoisingUnet, DiscoDiffusion from mmedit.utils import register_all_modules @@ -40,9 +44,9 @@ def forward(self, x): class TestDiscoDiffusion(TestCase): - def test_init(self): + def setUp(self): # unet - unet32 = DenoisingUnet( + self.unet32 = DenoisingUnet( image_size=32, in_channels=3, base_channels=8, @@ -60,13 +64,17 @@ def test_init(self): use_new_attention_order=False), use_scale_shift_norm=True) # mock clip - clip_models = [clip_mock_wrapper(), clip_mock_wrapper()] + self.clip_models = [clip_mock_wrapper(), clip_mock_wrapper()] # diffusion_scheduler - diffusion_scheduler = DDIMScheduler( + self.diffusion_scheduler = DDIMScheduler( variance_type='learned_range', beta_schedule='linear', clip_sample=False) + def test_init(self): + unet32 = deepcopy(self.unet32) + diffusion_scheduler = deepcopy(self.diffusion_scheduler) + clip_models = deepcopy(self.clip_models) self.disco_diffusion = DiscoDiffusion( unet=unet32, diffusion_scheduler=diffusion_scheduler, @@ -74,9 +82,22 @@ def test_init(self): clip_models=clip_models, use_fp16=True) - @pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda') + @unittest.skipIf( + digit_version(TV_VERSION) <= digit_version('0.7.0'), + reason='torchvision version limitation') + @unittest.skipIf(not torch.cuda.is_available(), reason='requires cuda') def test_infer(self): + unet32 = deepcopy(self.unet32) + diffusion_scheduler = deepcopy(self.diffusion_scheduler) + clip_models = deepcopy(self.clip_models) + self.disco_diffusion = DiscoDiffusion( + unet=unet32, + diffusion_scheduler=diffusion_scheduler, + secondary_model=None, + clip_models=clip_models, + use_fp16=True) self.disco_diffusion.cuda().eval() + # test model structure text_prompts = { 0: ['clouds surround the mountains and palaces,sunshine,lake'] @@ -84,7 +105,7 @@ def test_infer(self): image = self.disco_diffusion.infer( text_prompts=text_prompts, show_progress=True, - num_inference_steps=5, + num_inference_steps=2, eta=0.8)['samples'] assert image.shape == (1, 3, 32, 32) # test with different text prompts @@ -96,7 +117,7 @@ def test_infer(self): image = self.disco_diffusion.infer( text_prompts=text_prompts, show_progress=True, - num_inference_steps=5, + num_inference_steps=2, eta=0.8)['samples'] assert image.shape == (1, 3, 32, 32) @@ -111,7 +132,7 @@ def test_infer(self): text_prompts=text_prompts, init_image=init_image, show_progress=True, - num_inference_steps=5, + num_inference_steps=2, eta=0.8)['samples'] assert image.shape == (1, 3, 32, 32) @@ -124,7 +145,7 @@ def test_infer(self): width=128, text_prompts=text_prompts, show_progress=True, - num_inference_steps=5, + num_inference_steps=2, eta=0.8)['samples'] assert image.shape == (1, 3, 64, 128) @@ -132,7 +153,7 @@ def test_infer(self): image = self.disco_diffusion.infer( text_prompts=text_prompts, show_progress=True, - num_inference_steps=5, + num_inference_steps=2, clip_guidance_scale=8000, eta=0.8)['samples'] assert image.shape == (1, 3, 32, 32) @@ -144,7 +165,7 @@ def test_infer(self): image = self.disco_diffusion.infer( text_prompts=text_prompts, show_progress=True, - num_inference_steps=5, + num_inference_steps=2, eta=0.8, tv_scale=tv_scale, sat_scale=sat_scale, @@ -160,7 +181,7 @@ def test_infer(self): image = 
self.disco_diffusion.infer( text_prompts=text_prompts, show_progress=True, - num_inference_steps=5, + num_inference_steps=2, eta=0.8, cut_overview=cut_overview, cut_innercut=cut_innercut, @@ -192,6 +213,35 @@ def test_infer(self): image = self.disco_diffusion.infer( text_prompts=text_prompts, show_progress=True, - num_inference_steps=5, + num_inference_steps=2, eta=0.8)['samples'] assert image.shape == (1, 3, 64, 64) + + class affineMock(nn.Module): + + def __init__(self, *args, **kwargs): + super().__init__() + + def forward(self, x): + return x + + mock_path = ('mmedit.models.editors.disco_diffusion.guider.' + 'TORCHVISION_VERSION') + affine_mock_path = ('mmedit.models.editors.disco_diffusion.guider.T.' + 'RandomAffine') + with patch(affine_mock_path, new=affineMock): + with patch(mock_path, '0.8.1'): + image = self.disco_diffusion.infer( + text_prompts=text_prompts, + show_progress=True, + num_inference_steps=2, + eta=0.8)['samples'] + assert image.shape == (1, 3, 64, 64) + + with patch(mock_path, '0.9.0'): + image = self.disco_diffusion.infer( + text_prompts=text_prompts, + show_progress=True, + num_inference_steps=2, + eta=0.8)['samples'] + assert image.shape == (1, 3, 64, 64) diff --git a/tests/test_models/test_editors/test_restormer/test_restormer_net.py b/tests/test_models/test_editors/test_restormer/test_restormer_net.py new file mode 100644 index 0000000000..64624d825f --- /dev/null +++ b/tests/test_models/test_editors/test_restormer/test_restormer_net.py @@ -0,0 +1,108 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmedit.models.editors import Restormer + + +@pytest.mark.skipif( + torch.__version__ < '1.8.0', + reason='skip on torch<1.8 due to unsupported PixelUnShuffle') +def test_restormer_cpu(): + """Test Restormer.""" + + # Motion Deblurring or Image Deraining + net = Restormer( + inp_channels=3, + out_channels=3, + dim=24, + num_blocks=[2, 2, 2, 4], + num_refinement_blocks=1, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='WithBias', + dual_pixel_task=False) + img = torch.rand(1, 3, 16, 16) + output = net(img) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 16, 16) + + # Image Denoising Gray + net = Restormer( + inp_channels=1, + out_channels=1, + dim=24, + num_blocks=[2, 2, 2, 4], + num_refinement_blocks=1, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False) + img = torch.rand(1, 1, 16, 16) + output = net(img) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 1, 16, 16) + + # Image Denoising Color + net = Restormer( + inp_channels=3, + out_channels=3, + dim=24, + num_blocks=[2, 2, 2, 4], + num_refinement_blocks=1, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='BiasFree', + dual_pixel_task=False) + img = torch.rand(1, 3, 16, 16) + output = net(img) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 16, 16) + + # Image Dual Defocus Deblurring + net = Restormer( + inp_channels=6, + out_channels=3, + dim=24, + num_blocks=[2, 2, 2, 4], + num_refinement_blocks=1, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='WithBias', + dual_pixel_task=True, + dual_keys=['imgL', 'imgR']) + img = dict() + img['imgL'] = torch.rand(1, 3, 16, 16) + img['imgR'] = torch.rand(1, 3, 16, 16) + output = net(img) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 16, 16) + + +@pytest.mark.skipif( + 
torch.__version__ < '1.8.0', + reason='skip on torch<1.8 due to unsupported PixelUnShuffle') +def test_restormer_cuda(): + net = Restormer( + inp_channels=3, + out_channels=3, + dim=24, + num_blocks=[2, 2, 2, 4], + num_refinement_blocks=1, + heads=[1, 2, 4, 8], + ffn_expansion_factor=2.66, + bias=False, + LayerNorm_type='WithBias', + dual_pixel_task=False) + img = torch.rand(1, 3, 16, 16) + + # Image Deblurring or Image Deraining (gpu) + if torch.cuda.is_available(): + net = net.cuda() + output = net(img.cuda()) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 16, 16) diff --git a/tests/test_models/test_editors/test_stable_diffusion/test_clip_wrapper.py b/tests/test_models/test_editors/test_stable_diffusion/test_clip_wrapper.py new file mode 100644 index 0000000000..35076616dd --- /dev/null +++ b/tests/test_models/test_editors/test_stable_diffusion/test_clip_wrapper.py @@ -0,0 +1,54 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import importlib +import sys + +import pytest +import torch + + +def test_clip_wrapper(): + from transformers import CLIPConfig + + from mmedit.models.editors.stable_diffusion.clip_wrapper import \ + StableDiffusionSafetyChecker + clipconfig = CLIPConfig() + safety_checker = StableDiffusionSafetyChecker(clipconfig) + + clip_input = torch.rand((1, 3, 224, 224)) + images_input = torch.rand((1, 512, 512, 3)) + + result = safety_checker.forward(clip_input, images_input) + assert result[0].shape == (1, 512, 512, 3) + + +def test_load_clip_submodels(): + from mmedit.models.editors.stable_diffusion.clip_wrapper import \ + load_clip_submodels + init_cfg = dict( + type='Pretrained', + pretrained_model_path='tem', + ) + + submodels = [] + with pytest.raises(Exception): + load_clip_submodels(init_cfg, submodels, True) + + +def test_load_clip_submodels_transformers_none(): + transformer_location = sys.modules['transformers'] + sys.modules['transformers'] = None + importlib.reload( + sys.modules['mmedit.models.editors.stable_diffusion.clip_wrapper']) + from mmedit.models.editors.stable_diffusion.clip_wrapper import \ + load_clip_submodels + + init_cfg = dict( + type='Pretrained', + pretrained_model_path='tem', + ) + submodels = [] + with pytest.raises(ImportError): + # import pdb;pdb.set_trace(); + load_clip_submodels(init_cfg, submodels, True) + + sys.modules['transformers'] = transformer_location diff --git a/tests/test_models/test_editors/test_stable_diffusion/test_stable_diffusion.py b/tests/test_models/test_editors/test_stable_diffusion/test_stable_diffusion.py new file mode 100644 index 0000000000..fb2fe3352d --- /dev/null +++ b/tests/test_models/test_editors/test_stable_diffusion/test_stable_diffusion.py @@ -0,0 +1,124 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
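+# The test below builds StableDiffusion from a minimal config, swaps in dummy
+# tokenizer/text-encoder stubs, and checks infer() input validation and the
+# output sample shape.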
+import platform +import sys + +import pytest +import torch +from addict import Dict +from mmengine import MODELS, Config + +from mmedit.utils import register_all_modules + +register_all_modules() + +unet = dict( + type='DenoisingUnet', + image_size=512, + base_channels=320, + channels_cfg=[1, 2, 4, 4], + unet_type='stable', + act_cfg=dict(type='silu', inplace=False), + cross_attention_dim=768, + num_heads=8, + in_channels=4, + layers_per_block=2, + down_block_types=[ + 'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D', + 'DownBlock2D' + ], + up_block_types=[ + 'UpBlock2D', 'CrossAttnUpBlock2D', 'CrossAttnUpBlock2D', + 'CrossAttnUpBlock2D' + ], + output_cfg=dict(var='fixed')) + +vae = dict( + act_fn='silu', + block_out_channels=[128, 256, 512, 512], + down_block_types=[ + 'DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D', + 'DownEncoderBlock2D' + ], + in_channels=3, + latent_channels=4, + layers_per_block=2, + norm_num_groups=32, + out_channels=3, + sample_size=512, + up_block_types=[ + 'UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D', + 'UpDecoderBlock2D' + ]) + +diffusion_scheduler = dict( + type='DDIMScheduler', + variance_type='learned_range', + beta_end=0.012, + beta_schedule='scaled_linear', + beta_start=0.00085, + num_train_timesteps=1000, + set_alpha_to_one=False, + clip_sample=False) + +init_cfg = dict(type='Pretrained', pretrained_model_path=None) + +model = dict( + type='StableDiffusion', + diffusion_scheduler=diffusion_scheduler, + unet=unet, + vae=vae, + init_cfg=init_cfg, + requires_safety_checker=False, +) + + +class dummy_tokenizer: + + def __init__(self): + self.model_max_length = 0 + + def __call__(self, + prompt, + padding='max_length', + max_length=0, + truncation=False, + return_tensors='pt'): + text_inputs = Dict() + text_inputs['input_ids'] = torch.ones([1, 77]) + text_inputs['attention_mask'] = torch.ones([1, 77]) + return text_inputs + + +class dummy_text_encoder: + + def __init__(self): + self.config = None + + def __call__(self, x, attention_mask): + result = torch.rand([1, 77, 768]) + return [result] + + +@pytest.mark.skipif( + 'win' in platform.system().lower(), + reason='skip on windows due to limited RAM.') +@pytest.mark.skipif( + sys.version_info < (3, 8), reason='skip because python version is old.') +def test_stable_diffusion(): + StableDiffuser = MODELS.build(Config(model)) + StableDiffuser.tokenizer = dummy_tokenizer() + StableDiffuser.text_encoder = dummy_text_encoder() + + with pytest.raises(Exception): + StableDiffuser.infer(1, height=64, width=64) + + with pytest.raises(Exception): + StableDiffuser.infer('temp', height=31, width=31) + + result = StableDiffuser.infer( + 'an insect robot preparing a delicious meal', + height=64, + width=64, + num_inference_steps=1) + + assert result['samples'].shape == (3, 64, 64) diff --git a/tests/test_models/test_editors/test_stable_diffusion/test_vae.py b/tests/test_models/test_editors/test_stable_diffusion/test_vae.py new file mode 100644 index 0000000000..0c8898a1e6 --- /dev/null +++ b/tests/test_models/test_editors/test_stable_diffusion/test_vae.py @@ -0,0 +1,66 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
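+# The tests below cover the Stable Diffusion VAE components: AutoencoderKL
+# forward, ResnetBlock2D, DiagonalGaussianDistribution, AttentionBlock and the
+# down/upsampling blocks.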
+import torch + +from mmedit.models.editors.stable_diffusion.vae import ( + AttentionBlock, AutoencoderKL, DiagonalGaussianDistribution, Downsample2D, + ResnetBlock2D, Upsample2D) + + +def test_vae(): + input = torch.rand((1, 3, 32, 32)) + vae = AutoencoderKL() + output = vae.forward(input) + assert output['sample'].shape == (1, 3, 32, 32) + + +def test_resnetblock2d(): + input = torch.rand((1, 64, 16, 16)) + resblock = ResnetBlock2D(in_channels=64, up=True) + output = resblock.forward(input, None) + assert output.shape == (1, 64, 64, 64) + + resblock = ResnetBlock2D(in_channels=64, down=True) + output = resblock.forward(input, None) + assert output.shape == (1, 64, 8, 8) + + +def test_DiagonalGaussianDistribution(): + param = torch.rand((1, 2, 16, 16)) + sample = torch.rand((1, 1, 16, 16)) + + gauss_dist = DiagonalGaussianDistribution(param, deterministic=False) + gauss_dist.sample() + gauss_dist.kl() + output = gauss_dist.nll(sample) + assert output.shape == (1, ) + + gauss_dist = DiagonalGaussianDistribution(param, deterministic=True) + gauss_dist.sample() + gauss_dist.kl() + output = gauss_dist.nll(sample) + assert output.shape == (1, ) + + +def test_AttentionBlock(): + input = torch.rand((1, 64, 32, 32)) + attention = AttentionBlock(64, num_head_channels=8) + output = attention.forward(input) + assert output.shape == (1, 64, 32, 32) + + +def test_Downsample2D(): + input = torch.rand((1, 64, 16, 16)) + downsample = Downsample2D(channels=64, use_conv=True, padding=0) + output = downsample.forward(input) + assert output.shape == (1, 64, 8, 8) + + +def test_Upsample2D(): + input = torch.rand((1, 64, 16, 16)) + upsample = Upsample2D(channels=64, use_conv_transpose=True) + output = upsample.forward(input) + assert output.shape == (1, 64, 32, 32) + + upsample = Upsample2D(channels=64) + output = upsample.forward(input, output_size=(32, 32)) + assert output.shape == (1, 64, 64, 64) diff --git a/tests/test_models/test_editors/test_stylegan1/test_stylegan1.py b/tests/test_models/test_editors/test_stylegan1/test_stylegan1.py index cac2273712..963f2c7eb4 100644 --- a/tests/test_models/test_editors/test_stylegan1/test_stylegan1.py +++ b/tests/test_models/test_editors/test_stylegan1/test_stylegan1.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import platform +import sys from unittest import TestCase import numpy as np @@ -35,6 +36,9 @@ class TestStyleGAN1(TestCase): @pytest.mark.skipif( 'win' in platform.system().lower() and 'cu' in torch.__version__, reason='skip on windows-cuda due to limited RAM.') + @pytest.mark.skipif( + sys.version_info < (3, 8), + reason='skip because python version is old.') def test_stylegan_cpu(self): message_hub = MessageHub.get_instance('test-s1') message_hub.update_info('iter', 0) diff --git a/tests/test_models/test_editors/test_stylegan2/test_stylegan2.py b/tests/test_models/test_editors/test_stylegan2/test_stylegan2.py index c28542e4a4..7d49699e04 100644 --- a/tests/test_models/test_editors/test_stylegan2/test_stylegan2.py +++ b/tests/test_models/test_editors/test_stylegan2/test_stylegan2.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import platform +import sys from unittest import TestCase import pytest @@ -39,6 +40,9 @@ def setup_class(cls): @pytest.mark.skipif( 'win' in platform.system().lower() and 'cu' in torch.__version__, reason='skip on windows-cuda due to limited RAM.') + @pytest.mark.skipif( + sys.version_info < (3, 8), + reason='skip because python version is old.') def test_stylegan2_cpu(self): accu_iter = 1 message_hub = MessageHub.get_instance('test-s2') diff --git a/tests/test_models/test_editors/test_swinir/test_swinir_modules.py b/tests/test_models/test_editors/test_swinir/test_swinir_modules.py new file mode 100644 index 0000000000..3f02556236 --- /dev/null +++ b/tests/test_models/test_editors/test_swinir/test_swinir_modules.py @@ -0,0 +1,84 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import platform + +import pytest +import torch + +from mmedit.models.editors.swinir.swinir_modules import (PatchEmbed, + PatchUnEmbed, + Upsample, + UpsampleOneStep) + + +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') +def test_patchEmbed(): + + net = PatchEmbed( + img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None) + + img = torch.randn(1, 3, 4, 4) + output = net(img) + assert output.shape == (1, 16, 3) + + if torch.cuda.is_available(): + net = net.cuda() + output = net(img.cuda()) + assert output.shape == (1, 16, 3) + + +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') +def test_patchUnEmbed(): + + net = PatchUnEmbed( + img_size=16, patch_size=4, in_chans=3, embed_dim=3, norm_layer=None) + + img = torch.randn(1, 64, 3) + output = net(img, (8, 8)) + assert output.shape == (1, 3, 8, 8) + + if torch.cuda.is_available(): + net = net.cuda() + output = net(img.cuda(), (8, 8)) + assert output.shape == (1, 3, 8, 8) + + +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') +def test_upsample(): + + net = Upsample(scale=2, num_feat=3) + + img = torch.randn(1, 3, 8, 8) + output = net(img) + assert output.shape == (1, 3, 16, 16) + + if torch.cuda.is_available(): + net = net.cuda() + output = net(img.cuda()) + assert output.shape == (1, 3, 16, 16) + + +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') +def test_upsampleOneStep(): + + net = UpsampleOneStep( + scale=2, + num_feat=3, + num_out_ch=4, + ) + + img = torch.randn(1, 3, 8, 8) + output = net(img) + assert output.shape == (1, 4, 16, 16) + + if torch.cuda.is_available(): + net = net.cuda() + output = net(img.cuda()) + assert output.shape == (1, 4, 16, 16) diff --git a/tests/test_models/test_editors/test_swinir/test_swinir_net.py b/tests/test_models/test_editors/test_swinir/test_swinir_net.py new file mode 100644 index 0000000000..b2944baa89 --- /dev/null +++ b/tests/test_models/test_editors/test_swinir/test_swinir_net.py @@ -0,0 +1,108 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
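+# The tests below run SwinIRNet forward passes for x1-x4 configurations with
+# the different upsampler variants on CPU, plus a GPU check when CUDA is
+# available.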
+import platform + +import pytest +import torch + +from mmedit.models.editors import SwinIRNet + + +def test_swinir_cpu(): + """Test SwinIRNet.""" + + # x2 model classical SR + net = SwinIRNet( + upscale=2, + in_channels=3, + img_size=48, + window_size=8, + img_range=1.0, + depths=[6], + embed_dim=60, + num_heads=[6], + mlp_ratio=2, + upsampler='pixelshuffledirect', + resi_connection='3conv') + img = torch.rand(1, 3, 16, 16) + output = net(img) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 32, 32) + + net = SwinIRNet( + upscale=1, + in_channels=3, + img_size=48, + window_size=8, + img_range=1.0, + depths=[6], + embed_dim=60, + num_heads=[6], + mlp_ratio=2, + upsampler='', + resi_connection='1conv') + img = torch.rand(1, 3, 16, 16) + output = net(img) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 16, 16) + + # x3 model classical SR, initialization and forward (cpu) + net = SwinIRNet( + upscale=3, + in_channels=3, + img_size=16, + window_size=8, + img_range=1.0, + depths=[2], + embed_dim=8, + num_heads=[2], + mlp_ratio=2, + upsampler='pixelshuffle', + resi_connection='1conv') + img = torch.rand(1, 3, 16, 16) + output = net(img) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 48, 48) + + # x4 model lightweight SR, initialization and forward (cpu) + net = SwinIRNet( + upscale=4, + in_channels=3, + img_size=16, + window_size=8, + img_range=1.0, + depths=[2], + embed_dim=8, + num_heads=[2], + mlp_ratio=2, + ape=True, + upsampler='nearest+conv', + resi_connection='1conv') + output = net(img) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 64, 64) + + +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') +def test_swinir_cuda(): + net = SwinIRNet( + upscale=4, + in_channels=3, + img_size=16, + window_size=8, + img_range=1.0, + depths=[2], + embed_dim=8, + num_heads=[2], + mlp_ratio=2, + upsampler='pixelshuffledirect', + resi_connection='1conv') + img = torch.rand(1, 3, 16, 16) + + # x4 model lightweight SR forward (gpu) + if torch.cuda.is_available(): + net = net.cuda() + output = net(img.cuda()) + assert isinstance(output, torch.Tensor) + assert output.shape == (1, 3, 64, 64) diff --git a/tests/test_models/test_editors/test_swinir/test_swinir_rstb.py b/tests/test_models/test_editors/test_swinir/test_swinir_rstb.py new file mode 100644 index 0000000000..b5f9c1db1d --- /dev/null +++ b/tests/test_models/test_editors/test_swinir/test_swinir_rstb.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import platform + +import pytest +import torch + +from mmedit.models.editors.swinir.swinir_rstb import RSTB + + +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') +def test_rstb(): + + net = RSTB( + dim=6, input_resolution=(8, 8), depth=6, num_heads=6, window_size=8) + + img = torch.randn(1, 64, 6) + output = net(img, (8, 8)) + assert output.shape == (1, 64, 6) + + if torch.cuda.is_available(): + net = net.cuda() + output = net(img.cuda(), (8, 8)) + assert output.shape == (1, 64, 6) diff --git a/tests/test_models/test_editors/test_swinir/test_swinir_utils.py b/tests/test_models/test_editors/test_swinir/test_swinir_utils.py new file mode 100644 index 0000000000..0971960401 --- /dev/null +++ b/tests/test_models/test_editors/test_swinir/test_swinir_utils.py @@ -0,0 +1,26 @@ +# Copyright (c) OpenMMLab. 
All rights reserved. +import torch + +from mmedit.models.editors.swinir.swinir_utils import (drop_path, to_2tuple, + window_partition, + window_reverse) + + +def test_drop_path(): + x = torch.randn(1, 3, 8, 8) + x = drop_path(x) + assert x.shape == (1, 3, 8, 8) + + +def test_to_2tuple(): + x = 8 + x = to_2tuple(x) + assert x == (8, 8) + + +def test_window(): + x = torch.randn(1, 8, 8, 3) + x = window_partition(x, 4) + assert x.shape == (4, 4, 4, 3) + x = window_reverse(x, 4, 8, 8) + assert x.shape == (1, 8, 8, 3) diff --git a/tests/test_models/test_editors/test_wgan_gp/test_wgan_gp.py b/tests/test_models/test_editors/test_wgan_gp/test_wgan_gp.py index 99b82cb468..fde0125b54 100644 --- a/tests/test_models/test_editors/test_wgan_gp/test_wgan_gp.py +++ b/tests/test_models/test_editors/test_wgan_gp/test_wgan_gp.py @@ -1,7 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. +import platform from copy import deepcopy from unittest import TestCase +import pytest import torch from mmengine import MessageHub from mmengine.optim import OptimWrapper, OptimWrapperDict @@ -54,6 +56,9 @@ def test_init(self): gan = WGANGP(generator=gen, data_preprocessor=GenDataPreprocessor()) self.assertEqual(gan.discriminator, None) + @pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') def test_train_step(self): # prepare model accu_iter = 1 diff --git a/tests/test_models/test_losses/test_feature_loss.py b/tests/test_models/test_losses/test_feature_loss.py index b0ef06b92a..1b71a99627 100644 --- a/tests/test_models/test_losses/test_feature_loss.py +++ b/tests/test_models/test_losses/test_feature_loss.py @@ -1,10 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. +import platform + import pytest import torch from mmedit.models.losses import LightCNNFeatureLoss +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') def test_light_cnn_feature_loss(): pretrained = 'https://download.openmmlab.com/mmediting/' + \ diff --git a/tests/test_models/test_losses/test_loss_comps/test_gen_auxiliary_loss_comps.py b/tests/test_models/test_losses/test_loss_comps/test_gen_auxiliary_loss_comps.py index 8753f77380..160170d609 100644 --- a/tests/test_models/test_losses/test_loss_comps/test_gen_auxiliary_loss_comps.py +++ b/tests/test_models/test_losses/test_loss_comps/test_gen_auxiliary_loss_comps.py @@ -1,4 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. +import platform + import pytest import torch from mmengine.utils.dl_utils import TORCH_VERSION @@ -8,6 +10,9 @@ from mmedit.models.losses import GeneratorPathRegularizerComps +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') class TestPathRegularizer: @classmethod diff --git a/tests/test_models/test_losses/test_perceptual_loss.py b/tests/test_models/test_losses/test_perceptual_loss.py index 3c2ebc12ff..d928600049 100644 --- a/tests/test_models/test_losses/test_perceptual_loss.py +++ b/tests/test_models/test_losses/test_perceptual_loss.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import platform from unittest.mock import patch import pytest @@ -8,6 +9,9 @@ TransferalPerceptualLoss) +@pytest.mark.skipif( + 'win' in platform.system().lower() and 'cu' in torch.__version__, + reason='skip on windows-cuda due to limited RAM.') @patch.object(PerceptualVGG, 'init_weights') def test_perceptual_loss(init_weights): if torch.cuda.is_available(): diff --git a/tools/dataset_converters/matting/bgm/preprocess_bgm_dataset.py b/tools/dataset_converters/bgm/preprocess_bgm_dataset.py similarity index 100% rename from tools/dataset_converters/matting/bgm/preprocess_bgm_dataset.py rename to tools/dataset_converters/bgm/preprocess_bgm_dataset.py diff --git a/tools/dataset_converters/inpainting/celeba-hq/README.md b/tools/dataset_converters/celeba-hq/README.md similarity index 100% rename from tools/dataset_converters/inpainting/celeba-hq/README.md rename to tools/dataset_converters/celeba-hq/README.md diff --git a/tools/dataset_converters/inpainting/celeba-hq/README_zh-CN.md b/tools/dataset_converters/celeba-hq/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/inpainting/celeba-hq/README_zh-CN.md rename to tools/dataset_converters/celeba-hq/README_zh-CN.md diff --git a/tools/dataset_converters/matting/comp1k/README.md b/tools/dataset_converters/comp1k/README.md similarity index 100% rename from tools/dataset_converters/matting/comp1k/README.md rename to tools/dataset_converters/comp1k/README.md diff --git a/tools/dataset_converters/matting/comp1k/README_zh-CN.md b/tools/dataset_converters/comp1k/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/matting/comp1k/README_zh-CN.md rename to tools/dataset_converters/comp1k/README_zh-CN.md diff --git a/tools/dataset_converters/matting/comp1k/check_extended_fg.py b/tools/dataset_converters/comp1k/check_extended_fg.py similarity index 100% rename from tools/dataset_converters/matting/comp1k/check_extended_fg.py rename to tools/dataset_converters/comp1k/check_extended_fg.py diff --git a/tools/dataset_converters/evaluate_comp1k.py b/tools/dataset_converters/comp1k/evaluate_comp1k.py similarity index 100% rename from tools/dataset_converters/evaluate_comp1k.py rename to tools/dataset_converters/comp1k/evaluate_comp1k.py diff --git a/tools/dataset_converters/matting/comp1k/extend_fg.py b/tools/dataset_converters/comp1k/extend_fg.py similarity index 100% rename from tools/dataset_converters/matting/comp1k/extend_fg.py rename to tools/dataset_converters/comp1k/extend_fg.py diff --git a/tools/dataset_converters/matting/comp1k/filter_comp1k_anno.py b/tools/dataset_converters/comp1k/filter_comp1k_anno.py similarity index 100% rename from tools/dataset_converters/matting/comp1k/filter_comp1k_anno.py rename to tools/dataset_converters/comp1k/filter_comp1k_anno.py diff --git a/tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py b/tools/dataset_converters/comp1k/preprocess_comp1k_dataset.py similarity index 100% rename from tools/dataset_converters/matting/comp1k/preprocess_comp1k_dataset.py rename to tools/dataset_converters/comp1k/preprocess_comp1k_dataset.py diff --git a/tools/dataset_converters/super-resolution/df2k_ost/README.md b/tools/dataset_converters/df2k_ost/README.md similarity index 100% rename from tools/dataset_converters/super-resolution/df2k_ost/README.md rename to tools/dataset_converters/df2k_ost/README.md diff --git a/tools/dataset_converters/super-resolution/df2k_ost/README_zh-CN.md b/tools/dataset_converters/df2k_ost/README_zh-CN.md similarity index 100% rename from 
tools/dataset_converters/super-resolution/df2k_ost/README_zh-CN.md rename to tools/dataset_converters/df2k_ost/README_zh-CN.md diff --git a/tools/dataset_converters/super-resolution/df2k_ost/preprocess_df2k_ost_dataset.py b/tools/dataset_converters/df2k_ost/preprocess_df2k_ost_dataset.py similarity index 100% rename from tools/dataset_converters/super-resolution/df2k_ost/preprocess_df2k_ost_dataset.py rename to tools/dataset_converters/df2k_ost/preprocess_df2k_ost_dataset.py diff --git a/tools/dataset_converters/super-resolution/div2k/README.md b/tools/dataset_converters/div2k/README.md similarity index 100% rename from tools/dataset_converters/super-resolution/div2k/README.md rename to tools/dataset_converters/div2k/README.md diff --git a/tools/dataset_converters/super-resolution/div2k/README_zh-CN.md b/tools/dataset_converters/div2k/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/super-resolution/div2k/README_zh-CN.md rename to tools/dataset_converters/div2k/README_zh-CN.md diff --git a/tools/dataset_converters/super-resolution/div2k/preprocess_div2k_dataset.py b/tools/dataset_converters/div2k/preprocess_div2k_dataset.py similarity index 100% rename from tools/dataset_converters/super-resolution/div2k/preprocess_div2k_dataset.py rename to tools/dataset_converters/div2k/preprocess_div2k_dataset.py diff --git a/tools/dataset_converters/generation/README.md b/tools/dataset_converters/generation/README.md deleted file mode 100644 index f195e361c5..0000000000 --- a/tools/dataset_converters/generation/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Generation Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. If your folder structure is different, you may need to change the corresponding paths in config files. - -MMEditing supported generation datasets: - -- [Paired Dataset for Pix2pix](paired-pix2pix/README.md) \[ [Homepage](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/) \] -- [Unpaired Dataset for CycleGAN](unpaired-cyclegan/README.md) \[ [Homepage](https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/) \] diff --git a/tools/dataset_converters/generation/README_zh-CN.md b/tools/dataset_converters/generation/README_zh-CN.md deleted file mode 100644 index 1dd3b8c1e4..0000000000 --- a/tools/dataset_converters/generation/README_zh-CN.md +++ /dev/null @@ -1,8 +0,0 @@ -# 图像生成数据集 - -建议将数据集软链接到 `$MMEDITING/data` 。如果您的文件夹结构不同,您可能需要更改配置文件中的相应路径。 - -MMEditing 支持的生成数据集: - -- [Pix2Pix 的配对数据集](paired-pix2pix/README.md) \[ [主页](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/) \] -- [CycleGAN 的未配对数据集](unpaired-cyclegan/README.md) \[ [主页](https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/) \] diff --git a/tools/dataset_converters/image_translation/README.md b/tools/dataset_converters/image_translation/README.md deleted file mode 100644 index 51e5312cdd..0000000000 --- a/tools/dataset_converters/image_translation/README.md +++ /dev/null @@ -1,147 +0,0 @@ -# Image Translation Datasets - -**Data preparation for translation model** needs a little attention. You should organize the files in the way we told you in `quick_run.md`. Fortunately, for most official datasets like facades and summer2winter_yosemite, they already have the right format. Also, you should set a symlink in the `data` directory. For paired-data trained translation model like Pix2Pix , `PairedImageDataset` is designed to train such translation models. 
Here is an example config for facades dataset: - -```python -train_dataset_type = 'PairedImageDataset' -val_dataset_type = 'PairedImageDataset' -img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) -train_pipeline = [ - dict( - type='LoadPairedImageFromFile', - io_backend='disk', - key='pair', - domain_a=domain_a, - domain_b=domain_b, - flag='color'), - dict( - type='Resize', - keys=[f'img_{domain_a}', f'img_{domain_b}'], - scale=(286, 286), - interpolation='bicubic') -] -test_pipeline = [ - dict( - type='LoadPairedImageFromFile', - io_backend='disk', - key='image', - domain_a=domain_a, - domain_b=domain_b, - flag='color'), - dict( - type='Resize', - keys=[f'img_{domain_a}', f'img_{domain_b}'], - scale=(256, 256), - interpolation='bicubic') -] -dataroot = 'data/paired/facades' -train_dataloader = dict( - batch_size=1, - num_workers=4, - persistent_workers=True, - sampler=dict(type='InfiniteSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=dataroot, # set by user - pipeline=train_pipeline)) - -val_dataloader = dict( - batch_size=1, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=dataroot, # set by user - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True) - -test_dataloader = dict( - batch_size=1, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=dataroot, # set by user - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True) -``` - -Here, we adopt `LoadPairedImageFromFile` to load a paired image as the common loader does and crops -it into two images with the same shape in different domains. As shown in the example, `pipeline` provides important data pipeline to process images, including loading from file system, resizing, cropping, flipping, transferring to `torch.Tensor` and packing to `EditDataSample`. All of supported data pipelines can be found in `mmedit/datasets/transforms`. - -For unpaired-data trained translation model like CycleGAN , `UnpairedImageDataset` is designed to train such translation models. Here is an example config for horse2zebra dataset: - -```python -train_dataset_type = 'UnpairedImageDataset' -val_dataset_type = 'UnpairedImageDataset' -img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) -domain_a, domain_b = 'horse', 'zebra' -train_pipeline = [ - dict( - type='LoadImageFromFile', - io_backend='disk', - key=f'img_{domain_a}', - flag='color'), - dict( - type='LoadImageFromFile', - io_backend='disk', - key=f'img_{domain_b}', - flag='color'), - dict( - type='TransformBroadcaster', - mapping={'img': [f'img_{domain_a}', f'img_{domain_b}']}, - auto_remap=True, - share_random_params=True, - transforms=[ - dict(type='Resize', scale=(286, 286), interpolation='bicubic'), - dict(type='Crop', crop_size=(256, 256), random_crop=True), - ]), - dict(type='Flip', keys=[f'img_{domain_a}'], direction='horizontal'), - dict(type='Flip', keys=[f'img_{domain_b}'], direction='horizontal'), - dict( - type='PackEditInputs', - keys=[f'img_{domain_a}', f'img_{domain_b}']) -] -test_pipeline = [ - dict(type='LoadImageFromFile', io_backend='disk', key='img', flag='color'), - dict(type='Resize', scale=(256, 256), interpolation='bicubic'), - dict( - type='PackEditInputs', - keys=[f'img_{domain_a}', f'img_{domain_b}']) -] -data_root = './data/horse2zebra/' -# `batch_size` and `data_root` need to be set. 
-train_dataloader = dict( - batch_size=1, - num_workers=4, - persistent_workers=True, - sampler=dict(type='InfiniteSampler', shuffle=True), - dataset=dict( - type=dataset_type, - data_root=data_root, # set by user - pipeline=train_pipeline)) - -val_dataloader = dict( - batch_size=None, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=data_root, # set by user - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True) - -test_dataloader = dict( - batch_size=None, - num_workers=4, - dataset=dict( - type=dataset_type, - data_root=data_root, # set by user - pipeline=test_pipeline), - sampler=dict(type='DefaultSampler', shuffle=False), - persistent_workers=True) -``` - -`UnpairedImageDataset` will load both images (domain A and B) from different paths and transform them at the same time. - -Here, we provide download links of datasets used in [Pix2Pix](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/) and [CycleGAN](https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/). diff --git a/tools/dataset_converters/inpainting/README.md b/tools/dataset_converters/inpainting/README.md deleted file mode 100644 index ecc6d7b45d..0000000000 --- a/tools/dataset_converters/inpainting/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Inpainting Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. If your folder structure is different, you may need to change the corresponding paths in config files. - -MMEditing supported inpainting datasets: - -- [CelebA-HQ](celeba-hq/README.md) \[ [Homepage](https://github.com/tkarras/progressive_growing_of_gans#preparing-datasets-for-training) \] -- [Paris Street View](paris-street-view/README.md) \[ [Homepage](https://github.com/pathak22/context-encoder/issues/24) \] -- [Places365](places365/README.md) \[ [Homepage](http://places2.csail.mit.edu/) \] - -As we only need images for inpainting task, further preparation is not necessary and the folder structure can be different from the example. You can utilize the information provided by the original dataset like `Place365` (e.g. `meta`). Also, you can easily scan the data set and list all of the images to a specific `txt` file. Here is an example for the `Places365_val.txt` from Places365 and we will only use the image name information in inpainting. 
- -``` -Places365_val_00000001.jpg 165 -Places365_val_00000002.jpg 358 -Places365_val_00000003.jpg 93 -Places365_val_00000004.jpg 164 -Places365_val_00000005.jpg 289 -Places365_val_00000006.jpg 106 -Places365_val_00000007.jpg 81 -Places365_val_00000008.jpg 121 -Places365_val_00000009.jpg 150 -Places365_val_00000010.jpg 302 -Places365_val_00000011.jpg 42 -``` diff --git a/tools/dataset_converters/inpainting/README_zh-CN.md b/tools/dataset_converters/inpainting/README_zh-CN.md deleted file mode 100644 index f9a241884f..0000000000 --- a/tools/dataset_converters/inpainting/README_zh-CN.md +++ /dev/null @@ -1,25 +0,0 @@ -# 图像补全数据集 - -建议将数据集软链接到 `$MMEDITING/data` 。如果您的文件夹结构不同,您可能需要更改配置文件中的相应路径。 - -MMEditing 支持的补全数据集: - -- [Paris Street View](paris-street-view/README.md) \[ [主页](https://github.com/pathak22/context-encoder/issues/24) \] -- [CelebA-HQ](celeba-hq/README.md) \[ [主页](https://github.com/tkarras/progressive_growing_of_gans#preparing-datasets-for-training) \] -- [Places365](places365/README.md) \[ [主页](http://places2.csail.mit.edu/) \] - -由于在图像补全任务中,我们只需要使用图像,因此我们不需要对数据集进行额外的预处理操作,文件目录的结构也可以和本例有所不同。您可以利用原始数据集提供的信息,如 `Place365` (例如 `meta`)。或者,您可以直接遍历数据集文件夹,并将所有图像文件的路径罗列在一个文本文件中。下面的例子节选自 Places365 数据集中的 `Places365_val.txt`,针对图像补全任务,我们只需要使用其中的文件名信息。 - -``` -Places365_val_00000001.jpg 165 -Places365_val_00000002.jpg 358 -Places365_val_00000003.jpg 93 -Places365_val_00000004.jpg 164 -Places365_val_00000005.jpg 289 -Places365_val_00000006.jpg 106 -Places365_val_00000007.jpg 81 -Places365_val_00000008.jpg 121 -Places365_val_00000009.jpg 150 -Places365_val_00000010.jpg 302 -Places365_val_00000011.jpg 42 -``` diff --git a/tools/dataset_converters/matting/README.md b/tools/dataset_converters/matting/README.md deleted file mode 100644 index 8cb9183d13..0000000000 --- a/tools/dataset_converters/matting/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Matting Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. If your folder structure is different, you may need to change the corresponding paths in config files. 
- -MMEditing supported matting datasets: - -- [Composition-1k](comp1k/README.md) \[ [Homepage](https://sites.google.com/view/deepimagematting) \] diff --git a/tools/dataset_converters/matting/README_zh-CN.md b/tools/dataset_converters/matting/README_zh-CN.md deleted file mode 100644 index fad8f5b3b2..0000000000 --- a/tools/dataset_converters/matting/README_zh-CN.md +++ /dev/null @@ -1,7 +0,0 @@ -# 抠图数据集 - -建议将数据集软链接到 `$MMEDITING/data` 。如果您的文件夹结构不同,您可能需要更改配置文件中的相应路径。 - -MMEditing 支持的抠图数据集: - -- [Composition-1k](comp1k/README.md) \[ [Homepage](https://sites.google.com/view/deepimagematting) \] diff --git a/tools/dataset_converters/generation/paired-pix2pix/README.md b/tools/dataset_converters/paired-pix2pix/README.md similarity index 100% rename from tools/dataset_converters/generation/paired-pix2pix/README.md rename to tools/dataset_converters/paired-pix2pix/README.md diff --git a/tools/dataset_converters/generation/paired-pix2pix/README_zh-CN.md b/tools/dataset_converters/paired-pix2pix/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/generation/paired-pix2pix/README_zh-CN.md rename to tools/dataset_converters/paired-pix2pix/README_zh-CN.md diff --git a/tools/dataset_converters/inpainting/paris-street-view/README.md b/tools/dataset_converters/paris-street-view/README.md similarity index 100% rename from tools/dataset_converters/inpainting/paris-street-view/README.md rename to tools/dataset_converters/paris-street-view/README.md diff --git a/tools/dataset_converters/inpainting/paris-street-view/README_zh-CN.md b/tools/dataset_converters/paris-street-view/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/inpainting/paris-street-view/README_zh-CN.md rename to tools/dataset_converters/paris-street-view/README_zh-CN.md diff --git a/tools/dataset_converters/inpainting/places365/README.md b/tools/dataset_converters/places365/README.md similarity index 100% rename from tools/dataset_converters/inpainting/places365/README.md rename to tools/dataset_converters/places365/README.md diff --git a/tools/dataset_converters/inpainting/places365/README_zh-CN.md b/tools/dataset_converters/places365/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/inpainting/places365/README_zh-CN.md rename to tools/dataset_converters/places365/README_zh-CN.md diff --git a/tools/dataset_converters/super-resolution/reds/README.md b/tools/dataset_converters/reds/README.md similarity index 100% rename from tools/dataset_converters/super-resolution/reds/README.md rename to tools/dataset_converters/reds/README.md diff --git a/tools/dataset_converters/super-resolution/reds/README_zh-CN.md b/tools/dataset_converters/reds/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/super-resolution/reds/README_zh-CN.md rename to tools/dataset_converters/reds/README_zh-CN.md diff --git a/tools/dataset_converters/super-resolution/reds/crop_sub_images.py b/tools/dataset_converters/reds/crop_sub_images.py similarity index 100% rename from tools/dataset_converters/super-resolution/reds/crop_sub_images.py rename to tools/dataset_converters/reds/crop_sub_images.py diff --git a/tools/dataset_converters/super-resolution/reds/preprocess_reds_dataset.py b/tools/dataset_converters/reds/preprocess_reds_dataset.py similarity index 100% rename from tools/dataset_converters/super-resolution/reds/preprocess_reds_dataset.py rename to tools/dataset_converters/reds/preprocess_reds_dataset.py diff --git a/tools/dataset_converters/super-resolution/README.md 
b/tools/dataset_converters/super-resolution/README.md deleted file mode 100644 index 42c33f7305..0000000000 --- a/tools/dataset_converters/super-resolution/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Super-Resolution Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. If your folder structure is different, you may need to change the corresponding paths in config files. - -MMEditing supported super-resolution datasets: - -- Image Super-Resolution - - [DF2K_OST](df2k_ost/README.md) \[ [Homepage](https://github.com/xinntao/Real-ESRGAN/blob/master/docs/Training.md) \] - - [DIV2K](div2k/README.md) \[ [Homepage](https://data.vision.ee.ethz.ch/cvl/DIV2K/) \] -- Video Super-Resolution - - [REDS](reds/README.md) \[ [Homepage](https://seungjunnah.github.io/Datasets/reds.html) \] - - [Vid4](vid4/README.md) \[ [Homepage](https://drive.google.com/file/d/1ZuvNNLgR85TV_whJoHM7uVb-XW1y70DW/view) \] - - [Vimeo90K](vimeo90k/README.md) \[ [Homepage](http://toflow.csail.mit.edu) \] diff --git a/tools/dataset_converters/super-resolution/README_zh-CN.md b/tools/dataset_converters/super-resolution/README_zh-CN.md deleted file mode 100644 index c0aa099e73..0000000000 --- a/tools/dataset_converters/super-resolution/README_zh-CN.md +++ /dev/null @@ -1,13 +0,0 @@ -# 超分辨率数据集 - -建议将数据集的根目录链接到 `$MMEDITING/data` 下,如果您的文件目录结构不一致,那么可能需要在配置文件中修改对应的文件路径。 - -MMEditing 支持下列超分辨率数据集: - -- 图像超分辨率 - - [DIV2K](div2k/README_zh-CN.md) \[ [Homepage](https://data.vision.ee.ethz.ch/cvl/DIV2K/) \] - - [DF2K_OST](df2k_ost/README_zh-CN.md) \[ [Homepage](https://github.com/xinntao/Real-ESRGAN/blob/master/docs/Training.md) \] -- 视频超分辨率 - - [REDS](reds/README_zh-CN.md) \[ [Homepage](https://seungjunnah.github.io/Datasets/reds.html) \] - - [Vimeo90K](vimeo90k/README_zh-CN.md) \[ [Homepage](http://toflow.csail.mit.edu) \] - - [Vid4](vid4/README_zh-CN.md) \[ [Homepage](https://drive.google.com/file/d/1ZuvNNLgR85TV_whJoHM7uVb-XW1y70DW/view) \] diff --git a/tools/dataset_converters/generation/unpaired-cyclegan/README.md b/tools/dataset_converters/unpaired-cyclegan/README.md similarity index 100% rename from tools/dataset_converters/generation/unpaired-cyclegan/README.md rename to tools/dataset_converters/unpaired-cyclegan/README.md diff --git a/tools/dataset_converters/generation/unpaired-cyclegan/README_zh-CN.md b/tools/dataset_converters/unpaired-cyclegan/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/generation/unpaired-cyclegan/README_zh-CN.md rename to tools/dataset_converters/unpaired-cyclegan/README_zh-CN.md diff --git a/tools/dataset_converters/super-resolution/vid4/README.md b/tools/dataset_converters/vid4/README.md similarity index 100% rename from tools/dataset_converters/super-resolution/vid4/README.md rename to tools/dataset_converters/vid4/README.md diff --git a/tools/dataset_converters/super-resolution/vid4/README_zh-CN.md b/tools/dataset_converters/vid4/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/super-resolution/vid4/README_zh-CN.md rename to tools/dataset_converters/vid4/README_zh-CN.md diff --git a/tools/dataset_converters/video-interpolation/README.md b/tools/dataset_converters/video-interpolation/README.md deleted file mode 100644 index 04036a026f..0000000000 --- a/tools/dataset_converters/video-interpolation/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Video Frame Interpolation Datasets - -It is recommended to symlink the dataset root to `$MMEDITING/data`. 
If your folder structure is different, you may need to change the corresponding paths in config files. - -MMEditing supported video frame interpolation datasets: - -- [Vimeo90K-triplet](vimeo90k-triplet/README.md) \[ [Homepage](http://toflow.csail.mit.edu) \] diff --git a/tools/dataset_converters/video-interpolation/README_zh-CN.md b/tools/dataset_converters/video-interpolation/README_zh-CN.md deleted file mode 100644 index 6f989e6fa7..0000000000 --- a/tools/dataset_converters/video-interpolation/README_zh-CN.md +++ /dev/null @@ -1,7 +0,0 @@ -# 视频插帧数据集 - -建议将数据集的根目录链接到 `$MMEDITING/data` 下,如果您的文件目录结构不一致,那么可能需要在配置文件中修改对应的文件路径。 - -MMEditing 支持下列视频插帧数据集: - -- [Vimeo90K-triplet](vimeo90k-triplet/README.md) \[ [Homepage](http://toflow.csail.mit.edu) \] diff --git a/tools/dataset_converters/video-interpolation/vimeo90k-triplet/README.md b/tools/dataset_converters/vimeo90k-triplet/README.md similarity index 100% rename from tools/dataset_converters/video-interpolation/vimeo90k-triplet/README.md rename to tools/dataset_converters/vimeo90k-triplet/README.md diff --git a/tools/dataset_converters/video-interpolation/vimeo90k-triplet/README_zh-CN.md b/tools/dataset_converters/vimeo90k-triplet/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/video-interpolation/vimeo90k-triplet/README_zh-CN.md rename to tools/dataset_converters/vimeo90k-triplet/README_zh-CN.md diff --git a/tools/dataset_converters/super-resolution/vimeo90k/README.md b/tools/dataset_converters/vimeo90k/README.md similarity index 100% rename from tools/dataset_converters/super-resolution/vimeo90k/README.md rename to tools/dataset_converters/vimeo90k/README.md diff --git a/tools/dataset_converters/super-resolution/vimeo90k/README_zh-CN.md b/tools/dataset_converters/vimeo90k/README_zh-CN.md similarity index 100% rename from tools/dataset_converters/super-resolution/vimeo90k/README_zh-CN.md rename to tools/dataset_converters/vimeo90k/README_zh-CN.md diff --git a/tools/dataset_converters/super-resolution/vimeo90k/preprocess_vimeo90k_dataset.py b/tools/dataset_converters/vimeo90k/preprocess_vimeo90k_dataset.py similarity index 100% rename from tools/dataset_converters/super-resolution/vimeo90k/preprocess_vimeo90k_dataset.py rename to tools/dataset_converters/vimeo90k/preprocess_vimeo90k_dataset.py
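Following the directory flattening above, each dataset preparation script keeps its file name but now sits directly under a dataset-named folder in `tools/dataset_converters/`. A minimal sketch of invoking a few of the relocated entry points from the repository root (only `--help` is shown; the concrete preprocessing flags differ per script and should be taken from each script's own argparse help rather than from this sketch):

```shell
python tools/dataset_converters/div2k/preprocess_div2k_dataset.py --help
python tools/dataset_converters/reds/preprocess_reds_dataset.py --help
python tools/dataset_converters/comp1k/preprocess_comp1k_dataset.py --help
python tools/dataset_converters/vimeo90k/preprocess_vimeo90k_dataset.py --help
```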