Source code for torchio.datasets.rsna_miccai

import csv
import warnings
from collections.abc import Sequence
from pathlib import Path

from ..data import ScalarImage
from ..data import Subject
from ..data import SubjectsDataset
from ..types import TypePath


class RSNAMICCAI(SubjectsDataset):
    """RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge dataset.

    This is a helper class for the dataset used in the
    `RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge`_ hosted on
    `kaggle <https://www.kaggle.com/>`_. The dataset must be downloaded before
    instantiating this class (as opposed to, e.g.,
    :class:`torchio.datasets.IXI`).

    This `kaggle kernel <https://www.kaggle.com/fepegar/preprocessing-mri-with-torchio/>`_
    includes a usage example, including preprocessing of all the scans.

    If you reference or use the dataset in any form, include the following
    citation:

    U. Baid, et al., "The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor
    Segmentation and Radiogenomic Classification", arXiv:2107.02314, 2021.

    Args:
        root_dir: Directory containing the dataset (``train`` directory,
            ``test`` directory, etc.).
        train: If ``True``, the ``train`` set will be used. Otherwise the
            ``test`` set will be used.
        ignore_empty: If ``True``, the three subjects flagged as "presenting
            issues" (empty images) by the challenge organizers will be
            ignored. The subject IDs are ``00109``, ``00123`` and ``00709``.

    Example:
        >>> import torchio as tio
        >>> from subprocess import call
        >>> call('kaggle competitions download -c rsna-miccai-brain-tumor-radiogenomic-classification'.split())
        >>> root_dir = 'rsna-miccai-brain-tumor-radiogenomic-classification'
        >>> train_set = tio.datasets.RSNAMICCAI(root_dir, train=True)
        >>> test_set = tio.datasets.RSNAMICCAI(root_dir, train=False)
        >>> len(train_set), len(test_set)
        (582, 87)

    .. _RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification
    """

    id_key = 'BraTS21ID'
    label_key = 'MGMT_value'
    bad_subjects = '00109', '00123', '00709'

    def __init__(
        self,
        root_dir: TypePath,
        train: bool = True,
        ignore_empty: bool = True,
        modalities: Sequence[str] = ('T1w', 'T1wCE', 'T2w', 'FLAIR'),
        **kwargs,
    ):
        self.root_dir = Path(root_dir).expanduser().resolve()
        if isinstance(modalities, str):
            modalities = [modalities]
        self.modalities = modalities
        subjects = self._get_subjects(self.root_dir, train, ignore_empty)
        super().__init__(subjects, **kwargs)
        self.train = train

    def _get_subjects(
        self,
        root_dir: Path,
        train: bool,
        ignore_empty: bool,
    ) -> list[Subject]:
        subjects = []
        if train:
            # Read the MGMT labels from the CSV shipped with the training set
            csv_path = root_dir / 'train_labels.csv'
            try:
                with open(csv_path) as csvfile:
                    reader = csv.DictReader(csvfile)
                    labels_dict = {
                        row[self.id_key]: int(row[self.label_key])
                        for row in reader
                    }
            except FileNotFoundError:
                warnings.warn(
                    'Labels CSV not found. Ignoring MGMT labels',
                    stacklevel=2,
                )
                labels_dict = {}
            subjects_dir = root_dir / 'train'
        else:
            subjects_dir = root_dir / 'test'
        for subject_dir in sorted(subjects_dir.iterdir()):
            subject_id = subject_dir.name
            # Skip the subjects flagged as problematic by the organizers
            if ignore_empty and subject_id in self.bad_subjects:
                continue
            # Skip entries whose name is not a numeric subject ID
            try:
                int(subject_id)
            except ValueError:
                continue
            images_dict: dict[str, str | int | ScalarImage]
            images_dict = {self.id_key: subject_dir.name}
            if train and labels_dict:
                images_dict[self.label_key] = labels_dict[subject_id]
            for modality in self.modalities:
                image_dir = subject_dir / modality
                filepaths = list(image_dir.iterdir())
                num_files = len(filepaths)
                # Use the single file if there is only one, else the DICOM dir
                path = filepaths[0] if num_files == 1 else image_dir
                images_dict[modality] = ScalarImage(path)
            subject = Subject(images_dict)
            subjects.append(subject)
        return subjects
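

# A minimal usage sketch, assuming the Kaggle data has already been
# downloaded and unzipped into the directory named below (the path is the
# one used in the docstring example). The MGMT label is only present when
# train_labels.csv was found, and reading the FLAIR shape requires a
# readable DICOM series in the subject's FLAIR folder.
if __name__ == '__main__':
    root_dir = 'rsna-miccai-brain-tumor-radiogenomic-classification'
    train_set = RSNAMICCAI(root_dir, train=True)
    subject = train_set[0]
    print(subject[RSNAMICCAI.id_key])     # subject ID, e.g. '00000'
    print(subject[RSNAMICCAI.label_key])  # 0 or 1, if the labels CSV was found
    print(subject['FLAIR'].shape)         # (channels, x, y, z) of the FLAIR series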