# mnist_with_tensorboard_logger_and_high_level_apis.py

"""
 MNIST example with training and validation monitoring using TensorboardX and Tensorboard.
 Requirements:
    Optionally TensorboardX (https://github.com/lanpa/tensorboard-pytorch): `pip install tensorboardX`
    Tensorboard: `pip install tensorflow` (or just install tensorboard without the rest of tensorflow)
 Usage:
    Start tensorboard:
    ```bash
    tensorboard --logdir=/tmp/tensorboard_logs/
    ```
    Run the example:
    ```bash
    python mnist_with_tensorboard_logger_and_high_level_apis.py --log_dir=/tmp/tensorboard_logs
    ```
"""
import sys
from argparse import ArgumentParser
import logging

from ignite_framework.states import state

from ignite_framework.pipelines import SupervisedEvaluator, SupervisedTrainer
from ignite_framework.tensorboard import EnginesMetricsCharts, EnginesMetricsComparisonCharts

import torch
from torch.utils.data import DataLoader
from torch import nn
import torch.nn.functional as F
from torch.optim import SGD
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, ToTensor, Normalize


class Net(nn.Module):
    """Small LeNet-style CNN for 10-class MNIST classification.

    Expects input of shape ``(batch, 1, 28, 28)`` and returns per-class
    log-probabilities of shape ``(batch, 10)``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Two conv layers, 2d-dropout on the second feature map, then a 2-layer MLP.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        # Feature extraction: conv -> (dropout) -> 2x2 max-pool -> ReLU, twice.
        features = F.relu(F.max_pool2d(self.conv1(x), 2))
        features = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(features)), 2))
        # Flatten to (batch, 320); 320 = 20 channels * 4 * 4 spatial positions.
        flat = features.view(-1, 320)
        # Classifier head with dropout active only in training mode.
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)
        return F.log_softmax(self.fc2(hidden), dim=-1)


def get_data_loaders():
    """Create the MNIST train/validation/cross-validation dataloaders.

    Registers three loaders on the framework-global ``state.dataloaders``
    container (``trainer_loader``, ``xvalidator_loader``, ``validator_loader``);
    batch sizes and the data directory come from ``state`` shortcuts populated
    from the CLI arguments. Returns nothing.
    """
    # Standard MNIST normalization constants (dataset mean / std).
    data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

    with state.dataloaders as d:
        # NOTE: state shortcut used: `state.train_batch_size == state.params.arguments.train_batch_size`
        # Only this first call downloads; the two loaders below use download=False,
        # presumably because the archive is already on disk after this — TODO confirm.
        d.trainer_loader = DataLoader(MNIST(download=True, root=state.data_dir, transform=data_transform, train=True),
                                      batch_size=state.train_batch_size, shuffle=True)

        # Cross-validation loader over the MNIST test split.
        d.xvalidator_loader = DataLoader(
            MNIST(download=False, root=state.data_dir, transform=data_transform, train=False),
            batch_size=state.xval_batch_size, shuffle=False)

        # Validation loader over the same MNIST test split, with its own batch size.
        d.validator_loader = DataLoader(
            MNIST(download=False, root=state.data_dir, transform=data_transform, train=False),
            batch_size=state.val_batch_size, shuffle=False)
    # Not required; merely drops the local alias to the container.
    del d


def run():
    """Build the model, optimizer, engines, and Tensorboard charts, then train.

    All components register themselves on the framework-global ``state``;
    the actual training/evaluation loop is started by ``state.run()``.
    """
    get_data_loaders()

    # # Not required, because integrated into `state` automation
    # device = 'cpu'
    #
    # if torch.cuda.is_available():
    #     state.device = 'cuda'

    with state.modules as m:
        m.model = Net()
        m.model.cuda(device=state.device_for_modules)
        m.x_entropy = nn.CrossEntropyLoss()
    # Not required
    del m

    # Directly setting attribute instead of using `with`-statement
    state.optimizers.sgd = SGD(state.model.parameters(), lr=state.lr, momentum=state.momentum)

    SupervisedTrainer(name='trainer', model=state.model, optimizer=state.optimizers.sgd, loss_fn=state.x_entropy,
                      dataloader=state.trainer_loader, n_epochs=state.epochs)

    # Full validation run, triggered once each time a training run completes.
    SupervisedEvaluator(name='evaluator', model=state.model, loss_fn=state.x_entropy, dataloader=state.validator_loader,
                        engine_run_started_ref=state.trainer.engine_run_completed_ref)

    # Periodic cross-validation every `n_xval_step_samples` training samples.
    # BUGFIX: previously reused `state.validator_loader`; now consumes the
    # dedicated `state.xvalidator_loader` created in `get_data_loaders()`
    # (which was otherwise never used).
    SupervisedEvaluator(name='xvalidator', model=state.model, loss_fn=state.x_entropy,
                        dataloader=state.xvalidator_loader,
                        engine_run_started_ref=state.trainer.get(
                            'n_samples_every', state.n_xval_step_samples, 'ref'),
                        n_samples=state.n_xval_samples)

    ### HARDWARE RESOURCE TRACKING TO BE IMPLEMENTED SOON ###
    # ========================================================
    # NOTE: The current GPU-tracker may have the issues that it is triggered by `Event.ITERATION_COMPLETED` logging
    #       the GPU-usage at this point (not even max usage). Unfortunately, when the iteration is completed the
    #       GPU-calculations are also completed, `GpuInfo` should only be measuring the resource usage during downtime.
    #       Therefore the new implementation will be implemented on a separate thread with slightly randomized logging
    #       timing.

    # if sys.version_info > (3,):
    #     from ignite.contrib.metrics.gpu_info import GpuInfo
    #     try:
    #         GpuInfo().attach(state.trainer)
    #     except RuntimeError:
    #         print("INFO: By default, in this example it is possible to log GPU information (used memory,
    #         utilization). "
    #               "As there is no pynvml python package installed, GPU information won't be logged. Otherwise,
    #               please "
    #               "install it : `pip install pynvml`")

    # Automatically identify and generate metric charts comparing the different engines
    EnginesMetricsComparisonCharts(x_axis_ref=state.trainer.n_samples_ref, n_identical_metric_name_suffixes=1)
    # Automatically generate for each engine a summary of all metric charts
    EnginesMetricsCharts(x_axes_refs=state.trainer.n_samples_ref, n_identical_metric_name_suffixes=1)

    state.run()


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument('--train_batch_size', type=int, default=64,
                        help='input batch size for training (default: 64)')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--val_batch_size', type=int, default=1000,
                        help='input batch size for validation (default: 1000)')
    parser.add_argument('--n_xval_step_samples', type=int, default=640,
                        help='number of samples between each evaluation step (default: 640')
    parser.add_argument('--xval_batch_size', type=int, default=100,
                        help='input batch size for xvalidation (default: 100)')
    parser.add_argument('--n_xval_samples', type=int, default=200,
                        help='numbers of samples infered for evaluation (default: 1000)')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5,
                        help='SGD momentum (default: 0.5)')
    parser.add_argument("--log_dir", type=str, default="./tensorboard_logs",
                        help='log directory for Tensorboard log output, (default: `./tensorboard`)')
    parser.add_argument('--data_dir', type=str, default='.',
                        help='data directory for MNIST dataset (default: `.`)')

    args = parser.parse_args()

    # Setup engine logger
    # NOTE: General loggers could also be integrated in a `state.loggers` container, so users could add their preferred
    # loggers to their `custom state`
    logger = logging.getLogger("ignite.engine.engine.Engine")
    handler = logging.StreamHandler()
    formatter = logging.Formatter("%(asctime)s %(name)-12s %(levelname)-8s %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    # Changing default `tensorbaord_log_dir` with shortcut call
    # NOTE: shortcut calls `state.configs.default_configs.tensorboard_log_dir = args.pop('log_dir')
    state.tensorboard_log_dir = args.__dict__.pop('log_dir')
    # NOTE:
    # - `data_dir` will be assigned to `state.configs.user_defined_configs.data_dir` but available as `state.data_dir`
    state.configs.data_dir = args.__dict__.pop('data_dir')
    with state.params as p:
        p.update(args.__dict__)

    run()