Skip to content
Snippets Groups Projects
Commit 6eb2b5c6 authored by Schiffer, Christian
Browse files

Added switch to disable writing of diagnostics

parent adc0fbc1
No related branches found
No related tags found
No related merge requests found
......@@ -54,7 +54,8 @@ def dl_cli():
@click.option("--quiet", default=False, type=bool, help="Disable debug logging for slave processors.", is_flag=True)
@click.option("--dms/--no-dms", default=True, type=bool, help="Use dead man switch (DMS). Default is enable.", is_flag=True)
@click.option("--log-limit-cpus", default=None, type=int, help="Omit logging of non-coordinator processes beyond this many CPUs. Unlimited by default.")
def dl_train(experiment_folders, verbose, distribute, profile, fake_gpus, dry, fake_data, fake_samples, cache_mode, files_to_cache, slaves_per_master, quiet, dms, log_limit_cpus):
@click.option("--diagnostics/--no-diagnostics", default=True, type=bool, help="Save diagnostics at the end of the run. Default is enabled.", is_flag=True)
def dl_train(experiment_folders, verbose, distribute, profile, fake_gpus, dry, fake_data, fake_samples, cache_mode, files_to_cache, slaves_per_master, quiet, dms, log_limit_cpus, diagnostics):
"""
Train a deep neural network. Each experiment_folder describes an experiment. The folder has to contain a file
named "config.py", which contains the parameters for training and how to generate training and testing data for
......@@ -91,6 +92,7 @@ def dl_train(experiment_folders, verbose, distribute, profile, fake_gpus, dry, f
quiet=quiet,
use_dms=dms,
log_limit_cpus=log_limit_cpus,
save_diagnostics=diagnostics,
)
......
......@@ -33,6 +33,7 @@ def train(
quiet=False,
use_dms=True,
log_limit_cpus=None,
save_diagnostics=True,
):
"""
Train a deep neural network. Each experiment_folder describes an experiment. The folder has to contain a file
......@@ -54,6 +55,7 @@ def train(
quiet (bool): Disable logging for slave processors.
use_dms (bool): Use dead man switch (DMS).
log_limit_cpus (int): Omit logging of non coordinator processors beyond this many CPUs. Unlimited (None) by default.
save_diagnostics (bool): Whether to save diagnostics at the end of the run. Default is true.
"""
import os
......@@ -175,7 +177,11 @@ def train(
log.info("Finished training for {}".format(experiment_folder))
# Save diagnostics file
if save_diagnostics:
log.info(f"Saving diagnostics to {files.get_diagnostics_path_of_run(run_dir)}")
diagnostics.save(directory=files.get_diagnostics_path_of_run(run_dir), mode="training")
else:
log.info("Omitting saving diagnostics.")
if profile:
profiling.stop_profiling()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment