Skip to content
Snippets Groups Projects
repositories.py 44.4 KiB
Newer Older
  • Learn to ignore specific revisions
  •     def enter_context(self, force=False):
    
    Ronald Jäpel's avatar
    Ronald Jäpel committed
            """
            Enter the tracking context. This includes:
             - Ensure no uncommitted changes in the project repository
             - Remove all uncommitted changes in the output repository
             - Clean up empty branches in the output repository
             - Create a new empty output branch in the output repository
    
            :return:
                The name of the newly created output branch.
            """
    
            self.test_for_correct_repo_setup()
    
            self._on_context_enter_commit_hash = self.current_commit_hash
    
            self._is_in_context_manager = True
    
            if output_repo.exist_uncomitted_changes:
    
                output_repo.reset_hard_to_head(force_entry=force)
    
    Ronald Jäpel's avatar
    Ronald Jäpel committed
            output_repo.delete_active_branch_if_branch_is_empty()
    
    
            new_branch_name = self.get_new_output_branch_name()
    
            # update urls in main branch of output_repo
            output_repo._git.checkout("main")
    
            project_repo_remotes = self.remote_urls
    
            output_repo.add_list_of_remotes_in_readme_file("project_repo", project_repo_remotes)
    
            output_repo.commit("Update urls", verbosity=0)
    
            output_repo.prepare_new_branch(new_branch_name)
            return new_branch_name
    
    
        def copy_data_to_cache(self, branch_name=None):
            """
            Copy all existing output results into a cached folder and make it read-only.

            The copy is written to ``self.path / (<output folder> + "_cached") / <branch name>``.

            :param branch_name:
                Optional branch name. If None, the currently active branch of the
                output repository is used. Otherwise that branch is checked out for
                the duration of the copy and the previously active branch is
                checked out again afterwards, even if the copy fails.
            """
            source_filepath = self.output_repo.path

            if branch_name is None:
                branch_name = self.output_repo.active_branch.name
                previous_branch = None
            else:
                previous_branch = self.output_repo.active_branch.name
                self.output_repo.checkout(branch_name)

            try:
                target_folder = self.path / (self._output_folder + "_cached") / branch_name

                shutil.copytree(source_filepath, target_folder)

                # Set all files to read only. glob skips dot-entries by default,
                # so hidden files and folders keep their permissions.
                for filename in glob.iglob(f"{target_folder}/**/*", recursive=True):
                    absolute_path = os.path.abspath(filename)
                    if os.path.isdir(absolute_path):
                        continue
                    os.chmod(absolute_path, S_IREAD)
            finally:
                # Never leave the output repository on the temporary branch.
                if previous_branch is not None:
                    self.output_repo.checkout(previous_branch)
    
    Ronald Jäpel's avatar
    Ronald Jäpel committed
            """
            After running all project code, this prepares the commit of the results to the output repository. This includes
             - Ensure no uncommitted changes in the project repository
             - Stage all changes in the output repository
             - Commit all changes in the output repository with the given commit message.
    
             - Update the log files in the main branch of the output repository.
    
    Ronald Jäpel's avatar
    Ronald Jäpel committed
            :param message:
                Commit message for the output repository commit.
            """
    
            if self._on_context_enter_commit_hash != self.current_commit_hash:
                raise RuntimeError("Code has changed since starting the context. Don't do that.")
    
    
            print("Completed computations, commiting results")
    
            self.output_repo.add(".")
    
                # This has to be using ._git.commit to raise an error if no results have been written.
    
                commit_return = self.output_repo._git.commit("-m", message)
    
                self.copy_data_to_cache()
    
                self.update_output_main_logs()
    
                main_cach_path = self.path / (self._output_folder + "_cached") / "main"
                if main_cach_path.exists():
                    delete_path(main_cach_path)
    
                self.copy_data_to_cache("main")
    
                print("\n" + commit_return + "\n")
            except git.exc.GitCommandError as e:
    
                self.output_repo.delete_active_branch_if_branch_is_empty()
    
                # self.remove_cached_files()
    
                self._is_in_context_manager = False
                self._on_context_enter_commit_hash = None
    
        def track_results(self, results_commit_message: str, debug=False, force=False):
            """
            Context manager to be used when running project code that produces output that should
            be tracked in the output repository.

            On entry ``enter_context`` is called and its return value (the new
            output branch name) is bound by the ``with`` statement. If the body
            raises, the error is passed to ``capture_error`` and re-raised;
            otherwise ``exit_context`` commits the results.

            :param results_commit_message:
                Commit message for the commit of the output repository.
            :param debug:
                Perform calculations without tracking output. Neither
                ``enter_context`` nor ``exit_context`` is called and the
                placeholder string ``"debug"`` is bound instead of a branch name.
            :param force:
                Skip confirmation and force tracking of results.
            :return:
                A context manager object usable in a ``with`` statement.
            """
            # Imported here so this method does not depend on a module-level import.
            from contextlib import contextmanager

            @contextmanager
            def _tracking_context():
                if debug:
                    yield "debug"
                    return

                new_branch_name = self.enter_context(force=force)
                try:
                    yield new_branch_name
                except Exception as e:
                    self.capture_error(e)
                    # Bare raise keeps the original traceback intact.
                    raise
                else:
                    self.exit_context(message=results_commit_message)

            return _tracking_context()
    
    
    Ronald Jäpel's avatar
    Ronald Jäpel committed
        def capture_error(self, error):
            """
            Print the traceback of the exception currently being handled and
            write it, one line per entry, to ``error.stack`` in the output path.

            :param error:
                The caught exception. It is not inspected here; the traceback
                text comes from ``traceback.format_exc()``, so this is meant to
                be called from inside an ``except`` block.
            """
            stack_text = traceback.format_exc()
            print(stack_text)

            error_file = self.output_path / "error.stack"
            write_lines_to_file(error_file, stack_text.split("\n"))
    
    Ronald Jäpel's avatar
    Ronald Jäpel committed
    
    
        @property
        def output_log_file_path(self):
            """
            Path of the ``log.tsv`` file inside this repository.

            Accessing this property checks out the ``main`` branch first.
            """
            self.checkout("main")

            log_file = self.path / "log.tsv"
            return log_file
    
    Ronald Jäpel's avatar
    Ronald Jäpel committed
    
    
        @property
        def output_log(self):
            """
            An ``OutputLog`` built from the file at ``output_log_file_path``.

            Reading ``output_log_file_path`` checks out the ``main`` branch.
            """
            log_file = self.output_log_file_path
            return OutputLog(filepath=log_file)
    
    Ronald Jäpel's avatar
    Ronald Jäpel committed
    
    
        def print_output_log(self):
            """
            Print the output log.

            The ``main`` branch is checked out to read the log. Afterwards the
            branch named by ``self._most_recent_branch`` is checked out again,
            even if reading or printing the log raises.
            """
            self.checkout("main")
            try:
                print(self.output_log)
            finally:
                # Do not leave the repository on main when the log cannot be shown.
                self.checkout(self._most_recent_branch)
    
        def prepare_new_branch(self, branch_name):
            """
            Prepares a new branch to receive data. This includes:
             - checking out the main branch,
             - creating a new branch from there,
             - deleting ``run_history`` and ``log.tsv`` from the working
               directory if they exist.
            Failures while deleting are printed and otherwise ignored.

            :param branch_name:
                Name of the new branch.
            """
            git_cmd = self._git
            git_cmd.checkout("main")
            git_cmd.checkout("-b", branch_name)  # same as: git checkout -b <branch_name>

            # Previous code backup and previous logs, in that order.
            stale_entries = (self.path / "run_history", self.path / "log.tsv")
            for stale_entry in stale_entries:
                if not stale_entry.exists():
                    continue
                try:
                    delete_path(stale_entry)
                except Exception as e:
                    print(e)
    
    
    
    class JupyterInterfaceRepo(ProjectRepo):
        """
        Project repository variant for notebook workflows.

        Both methods do nothing when ``"nbconvert_call"`` is present in
        ``sys.argv``.
        NOTE(review): presumably that marker is set while a notebook is being
        re-executed through nbconvert - confirm against ``Notebook``.
        """

        def commit(self, message: str, add_all=True, verbosity=1):
            """
            Commit current state of the repository.

            Prints a notice and returns without committing when
            ``"nbconvert_call"`` is in ``sys.argv``.

            :param message:
                Commit message
            :param add_all:
                Option to add all changed and new files to git automatically.
            :param verbosity:
                Option to choose degree of printed feedback.
            """
            running_under_nbconvert = "nbconvert_call" in sys.argv
            if running_under_nbconvert:
                print("Not committing during nbconvert.")
                return

            super().commit(message, add_all, verbosity)

        def commit_nb_output(self, notebook_path: str, results_commit_message: str,
                             force_rerun=True, timeout=600, conversion_formats: list = None):
            """
            Re-run a notebook inside a tracking context and export its results.

            Within ``track_results`` the notebook is checked and re-run, then
            converted and its figures exported into ``self.output_path``.

            :param notebook_path:
                Path of the notebook. A relative path is resolved against ``self.path``.
            :param results_commit_message:
                Commit message handed to ``track_results``.
            :param force_rerun:
                Forwarded to ``Notebook.check_and_rerun_notebook``.
            :param timeout:
                Forwarded to ``Notebook.check_and_rerun_notebook``.
            :param conversion_formats:
                Forwarded to ``Notebook.convert_ipynb`` as ``formats``.
            """
            # Calls made while "nbconvert_call" is in sys.argv return immediately;
            # only a call without that marker runs the steps below.
            if "nbconvert_call" in sys.argv:
                return

            is_relative = not Path(notebook_path).is_absolute()
            if is_relative:
                notebook_path = self.path / notebook_path

            notebook = Notebook(notebook_path)

            with self.track_results(results_commit_message, force=True):
                notebook.check_and_rerun_notebook(
                    force_rerun=force_rerun,
                    timeout=timeout,
                )

                # This is executed after the nbconvert call
                output_dir = self.output_path
                notebook.convert_ipynb(output_dir, formats=conversion_formats)
                notebook.export_all_figures(self.output_path)