Skip to content
Snippets Groups Projects
repositories.py 45.4 KiB
Newer Older
Ronald Jäpel's avatar
Ronald Jäpel committed
        Delete all previously cached results.
        """
        if (self.path / (self._output_folder + "_cached")).exists():
            delete_path(self.path / (self._output_folder + "_cached"))
    def test_for_correct_repo_setup(self):
        """
        Placeholder hook that is called at the start of enter_context.

        ToDo: implement. The method currently has no body besides this docstring,
        so it performs no checks and always returns None.

        :return:
            None.
        """

    def enter_context(self, force=False):
Ronald Jäpel's avatar
Ronald Jäpel committed
        """
        Enter the tracking context. This includes:
         - Ensure no uncommitted changes in the project repository
         - Remove all uncommitted changes in the output repository
         - Clean up empty branches in the output repository
         - Create a new empty output branch in the output repository

        :return:
            The name of the newly created output branch.
        """
        self.test_for_correct_repo_setup()
        self._on_context_enter_commit_hash = self.current_commit_hash
        self._is_in_context_manager = True
        if output_repo.exist_uncomitted_changes:
            output_repo._reset_hard_to_head(force_entry=force)
Ronald Jäpel's avatar
Ronald Jäpel committed
        output_repo.delete_active_branch_if_branch_is_empty()

        new_branch_name = self.get_new_output_branch_name()
        # update urls in main branch of output_repo
        output_repo._git.checkout("main")
        project_repo_remotes = self.remote_urls
        output_repo.add_list_of_remotes_in_readme_file("project_repo", project_repo_remotes)
        output_repo.commit("Update urls", verbosity=0)
        output_repo.prepare_new_branch(new_branch_name)
        return new_branch_name

    def copy_data_to_cache(self, branch_name=None):
        """
        Copy all existing output results into a cached folder and make it read-only.

        The target is ``<self.path>/<output folder>_cached/<branch name>``. It is
        created with shutil.copytree, so it must not exist yet.

        :param branch_name:
            optional branch name, if None, current branch is used. If given, the
            output repository is checked out to that branch for the duration of the
            copy and switched back to the previously active branch afterwards, even
            if the copy fails.

        :return:
            None.
        """
        source_filepath = self.output_repo.path
        if branch_name is None:
            branch_name = self.output_repo.active_branch.name
            previous_branch = None
        else:
            previous_branch = self.output_repo.active_branch.name
            self.output_repo.checkout(branch_name)

        try:
            target_folder = self.path / (self._output_folder + "_cached") / branch_name

            shutil.copytree(source_filepath, target_folder)

            # Set all files to read only
            for filename in glob.iglob(f"{target_folder}/**/*", recursive=True):
                absolute_path = os.path.abspath(filename)
                if os.path.isdir(absolute_path):
                    continue
                os.chmod(absolute_path, S_IREAD)
        finally:
            # Do not leave the output repository on the temporary branch when copying fails.
            if previous_branch is not None:
                self.output_repo.checkout(previous_branch)
Ronald Jäpel's avatar
Ronald Jäpel committed
        """
        After running all project code, this prepares the commit of the results to the output repository. This includes
         - Ensure no uncommitted changes in the project repository
         - Stage all changes in the output repository
         - Commit all changes in the output repository with the given commit message.
         - Update the log files in the main branch of the output repository.
Ronald Jäpel's avatar
Ronald Jäpel committed
        :param message:
            Commit message for the output repository commit.
        """
        if self._on_context_enter_commit_hash != self.current_commit_hash:
            raise RuntimeError("Code has changed since starting the context. Don't do that.")

        print("Completed computations, commiting results")
        self.output_repo.add(".")
            # This has to be using ._git.commit to raise an error if no results have been written.
            commit_return = self.output_repo._git.commit("-m", message)
            self.copy_data_to_cache()
            self.update_output_main_logs()
            main_cach_path = self.path / (self._output_folder + "_cached") / "main"
            if main_cach_path.exists():
                delete_path(main_cach_path)
            self.copy_data_to_cache("main")
            print("\n" + commit_return + "\n")
        except git.exc.GitCommandError as e:
            self.output_repo.delete_active_branch_if_branch_is_empty()
            # self.remove_cached_files()
            self._is_in_context_manager = False
            self._on_context_enter_commit_hash = None
    def track_results(self, results_commit_message: str, debug=False, force=False):
        """
        Context manager to be used when running project code that produces output that should
        be tracked in the output repository.

        On entry the tracking context is entered via enter_context and the name of
        the new output branch is bound by ``with ... as``. If the body raises, the
        error is recorded via capture_error and re-raised; otherwise the results are
        committed via exit_context.

        :param results_commit_message:
            Commit message for the commit of the output repository.
        :param debug:
            Perform calculations without tracking output. In this mode neither
            enter_context nor exit_context is called and ``None`` is bound
            instead of a branch name.
        :param force:
            Skip confirmation and force tracking of results.

        :return:
            A context manager yielding the name of the new output branch.
        """
        # Imported locally so that this method does not depend on the module's import section.
        import contextlib

        @contextlib.contextmanager
        def _tracking_context():
            if debug:
                # Run the body without touching the repositories.
                yield None
                return

            new_branch_name = self.enter_context(force=force)
            try:
                yield new_branch_name
            except Exception as e:
                self.capture_error(e)
                # Bare raise keeps the original traceback.
                raise
            else:
                self.exit_context(message=results_commit_message)

        return _tracking_context()

Ronald Jäpel's avatar
Ronald Jäpel committed
    def capture_error(self, error):
        """
        Print the traceback of the exception that is currently being handled and
        write it, line by line, to the file "error.stack" in the output path.

        :param error:
            The caught exception. It is not used directly; the traceback text is
            taken from traceback.format_exc().
        """
        stack_trace = traceback.format_exc()
        print(stack_trace)
        stack_file = self.output_path / "error.stack"
        write_lines_to_file(stack_file, stack_trace.split("\n"))
Ronald Jäpel's avatar
Ronald Jäpel committed

    @property
    def output_log_file_path(self):
        """
        Path of the "log.tsv" file in the repository directory.

        Accessing this property has a side effect: if the active branch is not
        "main", the repository is checked out to "main" first.

        :return:
            ``self.path / "log.tsv"``
        """
        active_branch = self.active_branch
        # The active branch may be a branch object exposing ``.name`` rather than a plain
        # string; compare by name so that the check can actually match "main".
        active_branch_name = getattr(active_branch, "name", active_branch)
        if active_branch_name != "main":
            self.checkout("main")
        return self.path / "log.tsv"
Ronald Jäpel's avatar
Ronald Jäpel committed

    @property
    def output_log(self):
        """
        Load the output log from the main branch.

        Accessing this property checks out "main" and hard-resets the working
        tree to HEAD (with ``force_entry=True``) before the log file is read.

        :return:
            An OutputLog built from the file at output_log_file_path.
        """
        self.checkout("main")
        self._reset_hard_to_head(force_entry=True)
        log_file = self.output_log_file_path
        return OutputLog(filepath=log_file)
Ronald Jäpel's avatar
Ronald Jäpel committed

    def print_output_log(self):
        """
        Print the output log stored in the main branch.

        The repository is checked out to "main" for reading the log and is
        afterwards checked out to ``self._most_recent_branch`` again, even if
        reading or printing the log fails.
        """
        self.checkout("main")
        try:
            output_log = self.output_log
            print(output_log)
        finally:
            # Always leave "main" again, even when the log could not be read.
            self.checkout(self._most_recent_branch)
    def prepare_new_branch(self, branch_name):
        """
        Set up a fresh branch that is ready to receive new data.

        The main branch is checked out and a new branch is created from it. The
        "run_history" folder and the "log.tsv" file inherited from main are then
        removed from the working directory, if present. Errors raised while
        removing them are printed and otherwise ignored.

        :param branch_name:
            Name of the new branch.
        """
        self._git.checkout("main")
        # equivalent to $ git checkout -b %branch_name
        self._git.checkout('-b', branch_name)

        inherited_entries = (
            self.path / "run_history",  # previous code backup
            self.path / "log.tsv",  # previous logs
        )
        for inherited_entry in inherited_entries:
            if not inherited_entry.exists():
                continue
            try:
                delete_path(inherited_entry)
            except Exception as e:
                print(e)


class JupyterInterfaceRepo(ProjectRepo):
    """
    ProjectRepo variant whose commit methods do nothing while the process was
    started with "nbconvert_call" in sys.argv.
    """

    def commit(self, message: str, add_all=True, verbosity=1):
        """
        Commit current state of the repository.

        Nothing is committed if "nbconvert_call" is found in sys.argv.

        :param message:
            Commit message
        :param add_all:
            Option to add all changed and new files to git automatically.
        :param verbosity:
            Option to choose degree of printed feedback.
        """

        if "nbconvert_call" in sys.argv:
            print("Not committing during nbconvert.")
            return

        super().commit(message, add_all, verbosity)

    def commit_nb_output(self, notebook_path: str, results_commit_message: str,
                         force_rerun=True, timeout=600, conversion_formats: list = None):
        """
        Re-run a notebook and track its converted output in the output repository.

        Nothing is done if "nbconvert_call" is found in sys.argv.

        :param notebook_path:
            Path of the notebook. A relative path is resolved against self.path.
        :param results_commit_message:
            Commit message for the commit of the output repository.
        :param force_rerun:
            Forwarded to Notebook.check_and_rerun_notebook.
        :param timeout:
            Forwarded to Notebook.check_and_rerun_notebook.
        :param conversion_formats:
            Forwarded as ``formats`` to Notebook.convert_ipynb.
        """
        if "nbconvert_call" in sys.argv:
            return

        # This is reached in the first call of this function
        if not Path(notebook_path).is_absolute():
            notebook_path = self.path / notebook_path

        notebook = Notebook(notebook_path)

        with self.track_results(results_commit_message, force=True):
            notebook.check_and_rerun_notebook(force_rerun=force_rerun,
                                              timeout=timeout)

            # This is executed after the nbconvert call
            notebook.convert_ipynb(self.output_path, formats=conversion_formats)
            notebook.export_all_figures(self.output_path)