From 1291de1f66e0e6b61f9eedddb1ddb7c69f1d69cb Mon Sep 17 00:00:00 2001
From: "r.jaepel" <r.jaepel@fz-juelich.de>
Date: Tue, 22 Aug 2023 16:33:13 +0200
Subject: [PATCH] Branch new branches off from master and some refactoring

---
 cadetrdm/initialize_repo.py |   5 +-
 cadetrdm/utils.py           | 127 +++++++++++++++++++++++-------------
 tests/test_git_adapter.py   |  19 ++++--
 3 files changed, 99 insertions(+), 52 deletions(-)

diff --git a/cadetrdm/initialize_repo.py b/cadetrdm/initialize_repo.py
index 7be209e..ce09ac7 100644
--- a/cadetrdm/initialize_repo.py
+++ b/cadetrdm/initialize_repo.py
@@ -138,7 +138,8 @@ def initialize_git_repo(path_to_repo: str, output_repo_name: (str | bool) = "out
         # If output_repo_name is False we are in the output_repo and should finish by committing the changes
         repo = ResultsRepo(".")
 
-    repo.git.add(".")
-    repo.git.commit("-m", "initial commit")
+    # ToDo: Why does this break tests if changed to repo.add and repo.commit??
+    repo._git.add(".")
+    repo._git.commit("-m", "initial commit")
 
     os.chdir(starting_directory)
diff --git a/cadetrdm/utils.py b/cadetrdm/utils.py
index 27b5b71..d6d064e 100644
--- a/cadetrdm/utils.py
+++ b/cadetrdm/utils.py
@@ -31,19 +31,19 @@ class BaseRepo:
         """
         if repository_path is None or repository_path == ".":
             repository_path = os.getcwd()
-        self.git_repo = git.Repo(repository_path, search_parent_directories=search_parent_directories, *args, **kwargs)
-        self.git = self.git_repo.git
+        self._git_repo = git.Repo(repository_path, search_parent_directories=search_parent_directories, *args, **kwargs)
+        self._git = self._git_repo.git
 
         self._most_recent_branch = self.active_branch.name
         self._earliest_commit = None
 
     @property
     def active_branch(self):
-        return self.git_repo.active_branch
+        return self._git_repo.active_branch
 
     @property
     def untracked_files(self):
-        return self.git_repo.untracked_files
+        return self._git_repo.untracked_files
 
     @property
     def current_commit_hash(self):
@@ -51,35 +51,52 @@ class BaseRepo:
 
     @property
     def working_dir(self):
-        return self.git_repo.working_dir
+        return self._git_repo.working_dir
 
     @property
     def head(self):
-        return self.git_repo.head
+        return self._git_repo.head
 
     @property
     def remotes(self):
-        return self.git_repo.remotes
+        return self._git_repo.remotes
 
     @property
     def earliest_commit(self):
         if self._earliest_commit is None:
-            *_, earliest_commit = self.git_repo.iter_commits()
+            *_, earliest_commit = self._git_repo.iter_commits()
             self._earliest_commit = earliest_commit
         return self._earliest_commit
 
+    # ToDo: test if functools.wraps can keep docstrings
+    def add(self, *args, **kwargs):
+        self._git.add(*args, **kwargs)
+
     def add_remote(self, remote_url, remote_name="origin"):
-        self.git_repo.create_remote(remote_name, url=remote_url)
+        """
+        ToDO add documentation
+        :param remote_url:
+        :param remote_name:
+        :return:
+        """
+        self._git_repo.create_remote(remote_name, url=remote_url)
 
     def push(self, remote=None, local_branch=None, remote_branch=None):
+        """
+        # ToDo extend documentation
+        :param remote:
+        :param local_branch:
+        :param remote_branch:
+        :return:
+        """
         if local_branch is None:
             local_branch = self.active_branch
         if remote_branch is None:
             remote_branch = local_branch
         if remote is None:
-            remote = list(sorted(self.git_repo.remotes.keys()))[0]
+            remote = list(sorted(self._git_repo.remotes.keys()))[0]
 
-        remote_interface = self.git_repo.remotes[remote]
+        remote_interface = self._git_repo.remotes[remote]
         remote_interface.push(refspec=f'{local_branch}:{remote_branch}')
 
     def delete_active_branch_if_branch_is_empty(self):
@@ -89,8 +106,8 @@ class BaseRepo:
         """
         previous_branch = self.active_branch.name
         if str(self.head.commit) == self.earliest_commit:
-            self.git.checkout("master")
-            self.git.branch("-d", previous_branch)
+            self._git.checkout("master")
+            self._git.branch("-d", previous_branch)
 
     def add_all_files(self, automatically_add_new_files=True):
         """
@@ -106,7 +123,7 @@ class BaseRepo:
 
         if automatically_add_new_files:
             for f in self.untracked_files:
-                self.git.add(f)
+                self._git.add(f)
         else:
             proceed = input(
                 f'Found untracked files. Adding the following untracked files to git: \n{untracked_files}\n'
@@ -114,13 +131,13 @@ class BaseRepo:
             )
             if proceed.lower() == "y" or proceed == "":
                 for f in self.untracked_files:
-                    self.git.add(f)
+                    self._git.add(f)
             else:
                 raise KeyboardInterrupt
 
         changed_files = self.changed_files
         for f in changed_files:
-            self.git.add(f)
+            self._git.add(f)
         return self.untracked_files + changed_files
 
     def reset_hard_to_head(self):
@@ -131,13 +148,13 @@ class BaseRepo:
         if not (proceed.lower() == "y" or proceed == ""):
             raise KeyboardInterrupt
         # reset all tracked files to previous commit, -q silences output
-        self.git.reset("-q", "--hard", "HEAD")
+        self._git.reset("-q", "--hard", "HEAD")
         # remove all untracked files and directories, -q silences output
-        self.git.clean("-q", "-f", "-d")
+        self._git.clean("-q", "-f", "-d")
 
     @property
     def changed_files(self):
-        changed_files = self.git.diff(None, name_only=True).split('\n')
+        changed_files = self._git.diff(None, name_only=True).split('\n')
         if "" in changed_files:
             changed_files.remove("")
         return changed_files
@@ -180,37 +197,37 @@ class BaseRepo:
             self.update_package_list()
         if add_all:
             self.add_all_files()
-        commit_return = self.git.commit("-m", message)
+        commit_return = self._git.commit("-m", message)
         print("\n" + commit_return + "\n")
 
     def git_ammend(self, ):
         """
         Call git commit with options --amend --no-edit
         """
-        self.git.commit("--amend", "--no-edit")
+        self._git.commit("--amend", "--no-edit")
 
     @property
     def status(self):
-        return self.git.status()
+        return self._git.status()
 
     @property
     def log(self):
-        return self.git.log()
+        return self._git.log()
 
     def log_oneline(self):
-        return self.git.log("--oneline")
+        return self._git.log("--oneline")
 
     def print_status(self):
         """
         prints git status
         """
-        print(self.git.status())
+        print(self._git.status())
 
     def print_log(self):
         """
         Prints the git log
         """
-        print(self.git.log())
+        print(self._git.log())
 
     def stash_all_changes(self):
         """
@@ -219,22 +236,21 @@ class BaseRepo:
         """
         if not self.exist_unstaged_changes:
             raise RuntimeError("No changes in repo to stash.")
-        self.git.add(".")
-        self.git.stash()
+        self._git.add(".")
+        self._git.stash()
 
     def prepare_new_branch(self, branch_name):
         """
         Prepares a new branch to recieve data. This includes:
-         - creating the new branch,
-         - checking the new branch out, and
-         - resetting the HEAD of the branch to the initialization commit on the master branch.
+         - checking out the master branch,
+         - creating a new branch from there
         This thereby produces a clear, empty directory for data, while still maintaining
         .gitignore and .gitatributes
         :param branch_name:
             Name of the new branch.
         """
-        self.git.checkout('-b', branch_name)  # equivalent to $ git checkout -b %branch_name
-        self.git.reset('--hard', self.earliest_commit)  # equivalent to $ git reset --hard %commit_hash
+        self._git.checkout("master")
+        self._git.checkout('-b', branch_name)  # equivalent to $ git checkout -b %branch_name
 
     def apply_stashed_changes(self):
         """
@@ -243,7 +259,7 @@ class BaseRepo:
         All other errors are raised.
         """
         try:
-            self.git.stash('pop')  # equivalent to $ git stash pop
+            self._git.stash('pop')  # equivalent to $ git stash pop
         except git.exc.GitCommandError as e:
             # Will raise error because the stash cannot be applied without conflicts. This is expected
             if 'CONFLICT (modify/delete)' in e.stdout:
@@ -291,6 +307,7 @@ class ProjectRepo(BaseRepo):
 
         self._output_repo = ResultsRepo(os.path.join(self.working_dir, self._output_folder))
         self._on_context_enter_commit_hash = None
+        self._is_in_context_manager = False
 
     @property
     def output_repo(self):
@@ -313,13 +330,13 @@ class ProjectRepo(BaseRepo):
         Checkout the master branch, which contains all the log files.
         """
         self._most_recent_branch = self._output_repo.active_branch.name
-        self._output_repo.git.checkout("master")
+        self._output_repo._git.checkout("master")
 
     def reload_recent_results(self):
         """
         Checkout the most recent previous branch.
         """
-        self._output_repo.git.checkout(self._most_recent_branch)
+        self._output_repo._git.checkout(self._most_recent_branch)
 
     def update_output_master_logs(self):
         """
@@ -331,7 +348,7 @@ class ProjectRepo(BaseRepo):
 
         output_repo_hash = str(self._output_repo.head.commit)
 
-        self._output_repo.git.checkout("master")
+        self._output_repo._git.checkout("master")
 
         json_filepath = os.path.join(self.working_dir, self._output_folder, f"{output_branch_name}.json")
         # note: if filename of "log.csv" is changed,
@@ -363,13 +380,30 @@ class ProjectRepo(BaseRepo):
         with open(csv_filepath, "a") as f:
             f.write(csv_data + "\n")
 
-        self._output_repo.git.add(".")
-        self._output_repo.git.commit("-m", output_branch_name)
+        self._output_repo._git.add(".")
+        self._output_repo._git.commit("-m", output_branch_name)
 
-        self._output_repo.git.checkout(output_branch_name)
+        self._output_repo._git.checkout(output_branch_name)
         self._most_recent_branch = output_branch_name
 
-    def cache_previous_results(self, branch_name, file_path):
+    def load_external_data(self, url, name=None, path=None, branch=None, commit=None):
+        """
+        name: str,
+        path: PathLike,
+        url: Union[str, None] = None,
+        branch: Union[str, None] = None,"""
+        if path is None:
+            repo_name = url.split("/")[-1]
+            path = os.path.join("external_repos", repo_name)
+        if name is None:
+            name = url.split("/")[-1]
+
+        self._git_repo.create_submodule(name=name, url=url, branch=branch, path=path)
+        if commit is not None:
+            submodule = BaseRepo(path)
+            submodule._git.checkout(commit)
+
+    def load_previous_results(self, branch_name, file_path):
         """
         Load previously generated results to iterate upon.
         :param branch_name:
@@ -386,7 +420,7 @@ class ProjectRepo(BaseRepo):
             has_stashed_changes = False
 
         previous_branch = self.output_repo.active_branch.name
-        self.output_repo.git.checkout(branch_name)
+        self.output_repo._git.checkout(branch_name)
 
         source_filepath = os.path.join(self.output_repo.working_dir, file_path)
 
@@ -398,7 +432,7 @@ class ProjectRepo(BaseRepo):
 
         shutil.copyfile(source_filepath, target_filepath)
 
-        self.output_repo.git.checkout(previous_branch)
+        self.output_repo._git.checkout(previous_branch)
         if has_stashed_changes:
             self.output_repo.apply_stashed_changes()
 
@@ -446,6 +480,7 @@ class ProjectRepo(BaseRepo):
         """
         self.test_for_uncommitted_changes()
         self._on_context_enter_commit_hash = self.current_commit_hash
+        self._is_in_context_manager = True
         output_repo = self.output_repo
 
         if output_repo.exist_unstaged_changes:
@@ -472,13 +507,15 @@ class ProjectRepo(BaseRepo):
             raise RuntimeError("Code has changed since starting the context. Don't do that.")
 
         print("Completed computations, commiting results")
-        self.output_repo.git.add(".")
-        commit_return = self.output_repo.git.commit("-m", message)
+        self.output_repo._git.add(".")
+        commit_return = self.output_repo._git.commit("-m", message)
 
         print("\n" + commit_return + "\n")
 
         self.update_output_master_logs()
         self.remove_cached_files()
+        self._is_in_context_manager = False
+        self._on_context_enter_commit_hash = None
 
     @contextlib.contextmanager
     def track_results(self, results_commit_message: str, debug=False):
diff --git a/tests/test_git_adapter.py b/tests/test_git_adapter.py
index bb47faa..89f5c5b 100644
--- a/tests/test_git_adapter.py
+++ b/tests/test_git_adapter.py
@@ -47,7 +47,7 @@ def modify_code(path_to_repo):
 
 
 def count_commit_number(repo):
-    commit_log = repo.git.log("--oneline").split("\n")
+    commit_log = repo._git.log("--oneline").split("\n")
     current_commit_number = len(commit_log)
     return current_commit_number
 
@@ -86,6 +86,12 @@ def try_commit_code(path_to_repo):
     assert current_commit_number + 1 == updated_commit_number
 
 
+def try_add_submodule(path_to_repo):
+    repo = ProjectRepo(path_to_repo)
+    repo.load_external_data("https://jugit.fz-juelich.de/IBG-1/ModSim/cadet/git_lfs_data_1")
+    # ToDo: current WIP
+
+
 def try_commit_code_without_code_changes(path_to_repo):
     repo = ProjectRepo(path_to_repo)
     current_commit_number = count_commit_number(repo)
@@ -116,8 +122,8 @@ def try_commit_results_with_uncommitted_code_changes(path_to_repo):
 def try_load_previous_result(path_to_repo, branch_name):
     repo = ProjectRepo(path_to_repo)
     with repo.track_results(results_commit_message="Load array and extend"):
-        cached_array_path = repo.cache_previous_results(branch_name=branch_name,
-                                                        file_path="result.csv")
+        cached_array_path = repo.load_previous_results(branch_name=branch_name,
+                                                       file_path="result.csv")
         previous_array = np.loadtxt(cached_array_path, delimiter=",")
         extended_array = np.concatenate([previous_array, previous_array])
         extended_array_file_path = os.path.join(path_to_repo, repo._output_folder, "extended_result.csv")
@@ -131,16 +137,19 @@ def try_load_previous_result(path_to_repo, branch_name):
 def try_add_remote(path_to_repo):
     repo = ProjectRepo(path_to_repo)
     repo.add_remote("git@jugit.fz-juelich.de:IBG-1/ModSim/cadet/CADET-RDM.git")
-    assert "origin" in repo.git_repo.remotes
+    assert "origin" in repo._git_repo.remotes
 
 
 def test_cadet_rdm(path_to_repo):
     # because these depend on one-another and there is no native support afaik for sequential tests
     # these tests are called sequentially here as try_ functions.
     try_initialize_git_repo(path_to_repo)
+
     try_add_remote(path_to_repo)
+    # try_add_submodule(path_to_repo)
     try_commit_code(path_to_repo)
     try_commit_code_without_code_changes(path_to_repo)
-    results_branch_name = try_commit_results_data(path_to_repo)
     try_commit_results_with_uncommitted_code_changes(path_to_repo)
+
+    results_branch_name = try_commit_results_data(path_to_repo)
     try_load_previous_result(path_to_repo, results_branch_name)
-- 
GitLab