From 5bd9f30fe8d3616cab2f82e68fefb1b4ae155cc1 Mon Sep 17 00:00:00 2001
From: "r.jaepel" <r.jaepel@fz-juelich.de>
Date: Wed, 29 Nov 2023 14:41:48 +0100
Subject: [PATCH] Change from os.path to pathlib

---
 cadetrdm/initialize_repo.py |   9 ++-
 cadetrdm/repositories.py    | 112 +++++++++++++++++++-----------------
 tests/test_git_adapter.py   |  61 ++++++++------------
 3 files changed, 91 insertions(+), 91 deletions(-)

diff --git a/cadetrdm/initialize_repo.py b/cadetrdm/initialize_repo.py
index 304c625..2d12289 100644
--- a/cadetrdm/initialize_repo.py
+++ b/cadetrdm/initialize_repo.py
@@ -1,6 +1,7 @@
 import os
 import json
 import uuid
+from pathlib import Path
 
 try:
     import git
@@ -176,12 +177,14 @@ def clone(project_url, path_to_repo: str = None):
     print(f"Cloning {project_url} into {path_to_repo}")
     git.Repo.clone_from(project_url, path_to_repo)
 
+    path_to_repo = Path(path_to_repo)
+
     # Clone output repo from remotes
-    json_path = os.path.join(path_to_repo, "output_remotes.json")
+    json_path = path_to_repo / "output_remotes.json"
     with open(json_path, "r") as file_handle:
         meta_dict = json.load(file_handle)
 
-    output_folder_name = os.path.join(path_to_repo, meta_dict["output_folder_name"])
+    output_folder_name = path_to_repo / meta_dict["output_folder_name"]
     ssh_remotes = list(meta_dict["output_remotes"].values())
     http_remotes = [ssh_url_to_http_url(url) for url in ssh_remotes]
     if len(ssh_remotes + http_remotes) == 0:
@@ -194,7 +197,7 @@ def clone(project_url, path_to_repo: str = None):
             print(e)
         else:
             break
-    environment_path = os.path.join(os.getcwd(), path_to_repo, "environment.yml")
+    environment_path = Path(os.getcwd()) / path_to_repo / "environment.yml"
 
     repo = ProjectRepo(path_to_repo)
     repo.fill_data_from_cadet_rdm_json()
diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py
index 4ed14a2..f12888e 100644
--- a/cadetrdm/repositories.py
+++ b/cadetrdm/repositories.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 import json
 import sys
 import traceback
@@ -51,6 +52,10 @@ class BaseRepo:
         """
         if repository_path is None or repository_path == ".":
             repository_path = os.getcwd()
+
+        if type(repository_path) is str:
+            repository_path = Path(repository_path)
+
         self._git_repo = git.Repo(repository_path, search_parent_directories=search_parent_directories, *args, **kwargs)
         self._git = self._git_repo.git
 
@@ -73,7 +78,7 @@ class BaseRepo:
 
     @property
     def working_dir(self):
-        return self._git_repo.working_dir
+        return Path(self._git_repo.working_dir)
 
     @property
     def head(self):
@@ -102,11 +107,11 @@ class BaseRepo:
 
     @property
     def data_json_path(self):
-        return os.path.join(self.working_dir, ".cadet-rdm-data.json")
+        return self.working_dir / ".cadet-rdm-data.json"
 
     @property
     def cache_json_path(self):
-        return os.path.join(self.working_dir, ".cadet-rdm-cache.json")
+        return self.working_dir / ".cadet-rdm-cache.json"
 
     def add_remote(self, remote_url, remote_name="origin"):
         """
@@ -121,12 +126,12 @@ class BaseRepo:
         if rdm_data["is_project_repo"]:
             pass
         if rdm_data["is_output_repo"]:
-            project_repo = ProjectRepo(os.path.split(self.working_dir)[0])
+            project_repo = ProjectRepo(self.working_dir.parent)
             project_repo.update_output_remotes_json()
 
     def import_remote_repo(self, source_repo_location, source_repo_branch, target_repo_location=None):
         """
-        # ToDo: consider moving this into the base repo, to use without
+        Import a remote repo and update the cadet-rdm-cache
 
         :param source_repo_location:
         Path or URL to the source repo.
@@ -143,9 +148,9 @@ class BaseRepo:
         Path to the cloned repository
         """
         if target_repo_location is None:
-            target_repo_location = os.path.join(self.working_dir, "external_cache", source_repo_location.split("/")[-1])
+            target_repo_location = self.working_dir / "external_cache" / source_repo_location.split("/")[-1]
         else:
-            target_repo_location = os.path.join(self.working_dir, target_repo_location)
+            target_repo_location = self.working_dir / target_repo_location
 
         self.add_path_to_gitignore(target_repo_location)
 
@@ -153,6 +158,7 @@ class BaseRepo:
         multi_options = ["--filter=blob:none", "--branch", source_repo_branch, "--single-branch"]
         repo = git.Repo.clone_from(source_repo_location, target_repo_location, multi_options=multi_options)
         repo.git.clear_cache()
+        repo.close()
 
         self.update_cadet_rdm_cache_json(source_repo_branch=source_repo_branch,
                                          target_repo_location=target_repo_location,
@@ -167,12 +173,12 @@ class BaseRepo:
         :return:
         """
         path_to_be_ignored = self.ensure_relative_path(path_to_be_ignored)
-        with open(os.path.join(self.working_dir, ".gitignore"), "r") as file_handle:
+        with open(self.working_dir / ".gitignore", "r") as file_handle:
             gitignore = file_handle.readlines()
             gitignore[-1] += "\n"  # Sometimes there is no trailing newline
-        if path_to_be_ignored + "\n" not in gitignore:
-            gitignore.append(path_to_be_ignored + "\n")
-        with open(os.path.join(self.working_dir, ".gitignore"), "w") as file_handle:
+        if str(path_to_be_ignored) + "\n" not in gitignore:
+            gitignore.append(str(path_to_be_ignored) + "\n")
+        with open(self.working_dir / ".gitignore", "w") as file_handle:
             file_handle.writelines(gitignore)
 
     def update_cadet_rdm_cache_json(self, source_repo_location, source_repo_branch, target_repo_location):
@@ -186,7 +192,7 @@ class BaseRepo:
         :param target_repo_location:
         Path where to put the repo or data
         """
-        if not os.path.exists(self.cache_json_path):
+        if not self.cache_json_path.exists():
             with open(self.cache_json_path, "w") as file_handle:
                 file_handle.writelines("{}")
 
@@ -198,7 +204,7 @@ class BaseRepo:
         if "__example/path/to/repo__" in rdm_cache.keys():
             rdm_cache.pop("__example/path/to/repo__")
 
-        target_repo_location = self.ensure_relative_path(target_repo_location)
+        target_repo_location = str(self.ensure_relative_path(target_repo_location))
 
         rdm_cache[target_repo_location] = {
             "source_repo_location": source_repo_location,
@@ -216,8 +222,11 @@ class BaseRepo:
         :param input_path:
         :return:
         """
-        if os.path.isabs(input_path):
-            relative_path = os.path.relpath(input_path, self.working_dir)
+        if type(input_path) is str:
+            input_path = Path(input_path)
+
+        if input_path.is_absolute:
+            relative_path = input_path.relative_to(self.working_dir)
         else:
             relative_path = input_path
         return relative_path
@@ -477,16 +486,16 @@ class BaseRepo:
         """
         self._git.checkout("master")
         self._git.checkout('-b', branch_name)  # equivalent to $ git checkout -b %branch_name
-        code_backup_path = os.path.join(self.working_dir, "run_history")
-        logs_path = os.path.join(self.working_dir, "log.tsv")
-        if os.path.exists(code_backup_path):
+        code_backup_path = self.working_dir / "run_history"
+        logs_path = self.working_dir / "log.tsv"
+        if code_backup_path.exists():
             try:
                 # Remove previous code backup
 
                 delete_path(code_backup_path)
             except Exception as e:
                 print(e)
-        if os.path.exists(logs_path):
+        if logs_path.exists():
             try:
                 # Remove previous logs
                 delete_path(logs_path)
@@ -523,7 +532,7 @@ class BaseRepo:
             output_link_line = " and ".join(f"[{repo_identifier}]({output_repo_remote})"
                                             for output_repo_remote in remotes_url_list_http) + "\n"
 
-            readme_filepath = os.path.join(self.working_dir, "README.md")
+            readme_filepath = self.working_dir / "README.md"
             with open(readme_filepath, "r") as file_handle:
                 filelines = file_handle.readlines()
                 filelines_giving_output_repo = [i for i in range(len(filelines))
@@ -564,18 +573,18 @@ class ProjectRepo(BaseRepo):
         :param kwargs:
             Additional kwargs to be handed to BaseRepo.
         """
+        repository_path = Path(repository_path)
 
         super().__init__(repository_path, search_parent_directories=search_parent_directories, *args, **kwargs)
 
         if output_folder is not None:
-            output_folder = os.path.split(output_folder)[-1]
-            self.output_folder = output_folder
+            self.output_folder = Path(output_folder).name
         else:
-            with open(os.path.join(repository_path, "output_remotes.json"), "r") as handle:
+            with open(repository_path / "output_remotes.json", "r") as handle:
                 data = json.load(handle)
             self.output_folder = data["output_folder_name"]
 
-        self._output_repo = OutputRepo(os.path.join(self.working_dir, self.output_folder))
+        self._output_repo = OutputRepo(self.working_dir / self.output_folder)
         self._on_context_enter_commit_hash = None
         self._is_in_context_manager = False
 
@@ -619,7 +628,7 @@ class ProjectRepo(BaseRepo):
 
         self.convert_csv_to_tsv_if_necessary()
 
-        tsv_filepath = os.path.join(self.working_dir, self.output_folder, "log.tsv")
+        tsv_filepath = self.working_dir / self.output_folder / "log.tsv"
 
         df = pd.read_csv(tsv_filepath, sep="\t", header=0)
         # Clean up the headers
@@ -646,12 +655,12 @@ class ProjectRepo(BaseRepo):
 
         :return:
         """
-        tsv_filepath = os.path.join(self.working_dir, self.output_folder, "log.tsv")
-        if os.path.exists(tsv_filepath):
+        tsv_filepath = self.working_dir / self.output_folder / "log.tsv"
+        if tsv_filepath.exists():
             return
 
-        csv_filepath = os.path.join(self.working_dir, self.output_folder, "log.csv")
-        if not os.path.exists(csv_filepath):
+        csv_filepath = self.working_dir / self.output_folder / "log.csv"
+        if not csv_filepath.exists():
             # We have just initialized the repo and neither tsv nor csv exist.
             return
 
@@ -663,7 +672,7 @@ class ProjectRepo(BaseRepo):
         with open(tsv_filepath, "w") as f:
             f.writelines(tsv_lines)
 
-        write_lines_to_file(path=os.path.join(self.working_dir, ".gitattributes"),
+        write_lines_to_file(path=self.working_dir / ".gitattributes",
                             lines=["rmd-log.tsv merge=union"],
                             open_type="a")
 
@@ -681,14 +690,14 @@ class ProjectRepo(BaseRepo):
 
         self._output_repo._git.checkout("master")
 
-        logs_folderpath = os.path.join(self.working_dir, self.output_folder, "run_history", output_branch_name)
-        if not os.path.exists(logs_folderpath):
+        logs_folderpath = self.working_dir / self.output_folder / "run_history" / output_branch_name
+        if not logs_folderpath.exists():
             os.makedirs(logs_folderpath)
 
-        json_filepath = os.path.join(logs_folderpath, "metadata.json")
+        json_filepath = logs_folderpath / "metadata.json"
         # note: if filename of "log.tsv" is changed,
         #  this also has to be changed in the gitattributes of the init repo func
-        tsv_filepath = os.path.join(self.output_repo.working_dir, "log.tsv")
+        tsv_filepath = self.output_repo.working_dir / "log.tsv"
         self.convert_csv_to_tsv_if_necessary()
 
         meta_info_dict = {
@@ -696,7 +705,7 @@ class ProjectRepo(BaseRepo):
             "Output repo branch": output_branch_name,
             "Output repo commit hash": output_repo_hash,
             "Project repo commit hash": str(self.head.commit),
-            "Project repo folder name": os.path.split(self.working_dir)[-1],
+            "Project repo folder name": self.working_dir.name,
             "Project repo remotes": self.remote_urls,
             "Python sys args": str(sys.argv),
             "Tags": ", ".join(self.tags),
@@ -707,7 +716,7 @@ class ProjectRepo(BaseRepo):
         with open(json_filepath, "w") as f:
             json.dump(meta_info_dict, f, indent=2)
 
-        if not os.path.exists(tsv_filepath):
+        if not tsv_filepath.exists():
             with open(tsv_filepath, "w") as f:
                 f.write(csv_header + "\n")
                 # csv.writer(csv_header + "\n")
@@ -722,10 +731,6 @@ class ProjectRepo(BaseRepo):
 
         self.dump_package_list(logs_folderpath)
 
-        # Copy all code files from the project git repo to the output repo
-        # code_copy_folderpath = os.path.join(logs_folderpath, "code_backup")
-        # if not os.path.exists(code_copy_folderpath):
-        #     os.makedirs(code_copy_folderpath)
         self.copy_code(logs_folderpath)
 
         self._output_repo.add(".")
@@ -743,12 +748,15 @@ class ProjectRepo(BaseRepo):
         :param target_path:
         :return:
         """
-        code_tmp_folder = os.path.join(target_path, "git_repo")
+        if type(target_path) is str:
+            target_path = Path(target_path)
+
+        code_tmp_folder = target_path / "git_repo"
 
         multi_options = ["--filter=blob:none", "--single-branch"]
         git.Repo.clone_from(self.working_dir, code_tmp_folder, multi_options=multi_options)
 
-        shutil.make_archive(os.path.join(target_path, "code"), "zip", code_tmp_folder)
+        shutil.make_archive(target_path / "code", "zip", code_tmp_folder)
 
         delete_path(code_tmp_folder)
 
@@ -769,7 +777,7 @@ class ProjectRepo(BaseRepo):
     def update_output_remotes_json(self):
         output_repo_remotes = self.output_repo.remote_urls
         self.add_list_of_remotes_in_readme_file("output_repo", output_repo_remotes)
-        output_json_filepath = os.path.join(self.working_dir, "output_remotes.json")
+        output_json_filepath = self.working_dir / "output_remotes.json"
         with open(output_json_filepath, "w") as file_handle:
             remotes_dict = {remote.name: str(remote.url) for remote in self.output_repo.remotes}
             json_dict = {"output_folder_name": self.output_folder, "output_remotes": remotes_dict}
@@ -821,13 +829,13 @@ class ProjectRepo(BaseRepo):
         previous_branch = self.output_repo.active_branch.name
         self.output_repo._git.checkout(branch_name)
 
-        source_filepath = os.path.join(self.output_repo.working_dir, file_path)
+        source_filepath = self.output_repo.working_dir / file_path
 
-        target_folder = os.path.join(self.output_repo.working_dir + "_cached", branch_name)
+        target_folder = self.working_dir / (self.output_folder + "_cached") / branch_name
         os.makedirs(target_folder, exist_ok=True)
 
-        target_filepath = os.path.join(target_folder, file_path)
-        if os.path.exists(target_filepath):
+        target_filepath = target_folder / file_path
+        if target_filepath.exists():
             os.chmod(target_filepath, S_IWRITE)
             os.remove(target_filepath)
         shutil.copyfile(source_filepath, target_filepath)
@@ -851,16 +859,16 @@ class ProjectRepo(BaseRepo):
         :return:
         """
         if sub_path is None:
-            return os.path.join(self.working_dir, self.output_repo.working_dir)
+            return self.working_dir / self.output_repo.working_dir
         else:
-            return os.path.join(self.working_dir, self.output_repo.working_dir, sub_path)
+            return self.working_dir / self.output_repo.working_dir, sub_path
 
     def remove_cached_files(self):
         """
         Delete all previously cached results.
         """
-        if os.path.exists(self.output_repo.working_dir + "_cached"):
-            delete_path(self.output_repo.working_dir + "_cached")
+        if (self.working_dir / (self.output_folder + "_cached")).exists():
+            delete_path(self.working_dir / (self.output_folder + "_cached"))
 
     def test_for_correct_repo_setup(self):
         """
@@ -920,7 +928,7 @@ class ProjectRepo(BaseRepo):
                 previous_branch = self.output_repo.active_branch.name
                 self.output_repo.checkout(branch_name)
 
-            target_folder = os.path.join(self.output_repo.working_dir + "_cached", branch_name)
+            target_folder = self.working_dir / (self.output_folder + "_cached") / branch_name
 
             shutil.copytree(source_filepath, target_folder)
 
diff --git a/tests/test_git_adapter.py b/tests/test_git_adapter.py
index 69b57ad..450d126 100644
--- a/tests/test_git_adapter.py
+++ b/tests/test_git_adapter.py
@@ -1,6 +1,5 @@
-import os.path
-import shutil
-import stat
+from pathlib import Path
+import os
 import random
 
 import pytest
@@ -16,23 +15,13 @@ from cadetrdm.io_utils import delete_path
 @pytest.fixture(scope="module")
 def path_to_repo():
     # a "fixture" serves up shared, ready variables to test functions that should use the fixture as a kwarg
-    return "test_repo"
-
-
-# @pytest.fixture(scope="module", autouse=True)
-# def my_fixture(path_to_repo):
-#     print('INITIALIZATION')
-#     if os.path.exists(path_to_repo):
-#         remove_dir(path_to_repo)
-#     yield "this is just here because something must yield"
-#     print("TEAR DOWN")
-#     remove_dir(path_to_repo)
+    return Path("test_repo")
 
 
 def modify_code(path_to_repo):
     # Add changes to the project code
     random_number = random.randint(0, 265)
-    filepath = os.path.join(path_to_repo, f"print_random_number.py")
+    filepath = path_to_repo / f"print_random_number.py"
     with open(filepath, "w") as file:
         file.write(f"print({random_number})\n")
 
@@ -45,26 +34,24 @@ def count_commit_number(repo):
 
 def example_generate_results_array(path_to_repo, output_folder):
     results_array = np.random.random((500, 3))
-    np.savetxt(os.path.join(path_to_repo, output_folder, "result.csv"),
-               results_array,
-               delimiter=",")
+    np.savetxt(path_to_repo / output_folder / "result.csv", results_array, delimiter=",")
     return results_array
 
 
 def try_init_gitpython_repo(repo_path):
-    os.path.exists(repo_path)
+    repo_path.exists()
     git.Repo(repo_path)
     return True
 
 
 def try_initialize_git_repo(path_to_repo):
-    if os.path.exists(path_to_repo):
+    if path_to_repo.exists():
         delete_path(path_to_repo)
 
     initialize_repo(path_to_repo, "results")
 
     assert try_init_gitpython_repo(path_to_repo)
-    assert try_init_gitpython_repo(os.path.join(path_to_repo, "results"))
+    assert try_init_gitpython_repo(path_to_repo / "results")
 
 
 def try_commit_code(path_to_repo):
@@ -117,12 +104,12 @@ def try_load_previous_output(path_to_repo, branch_name):
                                             file_path="result.csv")
         previous_array = np.loadtxt(cached_array_path, delimiter=",")
         extended_array = np.concatenate([previous_array, previous_array])
-        extended_array_file_path = os.path.join(path_to_repo, repo.output_folder, "extended_result.csv")
+        extended_array_file_path = path_to_repo / repo.output_folder / "extended_result.csv"
         np.savetxt(extended_array_file_path,
                    extended_array,
                    delimiter=",")
-        assert os.path.exists(cached_array_path)
-        assert os.path.exists(extended_array_file_path)
+        assert cached_array_path.exists()
+        assert extended_array_file_path.exists()
 
 
 def try_add_remote(path_to_repo):
@@ -132,16 +119,16 @@ def try_add_remote(path_to_repo):
 
 
 def try_initialize_from_remote():
-    if os.path.exists("test_repo_from_remote"):
+    if Path("test_repo_from_remote").exists():
         delete_path("test_repo_from_remote")
     clone("https://jugit.fz-juelich.de/IBG-1/ModSim/cadet/rdm-examples-fraunhofer-ime-aachen",
-                           "test_repo_from_remote")
+          "test_repo_from_remote")
     assert try_init_gitpython_repo("test_repo_from_remote")
 
 
 def test_init_over_existing_repo(monkeypatch):
-    path_to_repo = "test_repo_2"
-    if os.path.exists(path_to_repo):
+    path_to_repo = Path("test_repo_2")
+    if path_to_repo.exists():
         delete_path(path_to_repo)
     os.makedirs(path_to_repo)
     os.chdir(path_to_repo)
@@ -163,8 +150,8 @@ def test_init_over_existing_repo(monkeypatch):
 
 
 def test_cache_with_non_rdm_repo(monkeypatch):
-    path_to_repo = "test_repo_5"
-    if os.path.exists(path_to_repo):
+    path_to_repo = Path("test_repo_5")
+    if path_to_repo.exists():
         delete_path(path_to_repo)
     os.makedirs(path_to_repo)
     os.chdir(path_to_repo)
@@ -190,8 +177,8 @@ def test_cache_with_non_rdm_repo(monkeypatch):
 
 
 def test_add_lfs_filetype():
-    path_to_repo = "test_repo_3"
-    if os.path.exists(path_to_repo):
+    path_to_repo = Path("test_repo_3")
+    if path_to_repo.exists():
         delete_path(path_to_repo)
     os.makedirs(path_to_repo)
     initialize_repo(path_to_repo)
@@ -225,8 +212,8 @@ def test_cadet_rdm(path_to_repo):
 
 
 def test_with_external_repos():
-    path_to_repo = "test_repo_external_data"
-    if os.path.exists(path_to_repo):
+    path_to_repo = Path("test_repo_external_data")
+    if path_to_repo.exists():
         delete_path(path_to_repo)
     os.makedirs(path_to_repo)
     initialize_repo(path_to_repo)
@@ -243,10 +230,12 @@ def test_with_external_repos():
     repo.import_remote_repo(source_repo_location="../test_repo/results", source_repo_branch=branch_name)
     repo.import_remote_repo(source_repo_location="../test_repo/results", source_repo_branch=branch_name,
                             target_repo_location="foo/bar/repo")
-    repo.verify_unchanged_cache()
-
     # delete folder and reload
     delete_path("foo/bar/repo")
+
+    with pytest.raises(Exception):
+        repo.verify_unchanged_cache()
+
     repo.fill_data_from_cadet_rdm_json()
     repo.verify_unchanged_cache()
 
-- 
GitLab