From e22e9295f1fc76c92a091f3bf723b2a0bb367466 Mon Sep 17 00:00:00 2001
From: "r.jaepel" <r.jaepel@fz-juelich.de>
Date: Fri, 1 Sep 2023 16:46:09 +0200
Subject: [PATCH] Add functionality to print overview of output repo contents

---
 cadetrdm/cli_integration.py |  8 ++++++++
 cadetrdm/repositories.py    | 38 +++++++++++++++++++++++++++++++++++++
 setup.cfg                   |  2 ++
 tests/test_git_adapter.py   | 10 +++++++++-
 4 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/cadetrdm/cli_integration.py b/cadetrdm/cli_integration.py
index 04b93da..ebbf693 100644
--- a/cadetrdm/cli_integration.py
+++ b/cadetrdm/cli_integration.py
@@ -1,5 +1,6 @@
 import click
 
+from .repositories import ProjectRepo
 from .initialize_repo import initialize_git_repo as initialize_git_repo_implementation
 from .initialize_repo import initialize_from_remote as initialize_from_remote_implementation
 from .conda_env_utils import prepare_conda_env as prepare_conda_env_implementation
@@ -43,3 +44,10 @@ def initialize_git_repo(path_to_repo: str, output_repo_name: (str | bool) = "out
               help='Url to the environment.yml file.')
 def prepare_conda_env(url):
     prepare_conda_env_implementation(url)
+
+
+@cli.command()
+@click.argument('path_to_repo')
+def print_output_log(path_to_repo):
+    """Print a tabular overview of the output repository log of the project at PATH_TO_REPO."""
+    ProjectRepo(path_to_repo).print_output_log()
diff --git a/cadetrdm/repositories.py b/cadetrdm/repositories.py
index 9aa4c9c..a72ef2e 100644
--- a/cadetrdm/repositories.py
+++ b/cadetrdm/repositories.py
@@ -4,6 +4,9 @@ from datetime import datetime
 import shutil
 import contextlib
 
+from tabulate import tabulate
+import pandas as pd
+
 try:
     import git
 except ImportError:
@@ -80,6 +83,10 @@ class BaseRepo:
         """
         self._git_repo.create_remote(remote_name, url=remote_url)
 
+    def checkout(self, *args, **kwargs):  # all arguments are forwarded to self._git.checkout
+        self._most_recent_branch = self.active_branch  # remember the current branch before switching
+        self._git.checkout(*args, **kwargs)
+
     def push(self, remote=None, local_branch=None, remote_branch=None):
         """
         Push local branch to remote.
@@ -345,6 +352,36 @@ class ProjectRepo(BaseRepo):
         """
         self._output_repo._git.checkout(self._most_recent_branch)
 
+    def print_output_log(self):
+        """
+        Print a table (commit message, shortened hash, branch) of the output log.
+
+        Checks out ``master`` in the output repo, reads ``logs/log.csv`` from the
+        output folder and always checks out the previously active branch again.
+        """
+        def insert_newlines(string, every=30):
+            # Hard-wrap long messages so the table column stays narrow.
+            return '\n'.join(string[i:i + every] for i in range(0, len(string), every))
+
+        self.output_repo.checkout("master")
+        try:
+            csv_filepath = os.path.join(self.working_dir, self.output_folder, "logs", "log.csv")
+            df = pd.read_csv(csv_filepath, sep=",", header=0)
+            # Clean up the headers
+            df = df.rename(columns={"Output repo commit message": 'Output commit message',
+                                    "Output repo branch": "Output branch",
+                                    "Output repo commit hash": "Output hash",
+                                    "Project repo commit hash": "Project hash"})
+            # Shorten the commit hashes to 8 and the commit messages to 55 characters
+            df.loc[:, "Output hash"] = df.loc[:, "Output hash"].apply(lambda x: x[:8])
+            df.loc[:, "Output commit message"] = df.loc[:, "Output commit message"].apply(
+                lambda message: insert_newlines(message[:55]))
+            # Select only the columns of interest and print them
+            df = df.loc[:, ["Output commit message", "Output hash", "Output branch"]]
+            print(tabulate(df, headers=df.columns, showindex=False))
+        finally:
+            self.output_repo.checkout(self.output_repo._most_recent_branch)
+
     def update_output_master_logs(self, ):
         """
         Dumps all the metadata information about the project repositories state and
@@ -355,6 +392,7 @@ class ProjectRepo(BaseRepo):
 
         output_repo_hash = str(self._output_repo.head.commit)
         output_commit_message = self._output_repo.active_branch.commit.message
+        output_commit_message = output_commit_message.replace("\n", "; ")
 
         self._output_repo._git.checkout("master")
 
diff --git a/setup.cfg b/setup.cfg
index 78f0f06..c18a585 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -21,6 +21,8 @@ install_requires =
     gitpython>=3.1
     git-lfs
     click
+    tabulate
+    pandas
 
 include_package_data = True
 
diff --git a/tests/test_git_adapter.py b/tests/test_git_adapter.py
index c35b392..4064693 100644
--- a/tests/test_git_adapter.py
+++ b/tests/test_git_adapter.py
@@ -107,10 +107,15 @@ def try_commit_results_data(path_to_repo):
     with repo.track_results(results_commit_message="Add array"):
         example_generate_results_array(path_to_repo, output_folder=repo.output_folder)
     updated_commit_number = count_commit_number(repo.output_repo)
-    assert current_commit_number + 1 == updated_commit_number
+    assert current_commit_number <= updated_commit_number
     return str(repo.output_repo.active_branch)
 
 
+def try_print_log(path_to_repo):
+    """Smoke test: printing the output log of the repo at ``path_to_repo`` must not raise."""
+    ProjectRepo(path_to_repo).print_output_log()
+
+
 def try_commit_results_with_uncommitted_code_changes(path_to_repo):
     repo = ProjectRepo(path_to_repo)
     modify_code(path_to_repo)
@@ -162,4 +167,7 @@ def test_cadet_rdm(path_to_repo):
     try_commit_results_with_uncommitted_code_changes(path_to_repo)
 
     results_branch_name = try_commit_results_data(path_to_repo)
+    results_branch_name = try_commit_results_data(path_to_repo)
+    try_print_log(path_to_repo)
+
     try_load_previous_output(path_to_repo, results_branch_name)
-- 
GitLab