Skip to content
Snippets Groups Projects
Commit e9bc3b87 authored by Ronald Jäpel's avatar Ronald Jäpel
Browse files

Improve CADET-RDM version handling

Refactor initialization of repositories
Add .zip and .ipynb and .html to git-lfs
parent a2beb9fc
No related branches found
No related tags found
No related merge requests found
from .repositories import ProjectRepo from .repositories import ProjectRepo
from .initialize_repo import initialize_repo, clone from .initialize_repo import initialize_repo, clone
from .conda_env_utils import prepare_conda_env from .conda_env_utils import prepare_conda_env
from .version import version
__version__ = "0.0.8" __version__ = version
...@@ -38,8 +38,7 @@ def init_lfs(lfs_filetypes: list, path: str = None): ...@@ -38,8 +38,7 @@ def init_lfs(lfs_filetypes: list, path: str = None):
def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "output", gitignore: list = None, def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "output", gitignore: list = None,
gitattributes: list = None, lfs_filetypes: list = None, gitattributes: list = None, output_repo_kwargs: dict = None):
output_repo_kwargs: dict = None):
""" """
Initialize a git repository at the given path with an optional included output results repository. Initialize a git repository at the given path with an optional included output results repository.
...@@ -51,8 +50,6 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu ...@@ -51,8 +50,6 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu
List of files to be added to the gitignore file. List of files to be added to the gitignore file.
:param gitattributes: :param gitattributes:
List of lines to be added to the gitattributes file List of lines to be added to the gitattributes file
:param lfs_filetypes:
List of filetypes to be handled by git lfs.
:param output_repo_kwargs: :param output_repo_kwargs:
kwargs to be given to the creation of the output repo initalization function. kwargs to be given to the creation of the output repo initalization function.
Include gitignore, gitattributes, and lfs_filetypes kwargs. Include gitignore, gitattributes, and lfs_filetypes kwargs.
...@@ -65,17 +62,16 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu ...@@ -65,17 +62,16 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu
"/managing-large-files/installing-git-large-file-storage") "/managing-large-files/installing-git-large-file-storage")
if gitignore is None: if gitignore is None:
gitignore = [".idea", "*diskcache*", "*tmp*", ".ipynb_checkpoints", "__pycache__"] gitignore = get_default_gitignore() + ["*.ipynb"]
if output_folder_name: gitignore.append(output_folder_name)
gitignore.append(output_folder_name) gitignore.append(output_folder_name + "_cached")
gitignore.append(output_folder_name + "_cached")
if gitattributes is None: if gitattributes is None:
gitattributes = [] gitattributes = []
if lfs_filetypes is None: if output_repo_kwargs is None:
lfs_filetypes = ["*.jpg", "*.png", "*.xlsx", "*.h5", "*.ipynb", "*.pdf", "*.docx"] output_repo_kwargs = {}
starting_directory = os.getcwd() starting_directory = os.getcwd()
project_repo_uuid = str(uuid.uuid4()) project_repo_uuid = str(uuid.uuid4())
...@@ -85,6 +81,53 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu ...@@ -85,6 +81,53 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu
os.makedirs(path_to_repo, exist_ok=True) os.makedirs(path_to_repo, exist_ok=True)
os.chdir(path_to_repo) os.chdir(path_to_repo)
initialize_git()
write_lines_to_file(path=".gitattributes", lines=gitattributes, open_type="a")
write_lines_to_file(path=".gitignore", lines=gitignore, open_type="a")
create_readme()
create_environment_yml()
ProjectRepo.add_jupytext_file()
rdm_data = {
"is_project_repo": True, "is_output_repo": False,
"project_uuid": project_repo_uuid, "output_uuid": output_repo_uuid,
"cadet_rdm_version": cadetrdm.__version__
}
with open(".cadet-rdm-data.json", "w") as f:
json.dump(rdm_data, f, indent=2)
with open(".cadet-rdm-cache.json", "w") as f:
json.dump({"__example/path/to/repo__": {
"source_repo_location": "git@jugit.fz-juelich.de:IBG-1/ModSim/cadet"
"/agile_cadet_rdm_presentation_output.git",
"branch_name": "output_from_master_3910c84_2023-10-25_00-17-23",
"commit_hash": "6e3c26527999036e9490d2d86251258fe81d46dc"
}}, f, indent=2)
with open("output_remotes.json", "w") as file_handle:
remotes_dict = {}
json_dict = {"output_folder_name": output_folder_name, "output_remotes": remotes_dict}
json.dump(json_dict, file_handle, indent=2)
initialize_output_repo(output_folder_name, project_repo_uuid=project_repo_uuid,
output_repo_uuid=output_repo_uuid, **output_repo_kwargs)
repo = ProjectRepo(".", output_folder=output_folder_name)
repo.update_output_remotes_json()
repo.commit("initial commit")
os.chdir(starting_directory)
def initialize_git(folder="."):
if folder != ":":
starting_directory = os.getcwd()
os.chdir(folder)
try: try:
repo = git.Repo(".") repo = git.Repo(".")
proceed = input(f'The target directory already contains a git repo.\n' proceed = input(f'The target directory already contains a git repo.\n'
...@@ -95,52 +138,65 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu ...@@ -95,52 +138,65 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu
except git.exc.InvalidGitRepositoryError: except git.exc.InvalidGitRepositoryError:
os.system(f"git init") os.system(f"git init")
if folder != ":":
os.chdir(starting_directory)
def get_default_gitignore():
    """
    Build the default list of gitignore patterns.

    :return:
        A newly created list of ignore patterns. A fresh list is returned on
        every call, so the result can be extended without affecting later calls.
    """
    default_patterns = (
        ".idea",
        "*diskcache*",
        "*tmp*",
        ".ipynb_checkpoints",
        "__pycache__",
    )
    return list(default_patterns)
def get_default_lfs_filetypes():
    """
    Build the default list of file patterns to be handled by git lfs.

    :return:
        A newly created list of file patterns. A fresh list is returned on
        every call, so the result can be modified without affecting later calls.
    """
    default_filetypes = (
        "*.jpg",
        "*.png",
        "*.xlsx",
        "*.h5",
        "*.ipynb",
        "*.pdf",
        "*.docx",
        "*.zip",
        "*.html",
    )
    return list(default_filetypes)
def initialize_output_repo(output_folder_name, gitignore: list = None,
gitattributes: list = None, lfs_filetypes: list = None,
project_repo_uuid: str = None, output_repo_uuid: str = None):
"""
Initialize a git repository at the given path with an optional included output results repository.
:param output_folder_name:
Name for the output repository.
:param gitignore:
List of files to be added to the gitignore file.
:param gitattributes:
List of lines to be added to the gitattributes file
:param lfs_filetypes:
List of filetypes to be handled by git lfs.
:return:
"""
starting_directory = os.getcwd()
os.makedirs(output_folder_name, exist_ok=True)
os.chdir(output_folder_name)
if gitignore is None:
gitignore = get_default_gitignore()
if gitattributes is None:
gitattributes = ["rdm-log.tsv merge=union"]
if lfs_filetypes is None:
lfs_filetypes = get_default_lfs_filetypes()
initialize_git()
write_lines_to_file(path=".gitattributes", lines=gitattributes, open_type="a") write_lines_to_file(path=".gitattributes", lines=gitattributes, open_type="a")
write_lines_to_file(path=".gitignore", lines=gitignore, open_type="a") write_lines_to_file(path=".gitignore", lines=gitignore, open_type="a")
if output_repo_kwargs is None: rdm_data = {
output_repo_kwargs = {"gitattributes": ["rmd-log.tsv merge=union"]} "is_project_repo": False, "is_output_repo": True,
"project_uuid": project_repo_uuid, "output_uuid": output_repo_uuid,
if output_folder_name: "cadet_rdm_version": cadetrdm.__version__
# This means we are in the project repo and should now initialize the output_repo }
create_readme() with open(".cadet-rdm-data.json", "w") as f:
create_environment_yml() json.dump(rdm_data, f, indent=2)
rdm_data = {
"is_project_repo": True, "is_output_repo": False,
"project_uuid": project_repo_uuid, "output_uuid": output_repo_uuid,
"cadet_rdm_version": cadetrdm.__version__
}
with open(".cadet-rdm-data.json", "w") as f:
json.dump(rdm_data, f, indent=2)
with open(".cadet-rdm-cache.json", "w") as f:
json.dump({"__example/path/to/repo__": {
"source_repo_location": "git@jugit.fz-juelich.de:IBG-1/ModSim/cadet"
"/agile_cadet_rdm_presentation_output.git",
"branch_name": "output_from_master_3910c84_2023-10-25_00-17-23",
"commit_hash": "6e3c26527999036e9490d2d86251258fe81d46dc"
}}, f, indent=2)
initialize_repo(output_folder_name, output_folder_name=False, **output_repo_kwargs)
# This instance of ProjectRepo is therefore the project repo
repo = ProjectRepo(".", output_folder=output_folder_name)
repo.update_output_remotes_json()
else:
# If output_repo_name is False we are in the output_repo and should finish by committing the changes
rdm_data = {
"is_project_repo": False, "is_output_repo": True,
"project_uuid": project_repo_uuid, "output_uuid": output_repo_uuid,
"cadet_rdm_version": cadetrdm.__version__
}
with open(".cadet-rdm-data.json", "w") as f:
json.dump(rdm_data, f, indent=2)
init_lfs(lfs_filetypes)
create_output_readme() init_lfs(lfs_filetypes)
repo = OutputRepo(".") create_output_readme()
repo = OutputRepo(".")
repo.commit("initial commit") repo.commit("initial commit")
os.chdir(starting_directory) os.chdir(starting_directory)
......
...@@ -15,6 +15,7 @@ from tabulate import tabulate ...@@ -15,6 +15,7 @@ from tabulate import tabulate
import pandas as pd import pandas as pd
from cadetrdm.io_utils import recursive_chmod, write_lines_to_file from cadetrdm.io_utils import recursive_chmod, write_lines_to_file
from cadetrdm.version import version as cadetrdm_version
try: try:
import git import git
...@@ -562,7 +563,7 @@ class ProjectRepo(BaseRepo): ...@@ -562,7 +563,7 @@ class ProjectRepo(BaseRepo):
:param repository_path: :param repository_path:
Path to the root of the git repository. Path to the root of the git repository.
:param output_folder: :param output_folder:
Path to the root of the output repository. Deprecated: Path to the root of the output repository.
:param search_parent_directories: :param search_parent_directories:
if True, all parent directories will be searched for a valid repo as well. if True, all parent directories will be searched for a valid repo as well.
...@@ -577,12 +578,21 @@ class ProjectRepo(BaseRepo): ...@@ -577,12 +578,21 @@ class ProjectRepo(BaseRepo):
super().__init__(repository_path, search_parent_directories=search_parent_directories, *args, **kwargs) super().__init__(repository_path, search_parent_directories=search_parent_directories, *args, **kwargs)
with open(repository_path / "output_remotes.json", "r") as handle:
output_remotes = json.load(handle)
if output_folder is not None: if output_folder is not None:
self.output_folder = Path(output_folder).name print("Deprecation Warning. Setting the outputfolder manually during repo instantiation is deprecated"
else: " and will be removed in a future update.")
with open(repository_path / "output_remotes.json", "r") as handle:
data = json.load(handle) self.output_folder = output_remotes["output_folder_name"]
self.output_folder = data["output_folder_name"]
with open(repository_path / ".cadet-rdm-data.json", "r") as handle:
metadata = json.load(handle)
repo_version = metadata["cadet_rdm_version"]
if cadetrdm_version != repo_version:
print(f"Repo version {repo_version} is outdated. Current CADET-RDM version is {cadetrdm_version}\n"
"Updating the repository now.")
self._output_repo = OutputRepo(self.working_dir / self.output_folder) self._output_repo = OutputRepo(self.working_dir / self.output_folder)
self._on_context_enter_commit_hash = None self._on_context_enter_commit_hash = None
...@@ -594,6 +604,18 @@ class ProjectRepo(BaseRepo): ...@@ -594,6 +604,18 @@ class ProjectRepo(BaseRepo):
raise ValueError("The output repo has not been set yet.") raise ValueError("The output repo has not been set yet.")
return self._output_repo return self._output_repo
def update_version(self, current_version):
    """
    Apply repository migrations required for repositories older than 0.0.9.

    :param current_version:
        Version string in "major.minor.patch" form, e.g. "0.0.8".
        NOTE(review): presumably the version recorded in the repository
        metadata rather than the installed package version - confirm against
        the caller.
    :raises ValueError:
        If one of the first three version components is not an integer.
    """
    # Only the first three components take part in the comparison; missing
    # components (e.g. "0.1") are treated as zero.
    version_parts = [int(part) for part in str(current_version).split(".")[:3]]
    version_parts += [0] * (3 - len(version_parts))
    major, minor, patch = version_parts

    # Fold the components into one integer so versions can be compared numerically.
    version_sum = major * 1000 * 1000 + minor * 1000 + patch

    # The version string itself cannot be compared against an int
    # (TypeError in Python 3), so the numeric version_sum is used here.
    # 9 corresponds to version 0.0.9.
    if version_sum < 9:
        self.convert_csv_to_tsv_if_necessary()
        self.add_jupytext_file(self.working_dir)
@staticmethod
def add_jupytext_file(path_root: str | Path = "."):
    """
    Write a jupytext.yml file into the given directory.

    The file is opened with open_type "w" and contains a comment line plus a
    formats entry pairing ipynb notebooks with py:percent text notebooks.

    :param path_root:
        Directory in which jupytext.yml is written. Defaults to the current directory.
    """
    target_file = Path(path_root) / "jupytext.yml"
    config_lines = [
        '# Pair ipynb notebooks to py:percent text notebooks',
        'formats: "ipynb,py:percent"',
    ]
    write_lines_to_file(target_file, lines=config_lines, open_type="w")
def get_new_output_branch_name(self): def get_new_output_branch_name(self):
""" """
Construct a name for the new branch in the output repository. Construct a name for the new branch in the output repository.
...@@ -626,8 +648,6 @@ class ProjectRepo(BaseRepo): ...@@ -626,8 +648,6 @@ class ProjectRepo(BaseRepo):
self.output_repo.checkout("master") self.output_repo.checkout("master")
self.convert_csv_to_tsv_if_necessary()
tsv_filepath = self.working_dir / self.output_folder / "log.tsv" tsv_filepath = self.working_dir / self.output_folder / "log.tsv"
df = pd.read_csv(tsv_filepath, sep="\t", header=0) df = pd.read_csv(tsv_filepath, sep="\t", header=0)
...@@ -673,7 +693,7 @@ class ProjectRepo(BaseRepo): ...@@ -673,7 +693,7 @@ class ProjectRepo(BaseRepo):
f.writelines(tsv_lines) f.writelines(tsv_lines)
write_lines_to_file(path=self.working_dir / ".gitattributes", write_lines_to_file(path=self.working_dir / ".gitattributes",
lines=["rmd-log.tsv merge=union"], lines=["rdm-log.tsv merge=union"],
open_type="a") open_type="a")
def update_output_master_logs(self, ): def update_output_master_logs(self, ):
...@@ -698,7 +718,6 @@ class ProjectRepo(BaseRepo): ...@@ -698,7 +718,6 @@ class ProjectRepo(BaseRepo):
# note: if filename of "log.tsv" is changed, # note: if filename of "log.tsv" is changed,
# this also has to be changed in the gitattributes of the init repo func # this also has to be changed in the gitattributes of the init repo func
tsv_filepath = self.output_repo.working_dir / "log.tsv" tsv_filepath = self.output_repo.working_dir / "log.tsv"
self.convert_csv_to_tsv_if_necessary()
meta_info_dict = { meta_info_dict = {
"Output repo commit message": output_commit_message, "Output repo commit message": output_commit_message,
......
version = "0.0.8"
\ No newline at end of file
...@@ -23,6 +23,10 @@ install_requires = ...@@ -23,6 +23,10 @@ install_requires =
click click
tabulate tabulate
pandas pandas
nbformat
nbconvert
ipylab
junix
include_package_data = True include_package_data = True
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment