Skip to content
Snippets Groups Projects
Commit 50cc3e2c authored by Ronald Jäpel's avatar Ronald Jäpel
Browse files

Add documentation within the README.md and rename a few functions

parent 36729673
No related branches found
No related tags found
No related merge requests found
# The CADET-Research Data Management toolbox
## Getting started
### Installation
CADET-RDM can be installed using
```pip install cadetrdm```
### Initialize Project Repository
Create a new project repository or convert an existing repository into a CADET-RDM repo:
```bash
cadet-rdm initialize-repo <path-to-repo> <output-folder-name>
```
or from Python:
```python
from cadetrdm import initialize_repo
initialize_repo(path_to_repo, output_folder_name)
```
The `output_folder_name` can be given optionally. It defaults to `output`.
### Use CADET-RDM in Python
#### Tracking Results
```python
from cadetrdm import ProjectRepo
"""
Your imports and function declarations
e.g. generate_data(), write_data_to_file(), analyse_data() and plot_analysis_results()
"""
if __name__ == '__main__':
# Instantiate CADET-RDM ProjectRepo handler
repo = ProjectRepo()
# If you've made changes to the code, commit the changes
repo.commit("Add code to generate and analyse example data")
# Everything written to the output_folder within this context manager gets tracked
# The method repo.output_data() generates full paths to files within your output_folder
with repo.track_results(results_commit_message="Generate and analyse example data"):
data = generate_data()
output_filepath = repo.output_data(sub_path="raw_data/data.csv")
write_data_to_file(data, output_filepath)
analysis_results = analyse_data(data)
figure_path=repo.output_data("analysis/regression.png")
plot_analysis_results(analysis_results, figure_path)
```
#### Sharing Results
To share your project code and results with others, you need to create remote repositories on e.g.
[GitHub](https://github.com/) or GitLab. You need to create a remote for both the _project_ repo and the
_results_ repo.
Once created, the remotes need to be added to the local repositories.
```bash
cadet-rdm add-remote-to-repo <path_to_repo> git@<my_git_server.foo>:<project>.git
cadet-rdm add-remote-to-repo <path_to_repo/output_folder> git@<my_git_server.foo>:<project>_output.git
```
or in Python:
```python
repo = ProjectRepo()
repo.add_remote("git@<my_git_server.foo>:<project>.git")
repo.output_repo.add_remote("git@<my_git_server.foo>:<project>_output.git")
```
Once remotes are configured, you can push all changes to the project repo and the results repo with the
command:
```python
# push all changes to the Project and Output repositories with one command:
repo.push()
```
#### Re-using results from previous iterations
Each result stored with CADET-RDM is given a unique branch name, formatted as:
`<timestamp>_<output_folder>_"from"_<active_project_branch>_<project_repo_hash[:7]>`
With this branch name, previously generated data can be loaded in as input data for
further calculations.
```python
cached_array_path = repo.input_data(branch_name=branch_name, file_path="raw_data/data.csv")
```
Alternatively, using the auto-generated cache of previous results, CADET-RDM can infer
the correct branch name from the path to the file within the cache:
```python
cached_array_path = repo.input_data(file_path="output_cached/<branch_name>/raw_data/data.csv")
```
from .repositories import ProjectRepo
from .initialize_repo import initialize_git_repo, initialize_from_remote
from .initialize_repo import initialize_repo, initialize_from_remote
from .conda_env_utils import prepare_conda_env
import click
from .repositories import ProjectRepo
from .initialize_repo import initialize_git_repo as initialize_git_repo_implementation, init_lfs
from .initialize_repo import initialize_repo as initialize_git_repo_implementation, init_lfs
from .initialize_repo import initialize_from_remote as initialize_from_remote_implementation
from .conda_env_utils import prepare_conda_env as prepare_conda_env_implementation
......@@ -31,7 +31,7 @@ def initialize_from_remote(project_url, path_to_repo: str = None):
@click.option('--lfs_filetypes', default=None,
help='List of filetypes to be handled by git lfs. Optional.')
@click.argument('path_to_repo')
def initialize_git_repo(path_to_repo: str, output_repo_name: (str | bool) = "output", gitignore: list = None,
def initialize_repo(path_to_repo: str, output_repo_name: (str | bool) = "output", gitignore: list = None,
gitattributes: list = None, lfs_filetypes: list = None,
output_repo_kwargs: dict = None):
initialize_git_repo_implementation(path_to_repo, output_repo_name, gitignore,
......@@ -57,6 +57,7 @@ def add_file_type_to_lfs(path_to_repo: str, file_type: str, ):
repo.add_all_files()
repo.commit(f"Add {file_type} to lfs")
@cli.command()
@click.option('--url', default=None,
help='Url to the environment.yml file.')
......
......@@ -59,7 +59,7 @@ def is_tool(name):
return which(name) is not None
def initialize_git_repo(path_to_repo: str, output_repo_name: (str | bool) = "output", gitignore: list = None,
def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "output", gitignore: list = None,
gitattributes: list = None, lfs_filetypes: list = None,
output_repo_kwargs: dict = None):
"""
......@@ -67,7 +67,7 @@ def initialize_git_repo(path_to_repo: str, output_repo_name: (str | bool) = "out
:param path_to_repo:
Path to main repository.
:param output_repo_name:
:param output_folder_name:
Name for the output repository.
:param gitignore:
List of files to be added to the gitignore file.
......@@ -89,9 +89,9 @@ def initialize_git_repo(path_to_repo: str, output_repo_name: (str | bool) = "out
if gitignore is None:
gitignore = [".idea", "*diskcache*", "*tmp*", ".ipynb_checkpoints", "__pycache__"]
if output_repo_name:
gitignore.append(output_repo_name)
gitignore.append(output_repo_name + "_cached")
if output_folder_name:
gitignore.append(output_folder_name)
gitignore.append(output_folder_name + "_cached")
if gitattributes is None:
gitattributes = []
......@@ -121,13 +121,13 @@ def initialize_git_repo(path_to_repo: str, output_repo_name: (str | bool) = "out
if output_repo_kwargs is None:
output_repo_kwargs = {"gitattributes": ["logs/log.csv merge=union"]}
if output_repo_name:
if output_folder_name:
# This means we are in the project repo and should now initialize the output_repo
create_readme()
create_environment_yml()
initialize_git_repo(output_repo_name, output_repo_name=False, **output_repo_kwargs)
initialize_repo(output_folder_name, output_folder_name=False, **output_repo_kwargs)
# This instance of ProjectRepo is therefore the project repo
repo = ProjectRepo(".", output_folder=output_repo_name)
repo = ProjectRepo(".", output_folder=output_folder_name)
else:
# If output_folder_name is False we are in the output_repo and should finish by committing the changes
init_lfs(lfs_filetypes)
......
......@@ -195,7 +195,6 @@ class BaseRepo:
recursive_chmod(self.working_dir, S_IWRITE)
self._git.clean("-q", "-f", "-d")
@property
def changed_files(self):
changed_files = self._git.diff(None, name_only=True).split('\n')
......@@ -409,7 +408,7 @@ class ProjectRepo(BaseRepo):
"""
project_repo_hash = str(self.head.commit)
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
branch_name = "_".join([self.output_folder, "from", str(self.active_branch), timestamp, project_repo_hash[:7]])
branch_name = "_".join([timestamp, self.output_folder, "from", str(self.active_branch), project_repo_hash[:7]])
return branch_name
def check_results_master(self):
......
......@@ -7,7 +7,7 @@ import pytest
import git
import numpy as np
from cadetrdm import initialize_git_repo, ProjectRepo, initialize_from_remote
from cadetrdm import initialize_repo, ProjectRepo, initialize_from_remote
from cadetrdm.initialize_repo import init_lfs
......@@ -71,7 +71,7 @@ def try_initialize_git_repo(path_to_repo):
if os.path.exists(path_to_repo):
remove_dir(path_to_repo)
initialize_git_repo(path_to_repo, "results")
initialize_repo(path_to_repo, "results")
assert try_init_gitpython_repo(path_to_repo)
assert try_init_gitpython_repo(os.path.join(path_to_repo, "results"))
......@@ -174,7 +174,7 @@ def test_init_over_existing_repo(monkeypatch):
# using monkeypatch to simulate user input
monkeypatch.setattr('builtins.input', lambda x: "Y")
initialize_git_repo(path_to_repo)
initialize_repo(path_to_repo)
def test_add_lfs_filetype():
......@@ -182,7 +182,7 @@ def test_add_lfs_filetype():
if os.path.exists(path_to_repo):
remove_dir(path_to_repo)
os.makedirs(path_to_repo)
initialize_git_repo(path_to_repo)
initialize_repo(path_to_repo)
file_type = "*.bak"
init_lfs(lfs_filetypes=[file_type], path=path_to_repo)
repo = ProjectRepo(path_to_repo)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment