Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
C
CADET-RDM
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
IBG-1
ModSim
CADET
CADET-RDM
Commits
216a83e0
Commit
216a83e0
authored
1 year ago
by
Ronald Jäpel
Browse files
Options
Downloads
Patches
Plain Diff
Change from CSV to TSV to fix lists spreading over multiple columns
parent
e102f587
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
cadetrdm/initialize_repo.py
+2
-2
2 additions, 2 deletions
cadetrdm/initialize_repo.py
cadetrdm/repositories.py
+46
-16
46 additions, 16 deletions
cadetrdm/repositories.py
with
48 additions
and
18 deletions
cadetrdm/initialize_repo.py
+
2
−
2
View file @
216a83e0
...
...
@@ -49,7 +49,7 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu
:param gitignore:
List of files to be added to the gitignore file.
:param gitattributes:
List of lines to be added to the git
t
atributes file
List of lines to be added to the gita
t
tributes file
:param lfs_filetypes:
List of filetypes to be handled by git lfs.
:param output_repo_kwargs:
...
...
@@ -98,7 +98,7 @@ def initialize_repo(path_to_repo: str, output_folder_name: (str | bool) = "outpu
write_lines_to_file
(
path
=
"
.gitignore
"
,
lines
=
gitignore
,
open_type
=
"
a
"
)
if
output_repo_kwargs
is
None
:
output_repo_kwargs
=
{
"
gitattributes
"
:
[
"
log.
c
sv merge=union
"
]}
output_repo_kwargs
=
{
"
gitattributes
"
:
[
"
rmd-
log.
t
sv merge=union
"
]}
if
output_folder_name
:
# This means we are in the project repo and should now initialize the output_repo
...
...
This diff is collapsed.
Click to expand it.
cadetrdm/repositories.py
+
46
−
16
View file @
216a83e0
...
...
@@ -13,7 +13,7 @@ from urllib.request import urlretrieve
from
tabulate
import
tabulate
import
pandas
as
pd
from
cadetrdm.io_utils
import
recursive_chmod
from
cadetrdm.io_utils
import
recursive_chmod
,
write_lines_to_file
try
:
import
git
...
...
@@ -98,7 +98,7 @@ class BaseRepo:
@property
def
tags
(
self
):
return
None
return
list
()
@property
def
data_json_path
(
self
):
...
...
@@ -471,14 +471,14 @@ class BaseRepo:
- checking out the master branch,
- creating a new branch from there
This thereby produces a clear, empty directory for data, while still maintaining
.gitignore and .gitatributes
.gitignore and .gitat
t
ributes
:param branch_name:
Name of the new branch.
"""
self
.
_git
.
checkout
(
"
master
"
)
self
.
_git
.
checkout
(
'
-b
'
,
branch_name
)
# equivalent to $ git checkout -b %branch_name
code_backup_path
=
os
.
path
.
join
(
self
.
working_dir
,
"
run_history
"
)
logs_path
=
os
.
path
.
join
(
self
.
working_dir
,
"
log.
c
sv
"
)
logs_path
=
os
.
path
.
join
(
self
.
working_dir
,
"
log.
t
sv
"
)
if
os
.
path
.
exists
(
code_backup_path
):
try
:
# Remove previous code backup
...
...
@@ -617,9 +617,11 @@ class ProjectRepo(BaseRepo):
self
.
output_repo
.
checkout
(
"
master
"
)
csv_filepath
=
os
.
path
.
join
(
self
.
working_dir
,
self
.
output_folder
,
"
log.csv
"
)
self
.
convert_csv_to_tsv_if_necessary
()
tsv_filepath
=
os
.
path
.
join
(
self
.
working_dir
,
self
.
output_folder
,
"
log.tsv
"
)
df
=
pd
.
read_csv
(
c
sv_filepath
,
sep
=
"
,
"
,
header
=
0
)
df
=
pd
.
read_csv
(
t
sv_filepath
,
sep
=
"
\t
"
,
header
=
0
)
# Clean up the headers
df
=
df
.
rename
(
columns
=
{
"
Output repo commit message
"
:
'
Output commit message
'
,
"
Output repo branch
"
:
"
Output branch
"
,
...
...
@@ -638,6 +640,33 @@ class ProjectRepo(BaseRepo):
self
.
output_repo
.
checkout
(
self
.
output_repo
.
_most_recent_branch
)
def
convert_csv_to_tsv_if_necessary
(
self
):
"""
If not tsv log is found AND a csv log is found, convert the csv to tsv.
:return:
"""
tsv_filepath
=
os
.
path
.
join
(
self
.
working_dir
,
self
.
output_folder
,
"
log.tsv
"
)
if
os
.
path
.
exists
(
tsv_filepath
):
return
csv_filepath
=
os
.
path
.
join
(
self
.
working_dir
,
self
.
output_folder
,
"
log.csv
"
)
if
not
os
.
path
.
exists
(
csv_filepath
):
# We have just initialized the repo and neither tsv nor csv exist.
return
with
open
(
csv_filepath
)
as
csv_handle
:
csv_lines
=
csv_handle
.
readlines
()
tsv_lines
=
[
line
.
replace
(
"
,
"
,
"
\t
"
)
for
line
in
csv_lines
]
with
open
(
tsv_filepath
,
"
w
"
)
as
f
:
f
.
writelines
(
tsv_lines
)
write_lines_to_file
(
path
=
os
.
path
.
join
(
self
.
working_dir
,
"
.gitattributes
"
),
lines
=
[
"
rmd-log.tsv merge=union
"
],
open_type
=
"
a
"
)
def
update_output_master_logs
(
self
,
):
"""
Dumps all the metadata information about the project repositories state and
...
...
@@ -657,9 +686,10 @@ class ProjectRepo(BaseRepo):
os
.
makedirs
(
logs_folderpath
)
json_filepath
=
os
.
path
.
join
(
logs_folderpath
,
"
metadata.json
"
)
# note: if filename of "log.
c
sv" is changed,
# note: if filename of "log.
t
sv" is changed,
# this also has to be changed in the gitattributes of the init repo func
csv_filepath
=
os
.
path
.
join
(
self
.
output_repo
.
working_dir
,
"
log.csv
"
)
tsv_filepath
=
os
.
path
.
join
(
self
.
output_repo
.
working_dir
,
"
log.tsv
"
)
self
.
convert_csv_to_tsv_if_necessary
()
meta_info_dict
=
{
"
Output repo commit message
"
:
output_commit_message
,
...
...
@@ -669,25 +699,25 @@ class ProjectRepo(BaseRepo):
"
Project repo folder name
"
:
os
.
path
.
split
(
self
.
working_dir
)[
-
1
],
"
Project repo remotes
"
:
self
.
remote_urls
,
"
Python sys args
"
:
str
(
sys
.
argv
),
"
Tags
"
:
self
.
tags
,
"
Tags
"
:
"
,
"
.
join
(
self
.
tags
)
,
}
csv_header
=
"
,
"
.
join
(
meta_info_dict
.
keys
())
csv_data
=
"
,
"
.
join
([
str
(
x
)
for
x
in
meta_info_dict
.
values
()])
csv_header
=
"
\t
"
.
join
(
meta_info_dict
.
keys
())
csv_data
=
"
\t
"
.
join
([
str
(
x
)
for
x
in
meta_info_dict
.
values
()])
with
open
(
json_filepath
,
"
w
"
)
as
f
:
json
.
dump
(
meta_info_dict
,
f
,
indent
=
2
)
if
not
os
.
path
.
exists
(
c
sv_filepath
):
with
open
(
c
sv_filepath
,
"
w
"
)
as
f
:
if
not
os
.
path
.
exists
(
t
sv_filepath
):
with
open
(
t
sv_filepath
,
"
w
"
)
as
f
:
f
.
write
(
csv_header
+
"
\n
"
)
# csv.writer(csv_header + "\n")
with
open
(
c
sv_filepath
,
"
r
"
)
as
f
:
with
open
(
t
sv_filepath
,
"
r
"
)
as
f
:
existing_header
=
f
.
readline
().
replace
(
"
\n
"
,
""
)
if
existing_header
!=
csv_header
:
raise
ValueError
(
"
The used structure of the meta_dict doesn
'
t match the header found in log.
c
sv
"
)
raise
ValueError
(
"
The used structure of the meta_dict doesn
'
t match the header found in log.
t
sv
"
)
with
open
(
c
sv_filepath
,
"
a
"
)
as
f
:
with
open
(
t
sv_filepath
,
"
a
"
)
as
f
:
f
.
write
(
csv_data
+
"
\n
"
)
self
.
dump_package_list
(
logs_folderpath
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment