Explorar el Código

Switch vcs-check script to Python

Ryan C. Thompson hace 5 años
padre
commit
b6ad9b4c44
Se han modificado 2 ficheros con 46 adiciones y 42 borrados
  1. 0 42
      vcs-check.R
  2. 46 0
      vcs-check.py

+ 0 - 42
vcs-check.R

@@ -1,42 +0,0 @@
-#!/usr/bin/env Rscript
-
-library(magrittr)
-library(dplyr)
-library(stringr)
-library(processx)
-library(readr)
-
-get_lines <- . %>%
-    str_replace("\n+$", "") %>%
-    str_split_fixed(pattern="\n", n=Inf) %>%
-    as.vector
-
-git_tracked_files <-
-    run("git", c("ls-tree", "-r", "HEAD", "--name-only")) %$%
-    stdout %>% get_lines
-
-snakemake_untracked_files <-
-    run("snakemake", "--list-untracked") %$%
-    stderr %>% get_lines
-
-all_files <- list.files(".", full.names=TRUE, recursive=TRUE) %>%
-    str_replace("^./", "")
-
-snakemake_summary <-
-    run("snakemake", "--summary") %$%
-    stdout %>% read_tsv
-
-snakemake_generated_files <- snakemake_summary$output_file
-
-## All files used as inputs to the snakemake workflow
-input_files <- setdiff(all_files, c(snakemake_untracked_files, snakemake_generated_files))
-
-untracked_input_files <- setdiff(input_files, git_tracked_files) %>% sort
-
-if (length(untracked_input_files) > 0) {
-    message("The following files are used as input but not tracked in git:\n",
-            str_c(untracked_input_files, collapse="\n"))
-    quit(status=1)
-} else {
-    message("All input files used in the workflow are tracked in git.")
-}

+ 46 - 0
vcs-check.py

@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import re
+import os
+import os.path
+import sys
+import shlex
+import pandas
+from subprocess import check_output, run, PIPE, DEVNULL
+from io import StringIO
+
+def check_output_lines(*args, **kwargs):
+    return check_output(*args, **kwargs).rstrip('\n').split('\n')
+
+git_tracked_files = set(check_output_lines(
+    ["git", "ls-tree", "-r", "HEAD", "--name-only"],
+    text = True))
+
+snakemake_untracked_files = set(run(
+    ["snakemake", "--list-untracked"],
+    capture_output = True, text = True, check = True
+).stderr.rstrip('\n').split('\n'))
+
+all_files = set()
+for curdir, subdirs, files in os.walk('.'):
+    subdirs[:] = [d for d in subdirs if not d.startswith(".")]
+    for f in files:
+        if f.startswith('.'):
+            continue
+        all_files.add(os.path.normpath(os.path.join(curdir, f)))
+
+snakemake_summary_output = check_output(['snakemake', '--summary'], text = True, stderr=DEVNULL)
+snakemake_summary_table = pandas.read_csv(StringIO(snakemake_summary_output), sep='\t')
+snakemake_generated_files = {os.path.normpath(f) for f in snakemake_summary_table.output_file}
+
+input_files = all_files - (snakemake_untracked_files | snakemake_generated_files)
+
+untracked_input_files = input_files - git_tracked_files
+
+if untracked_input_files:
+    print("Untracked input files detected. Run the following command to add them to git:\ngit add " + " ".join(shlex.quote(f) for f in untracked_input_files),
+          file=sys.stderr)
+    sys.exit(1)
+else:
+    print("All input files known to be used in the workflow are tracked in git.")
+    sys.exit(0)