Snakefile 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
# -*- coding: utf-8; -*-
import locale
import os.path
import re
import urllib.parse
try:
    # The ABCs live in collections.abc; the aliases in `collections`
    # were removed in Python 3.10.
    from collections.abc import Iterable, Mapping
except ImportError:
    from collections import Iterable, Mapping # in Python 3 use from collections.abc
from distutils.spawn import find_executable
from fnmatch import fnmatch
from subprocess import check_output, check_call
from tempfile import NamedTemporaryFile

import regex

try:
    from os import scandir, walk
except ImportError:
    from scandir import scandir, walk
  15. def unnest(*args):
  16. '''Un-nest list- and tuple-like elements in arguments.
  17. "List-like" means anything with a len() and whose elments can be
  18. accessed with numeric indexing, except for string-like elements. It
  19. must also be an instance of the collections.Iterable abstract class.
  20. Dict-like elements and iterators/generators are not affected.
  21. This function always returns a list, even if it is passed a single
  22. scalar argument.
  23. '''
  24. result = []
  25. for arg in args:
  26. if isinstance(arg, str):
  27. # String
  28. result.append(arg)
  29. elif isinstance(arg, Mapping):
  30. # Dict-like
  31. result.append(arg)
  32. elif isinstance(arg, Iterable):
  33. try:
  34. # Duck-typing test for list-ness (a stricter condition
  35. # than just "iterable")
  36. for i in range(len(arg)):
  37. result.append(arg[i])
  38. except TypeError:
  39. # Iterable but not list-like
  40. result.append(arg)
  41. else:
  42. # Not iterable
  43. result.append(arg)
  44. return result
  45. def check_output_decode(*args, encoding=locale.getpreferredencoding(), **kwargs):
  46. '''Shortcut for check.output + str.decode'''
  47. return check_output(*args, **kwargs).decode(encoding)
  48. def find_mac_app(name):
  49. try:
  50. result = check_output_decode(
  51. ['mdfind',
  52. 'kMDItemDisplayName=={name}.app&&kMDItemKind==Application'.format(name=name)]).split('\n')[0]
  53. if not result:
  54. raise Exception("No result found")
  55. return result
  56. except Exception:
  57. return None
  58. def glob_recursive(pattern, top='.', include_hidden=False, *args, **kwargs):
  59. '''Combination of glob.glob and os.walk.
  60. Reutrns the relative path to every file or directory matching the
  61. pattern anywhere in the specified directory hierarchy. Defaults to the
  62. current working directory. Any additional arguments are passed to
  63. os.walk.'''
  64. for (path, dirs, files) in walk(top, *args, **kwargs):
  65. for f in dirs + files:
  66. if include_hidden or f.startswith('.'):
  67. continue
  68. if fnmatch(f, pattern):
  69. yield os.path.normpath(os.path.join(path, f))
  70. LYXPATH = find_executable('lyx') or \
  71. os.path.join(find_mac_app('LyX'), 'Contents/MacOS/lyx') or \
  72. '/bin/false'
  73. def rsync_list_files(*paths, extra_rsync_args=(), include_dirs=False):
  74. '''Iterate over the files in path that rsync would copy.
  75. By default, only files are listed, not directories, since doit doesn't
  76. like dependencies on directories because it can't hash them.
  77. This uses "rsync --list-only" to make rsync directly indicate which
  78. files it would copy, so any exclusion/inclusion rules are taken into
  79. account.
  80. '''
  81. rsync_list_cmd = [ 'rsync', '-r', '--list-only' ] + unnest(extra_rsync_args) + unnest(paths) + [ '.' ]
  82. rsync_out = check_output_decode(rsync_list_cmd).splitlines()
  83. for line in rsync_out:
  84. s = regex.search('^(-|d)(?:\S+\s+){4}(.*)', line)
  85. if s is not None:
  86. if include_dirs or s.group(1) == '-':
  87. yield s.group(2)
  88. def lyx_bib_deps(lyxfile):
  89. '''Return an iterator over bib files referenced by a Lyx file.'''
  90. # Cheat: Assume every bib file in the folder is a dependency of
  91. # any LaTeX operation. Doing this properly is tricky without
  92. # implementing the full bibfile-finding logic of LyX/LaTeX.
  93. return glob_recursive('*.bib')
  94. def lyx_hrefs(lyxfile):
  95. '''Return an iterator over hrefs in a LyX file.'''
  96. pattern = '''
  97. (?xsm)
  98. ^ LatexCommand \\s+ href \\s* \\n
  99. (?: name \\b [^\\n]+ \\n )?
  100. target \\s+ "(.*?)" $
  101. '''
  102. with open(lyxfile) as f:
  103. return (urllib.parse.unquote(m.group(1)) for m in
  104. re.finditer(pattern, f.read()))
  105. examples_base_url = 'https://darwinawardwinner.github.io/resume/examples/'
  106. examples_dir = 'examples'
  107. def resume_example_deps(lyxfile):
  108. '''Iterate over all referenced example files in a LyX file.'''
  109. for href in lyx_hrefs(lyxfile):
  110. if href.startswith(examples_base_url) and not href.endswith('/'):
  111. expath = href[len(examples_base_url):]
  112. yield os.path.join(examples_dir, expath)
  113. readme_files = list(glob_recursive('README.mkdn', top='examples'))
  114. index_files = [ os.path.join(os.path.dirname(f), 'index.html') for f in readme_files ]
  115. rsync_common_args = ['-rL', '--size-only', '--delete', '--exclude', '.DS_Store', '--delete-excluded',]
  116. all_example_files = set(rsync_list_files('examples', extra_rsync_args=rsync_common_args))
  117. r_html_files = [ f + '.html' for f in all_example_files if f.endswith('.R') ]
  118. all_example_files = all_example_files.union(index_files)
  119. all_example_files = all_example_files.union(r_html_files)
  120. rule build_all:
  121. input: 'ryan_thompson_resume.pdf', 'ryan_thompson_resume.html', 'index.html', index_files, r_html_files
  122. rule create_resume_pdf:
  123. input: lyxfile='ryan_thompson_resume.lyx',
  124. bibfiles=list(lyx_bib_deps('ryan_thompson_resume.lyx')),
  125. example_files=list(resume_example_deps('ryan_thompson_resume.lyx')),
  126. headshot='headshot-crop.png',
  127. output: pdf='ryan_thompson_resume.pdf'
  128. shell: '{LYXPATH:q} --export-to pdf4 {output.pdf:q} {input.lyxfile:q}'
  129. rule create_resume_html:
  130. input: lyxfile='ryan_thompson_resume.lyx',
  131. bibfiles=list(lyx_bib_deps('ryan_thompson_resume.lyx')),
  132. example_files=list(resume_example_deps('ryan_thompson_resume.lyx')),
  133. headshot='headshot-crop.png',
  134. output: html='ryan_thompson_resume.html'
  135. run:
  136. with NamedTemporaryFile() as tempf:
  137. shell('{LYXPATH:q} --export-to xhtml {tempf.name:q} {input.lyxfile:q}')
  138. shell('''cat {tempf.name:q} | perl -lape 's[<span class="flex_cv_image">(.*?)</span>][<span class="flex_cv_image"><img src="$1" width="100"></span>]g' > {output.html:q}''')
  139. rule link_resume_to_index_html:
  140. input: 'ryan_thompson_resume.html'
  141. output: 'index.html'
  142. shell: 'ln -s {input:q} {output:q}'
  143. rule examples_readme_to_index_html:
  144. input: '{dirname}README.mkdn'
  145. output: '{dirname,examples(/.*)?/}index.html'
  146. shell: 'pandoc -t html -o {output[0]:q} {input[0]:q}'
  147. rule R_to_html:
  148. input: '{dirname}/{basename,[^/]+}.R'
  149. output: '{dirname}/{basename}.R.html'
  150. shell: 'pygmentize -f html -O full -l R -o {output:q} {input:q}'