Snakefile 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. # -*- coding: utf-8; -*-
  2. import locale
  3. import os.path
  4. import regex
  5. import urllib.parse
  6. import os.path
  7. from collections.abc import Iterable, Mapping
  8. from distutils.spawn import find_executable
  9. from fnmatch import fnmatch
  10. from subprocess import check_output, check_call
  11. from tempfile import NamedTemporaryFile
  12. try:
  13. from os import scandir, walk
  14. except ImportError:
  15. from scandir import scandir, walk
  16. def unnest(*args):
  17. '''Un-nest list- and tuple-like elements in arguments.
  18. "List-like" means anything with a len() and whose elments can be
  19. accessed with numeric indexing, except for string-like elements. It
  20. must also be an instance of the collections.Iterable abstract class.
  21. Dict-like elements and iterators/generators are not affected.
  22. This function always returns a list, even if it is passed a single
  23. scalar argument.
  24. '''
  25. result = []
  26. for arg in args:
  27. if isinstance(arg, str):
  28. # String
  29. result.append(arg)
  30. elif isinstance(arg, Mapping):
  31. # Dict-like
  32. result.append(arg)
  33. elif isinstance(arg, Iterable):
  34. try:
  35. # Duck-typing test for list-ness (a stricter condition
  36. # than just "iterable")
  37. for i in range(len(arg)):
  38. result.append(arg[i])
  39. except TypeError:
  40. # Iterable but not list-like
  41. result.append(arg)
  42. else:
  43. # Not iterable
  44. result.append(arg)
  45. return result
  46. def check_output_decode(*args, encoding=locale.getpreferredencoding(), **kwargs):
  47. '''Shortcut for check.output + str.decode'''
  48. return check_output(*args, **kwargs).decode(encoding)
  49. def find_mac_app(name):
  50. try:
  51. result = \
  52. check_output_decode(
  53. ['mdfind',
  54. 'kMDItemDisplayName=={name}.app&&kMDItemKind==Application'.format(name=name)]).split('\n')[0] or \
  55. check_output_decode(
  56. ['mdfind',
  57. 'kMDItemDisplayName=={name}&&kMDItemKind==Application'.format(name=name)]).split('\n')[0]
  58. if result:
  59. return result
  60. else:
  61. raise Exception("Not found")
  62. except Exception:
  63. return None
  64. def find_lyx():
  65. lyx_finders = [
  66. lambda: find_executable('lyx'),
  67. lambda: os.path.join(find_mac_app('LyX'), 'Contents/MacOS/lyx'),
  68. lambda: '/Applications/Lyx.app/Contents/MacOS/lyx',
  69. ]
  70. for finder in lyx_finders:
  71. try:
  72. lyxpath = finder()
  73. if not lyxpath:
  74. continue
  75. elif not os.access(lyxpath, os.X_OK):
  76. continue
  77. else:
  78. return lyxpath
  79. except Exception:
  80. pass
  81. else:
  82. # Fallback which will just trigger an error when run
  83. return '/bin/false'
# Path of the LyX executable, looked up once when this file is loaded.
# The lyx_to_pdf rule interpolates it into its shell command.
LYXPATH = find_lyx()
  85. def glob_recursive(pattern, top='.', include_hidden=False, *args, **kwargs):
  86. '''Combination of glob.glob and os.walk.
  87. Reutrns the relative path to every file or directory matching the
  88. pattern anywhere in the specified directory hierarchy. Defaults to the
  89. current working directory. Any additional arguments are passed to
  90. os.walk.'''
  91. for (path, dirs, files) in walk(top, *args, **kwargs):
  92. for f in dirs + files:
  93. if include_hidden or f.startswith('.'):
  94. continue
  95. if fnmatch(f, pattern):
  96. yield os.path.normpath(os.path.join(path, f))
  97. LYXPATH = find_lyx()
  98. def rsync_list_files(*paths, extra_rsync_args=(), include_dirs=False):
  99. '''Iterate over the files in path that rsync would copy.
  100. By default, only files are listed, not directories, since doit doesn't
  101. like dependencies on directories because it can't hash them.
  102. This uses "rsync --list-only" to make rsync directly indicate which
  103. files it would copy, so any exclusion/inclusion rules are taken into
  104. account.
  105. '''
  106. rsync_list_cmd = [ 'rsync', '-r', '--list-only' ] + unnest(extra_rsync_args) + unnest(paths) + [ '.' ]
  107. rsync_out = check_output_decode(rsync_list_cmd).splitlines()
  108. for line in rsync_out:
  109. s = regex.search('^(-|d)(?:\S+\s+){4}(.*)', line)
  110. if s is not None:
  111. if include_dirs or s.group(1) == '-':
  112. yield s.group(2)
  113. def lyx_bib_deps(lyxfile):
  114. '''Return an iterator over all bib files referenced by a Lyx file.
  115. This will only return the names of existing files, so it will be
  116. unreliable in the case of an auto-generated bib file.
  117. '''
  118. with open(lyxfile) as f:
  119. lyx_text = f.read()
  120. bib_names = regex.search('bibfiles "(.*?)"', lyx_text).group(1).split(',')
  121. for bn in bib_names:
  122. bib_path = bn + '.bib'
  123. if os.path.exists(bib_path):
  124. yield bib_path
  125. def lyx_gfx_deps(lyxfile):
  126. '''Return an iterator over all graphics files included by a LyX file.'''
  127. with open(lyxfile) as f:
  128. lyx_text = f.read()
  129. for m in regex.finditer('\\\\begin_inset Graphics\\s+filename (.*?)$', lyx_text, regex.MULTILINE):
  130. yield m.group(1)
  131. def lyx_hrefs(lyxfile):
  132. '''Return an iterator over hrefs in a LyX file.'''
  133. pattern = '''
  134. (?xsm)
  135. ^ LatexCommand \\s+ href \\s* \\n
  136. (?: name \\b [^\\n]+ \\n )?
  137. target \\s+ "(.*?)" $
  138. '''
  139. with open(lyxfile) as f:
  140. return (urllib.parse.unquote(m.group(1)) for m in
  141. re.finditer(pattern, f.read()))
  142. def tex_gfx_extensions(tex_format = 'xetex'):
  143. '''Return the ordered list of graphics extensions.
  144. This yields the list of extensions that TeX will try for an
  145. \\includegraphics path.
  146. '''
  147. cmdout = check_output_decode(['texdef', '-t', tex_format, '-p', 'graphicx', 'Gin@extensions'])
  148. m = regex.search('^macro:->(.*?)$', cmdout, regex.MULTILINE)
  149. return m.group(1).split(',')
# Shared rsync options: recurse and copy symlink targets (-rL), compare
# files by size only, delete extraneous destination files (including
# excluded ones), and never transfer .DS_Store files.
# NOTE(review): not referenced in the visible part of this file.
rsync_common_args = ['-rL', '--size-only', '--delete', '--exclude', '.DS_Store', '--delete-excluded',]
# Aggregate target: requesting this rule requests thesis.pdf.
# NOTE(review): as the first rule visible here it is presumably the
# default target of the workflow -- confirm no rule precedes it.
rule build_all:
    input: 'thesis.pdf'
# Currently assumes the lyx file always exists, because it is required
# to get the gfx and bib dependencies
rule lyx_to_pdf:
    '''Produce PDF output for a LyX file.'''
    # gfx_deps and bib_deps are input functions: each parses
    # '<basename>.lyx' to discover the graphics and bib files it uses.
    input: lyxfile = '{basename}.lyx',
           gfx_deps = lambda wildcards: lyx_gfx_deps(wildcards.basename + '.lyx'),
           bib_deps = lambda wildcards: lyx_bib_deps(wildcards.basename + '.lyx'),
    # Need to exclude pdfs in graphics/
    # (the negative lookahead rejects a basename starting with 'graphics/')
    output: pdf='{basename,(?!graphics/).*}.pdf'
    # LYXPATH is the module-level path found by find_lyx().
    # NOTE(review): 'pdf4' presumably selects LyX's XeTeX PDF export -- confirm.
    shell: '{LYXPATH:q} --export-to pdf4 {output.pdf:q} {input.lyxfile:q}'
rule pdf_extract_page:
    '''Extract a single page from a multi-page PDF.'''
    # Input is a PDF whose basename doesn't already have a page number
    # NOTE(review): '.*' can always run to the end of the name, where the
    # negative lookahead trivially succeeds, so this constraint probably
    # excludes nothing. Also confirm that Snakemake honours a wildcard
    # constraint written on an input pattern.
    input: pdf = 'graphics/{basename,.*(?!PAGE[0-9]+)}.pdf'
    # pagenum is constrained to a positive integer without leading zeros.
    output: pdf = 'graphics/{basename}-PAGE{pagenum,[1-9][0-9]*}.pdf'
    # First (-f) and last (-l) page are both pagenum, i.e. a single page.
    shell: 'pdfseparate -f {wildcards.pagenum:q} -l {wildcards.pagenum:q} {input:q} {output:q}'
rule pdf_crop:
    '''Crop away empty margins from a PDF.'''
    # Maps graphics/<basename>.pdf to graphics/<basename>-CROP.pdf.
    # NOTE(review): '.*(?!CROP).*' matches any name (the first '.*' can
    # consume everything, after which the lookahead succeeds), so it
    # probably does not keep already-cropped files out -- confirm intent.
    input: pdf = 'graphics/{basename,.*(?!CROP).*}.pdf'
    output: pdf = 'graphics/{basename}-CROP.pdf'
    shell: 'pdfcrop --resolution 300 {input:q} {output:q}'
rule pdf_raster:
    '''Rasterize PDF to PNG at 600 PPI.'''
    # Maps graphics/<basename>.pdf to graphics/<basename>-RASTER.png.
    input: pdf = 'graphics/{basename}.pdf'
    output: png = 'graphics/{basename}-RASTER.png'
    # pdftoppm renders at -r 600 and its output is piped into convert,
    # which reads it from stdin ('-') and writes the PNG.
    shell: 'pdftoppm -r 600 {input:q} | convert - {output:q}'
rule png_crop:
    '''Crop away empty margins from a PNG.'''
    # Maps graphics/<basename>.png to graphics/<basename>-CROP.png.
    # NOTE(review): the named input/output are called 'pdf' although the
    # files are PNGs; the shell command only uses them positionally.
    # NOTE(review): '.*(?!CROP).*' matches any name, so it probably does
    # not keep already-cropped files out -- confirm intent.
    input: pdf = 'graphics/{basename,.*(?!CROP).*}.png'
    output: pdf = 'graphics/{basename}-CROP.png'
    shell: 'convert {input:q} -trim {output:q}'
# Convert graphics/<filename>.svg to graphics/<filename>-SVG.pdf with
# Inkscape, exporting at 300 DPI.
# NOTE(review): --export-pdf is the Inkscape 0.x option spelling; newer
# releases use --export-filename -- confirm the installed version.
rule svg_to_pdf:
    input: 'graphics/{filename}.svg'
    output: 'graphics/{filename}-SVG.pdf'
    shell: '''inkscape {input:q} --export-pdf={output:q} --export-dpi=300'''
rule R_to_html:
    '''Render an R script as syntax-highlighted HTML.'''
    # basename may not contain '/', so dirname takes the whole directory
    # part of the path.
    input: '{dirname}/{basename,[^/]+}.R'
    output: '{dirname}/{basename}.R.html'
    # -O full asks pygmentize for a complete standalone HTML document.
    shell: 'pygmentize -f html -O full -l R -o {output:q} {input:q}'