bib2html.py: Add umlaut replacement
[shutils.git] / bib2html.py
1 #!/usr/bin/env python3
2 """Creates a webpage with all entries of a .bib file"""
3
4 __version__ = "1.1"
5
6 __author__ = "Stefan Huber"
7 __email__ = "shuber@sthu.org"
8 __copyright__ = "Copyright 2013, Stefan Huber"
9
10 __license__ = "MIT"
11
12 # Permission is hereby granted, free of charge, to any person
13 # obtaining a copy of this software and associated documentation
14 # files (the "Software"), to deal in the Software without
15 # restriction, including without limitation the rights to use,
16 # copy, modify, merge, publish, distribute, sublicense, and/or sell
17 # copies of the Software, and to permit persons to whom the
18 # Software is furnished to do so, subject to the following
19 # conditions:
20 #
21 # The above copyright notice and this permission notice shall be
22 # included in all copies or substantial portions of the Software.
23 #
24 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
26 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
28 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
29 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
30 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
31 # OTHER DEALINGS IN THE SOFTWARE.
32
33
34 import os, sys, getopt, re
35
36
def format_latex(text):
    """Convert a small subset of LaTeX markup in text to plain text.

    Matching pairs of dollar signs are dropped while their content is kept,
    umlaut macros such as \\"u are replaced by the corresponding character,
    \\mathcal, braces and backslashes are removed, a tie (~) becomes a space
    and the dash ligatures -- and --- become an en dash.

    Returns the converted string.
    """
    # Get rid of matching dollar signs
    text = re.sub(r'\$([^\$]*)\$', r'\1', text)

    # Replace text. The order matters and is therefore fixed by a tuple:
    # macros must be handled before the bare backslash is stripped, and
    # '---' must be handled before '--'.
    subst = (
        ('\\"a', 'ä'),
        ('\\"o', 'ö'),
        ('\\"u', 'ü'),
        ('\\"A', 'Ä'),
        ('\\"O', 'Ö'),
        ('\\"U', 'Ü'),
        ('\\mathcal', ''),
        ('{', ''),
        ('}', ''),
        ('\\', ''),
        ('~', ' '),
        ('---', '–'),
        ('--', '–'),
    )

    for old, new in subst:
        text = text.replace(old, new)

    return text
59
def format_field_span(type, value):
    """Return value, cleaned by format_latex, inside a bibentry_<type> span."""
    pieces = ("<span class=bibentry_", type, ">", format_latex(value), "</span>")
    return "".join(pieces)
62
def format_field(bibentry, field, pre='', post=''):
    """Return the span for a field of bibentry, or "" if it is unset.

    The field value is surrounded by pre and post before it is passed to
    format_field_span. A missing field and an empty field both give "".
    """
    fields = bibentry.fields
    if field not in fields or fields[field] == "":
        return ""
    return format_field_span(field, pre + fields[field] + post)
68
def format_author(a):
    """Return the name parts of person a joined by single spaces.

    The parts are taken in the order first, middle, prelast, last and
    lineage names; empty parts are skipped.
    """
    name_groups = (a.first_names, a.middle_names, a.prelast_names,
                   a.last_names, a.lineage_names)

    chunks = []
    for group in name_groups:
        if group:
            chunks.append(' '.join(group))

    return ' '.join(chunks)
71
def format_authors(entry):
    """Return the formatted names of all authors of entry, comma-separated."""
    names = map(format_author, entry.persons['author'])
    return ", ".join(names)
74
75
def format_details_article(entry):
    """Return the detail lines of an article entry.

    The result is a two-element list: the journal, and a comma-separated
    line built from pages, volume with number, month with year, and note.
    Unset parts are left out of that line.
    """
    volume = format_field(entry, 'volume', pre='vol. ')
    number = format_field(entry, 'number', pre='(', post=')')
    month = format_field(entry, 'month', post=' ')
    year = format_field(entry, 'year')

    candidates = [
        format_field(entry, 'pages', pre='pp. '),
        volume + number,
        month + year,
        format_field(entry, 'note'),
    ]
    details = ", ".join(part for part in candidates if part != "")

    return [format_field(entry, 'journal'), details]
90
def format_details_inproceedings(entry):
    """Return the detail lines of an inproceedings entry.

    The result is a two-element list: the book title, and a comma-separated
    line built from pages, address, month with year, ISBN, and note.
    Unset parts are left out of that line.
    """
    month = format_field(entry, 'month', post=' ')
    year = format_field(entry, 'year')

    candidates = [
        format_field(entry, 'pages', pre='pp. '),
        format_field(entry, 'address'),
        month + year,
        format_field(entry, 'isbn', pre='ISBN '),
        format_field(entry, 'note'),
    ]
    details = ", ".join(part for part in candidates if part != "")

    return [format_field(entry, 'booktitle'), details]
104
def format_details_thesis(entry):
    """Return the detail lines of a thesis entry.

    The result is a one-element list holding a comma-separated line built
    from school, month with year, and note. Unset parts are left out.
    """
    month = format_field(entry, 'month', post=' ')
    year = format_field(entry, 'year')

    candidates = [
        format_field(entry, 'school'),
        month + year,
        format_field(entry, 'note'),
    ]

    return [", ".join(part for part in candidates if part != "")]
114
def format_details_book(entry):
    """Return the detail lines of a book entry.

    The result is a one-element list holding a comma-separated line built
    from publisher, ISBN, month with year, and note. Unset parts are left
    out.
    """
    month = format_field(entry, 'month', post=' ')
    year = format_field(entry, 'year')

    candidates = [
        format_field(entry, 'publisher'),
        format_field(entry, 'isbn', pre='ISBN '),
        month + year,
        format_field(entry, 'note'),
    ]

    return [", ".join(part for part in candidates if part != "")]
125
def _format_link(entry, field, label, prefix=''):
    """Return the link span for one URL-valued field, or "" if it is unset.

    The link target is prefix followed by the field value and the link
    text is "[label]". The span carries the CSS class bibentry_<field>.
    """
    if field not in entry.fields:
        return ""

    target = entry.fields[field]
    if target == "":
        return ""

    # A URL is not LaTeX text: '~' and '--' are legitimate URL characters
    # and must survive. Only braces and backslashes (as in "\_" or "\~{}")
    # are dropped.
    for char in ('{', '}', '\\'):
        target = target.replace(char, '')

    return ("<span class=bibentry_" + field + "><a href=\"" + prefix +
            target + "\">[" + label + "]</a></span>")


def format_links(entry):
    """Return the HTML links of entry, separated by single spaces.

    Links are generated for the fields doi, webpdf, weblink, url, webslides
    and weberrata, in this order. Unset fields are skipped, so the result
    is "" if the entry has none of them.
    """
    links = [
        _format_link(entry, 'doi', 'DOI', prefix='http://dx.doi.org/'),
        _format_link(entry, 'webpdf', 'PDF'),
        _format_link(entry, 'weblink', 'link'),
        _format_link(entry, 'url', 'url'),
        _format_link(entry, 'webslides', 'slides'),
        _format_link(entry, 'weberrata', 'errata'),
    ]
    # Skip missing links so that no whitespace-only result is produced.
    return " ".join(link for link in links if link)
135
def format_entry(entry):
    """Return the HTML for one bibliography entry.

    The lines are title, authors, the type-specific details, the webnote
    field and the links. Empty lines are dropped and the remaining ones
    are joined by "<br/>" followed by a newline. An entry of an
    unsupported type gets an "Unknown type" line instead of details.
    """
    detail_formatters = {
        'article': format_details_article,
        'inproceedings': format_details_inproceedings,
        'book': format_details_book,
        'mastersthesis': format_details_thesis,
        'phdthesis': format_details_thesis,
    }

    parts = [
        format_field(entry, 'title', pre="<b>", post="</b>"),
        format_field_span('author', format_authors(entry)),
    ]

    formatter = detail_formatters.get(entry.type)
    if formatter is None:
        parts.append("Unknown type <b>'" + entry.type + "'</b>")
    else:
        parts.extend(formatter(entry))

    parts.append(format_field(entry, 'webnote'))
    parts.append(format_links(entry))

    return "<br/>\n".join(part for part in parts if part != "")
157
158
def entryDateSortKey(p):
    """Return the date-based sort key of a (key, entry) pair.

    The key is the year field if the entry has no month field. Otherwise
    it is the year, a dash and the two-digit month number, which is looked
    up from the first three letters of the month field (case-insensitive)
    and is empty if they are not an English month abbreviation.
    """
    _, entry = p
    fields = entry.fields

    if 'month' not in fields:
        return fields['year']

    abbreviations = ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
                     'jul', 'aug', 'sep', 'oct', 'nov', 'dec')

    prefix = fields['month'].lower()[:3]
    if prefix in abbreviations:
        number = "{0:02d}".format(abbreviations.index(prefix) + 1)
    else:
        number = ""

    return fields['year'] + "-" + number
177
178
def usage():
    """Print the usage text of this program to standard output."""

    # {0} is replaced by the name the program was invoked with.
    template = """Usage:
{0} -i FILE
{0} -h

OPTIONS:
-h print this text
-i .bib file
"""
    print(template.format(sys.argv[0]))
190
def main():
    """Parse the command line and print the HTML for the given .bib file.

    The entries are grouped under one <h2> heading per year, newest year
    first. Within a year they are ordered by entryDateSortKey, newest
    first. The program exits with os.EX_USAGE if the arguments cannot be
    parsed or no .bib file is given, and with os.EX_OK after -h.
    """
    bibfile = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:")
    except getopt.GetoptError as e:
        print("Error parsing arguments:", e)
        usage()
        # Stop right away; otherwise the missing-bibfile message and a
        # second usage text would follow the actual error.
        sys.exit(os.EX_USAGE)

    # getopt only returns options named in "hi:", so no fallback branch
    # for unknown options is needed.
    for opt, arg in opts:
        if opt == "-h":
            usage()
            sys.exit(os.EX_OK)
        elif opt == "-i":
            bibfile = arg

    if bibfile is None:
        print("You need to specify a bibfile")
        usage()
        sys.exit(os.EX_USAGE)

    # Imported here, after argument handling, so that -h and usage errors
    # do not require pybtex.
    from pybtex.database.input import bibtex

    entries = bibtex.Parser().parse_file(bibfile).entries

    years = sorted({b.fields['year'] for b in entries.values()},
                   reverse=True)

    # The order of the entries does not depend on the year being printed,
    # so sort them once instead of once per year.
    ordered = sorted(entries.items(), key=entryDateSortKey, reverse=True)

    for year in years:

        print("<h2>" + year + "</h2>")

        for key, entry in ordered:

            if entry.fields['year'] != year:
                continue

            print("<div class=bibentry>")
            print("<a class=bibentry_key id=" + key + ">[" + key + "]</a><br/>")

            print(format_entry(entry))

            print("</div>\n")


if __name__ == "__main__":
    main()
246