#!/usr/bin/env python
"""Executes python-readability on current page and opens the summary
as new tab.
Depends on the python-readability package found at
https://github.com/buriy/python-readability .
Based off of
https://github.com/qutebrowser/qutebrowser/blob/master/misc/userscripts/readability
with fixes for various crashes and for relative URLs.
Usage:
:spawn --userscript readability
"""
from __future__ import absolute_import
import codecs
import os
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
from readability import Document
from tempfile import mkstemp
# Styling for dynamic window margin scaling and line height
# NOTE(review): as written, this template holds only the ``%s`` placeholder
# (filled with the decorated page title where the summary is built) and no
# markup or CSS, so the styling described above is not present here --
# confirm against the upstream script whether content was lost.
HEADER = """
%s
"""
def tidy_url(s):
    """Make a URL from the page usable from a document served elsewhere.

    The summary is opened from a local temporary file, so relative links
    and image sources would no longer resolve. They are therefore joined
    against the URL of the original page (``QUTE_URL``).

    Args:
        s: The raw ``href``/``src`` value taken from the page.

    Returns:
        ``s`` unchanged if it cannot be parsed as a URL; the fragment-only
        reference itself (e.g. ``#section``) so in-page anchors keep
        pointing into the summary; otherwise ``s`` resolved against
        ``QUTE_URL``.

    Raises:
        KeyError: If ``QUTE_URL`` is not set in the environment.
    """
    try:
        u = urlparse(s)
    except ValueError:
        # Malformed values (e.g. an unbalanced "[" in the host part) make
        # urlparse raise. One broken link on the page must not abort the
        # whole script, so hand the value back untouched.
        return s
    # A pure "#fragment" reference targets the current document; keep it
    # relative instead of pointing it back at the original site.
    if u.fragment and not (u.scheme or u.netloc or u.path or u.params or u.query):
        return u.geturl()
    return urljoin(os.environ['QUTE_URL'], s)
def setup_tmpfile():
    """Create the temporary HTML file that will hold the summary.

    Returns:
        The ``(fd, path)`` pair produced by ``mkstemp``: an open OS-level
        file descriptor and the path of the newly created file.

    Maybe this should create a directory to put all readability HTMLs into?
    """
    handle, path = mkstemp(prefix='qutebrowser-readibility-', suffix='.html')
    return handle, path
def main():
    """Build a readability summary of the current page and open it.

    Reads the page HTML from the file named by ``QUTE_HTML``, runs it
    through python-readability, prepends a heading that links back to the
    original page (``QUTE_URL``), rewrites link and image URLs with
    ``tidy_url``, writes the result to a temporary file and finally sends
    an ``open`` command for that file to the ``QUTE_FIFO`` pipe.

    Raises:
        KeyError: If ``QUTE_HTML``, ``QUTE_URL`` or ``QUTE_FIFO`` is not
            set in the environment.
    """
    with open(os.environ['QUTE_HTML'], 'r', encoding='utf-8') as source:
        data = source.read()

    doc = Document(data)
    title = doc.title()
    # Swap the summary's opening <html> tag for the page header. The
    # pattern must not be empty: str.replace('', x) matches at every
    # position and would insert the header between every character.
    content = doc.summary().replace('<html>', HEADER % ('📖 ' + title))
    soup = BeautifulSoup(content, 'html.parser')

    # Heading that links back to the original page.
    heading = soup.new_tag('h1')
    origin_link = soup.new_tag('a', href=os.environ['QUTE_URL'])
    origin_link.string = title
    heading.insert(0, origin_link)
    body = soup.find('body')
    if body is not None:
        body.insert(0, heading)

    # The summary is served from a local file, so page URLs must be
    # rewritten to keep resolving against the original site.
    for link in soup.find_all('a', attrs={'href': True}):
        link['href'] = tidy_url(link['href'])
    for img in soup.find_all('img', attrs={'src': True}):
        img['src'] = tidy_url(img['src'])

    # Create the temporary file only once there is something to write, so
    # a failure above leaves no empty file behind. Wrapping the descriptor
    # with os.fdopen makes sure it is closed instead of leaked.
    fd, tmpfile = setup_tmpfile()
    with os.fdopen(fd, 'w', encoding='utf-8') as target:
        target.write(str(soup))

    # NOTE(review): the module docstring says the summary opens in a new
    # tab, but a plain "open" is sent here (no "-t") -- confirm the intent.
    with open(os.environ['QUTE_FIFO'], 'w') as fifo:
        fifo.write('open %s' % tmpfile)


if __name__ == '__main__':
    main()