readability

#722
Raw
Author
winny
Created
April 3, 2023, 6:49 p.m.
Expires
Never
Size
3.3 KB
Hits
114
Syntax
Python
Private
✗ No
#!/usr/bin/env python
"""Executes python-readability on current page and opens the summary
as new tab.

Depends on the python-readability package found at
https://github.com/buriy/python-readability .

Based on
https://github.com/qutebrowser/qutebrowser/blob/master/misc/userscripts/readability
with fixes for various crashes and for URL handling.

Usage:
  :spawn --userscript readability

"""


from __future__ import absolute_import
import codecs
import os
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
from readability import Document
from tempfile import mkstemp


# HTML prologue (doctype, <head> and dark-theme CSS) for the generated page.
#
# This is a %-format template: the single ``%s`` inside <title> receives the
# page title, and literal percent signs in the CSS are therefore written as
# ``%%`` (see ``max-width: 100%%``).  The script's entry point substitutes the
# formatted result for the ``<html>`` tag of the readability summary, which is
# why the template itself opens ``<html>`` and ends right after ``</head>``.
#
# The body rules centre the text in a column of at most 700px with a line
# height of 1.4 and light-on-dark colours.
HEADER = """
<!DOCTYPE html>
<html>
<head>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>%s</title>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
    <style type="text/css">
        body {
            margin: 40px auto;
            max-width: 700px;
            line-height: 1.4;
            padding: 0 10px;
            background: #111;
            color: #eee;
            font-size: larger;
        }
        a {
            color: #0099ff;
        }
        a:visited {
            color: #6666cc;
            text-decoration: dashed underline;
        }
        a:visited:hover {
            color: #111;
            background: #6666cc;
            text-decoration: none;
        }
        a:hover {
            color: #111;
            background: #0099ff;
            text-decoration: none;
        }
        h1, h2, h3 {
            line-height: 1.2;
        }
        img {
            max-width: 100%%;
        }
        pre {
            background: #1a1a1a;
            padding: 7px;
            border-radius: 10px;
        }
        table {
            border-style: solid;
            padding: 7px;
            border-color: #222;
        }
    </style>
</head>
"""


def tidy_url(s):
    """Return *s* rewritten so it still resolves outside the original page.

    A reference made up of nothing but a fragment (e.g. ``#section``) is
    returned as-is, so it keeps pointing inside the generated document.
    Every other reference is resolved against the URL in the ``QUTE_URL``
    environment variable.
    """
    parts = urlparse(s)
    non_fragment_parts = (
        parts.scheme,
        parts.netloc,
        parts.path,
        parts.params,
        parts.query,
    )
    # Anything beyond a bare fragment has to be anchored to the source page.
    if any(non_fragment_parts) or not parts.fragment:
        return urljoin(os.environ['QUTE_URL'], s)
    return parts.geturl()


def setup_tmpfile():
    """Create the temporary HTML file used for the readable page.

    Returns the ``(file descriptor, path)`` pair produced by ``mkstemp``.

    Open question carried over from the original author: should this create
    a dedicated directory to collect all generated HTML files instead?
    """
    handle, path = mkstemp(prefix='qutebrowser-readibility-', suffix='.html')
    return handle, path


def main():
    """Build a readable version of the current page and ask qutebrowser to open it.

    Reads the page HTML from the file named by ``QUTE_HTML``, runs it through
    python-readability, prepends a linked ``<h1>`` title, rewrites link and
    image URLs with ``tidy_url``, writes the result to a fresh temporary
    ``.html`` file and finally writes an ``open`` command for that file to the
    FIFO named by ``QUTE_FIFO``.

    Raises:
        KeyError: if ``QUTE_HTML``, ``QUTE_URL`` or ``QUTE_FIFO`` is missing
            from the environment.
    """
    # Function-scope import: only this entry point needs it.
    import html

    page_url = os.environ['QUTE_URL']

    with open(os.environ['QUTE_HTML'], encoding='utf-8') as source:
        data = source.read()

    doc = Document(data)
    title = doc.title()
    # The title comes from the page being read and is spliced into markup by
    # plain string replacement, so escape it first (assumes doc.title()
    # returns plain text rather than markup).
    header = HEADER % (html.escape('📖 ' + title),)
    content = doc.summary().replace('<html>', header)

    soup = BeautifulSoup(content, 'html.parser')
    heading = soup.new_tag('h1')
    source_link = soup.new_tag('a', href=page_url)
    source_link.string = title
    heading.insert(0, source_link)
    body = soup.find('body')
    if body is not None:
        body.insert(0, heading)

    # Make relative references resolve against the original page, since the
    # generated document is served from a temporary file.
    for link in soup.find_all('a', attrs={'href': True}):
        link['href'] = tidy_url(link['href'])

    for img in soup.find_all('img', attrs={'src': True}):
        img['src'] = tidy_url(img['src'])

    # Create the temp file only once the content is ready, and write through
    # the descriptor mkstemp() returned so it is closed instead of leaked.
    fd, tmpfile = setup_tmpfile()
    with os.fdopen(fd, 'w', encoding='utf-8') as target:
        target.write(str(soup))

    # NOTE(review): the module docstring says the summary opens in a new tab,
    # but a plain `open` (no `-t`) is sent here -- confirm which is intended.
    with open(os.environ['QUTE_FIFO'], 'w', encoding='utf-8') as fifo:
        fifo.write('open %s' % tmpfile)


if __name__ == '__main__':
    main()