modern_lisp-machine/.config/lf/epub_thumbnailer.sh

117 lines
4.1 KiB
Bash
Executable File

#!/usr/bin/env python3
# pillow is a dependency
import os
import zipfile
from PIL import Image
import sys
from io import BytesIO
import re
from xml.dom import minidom
# Matches any path that ends in a supported raster-image extension
# (case-insensitive).
img_ext_regex = re.compile(r'^.*\.(jpg|jpeg|png)$', flags=re.IGNORECASE)
# Matches image paths that contain "cover" somewhere before the extension
# (case-insensitive). The trailing `$` forces the image extension to end the
# path, so members such as "cover.jpg.xml" are not mistaken for cover images.
cover_regex = re.compile(r'.*cover.*\.(jpg|jpeg|png)$', flags=re.IGNORECASE)
def get_cover_from_manifest(epub):
    """Locate the cover image via the package document's metadata and manifest.

    The optional ``<meta name="cover" content="...">`` element supplies a
    declared cover id. The first manifest ``<item>`` is returned whose ``id``
    or ``properties`` equals that id, or whose ``id``/``properties`` contains
    the substring ``cover`` while its ``href`` has an image extension.

    Args:
        epub: Open archive object; passed unchanged to ``_get_rootfile_root``.

    Returns:
        The matching item's ``href`` joined onto the directory of the package
        document, or ``None`` when no manifest item qualifies.

    Raises:
        IndexError: If the package document has no ``<manifest>`` element.
    """
    rootfile_path, rootfile_root = _get_rootfile_root(epub)

    # An empty declaration is normalised to None. minidom's getAttribute()
    # returns "" for a missing attribute, so an empty cover id would otherwise
    # compare equal to the absent ``properties`` of every ordinary item and
    # the first manifest entry would be returned as the "cover".
    cover_id = None
    for meta in rootfile_root.getElementsByTagName("meta"):
        if meta.getAttribute("name") == "cover":
            cover_id = meta.getAttribute("content") or None
            break

    base_dir = os.path.dirname(rootfile_path)
    manifest = rootfile_root.getElementsByTagName("manifest")[0]
    for item in manifest.getElementsByTagName("item"):
        item_id = item.getAttribute("id")
        item_properties = item.getAttribute("properties")
        item_href = item.getAttribute("href")

        declared_as_cover = (
            cover_id is not None and cover_id in (item_id, item_properties)
        )
        # img_ext_regex is compiled with IGNORECASE, so the href needs no
        # explicit lower-casing before matching.
        looks_like_cover = (
            ('cover' in item_id or 'cover' in item_properties)
            and img_ext_regex.match(item_href) is not None
        )
        if declared_as_cover or looks_like_cover:
            return os.path.join(base_dir, item_href)
    return None
def get_cover_by_guide(epub):
    """Locate the cover image via a ``<reference type="cover">`` guide entry.

    The referenced document is parsed as XML; the ``src`` of its first
    ``<img>`` element is resolved against the document's own directory and
    returned.

    Args:
        epub: Open archive exposing ``open(name)``; also passed to
            ``_get_rootfile_root``.

    Returns:
        The normalised archive path of the image, or ``None`` when no cover
        reference leads to a document containing an ``<img>`` element.

    Errors raised while opening or parsing the referenced document propagate
    to the caller.
    """
    rootfile_path, rootfile_root = _get_rootfile_root(epub)
    base_dir = os.path.dirname(rootfile_path)

    for ref in rootfile_root.getElementsByTagName("reference"):
        if ref.getAttribute("type") != "cover":
            continue

        # Drop any "#fragment": it addresses a location inside the document
        # and is not part of the archive member name.
        cover_href = ref.getAttribute("href").split("#", 1)[0]
        cover_file_path = os.path.join(base_dir, cover_href)

        # The referenced file is expected to be (X)HTML; close the member as
        # soon as it has been read.
        with epub.open(cover_file_path) as cover_file:
            cover_dom = minidom.parseString(cover_file.read())

        imgs = cover_dom.getElementsByTagName("img")
        if imgs:
            img_path = imgs[0].getAttribute("src")
            # normpath collapses "../" segments without consulting the
            # process working directory.
            return os.path.normpath(
                os.path.join(os.path.dirname(cover_file_path), img_path)
            )
    return None
def get_cover_by_filename(epub):
    """Pick a cover purely from the names of the archive members.

    The filename of the first member matching ``cover_regex`` is returned.
    If no member matches, every member whose name matches ``img_ext_regex``
    is handed to ``_choose_best_image`` and that result is returned instead.
    """
    members = epub.filelist

    # First pass: a member explicitly named like a cover wins outright.
    named_cover = next(
        (entry.filename for entry in members if cover_regex.match(entry.filename)),
        None,
    )
    if named_cover is not None:
        return named_cover

    # Fallback: choose among all remaining image members.
    other_images = [
        entry for entry in members if img_ext_regex.match(entry.filename)
    ]
    return _choose_best_image(other_images)
def _choose_best_image(images):
    """Return the entry of *images* with the greatest ``file_size``.

    Returns ``None`` when *images* is empty (or otherwise falsy). On ties the
    earliest entry is returned.
    """
    if not images:
        return None

    def _size(entry):
        return entry.file_size

    return max(images, key=_size)
def _get_rootfile_root(epub):
    """Load the package document (rootfile) named by the EPUB container.

    Args:
        epub: Open archive exposing ``open(name)`` (e.g. ``zipfile.ZipFile``).

    Returns:
        A tuple ``(rootfile_path, rootfile_dom)``: the ``full-path`` attribute
        of the first ``<rootfile>`` element in ``META-INF/container.xml``, and
        the parsed minidom document of the member at that path.

    Raises:
        IndexError: If ``container.xml`` contains no ``<rootfile>`` element.
    """
    # Each member is read inside a ``with`` block so its handle is released
    # as soon as the bytes have been parsed.
    with epub.open("META-INF/container.xml") as container:
        container_root = minidom.parseString(container.read())

    # Locate the rootfile declared by the container.
    rootfile_elem = container_root.getElementsByTagName("rootfile")[0]
    rootfile_path = rootfile_elem.getAttribute("full-path")

    with epub.open(rootfile_path) as rootfile:
        rootfile_root = minidom.parseString(rootfile.read())
    return rootfile_path, rootfile_root
def extract_cover(epub, output_path, size):
    """Write a PNG thumbnail of the EPUB's cover into *output_path*.

    The strategies are tried in order (manifest, guide, filename). The first
    one that yields an image which can be decoded and saved wins. A strategy
    that raises is reported on stdout and the next one is tried.

    Args:
        epub: Open ``zipfile.ZipFile`` of the book. Its ``filename`` supplies
            the base name of the thumbnail.
        output_path: Directory that receives ``<epub basename>.png``; it is
            created if it does not exist.
        size: Maximum width and height of the thumbnail, in pixels.

    Returns:
        ``True`` once a thumbnail has been saved, ``False`` if every strategy
        failed or found nothing.
    """
    extraction_strategies = [get_cover_from_manifest, get_cover_by_guide, get_cover_by_filename]

    # Take the name from the archive itself, not from a module-level
    # global, so the function also works when imported from another module.
    # ZipFile.filename is None for archives built from a nameless file object.
    thumbnail_name = os.path.basename(epub.filename or 'cover') + '.png'

    for strategy in extraction_strategies:
        try:
            cover_path = strategy(epub)
            if not cover_path:
                continue

            # Read the member fully, then release its handle; Pillow works on
            # the in-memory copy.
            with epub.open(cover_path) as cover:
                im = Image.open(BytesIO(cover.read()))

            im.thumbnail((size, size), Image.LANCZOS)
            # PNG cannot store CMYK data (common in print-oriented JPEG covers).
            if im.mode == "CMYK":
                im = im.convert("RGB")

            os.makedirs(output_path, exist_ok=True)
            im.save(os.path.join(output_path, thumbnail_name), "PNG")
            return True
        except Exception as ex:
            # Best-effort boundary: report the failure and fall through to the
            # next strategy.
            print("Error getting cover using %s: " % strategy.__name__, ex)
    return False
if __name__ == '__main__':
    # Usage: <script> <epub-file> <max-size-in-pixels>
    if len(sys.argv) < 3:
        print("Usage: %s <epub-file> <size>" % sys.argv[0])
        sys.exit(1)

    # These names are bound at module level (not inside a function) so they
    # stay visible to the rest of the file.
    input_file = sys.argv[1]
    size = int(sys.argv[2])

    folder_path = '/tmp/epub/'
    os.makedirs(folder_path, exist_ok=True)

    output_path = os.path.join(folder_path, os.path.dirname(input_file))
    # NOTE(review): os.path.join discards folder_path when the directory of
    # input_file is absolute, so an absolute input is thumbnailed next to the
    # book and not under /tmp/epub/ — confirm against the calling previewer.
    # Create the actual target directory too; for relative inputs it is a
    # sub-directory of folder_path that may not exist yet.
    os.makedirs(output_path, exist_ok=True)

    # The context manager closes the archive on every exit path.
    with zipfile.ZipFile(input_file, "r") as epub:
        succeeded = extract_cover(epub, output_path, size)

    if not succeeded:
        print("Error extracting cover")
        sys.exit(1)
    # sys.exit is used instead of the interactive-only ``exit`` helper, which
    # is injected by the ``site`` module and may be absent.
    sys.exit(0)