117 lines
4.1 KiB
Bash
117 lines
4.1 KiB
Bash
|
#!/usr/bin/env python3
|
||
|
# pillow is a dependency
|
||
|
|
||
|
|
||
|
import os
|
||
|
import zipfile
|
||
|
from PIL import Image
|
||
|
import sys
|
||
|
from io import BytesIO
|
||
|
import re
|
||
|
from xml.dom import minidom
|
||
|
|
||
|
# Matches any archive member name that ends in a supported raster image
# extension (case-insensitive).
img_ext_regex = re.compile(r'^.*\.(jpg|jpeg|png)$', flags=re.IGNORECASE)
# Matches image member names that contain "cover" somewhere before the
# extension. The trailing `$` matters because the pattern is applied with
# `.match()`, which only anchors the start: without it a name such as
# "cover.jpg.xml" would be accepted as a cover image.
cover_regex = re.compile(r'.*cover.*\.(jpg|jpeg|png)$', flags=re.IGNORECASE)
|
||
|
|
||
|
def get_cover_from_manifest(epub):
    """Find the cover image through the package document's metadata and manifest.

    The lookup runs in two passes over the manifest ``<item>`` elements:

    1. If a ``<meta name="cover" content="ID">`` element exists, the item whose
       ``id`` equals ``ID`` and whose ``href`` looks like an image wins.
    2. Otherwise the first item (in document order) is returned whose ``id`` or
       ``properties`` equals the declared cover id, or contains ``"cover"``
       while its ``href`` looks like an image.

    Args:
        epub: An open ``zipfile.ZipFile`` for the EPUB archive.

    Returns:
        The archive member name of the cover, resolved relative to the
        package document and normalised, or ``None`` when nothing matches.

    Raises:
        IndexError: If the package document has no ``<manifest>`` element.
    """
    # ZIP member names always use "/" separators, whatever the host OS is,
    # so they must be built with posixpath rather than os.path.
    import posixpath

    rootfile_path, rootfile_root = _get_rootfile_root(epub)
    base_dir = posixpath.dirname(rootfile_path)

    def member_path(href):
        # normpath collapses "./" and "../" segments so the result can match
        # the name stored in the archive.
        return posixpath.normpath(posixpath.join(base_dir, href))

    # find possible cover in meta
    cover_id = None
    for meta in rootfile_root.getElementsByTagName("meta"):
        if meta.getAttribute("name") == "cover":
            # getAttribute() yields "" for a missing attribute; treat that as
            # "no cover declared", otherwise "" would equal the id/properties
            # of every item that lacks those attributes.
            cover_id = meta.getAttribute("content") or None
            break

    # find the manifest element
    manifest = rootfile_root.getElementsByTagName("manifest")[0]
    items = manifest.getElementsByTagName("item")

    # Pass 1: an explicitly declared cover image beats any name-based guess,
    # even if a "cover"-ish item appears earlier in the manifest.
    if cover_id is not None:
        for item in items:
            item_href = item.getAttribute("href")
            if item.getAttribute("id") == cover_id and img_ext_regex.match(item_href):
                return member_path(item_href)

    # Pass 2: first item that is declared as, or merely looks like, a cover.
    for item in items:
        item_id = item.getAttribute("id")
        item_properties = item.getAttribute("properties")
        item_href = item.getAttribute("href")
        # img_ext_regex is case-insensitive, so no lower-casing is needed.
        item_href_is_image = img_ext_regex.match(item_href)
        item_id_might_be_cover = item_id == cover_id or ('cover' in item_id and item_href_is_image)
        item_properties_might_be_cover = item_properties == cover_id or (
            'cover' in item_properties and item_href_is_image
        )
        if item_id_might_be_cover or item_properties_might_be_cover:
            return member_path(item_href)

    return None
|
||
|
|
||
|
def get_cover_by_guide(epub):
    """Find the cover image through a ``<reference type="cover">`` entry.

    Each such reference is expected to point at an XML/XHTML cover page. The
    page is parsed and the first ``<img src>`` is used; if the page has no
    ``<img>``, the first SVG ``<image xlink:href>`` (or ``href``) is used.

    Args:
        epub: An open ``zipfile.ZipFile`` for the EPUB archive.

    Returns:
        The archive member name of the referenced image, resolved relative to
        the cover page and normalised, or ``None`` when no cover reference
        yields an image.

    Raises:
        KeyError: If a referenced cover page is not present in the archive.
        xml.parsers.expat.ExpatError: If a cover page is not well-formed XML.
    """
    # ZIP member names always use "/" separators, so build them with
    # posixpath; normpath also avoids os.path.relpath's dependence on the
    # process's current working directory.
    import posixpath

    rootfile_path, rootfile_root = _get_rootfile_root(epub)
    base_dir = posixpath.dirname(rootfile_path)

    for ref in rootfile_root.getElementsByTagName("reference"):
        if ref.getAttribute("type") != "cover":
            continue

        # A "#fragment" addresses an element inside the page, not a member of
        # the archive, so it must be dropped before opening the file.
        cover_href = ref.getAttribute("href").partition("#")[0]
        cover_file_path = posixpath.normpath(posixpath.join(base_dir, cover_href))

        # is html
        with epub.open(cover_file_path) as cover_file:
            cover_dom = minidom.parseString(cover_file.read())

        img_path = ""
        imgs = cover_dom.getElementsByTagName("img")
        if imgs:
            img_path = imgs[0].getAttribute("src")
        else:
            # Cover pages may wrap the picture in an SVG <image> element.
            svg_images = cover_dom.getElementsByTagName("image")
            if svg_images:
                img_path = (svg_images[0].getAttribute("xlink:href")
                            or svg_images[0].getAttribute("href"))

        # An empty reference would resolve to the page's directory; skip it.
        if img_path:
            return posixpath.normpath(
                posixpath.join(posixpath.dirname(cover_file_path), img_path)
            )

    return None
|
||
|
|
||
|
def get_cover_by_filename(epub):
    """Guess the cover image from the archive's file names alone.

    The first member whose name matches ``cover_regex`` is returned at once.
    If there is none, the pick of ``_choose_best_image`` among the members
    whose names match ``img_ext_regex`` is used instead.

    Args:
        epub: An open ``zipfile.ZipFile`` for the EPUB archive.

    Returns:
        The archive member name of the chosen image, or ``None`` when the
        archive contains no matching image.
    """
    fallback_candidates = []
    for fileinfo in epub.filelist:
        member_name = fileinfo.filename
        if cover_regex.match(member_name):
            return member_name
        if img_ext_regex.match(member_name):
            fallback_candidates.append(fileinfo)

    best = _choose_best_image(fallback_candidates)
    # The helper hands back a ZipInfo; return its name so this strategy
    # yields a member name on every path.
    return best.filename if best is not None else None
|
||
|
|
||
|
def _choose_best_image(images):
    """Return the entry of *images* with the greatest ``file_size``.

    Returns ``None`` when *images* is empty (or otherwise falsy). On a tie the
    earliest entry wins.
    """
    if not images:
        return None

    def size_of(entry):
        return entry.file_size

    return max(images, key=size_of)
|
||
|
|
||
|
def _get_rootfile_root(epub):
    """Locate and parse the EPUB's package document (the "rootfile").

    Reads ``META-INF/container.xml``, takes the ``full-path`` attribute of its
    first ``<rootfile>`` element, and parses the file that path names.

    Args:
        epub: An open ``zipfile.ZipFile`` for the EPUB archive.

    Returns:
        A ``(rootfile_path, document)`` tuple: the archive member name of the
        package document and its parsed ``xml.dom.minidom`` document.

    Raises:
        KeyError: If the container or the rootfile is missing from the archive.
        IndexError: If the container declares no ``<rootfile>`` element.
    """
    # open the main container; `with` closes the member handle once it is read
    with epub.open("META-INF/container.xml") as container:
        container_root = minidom.parseString(container.read())

    # locate the rootfile
    elem = container_root.getElementsByTagName("rootfile")[0]
    rootfile_path = elem.getAttribute("full-path")

    # open the rootfile
    with epub.open(rootfile_path) as rootfile:
        rootfile_root = minidom.parseString(rootfile.read())

    return rootfile_path, rootfile_root
|
||
|
|
||
|
def extract_cover(epub, output_path, size):
    """Extract the EPUB's cover and save it as a PNG thumbnail.

    The lookup strategies are tried in order (manifest, guide, file name).
    A strategy that raises, or whose result cannot be opened as an image, is
    reported on stdout and the next strategy is tried.

    Args:
        epub: An open ``zipfile.ZipFile`` for the EPUB archive.
        output_path: Directory to write the thumbnail into; it is created if
            it does not exist yet.
        size: Maximum width and height of the thumbnail, in pixels.

    Returns:
        ``True`` once a thumbnail has been written, ``False`` if every
        strategy failed. The file is named ``<archive file name>.png``.
    """
    extraction_strategies = [get_cover_from_manifest, get_cover_by_guide, get_cover_by_filename]

    # Name the thumbnail after the archive itself. Relying on a module-level
    # `input_file` would only work when this file is run as a script; imported
    # callers would hit a NameError on every strategy.
    # "cover" is the fallback for archives opened from an unnamed file object.
    source_name = os.path.basename(getattr(epub, "filename", None) or "cover")
    thumbnail_path = os.path.join(output_path, source_name + '.png')

    for strategy in extraction_strategies:
        try:
            cover_path = strategy(epub)
            if cover_path:
                # ZipFile.read() accepts a member name or a ZipInfo and does
                # not leave an open handle behind.
                im = Image.open(BytesIO(epub.read(cover_path)))
                # PNG cannot store CMYK data, which some JPEG covers use.
                if im.mode == "CMYK":
                    im = im.convert("RGB")
                im.thumbnail((size, size), Image.LANCZOS)
                if output_path:
                    os.makedirs(output_path, exist_ok=True)
                im.save(thumbnail_path, "PNG")
                return True
        except Exception as ex:
            # Best effort: report the failure and fall through to the next strategy.
            print("Error getting cover using %s: " % strategy.__name__, ex)

    return False
|
||
|
|
||
|
# Command-line entry point: <script> <epub-file> <size>
# Writes <epub file name>.png under /tmp/epub/ and exits 0 on success, 1 on failure.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        # sys.exit() with a string prints it to stderr and exits with status 1.
        sys.exit("Usage: %s <epub-file> <size>" % sys.argv[0])

    # Kept as a module-level name on purpose: extract_cover may read it.
    input_file = sys.argv[1]
    size = int(sys.argv[2])

    folder_path = '/tmp/epub/'
    os.makedirs(folder_path, exist_ok=True)

    # NOTE(review): os.path.join discards folder_path when input_file is an
    # absolute path, so the thumbnail then lands next to the EPUB instead of
    # under /tmp/epub/ - confirm whether that is intended.
    output_path = os.path.join(folder_path, os.path.dirname(input_file))
    # Create the directory that is actually written to, not just folder_path;
    # otherwise a relative input such as "books/a.epub" fails on save.
    os.makedirs(output_path, exist_ok=True)

    # `with` closes the archive before the process exits.
    with zipfile.ZipFile(input_file, "r") as epub:
        extracted = extract_cover(epub, output_path, size)

    if extracted:
        sys.exit(0)
    else:
        print("Error extracting cover")
        sys.exit(1)
|