UnixTools/packtdl.py

179 lines
6.6 KiB
Python
Executable File

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Needs grab, pip3 install -U grab
# (also needs: libcurl4-*-dev for curl-config, libxslt1-dev and libxml2-dev)
import configparser
import logging
import os.path
import re
import selection
import sys
import unicodedata
from argparse import ArgumentParser
from grab import Grab
from io import StringIO
# Load the CONFIG file that sits next to this script. A '[DEFAULT]' section
# header is prepended so that configparser accepts the file's content; the
# values are later read from the DEFAULT section.
with open(sys.path[0] + '/CONFIG', 'r') as config_file:
    # The context manager closes the handle instead of leaking it.
    config_str = '[DEFAULT]\n' + config_file.read()
config_fp = StringIO(config_str)
c = configparser.RawConfigParser()
# read_file() is the replacement for readfp(), which was deprecated in
# Python 3.2 and removed in Python 3.12.
c.read_file(config_fp)
logging.basicConfig(level=logging.DEBUG)
class LoggedOutException(Exception):
    '''Signals that an operation which requires a logged-in session was attempted while logged out.'''
    pass
class PacktBook():
    '''One ebook entry from the account page, with its metadata and download links.

    Every attribute is a string; an empty string means the value was not
    found in the parsed markup (or nothing has been parsed yet).
    '''

    # Site-relative links found in the page are made absolute with this prefix.
    BASE_URL = "https://www.packtpub.com"

    def __init__(self):
        self.title = ""
        self.url = ""
        self.isbn = ""
        self.nid = ""
        self.cover_img = ""
        self.dl_pdf = ""
        self.dl_epub = ""
        self.dl_mobi = ""
        self.dl_code = ""

    def __str__(self):
        '''Returns a one-line summary: availability flags, title and ISBN.

        The flags are P (PDF), e (EPUB), K (MOBI) and Z (code archive); a
        dash is shown in place of a format that has no download link.
        '''
        output = "["
        output += "P" if self.dl_pdf else "-"
        output += "e" if self.dl_epub else "-"
        output += "K" if self.dl_mobi else "-"
        output += "Z" if self.dl_code else "-"
        output += "] {} ({})".format(self.title, self.isbn)
        return output

    def get_safe_name(self):
        '''Returns the name of the book safe for using for file names.

        Non-ASCII characters are decomposed and dropped, anything that is not
        a word character, whitespace or a dash is removed, and runs of
        whitespace/dashes are collapsed into a single underscore.
        '''
        name = self.title
        name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
        # Raw strings: '\w' and '\s' are invalid escapes in a normal literal.
        name = re.sub(r'[^\w\s-]', '', name).strip()
        name = re.sub(r'[-\s]+', '_', name)
        return name

    def _absolute_link(self, book, xpath):
        '''Returns BASE_URL + the text matched by `xpath` in `book`, or "" if nothing matched.'''
        found = book.select(xpath)
        if not found:
            return ""
        return self.BASE_URL + found.text()

    def parse_from_xsel(self, book: "selection.backend.XpathSelector"):
        '''Parses the DOM section in `book` and fills in this object's attributes.

        `book` only needs a `select(xpath)` method whose result is falsy when
        nothing matched and offers `text()` otherwise. Title, product link,
        nid and cover image are required and read unconditionally; ISBN and
        all download links are optional and are set to "" when absent.
        '''
        self.title = book.select("@title").text()
        # Drop a trailing " [eBook]" marker (case-insensitive) from the title.
        if self.title[-8:].lower() == " [ebook]":
            self.title = self.title[:-8]
        self.url = self.BASE_URL + book.select(".//div[@class='product-top-line']/div/a/@href").text()
        self.nid = book.select("@nid").text()
        # Removing the thumbnail path segment yields the URL used as cover image.
        self.cover_img = book.select(".//img/@src").text().replace("imagecache/thumbview/", "")
        isbn = book.select(".//div/@isbn")
        self.isbn = isbn.text() if isbn else ""
        # The PDF link is guarded like the other formats so that an entry
        # without a PDF does not abort parsing (__str__ already treats a
        # missing PDF as a valid state).
        # NOTE(review): assumes .text() on an empty match raises - confirm.
        self.dl_pdf = self._absolute_link(book, ".//a[div/@format='pdf']/@href")
        self.dl_epub = self._absolute_link(book, ".//a[div/@format='epub']/@href")
        self.dl_mobi = self._absolute_link(book, ".//a[div/@format='mobi']/@href")
        self.dl_code = self._absolute_link(book, ".//a[starts-with(@href, '/code_download')]/@href")
class PacktPub():
    '''Wraps a Grab session for logging in to packtpub.com, listing and downloading ebooks.'''

    def __init__(self):
        '''Creates the Grab instance, configures redirects and timeouts, and starts logged out.'''
        self.g = Grab()
        self.g.setup(follow_location=True)
        self.g.setup(follow_refresh=True)
        self.g.setup(timeout=120)
        self.g.setup(connect_timeout=10)
        #self.g.setup(body_maxsize=512000)
        self.logged_in = False

    def login(self, email, password):
        '''Submits the login form on the home page with the given credentials.

        The home page and the post-login response are saved under /tmp.
        `logged_in` is only set to True after the response has been asserted
        to contain the text '"sid":'; if that assertion fails, its exception
        propagates and `logged_in` stays False.
        '''
        self.g.go('https://www.packtpub.com/')
        self.g.doc.save('/tmp/packtpub-home.html')
        self.g.doc.choose_form(id='packt-user-login-form')
        print("Logging in with account: {}".format(email))
        self.g.doc.set_input('email', email)
        self.g.doc.set_input('password', password)
        self.g.doc.submit()
        self.g.doc.save('/tmp/packpub-home-after-login.html')
        self.g.doc.text_assert('"sid":')
        self.logged_in = True

    def get_ebooks_list(self, url="https://www.packtpub.com/account/my-ebooks"):
        '''Loads the list of purchased ebooks and returns it as a list of PacktBook objects.

        Only URLs starting with "http" require a prior login; any other URL
        (e.g. a file:// URL) is loaded even while logged out.
        The loaded page is saved to /tmp/packtpub-my-ebooks.html.

        Raises LoggedOutException if a "http" URL is requested while logged out.
        '''
        if url.startswith("http") and not self.logged_in:
            raise LoggedOutException("Must be logged in before getting ebooks list!")
        self.g.go(url)
        self.g.doc.save('/tmp/packtpub-my-ebooks.html')
        self.g.doc.text_assert('<h1>My eBooks </h1>')
        all_books_xsel = self.g.doc.select('//div[@id="product-account-list"]/div[starts-with(@class, "product-line")][@title]')
        all_books = []
        for b in all_books_xsel:
            book_obj = PacktBook()
            book_obj.parse_from_xsel(b)
            all_books.append(book_obj)
        return all_books

    def download_book_all(self, book: "PacktBook", destination_directory):
        '''Downloads the PDF of the given book to destination_directory/<safe name>.pdf.

        Only the PDF is fetched; the other formats are not downloaded by this
        method. The destination directory is created if it is missing. A
        book without a PDF link is skipped with a message.

        Raises LoggedOutException if called while logged out.
        '''
        if not self.logged_in:
            raise LoggedOutException("Must be logged in before download!")
        base_name = book.get_safe_name()
        # exist_ok=True already covers an existing directory, so no separate
        # (race-prone) existence check is needed.
        os.makedirs(destination_directory, mode=0o775, exist_ok=True)
        if not book.dl_pdf:
            # Nothing to request; avoid handing an empty URL to Grab.
            print("No PDF available for {}".format(base_name))
            return
        print("Downloading PDF of {} from {}".format(base_name, book.dl_pdf))
        self.g.download(book.dl_pdf, os.path.join(destination_directory, base_name + ".pdf"))
# Command line: without any range option the purchased books are only listed;
# with --start > 1, --end or --count the selected range is downloaded.
parser = ArgumentParser(description="List or download all purchased ebooks from your PACKT account.")
parser.add_argument(
    "--start",
    help="Index to start at (default: 1)",
    metavar="NUMBER",
    type=int,
    dest="idx_start",
    required=False,
    default=1,
)
parser.add_argument(
    "-n", "--count",
    help="Number of items to download, starting at --start index",
    metavar="COUNT",
    type=int,
    dest="count",
    required=False,
)
parser.add_argument(
    "--end",
    help="Index to stop at (default: last)",
    metavar="NUMBER",
    type=int,
    dest="idx_end",
    required=False,
)
#parser.add_argument("--verbose", help="Verbose logging to STDERR", action="store_true")
opts = vars(parser.parse_args())

p = PacktPub()
p.login(c.get('DEFAULT', 'PACKT_LOGIN'), c.get('DEFAULT', 'PACKT_PASSWORD'))

# Prefer the copy of the ebooks page saved by an earlier run, if there is one.
if not os.path.isfile("/tmp/packtpub-my-ebooks.html"):
    all_books = p.get_ebooks_list()
else:
    all_books = p.get_ebooks_list("file:///tmp/packtpub-my-ebooks.html")
print("Found {:d} ebooks.".format(len(all_books)))

range_requested = opts["idx_start"] > 1 or opts["idx_end"] or opts["count"]
if not range_requested:
    # No selection made: Show list
    for number, listed_book in enumerate(all_books, start=1):
        print("{:d}: {}".format(number, listed_book))
else:
    # Some range given: Download books
    # Indices are 1-based and inclusive; the start is never below 1.
    idx_start = max(opts["idx_start"], 1)
    # --end wins over --count; without either, go up to the last book.
    if opts["idx_end"]:
        idx_end = opts["idx_end"]
    elif opts["count"]:
        idx_end = idx_start + opts["count"] - 1
    else:
        idx_end = len(all_books)
    # First make sure the end is not before the start, then cap it at the
    # number of books (so a start beyond the list selects nothing).
    idx_end = min(max(idx_end, idx_start), len(all_books))
    print("Selected range: {:d} to {:d}".format(idx_start, idx_end))
    for number, selected_book in enumerate(all_books[idx_start - 1:idx_end], start=idx_start):
        print("{:d}: {}".format(number, selected_book))
        p.download_book_all(selected_book, "/tmp/packt")