#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Needs grab, pip3 install -U grab
# (also needs: libcurl4-*-dev for curl-config, libxslt1-dev and libxml2-dev)

import configparser
import logging
import os.path
import re
import selection
import sys
import unicodedata
from argparse import ArgumentParser
from grab import Grab
from io import StringIO

# A "[DEFAULT]" header is prepended to the CONFIG text before it is parsed
# (presumably because CONFIG itself has no section header, which configparser
# requires -- verify against the actual file).
with open(sys.path[0] + '/CONFIG', 'r') as config_file:
    config_str = '[DEFAULT]\n' + config_file.read()
config_fp = StringIO(config_str)
c = configparser.RawConfigParser()
# read_file() is the replacement for readfp(), which no longer exists in
# Python 3.12 and later.
c.read_file(config_fp)

logging.basicConfig(level=logging.DEBUG)


class LoggedOutException(Exception):
    '''Raise when attempting an action needing login without being logged in.'''


class PacktBook():
    '''One ebook entry and the links that belong to it.

    All attributes are strings and start out empty; parse_from_xsel() fills
    them in from a DOM node.
    '''

    def __init__(self):
        self.title = ""
        self.url = ""
        self.isbn = ""
        self.nid = ""
        self.cover_img = ""
        # Download links; an empty string means "no link was found".
        self.dl_pdf = ""
        self.dl_epub = ""
        self.dl_mobi = ""
        self.dl_code = ""

    def __str__(self) -> str:
        '''Returns "[PeKZ] title (isbn)".

        Each letter stands for one download link (P = pdf, e = epub,
        K = mobi, Z = code) and is replaced by "-" when that link is empty.
        '''
        flags = "".join(
            letter if link else "-"
            for letter, link in (
                ("P", self.dl_pdf),
                ("e", self.dl_epub),
                ("K", self.dl_mobi),
                ("Z", self.dl_code),
            )
        )
        return "[{}] {} ({})".format(flags, self.title, self.isbn)

    def get_safe_name(self) -> str:
        '''Returns the name of the book safe for using for file names.

        The title is NFKD-normalized and reduced to ASCII, every character
        that is not a word character, whitespace or "-" is dropped, and each
        run of whitespace/hyphens becomes a single underscore.
        '''
        name = unicodedata.normalize('NFKD', self.title)
        name = name.encode('ascii', 'ignore').decode('ascii')
        # Raw strings: "\w" and "\s" are regex escapes, not string escapes.
        name = re.sub(r'[^\w\s-]', '', name).strip()
        return re.sub(r'[-\s]+', '_', name)

    def parse_from_xsel(self, book: selection.backend.XpathSelector):
        '''Parses the DOM section in `book` and stores the result on self.

        Title, URL, nid and cover image are read unconditionally; the ISBN
        and every download link are only stored when the matching node is
        present, otherwise the attribute keeps its current value.
        '''
        base = "https://www.packtpub.com"

        self.title = book.select("@title").text()
        # Drop a trailing " [eBook]" marker (any letter case) from the title.
        if self.title[-8:].lower() == " [ebook]":
            self.title = self.title[:-8]
        self.url = base + book.select(
            ".//div[@class='product-top-line']/div/a/@href").text()
        self.nid = book.select("@nid").text()
        self.cover_img = book.select(".//img/@src").text().replace(
            "imagecache/thumbview/", "")

        isbn = book.select(".//div/@isbn")
        if isbn:
            self.isbn = isbn.text()

        # Every download link is optional. The PDF link is guarded the same
        # way as the other formats, matching __str__, which already copes
        # with an empty dl_pdf.
        download_links = (
            ("dl_pdf", ".//a[div/@format='pdf']/@href"),
            ("dl_epub", ".//a[div/@format='epub']/@href"),
            ("dl_mobi", ".//a[div/@format='mobi']/@href"),
            ("dl_code", ".//a[starts-with(@href, '/code_download')]/@href"),
        )
        for attr, xpath in download_links:
            found = book.select(xpath)
            if found:
                setattr(self, attr, base + found.text())


class PacktPub():
    '''Holds a Grab session for packtpub.com and its login state.'''

    def __init__(self):
        self.g = Grab()
        self.g.setup(follow_location=True)
        self.g.setup(follow_refresh=True)
        self.g.setup(timeout=120)
        self.g.setup(connect_timeout=10)
        #self.g.setup(body_maxsize=512000)
        self.logged_in = False

    def login(self, email, password):
        '''Submits the login form on the home page with `email`/`password`.

        The home page and the page received after submitting are saved
        under /tmp. `logged_in` is only set once the text_assert() check
        for '"sid":' in the response has been passed.
        '''
        self.g.go('https://www.packtpub.com/')
        self.g.doc.save('/tmp/packtpub-home.html')
        self.g.doc.choose_form(id='packt-user-login-form')
        print("Logging in with account: {}".format(email))
        self.g.doc.set_input('email', email)
        self.g.doc.set_input('password', password)
        self.g.doc.submit()
        self.g.doc.save('/tmp/packpub-home-after-login.html')
        self.g.doc.text_assert('"sid":')
        self.logged_in = True

    def get_ebooks_list(self, url="https://www.packtpub.com/account/my-ebooks"):
        '''Loads the list of purchased ebooks and returns a Selection object with all books.'''
        # Only "http..." URLs require a login; any other URL is fetched
        # without that check.
        if url.startswith("http") and not self.logged_in:
            raise LoggedOutException("Must be logged in before getting ebooks list!")
        self.g.go(url)
        self.g.doc.save('/tmp/packtpub-my-ebooks.html')
        # NOTE(review): the visible source is cut off in the middle of the
        # next statement. The fragment is preserved verbatim below instead of
        # guessing at the rest of this method; restore it from the full file.
        # self.g.doc.text_assert('