Initial commit

This commit is contained in:
Markus Birth 2022-01-05 14:04:46 +01:00
commit 27e5342e82
Signed by: mbirth
GPG Key ID: A9928D7A098C3A9A
12 changed files with 421 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
*.db
jekyll/
uploads/
/config.yaml

11
Pipfile Normal file
View File

@ -0,0 +1,11 @@
[[source]]
url = "https://pypi.python.org/simple"
verify_ssl = true
name = "pypi"
[packages]
python-frontmatter = "*"
pytz = "*"
pyyaml = "*"
[dev-packages]

70
Pipfile.lock generated Normal file
View File

@ -0,0 +1,70 @@
{
"_meta": {
"hash": {
"sha256": "80e20d60ab55b386ef8a6294cfd3e9712ae0ea8aeb23582f66e8534f61361e74"
},
"pipfile-spec": 6,
"requires": {},
"sources": [
{
"name": "pypi",
"url": "https://pypi.python.org/simple",
"verify_ssl": true
}
]
},
"default": {
"python-frontmatter": {
"hashes": [
"sha256:766ae75f1b301ffc5fe3494339147e0fd80bc3deff3d7590a93991978b579b08",
"sha256:e98152e977225ddafea6f01f40b4b0f1de175766322004c826ca99842d19a7cd"
],
"index": "pypi",
"version": "==1.0.0"
},
"pytz": {
"hashes": [
"sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da",
"sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"
],
"index": "pypi",
"version": "==2021.1"
},
"pyyaml": {
"hashes": [
"sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf",
"sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696",
"sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393",
"sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77",
"sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922",
"sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5",
"sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8",
"sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10",
"sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc",
"sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018",
"sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e",
"sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253",
"sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347",
"sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183",
"sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541",
"sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb",
"sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185",
"sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc",
"sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db",
"sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa",
"sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46",
"sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122",
"sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b",
"sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63",
"sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df",
"sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc",
"sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247",
"sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6",
"sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"
],
"index": "pypi",
"version": "==5.4.1"
}
},
"develop": {}
}

38
README.md Normal file
View File

@ -0,0 +1,38 @@
jekyll2s9y importer
===================
This is a tool to import my Jekyll-based [GitHub Pages](https://pages.github.com) blog/wiki
into [Serendipity](https://s9y.org).
Preparations
------------
Set up Serendipity with an SQLite database. Download the database file to the machine where you plan to run jekyll2s9y.
Usage
-----
Copy `config.yaml.example` to `config.yaml` and modify according to your needs. Note that
`jekyll_dir` has to point to your Jekyll base directory.
If not done already, update the Python environment:

    pipenv install

Then run the script:

    pipenv run ./jekyll2s9y.py

Now copy the new database file (`s9y_database_output` in the config.yaml) back to your server and overwrite
the old version. Also copy the `uploads` directory containing the media files. Then, in the Serendipity admin
area, edit your configuration and change the permalink for "Entry URL structure" in any way (e.g. add a letter
to the end) to have the permalinks regenerated. Afterwards, you can undo the change again.
Issues
------
If there are two entries where the permalink generation (i.e. stripping all emojis, etc.) results in the same
permalink, only the older one can be accessed.

33
config.yaml.example Normal file
View File

@ -0,0 +1,33 @@
general:
jekyll_dir: jekyll
s9y_database: serendipity.db
s9y_media_dir: uploads
s9y_database_output: serendipity_new.db
# Timezone for stamps without one
timezone: Europe/Berlin
# As Jekyll only knows tags, specify which tags are to be
# considered s9y categories. Tags not mentioned here will
# be kept as tags (serendipity_plugin_freetag)
categories:
- know-how
- development
- gtd
- review
- hacking
- hardware
- software
- miscellaneous
s9y_defaults:
comments: 0
trackbacks: 0
exflag: 1
# CHANGE THIS TO YOUR Serendipity Username
author: mbirth
# CHANGE THIS TO YOUR Serendipity User-ID
authorid: 1
isdraft: "false"
allow_comments: "true"
moderate_comments: "false"

78
jekyll2s9y.py Executable file
View File

@ -0,0 +1,78 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from datetime import datetime
from os import makedirs
from os.path import basename
import shutil
import pytz
import yaml
import jekyllreader
import s9ywriter
# Load the importer settings. safe_load() is used because the file is plain
# data, and yaml.load() without an explicit Loader is deprecated since
# PyYAML 5.1 and rejected outright by PyYAML 6.
with open("config.yaml", "rt", encoding="utf-8") as f:
    config = yaml.safe_load(f)

print(repr(config))

# copy original file to working copy
shutil.copyfile(config["general"]["s9y_database"], config["general"]["s9y_database_output"])

DATEFORMAT_IN = "%Y-%m-%d %H:%M:%S %z"
DATEFORMAT_OUT = "%Y-%m-%d %H:%M:%S"
LOCAL_TIMEZONE = pytz.timezone(config["general"]["timezone"])

# MAIN SCRIPT
jk = jekyllreader.JekyllReader(config["general"]["jekyll_dir"])
s9y = s9ywriter.S9YWriter(config["general"]["s9y_database_output"])

for i in range(0, jk.len()):
    print(f"Item: {i}")
    jk_article = jk.get(i)
    new_entry = s9ywriter.S9YEntry()

    # Start from the configured defaults; article-specific values follow.
    for k, v in config["s9y_defaults"].items():
        setattr(new_entry, k, v)

    new_entry.title = jk_article.metadata["title"]

    # Both stamps must match DATEFORMAT_IN (including the UTC offset); they
    # are stored in the entry as integer Unix timestamps.
    date_created = datetime.strptime(jk_article.metadata["created"], DATEFORMAT_IN).astimezone(LOCAL_TIMEZONE)
    date_updated = datetime.strptime(jk_article.metadata["updated"], DATEFORMAT_IN).astimezone(LOCAL_TIMEZONE)
    new_entry.timestamp = int(date_created.timestamp())
    new_entry.last_modified = int(date_updated.timestamp())

    # Handle images: copy them into <media dir>/<year of creation>/ and point
    # the article body at the new location.
    img_target_dir = config["general"]["s9y_media_dir"] + "/" + str(date_created.year) + "/"
    img_files = jk_article.images
    print(repr(img_files))
    jk_article.replace_imagepaths("/" + img_target_dir)
    for img in img_files:
        img_name = basename(img)
        makedirs(img_target_dir, exist_ok=True)
        shutil.copyfile(img, img_target_dir + img_name)

    # Handle body: Split into body+extended if possible
    # (everything up to the first blank line becomes the body).
    content = jk_article.body
    content = content.replace("\r", "")
    splits = content.split("\n\n", 1)
    if len(splits) == 1:
        new_entry.body = content
    else:
        splits[1] = splits[1].strip("\n")
        (new_entry.body, new_entry.extended) = splits

    # Handle tags/categories and other metadata.
    # A missing or empty "tags" key is treated as "no tags" instead of
    # aborting the whole import with a KeyError/TypeError.
    for t in jk_article.metadata.get("tags") or []:
        if t in config["categories"]:
            new_entry.categories.append(t)
        else:
            new_entry.tags.append(t)

    # Mark non-English articles in the title.
    if "language" in jk_article.metadata and jk_article.metadata["language"] != "en":
        if jk_article.metadata["language"] == "de":
            new_entry.title += " 🇩🇪"
        else:
            new_entry.title += " (" + jk_article.metadata["language"] + ")"

    s9y.add_entry(new_entry)

# Persist all added entries to the output database.
s9y.commit()

2
jekyllreader/__init__.py Normal file
View File

@ -0,0 +1,2 @@
from .jekyllreader import JekyllReader
from .jekyllarticle import JekyllArticle

View File

@ -0,0 +1,69 @@
from functools import partial
from os.path import basename
import re
import frontmatter
class JekyllArticle():
    """One Jekyll Markdown article: front matter, body and referenced images.

    On construction the file is loaded with ``frontmatter``, highlight tags in
    the body are converted to fenced code blocks and all image references are
    collected into ``images``.

    Attributes:
        file_path: Path of the Markdown file that was loaded.
        base_dir: Path substituted for ``{{ site.url }}`` in image references.
        body: Article text without the front matter.
        metadata: Front matter of the article.
        images: Unique image paths referenced by the body, in order of
            first appearance (Markdown images first, then HTML ``<img>``).
    """

    # Markdown image ![alt](path "title"); groups: alt text, path, optional title
    RE_IMG = r'!\[(.*?)\]\((.+?)( [\'"].+[\'"])?\)'
    # HTML image tag; only the value of the src attribute is captured
    RE_HTML_IMG = r'<img src="(.*?)"'
    # Liquid placeholder for the site's base URL
    RE_SITE_URL = r'\{\{ ?site\.url ?\}\}'

    def __init__(self, md_file: str, base_dir: str = "."):
        """Load ``md_file`` and collect the images it references.

        Args:
            md_file: Path of the Markdown file to read.
            base_dir: Replacement for ``{{ site.url }}`` in image paths.
        """
        self.file_path = md_file
        self.base_dir = base_dir
        self.body = ""
        self.metadata = {}
        self.images = []
        self.replace_image_newpath = ""
        self.parse_file()
        self.collect_images()

    def _resolve_site_url(self, path: str) -> str:
        """Return ``path`` with every ``{{ site.url }}`` replaced by ``base_dir``."""
        # A callable replacement inserts base_dir literally. Passing the path
        # as a replacement *template* would interpret backslashes (e.g. in
        # Windows paths) as escape sequences and fail or corrupt the result.
        return re.sub(self.RE_SITE_URL, lambda _match: self.base_dir, path)

    def parse_file(self) -> None:
        """Read the file and populate ``metadata`` and ``body``."""
        article = frontmatter.load(self.file_path)
        self.metadata = article.metadata
        self.body = article.content
        # Convert Liquid highlight tags to Markdown Extra fenced code blocks
        self.body = re.sub(r'\{% highlight( (\S+)) %\}', r'```\2', self.body)
        self.body = re.sub(r'\{% endhighlight %\}', r'```', self.body)

    def collect_images(self) -> None:
        """Add all image paths referenced in ``body`` to ``images``.

        Duplicates are removed while keeping the order of first appearance.
        """
        found = list(self.images)
        # findall() yields one tuple of groups per Markdown image
        for match in re.findall(self.RE_IMG, self.body):
            found.append(self._resolve_site_url(match[1]))
        # ... and the plain src string per HTML image (single group)
        for match in re.findall(self.RE_HTML_IMG, self.body):
            found.append(self._resolve_site_url(match))
        # dict.fromkeys() de-duplicates without shuffling the order
        self.images = list(dict.fromkeys(found))

    def _replace_single_imagepath(self, match, new_base_url: str = ""):
        """Build the replacement for one Markdown image match.

        The image keeps its alt text and title; its path becomes
        ``new_base_url`` followed by the original file name.
        """
        old_imgfile = self._resolve_site_url(match.group(2))
        img_name = basename(old_imgfile)
        new_imgfile = "{}{}".format(new_base_url, img_name)
        # Group 3 (title incl. leading space) is None when no title is given
        img_title = match.group(3) if match.group(3) else ""
        return "![{}]({}{})".format(match.group(1), new_imgfile, img_title)

    def _replace_single_htmlimagepath(self, match, new_base_url: str = ""):
        """Build the replacement for one HTML ``<img src="...">`` match."""
        old_imgfile = self._resolve_site_url(match.group(1))
        img_name = basename(old_imgfile)
        new_imgfile = "{}{}".format(new_base_url, img_name)
        return "<img src=\"" + new_imgfile + "\""

    def replace_imagepaths(self, new_base_url: str = "") -> None:
        """Rewrite all image references in ``body`` to live below ``new_base_url``.

        Only the file name of each image is kept, e.g.::

            OLD: ![alt text]({{ site.url }}/assets/blah.jpg "title")
            NEW: ![alt text](<new_base_url>blah.jpg "title")

        ``images`` is not modified.
        """
        self.body = re.sub(self.RE_IMG, partial(self._replace_single_imagepath, new_base_url=new_base_url), self.body)
        self.body = re.sub(self.RE_HTML_IMG, partial(self._replace_single_htmlimagepath, new_base_url=new_base_url), self.body)

View File

@ -0,0 +1,25 @@
import os
from .jekyllarticle import JekyllArticle
class JekyllReader():
    """Find the Markdown articles below a Jekyll base directory.

    Attributes:
        src_dir: The Jekyll base directory that is scanned.
        file_list: Paths of all articles found, sorted per directory.
    """

    # Directory names whose files are never treated as articles
    SKIPPED_DIRS = ("assets", "css", "images", "fonts", "javascripts", "_includes", "_layouts")

    def __init__(self, src_dir: str):
        """Scan ``src_dir`` for articles right away."""
        self.src_dir = src_dir
        self.file_list = []
        self.find_articles()

    def find_articles(self) -> None:
        """Append every ``*.md`` file below ``src_dir`` to ``file_list``.

        Files located directly in ``src_dir`` and files in a directory named
        like one of ``SKIPPED_DIRS`` are ignored. Sub-directories of a skipped
        directory are still visited.
        """
        # Compare normalised paths so the base directory is recognised even
        # when src_dir has several components or a trailing slash.
        top_dir = os.path.normpath(self.src_dir)
        for root, dirs, files in os.walk(self.src_dir):
            # Sorting in place makes os.walk() descend in a stable order, so
            # repeated imports yield the articles in the same sequence.
            dirs.sort()
            current = os.path.normpath(root)
            if current == top_dir or os.path.basename(current) in self.SKIPPED_DIRS:
                continue
            for f in sorted(files):
                if os.path.splitext(f)[1] != ".md":
                    continue
                filepath = "{}/{}".format(root, f)
                self.file_list.append(filepath)

    def len(self) -> int:
        """Return the number of articles found."""
        return len(self.file_list)

    def get(self, idx: int):
        """Load and return the article at position ``idx`` of ``file_list``."""
        return JekyllArticle(self.file_list[idx], self.src_dir)

2
s9ywriter/__init__.py Normal file
View File

@ -0,0 +1,2 @@
from .s9ywriter import S9YWriter
from .s9yentry import S9YEntry

14
s9ywriter/s9yentry.py Normal file
View File

@ -0,0 +1,14 @@
class S9YEntry():
    """Plain container for one Serendipity entry that is to be inserted.

    Column values are stored as ordinary attributes named after the entries
    in ``db_keys``; attributes that were never assigned count as ``None``.
    """

    def __init__(self):
        """Create an entry without column values, categories or tags."""
        self.tags = []
        self.categories = []
        # Column names in the order used by get_db_insert_values()
        self.db_keys = [
            "id",
            "title",
            "timestamp",
            "body",
            "comments",
            "trackbacks",
            "extended",
            "exflag",
            "author",
            "authorid",
            "isdraft",
            "allow_comments",
            "last_modified",
            "moderate_comments",
        ]

    def get_db_insert_values(self):
        """Return the column values in ``db_keys`` order, ``None`` where unset."""
        return [getattr(self, column, None) for column in self.db_keys]

75
s9ywriter/s9ywriter.py Normal file
View File

@ -0,0 +1,75 @@
import sqlite3
from .s9yentry import S9YEntry
class S9YWriter():
    """Write entries, categories and tags into a Serendipity SQLite database.

    Nothing is persisted until ``commit()`` is called.

    Attributes:
        db_file: Path of the SQLite database file.
        db: The open ``sqlite3`` connection.
    """

    def __init__(self, db_file: str):
        """Open a connection to the database at ``db_file``."""
        self.db_file = db_file
        self.db = sqlite3.connect(self.db_file)

    def __del__(self):
        # self.db does not exist if sqlite3.connect() raised in __init__;
        # avoid a secondary AttributeError during garbage collection.
        db = getattr(self, "db", None)
        if db is not None:
            db.close()

    def add_entry(self, entry: "S9YEntry"):
        """Insert ``entry`` together with its access flag, categories and tags.

        The ``id`` column (first element of ``entry.db_keys``) is left out of
        the INSERT so the database assigns it. Duplicate categories and tags
        are written once, in the order of their first appearance.
        """
        keys = entry.db_keys[1:]
        placeholders = ["?"] * len(keys)
        data = entry.get_db_insert_values()[1:]
        # Only the column names are interpolated; all values are bound.
        sql = "INSERT INTO entries (" + ", ".join(keys) + ") VALUES (" + ", ".join(placeholders) + ")"
        result = self.db.execute(sql, data)
        entry_id = result.lastrowid
        print(repr(result.lastrowid))
        print(entry.title)

        # Add permission
        sql = "INSERT INTO entryproperties VALUES (?, ?, ?)"
        self.db.execute(sql, [entry_id, "ep_access", "public"])

        # TODO: Add permalink
        # permalinks (permalink, entry_id, type, data)
        # permalink = archives/YYYY-MM-DD-%title%.html
        # %title% = sanitised, no Unicode, ü --> ue, etc.
        # WORKAROUND: Edit your blog settings and change the permalink to let S9Y regenerate them
        #             then change back to desired value

        # dict.fromkeys() removes duplicates but, unlike set(), keeps the
        # order, so new categories get their ids in a reproducible sequence.
        for category in dict.fromkeys(entry.categories):
            self.add_category(entry_id, category)

        for tag in dict.fromkeys(entry.tags):
            self.add_tag(entry_id, tag)

    def commit(self):
        """Commit all pending changes to the database."""
        self.db.commit()

    def add_category(self, entry_id: int, category_name: str):
        """Link entry ``entry_id`` to ``category_name``, creating it if needed.

        A newly created category also gets read/write access rows and a
        ``categories/<name>`` permalink.
        """
        # Tables: category, entrycat, access
        # Category: categoryid, category_name, "", "", 0, 0, 0, parentid, NULL, NULL
        # entrycat: entryid, categoryid
        print(f"{entry_id} - {category_name}")
        sql = "SELECT categoryid FROM category WHERE category_name = ?"
        result = self.db.execute(sql, [category_name])
        cat = result.fetchone()
        if cat:
            category_id = cat[0]
        else:
            # Category does not yet exist, add it
            sql = "INSERT INTO category (category_name) VALUES (?)"
            result = self.db.execute(sql, [category_name])
            category_id = result.lastrowid

            # Add access permissions
            # access: 0, category_id, "category", read
            # access: 0, category_id, "category", write
            sql = "INSERT INTO access VALUES (?, ?, ?, ?, ?)"
            self.db.execute(sql, [0, category_id, "category", "read", ""])
            self.db.execute(sql, [0, category_id, "category", "write", ""])

            # Add permalink
            sql = "INSERT INTO permalinks VALUES (?, ?, ?, ?)"
            self.db.execute(sql, [f"categories/{category_name}", category_id, "category", None])

        sql = "INSERT INTO entrycat VALUES (?, ?)"
        self.db.execute(sql, [entry_id, category_id])

    def add_tag(self, entry_id: int, tag: str):
        """Attach ``tag`` to entry ``entry_id``."""
        # Table: entrytags (entryid, tag)
        sql = "INSERT INTO entrytags VALUES (?, ?)"
        self.db.execute(sql, (entry_id, tag))