Initial commit.

This commit is contained in:
Markus Birth 2022-01-20 03:58:43 +01:00
commit d774717817
Signed by: mbirth
GPG Key ID: A9928D7A098C3A9A
3 changed files with 188 additions and 0 deletions

35
README.md Normal file
View File

@ -0,0 +1,35 @@
disqus2s9y
==========
DISQUS to Serendipity importer.
Usage
-----
Fetch all the comments from your site using the Disqus API. Basically, go
into your admin area, open the "Moderate" section, and watch the HTTP
requests your browser makes.
You should find something going to `https://disqus.com/api/3.0/posts/list`.
It will return a JSON structure. In the section `cursor`, there's a value
`hasNext`. If that's `true`, do the request again but add the parameter
`cursor` with the value from the `next` key to it. This will get you the
next batch of comments. Rinse and repeat until you have fetched everything.
Now copy all the files into the directory with these scripts and add
their names to the `DISQUS_FILES` variable in the Python scripts.
Also download your Serendipity SQLite database into the directory as `serendipity.db`.
Now run `dump_urls_to_csv.py` to create 2 CSV files. One is `disqus2s9y.csv`
which contains all the URLs from your DISQUS dump and an empty column
`s9y_entry_id`. The second file is `s9y_urls.csv` which contains all the
URLs from your Serendipity database.
The important step is now to match both, i.e. DISQUS-URL to Serendipity
entry_id. Fill in the matching entry_id into the `s9y_entry_id` column.
After you're done, run `disqus2s9y.py` and it should import all comments
into your `serendipity.db`. Afterwards copy that back to the server and
you're done.

102
disqus2s9y.py Executable file
View File

@ -0,0 +1,102 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv
import datetime
import json
import sqlite3
import sys
from pprint import pprint
# Disqus API dump files to import (add your own filenames here).
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]

# Load the hand-edited mapping sheet: Disqus thread URL -> Serendipity entry_id.
mappings = {}
with open("disqus2s9y.csv", "rt") as csv_file:
    for record in csv.DictReader(csv_file):
        entry_id = record["s9y_entry_id"]
        if not entry_id:
            # Rows where the user did not fill in an entry_id cannot be
            # imported; skip them.
            continue
        mappings[record["disqus_url"]] = entry_id
print("Found {} mappings in disqus2s9y.csv.".format(len(mappings)))
# Gather every comment from all Disqus dump files into one list.
comments = []
for dump_name in DISQUS_FILES:
    with open(dump_name, "rt") as dump_file:
        comments += json.load(dump_file)["response"]
print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Sort oldest-first so that parent comments are inserted before their replies.
print("Sorting comments by timestamp ascending.")
comments.sort(key=lambda entry: entry["createdAt"])

db = sqlite3.connect("serendipity.db")
cursor = db.cursor()
def insert_dict(db_cursor, table, data):
    """Insert *data* (a column-name -> value dict) into *table*.

    Values are bound via ``?`` placeholders; the table and column names
    themselves are interpolated into the SQL, so they must come from
    trusted (hard-coded) sources.

    Returns the rowid of the newly inserted row.
    """
    columns = list(data.keys())
    sql = "INSERT INTO {} ({}) VALUES ({})".format(
        table,
        ", ".join(columns),
        ", ".join("?" for _ in columns),
    )
    db_cursor.execute(sql, [data[col] for col in columns])
    return db_cursor.lastrowid
def sanitise_text(message):
    """Prepare a raw Disqus message for the Serendipity Markdown plugin.

    HTML ``<code>``/``</code>`` tags become backticks, and a space is
    prepended to each newline (Markdown line-break convention).
    """
    for tag in ("<code>", "</code>"):
        message = message.replace(tag, "`")
    return message.replace("\n", " \n")
# Maps Disqus comment id (string) -> Serendipity comment rowid, filled in as
# we insert, so replies can reference their already-inserted parents.
disqus_to_s9y_id = {}

for comment in comments:
    thread_url = comment["thread"]["link"]
    if str(thread_url) not in mappings:
        print(f"ERROR: Can't map {thread_url} to Serendipity page. Check disqus2s9y.csv!")
        continue

    # Resolve the Serendipity id of the parent comment. Comments were sorted
    # oldest-first above, so a parent should always precede its replies.
    parent_id = 0
    parent_ref = comment["parent"]
    if parent_ref:
        if str(parent_ref) not in disqus_to_s9y_id:
            print("ERROR: DISQUS Parent ID {} not found.".format(parent_ref))
            sys.exit(255)
        parent_id = disqus_to_s9y_id[str(parent_ref)]

    author = comment["author"]
    author_email = str(author["email"]) if "email" in author else ""
    author_url = str(author["url"]) if "url" in author else ""

    # NOTE(review): fromisoformat() gives a naive datetime here, so
    # .timestamp() interprets it in the machine's local timezone — confirm
    # whether Disqus createdAt values are UTC before relying on this.
    created_ts = int(datetime.datetime.fromisoformat(comment["createdAt"]).timestamp())

    new_comment = {
        "entry_id": mappings[thread_url],
        "parent_id": parent_id,
        "timestamp": created_ts,
        "title": "",
        "author": author["name"],
        "email": author_email,
        "url": author_url,
        "ip": comment["ipAddress"],
        "body": sanitise_text(comment["raw_message"]),
        "type": "NORMAL",
        "subscribed": "false",
        "status": "approved",
        "referer": "",
    }
    new_rowid = insert_dict(cursor, "comments", new_comment)
    disqus_to_s9y_id[comment["id"]] = new_rowid
    print("Inserted comment with id {}".format(new_rowid))

cursor.close()
db.commit()
db.close()

51
dump_urls_to_csv.py Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import sqlite3
from os.path import basename
# 1. Match URLs from JSON to permalinks/entries in SQLite
# 2. Sort JSON comments old-to-new
# 3. After writing comment into SQLite, store new SQLite-ID and Disqus-ID (for threads)
# Disqus API dump files to scan (add your own filenames here).
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]

# Pull every comment out of all dump files.
comments = []
for dump_name in DISQUS_FILES:
    with open(dump_name, "rt") as dump_file:
        comments += json.load(dump_file)["response"]
print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Collect the distinct thread URLs the comments belong to.
old_urls = list({entry["thread"]["link"] for entry in comments})
print("Found {} unique URLs.".format(len(old_urls)))
# Use the csv module rather than hand-built f.write() quoting: the original
# code broke on URLs or titles containing '"' or ','; csv.writer escapes
# these correctly. QUOTE_ALL keeps the output shape close to the old files.
import csv

# Write the work sheet: one row per Disqus thread URL with an empty
# s9y_entry_id column for the user to fill in by hand.
with open("disqus2s9y.csv", "wt", newline="") as f:
    writer = csv.writer(f, quoting=csv.QUOTE_ALL)
    writer.writerow(["disqus_url", "disqus_title", "s9y_entry_id"])
    for old_url in old_urls:
        # Derive a rough title from the last path component of the URL.
        old_name = basename(old_url).replace(".html", "")
        writer.writerow([old_url, old_name, ""])

# Dump all entry permalinks from the Serendipity database so the user can
# match them against the Disqus URLs above.
db = sqlite3.connect("serendipity.db")
try:
    rows = db.execute(
        "SELECT permalink, entry_id FROM permalinks WHERE type='entry'"
    ).fetchall()
finally:
    # The connection was previously leaked; close it explicitly.
    db.close()

with open("s9y_urls.csv", "wt", newline="") as f:
    writer = csv.writer(f, quoting=csv.QUOTE_ALL)
    writer.writerow(["s9y_title", "s9y_url", "entry_id"])
    for url, entry_id in rows:
        name = basename(url).replace(".html", "")
        writer.writerow([name, url, entry_id])