Initial commit.

This commit is contained in:
Markus Birth 2022-01-20 03:58:43 +01:00
commit d774717817
Signed by: mbirth
GPG Key ID: A9928D7A098C3A9A
3 changed files with 188 additions and 0 deletions

35
README.md Normal file
View File

@ -0,0 +1,35 @@
disqus2s9y
==========
DISQUS to Serendipity importer.
Usage
-----
Fetch all the comments from your site using the Disqus API. Basically, go
into your admin area, open the "Moderate" section, and watch the HTTP
requests your browser makes.
You should find something going to `https://disqus.com/api/3.0/posts/list`.
It will return a JSON structure. In the section `cursor`, there's a value
`hasNext`. If that's `true`, do the request again but add the parameter
`cursor` with the value from the `next` key to it. This will get you the
next batch of comments. Rinse and repeat until you have fetched everything.
Now copy all the files into the directory with these scripts and add
their names to the `DISQUS_FILES` variable in the Python scripts.
Also download your Serendipity SQLite database into the directory as `serendipity.db`.
Now run `dump_urls_to_csv.py` to create 2 CSV files. One is `disqus2s9y.csv`
which contains all the URLs from your DISQUS dump and an empty column
`s9y_entry_id`. The second file is `s9y_urls.csv` which contains all the
URLs from your Serendipity database.
The important step is now to match both, i.e. DISQUS-URL to Serendipity
entry_id. Fill in the matching entry_id into the `s9y_entry_id` column.
After you're done, run `disqus2s9y.py` and it should import all comments
into your `serendipity.db`. Afterwards copy that back to the server and
you're done.

102
disqus2s9y.py Executable file
View File

@ -0,0 +1,102 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv
import datetime
import json
import sqlite3
import sys
from pprint import pprint
# Disqus API dump files to import (add your own filenames here).
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]

# Load the hand-edited mapping sheet: Disqus thread URL -> Serendipity entry_id.
mappings = {}
with open("disqus2s9y.csv", "rt") as csv_file:
    for record in csv.DictReader(csv_file):
        entry_id = record["s9y_entry_id"]
        if not entry_id:
            # Rows where the user did not fill in an entry_id cannot be
            # imported; skip them.
            continue
        mappings[record["disqus_url"]] = entry_id
print("Found {} mappings in disqus2s9y.csv.".format(len(mappings)))
# Gather every comment from all Disqus dump files into one list.
comments = []
for dump_name in DISQUS_FILES:
    with open(dump_name, "rt") as dump_file:
        comments += json.load(dump_file)["response"]
print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Sort oldest-first so that parent comments are inserted before their replies.
print("Sorting comments by timestamp ascending.")
comments.sort(key=lambda entry: entry["createdAt"])

db = sqlite3.connect("serendipity.db")
cursor = db.cursor()
def insert_dict(db_cursor, table, data):
    """Insert *data* (a column-name -> value dict) into *table*.

    Values are bound via ``?`` placeholders; the table and column names
    themselves are interpolated into the SQL, so they must come from
    trusted (hard-coded) sources.

    Returns the rowid of the newly inserted row.
    """
    columns = list(data.keys())
    sql = "INSERT INTO {} ({}) VALUES ({})".format(
        table,
        ", ".join(columns),
        ", ".join("?" for _ in columns),
    )
    db_cursor.execute(sql, [data[col] for col in columns])
    return db_cursor.lastrowid
def sanitise_text(message):
    """Prepare a raw Disqus message for the Serendipity Markdown plugin.

    HTML ``<code>``/``</code>`` tags become backticks, and a space is
    prepended to each newline (Markdown line-break convention).
    """
    for tag in ("<code>", "</code>"):
        message = message.replace(tag, "`")
    return message.replace("\n", " \n")
# Maps Disqus comment id (string) -> Serendipity comment rowid, filled in as
# we insert, so replies can reference their already-inserted parents.
disqus_to_s9y_id = {}

for comment in comments:
    thread_url = comment["thread"]["link"]
    if str(thread_url) not in mappings:
        print(f"ERROR: Can't map {thread_url} to Serendipity page. Check disqus2s9y.csv!")
        continue

    # Resolve the Serendipity id of the parent comment. Comments were sorted
    # oldest-first above, so a parent should always precede its replies.
    parent_id = 0
    parent_ref = comment["parent"]
    if parent_ref:
        if str(parent_ref) not in disqus_to_s9y_id:
            print("ERROR: DISQUS Parent ID {} not found.".format(parent_ref))
            sys.exit(255)
        parent_id = disqus_to_s9y_id[str(parent_ref)]

    author = comment["author"]
    author_email = str(author["email"]) if "email" in author else ""
    author_url = str(author["url"]) if "url" in author else ""

    # NOTE(review): fromisoformat() gives a naive datetime here, so
    # .timestamp() interprets it in the machine's local timezone — confirm
    # whether Disqus createdAt values are UTC before relying on this.
    created_ts = int(datetime.datetime.fromisoformat(comment["createdAt"]).timestamp())

    new_comment = {
        "entry_id": mappings[thread_url],
        "parent_id": parent_id,
        "timestamp": created_ts,
        "title": "",
        "author": author["name"],
        "email": author_email,
        "url": author_url,
        "ip": comment["ipAddress"],
        "body": sanitise_text(comment["raw_message"]),
        "type": "NORMAL",
        "subscribed": "false",
        "status": "approved",
        "referer": "",
    }
    new_rowid = insert_dict(cursor, "comments", new_comment)
    disqus_to_s9y_id[comment["id"]] = new_rowid
    print("Inserted comment with id {}".format(new_rowid))

cursor.close()
db.commit()
db.close()

51
dump_urls_to_csv.py Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import json
import sqlite3
from os.path import basename
# 1. Match URLs from JSON to permalinks/entries in SQLite
# 2. Sort JSON comments old-to-new
# 3. After writing comment into SQLite, store new SQLite-ID and Disqus-ID (for threads)
# Disqus API dump files to scan (add your own filenames here).
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]

# Pull every comment out of all dump files.
comments = []
for dump_name in DISQUS_FILES:
    with open(dump_name, "rt") as dump_file:
        comments += json.load(dump_file)["response"]
print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Collect the distinct thread URLs the comments belong to.
old_urls = list({entry["thread"]["link"] for entry in comments})
print("Found {} unique URLs.".format(len(old_urls)))
# Use the csv module rather than hand-built f.write() quoting: the original
# code broke on URLs or titles containing '"' or ','; csv.writer escapes
# these correctly. QUOTE_ALL keeps the output shape close to the old files.
import csv

# Write the work sheet: one row per Disqus thread URL with an empty
# s9y_entry_id column for the user to fill in by hand.
with open("disqus2s9y.csv", "wt", newline="") as f:
    writer = csv.writer(f, quoting=csv.QUOTE_ALL)
    writer.writerow(["disqus_url", "disqus_title", "s9y_entry_id"])
    for old_url in old_urls:
        # Derive a rough title from the last path component of the URL.
        old_name = basename(old_url).replace(".html", "")
        writer.writerow([old_url, old_name, ""])

# Dump all entry permalinks from the Serendipity database so the user can
# match them against the Disqus URLs above.
db = sqlite3.connect("serendipity.db")
try:
    rows = db.execute(
        "SELECT permalink, entry_id FROM permalinks WHERE type='entry'"
    ).fetchall()
finally:
    # The connection was previously leaked; close it explicitly.
    db.close()

with open("s9y_urls.csv", "wt", newline="") as f:
    writer = csv.writer(f, quoting=csv.QUOTE_ALL)
    writer.writerow(["s9y_title", "s9y_url", "entry_id"])
    for url, entry_id in rows:
        name = basename(url).replace(".html", "")
        writer.writerow([name, url, entry_id])