commit d774717817

Initial commit.
README.md (normal file, 35 lines)
@@ -0,0 +1,35 @@
disqus2s9y
==========

DISQUS to Serendipity importer.


Usage
-----

Fetch all the comments from your site using the Disqus API. Basically,
go into your admin area, open the "Moderate" section and check the HTTP
requests it makes.

You should find a request going to `https://disqus.com/api/3.0/posts/list`.
It returns a JSON structure. In its `cursor` section there is a value
`hasNext`. If that's `true`, repeat the request, but add a `cursor`
parameter set to the value of the `next` key. This gets you the next
batch of comments. Rinse and repeat until you have everything, saving
each response to its own file.
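
A minimal sketch of that loop (the endpoint and the `cursor` handling
are as described above; `API_KEY`, `FORUM` and the `limit` parameter are
assumptions, so copy the exact query string your browser sent):

```python
import json
import urllib.parse
import urllib.request

API_KEY = "YOUR_API_KEY"  # placeholder: the api_key from the browser request
FORUM = "yourforum"       # placeholder: your forum's shortname

cursor = None
page = 0
while True:
    params = {"api_key": API_KEY, "forum": FORUM, "limit": 100}
    if cursor:
        params["cursor"] = cursor
    url = "https://disqus.com/api/3.0/posts/list.json?" + urllib.parse.urlencode(params)
    with urllib.request.urlopen(url) as resp:
        data = json.load(resp)
    # Save every page; these files become the DISQUS_FILES inputs below.
    page += 1
    with open("DISQUS{}.json".format(page), "wt") as f:
        json.dump(data, f)
    if not data["cursor"]["hasNext"]:
        break
    cursor = data["cursor"]["next"]
```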

Now copy all the downloaded files into the directory with these scripts
and add their names to the `DISQUS_FILES` variable in both Python
scripts.
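
For example, with two dump files (these names are just the defaults
already set in the scripts):

```python
DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]
```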

Also download your Serendipity SQLite database into the same directory
as `serendipity.db`.

Now run `dump_urls_to_csv.py` to create two CSV files. One is
`disqus2s9y.csv`, which contains all the URLs from your DISQUS dump plus
an empty `s9y_entry_id` column. The second is `s9y_urls.csv`, which
contains all the URLs from your Serendipity database.
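
The headers below are exactly what the script writes; the data rows are
made-up examples:

```
# disqus2s9y.csv
"disqus_url","disqus_title","s9y_entry_id"
"https://example.org/archives/42-hello-world.html","42-hello-world",

# s9y_urls.csv
"s9y_title","s9y_url","entry_id"
"42-hello-world","archives/42-hello-world.html",42
```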

The important step is now to match the two, i.e. map each DISQUS URL to
its Serendipity entry_id. Fill the matching entry_id into the
`s9y_entry_id` column of `disqus2s9y.csv`.
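
A filled-in row would then look like this (again with a made-up URL and
entry_id):

```
"disqus_url","disqus_title","s9y_entry_id"
"https://example.org/archives/42-hello-world.html","42-hello-world",42
```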

After you're done, run `disqus2s9y.py` and it should import all comments
into your `serendipity.db`. Afterwards, copy that file back to the
server and you're done.

disqus2s9y.py (new executable file, 102 lines)
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import csv
import datetime
import json
import sqlite3
import sys

DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]


# Load the manually filled-in URL -> entry_id mapping.
mappings = {}
with open("disqus2s9y.csv", "rt") as f:
    for row in csv.DictReader(f):
        if not row["s9y_entry_id"]:
            # Skip lines without entry_id
            continue
        mappings[row["disqus_url"]] = row["s9y_entry_id"]

print("Found {} mappings in disqus2s9y.csv.".format(len(mappings)))

# Collect all comments from the Disqus API dumps.
comments = []
for filename in DISQUS_FILES:
    with open(filename, "rt") as f:
        response = json.load(f)
        comments += response["response"]

print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Sort oldest-first so that parent comments are inserted before their replies.
print("Sorting comments by timestamp ascending.")
comments = sorted(comments, key=lambda c: c["createdAt"])

db = sqlite3.connect("serendipity.db")
cursor = db.cursor()


def insert_dict(db_cursor, table, data):
    # Build a parameterised INSERT from a dict of column -> value.
    fields = []
    placeholders = []
    values = []
    for k, v in data.items():
        fields.append(k)
        placeholders.append("?")
        values.append(v)
    sql = "INSERT INTO {} ({}) VALUES ({})".format(table, ", ".join(fields), ", ".join(placeholders))
    db_cursor.execute(sql, values)
    return db_cursor.lastrowid


def sanitise_text(message):
    # This is for Markdown as I'm using the Markdown plugin
    message = message.replace("<code>", "`").replace("</code>", "`")
    message = message.replace("\n", " \n")
    return message


# Maps Disqus comment ids to the rowids of the freshly inserted
# Serendipity comments, so replies can point at their parents.
disqus_to_s9y_id = {}
for c in comments:
    c_url = c["thread"]["link"]
    if str(c_url) not in mappings:
        print(f"ERROR: Can't map {c_url} to Serendipity page. Check disqus2s9y.csv!")
        continue
    parent_id = 0
    if c["parent"]:
        if str(c["parent"]) not in disqus_to_s9y_id:
            print("ERROR: DISQUS Parent ID {} not found.".format(c["parent"]))
            sys.exit(255)
        parent_id = disqus_to_s9y_id[str(c["parent"])]

    author_email = ""
    if "email" in c["author"]:
        author_email = str(c["author"]["email"])

    author_url = ""
    if "url" in c["author"]:
        author_url = str(c["author"]["url"])

    new_comment = {
        "entry_id": mappings[c_url],
        "parent_id": parent_id,
        "timestamp": int(datetime.datetime.fromisoformat(c["createdAt"]).timestamp()),
        "title": "",
        "author": c["author"]["name"],
        "email": author_email,
        "url": author_url,
        "ip": c["ipAddress"],
        "body": sanitise_text(c["raw_message"]),
        "type": "NORMAL",
        "subscribed": "false",
        "status": "approved",
        "referer": ""
    }

    new_rowid = insert_dict(cursor, "comments", new_comment)
    disqus_to_s9y_id[c["id"]] = new_rowid
    print("Inserted comment with id {}".format(new_rowid))

cursor.close()
db.commit()
db.close()

dump_urls_to_csv.py (new executable file, 51 lines)
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json
import sqlite3
from os.path import basename

# 1. Match URLs from JSON to permalinks/entries in SQLite
# 2. Sort JSON comments old-to-new
# 3. After writing comment into SQLite, store new SQLite-ID and Disqus-ID (for threads)

DISQUS_FILES = ["DISQUS1.json", "DISQUS2.json"]

comments = []

for filename in DISQUS_FILES:
    with open(filename, "rt") as f:
        response = json.load(f)
        comments += response["response"]

print("Found {} comments in {} file(s).".format(len(comments), len(DISQUS_FILES)))

# Collect the distinct thread URLs the comments belong to.
old_urls = []
for c in comments:
    #old_urls += c["thread"]["identifiers"]
    old_urls.append(c["thread"]["link"])

old_urls = list(set(old_urls))

print("Found {} unique URLs.".format(len(old_urls)))

# Write the Disqus URLs with an empty s9y_entry_id column to be filled in by hand.
with open("disqus2s9y.csv", "wt") as f:
    f.write("\"disqus_url\",\"disqus_title\",\"s9y_entry_id\"\n")
    for ou in old_urls:
        old_name = basename(ou).replace(".html", "")
        f.write("\"{}\",\"{}\",\n".format(ou, old_name))


# Dump the entry permalinks from the Serendipity database for reference.
db = sqlite3.connect("serendipity.db")
req = db.execute("SELECT permalink, entry_id FROM permalinks WHERE type='entry'")
response = req.fetchall()

with open("s9y_urls.csv", "wt") as f:
    f.write("\"s9y_title\",\"s9y_url\",\"entry_id\"\n")
    for r in response:
        (url, entry_id) = r
        name = basename(url).replace(".html", "")
        f.write("\"{}\",\"{}\",{}\n".format(name, url, entry_id))

db.close()