41 lines
1.1 KiB
Python
Executable File
41 lines
1.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import html
|
|
import json
|
|
import re
|
|
from datetime import datetime
|
|
|
|
# Input: JSON export file read by this script (a JSON array of article records).
WALLABAG_EXPORT = "Wallabag All articles.json"
# Output: JSON array of converted records written by this script.
OUTPUT_FILE = "walla2goodlinks.json"

# Number of leading characters of the cleaned article text kept as the summary.
SUMMARY_LENGTH = 199
# Extra tag added to every converted record so imported items can be identified.
IMPORT_TAG = "+IMPORTED"

# https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
CLEANR = re.compile(r"<.*?>")


def strip_html(markup):
    """Return *markup* as plain text with tags removed and entities decoded.

    Newlines are flattened to spaces first because ``CLEANR`` uses ``.``,
    which does not match a newline, so a tag spanning two lines would
    otherwise survive.

    Tags are removed *before* entities are decoded.  Decoding first would
    turn escaped literal text such as ``a &lt; b &gt; c`` into ``a < b > c``
    and the tag regex would then delete ``< b >`` as if it were markup.

    Args:
        markup: HTML source text; ``None`` or ``""`` yields ``""``.

    Returns:
        The plain-text rendering on a single line.
    """
    if not markup:
        return ""
    without_tags = CLEANR.sub("", markup.replace("\n", " "))
    # Decoding can itself produce newlines (e.g. ``&#10;``); flatten those too
    # so the result stays on one line.
    return html.unescape(without_tags).replace("\n", " ")


def convert_record(rec):
    """Convert one record from the input export into the output format.

    Args:
        rec: Mapping with the keys ``created_at`` (ISO 8601 string),
            ``content`` (HTML string or ``None``), ``tags`` (list),
            ``is_starred``, ``title`` and ``url``.  The mapping is not
            modified.

    Returns:
        A new dict with the keys ``readAt``, ``addedAt``, ``summary``,
        ``starred``, ``title``, ``tags`` and ``url``.

    Raises:
        KeyError: If a required key is missing from *rec*.
        ValueError: If ``created_at`` is not accepted by
            ``datetime.fromisoformat``.
    """
    time_added = datetime.fromisoformat(rec["created_at"])
    # The export's creation time is used for both timestamps; no separate
    # "read" time is taken from the record.
    time_read = time_added

    # Build a fresh list so the caller's record is left untouched.
    tags = [*rec["tags"], IMPORT_TAG]

    return {
        "readAt": time_read.timestamp(),
        "addedAt": time_added.timestamp(),
        "summary": strip_html(rec["content"])[:SUMMARY_LENGTH],
        "starred": (rec["is_starred"] == 1),
        "title": rec["title"],
        "tags": tags,
        "url": rec["url"],
    }


def main():
    """Read ``WALLABAG_EXPORT``, convert every record, write ``OUTPUT_FILE``.

    Each source record and its converted form are echoed to stdout.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # locale encoding.
    with open(WALLABAG_EXPORT, "rt", encoding="utf-8") as f:
        json_obj = json.load(f)

    output_obj = []
    for rec in json_obj:
        new_obj = convert_record(rec)
        print(repr(rec))
        print(repr(new_obj))
        output_obj.append(new_obj)

    with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
        json.dump(output_obj, f)


if __name__ == "__main__":
    main()