Initial commit.

This commit is contained in:
Markus Birth 2022-10-01 20:49:57 +02:00
commit 251a0d00a3
Signed by: mbirth
GPG Key ID: A9928D7A098C3A9A
2 changed files with 79 additions and 0 deletions

42
README.md Normal file
View File

@ -0,0 +1,42 @@
wallabag2GoodLinks converter
============================
Converts a [wallabag](https://www.wallabag.org/) JSON export to [GoodLinks](https://goodlinks.app/) format.
Wallabag format
---------------
* `is_archived` (0/1)
* `is_starred` (0/1)
* `tags` (List)
* `is_public` (true/false)
* `id`
* `title`
* `url`
* `given_url`
* `content` (HTML)
* `created_at` (yyyy-mm-ddThh:mm:ss+hh:mm)
* `updated_at`
* `published_at`
* `published_by` (List)
* `annotations` (List)
* `mimetype` (text/html)
* `language` (en)
* `reading_time` (Int)
* `domain_name`
* `preview_picture` (URL)
* `http_status` ("200")
* `headers` (Object)
GoodLinks format
----------------
* `readAt` (Unixtime)
* `addedAt` (Unixtime)
* `summary`
* `starred` (true/false)
* `title`
* `tags` (List)
* `url`

37
walla2goodlinks.py Executable file
View File

@ -0,0 +1,37 @@
#!/usr/bin/env python3
import html
import json
import re
from datetime import datetime
# https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
CLEANR = re.compile(r"<.*?>")

INPUT_PATH = "Wallabag All articles.json"
OUTPUT_PATH = "walla2goodlinks.json"
SUMMARY_MAX_CHARS = 199
IMPORT_TAG = "+IMPORTED"


def _html_to_text(content):
    """Return *content* with HTML tags removed and entities decoded.

    A missing (``None``) or empty content yields an empty string.
    """
    # Newlines become spaces first: "." in CLEANR does not match "\n", so a
    # tag spanning several lines would otherwise survive the substitution.
    text = (content or "").replace("\n", " ")
    text = CLEANR.sub("", text)
    # Entities are decoded only AFTER the tags are gone. Decoding first would
    # turn escaped literal text such as "1 &lt; 2 &gt; 0" into "< 2 >", which
    # the tag pattern would then delete as if it were markup.
    return html.unescape(text)


def convert_record(rec: dict) -> dict:
    """Convert one wallabag article record into a GoodLinks record.

    Reads the ``created_at``, ``content``, ``tags``, ``is_starred``,
    ``title`` and ``url`` keys of *rec*. The input record is not modified.

    Raises:
        KeyError: if one of the keys listed above is missing.
        ValueError: if ``created_at`` is not an ISO 8601 timestamp.
    """
    time_added = datetime.fromisoformat(rec["created_at"])
    # Every entry gets readAt == addedAt, whatever its `is_archived` value.
    # NOTE(review): confirm whether unread articles should omit `readAt`.
    time_read = time_added
    return {
        "readAt": time_read.timestamp(),
        "addedAt": time_added.timestamp(),
        "summary": _html_to_text(rec["content"])[:SUMMARY_MAX_CHARS],
        "starred": (rec["is_starred"] == 1),
        "title": rec["title"],
        # Build a new list so the marker tag is not appended to the input.
        "tags": [*rec["tags"], IMPORT_TAG],
        "url": rec["url"],
    }


def main() -> None:
    """Read the wallabag export, convert every record, write the result."""
    # JSON exports are UTF-8; do not depend on the platform's locale encoding.
    with open(INPUT_PATH, "rt", encoding="utf-8") as f:
        json_obj = json.load(f)

    output_obj = []
    for rec in json_obj:
        new_obj = convert_record(rec)
        # Echo the source and converted record so the run can be inspected.
        print(repr(rec))
        print(repr(new_obj))
        output_obj.append(new_obj)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(output_obj, f)


if __name__ == "__main__":
    main()