This repository has been archived on 2025-03-31. You can view files and clone it, but cannot push or open issues or pull requests.
tty2newsml/iptcmessage.py
2015-04-17 14:25:02 +02:00

77 lines
2.9 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pystache
import re
class IPTCMessage(object):
    """Parse one raw IPTC wire message into header, text and post-text parts.

    The framing handled here is::

        SOH (0x01)  header  STX (0x02)  text  ETX (0x03)  post-text  EOT (0x04)

    After construction the following attributes are available:

    * ``raw`` / ``length`` -- the original bytes and their length
    * ``fullheader`` / ``posttext`` -- undecoded header and post-text bytes
    * ``header`` -- dict of the header fields (see :meth:`parseHeader`)
    * ``text`` -- the decoded text section
    * ``sluginfo`` / ``maintext`` -- slug line(s) and body of the text
    * ``postdata`` -- dict of the post-text fields (see :meth:`parsePostText`)
    """

    # Patterns are compiled once for the class instead of on every call.
    _HEADER_RE = re.compile(
        r'^([a-zA-Z]{1,3})([0-9]{3,4}) ([1-6]) ([a-zA-Z]{1,3}) ([0-9]{1,4}) (.{0,50})\r\n+(.{0,69})\r\n$',
        re.MULTILINE | re.DOTALL)
    _POSTTEXT_RE = re.compile(
        r'^([0-9]{6})( ([a-zA-Z]{3})(?= [a-zA-Z]))?( ([a-zA-Z]{3}) ([0-9]{2}))?(.{1,32})?$',
        re.MULTILINE | re.DOTALL)
    _NEWLINE_RE = re.compile(r'\r?\n')
    _SLUG_END_RE = re.compile(r'=\r?\n')

    def __init__(self, raw: bytes) -> None:
        """Split *raw* at its framing bytes and parse every section.

        Raises:
            ValueError: if the SOH/EOT frame or the STX/ETX separators are
                missing, or if the header or post-text do not match the
                expected structure.
        """
        self.raw = raw
        self.length = len(self.raw)
        # Slices (not indexing) so that empty input fails the check cleanly.
        if self.raw[0:1] != b'\x01' or self.raw[-1:] != b'\x04':
            raise ValueError("Not a valid IPTC message!")
        # partition() cuts at the FIRST separator only, so stray STX/ETX
        # bytes later in the payload cannot break the unpacking.
        self.fullheader, stx, leftover = self.raw[1:-1].partition(b'\x02')
        self.text, etx, self.posttext = leftover.partition(b'\x03')
        if not stx or not etx:
            raise ValueError("Not a valid IPTC message!")
        self.parseHeader()
        self.parseText()
        self.parsePostText()
        # Debug trace, kept so the constructor's stdout output is unchanged.
        print("Header: " + repr(self.header))
        print("Main text: " + self.text)
        print("Post-text: " + repr(self.posttext))
        print("Post-data: " + repr(self.postdata))

    def parseHeader(self) -> None:
        """Decode ``self.fullheader`` (latin1) and fill ``self.header``.

        Raises:
            ValueError: if the header does not match the expected layout.
        """
        fullheader = self.fullheader.decode("latin1")
        parts = self._HEADER_RE.fullmatch(fullheader)
        if parts is None:
            raise ValueError("Header not conforming to IPTC structure.")
        self.header = {
            "source_id": parts.group(1),
            "message_no": parts.group(2),
            "priority": parts.group(3),
            "category": parts.group(4),
            "word_count": parts.group(5),
            "optional": parts.group(6),
            "keywords": parts.group(7),
        }

    def parseText(self) -> None:
        """Decode ``self.text`` (latin1) and separate slug info from the body.

        A slug is assumed when one of the first three lines ends with ``=``.
        In that case everything before the first ``=`` + newline becomes
        ``self.sluginfo`` and the remainder ``self.maintext`` (both stripped).
        Otherwise ``sluginfo`` is empty and ``maintext`` is the full text.
        """
        self.text = self.text.decode("latin1")
        lines = self._NEWLINE_RE.split(self.text)
        # lines[:3] tolerates texts shorter than three lines.
        has_slug = any(line.endswith("=") for line in lines[:3])
        # maxsplit=1: later lines ending in "=" belong to the body.
        pieces = self._SLUG_END_RE.split(self.text, maxsplit=1) if has_slug else []
        if len(pieces) == 2:
            self.sluginfo = pieces[0].strip()
            self.maintext = pieces[1].strip()
        else:
            # No slug marker, or the "=" has no newline after it.
            self.sluginfo = ""
            self.maintext = self.text

    def parsePostText(self) -> None:
        """Decode ``self.posttext`` (latin1) and fill ``self.postdata``.

        Optional parts that are absent are stored as ``None``.

        Raises:
            ValueError: if the post-text does not match the expected layout.
        """
        posttext = self.posttext.decode("latin1")
        # Example of a complete post-text: "191552 MEZ sep 14blafasel"
        parts = self._POSTTEXT_RE.fullmatch(posttext)
        if parts is None:
            raise ValueError("PostText not conforming to IPTC structure.")
        self.postdata = {
            "datetime": parts.group(1),
            "timezone": parts.group(3),  # group 2 is the wrapper incl. the space
            "month": parts.group(5),  # group 4 is the wrapper incl. the spaces
            "year": parts.group(6),
            "msg_separation": parts.group(7),
        }

    def getIPTC(self) -> bytes:
        """Return the raw message bytes exactly as passed to the constructor."""
        return self.raw

    def getNewsML(self):
        """Render this message through the pystache template named 'newsml'.

        The instance itself is passed to the renderer as the context.
        """
        renderer = pystache.Renderer()
        tpl = renderer.load_template('newsml')
        xml = renderer.render(tpl, self)
        return xml
if __name__ == "__main__":
    # Direct execution only announces itself; no message is parsed here.
    print("Testmode!")