77 lines
2.9 KiB
Python
77 lines
2.9 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import pystache
|
|
import re
|
|
|
|
class IPTCMessage(object):
|
|
def __init__(self, raw):
|
|
self.raw = raw
|
|
self.length = len(self.raw)
|
|
if self.raw[0:1] != b'\x01' or self.raw[-1:] != b'\x04':
|
|
raise Exception("Not a valid IPTC message!")
|
|
(self.fullheader, leftover) = self.raw[1:-1].split(b'\x02', 2)
|
|
(self.text, self.posttext) = leftover.split(b'\x03', 2)
|
|
self.parseHeader()
|
|
self.parseText()
|
|
self.parsePostText()
|
|
print("Header: " + repr(self.header))
|
|
print("Main text: " + self.text)
|
|
print("Post-text: " + repr(self.posttext))
|
|
print("Post-data: " + repr(self.postdata))
|
|
# print("Message: %s" % repr(self.raw))
|
|
|
|
def parseHeader(self):
|
|
fullheader = self.fullheader.decode("latin1")
|
|
parts = re.fullmatch('^([a-zA-Z]{1,3})([0-9]{3,4}) ([1-6]) ([a-zA-Z]{1,3}) ([0-9]{1,4}) (.{0,50})\r\n+(.{0,69})\r\n$', fullheader, re.MULTILINE | re.DOTALL)
|
|
if parts is None:
|
|
raise Exception("Header not conforming to IPTC structure.")
|
|
self.header = {
|
|
"source_id": parts.group(1),
|
|
"message_no": parts.group(2),
|
|
"priority": parts.group(3),
|
|
"category": parts.group(4),
|
|
"word_count": parts.group(5),
|
|
"optional": parts.group(6),
|
|
"keywords": parts.group(7)
|
|
}
|
|
|
|
def parseText(self):
|
|
self.text = self.text.decode("latin1")
|
|
lines = re.split(r'\r?\n', self.text)
|
|
# Check the first 3 lines for slug info, and extract if there
|
|
if lines[0][-1:] == "=" or lines[1][-1:] == "=" or lines[2][-1:] == "=":
|
|
(self.sluginfo, self.maintext) = re.split(r'=\r?\n', self.text)
|
|
self.sluginfo = self.sluginfo.strip()
|
|
self.maintext = self.maintext.strip()
|
|
else:
|
|
self.sluginfo = ""
|
|
self.maintext = self.text
|
|
pass
|
|
|
|
def parsePostText(self):
|
|
posttext = self.posttext.decode("latin1")
|
|
# posttext = "191552 MEZ sep 14blafasel" # test string according to spec
|
|
parts = re.fullmatch('^([0-9]{6})( ([a-zA-Z]{3})(?= [a-zA-Z]))?( ([a-zA-Z]{3}) ([0-9]{2}))?(.{1,32})?$', posttext, re.MULTILINE | re.DOTALL)
|
|
if parts is None:
|
|
raise Exception("PostText not conforming to IPTC structure.")
|
|
self.postdata = {
|
|
"datetime": parts.group(1),
|
|
"timezone": parts.group(3), # skip one b/c "whole"-match
|
|
"month": parts.group(5), # skip one b/c "whole"-match
|
|
"year": parts.group(6),
|
|
"msg_separation": parts.group(7)
|
|
}
|
|
|
|
def getIPTC(self):
|
|
return self.raw
|
|
|
|
def getNewsML(self):
|
|
renderer = pystache.Renderer()
|
|
tpl = renderer.load_template('newsml')
|
|
xml = renderer.render(tpl, self)
|
|
return xml
|
|
|
|
if __name__=='__main__':
|
|
print("Testmode!")
|