Files
sbbb/scripts/export-mbox.py

55 lines
1.8 KiB
Python
Raw Normal View History

import email.utils
import gzip
import itertools
import os
import sys
import time
import zlib

import psycopg2
DB_HOST = "postgres-rw.data.svc.cluster.local"
OUT_DIR = "/tmp/mbox"

# One row per (mailbox address, message), ordered so that all rows of a
# mailbox are contiguous and sorted by sent_at.
EXPORT_QUERY = """
SELECT DISTINCT mb.local_part || '@' || d.name as mailbox_email,
b.raw_content, b.compression, m.sent_at
FROM messages_message m
JOIN messages_blob b ON m.blob_id = b.id
JOIN messages_thread t ON m.thread_id = t.id
JOIN messages_threadaccess ta ON ta.thread_id = t.id
JOIN messages_mailbox mb ON ta.mailbox_id = mb.id
JOIN messages_maildomain d ON mb.domain_id = d.id
ORDER BY mailbox_email, m.sent_at
"""


def try_decompress(raw):
    """Return *raw* decompressed as gzip or raw deflate, or None if neither fits.

    gzip is tried first, then a headerless deflate stream.
    """
    # NOTE(review): confirm which codec `compression == 1` denotes in the
    # schema; anything other than gzip / raw deflate ends up returning None.
    try:
        return gzip.decompress(raw)
    except (OSError, EOFError, zlib.error):
        pass
    try:
        return zlib.decompress(raw, -zlib.MAX_WBITS)
    except zlib.error:
        return None


def format_from_line(mailbox_email, sent_at):
    """Build the mbox "From " separator line for one message, as bytes.

    The date is rendered in asctime format in UTC.  ``sent_at.timestamp()``
    honours the tzinfo of an aware datetime (a naive one is taken as local
    time); when *sent_at* is None the current time is used.
    """
    stamp = sent_at.timestamp() if sent_at is not None else time.time()
    date_str = time.asctime(time.gmtime(stamp))
    return f"From {mailbox_email} {date_str}\n".encode()


def escape_from_lines(eml):
    """Prefix every line of *eml* that starts with b"From " with b">".

    Without this, such body lines would be read as message separators.
    """
    return b"\n".join(
        b">" + line if line.startswith(b"From ") else line
        for line in eml.split(b"\n")
    )


def mbox_path_for(mailbox_email, out_dir=OUT_DIR):
    """Return the mbox file path for *mailbox_email* inside *out_dir*.

    "/" is replaced so an address can never point outside *out_dir*.
    """
    safe_name = mailbox_email.replace("/", "_")
    return f"{out_dir}/{safe_name}.mbox"


def main():
    """Export every mailbox's messages to ``/tmp/mbox/<address>.mbox``.

    Reads the database password from the DB_PASSWORD environment variable
    (KeyError if unset), writes one mbox file per mailbox address and prints
    a per-mailbox message count to stdout.  Warnings go to stderr.
    """
    conn = psycopg2.connect(
        host=DB_HOST, port=5432, dbname="messages_db",
        user="messages", password=os.environ["DB_PASSWORD"],
    )
    os.makedirs(OUT_DIR, exist_ok=True)
    counts = {}
    undecodable = 0
    written_paths = set()
    try:
        # A named cursor is server-side: rows are streamed in batches of
        # `itersize` instead of loading every blob into memory at once.
        with conn.cursor(name="mbox_export") as cur:
            cur.itersize = 200
            cur.execute(EXPORT_QUERY)
            by_mailbox = itertools.groupby(cur, key=lambda row: row[0])
            for mailbox_email, rows in by_mailbox:
                path = mbox_path_for(mailbox_email)
                # Truncate on the first write of this run so that re-running
                # the export does not duplicate messages; append only if two
                # addresses happen to map to the same file name.
                mode = "ab" if path in written_paths else "wb"
                written_paths.add(path)
                with open(path, mode) as f:
                    for _, raw_content, compression, sent_at in rows:
                        if raw_content is None:
                            print(
                                f"warning: {mailbox_email}: blob without "
                                "content skipped",
                                file=sys.stderr,
                            )
                            continue
                        eml = bytes(raw_content)
                        if compression == 1:
                            decoded = try_decompress(eml)
                            if decoded is None:
                                # Best effort: keep the stored bytes, but
                                # make the failure visible.
                                undecodable += 1
                                print(
                                    f"warning: {mailbox_email}: could not "
                                    "decompress blob, writing stored bytes",
                                    file=sys.stderr,
                                )
                            else:
                                eml = decoded
                        f.write(format_from_line(mailbox_email, sent_at))
                        f.write(escape_from_lines(eml))
                        # Line terminator plus the blank line that separates
                        # messages.
                        f.write(b"\n\n")
                        counts[mailbox_email] = counts.get(mailbox_email, 0) + 1
    finally:
        conn.close()

    for addr, count in counts.items():
        print(f"{addr}: {count} messages")
    if undecodable:
        print(
            f"warning: {undecodable} blob(s) could not be decompressed",
            file=sys.stderr,
        )
    print("Export complete.")


if __name__ == "__main__":
    main()