"""Export stored messages from the messages database into mbox files.

One ``<mailbox address>.mbox`` file is written under ``OUTPUT_DIR`` for every
mailbox returned by ``EXPORT_QUERY``.  Messages are written in the order the
query returns them (by mailbox address, then by ``sent_at``), and a
per-mailbox message count is printed at the end.

The database password is read from the ``DB_PASSWORD`` environment variable.
"""

import collections
import gzip
import os
import sys
import time
import zlib

DB_HOST = "postgres-rw.data.svc.cluster.local"
OUTPUT_DIR = "/tmp/mbox"

# One row per (mailbox, message): every message of every thread a mailbox has
# access to, together with its stored blob and compression flag.
EXPORT_QUERY = """
    SELECT DISTINCT
        mb.local_part || '@' || d.name as mailbox_email,
        b.raw_content,
        b.compression,
        m.sent_at
    FROM messages_message m
    JOIN messages_blob b ON m.blob_id = b.id
    JOIN messages_thread t ON m.thread_id = t.id
    JOIN messages_threadaccess ta ON ta.thread_id = t.id
    JOIN messages_mailbox mb ON ta.mailbox_id = mb.id
    JOIN messages_maildomain d ON mb.domain_id = d.id
    ORDER BY mailbox_email, m.sent_at
"""


def decompress_blob(raw, compression, label="blob"):
    """Return the message bytes stored in *raw*.

    Blobs whose *compression* flag is not 1 are returned unchanged.  For
    flag 1 the data is tried first as a gzip stream and then as a raw
    (headerless) deflate stream.  If neither works the raw bytes are
    returned as a best effort and a warning naming *label* is printed to
    stderr, so a corrupt blob is visible instead of silently exported.
    """
    if compression != 1:
        return raw
    try:
        return gzip.decompress(raw)
    except (OSError, EOFError, zlib.error):
        pass  # not a gzip stream; try raw deflate next
    try:
        return zlib.decompress(raw, -zlib.MAX_WBITS)
    except zlib.error:
        print(
            f"warning: could not decompress {label}; writing stored bytes as-is",
            file=sys.stderr,
        )
        return raw


def mbox_from_line(mailbox_email, sent_at):
    """Build the ``From `` separator line that starts an mbox entry.

    The timestamp uses the asctime layout in UTC, which is what mbox
    readers expect on the separator line.  ``sent_at.timestamp()`` honours
    the tzinfo of timezone-aware datetimes (naive values are taken as local
    time).  When *sent_at* is ``None`` the current time is used.
    """
    if sent_at is None:
        stamp = time.gmtime()
    else:
        stamp = time.gmtime(sent_at.timestamp())
    return f"From {mailbox_email} {time.asctime(stamp)}\n".encode()


def escape_body(eml):
    """Return *eml* with ``From `` lines quoted and a single final newline.

    Lines beginning with ``From `` get a ``>`` prefix so they cannot be
    mistaken for a message separator.  Every line is terminated with
    ``\\n``; a message that already ends with a newline does not gain an
    extra empty line.
    """
    lines = eml.split(b"\n")
    if lines[-1] == b"":
        # The message ended with "\n" (or was empty): split() reports that
        # as a trailing empty element, which is not a real line.
        lines.pop()
    return b"".join(
        (b">" + line if line.startswith(b"From ") else line) + b"\n"
        for line in lines
    )


def mbox_path(mailbox_email, output_dir=OUTPUT_DIR):
    """Return the mbox file path for *mailbox_email* inside *output_dir*.

    Path separators and NUL bytes in the address are replaced with ``_`` so
    an address such as ``a/b@example.org`` cannot point outside (or into a
    missing sub-directory of) *output_dir*.
    """
    safe_name = mailbox_email
    for unsafe in ("/", "\\", "\0"):
        safe_name = safe_name.replace(unsafe, "_")
    return os.path.join(output_dir, f"{safe_name}.mbox")


def write_message(out, mailbox_email, eml, sent_at):
    """Write one mbox entry to the binary file object *out*.

    The entry is the separator line, the quoted message and one blank line.
    """
    out.write(mbox_from_line(mailbox_email, sent_at))
    out.write(escape_body(eml))
    out.write(b"\n")


def main():
    """Run the export and print the number of messages written per mailbox."""
    # Imported here so the formatting helpers above can be imported (and
    # tested) on machines without the PostgreSQL driver installed.
    import psycopg2

    conn = psycopg2.connect(
        host=DB_HOST,
        port=5432,
        dbname="messages_db",
        user="messages",
        password=os.environ["DB_PASSWORD"],
    )
    counts = collections.Counter()
    # Files already (re)created during this run.  The first write to a file
    # truncates it so re-running the export does not duplicate messages;
    # later writes append.
    started_paths = set()
    try:
        cur = conn.cursor()
        cur.execute(EXPORT_QUERY)
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        # Iterate the cursor instead of fetchall() to avoid building a
        # second, fully materialised list of rows.
        for mailbox_email, raw_content, compression, sent_at in cur:
            eml = decompress_blob(
                bytes(raw_content),
                compression,
                label=f"message for {mailbox_email} sent at {sent_at}",
            )
            path = mbox_path(mailbox_email)
            mode = "ab" if path in started_paths else "wb"
            started_paths.add(path)
            with open(path, mode) as out:
                write_message(out, mailbox_email, eml, sent_at)
            counts[mailbox_email] += 1
    finally:
        # Release the connection even when the export fails part-way.
        conn.close()

    for addr, count in counts.items():
        print(f"{addr}: {count} messages")
    print("Export complete.")


if __name__ == "__main__":
    main()