client/securedrop_client/db.py (459 lines of code) (raw):

import datetime
import os
from enum import Enum
from pathlib import Path
from typing import Any
from uuid import uuid4

from sqlalchemy import (
    Boolean,
    CheckConstraint,
    Column,
    DateTime,
    ForeignKey,
    Integer,
    MetaData,
    String,
    Text,
    UniqueConstraint,
    create_engine,
    text,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import backref, relationship, scoped_session, sessionmaker

# Name templates passed to MetaData so that indexes and constraints which are
# not named explicitly receive predictable names.
convention = {
    "ix": "ix_%(column_0_label)s",
    "uq": "uq_%(table_name)s_%(column_0_name)s",
    "ck": "ck_%(table_name)s_%(column_0_name)s",
    "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
    "pk": "pk_%(table_name)s",
}

metadata = MetaData(naming_convention=convention)

Base = declarative_base(metadata=metadata)  # type: Any


def make_session_maker(home: str) -> scoped_session:
    """
    Build a scoped session factory bound to the `svs.sqlite` database in `home`.

    If the database file already exists and its mode is anything other than a
    regular file with 0600 permissions, its permissions are reset to 0600.
    """
    db_path = os.path.join(home, "svs.sqlite")
    engine = create_engine(f"sqlite:///{db_path}")

    if os.path.exists(db_path):
        # 0o100600 == regular file, read/write for the owner only.
        current_mode = oct(os.stat(db_path).st_mode)
        if current_mode != "0o100600":
            os.chmod(db_path, 0o600)

    return scoped_session(sessionmaker(bind=engine))


class User(Base):
    """
    A journalist account, stored in the `users` table.
    """

    __tablename__ = "users"

    id = Column(Integer, primary_key=True)
    uuid = Column(String(36), unique=True, nullable=False)
    username = Column(String(255), nullable=False)
    firstname = Column(String(64))
    lastname = Column(String(64))

    def __repr__(self) -> str:
        return f"<Journalist {self.uuid}: {self.username}>"

    @property
    def deleted(self) -> bool:
        """
        True when this record carries the reserved username "deleted".
        """
        return self.username == "deleted"

    @property
    def fullname(self) -> str:
        """
        Display name: first and last name when available, otherwise whichever
        one is set, otherwise the username. Empty for a deleted user.
        """
        if self.deleted:
            return ""

        if self.firstname and self.lastname:
            return self.firstname + " " + self.lastname
        if self.firstname:
            return self.firstname
        if self.lastname:
            return self.lastname
        return self.username

    @property
    def initials(self) -> str:
        """
        Two lowercase characters identifying the user. Empty for a deleted user.

        Preference order: first letters of first and last name, the first two
        letters of the first name, the first two letters of the last name, and
        finally the first two letters of the username.
        """
        if self.deleted:
            return ""

        if self.firstname and self.lastname:
            return self.firstname[0].lower() + self.lastname[0].lower()
        if self.firstname and len(self.firstname) >= 2:
            return self.firstname[0:2].lower()
        if self.lastname and len(self.lastname) >= 2:
            return self.lastname[0:2].lower()
        return self.username[0:2].lower()  # username must be at least 3 characters


class DeletedUser(User):
    """
    A `User` created with a fresh random UUID and the username "deleted".
    """

    def __init__(self) -> None:
        super().__init__(uuid=str(uuid4()), username="deleted")


class Source(Base):
    """
    A source, stored in the `sources` table, together with helpers that
    assemble its conversation items.
    """

    __tablename__ = "sources"

    id = Column(Integer, primary_key=True)
    uuid = Column(String(36), unique=True, nullable=False)
    journalist_designation = Column(String(255), nullable=False)
    document_count = Column(Integer, server_default=text("0"), nullable=False)
    is_flagged = Column(Boolean(name="is_flagged"), server_default=text("0"))
    public_key = Column(Text, nullable=True)
    fingerprint = Column(String(64))
    interaction_count = Column(Integer, server_default=text("0"), nullable=False)
    is_starred = Column(Boolean(name="is_starred"), server_default=text("0"))
    last_updated = Column(DateTime)

    def __repr__(self) -> str:
        return f"<Source {self.uuid}: {self.journalist_designation}>"

    @property
    def collection(self) -> list:
        """
        All messages, files, replies and draft replies of this source in one
        sorted list.

        Pending items go last; within each group items are ordered by
        `file_counter`, and then by `timestamp` (only draft replies have one).
        """
        # Stand-in timestamp for items that have no `timestamp` attribute.
        earliest = datetime.datetime(datetime.MINYEAR, 1, 1)

        def sort_key(item: Any) -> tuple:
            return (
                getattr(item, "is_pending", False),
                item.file_counter,
                getattr(item, "timestamp", earliest),
            )

        items = [*self.messages, *self.files, *self.replies, *self.draftreplies]
        items.sort(key=sort_key)
        return items

    @property
    def server_collection(self) -> list:
        """
        Messages, files and replies of this source, ordered by `file_counter`.

        Draft replies are left out, so only items that were sent to or fetched
        from the server are returned.
        """
        items = [*self.messages, *self.files, *self.replies]
        return sorted(items, key=lambda item: item.file_counter)

    @property
    def journalist_filename(self) -> str:
        """
        The journalist designation lowercased, with spaces turned into
        underscores and every character outside `[a-z0-9_-]` dropped.
        """
        valid_chars = "abcdefghijklmnopqrstuvwxyz1234567890-_"
        normalized = self.journalist_designation.lower().replace(" ", "_")
        return "".join(char for char in normalized if char in valid_chars)

    @property
    def seen(self) -> bool:
        """
        True only when every item of `collection` is seen.
        """
        return all(item.seen for item in self.collection)


class DeletedConversation(Base):
    """
    Table that stores only source UUIDs for conversations (files and messages) that
    have been deleted locally, to prevent them from being re-added to the Messages and
    Replies tables during a 'stale sync' (network race condition).
    """

    __tablename__ = "deletedconversation"

    uuid = Column(String(36), primary_key=True, nullable=False)

    def __repr__(self) -> str:
        return f"DeletedConversation (source {self.uuid})"

    def __init__(self, **kwargs: Any) -> None:
        # The source UUID is the primary key, so refuse construction without it.
        if "uuid" not in kwargs:
            raise TypeError("Keyword argument 'uuid' (source UUID) required")
        super().__init__(**kwargs)


class DeletedSource(Base):
    """
    Table that temporarily stores UUIDs of sources whose accounts are deleted locally,
    to prevent them from being re-added to the Sources table during a 'stale sync'
    (network race condition).
    """

    __tablename__ = "deletedsource"

    uuid = Column(String(36), primary_key=True, nullable=False)

    def __repr__(self) -> str:
        return f"DeletedSource ({self.uuid})"

    def __init__(self, **kwargs: Any) -> None:
        # The UUID is the primary key, so refuse construction without it.
        if "uuid" not in kwargs:
            raise TypeError("Keyword argument 'uuid' is required")
        super().__init__(**kwargs)


class Message(Base):
    """
    A message belonging to a source, stored in the `messages` table.
    """

    __tablename__ = "messages"
    __table_args__ = (
        UniqueConstraint("source_id", "file_counter", name="uq_messages_source_id_file_counter"),
    )

    id = Column(Integer, primary_key=True)
    uuid = Column(String(36), unique=True, nullable=False)
    filename = Column(String(255), nullable=False)
    file_counter = Column(Integer, nullable=False)
    size = Column(Integer, nullable=False)
    download_url = Column(String(255), nullable=False)

    # This is whether the submission has been downloaded in the local database.
    is_downloaded = Column(Boolean(name="is_downloaded"), nullable=False, server_default=text("0"))

    # This tracks if the file had been successfully decrypted after download.
    is_decrypted = Column(
        Boolean(name="is_decrypted"),
        CheckConstraint(
            "CASE WHEN is_downloaded = 0 THEN is_decrypted IS NULL ELSE 1 END",
            name="messages_compare_is_downloaded_vs_is_decrypted",
        ),
        nullable=True,
    )

    download_error_id = Column(Integer, ForeignKey("downloaderrors.id"))
    download_error = relationship("DownloadError")

    # This reflects read status stored on the server.
    is_read = Column(Boolean(name="is_read"), nullable=False, server_default=text("0"))

    content = Column(
        Text,
        # this check constraint ensures the state of the DB is what one would expect
        CheckConstraint(
            "CASE WHEN is_downloaded = 0 THEN content IS NULL ELSE 1 END",
            name="ck_message_compare_download_vs_content",
        ),
    )

    source_id = Column(Integer, ForeignKey("sources.id"), nullable=False)
    source = relationship(
        "Source", backref=backref("messages", order_by=id, cascade="delete"), lazy="joined"
    )

    last_updated = Column(
        DateTime,
        nullable=False,
        default=datetime.datetime.utcnow,
        onupdate=datetime.datetime.utcnow,
    )

    def __init__(self, **kwargs: Any) -> None:
        """
        Create a message; `file_counter` is always derived from the number in
        front of the first "-" of `filename` and may not be passed in.
        """
        if "file_counter" in kwargs:
            raise TypeError("Cannot manually set file_counter")

        counter_prefix = kwargs["filename"].split("-")[0]
        kwargs["file_counter"] = int(counter_prefix)
        super().__init__(**kwargs)

    def __str__(self) -> str:
        """
        The message content if present, otherwise an explanation of the
        download error if one is recorded, otherwise a placeholder.
        """
        if self.content is not None:
            return self.content
        if self.download_error is not None:
            return self.download_error.explain(self.__class__.__name__)
        return "<Message not yet available>"

    def __repr__(self) -> str:
        return f"<Message {self.uuid}: {self.filename}>"

    def location(self, data_dir: str) -> str:
        """
        Resolved path of the message's `.txt` file inside the source's
        directory under `data_dir`.
        """
        base_dir = Path(data_dir)
        relative_path = Path(
            self.source.journalist_filename, Path(self.filename).with_suffix(".txt")
        )
        return str(base_dir.joinpath(relative_path).resolve())

    @property
    def seen(self) -> bool:
        """
        If the message has been seen by any journalist, then the message is considered seen.
        The `is_read` boolean is used in order to recognize messages that were downloaded
        before SecureDrop 1.6.0 (before the seen-by feature).
        """
        return bool(self.seen_messages.count() or self.is_read)

    def seen_by(self, journalist_id: int) -> bool:
        """
        True if a seen record for this message exists for `journalist_id`.
        """
        return any(record.journalist_id == journalist_id for record in self.seen_messages)

    @property
    def seen_by_list(self) -> dict[str, User]:
        """
        Users who have seen this message, keyed by username. Seen records
        without a journalist are skipped.
        """
        return {
            record.journalist.username: record.journalist
            for record in self.seen_messages
            if record.journalist
        }


class File(Base):
    """
    A file submission belonging to a source, stored in the `files` table.
    """

    __tablename__ = "files"
    # NOTE(review): the constraint name says "messages" although this is the files
    # table; presumably kept to match existing schemas -- confirm before renaming.
    __table_args__ = (
        UniqueConstraint("source_id", "file_counter", name="uq_messages_source_id_file_counter"),
    )

    id = Column(Integer, primary_key=True)
    uuid = Column(String(36), unique=True, nullable=False)
    filename = Column(String(255), nullable=False)
    file_counter = Column(Integer, nullable=False)
    size = Column(Integer, nullable=False)
    download_url = Column(String(255), nullable=False)

    # This is whether the submission has been downloaded in the local database.
    is_downloaded = Column(Boolean(name="is_downloaded"), nullable=False, server_default=text("0"))

    # This tracks if the file had been successfully decrypted after download.
    is_decrypted = Column(
        Boolean(name="is_decrypted"),
        CheckConstraint(
            "CASE WHEN is_downloaded = 0 THEN is_decrypted IS NULL ELSE 1 END",
            name="files_compare_is_downloaded_vs_is_decrypted",
        ),
        nullable=True,
    )

    download_error_id = Column(Integer, ForeignKey("downloaderrors.id"))
    download_error = relationship("DownloadError")

    # This reflects read status stored on the server.
    is_read = Column(Boolean(name="is_read"), nullable=False, server_default=text("0"))

    source_id = Column(Integer, ForeignKey("sources.id"), nullable=False)
    source = relationship(
        "Source", backref=backref("files", order_by=id, cascade="delete"), lazy="joined"
    )

    last_updated = Column(
        DateTime,
        nullable=False,
        default=datetime.datetime.utcnow,
        onupdate=datetime.datetime.utcnow,
    )

    def __init__(self, **kwargs: Any) -> None:
        """
        Create a file; `file_counter` is always derived from the number in
        front of the first "-" of `filename` and may not be passed in.
        """
        if "file_counter" in kwargs:
            raise TypeError("Cannot manually set file_counter")

        counter_prefix = kwargs["filename"].split("-")[0]
        kwargs["file_counter"] = int(counter_prefix)
        super().__init__(**kwargs)

    def __str__(self) -> str:
        """
        A placeholder while the file is not downloaded; afterwards an
        explanation of the download error if one is recorded, otherwise the
        filename.
        """
        if not self.is_downloaded:
            return "<Encrypted file on server>"
        if self.download_error is not None:
            return self.download_error.explain(self.__class__.__name__)
        return f"File: {self.filename}"

    def __repr__(self) -> str:
        return f"<File {self.uuid}>"

    def location(self, data_dir: str) -> str:
        """
        Resolved path of the file: `<data_dir>/<source>/<counter>-<source>-doc/<filename>`,
        where `<source>` is the source's `journalist_filename`.
        """
        base_dir = Path(data_dir)
        relative_path = Path(
            self.source.journalist_filename,
            f"{self.file_counter}-{self.source.journalist_filename}-doc",
            self.filename,
        )
        return str(base_dir.joinpath(relative_path).resolve())

    @property
    def seen(self) -> bool:
        """
        If the file has been seen by any journalist, then the file is considered seen.
        The `is_read` boolean is used in order to recognize files that were downloaded
        before SecureDrop 1.6.0 (before the seen-by feature).
        """
        return bool(self.seen_files.count() or self.is_read)

    def seen_by(self, journalist_id: int) -> bool:
        """
        True if a seen record for this file exists for `journalist_id`.
        """
        return any(record.journalist_id == journalist_id for record in self.seen_files)


class Reply(Base):
    """
    A journalist's reply to a source, stored in the `replies` table.
    """

    __tablename__ = "replies"
    # NOTE(review): the constraint name says "messages" although this is the replies
    # table; presumably kept to match existing schemas -- confirm before renaming.
    __table_args__ = (
        UniqueConstraint("source_id", "file_counter", name="uq_messages_source_id_file_counter"),
    )

    id = Column(Integer, primary_key=True)
    uuid = Column(String(36), unique=True, nullable=False)
    source_id = Column(Integer, ForeignKey("sources.id"), nullable=False)
    source = relationship(
        "Source", backref=backref("replies", order_by=id, cascade="delete"), lazy="joined"
    )

    journalist_id = Column(Integer, ForeignKey("users.id"))
    journalist = relationship("User", backref=backref("replies", order_by=id))

    filename = Column(String(255), nullable=False)
    file_counter = Column(Integer, nullable=False)
    size = Column(Integer)

    # This is whether the reply has been downloaded in the local database.
    is_downloaded = Column(Boolean(name="is_downloaded"), default=False)

    content = Column(
        Text,
        CheckConstraint(
            "CASE WHEN is_downloaded = 0 THEN content IS NULL ELSE 1 END",
            name="replies_compare_download_vs_content",
        ),
    )

    # This tracks if the file had been successfully decrypted after download.
    is_decrypted = Column(
        Boolean(name="is_decrypted"),
        CheckConstraint(
            "CASE WHEN is_downloaded = 0 THEN is_decrypted IS NULL ELSE 1 END",
            name="replies_compare_is_downloaded_vs_is_decrypted",
        ),
        nullable=True,
    )

    download_error_id = Column(Integer, ForeignKey("downloaderrors.id"))
    download_error = relationship("DownloadError")

    last_updated = Column(
        DateTime,
        nullable=False,
        default=datetime.datetime.utcnow,
        onupdate=datetime.datetime.utcnow,
    )

    def __init__(self, **kwargs: Any) -> None:
        """
        Create a reply; `file_counter` is always derived from the number in
        front of the first "-" of `filename` and may not be passed in.
        """
        if "file_counter" in kwargs:
            raise TypeError("Cannot manually set file_counter")

        counter_prefix = kwargs["filename"].split("-")[0]
        kwargs["file_counter"] = int(counter_prefix)
        super().__init__(**kwargs)

    def __str__(self) -> str:
        """
        The reply content if present, otherwise an explanation of the
        download error if one is recorded, otherwise a placeholder.
        """
        if self.content is not None:
            return self.content
        if self.download_error is not None:
            return self.download_error.explain(self.__class__.__name__)
        return "<Reply not yet available>"

    def __repr__(self) -> str:
        return f"<Reply {self.uuid}: {self.filename}>"

    def location(self, data_dir: str) -> str:
        """
        Resolved path of the reply's `.txt` file inside the source's
        directory under `data_dir`.
        """
        base_dir = Path(data_dir)
        relative_path = Path(
            self.source.journalist_filename, Path(self.filename).with_suffix(".txt")
        )
        return str(base_dir.joinpath(relative_path).resolve())

    @property
    def seen(self) -> bool:
        """
        A reply is always seen in a global inbox.
        """
        return True

    def seen_by(self, journalist_id: int) -> bool:
        """
        True if a seen record for this reply exists for `journalist_id`.
        """
        return any(record.journalist_id == journalist_id for record in self.seen_replies)

    @property
    def seen_by_list(self) -> dict[str, User]:
        """
        Users who have seen this reply, keyed by username. Seen records
        without a journalist are skipped.
        """
        return {
            record.journalist.username: record.journalist
            for record in self.seen_replies
            if record.journalist
        }


class DownloadErrorCodes(Enum):
    """
    Enumerated download failure modes, with templates as values.

    The templates are intended to be formatted with the class name of
    a downloadable item.
    """

    CHECKSUM_ERROR = "cannot download {object_type}"
    DECRYPTION_ERROR = "cannot decrypt {object_type}"


class DownloadError(Base):
    """
    Table of errors that can occur with downloadable items: File, Message, Reply.
    """

    __tablename__ = "downloaderrors"

    id = Column(Integer, primary_key=True)
    name = Column(String(36), unique=True, nullable=False)

    def __init__(self, name: str) -> None:
        super().__init__()
        self.name = name

    def __repr__(self) -> str:
        return f"<Download error {self.name}>"

    def explain(self, classname: str) -> str:
        """
        Fill the template of the `DownloadErrorCodes` member named `self.name`
        with the lowercased class name.
        """
        template = DownloadErrorCodes[self.name].value
        return template.format(object_type=classname.lower())


class DraftReply(Base):
    """
    A locally stored reply draft, kept in the `draftreplies` table.
    """

    __tablename__ = "draftreplies"

    id = Column(Integer, primary_key=True)
    uuid = Column(String(36), unique=True, nullable=False)
    timestamp = Column(DateTime, nullable=False)
    source_id = Column(Integer, ForeignKey("sources.id"), nullable=False)
    source = relationship(
        "Source", backref=backref("draftreplies", order_by=id, cascade="delete"), lazy="joined"
    )
    journalist_id = Column(Integer, ForeignKey("users.id"))
    journalist = relationship("User", backref=backref("draftreplies", order_by=id))

    # Tracks where in this conversation the draft reply was sent.
    # This points to the file_counter of the previous conversation item.
    file_counter = Column(Integer, nullable=False)
    content = Column(Text)

    # This tracks the sending status of the reply.
    send_status_id = Column(Integer, ForeignKey("replysendstatuses.id"))
    send_status = relationship("ReplySendStatus")

    sending_pid = Column(Integer)

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)

    def __str__(self) -> str:
        """
        The draft's content if present, otherwise a placeholder.
        """
        if self.content is not None:
            return self.content
        return "<Reply not yet available>"

    def __repr__(self) -> str:
        return f"<DraftReply {self.uuid}>"

    @property
    def is_pending(self) -> bool:
        """
        True if Draft Reply is in Pending state.
        """
        status = self.send_status
        if status is None:
            return False
        return status.name == ReplySendStatusCodes.PENDING.value

    @property
    def seen(self) -> bool:
        """
        A draft reply is always seen in a global inbox.
        """
        return True

    def seen_by(self, journalist_id: int) -> bool:
        """
        A draft reply is considered seen by everyone (we don't track who sees draft replies).
        """
        return True

    @property
    def seen_by_list(self) -> dict[str, User]:
        """
        A draft reply is considered seen by everyone (we don't track who sees draft replies).
        Return an empty dictionary.
        """
        return {}


class ReplySendStatus(Base):
    """
    Named send status referenced by draft replies, stored in `replysendstatuses`.
    """

    __tablename__ = "replysendstatuses"

    id = Column(Integer, primary_key=True)
    name = Column(String(36), unique=True, nullable=False)

    def __init__(self, name: str) -> None:
        super().__init__()
        self.name = name

    def __repr__(self) -> str:
        return f"<Reply status {self.name}>"


class ReplySendStatusCodes(Enum):
    """In progress (sending) replies can currently have the following statuses"""

    PENDING = "PENDING"
    FAILED = "FAILED"


class SeenFile(Base):
    """
    Association recording that a journalist has seen a file.
    """

    __tablename__ = "seen_files"
    __table_args__ = (UniqueConstraint("file_id", "journalist_id"),)

    id = Column(Integer, primary_key=True)
    file_id = Column(Integer, ForeignKey("files.id"), nullable=False)
    journalist_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    file = relationship("File", backref=backref("seen_files", lazy="dynamic", cascade="all,delete"))
    journalist = relationship("User", backref=backref("seen_files"))


class SeenMessage(Base):
    """
    Association recording that a journalist has seen a message.
    """

    __tablename__ = "seen_messages"
    __table_args__ = (UniqueConstraint("message_id", "journalist_id"),)

    id = Column(Integer, primary_key=True)
    message_id = Column(Integer, ForeignKey("messages.id"), nullable=False)
    journalist_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    message = relationship(
        "Message", backref=backref("seen_messages", lazy="dynamic", cascade="all,delete")
    )
    journalist = relationship("User", backref=backref("seen_messages"))


class SeenReply(Base):
    """
    Association recording that a journalist has seen a reply.
    """

    __tablename__ = "seen_replies"
    __table_args__ = (UniqueConstraint("reply_id", "journalist_id"),)

    id = Column(Integer, primary_key=True)
    reply_id = Column(Integer, ForeignKey("replies.id"), nullable=False)
    journalist_id = Column(Integer, ForeignKey("users.id"), nullable=True)
    reply = relationship("Reply", backref=backref("seen_replies", cascade="all,delete"))
    journalist = relationship("User", backref=backref("seen_replies"))