Skip to content

Database Architecture

Metricis uses PostgreSQL 15+ with SQLAlchemy 2.x async ORM for data persistence.

Database Schema

Core Tables

-- Users (researchers, admins)
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    email VARCHAR(255) UNIQUE NOT NULL,
    password_hash VARCHAR(255) NOT NULL,
    -- Allowed roles were previously documented only in a comment; enforce them
    -- with a named CHECK constraint so invalid data is rejected by the database.
    role VARCHAR(50) NOT NULL
        CONSTRAINT users_role_check CHECK (role IN ('admin', 'researcher', 'coordinator')),
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Studies
CREATE TABLE studies (
    id SERIAL PRIMARY KEY,
    code VARCHAR(50) UNIQUE NOT NULL, -- short unique study code used for lookups
    name VARCHAR(255) NOT NULL,
    description TEXT,
    pi_name VARCHAR(255) NOT NULL, -- principal investigator
    pi_email VARCHAR(255) NOT NULL,
    start_date DATE,
    -- NOTE(review): nothing enforces end_date >= start_date; consider a CHECK constraint.
    end_date DATE,
    status VARCHAR(50) DEFAULT 'active', -- 'active', 'paused', 'completed'
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Participants
CREATE TABLE participants (
    id SERIAL PRIMARY KEY,
    -- NOT NULL matches the SQLAlchemy model (Participant.study_id has nullable=False);
    -- a participant row is meaningless without an owning study.
    study_id INTEGER NOT NULL REFERENCES studies(id) ON DELETE CASCADE,
    participant_id VARCHAR(50) NOT NULL, -- external/site-assigned identifier
    email VARCHAR(255),
    phone VARCHAR(50),
    enrollment_date DATE,
    status VARCHAR(50) DEFAULT 'enrolled', -- 'screened', 'enrolled', 'withdrawn', 'completed'
    actor VARCHAR(50) DEFAULT 'patient', -- 'patient', 'caregiver'
    language VARCHAR(10) DEFAULT 'en', -- 'en', 'fr'
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW(),
    UNIQUE(study_id, participant_id) -- external IDs are only unique within a study
);

-- Batteries (assessment collections)
CREATE TABLE batteries (
    id SERIAL PRIMARY KEY,
    -- NOTE(review): nullable FK — confirm whether a battery without a study is meaningful.
    study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
    name VARCHAR(255) NOT NULL,
    description TEXT,
    tasks JSONB NOT NULL, -- Array of task configurations (see "Battery Tasks" JSON example below)
    estimated_minutes INTEGER, -- rough total completion time
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Assessment Sessions
-- NOTE(review): this statement references visits(id), but the visits table is
-- created further down in this file. Executed top-to-bottom, this FK fails.
-- Create visits before sessions, or add the constraint afterwards via ALTER TABLE.
CREATE TABLE sessions (
    id SERIAL PRIMARY KEY,
    session_id VARCHAR(255) UNIQUE NOT NULL, -- client-facing identifier; FK target of session_data
    participant_id INTEGER REFERENCES participants(id) ON DELETE CASCADE,
    battery_id INTEGER REFERENCES batteries(id) ON DELETE SET NULL,
    visit_id INTEGER REFERENCES visits(id) ON DELETE SET NULL, -- forward reference (see note above)
    start_time TIMESTAMP NOT NULL,
    end_time TIMESTAMP, -- NULL while the session is still in progress
    status VARCHAR(50) DEFAULT 'in_progress', -- 'in_progress', 'completed', 'abandoned'
    device_info JSONB, -- Device type, OS, browser
    created_at TIMESTAMP DEFAULT NOW()
);

-- Session Data (task summaries + trials)
CREATE TABLE session_data (
    id SERIAL PRIMARY KEY,
    -- FK targets the natural key sessions.session_id (VARCHAR), not the surrogate sessions.id.
    session_id VARCHAR(255) REFERENCES sessions(session_id) ON DELETE CASCADE,
    task_name VARCHAR(100) NOT NULL,
    summary JSONB NOT NULL, -- Task summary metrics (see "Session Data Summary" JSON example below)
    trials JSONB NOT NULL, -- Raw trial data
    submitted_at TIMESTAMP DEFAULT NOW()
);

-- Visits (scheduled study events)
CREATE TABLE visits (
    id SERIAL PRIMARY KEY,
    study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
    visit_name VARCHAR(255) NOT NULL,
    visit_number INTEGER NOT NULL, -- ordinal position in the study schedule
    -- Scheduling window in days; presumably offsets from enrollment_date — confirm with scheduler code.
    window_start_days INTEGER NOT NULL,
    window_end_days INTEGER NOT NULL,
    battery_id INTEGER REFERENCES batteries(id) ON DELETE SET NULL, -- battery administered at this visit
    created_at TIMESTAMP DEFAULT NOW()
);

-- Participant Visits (per-participant instances of scheduled visits)
CREATE TABLE participant_visits (
    id SERIAL PRIMARY KEY,
    participant_id INTEGER REFERENCES participants(id) ON DELETE CASCADE,
    visit_id INTEGER REFERENCES visits(id) ON DELETE CASCADE,
    scheduled_date DATE,
    actual_date DATE, -- NULL until the visit actually occurs
    status VARCHAR(50) DEFAULT 'pending', -- 'pending', 'in_progress', 'completed', 'missed'
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW(),
    UNIQUE(participant_id, visit_id) -- at most one instance of each visit per participant
);

-- Consent Forms (versioned per study)
CREATE TABLE consent_forms (
    id SERIAL PRIMARY KEY,
    study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
    version VARCHAR(50) NOT NULL,
    title VARCHAR(255) NOT NULL,
    content TEXT NOT NULL, -- HTML content
    effective_date DATE NOT NULL,
    created_at TIMESTAMP DEFAULT NOW(),
    UNIQUE(study_id, version) -- version strings only need to be unique within a study
);

-- Participant Consents
CREATE TABLE participant_consents (
    id SERIAL PRIMARY KEY,
    participant_id INTEGER REFERENCES participants(id) ON DELETE CASCADE,
    consent_form_id INTEGER REFERENCES consent_forms(id) ON DELETE CASCADE,
    signed_at TIMESTAMP, -- NULL while status is 'pending'
    -- NOTE(review): BYTEA stores raw bytes. If the app submits a base64 string,
    -- decode it before insert, or use TEXT instead — confirm against the writer code.
    signature BYTEA, -- signature image
    ip_address VARCHAR(50), -- presumably the signer's IP at signing time — confirm
    status VARCHAR(50) DEFAULT 'pending', -- 'pending', 'signed', 'withdrawn'
    created_at TIMESTAMP DEFAULT NOW()
);

-- REDCap Configuration (one per study)
CREATE TABLE redcap_configs (
    id SERIAL PRIMARY KEY,
    study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE UNIQUE, -- UNIQUE enforces the 1:1 study relationship
    site_id VARCHAR(100) NOT NULL,
    event_name VARCHAR(255) NOT NULL,
    participant_id_field VARCHAR(255) DEFAULT 'record_id', -- REDCap field holding the participant identifier
    field_mappings JSONB NOT NULL, -- Task → REDCap field mappings (see JSON example below)
    sync_enabled BOOLEAN DEFAULT TRUE,
    last_sync_at TIMESTAMP, -- NULL until the first sync; presumably set by the sync job — confirm
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Notification Templates
CREATE TABLE notification_templates (
    id SERIAL PRIMARY KEY,
    study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
    template_type VARCHAR(100) NOT NULL, -- 'visit_reminder', 'consent_request', etc.
    channel VARCHAR(50) NOT NULL, -- 'email', 'sms', 'push'
    subject VARCHAR(255), -- nullable; presumably only used for the 'email' channel — confirm
    content TEXT NOT NULL,
    variables JSONB, -- Template variables
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);

-- Notifications Log (one row per outbound notification)
CREATE TABLE notifications (
    id SERIAL PRIMARY KEY,
    participant_id INTEGER REFERENCES participants(id) ON DELETE CASCADE,
    template_id INTEGER REFERENCES notification_templates(id) ON DELETE SET NULL, -- log survives template deletion
    channel VARCHAR(50) NOT NULL,
    recipient VARCHAR(255) NOT NULL, -- Email, phone, or device token
    subject VARCHAR(255),
    content TEXT NOT NULL, -- presumably the rendered message body, not the template — confirm
    status VARCHAR(50) DEFAULT 'pending', -- 'pending', 'sent', 'failed'
    sent_at TIMESTAMP, -- NULL until the message is actually sent
    created_at TIMESTAMP DEFAULT NOW()
);

Indexes

-- Performance indexes (FK columns used by the hot join paths)
CREATE INDEX idx_participants_study_id ON participants(study_id);
CREATE INDEX idx_sessions_participant_id ON sessions(participant_id);
CREATE INDEX idx_session_data_session_id ON session_data(session_id);
CREATE INDEX idx_participant_visits_participant_id ON participant_visits(participant_id);
CREATE INDEX idx_participant_visits_visit_id ON participant_visits(visit_id);
CREATE INDEX idx_notifications_participant_id ON notifications(participant_id);
CREATE INDEX idx_notifications_status ON notifications(status); -- speeds up the pending-notification poll

-- Search indexes
CREATE INDEX idx_participants_email ON participants(email);
CREATE INDEX idx_participants_participant_id ON participants(participant_id);
-- Dropped idx_sessions_session_id and idx_studies_code: sessions.session_id and
-- studies.code are declared UNIQUE, and PostgreSQL backs every UNIQUE constraint
-- with a unique index, so separate indexes on those columns would be redundant.

SQLAlchemy Models

# app/models/study.py
from sqlalchemy import Column, Integer, String, Text, Date, DateTime
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func  # was missing: func.now() is used below
from app.db.base import Base

class Study(Base):
    """A research study: top-level container owning participants, batteries,
    visits, consent forms and the optional 1:1 REDCap configuration."""

    __tablename__ = "studies"

    id = Column(Integer, primary_key=True)
    code = Column(String(50), unique=True, nullable=False)  # short unique study code
    name = Column(String(255), nullable=False)
    description = Column(Text)
    pi_name = Column(String(255), nullable=False)  # principal investigator
    pi_email = Column(String(255), nullable=False)
    start_date = Column(Date)
    end_date = Column(Date)
    status = Column(String(50), default="active")  # 'active', 'paused', 'completed'
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now())

    # Relationships — deleting a study cascades to everything it owns,
    # mirroring the ON DELETE CASCADE FKs in the DDL.
    participants = relationship("Participant", back_populates="study", cascade="all, delete-orphan")
    batteries = relationship("Battery", back_populates="study", cascade="all, delete-orphan")
    visits = relationship("Visit", back_populates="study", cascade="all, delete-orphan")
    consent_forms = relationship("ConsentForm", back_populates="study", cascade="all, delete-orphan")
    redcap_config = relationship("REDCapConfig", back_populates="study", uselist=False)
# app/models/participant.py
from sqlalchemy import Column, Integer, String, Date, DateTime, ForeignKey, Enum
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func  # was missing: func.now() is used below
from app.db.base import Base

class Participant(Base):
    """A person enrolled (or being screened) in a study, identified externally
    by participant_id, which is unique per study."""

    __tablename__ = "participants"

    id = Column(Integer, primary_key=True)
    study_id = Column(Integer, ForeignKey("studies.id", ondelete="CASCADE"), nullable=False)
    participant_id = Column(String(50), nullable=False)  # external/site-assigned identifier
    email = Column(String(255))
    phone = Column(String(50))
    enrollment_date = Column(Date)
    status = Column(String(50), default="enrolled")  # 'screened', 'enrolled', 'withdrawn', 'completed'
    actor = Column(String(50), default="patient")    # 'patient', 'caregiver'
    language = Column(String(10), default="en")      # 'en', 'fr'
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now())

    # Relationships — deleting a participant cascades to dependent rows.
    study = relationship("Study", back_populates="participants")
    sessions = relationship("Session", back_populates="participant", cascade="all, delete-orphan")
    visits = relationship("ParticipantVisit", back_populates="participant", cascade="all, delete-orphan")
    consents = relationship("ParticipantConsent", back_populates="participant", cascade="all, delete-orphan")

Data Relationships

Study
  ├── Participants (1:N)
  │   ├── Sessions (1:N)
  │   │   └── SessionData (1:N)
  │   ├── ParticipantVisits (1:N)
  │   └── ParticipantConsents (1:N)
  ├── Batteries (1:N)
  ├── Visits (1:N)
  │   └── ParticipantVisits (1:N)
  ├── ConsentForms (1:N)
  │   └── ParticipantConsents (1:N)
  └── REDCapConfig (1:1)

JSONB Storage

Battery Tasks

{
  "tasks": [
    {
      "task_name": "simple_rt",
      "config": {
        "trials": 20,
        "timeout": 5000
      }
    },
    {
      "task_name": "cpt",
      "config": {
        "trials": 100,
        "target_frequency": 0.3
      }
    }
  ]
}

Session Data Summary

{
  "simple_rt": {
    "mean_rt": 342.5,
    "median_rt": 335.0,
    "sd_rt": 45.2,
    "accuracy": 0.95,
    "num_trials": 20
  },
  "cpt": {
    "hits": 28,
    "misses": 2,
    "false_alarms": 3,
    "correct_rejections": 67,
    "dprime": 2.5,
    "beta": 1.2
  }
}

REDCap Field Mappings

{
  "simple_rt_mean": "cognitive_rt_mean",
  "simple_rt_sd": "cognitive_rt_sd",
  "cpt_dprime": "cpt_sensitivity",
  "cpt_beta": "cpt_criterion",
  "nback_accuracy_1": "nback_1back_acc",
  "nback_accuracy_2": "nback_2back_acc"
}

Migrations

Initial Schema

alembic revision --autogenerate -m "initial schema"
alembic upgrade head

Adding New Tables

# Make changes to models
alembic revision --autogenerate -m "add notification tables"
alembic upgrade head

Data Migrations

# alembic/versions/xxx_migrate_participant_data.py
from alembic import op  # was missing: op.execute() is used below


def upgrade():
    """Backfill participants.actor: default any pre-existing NULLs to 'patient'."""
    op.execute("""
        UPDATE participants
        SET actor = 'patient'
        WHERE actor IS NULL
    """)


def downgrade():
    # Intentionally a no-op: the original NULLs are not recoverable once
    # overwritten, so this data migration is irreversible.
    pass

Query Optimization

Eager Loading

# Bad: N+1 queries
study = await session.get(Study, study_id)
for participant in study.participants:
    print(participant.sessions)  # New query for each participant

# Good: Single query with joins
study = await session.execute(
    select(Study)
    .options(selectinload(Study.participants).selectinload(Participant.sessions))
    .where(Study.id == study_id)
)

Pagination

from sqlalchemy import select

async def get_participants_paginated(
    db: AsyncSession,
    study_id: int,
    page: int = 1,
    per_page: int = 20
):
    """Return one page of a study's participants.

    Without an ORDER BY, PostgreSQL returns rows in no guaranteed order, so
    consecutive pages could repeat or skip rows; ordering by the primary key
    makes OFFSET pagination deterministic.
    """
    offset = (page - 1) * per_page

    result = await db.execute(
        select(Participant)
        .where(Participant.study_id == study_id)
        .order_by(Participant.id)  # stable page boundaries
        .offset(offset)
        .limit(per_page)
    )
    return result.scalars().all()

Aggregations

from sqlalchemy import func, select

async def get_study_stats(db: AsyncSession, study_id: int):
    """Aggregate participant count, session count, and average session duration
    for one study.

    The outer join to Session produces one row per participant-session pair,
    so the participant count must be DISTINCT — otherwise a participant with
    N sessions would be counted N times. Session rows appear at most once, so
    count(Session.id) is unaffected.
    """
    result = await db.execute(
        select(
            func.count(func.distinct(Participant.id)).label('total_participants'),
            func.count(Session.id).label('total_sessions'),
            # avg ignores NULLs, so in-progress sessions (end_time IS NULL) are excluded
            func.avg(Session.end_time - Session.start_time).label('avg_duration')
        )
        .select_from(Study)
        .join(Participant)
        .join(Session, isouter=True)
        .where(Study.id == study_id)
    )
    return result.first()

Backup and Recovery

Database Backup

# Daily backup (plain-SQL dump, gzip-compressed, date-stamped)
pg_dump -U metricis metricis | gzip > backup_$(date +%Y%m%d).sql.gz

# Restore (streams the decompressed dump into psql)
gunzip -c backup_20260124.sql.gz | psql -U metricis metricis

Point-in-Time Recovery

Enable WAL archiving in postgresql.conf:

wal_level = replica              # enough WAL detail for archiving and replicas
archive_mode = on
archive_command = 'cp %p /backup/wal/%f'  # NOTE(review): cp is not atomic or fsync-safe; consider a dedicated archiver for production

Monitoring

Connection Pool

# Monitor pool usage
engine = create_async_engine(
    settings.DATABASE_URL,
    pool_size=20,       # persistent connections kept in the pool
    max_overflow=10,    # extra connections allowed under burst load (cap: 30 total)
    pool_pre_ping=True, # Verify connections before use
)

Slow Query Log

Enable in postgresql.conf:

log_min_duration_statement = 1000  # Log queries > 1s (value is in milliseconds)

Next Steps