Database Architecture¶
Metricis uses PostgreSQL 15+ with the SQLAlchemy 2.x async ORM for data persistence.
Database Schema¶
Core Tables¶
-- Users (researchers, admins)
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    email VARCHAR(255) UNIQUE NOT NULL,
    password_hash VARCHAR(255) NOT NULL,
    -- Enforce the documented role set at the database level instead of only
    -- in application code; the named constraint makes violations greppable.
    role VARCHAR(50) NOT NULL
        CONSTRAINT users_role_check CHECK (role IN ('admin', 'researcher', 'coordinator')),
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW()
);
-- Studies (one row per research study / protocol)
CREATE TABLE studies (
id SERIAL PRIMARY KEY,
code VARCHAR(50) UNIQUE NOT NULL, -- Short human-readable study code
name VARCHAR(255) NOT NULL,
description TEXT,
pi_name VARCHAR(255) NOT NULL, -- Principal investigator
pi_email VARCHAR(255) NOT NULL,
start_date DATE,
end_date DATE, -- NULL while the study is open-ended
status VARCHAR(50) DEFAULT 'active', -- 'active', 'paused', 'completed'
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW() -- Maintained by the ORM (onupdate=func.now() in the Study model); no DB trigger
);
-- Participants (one row per enrolled person per study)
CREATE TABLE participants (
id SERIAL PRIMARY KEY, -- Surrogate key; the external identity is (study_id, participant_id)
study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
participant_id VARCHAR(50) NOT NULL, -- Study-assigned ID, unique within a study (see UNIQUE below)
email VARCHAR(255),
phone VARCHAR(50),
enrollment_date DATE,
status VARCHAR(50) DEFAULT 'enrolled', -- 'screened', 'enrolled', 'withdrawn', 'completed'
actor VARCHAR(50) DEFAULT 'patient', -- 'patient', 'caregiver'
language VARCHAR(10) DEFAULT 'en', -- 'en', 'fr'
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW(),
UNIQUE(study_id, participant_id)
);
-- Batteries (assessment collections): an ordered set of tasks a participant completes
CREATE TABLE batteries (
id SERIAL PRIMARY KEY,
study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL,
description TEXT,
tasks JSONB NOT NULL, -- Array of task configurations ({"tasks": [{"task_name", "config"}, ...]})
estimated_minutes INTEGER, -- Expected completion time, shown to participants/coordinators
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
-- Visits (scheduled study events)
-- NOTE: created BEFORE sessions because sessions.visit_id references
-- visits(id); the previous ordering (sessions first) fails with
-- "relation visits does not exist" on a clean database.
CREATE TABLE visits (
    id SERIAL PRIMARY KEY,
    study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
    visit_name VARCHAR(255) NOT NULL,
    visit_number INTEGER NOT NULL,
    window_start_days INTEGER NOT NULL, -- Visit window bounds in days (anchor not shown here; presumably relative to enrollment — confirm)
    window_end_days INTEGER NOT NULL,
    battery_id INTEGER REFERENCES batteries(id) ON DELETE SET NULL,
    created_at TIMESTAMP DEFAULT NOW()
);
-- Assessment Sessions (one row per participant sitting of a battery)
CREATE TABLE sessions (
    id SERIAL PRIMARY KEY,
    session_id VARCHAR(255) UNIQUE NOT NULL, -- Opaque external ID; FK target for session_data
    participant_id INTEGER REFERENCES participants(id) ON DELETE CASCADE,
    battery_id INTEGER REFERENCES batteries(id) ON DELETE SET NULL,
    visit_id INTEGER REFERENCES visits(id) ON DELETE SET NULL,
    start_time TIMESTAMP NOT NULL,
    end_time TIMESTAMP, -- NULL while the session is in progress
    status VARCHAR(50) DEFAULT 'in_progress', -- 'in_progress', 'completed', 'abandoned'
    device_info JSONB, -- Device type, OS, browser
    created_at TIMESTAMP DEFAULT NOW()
);
-- Session Data (task summaries + trials), one row per task per session
CREATE TABLE session_data (
    id SERIAL PRIMARY KEY,
    session_id VARCHAR(255) REFERENCES sessions(session_id) ON DELETE CASCADE,
    task_name VARCHAR(100) NOT NULL,
    summary JSONB NOT NULL, -- Task summary metrics
    trials JSONB NOT NULL, -- Raw trial data
    submitted_at TIMESTAMP DEFAULT NOW()
);
-- Participant Visits (join table: one participant's instance of a scheduled visit)
CREATE TABLE participant_visits (
id SERIAL PRIMARY KEY,
participant_id INTEGER REFERENCES participants(id) ON DELETE CASCADE,
visit_id INTEGER REFERENCES visits(id) ON DELETE CASCADE,
scheduled_date DATE,
actual_date DATE, -- NULL until the visit actually occurs
status VARCHAR(50) DEFAULT 'pending', -- 'pending', 'in_progress', 'completed', 'missed'
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW(),
UNIQUE(participant_id, visit_id) -- At most one row per participant per scheduled visit
);
-- Consent Forms (versioned per study; new versions get new rows)
CREATE TABLE consent_forms (
id SERIAL PRIMARY KEY,
study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
version VARCHAR(50) NOT NULL,
title VARCHAR(255) NOT NULL,
content TEXT NOT NULL, -- HTML content
effective_date DATE NOT NULL,
created_at TIMESTAMP DEFAULT NOW(),
UNIQUE(study_id, version) -- A version string is unique within a study
);
-- Participant Consents (one row per participant per consent form)
CREATE TABLE participant_consents (
id SERIAL PRIMARY KEY,
participant_id INTEGER REFERENCES participants(id) ON DELETE CASCADE,
consent_form_id INTEGER REFERENCES consent_forms(id) ON DELETE CASCADE,
signed_at TIMESTAMP, -- NULL until status = 'signed'
signature BYTEA, -- Signature image bytes; BYTEA stores raw binary, so a base64 client payload should be decoded before insert — NOTE(review): confirm app behavior
ip_address VARCHAR(50), -- Signer's IP, kept for the audit trail
status VARCHAR(50) DEFAULT 'pending', -- 'pending', 'signed', 'withdrawn'
created_at TIMESTAMP DEFAULT NOW()
);
-- REDCap Configuration (UNIQUE on study_id enforces at most one config per study)
CREATE TABLE redcap_configs (
id SERIAL PRIMARY KEY,
study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE UNIQUE, -- 1:1 with studies
site_id VARCHAR(100) NOT NULL,
event_name VARCHAR(255) NOT NULL, -- REDCap event name used when pushing records
participant_id_field VARCHAR(255) DEFAULT 'record_id', -- REDCap field that holds the participant identifier
field_mappings JSONB NOT NULL, -- Task → REDCap field mappings (see "REDCap Field Mappings" example)
sync_enabled BOOLEAN DEFAULT TRUE,
last_sync_at TIMESTAMP, -- NULL until the first successful sync
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
-- Notification Templates (per-study message templates, one per type+channel)
CREATE TABLE notification_templates (
id SERIAL PRIMARY KEY,
study_id INTEGER REFERENCES studies(id) ON DELETE CASCADE,
template_type VARCHAR(100) NOT NULL, -- 'visit_reminder', 'consent_request', etc.
channel VARCHAR(50) NOT NULL, -- 'email', 'sms', 'push'
subject VARCHAR(255), -- Nullable; presumably only meaningful for email — confirm
content TEXT NOT NULL,
variables JSONB, -- Template variables
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
-- Notifications Log (one row per outbound message attempt)
CREATE TABLE notifications (
id SERIAL PRIMARY KEY,
participant_id INTEGER REFERENCES participants(id) ON DELETE CASCADE,
template_id INTEGER REFERENCES notification_templates(id) ON DELETE SET NULL, -- SET NULL keeps the log row even if the template is deleted
channel VARCHAR(50) NOT NULL, -- 'email', 'sms', 'push'
recipient VARCHAR(255) NOT NULL, -- Email, phone, or device token
subject VARCHAR(255),
content TEXT NOT NULL, -- Message body as sent (content is copied, not referenced from the template)
status VARCHAR(50) DEFAULT 'pending', -- 'pending', 'sent', 'failed'
sent_at TIMESTAMP, -- NULL until the send succeeds
created_at TIMESTAMP DEFAULT NOW()
);
Indexes¶
-- Performance indexes (FK columns used in joins and lookups)
CREATE INDEX idx_participants_study_id ON participants(study_id);
CREATE INDEX idx_sessions_participant_id ON sessions(participant_id);
-- NOTE: sessions.session_id is declared UNIQUE, and PostgreSQL already
-- creates a unique index for UNIQUE constraints, so a separate
-- idx_sessions_session_id would be redundant and is omitted.
CREATE INDEX idx_session_data_session_id ON session_data(session_id);
CREATE INDEX idx_participant_visits_participant_id ON participant_visits(participant_id);
CREATE INDEX idx_participant_visits_visit_id ON participant_visits(visit_id);
CREATE INDEX idx_notifications_participant_id ON notifications(participant_id);
CREATE INDEX idx_notifications_status ON notifications(status);
-- Search indexes
CREATE INDEX idx_participants_email ON participants(email);
CREATE INDEX idx_participants_participant_id ON participants(participant_id);
-- studies.code is UNIQUE, so its implicit unique index already serves
-- code lookups; idx_studies_code is intentionally omitted.
SQLAlchemy Models¶
# app/models/study.py
from sqlalchemy import Column, Date, DateTime, Integer, String, Text, func
from sqlalchemy.orm import relationship

from app.db.base import Base


class Study(Base):
    """ORM model for the `studies` table.

    Top-level entity owning participants, batteries, visits, consent
    forms, and an optional 1:1 REDCap configuration.
    """

    __tablename__ = "studies"

    id = Column(Integer, primary_key=True)
    code = Column(String(50), unique=True, nullable=False)  # Short study code
    name = Column(String(255), nullable=False)
    description = Column(Text)
    pi_name = Column(String(255), nullable=False)  # Principal investigator
    pi_email = Column(String(255), nullable=False)
    start_date = Column(Date)
    end_date = Column(Date)
    status = Column(String(50), default="active")  # 'active', 'paused', 'completed'
    # FIX: `func` was referenced below without being imported, which raised
    # NameError at import time; it is now imported from sqlalchemy above.
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now())

    # Relationships (delete-orphan mirrors the ON DELETE CASCADE FKs in DDL)
    participants = relationship("Participant", back_populates="study", cascade="all, delete-orphan")
    batteries = relationship("Battery", back_populates="study", cascade="all, delete-orphan")
    visits = relationship("Visit", back_populates="study", cascade="all, delete-orphan")
    consent_forms = relationship("ConsentForm", back_populates="study", cascade="all, delete-orphan")
    redcap_config = relationship("REDCapConfig", back_populates="study", uselist=False)
# app/models/participant.py
from sqlalchemy import Column, Date, DateTime, ForeignKey, Integer, String, func
from sqlalchemy.orm import relationship

from app.db.base import Base


class Participant(Base):
    """ORM model for the `participants` table.

    A person enrolled in a study; (study_id, participant_id) is unique
    per the table's UNIQUE constraint.
    """

    __tablename__ = "participants"

    id = Column(Integer, primary_key=True)
    study_id = Column(Integer, ForeignKey("studies.id", ondelete="CASCADE"), nullable=False)
    participant_id = Column(String(50), nullable=False)  # Study-assigned external ID
    email = Column(String(255))
    phone = Column(String(50))
    enrollment_date = Column(Date)
    status = Column(String(50), default="enrolled")  # 'screened', 'enrolled', 'withdrawn', 'completed'
    actor = Column(String(50), default="patient")  # 'patient', 'caregiver'
    language = Column(String(10), default="en")  # 'en', 'fr'
    # FIX: `func` was used without an import (NameError at import time); the
    # unused `Enum` import was also removed.
    created_at = Column(DateTime, server_default=func.now())
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now())

    # Relationships
    study = relationship("Study", back_populates="participants")
    sessions = relationship("Session", back_populates="participant", cascade="all, delete-orphan")
    visits = relationship("ParticipantVisit", back_populates="participant", cascade="all, delete-orphan")
    consents = relationship("ParticipantConsent", back_populates="participant", cascade="all, delete-orphan")
Data Relationships¶
Study
├── Participants (1:N)
│ ├── Sessions (1:N)
│ │ └── SessionData (1:N)
│ ├── ParticipantVisits (1:N)
│ └── ParticipantConsents (1:N)
├── Batteries (1:N)
├── Visits (1:N)
│ └── ParticipantVisits (1:N)
├── ConsentForms (1:N)
│ └── ParticipantConsents (1:N)
└── REDCapConfig (1:1)
JSONB Storage¶
Battery Tasks¶
{
"tasks": [
{
"task_name": "simple_rt",
"config": {
"trials": 20,
"timeout": 5000
}
},
{
"task_name": "cpt",
"config": {
"trials": 100,
"target_frequency": 0.3
}
}
]
}
Session Data Summary¶
{
"simple_rt": {
"mean_rt": 342.5,
"median_rt": 335.0,
"sd_rt": 45.2,
"accuracy": 0.95,
"num_trials": 20
},
"cpt": {
"hits": 28,
"misses": 2,
"false_alarms": 3,
"correct_rejections": 67,
"dprime": 2.5,
"beta": 1.2
}
}
REDCap Field Mappings¶
{
"simple_rt_mean": "cognitive_rt_mean",
"simple_rt_sd": "cognitive_rt_sd",
"cpt_dprime": "cpt_sensitivity",
"cpt_beta": "cpt_criterion",
"nback_accuracy_1": "nback_1back_acc",
"nback_accuracy_2": "nback_2back_acc"
}
Migrations¶
Initial Schema¶
Adding New Tables¶
# 1. Edit or add SQLAlchemy models, then autogenerate a revision from the model/DB diff
alembic revision --autogenerate -m "add notification tables"
# 2. Apply all pending revisions up to the latest ("head")
alembic upgrade head
Data Migrations¶
# alembic/versions/xxx_migrate_participant_data.py
def upgrade():
    # Backfill: give any participant with an unset actor the default 'patient'.
    op.execute("""
    UPDATE participants
    SET actor = 'patient'
    WHERE actor IS NULL
    """)


def downgrade():
    # Irreversible backfill: the pre-migration NULLs cannot be reconstructed.
    pass
Query Optimization¶
Eager Loading¶
# Bad: N+1 queries
study = await session.get(Study, study_id)
for participant in study.participants:
print(participant.sessions) # New query for each participant
# Good: Single query with joins
study = await session.execute(
select(Study)
.options(selectinload(Study.participants).selectinload(Participant.sessions))
.where(Study.id == study_id)
)
Pagination¶
from sqlalchemy import select


async def get_participants_paginated(
    db: AsyncSession,
    study_id: int,
    page: int = 1,
    per_page: int = 20,
):
    """Return one page of a study's participants.

    Args:
        db: Async database session.
        study_id: Study whose participants are listed.
        page: 1-based page number; values < 1 are clamped to 1.
        per_page: Page size.

    Returns:
        List of Participant rows for the requested page.
    """
    # Clamp so page=0 or a negative page cannot produce a negative OFFSET,
    # which PostgreSQL rejects with an error.
    page = max(page, 1)
    offset = (page - 1) * per_page
    result = await db.execute(
        select(Participant)
        .where(Participant.study_id == study_id)
        # FIX: pagination without a deterministic ORDER BY can repeat or skip
        # rows across pages; order by the primary key for stable pages.
        .order_by(Participant.id)
        .offset(offset)
        .limit(per_page)
    )
    return result.scalars().all()
Aggregations¶
from sqlalchemy import func, select


async def get_study_stats(db: AsyncSession, study_id: int):
    """Return aggregate stats (participant count, session count, mean
    session duration) for one study as a single row.

    Args:
        db: Async database session.
        study_id: Study to aggregate over.
    """
    result = await db.execute(
        select(
            # FIX: the join to sessions repeats each participant row once per
            # session, so a plain COUNT(participants.id) over-counts
            # participants; COUNT(DISTINCT ...) gives the true count.
            func.count(func.distinct(Participant.id)).label('total_participants'),
            func.count(Session.id).label('total_sessions'),
            # NULL end_time rows (in-progress sessions) are ignored by AVG.
            func.avg(Session.end_time - Session.start_time).label('avg_duration')
        )
        .select_from(Study)
        .join(Participant)
        .join(Session, isouter=True)  # Outer join: studies with no sessions still aggregate
        .where(Study.id == study_id)
    )
    return result.first()
Backup and Recovery¶
Database Backup¶
# Daily backup: plain-SQL dump of database "metricis" as user "metricis",
# compressed and stamped with today's date (YYYYMMDD)
pg_dump -U metricis metricis | gzip > backup_$(date +%Y%m%d).sql.gz
# Restore: replay the dump into an existing (ideally empty) database
gunzip -c backup_20260124.sql.gz | psql -U metricis metricis
Point-in-Time Recovery¶
Enable WAL archiving in postgresql.conf by setting `wal_level = replica`, `archive_mode = on`, and an `archive_command`; base backups plus the archived WAL then allow recovery to any point in time.
Monitoring¶
Connection Pool¶
# Configure the async engine's connection pool
engine = create_async_engine(
settings.DATABASE_URL,
pool_size=20,  # Persistent connections kept open in the pool
max_overflow=10,  # Extra connections allowed beyond pool_size under burst load (30 max total)
pool_pre_ping=True, # Verify connections before use
)
Slow Query Log¶
Enable slow-query logging in postgresql.conf by setting `log_min_duration_statement` (milliseconds); statements running longer than the threshold are written to the server log.
Next Steps¶
- Architecture Overview - System design
- Server Architecture - Backend API
- Portal Architecture - Researcher portal
- API Reference - REST API documentation