Introduction
Medical imaging is critical to modern healthcare. From X-rays to MRIs, managing these large files requires specialized systems that balance performance, cost, and regulatory compliance.
Key Statistics:
- 70% of clinical decisions involve medical imaging
- The average hospital generates about 1 PB of imaging data annually
- The DICOM standard is used for roughly 95% of medical imaging
- AI diagnostic accuracy exceeds 90% for many use cases
DICOM Standard
DICOM Data Model
┌──────────────────────────────────────────────────────────────┐
│                   DICOM Information Model                    │
├──────────────────────────────────────────────────────────────┤
│                                                              │
│  Patient Level                                               │
│  ├── Patient ID, Name, Birth Date, Sex                       │
│  └── Study Level                                             │
│      ├── Study Instance UID, Date, Modality, Accession#      │
│      └── Series Level                                        │
│          ├── Series Instance UID, Modality, Body Part        │
│          └── Image Level                                     │
│              └── SOP Instance UID, Rows, Cols                │
│                                                              │
└──────────────────────────────────────────────────────────────┘
DICOM Header Structure
#!/usr/bin/env python3
"""DICOM file handling."""
from pydicom import dcmread
from pydicom.dataset import Dataset, FileDataset
from datetime import datetime
class DICOMHandler:
    """Read, summarise and de-identify DICOM files using pydicom."""

    # Elements stripped by ``anonymize`` when the caller supplies no list.
    # Every entry must be a valid DICOM keyword: pydicom's ``in`` test is
    # False for unknown keywords, so a misspelt name is silently never removed.
    DEFAULT_REMOVE_FIELDS = (
        'PatientName', 'PatientID', 'PatientBirthDate',
        'PatientAddress', 'PatientTelephoneNumbers', 'PatientSex',
        'ReferringPhysicianName', 'InstitutionName',
    )

    def __init__(self):
        # Uncompressed transfer syntaxes this handler declares support for.
        self.supported_transfer_syntaxes = [
            '1.2.840.10008.1.2',    # Implicit VR Little Endian
            '1.2.840.10008.1.2.1',  # Explicit VR Little Endian
            '1.2.840.10008.1.2.2',  # Explicit VR Big Endian
        ]

    def read_dicom(self, file_path):
        """Read a DICOM file and return its header grouped by level.

        Args:
            file_path: Path (or file-like object) accepted by ``dcmread``.

        Returns:
            A dict with ``patient``, ``study``, ``series`` and ``image``
            sub-dicts. Each value is whatever ``Dataset.get`` returns for
            that keyword, i.e. ``None`` when the element is absent.
        """
        # Only header elements are reported, so skip decoding pixel data.
        ds = dcmread(file_path, stop_before_pixels=True)
        return {
            'patient': {
                'id': ds.get('PatientID'),
                'name': ds.get('PatientName'),
                'birth_date': ds.get('PatientBirthDate'),
                'sex': ds.get('PatientSex'),
            },
            'study': {
                'uid': ds.get('StudyInstanceUID'),
                'date': ds.get('StudyDate'),
                'time': ds.get('StudyTime'),
                'description': ds.get('StudyDescription'),
                'accession': ds.get('AccessionNumber'),
            },
            'series': {
                'uid': ds.get('SeriesInstanceUID'),
                'number': ds.get('SeriesNumber'),
                'modality': ds.get('Modality'),
                'body_part': ds.get('BodyPartExamined'),
            },
            'image': {
                'sop_uid': ds.get('SOPInstanceUID'),
                'rows': ds.get('Rows'),
                'columns': ds.get('Columns'),
                'bits_allocated': ds.get('BitsAllocated'),
                'photometric': ds.get('PhotometricInterpretation'),
            },
        }

    def anonymize(self, ds, remove_fields=None):
        """Strip identifying elements from a dataset, in place.

        Args:
            ds: Dataset to modify. It must support ``keyword in ds``,
                attribute deletion by keyword and ``add_new``.
            remove_fields: Keywords to delete. ``None`` or an empty list
                falls back to ``DEFAULT_REMOVE_FIELDS``.

        Returns:
            The same ``ds`` object, for call chaining.

        Note:
            The default list is a minimal set, not a complete
            de-identification profile; review it against your own policy.
        """
        remove_fields = remove_fields or list(self.DEFAULT_REMOVE_FIELDS)
        for field in remove_fields:
            if field in ds:
                delattr(ds, field)

        # Record the de-identification in the attributes meant for it.
        # Tag (0008,0018) is SOPInstanceUID; writing the note there would
        # destroy the instance identifier and is not a valid UID value.
        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        ds.add_new(0x00120062, 'CS', 'YES')                    # Patient Identity Removed
        ds.add_new(0x00120063, 'LO', f"ANONYMIZED.{stamp}")    # De-identification Method
        return ds

    def extract_metadata(self, file_path):
        """Return the flat set of header values used for indexing.

        Args:
            file_path: Path (or file-like object) accepted by ``dcmread``.

        Returns:
            A dict of patient/study/series/instance identifiers plus
            modality, study date and body part; absent elements are ``None``.
        """
        # Indexing needs identifiers only, so pixel data is not loaded.
        ds = dcmread(file_path, stop_before_pixels=True)
        return {
            'patient_id': ds.get('PatientID'),
            'study_uid': ds.get('StudyInstanceUID'),
            'series_uid': ds.get('SeriesInstanceUID'),
            'sop_uid': ds.get('SOPInstanceUID'),
            'modality': ds.get('Modality'),
            'study_date': ds.get('StudyDate'),
            'body_part': ds.get('BodyPartExamined'),
        }
PACS Integration
PACS Architecture
# PACS Components
# Nesting restored: in flattened form the repeated `duration`, `storage`
# and `type` keys collide at a single level.
pacs:
  modalities:
    - CT
    - MRI
    - X-Ray
    - Ultrasound
  archive:
    type: "Distributed Storage"
    retention: "7 years"
    tiering:
      hot:
        duration: "90 days"
        storage: "SSD"
      warm:
        duration: "2 years"
        storage: "HDD"
      cold:
        duration: "5 years"
        storage: "Object Storage"
  viewer:
    type: "Web-based HTML5"
    features:
      - "Window/Level"
      - "Zoom/Pan"
      - "Measurement"
      - "3D Reconstruction"
DICOM Web Services
#!/usr/bin/env python3
"""DICOMweb implementation."""
import requests
from datetime import datetime
class DICOMWebClient:
    """Small DICOMweb client: QIDO-RS search, WADO-RS metadata, STOW-RS store.

    Read requests ask for ``application/dicom+json``, use the configured
    timeout and raise ``requests.HTTPError`` on a 4xx/5xx response instead of
    handing an error payload back as if it were study data.
    """

    DICOM_JSON = 'application/dicom+json'

    def __init__(self, base_url, timeout=30):
        """Create a client.

        Args:
            base_url: Root URL of the DICOMweb service; a trailing slash is
                stripped.
            timeout: Seconds passed to every ``requests`` call so a stalled
                server cannot block the caller indefinitely.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    @staticmethod
    def _build_study_query(patient_id=None, modality=None,
                           start_date=None, end_date=None):
        """Translate search arguments into QIDO-RS query parameters."""
        params = {}
        if patient_id:
            params['PatientID'] = patient_id
        if modality:
            params['ModalitiesInStudy'] = modality
        if start_date:
            # An open end defaults to today.
            until = end_date or datetime.now().strftime('%Y%m%d')
            params['StudyDate'] = f"{start_date}-{until}"
        elif end_date:
            # DICOM range matching allows an open start: "-YYYYMMDD".
            params['StudyDate'] = f"-{end_date}"
        return params

    @staticmethod
    def _encode_stow_body(payload, boundary):
        """Wrap one DICOM file's bytes in a single-part multipart/related body."""
        delimiter = boundary.encode('ascii')
        return b''.join((
            b'--', delimiter, b'\r\n',
            b'Content-Type: application/dicom\r\n',
            b'\r\n',
            payload,
            b'\r\n--', delimiter, b'--\r\n',
        ))

    def _get_json(self, path, params=None):
        """GET ``path`` under the base URL and decode the DICOM JSON reply."""
        response = requests.get(
            f"{self.base_url}{path}",
            params=params,
            headers={'Accept': self.DICOM_JSON},
            timeout=self.timeout,
        )
        response.raise_for_status()
        # A search with no matches answers 204 with an empty body, which
        # response.json() cannot parse.
        if response.status_code == 204 or not response.content:
            return []
        return response.json()

    def search_studies(self, patient_id=None, modality=None,
                       start_date=None, end_date=None):
        """Search for studies.

        Args:
            patient_id: Optional PatientID to match.
            modality: Optional value for ModalitiesInStudy.
            start_date: Optional range start, ``YYYYMMDD``. With no
                ``end_date`` the range ends today.
            end_date: Optional range end, ``YYYYMMDD``. Given alone it
                matches every study up to that date.

        Returns:
            The decoded JSON response, or ``[]`` when nothing matches.

        Raises:
            requests.HTTPError: On a 4xx/5xx response.
        """
        query = self._build_study_query(patient_id, modality,
                                        start_date, end_date)
        return self._get_json('/studies', query)

    def retrieve_study(self, study_uid):
        """Retrieve a study's metadata as DICOM JSON.

        JSON is served by the ``/metadata`` sub-resource; the bare study
        resource returns multipart DICOM files.
        """
        return self._get_json(f"/studies/{study_uid}/metadata")

    def retrieve_series(self, study_uid, series_uid):
        """Retrieve a series' metadata as DICOM JSON."""
        return self._get_json(
            f"/studies/{study_uid}/series/{series_uid}/metadata"
        )

    def retrieve_instance(self, study_uid, series_uid, instance_uid):
        """Retrieve a single instance's metadata as DICOM JSON."""
        return self._get_json(
            f"/studies/{study_uid}/series/{series_uid}"
            f"/instances/{instance_uid}/metadata"
        )

    def store_instance(self, dicom_file):
        """Upload one DICOM file with STOW-RS.

        Args:
            dicom_file: Path of the file to upload. The file is read fully
                into memory to build the multipart body.

        Returns:
            True only when the server answers 200.
        """
        import uuid

        with open(dicom_file, 'rb') as f:
            payload = f.read()

        # STOW-RS expects multipart/related with a DICOM part, not a bare
        # application/dicom body.
        boundary = uuid.uuid4().hex
        response = requests.post(
            f"{self.base_url}/studies",
            data=self._encode_stow_body(payload, boundary),
            headers={
                'Content-Type': (
                    'multipart/related; type="application/dicom"; '
                    f'boundary={boundary}'
                ),
            },
            timeout=self.timeout,
        )
        return response.status_code == 200
Cloud Storage
Hybrid Cloud Architecture
# Medical imaging cloud storage
# NOTE(review): nesting reconstructed from a flattened listing. `lifecycle`
# is assumed to belong to the S3 bucket and `encryption` to apply to all
# storage; confirm both placements.
storage:
  on_premises:
    type: "MinIO"
    capacity: "500TB"
    use_case: "Hot storage, PACS cache"
  cloud:
    aws:
      s3:
        bucket: "medical-imaging-archive"
        storage_classes:
          - STANDARD
          - INTELLIGENT_TIERING
          - GLACIER
          - DEEP_ARCHIVE
        lifecycle:
          - rule: "Move to Glacier after 90 days"
            transition:
              days: 90
              storage_class: GLACIER
          - rule: "Move to Deep Archive after 2 years"
            transition:
              days: 730
              storage_class: DEEP_ARCHIVE
    azure:
      blob:
        tier: "Cool"
        immutability: true
  encryption:
    at_rest: "AES-256"
    in_transit: "TLS 1.3"
AI Diagnostics
Imaging AI Pipeline
#!/usr/bin/env python3
"""Medical imaging AI."""
import torch
from torchvision import models
class MedicalImagingAI:
    """Run a saved PyTorch classification model on DICOM pixel data."""

    INPUT_SIZE = (224, 224)  # spatial size fed to the model
    NORM_MEAN = 0.485        # normalisation constants applied after scaling
    NORM_STD = 0.229

    def __init__(self, model_path, device='cpu'):
        """Load the model and switch it to evaluation mode.

        Args:
            model_path: File containing a model object saved with
                ``torch.save``.
            device: Device the model is mapped to and inputs are moved to.
                Mapping explicitly avoids a mismatch between a GPU-saved
                model and the CPU tensors built by ``preprocess``.
        """
        self.device = torch.device(device)
        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code. Only load checkpoints from a trusted source.
        # NOTE(review): newer PyTorch releases default to weights_only=True,
        # which rejects whole pickled modules; confirm the checkpoint format.
        self.model = torch.load(model_path, map_location=self.device)
        self.model.eval()

    def preprocess(self, dicom_image):
        """Turn a DICOM image into a normalised model input tensor.

        Args:
            dicom_image: Object exposing a ``pixel_array`` NumPy array,
                either 2-D (H, W) or 3-D.

        Returns:
            A float32 tensor of shape (C, 224, 224).

        Raises:
            ValueError: If ``pixel_array`` is neither 2-D nor 3-D.
        """
        import numpy as np

        image = np.asarray(dicom_image.pixel_array).astype(np.float32)

        # Min-max scale to 0-1. A constant image has zero range and would
        # otherwise divide by zero and fill the tensor with NaN.
        low = float(image.min())
        span = float(image.max()) - low
        if span > 0:
            image = (image - low) / span
        else:
            image = np.zeros_like(image)

        tensor = torch.from_numpy(image)
        if tensor.ndim == 2:
            tensor = tensor.unsqueeze(0)
        elif tensor.ndim == 3:
            # Treated as channels-last (H, W, C), the layout ToTensor expects.
            # NOTE(review): multi-frame data may be (frames, H, W); confirm.
            tensor = tensor.permute(2, 0, 1)
        else:
            raise ValueError(
                f"pixel_array must be 2-D or 3-D, got {tensor.ndim}-D"
            )

        # Resize on a tensor: torchvision's Resize rejects a NumPy array, so
        # resizing before the tensor conversion raises a TypeError.
        tensor = torch.nn.functional.interpolate(
            tensor.unsqueeze(0),
            size=self.INPUT_SIZE,
            mode='bilinear',
            align_corners=False,
            antialias=True,
        ).squeeze(0)
        return (tensor - self.NORM_MEAN) / self.NORM_STD

    def detect_anomalies(self, dicom_image):
        """Classify one image.

        Args:
            dicom_image: Object exposing a ``pixel_array`` NumPy array.

        Returns:
            A dict with the top ``class`` index, its ``confidence`` and the
            full list of softmax ``probabilities``.
        """
        # Add the batch dimension and match the model's device.
        input_tensor = self.preprocess(dicom_image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            output = self.model(input_tensor)
            probabilities = torch.softmax(output, dim=1)

        top_prob, top_class = torch.max(probabilities, 1)
        return {
            'class': top_class.item(),
            'confidence': top_prob.item(),
            'probabilities': probabilities[0].tolist(),
        }

    def segment_organ(self, dicom_image):
        """Segment an organ in the image. Not implemented; returns None."""
        # Placeholder for a segmentation model that returns a mask overlay.
        pass
Comments