Exploring a DICOM dataset

Tips and tricks for the next step in Project Jupyter evolution.
python
development
DICOM
Author

Eric M. Baumel

Published

April 16, 2023

DICOM Introduction

Exploring a DICOM dataset

Adapted from:

Kaggle Kernel: Getting to know DICOM and the Data by schlerp

https://www.kaggle.com/schlerp/getting-to-know-dicom-and-the-data/notebook

A Multi-platform DICOM Toolbox for Academic Radiologists by

http://uwmsk.org/jupyter/Jupyter_DICOM_toolbox.html

A Radiologist’s Exploration of the Stanford ML Group’s MRNet data by Walter Wiggins

https://towardsdatascience.com/a-radiologists-exploration-of-the-stanford-ml-group-s-mrnet-data-8e2374e11bfb

Imports

Import libraries and write settings here.

# File handling
import os
import glob

# Data manipulation
import pandas as pd

import numpy as np

# Data visualization
import matplotlib
from matplotlib import pyplot as plt
from matplotlib import image as mpimage

import seaborn as sns
from tqdm import tqdm

# Date manipulation
import datetime

# DICOM format reader
import pydicom
def show_dcm_info(ds, file_path=None):
    """Print a short, human-readable summary of a DICOM dataset header.

    Args:
        ds: Dataset to describe. Header fields are looked up by keyword;
            any field the dataset does not carry is printed as "N/A"
            instead of raising ``AttributeError``.
        file_path: Path shown on the "Filename" line. When omitted, the
            dataset's own ``filename`` attribute is used if it has one, so
            the function no longer depends on a module-level ``file_path``.
    """
    missing = "N/A"
    if file_path is None:
        file_path = getattr(ds, "filename", missing)

    print()
    print("Filename.........: ", file_path)
    print("Storage type.....: ", getattr(ds, "SOPClassUID", missing))
    print()

    pat_name = getattr(ds, "PatientName", None)
    if pat_name is None:
        display_name = missing
    else:
        # Join only the non-empty components so a name without a given
        # name is not printed with a dangling ", ".
        name_parts = (pat_name.family_name, pat_name.given_name)
        display_name = ", ".join(part for part in name_parts if part)
    print("Patient name.....: ", display_name)

    header_fields = (
        ("Patient ID.......: ", "PatientID"),
        ("Patient Age......: ", "PatientAge"),
        ("Patient Sex......: ", "PatientSex"),
        ("Modality.........: ", "Modality"),
        ("Body Part........: ", "BodyPartExamined"),
        ("View Position....: ", "ViewPosition"),
    )
    for label, keyword in header_fields:
        print(label, getattr(ds, keyword, missing))

    # Image geometry is only reported when the dataset carries pixel data.
    if 'PixelData' in ds:
        rows = int(ds.Rows)
        cols = int(ds.Columns)
        size = len(ds.PixelData)  # size of the stored (possibly compressed) pixel bytes
        print(f"Image size.......:  {rows:d} x {cols:d}, {size:d} bytes")
        if 'PixelSpacing' in ds:
            print("Pixel spacing....: ", ds.PixelSpacing)
def plot_pixel_array(ds, figsize=(10, 10)):
    """Display the dataset's pixel array in a new figure using the 'bone' colormap.

    Args:
        ds: Dataset exposing a ``pixel_array`` attribute.
        figsize: Figure size passed to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    pixels = ds.pixel_array
    bone_map = plt.cm.bone
    plt.imshow(pixels, cmap=bone_map)
    plt.show()
# Show the header summary and the image for the first few files in the folder.
num_to_plot = 5
path = '../dicom/train_images/'

# enumerate(start=1) replaces the hand-maintained counter; files are taken in
# the order os.listdir returns them.
for i, file_name in enumerate(os.listdir(path), start=1):
    file_path = os.path.join(path, file_name)
    ds = pydicom.dcmread(file_path)
    show_dcm_info(ds)
    plot_pixel_array(ds)

    # Stop once num_to_plot files have been shown; `ds` keeps the last one read.
    if i >= num_to_plot:
        break

Filename.........:  ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.300.1517875162.258081.dcm
Storage type.....:  1.2.840.10008.5.1.4.1.1.7

Patient name.....:  88c14312-3265-4a3f-b7bb-41818107d607, 
Patient ID.......:  88c14312-3265-4a3f-b7bb-41818107d607
Patient Age......:  58
Patient Sex......:  F
Modality.........:  CR
Body Part........:  CHEST
View Position....:  AP
Image size.......:  1024 x 1024, 154050 bytes
Pixel spacing....:  [0.139, 0.139]


Filename.........:  ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.304.1517875162.301989.dcm
Storage type.....:  1.2.840.10008.5.1.4.1.1.7

Patient name.....:  fa43083b-0d94-4849-a5c4-40120c380164, 
Patient ID.......:  fa43083b-0d94-4849-a5c4-40120c380164
Patient Age......:  60
Patient Sex......:  M
Modality.........:  CR
Body Part........:  CHEST
View Position....:  PA
Image size.......:  1024 x 1024, 150238 bytes
Pixel spacing....:  [0.14300000000000002, 0.14300000000000002]


Filename.........:  ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.301.1517875162.280319.dcm
Storage type.....:  1.2.840.10008.5.1.4.1.1.7

Patient name.....:  97d32841-8836-4630-873c-be0b4d2e5478, 
Patient ID.......:  97d32841-8836-4630-873c-be0b4d2e5478
Patient Age......:  50
Patient Sex......:  F
Modality.........:  CR
Body Part........:  CHEST
View Position....:  PA
Image size.......:  1024 x 1024, 159756 bytes
Pixel spacing....:  [0.14300000000000002, 0.14300000000000002]


Filename.........:  ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.303.1517875162.295039.dcm
Storage type.....:  1.2.840.10008.5.1.4.1.1.7

Patient name.....:  019a6d6a-4eac-4372-a1d4-fe9193826333, 
Patient ID.......:  019a6d6a-4eac-4372-a1d4-fe9193826333
Patient Age......:  44
Patient Sex......:  M
Modality.........:  CR
Body Part........:  CHEST
View Position....:  PA
Image size.......:  1024 x 1024, 130602 bytes
Pixel spacing....:  [0.171, 0.171]


Filename.........:  ../dicom/train_images/1.2.276.0.7230010.3.1.4.8323329.302.1517875162.286330.dcm
Storage type.....:  1.2.840.10008.5.1.4.1.1.7

Patient name.....:  e6f57005-8262-46ac-92ab-7c858e4ae126, 
Patient ID.......:  e6f57005-8262-46ac-92ab-7c858e4ae126
Patient Age......:  74
Patient Sex......:  M
Modality.........:  CR
Body Part........:  CHEST
View Position....:  PA
Image size.......:  1024 x 1024, 136118 bytes
Pixel spacing....:  [0.171, 0.171]

Data in DICOM header

ds
Dataset.file_meta -------------------------------
(0002, 0000) File Meta Information Group Length  UL: 200
(0002, 0001) File Meta Information Version       OB: b'\x00\x01'
(0002, 0002) Media Storage SOP Class UID         UI: Secondary Capture Image Storage
(0002, 0003) Media Storage SOP Instance UID      UI: 1.2.276.0.7230010.3.1.4.8323329.302.1517875162.286330
(0002, 0010) Transfer Syntax UID                 UI: JPEG Baseline (Process 1)
(0002, 0012) Implementation Class UID            UI: 1.2.276.0.7230010.3.0.3.6.0
(0002, 0013) Implementation Version Name         SH: 'OFFIS_DCMTK_360'
-------------------------------------------------
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0016) SOP Class UID                       UI: Secondary Capture Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.2.276.0.7230010.3.1.4.8323329.302.1517875162.286330
(0008, 0020) Study Date                          DA: '19010101'
(0008, 0030) Study Time                          TM: '000000.00'
(0008, 0050) Accession Number                    SH: ''
(0008, 0060) Modality                            CS: 'CR'
(0008, 0064) Conversion Type                     CS: 'WSD'
(0008, 0090) Referring Physician's Name          PN: ''
(0008, 103e) Series Description                  LO: 'view: PA'
(0010, 0010) Patient's Name                      PN: 'e6f57005-8262-46ac-92ab-7c858e4ae126'
(0010, 0020) Patient ID                          LO: 'e6f57005-8262-46ac-92ab-7c858e4ae126'
(0010, 0030) Patient's Birth Date                DA: ''
(0010, 0040) Patient's Sex                       CS: 'M'
(0010, 1010) Patient's Age                       AS: '74'
(0018, 0015) Body Part Examined                  CS: 'CHEST'
(0018, 5101) View Position                       CS: 'PA'
(0020, 000d) Study Instance UID                  UI: 1.2.276.0.7230010.3.1.2.8323329.302.1517875162.286329
(0020, 000e) Series Instance UID                 UI: 1.2.276.0.7230010.3.1.3.8323329.302.1517875162.286328
(0020, 0010) Study ID                            SH: ''
(0020, 0011) Series Number                       IS: '1'
(0020, 0013) Instance Number                     IS: '1'
(0020, 0020) Patient Orientation                 CS: ''
(0028, 0002) Samples per Pixel                   US: 1
(0028, 0004) Photometric Interpretation          CS: 'MONOCHROME2'
(0028, 0010) Rows                                US: 1024
(0028, 0011) Columns                             US: 1024
(0028, 0030) Pixel Spacing                       DS: [0.171, 0.171]
(0028, 0100) Bits Allocated                      US: 8
(0028, 0101) Bits Stored                         US: 8
(0028, 0102) High Bit                            US: 7
(0028, 0103) Pixel Representation                US: 0
(0028, 2110) Lossy Image Compression             CS: '01'
(0028, 2114) Lossy Image Compression Method      CS: 'ISO_10918_1'
(7fe0, 0010) Pixel Data                          OB: Array of 136118 elements

Info about underlying pixel data

# Inspect the decoded pixel data: container type, element dtype and shape,
# one per line.
im = ds.pixel_array
print(type(im), im.dtype, im.shape, sep='\n')
<class 'numpy.ndarray'>
uint8
(1024, 1024)

Convert DICOM to JPG/PNG

import cv2
def dicom_to_jpg(folder_path='../dicom/train_images/',
                 output_path='../dicom/converted/',
                 png=False):
    """Convert every file in a DICOM folder to a JPG (or PNG) image.

    Args:
        folder_path: Folder whose files are read with ``pydicom.dcmread``.
        output_path: Folder the converted images are written to. It is
            created when it does not exist yet.
        png: Write ``.png`` files instead of ``.jpg`` when true.
    """
    extension = '.png' if png else '.jpg'

    # cv2.imwrite signals an unwritable target through its return value
    # rather than an exception, so make sure the output folder exists.
    os.makedirs(output_path, exist_ok=True)

    for n, image in enumerate(os.listdir(folder_path)):
        ds = pydicom.dcmread(os.path.join(folder_path, image))

        # Swap only a trailing '.dcm' (any letter case) for the image
        # extension; the dotted UID part of the file name is left intact.
        if image.lower().endswith('.dcm'):
            stem = image[:-len('.dcm')]
        else:
            stem = image
        target = os.path.join(output_path, stem + extension)

        if not cv2.imwrite(target, ds.pixel_array):
            print('Could not write {}'.format(target))

        # n is the zero-based index of the file just processed.
        if n % 50 == 0:
            print('{} image converted'.format(n))
dicom_to_jpg()
0 image converted

Patient Demographics

# Load functions from scikit-image library

from skimage import exposure
import skimage.morphology as morp
from skimage.filters import rank

# For date functions
from datetime import datetime
ds.dir('patient')
['PatientAge',
 'PatientBirthDate',
 'PatientID',
 'PatientName',
 'PatientOrientation',
 'PatientSex']
ds.PatientName
'e6f57005-8262-46ac-92ab-7c858e4ae126'
# Change Patient Name

AKA = "Blaine, Richard"
ds.PatientName = AKA
ds.PatientName
'Blaine, Richard'
# Change DOB

epoch = '19700101'
ds.PatientBirthDate = epoch

ds.PatientBirthDate
'19700101'
! python --version
Python 3.9.7
# Calculate patient age at time of scan.
# Both header values are 'YYYYMMDD' strings. The age is the number of whole
# days between them divided by 365, so it comes out negative whenever the
# study date precedes the birth date.
dob = datetime.strptime(ds.PatientBirthDate, '%Y%m%d')
study_date = datetime.strptime(ds.StudyDate, '%Y%m%d')
elapsed = study_date - dob
age = elapsed.days / 365

print("Age = ", age)
Age =  -69.04657534246576
# Calculate current age: whole days from the birth date to the present
# moment, divided by 365.
dob = datetime.strptime(ds.PatientBirthDate, '%Y%m%d')
now = datetime.now()
lifetime = now - dob
current_age = lifetime.days / 365

print("Current age = ", current_age)
Current age =  53.25205479452055
# Change study date

fictional = '20210504'
ds.StudyDate = fictional

ds.StudyDate
'20210504'

Anonymizing a folder of DICOM images

# Strip identifying header fields from every .DCM file in 'cleft/'.
# NOTE(review): recursive=True only changes matching for patterns that
# contain '**'; this pattern matches files directly inside 'cleft/' only.
for filename in glob.iglob('cleft/*.DCM', recursive=True):
    # dcmread is the reader used elsewhere in this notebook; read_file is a
    # deprecated alias for it.
    ds = pydicom.dcmread(filename, force=True)

    # Obtain the name of the folder containing the DICOM files
    foldername = os.path.basename(os.path.dirname(filename))

    # Delete patient name, patientID, accession number and patient birthdate.
    # Tags a file does not carry are skipped instead of raising.
    for keyword in ('PatientName', 'PatientID', 'AccessionNumber', 'PatientBirthDate'):
        if keyword in ds:
            delattr(ds, keyword)

    # The anonymized dataset overwrites the original file in place.
    ds.save_as(filename)
    print(filename)

iPython Widgets

from ipywidgets import interactive, interact, widgets, Layout, Button, Box, Dropdown, IntSlider
from IPython.display import display
# Turn off warnings
import warnings
warnings.filterwarnings('ignore')
def contrast_adjust(image_name, percentile_lo, percentile_hi):
    """Show an image with its intensities rescaled between two percentiles.

    Args:
        image_name: Python expression, given as a string, that evaluates to
            the image array (for example ``'ds.pixel_array'``).
        percentile_lo: Percentile of the image used as the lower bound of
            the input intensity range.
        percentile_hi: Percentile of the image used as the upper bound of
            the input intensity range.

    Side effects:
        Stores the rescaled image in the global ``img_rescale_interactive``
        and the expression string in the global ``image_name_global``.
    """
    # save optimized image array to global variable so that other functions can use
    global img_rescale_interactive, image_name_global

    image_name_global = image_name

    # SECURITY: eval() executes whatever expression string it is given, so
    # image_name must only ever come from a trusted source.
    # The expression is evaluated once and the result reused below.
    image = eval(image_name)

    p_lo, p_hi = np.percentile(image, (percentile_lo, percentile_hi))
    img_rescale = exposure.rescale_intensity(image, in_range=(p_lo, p_hi))

    img_rescale_interactive = img_rescale

    plt.figure(figsize=(6, 6), dpi=100)
    plt.imshow(img_rescale, cmap=plt.cm.gray)

    plt.show()
# Buttons for saving the contrast-adjusted image in different formats
button_jpg = widgets.Button(description="Save .jpg version")
button_png = widgets.Button(description="Save .png version")
button_tiff = widgets.Button(description="Save .tiff version")

items = [button_jpg, button_png, button_tiff]

# Lay the buttons out in a single flex row
box_layout = Layout(display='flex', flex_flow='row', align_items='stretch')
box = Box(children=items, layout=box_layout)


def jpg_button_clicked(b):
    """Save the global rescaled image as '<image_name_global>.jpg'."""
    target = image_name_global + '.jpg'
    plt.imsave(target, img_rescale_interactive, cmap=plt.cm.gray)


def png_button_clicked(b):
    """Save the global rescaled image as '<image_name_global>.png'."""
    target = image_name_global + '.png'
    plt.imsave(target, img_rescale_interactive, cmap=plt.cm.gray)


def tiff_button_clicked(b):
    """Save the global rescaled image as '<image_name_global>.tiff'."""
    target = image_name_global + '.tiff'
    plt.imsave(target, img_rescale_interactive, cmap=plt.cm.gray)


# Attach each handler to its button
button_jpg.on_click(jpg_button_clicked)
button_png.on_click(png_button_clicked)
button_tiff.on_click(tiff_button_clicked)
# Wrap contrast_adjust in interactive controls. image_name is passed as the
# string 'ds.pixel_array', which contrast_adjust resolves with eval(); each
# percentile argument is given as a (1, 100, .5) tuple.
# NOTE(review): the tuples presumably mean (min, max, step) for a slider;
# confirm against the ipywidgets abbreviation rules.
w = interactive(contrast_adjust, image_name='ds.pixel_array', percentile_lo=(1,100,.5), percentile_hi=(1,100,.5))

# Render the controls in the notebook
display(w)

box
image_name_global
'ds.pixel_array'

Invert Image

plt.figure(figsize = (6, 6), dpi=100)
figure = plt.imshow(ds.pixel_array, cmap=plt.cm.gist_yarg)

Mirror Image R > L

plt.imshow(np.fliplr(ds.pixel_array), cmap=plt.cm.gray)
<matplotlib.image.AxesImage at 0x7fa920252670>

Flip Image

plt.imshow(np.flipud(ds.pixel_array), cmap=plt.cm.gray)
<matplotlib.image.AxesImage at 0x7fa911a87e80>

Histogram

# Histogram of all pixel intensities, in 64 bins
pixels = ds.pixel_array
plt.hist(pixels.flatten(), bins=64)
plt.show()

# Shape and value range of the pixel array
print("pixel array = ", pixels.shape)
print("minimum value = ", np.amin(pixels))
print("maximum value = ", np.amax(pixels))

pixel array =  (1024, 1024)
minimum value =  0
maximum value =  248