Commit 0b788adf authored by Dominic Couture

initiate __main__.py and config.py, rewrite all functions in dose_OCR.py, simplify imports

parent 0b06f0e4
# !/usr/bin/env python
# -*- coding: utf-8 -*-
from config import * from config import *
from model import * from dose_OCR import *
from get_DICOM import * from time import strftime
from traceback import format_exc
from logging import basicConfig, warning, INFO
# Configuration of the log file
basicConfig(
filename=join(dcm_dir, 'Logs/Dose_Report_{}.log'.format(strftime('%Y-%m-%d %H:%M:%S'))),
format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %H:%M:%S -', level=INFO)
if __name__ == '__main__': if __name__ == '__main__':
print(read_dicom('/Users/Dominic/Desktop/PACS_Dose/Rapports_de_dose/Exemples/GE.dcm'))
dcm_dir = '/Users/Dominic/Desktop/PACS_Dose/Dose_Reports'
#!/usr/bin/env python # !/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Extraction of dose information from dose screens created by `Ge`, `Siemens` and `Toshiba`
# CT Scanners using OCR.
import dicom from config import *
import numpy as np from pydicom.dicomio import read_file
from pydicom.misc import is_dicom
from pydicom.tag import Tag
from os.path import join from os.path import join
from os import listdir, remove from os import listdir, remove
import numpy as np
import pytesseract as pt import pytesseract as pt
from PIL import Image from PIL.Image import open as open_image
import scipy.misc from scipy.misc import imfilter, imresize, toimage
# Dictionary containing the limits of the pertinent text areas on the dose screen for all
# manufacturers in a tuple (top, bottom, left, right).
column_width = { column_width = {
'GE': { 'GE': {
'Series': (160, 511, 0, 54), 'Series': (160, 511, 0, 54),
...@@ -32,68 +39,62 @@ column_width = { ...@@ -32,68 +39,62 @@ column_width = {
'Toshiba': { 'Toshiba': {
'Total mAs': (), 'Total mAs': (),
'CTDIvol': (), 'CTDIvol': (),
'DLP': () 'DLP': ()}
}
} }
# Synonyms dictionary used to identify the correct manufacturer (the one used in `column_width`)
manufacturers_dict = { manufacturers_dict = {
'GE MEDICAL SYSTEMS': 'GE', 'GE MEDICAL SYSTEMS': 'GE', 'GE': 'GE',
'GE': 'GE',
'SIEMENS': 'Siemens', 'SIEMENS': 'Siemens',
'Toshiba': 'Toshiba'} 'Toshiba': 'Toshiba'}
def get_array_from_overlay(dcm): def get_array_from_overlay(dcm):
""" Return a 2D numpy array of the overlay of index 1 for the given DICOM file. """ Return a 2D numpy array of the overlay of index 1 for the given DICOM file from
Usefull to extract dose informaiton from `Siemens MedCom Object Graphics`. binary data. Useful to extract dose informaiton from `Siemens MedCom Object Graphics`.
""" """
from dicom import tag size = (int(dcm[Tag(0x60000010)].value), int(dcm[Tag(0x60000011)].value))
rows = dcm[tag(0x60000010)].value overlay_raw = dcm[Tag(0x60003000)].value
cols = dcm[tag(0x60000011)].value area = size[0] * size[1]
overlay_raw = dcm[tag(0x60003000)].value length = len(overlay_raw)
n_bits = int(rows * cols / len(overlay_raw)) n_bits = int(area / length)
decoded_linear = np.zeros(rows * cols) decoded_linear = np.zeros(area)
for i in range(1, len(overlay_raw)): for i in range(1, length):
bits = tuple(int(j) for j in format(overlay_raw[i], '08b'))[::-1] bits = tuple(int(j) for j in format(overlay_raw[i], '08b'))[::-1]
for k in range (0, n_bits): for k in range (0, n_bits):
decoded_linear[i * n_bits + k] = bits[k] decoded_linear[i * n_bits + k] = bits[k]
return np.reshape(decoded_linear, [rows, cols]) return np.reshape(decoded_linear, size)
def read_column(array, limits, name): def read_column(array, limits, name):
""" Use OCR to read the content of an image and return a list of strings. `limits` is """ Use OCR to read the content of an image and return a list of strings. `limits` is
a tuple containing the limits of the column in the format (top, bottom, left, right). a tuple containing the limits of the column in the format (top, bottom, left, right).
""" """
tmp_png_path = join(dicom_dir, 'png_files', 'column_'+name+'.png') tmp_png_path = join(dcm_dir, 'PNG_Files', 'column_' + name + '.png')
column = np.invert(array[limits[0]:limits[1], limits[2]:limits[3]]) column = np.invert(array[limits[0]:limits[1], limits[2]:limits[3]])
new_column = scipy.misc.imfilter(scipy.misc.imresize(column, tuple(i*2 for i in column.shape)), 'sharpen') toimage(
scipy.misc.toimage( imfilter(imresize(column, tuple(i*2 for i in column.shape)), 'sharpen'),
new_column,
high=255, high=255,
low=0).save(tmp_png_path) low=0).save(tmp_png_path)
string = pt.image_to_string(Image.open(tmp_png_path)) string = pt.image_to_string(open_image(tmp_png_path))
remove(tmp_png_path) remove(tmp_png_path)
return string.split('\n') return string.split('\n')
def read_dicom(dcm): def read_dicom(dcm_path):
""" Return a dictionary with information extracted from the DICOM image. """ Return a dictionary with information extracted from the DICOM image located at a given path.
""" """
dcm = dicom.read_file(join(dicom_dir, dcm)) dcm = read_file(dcm_path)
manufacturer = manufacturers_dict[ds.Manufacturer] manufacturer = manufacturers_dict[dcm.Manufacturer]
limits = column_width[manufacturer]
if manufacturer == 'GE': if manufacturer == 'GE':
info = {i: read_column(ds.pixel_array, limits[i], i) for i in limits} pixel_array = dcm.pixel_array
series = [{
'Series': int(i + 2),
'Type': info['Type'][i + 1],
'CTDIvol': float(info['CTDIvol'][i]),
'DLP': float(info['DLP'][i])} for i in range(len(info['DLP']) - 1)]
elif manufacturer == 'Siemens': elif manufacturer == 'Siemens':
pixel_array = get_array_from_overlay(dcm) pixel_array = get_array_from_overlay(dcm)
info = {i: read_column(ds.pixel_array, limits[i], i) for i in limits} limits = column_width[manufacturer]
info = {i: read_column(pixel_array, limits[i], i) for i in limits}
series = [{
'Series': int(i + 2),
'Type': info['Type'][i + 1],
'CTDIvol': float(info['CTDIvol'][i]),
'DLP': float(info['DLP'][i])} for i in range(len(info['DLP']) - 1)]
return {'Total_DLP': float(info['DLP'][-1]), 'Series': series} return {'Total_DLP': float(info['DLP'][-1]), 'Series': series}
dicom_dir = '/Users/Dominic/Desktop/Dose_Reports'
print(read_dicom('visible.dcm'))
...@@ -8,18 +8,18 @@ def get_array_from_overlay(dcm): ...@@ -8,18 +8,18 @@ def get_array_from_overlay(dcm):
""" Return a 2D numpy array of the overlay of index 1 for the given DICOM file. """ Return a 2D numpy array of the overlay of index 1 for the given DICOM file.
Usefull to extract dose informaiton from `Siemens MedCom Object Graphics`. Usefull to extract dose informaiton from `Siemens MedCom Object Graphics`.
""" """
rows = dcm[Tag(0x60000010)].value size = (int(dcm[Tag(0x60000010)].value), int(dcm[Tag(0x60000011)].value))
cols = dcm[Tag(0x60000011)].value
overlay_raw = dcm[Tag(0x60003000)].value overlay_raw = dcm[Tag(0x60003000)].value
n_bits = int(rows * cols / len(overlay_raw)) area = size[0] * size[1]
decoded_linear = np.zeros(rows * cols) length = len(overlay_raw)
for i in range(1, len(overlay_raw)): n_bits = int(area / length)
decoded_linear = np.zeros(area)
for i in range(1, length):
bits = tuple(int(j) for j in format(overlay_raw[i], '08b'))[::-1] bits = tuple(int(j) for j in format(overlay_raw[i], '08b'))[::-1]
for k in range (0, n_bits): for k in range (0, n_bits):
decoded_linear[i * n_bits + k] = bits[k] decoded_linear[i * n_bits + k] = bits[k]
return np.reshape(decoded_linear, [rows, cols]) return np.reshape(decoded_linear, size)
dcm = read_file(argv[-1]) overlay = get_array_from_overlay(read_file(argv[-1]))
overlay = get_array_from_overlay(dcm)
plt.imshow(overlay) plt.imshow(overlay)
plt.show() plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment