Commit 8ebbdac4 authored by Dominic Couture

Add ocr capability and pptx file

parent d4767fbb
File added
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import dicom
import numpy as np
from os.path import join
from os import listdir, remove
import pytesseract as pt
from PIL import Image
import scipy.misc
# Pixel bounds of each table column in the dose-report image, keyed first by
# manufacturer layout and then by column heading. Every value is a
# (top, bottom, left, right) tuple, the format read_column() slices with.
# NOTE(review): the Siemens and Toshiba bounds are empty tuples -- presumably
# placeholders until those layouts are measured; confirm before relying on them.
column_width = {
    'GE': {
        heading: (160, 511, left, right)
        for heading, left, right in (
            ('Series', 0, 54),
            ('Type', 55, 128),
            ('Scan Range', 129, 270),
            ('CTDIvol', 271, 339),
            ('DLP', 340, 424),
            ('Phantom', 425, 511),
        )
    },
    'Siemens': dict.fromkeys(
        ('Total mAs', 'Total DLP', 'Name', 'Scan', 'kV',
         'mAs/ref.', 'CTDIvol', 'DLP', 'TI', 'cSL'),
        ()),
    'Toshiba': dict.fromkeys(('Total mAs', 'CTDIvol', 'DLP'), ()),
}
# Maps the Manufacturer string read from the DICOM file to the layout key
# used in column_width.
# NOTE(review): 'SIEMENS' and the empty string both resolve to the GE layout --
# presumably deliberate while the Siemens bounds are unfilled; confirm.
manufacturers_dict = dict.fromkeys(('GE MEDICAL SYSTEMS', '', 'SIEMENS'), 'GE')
manufacturers_dict['Toshiba'] = 'Toshiba'
def read_column(array, limits, name):
    """Use OCR to read one column of an image and return its lines of text.

    Args:
        array: pixel array, indexed as array[rows, columns].
        limits: bounds of the column in the format
            (top, bottom, left, right).
        name: label of the column. Kept so existing callers keep working; it
            is no longer used because no intermediate PNG file is written.

    Returns:
        list of str: the OCR output split on newline characters.
    """
    # Crop the column and apply a bitwise NOT to the pixel values.
    column = np.invert(array[limits[0]:limits[1], limits[2]:limits[3]])

    # Double the size and sharpen before OCR (presumably to help recognition).
    # NOTE: scipy.misc.imresize/imfilter/toimage were deprecated in SciPy 1.0
    # and removed in later releases, so this requires an older SciPy.
    doubled = scipy.misc.imresize(column, tuple(2 * side for side in column.shape))
    sharpened = scipy.misc.imfilter(doubled, 'sharpen')
    image = scipy.misc.toimage(sharpened, high=255, low=0)

    # toimage() already yields a PIL image, which pytesseract accepts
    # directly. The previous save/re-open/remove round-trip through a PNG
    # required a 'png_files' directory to exist, broke on column names
    # containing '/', and left the file behind whenever OCR raised.
    return pt.image_to_string(image).split('\n')
def read_dicom(dcm):
    """Return a dictionary with information extracted from the DICOM image.

    Args:
        dcm: file name of the DICOM image, joined onto the module-level
            dicom_dir.

    Returns:
        dict: {'Total_DLP': float, 'Series': list of dict}, where each series
        dict has the keys 'Series' (int), 'Type' (str), 'CTDIvol' (float) and
        'DLP' (float).

    Raises:
        KeyError: if the file's Manufacturer is not in manufacturers_dict.
        NotImplementedError: if the manufacturer resolves to a layout other
            than 'GE', the only one this function knows how to parse.
        ValueError: if an OCR'd CTDIvol or DLP entry cannot be parsed as a
            float.
    """
    ds = dicom.read_file(join(dicom_dir, dcm))
    manufacturer = manufacturers_dict[ds.Manufacturer]

    # Fail early with a clear message. Without this check, layouts whose
    # bounds are still empty tuples crash inside read_column with an opaque
    # IndexError, and any other non-GE layout would silently return None.
    if manufacturer != 'GE':
        raise NotImplementedError(
            'Dose report parsing is only implemented for the GE layout, '
            'got {!r}'.format(manufacturer))

    limits = column_width[manufacturer]
    pixels = ds.pixel_array  # fetch once instead of once per column
    info = {column: read_column(pixels, bounds, column)
            for column, bounds in limits.items()}

    dlp = info['DLP']
    # Every DLP line except the last describes one series; the last line is
    # reported as the total.
    # NOTE(review): the 'Type' column is read one line further down (i + 1)
    # and series numbering starts at 2 -- presumably matching the layout of
    # the GE report; confirm against a sample image.
    series = [{
        'Series': i + 2,
        'Type': info['Type'][i + 1],
        'CTDIvol': float(info['CTDIvol'][i]),
        'DLP': float(dlp[i])} for i in range(len(dlp) - 1)]
    return {'Total_DLP': float(dlp[-1]), 'Series': series}
# Directory holding the dose-report DICOM files. It is looked up as a
# module-level global when read_dicom() runs, so it must stay at module scope.
dicom_dir = '/Users/Dominic/Desktop/Dose_Reports'

if __name__ == '__main__':
    # Run the demo only when executed as a script, so that importing this
    # module does not trigger file reads and OCR.
    print(read_dicom('visible.dcm'))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment