#!/usr/bin/python """ This script emits four linked files that visualize 5, 10, 15, and 20-year cancer survival rates as shown in Tufte, Beautiful Evidence, p. 176. It uses part of Joe Gregorio's sparkline plotter, adds data labels, and sorts each result view by descending survival rate. """ # sparkline plotting courtesy of joe gregorio import sys, os, re import Image, ImageDraw, ImageFont import StringIO def plot_sparkline_smooth(results, args): step = int(args.get('step', '2')) height = int(args.get('height', '20')) if height < 6: height = 6 (dmin, dmax) = [int(x) for x in args.get('limits', '0,100').split(',')] im = Image.new("RGB", ((len(results)-1)*step+4 + 8, height + 8 ), 'white') draw = ImageDraw.Draw(im) coords = zip(range(1,len(results)*step+1, step), [height + 3 - (y-dmin)/(float(dmax - dmin + 1)/(height-4)) for y in results]) # add data labels for i in range(len(coords)): x,y = coords[i] draw.text ( (x,y-10), str(results[i]), fill="black") draw.line(coords, fill="#000000") del draw f = StringIO.StringIO() im.save(f, "PNG") return f.getvalue() # data from tufte, beautiful evidence, p 176 datadict = { 'Prostate' : (99,95,87,81), 'Thyroid' : (96,96,94,95), 'Testis' : (95,94,91,88), 'Melanomas' : (89,87,84,83), 'Breast' : (86,78,71,65), "Hodgkin's" : (85,80,74,67), 'Corpus uteri,uterus' : (84,83,81,79), 'Urinary,bladder' : (82,76,70,68), 'Cervix,uteri' : (71,64,63,60), 'Larynx' : (69,57,46,38), 'Rectum' : (63,55,52,49), 'Kidney,renal pelvis' : (62,54,50,47), 'Colon' : (62,55,54,52), "Non-Hodgkin's" : (58,46,38,34), 'Oral cavity,pharynx' : (57,44,38,33), 'Ovary' : (55,49,50,50), 'Leukemia' : (43,32,30,26), 'Brain,nervous system' : (32,29,28,26), 'Multiple myeloma' : (30,13, 7 ,5), 'Stomach' : (24,19,19,15), 'Lung,bronchus' : (15,11, 8, 6), 'Esophagus' : (14, 8, 8, 5), 'Liver,bile duct' : ( 8, 6, 8, 8), 'Pancreas' : ( 4, 3, 3, 3), } keys = datadict.keys() vertMultiple = 1.5 def normalize(s): s = re.sub("[, ]",'_',s) return s.replace("'","") for fname in keys: 
data = datadict[fname] mn = min(data) mx = max(data) diff = mx-mn args = {'step':100,'height':diff*vertMultiple,'limits':'%d,%d' % (mn,mx)} image_data = plot_sparkline_smooth(data, args) f = open("%s.png" % normalize(fname),'wb') f.write(image_data) f.close rates = ( ( '5 year', 0), ('10 year', 1), ('15 year', 2), ('20 year', 3) ) pagetemplate = """
Estimates of percentage survival rates explanation
| %s | ||||
' % normalize(key) )
# Fill the page template with three values and write the result to the
# file named by fname.
# NOTE(review): links and tabledata are presumably built earlier, in a part
# of the file not visible here -- confirm. Passing the whole datadict as the
# first template value looks suspicious; verify against the template.
page = pagetemplate % (datadict,links,tabledata)
f = open(fname,'w')
f.write(page)
f.close()