文章目录
- 一、MIMIC IV数据库衍生表格(mimic_derived)简介
- 二、衍生表格示例
- 三、衍生表格配置
一、MIMIC IV数据库衍生表格(mimic_derived)简介
MIMIC IV数据库衍生表格实际上就是对数据库数据进一步归纳整理后的数据。由于mimic_derived 参与了很多内容的查询,熟悉这个模块的内容还是很有必要的,但是官方对这个模块缺乏详细的介绍。模块mimic_derived, 而且这个模块仅存在与谷歌云的mimic数据库中, 本地建立的数据库中虽然有这个模块,但是没有内容,需要自己根据官方提供的代码生成。
二、衍生表格示例
1.age,为患者入院(admission)时的年龄,存储在age列。
SELECT ad.subject_id, ad.hadm_id, ad.admittime, pa.anchor_age, pa.anchor_year, DATETIME_DIFF(ad.admittime, DATETIME(pa.anchor_year, 1, 1, 0, 0,0),'YEAR') + pa.anchor_age AS age
FROM mimic_core.admissions ad
INNER JOIN mimic_core.patients pa
ON ad.subject_id = pa.subject_id
;
2.weight_duration, ICU期间体重的变化,体重是反应患者营养状况的重要因素。
-- THIS SCRIPT IS AUTOMATICALLY GENERATED. DO NOT EDIT IT DIRECTLY.
DROP TABLE IF EXISTS weight_durations; CREATE TABLE weight_durations AS
-- This query extracts weights for adult ICU patients with start/stop times
-- if an admission weight is given, then this is assigned from intime to outtime
WITH wt_stg as
(SELECTc.stay_id, c.charttime, case when c.itemid = 226512 then 'admit'else 'daily' end as weight_type-- TODO: eliminate obvious outliers if there is a reasonable weight, c.valuenum as weightFROM mimic_icu.chartevents cWHERE c.valuenum IS NOT NULLAND c.itemid in(226512 -- Admit Wt, 224639 -- Daily Weight)AND c.valuenum > 0
)
-- assign ascending row number
, wt_stg1 as
(selectstay_id, charttime, weight_type, weight, ROW_NUMBER() OVER (partition by stay_id, weight_type order by charttime) as rnfrom wt_stgWHERE weight IS NOT NULL
)
-- change charttime to intime for the first admission weight recorded
, wt_stg2 AS
(SELECT wt_stg1.stay_id, ie.intime, ie.outtime, wt_stg1.weight_type, case when wt_stg1.weight_type = 'admit' and wt_stg1.rn = 1then DATETIME_SUB(ie.intime, INTERVAL '2' HOUR)else wt_stg1.charttime end as starttime, wt_stg1.weightfrom wt_stg1INNER JOIN mimic_icu.icustays ieon ie.stay_id = wt_stg1.stay_id
)
, wt_stg3 as
(selectstay_id, intime, outtime, starttime, coalesce(LEAD(starttime) OVER (PARTITION BY stay_id ORDER BY starttime),DATETIME_ADD(outtime, INTERVAL '2' HOUR)) as endtime, weight, weight_typefrom wt_stg2
)
-- this table is the start/stop times from admit/daily weight in charted data
, wt1 as
(selectstay_id, starttime, coalesce(endtime,LEAD(starttime) OVER (partition by stay_id order by starttime),-- impute ICU discharge as the end of the final weight measurement-- plus a 2 hour "fuzziness" windowDATETIME_ADD(outtime, INTERVAL '2' HOUR)) as endtime, weight, weight_typefrom wt_stg3
)
-- if the intime for the patient is < the first charted daily weight
-- then we will have a "gap" at the start of their stay
-- to prevent this, we look for these gaps and backfill the first weight
-- this adds (153255-149657)=3598 rows, meaning this fix helps for up to 3598 stay_id
, wt_fix as
(select ie.stay_id-- we add a 2 hour "fuzziness" window, DATETIME_SUB(ie.intime, INTERVAL '2' HOUR) as starttime, wt.starttime as endtime, wt.weight, wt.weight_typefrom mimic_icu.icustays ieinner join-- the below subquery returns one row for each unique stay_id-- the row contains: the first starttime and the corresponding weight(SELECT wt1.stay_id, wt1.starttime, wt1.weight, weight_type, ROW_NUMBER() OVER (PARTITION BY wt1.stay_id ORDER BY wt1.starttime) as rnFROM wt1) wtON ie.stay_id = wt.stay_idAND wt.rn = 1and ie.intime < wt.starttime
)
-- add the backfill rows to the main weight table
SELECT
wt1.stay_id
, wt1.starttime
, wt1.endtime
, wt1.weight
, wt1.weight_type
FROM wt1
UNION ALL
SELECT
wt_fix.stay_id
, wt_fix.starttime
, wt_fix.endtime
, wt_fix.weight
, wt_fix.weight_type
FROM wt_fix;
3.GCS评分, 神经系统功能评分。
with base as
(selectsubject_id, ce.stay_id, ce.charttime-- pivot each value into its own column, max(case when ce.ITEMID = 223901 then ce.valuenum else null end) as GCSMotor, max(casewhen ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 0when ce.ITEMID = 223900 then ce.valuenumelse nullend) as GCSVerbal, max(case when ce.ITEMID = 220739 then ce.valuenum else null end) as GCSEyes-- convert the data into a number, reserving a value of 0 for ET/Trach, max(case-- endotrach/vent is assigned a value of 0-- flag it here to later parse speciallywhen ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 1 -- metavisionelse 0 end)as endotrachflag, ROW_NUMBER ()OVER (PARTITION BY ce.stay_id ORDER BY ce.charttime ASC) as rnfrom mimic_icu.chartevents ce-- Isolate the desired GCS variableswhere ce.ITEMID in(-- GCS components, Metavision223900, 223901, 220739)group by ce.subject_id, ce.stay_id, ce.charttime
)
, gcs as (select b.*, b2.GCSVerbal as GCSVerbalPrev, b2.GCSMotor as GCSMotorPrev, b2.GCSEyes as GCSEyesPrev-- Calculate GCS, factoring in special case when they are intubated and prev vals-- note that the coalesce are used to implement the following if:-- if current value exists, use it-- if previous value exists, use it-- otherwise, default to normal, case-- replace GCS during sedation with 15when b.GCSVerbal = 0then 15when b.GCSVerbal is null and b2.GCSVerbal = 0then 15-- if previously they were intub, but they aren't now, do not use previous GCS valueswhen b2.GCSVerbal = 0thencoalesce(b.GCSMotor,6)+ coalesce(b.GCSVerbal,5)+ coalesce(b.GCSEyes,4)-- otherwise, add up score normally, imputing previous value if none available at current timeelsecoalesce(b.GCSMotor,coalesce(b2.GCSMotor,6))+ coalesce(b.GCSVerbal,coalesce(b2.GCSVerbal,5))+ coalesce(b.GCSEyes,coalesce(b2.GCSEyes,4))end as GCSfrom base b-- join to itself within 6 hours to get previous valueleft join base b2on b.stay_id = b2.stay_idand b.rn = b2.rn+1and b2.charttime > DATETIME_ADD(b.charttime, INTERVAL '6' HOUR)
)
-- combine components with previous within 6 hours
-- filter down to cohort which is not excluded
-- truncate charttime to the hour
, gcs_stg as
(selectsubject_id, gs.stay_id, gs.charttime, GCS, coalesce(GCSMotor,GCSMotorPrev) as GCSMotor, coalesce(GCSVerbal,GCSVerbalPrev) as GCSVerbal, coalesce(GCSEyes,GCSEyesPrev) as GCSEyes, case when coalesce(GCSMotor,GCSMotorPrev) is null then 0 else 1 end+ case when coalesce(GCSVerbal,GCSVerbalPrev) is null then 0 else 1 end+ case when coalesce(GCSEyes,GCSEyesPrev) is null then 0 else 1 endas components_measured, EndoTrachFlagfrom gcs gs
)
-- priority is:
-- (i) complete data, (ii) non-sedated GCS, (iii) lowest GCS, (iv) charttime
, gcs_priority as
(selectsubject_id, stay_id, charttime, gcs, gcsmotor, gcsverbal, gcseyes, EndoTrachFlag, ROW_NUMBER() over(PARTITION BY stay_id, charttimeORDER BY components_measured DESC, endotrachflag, gcs, charttime DESC) as rnfrom gcs_stg
)
selectgs.subject_id, gs.stay_id, gs.charttime, GCS AS gcs, GCSMotor AS gcs_motor, GCSVerbal AS gcs_verbal, GCSEyes AS gcs_eyes, EndoTrachFlag AS gcs_unable
from gcs_priority gs
where rn = 1
;
4.sofa评分。
WITH co AS
(select ih.stay_id, ie.hadm_id, hr-- start/endtime can be used to filter to values within this hour, DATETIME_SUB(ih.endtime, INTERVAL '1' HOUR) AS starttime, ih.endtimefrom mimic_derived.icustay_hourly ihINNER JOIN mimic_icu.icustays ieON ih.stay_id = ie.stay_id
)
, pafi as
(-- join blood gas to ventilation durations to determine if patient was ventselect ie.stay_id, bg.charttime-- because pafi has an interaction between vent/PaO2:FiO2, we need two columns for the score-- it can happen that the lowest unventilated PaO2/FiO2 is 68, but the lowest ventilated PaO2/FiO2 is 120-- in this case, the SOFA score is 3, *not* 4., case when vd.stay_id is null then pao2fio2ratio else null end pao2fio2ratio_novent, case when vd.stay_id is not null then pao2fio2ratio else null end pao2fio2ratio_ventFROM mimic_icu.icustays ieinner join mimic_derived.bg bgon ie.subject_id = bg.subject_idleft join mimic_derived.ventilation vdon ie.stay_id = vd.stay_idand bg.charttime >= vd.starttimeand bg.charttime <= vd.endtimeand vd.ventilation_status = 'InvasiveVent'WHERE specimen = 'ART.'
)
, vs AS
(select co.stay_id, co.hr-- vitals, min(vs.mbp) as meanbp_minfrom coleft join mimic_derived.vitalsign vson co.stay_id = vs.stay_idand co.starttime < vs.charttimeand co.endtime >= vs.charttimegroup by co.stay_id, co.hr
)
, gcs AS
(select co.stay_id, co.hr-- gcs, min(gcs.gcs) as gcs_minfrom coleft join mimic_derived.gcs gcson co.stay_id = gcs.stay_idand co.starttime < gcs.charttimeand co.endtime >= gcs.charttimegroup by co.stay_id, co.hr
)
, bili AS
(select co.stay_id, co.hr, max(enz.bilirubin_total) as bilirubin_maxfrom coleft join mimic_derived.enzyme enzon co.hadm_id = enz.hadm_idand co.starttime < enz.charttimeand co.endtime >= enz.charttimegroup by co.stay_id, co.hr
)
, cr AS
(select co.stay_id, co.hr, max(chem.creatinine) as creatinine_maxfrom coleft join mimic_derived.chemistry chemon co.hadm_id = chem.hadm_idand co.starttime < chem.charttimeand co.endtime >= chem.charttimegroup by co.stay_id, co.hr
)
, plt AS
(select co.stay_id, co.hr, min(cbc.platelet) as platelet_minfrom coleft join mimic_derived.complete_blood_count cbcon co.hadm_id = cbc.hadm_idand co.starttime < cbc.charttimeand co.endtime >= cbc.charttimegroup by co.stay_id, co.hr
)
, pf AS
(select co.stay_id, co.hr, min(pafi.pao2fio2ratio_novent) AS pao2fio2ratio_novent, min(pafi.pao2fio2ratio_vent) AS pao2fio2ratio_ventfrom co-- bring in blood gases that occurred during this hourleft join pafion co.stay_id = pafi.stay_idand co.starttime < pafi.charttimeand co.endtime >= pafi.charttimegroup by co.stay_id, co.hr
)
-- sum uo separately to prevent duplicating values
, uo as
(select co.stay_id, co.hr-- uo, MAX(CASE WHEN uo.uo_tm_24hr >= 22 AND uo.uo_tm_24hr <= 30THEN uo.urineoutput_24hr / uo.uo_tm_24hr * 24END) as uo_24hrfrom coleft join mimic_derived.urine_output_rate uoon co.stay_id = uo.stay_idand co.starttime < uo.charttimeand co.endtime >= uo.charttimegroup by co.stay_id, co.hr
)
-- collapse vasopressors into 1 row per hour
-- also ensures only 1 row per chart time
, vaso AS
(SELECT co.stay_id, co.hr, MAX(epi.vaso_rate) as rate_epinephrine, MAX(nor.vaso_rate) as rate_norepinephrine, MAX(dop.vaso_rate) as rate_dopamine, MAX(dob.vaso_rate) as rate_dobutamineFROM coLEFT JOIN mimic_derived.epinephrine epion co.stay_id = epi.stay_idand co.endtime > epi.starttimeand co.endtime <= epi.endtimeLEFT JOIN mimic_derived.norepinephrine noron co.stay_id = nor.stay_idand co.endtime > nor.starttimeand co.endtime <= nor.endtimeLEFT JOIN mimic_derived.dopamine dopon co.stay_id = dop.stay_idand co.endtime > dop.starttimeand co.endtime <= dop.endtimeLEFT JOIN mimic_derived.dobutamine dobon co.stay_id = dob.stay_idand co.endtime > dob.starttimeand co.endtime <= dob.endtimeWHERE epi.stay_id IS NOT NULLOR nor.stay_id IS NOT NULLOR dop.stay_id IS NOT NULLOR dob.stay_id IS NOT NULLGROUP BY co.stay_id, co.hr
)
, scorecomp as
(selectco.stay_id, co.hr, co.starttime, co.endtime, pf.pao2fio2ratio_novent, pf.pao2fio2ratio_vent, vaso.rate_epinephrine, vaso.rate_norepinephrine, vaso.rate_dopamine, vaso.rate_dobutamine, vs.meanbp_min, gcs.gcs_min-- uo, uo.uo_24hr-- labs, bili.bilirubin_max, cr.creatinine_max, plt.platelet_minfrom coleft join vson co.stay_id = vs.stay_idand co.hr = vs.hrleft join gcson co.stay_id = gcs.stay_idand co.hr = gcs.hrleft join bilion co.stay_id = bili.stay_idand co.hr = bili.hrleft join cron co.stay_id = cr.stay_idand co.hr = cr.hrleft join plton co.stay_id = plt.stay_idand co.hr = plt.hrleft join pfon co.stay_id = pf.stay_idand co.hr = pf.hrleft join uoon co.stay_id = uo.stay_idand co.hr = uo.hrleft join vasoon co.stay_id = vaso.stay_idand co.hr = vaso.hr
)
, scorecalc as
(-- Calculate the final score-- note that if the underlying data is missing, the component is null-- eventually these are treated as 0 (normal), but knowing when data is missing is useful for debuggingselect scorecomp.*-- Respiration, casewhen pao2fio2ratio_vent < 100 then 4when pao2fio2ratio_vent < 200 then 3when pao2fio2ratio_novent < 300 then 2when pao2fio2ratio_vent < 300 then 2when pao2fio2ratio_novent < 400 then 1when pao2fio2ratio_vent < 400 then 1when coalesce(pao2fio2ratio_vent, pao2fio2ratio_novent) is null then nullelse 0end as respiration-- Coagulation, casewhen platelet_min < 20 then 4when platelet_min < 50 then 3when platelet_min < 100 then 2when platelet_min < 150 then 1when platelet_min is null then nullelse 0end as coagulation-- Liver, case-- Bilirubin checks in mg/dLwhen bilirubin_max >= 12.0 then 4when bilirubin_max >= 6.0 then 3when bilirubin_max >= 2.0 then 2when bilirubin_max >= 1.2 then 1when bilirubin_max is null then nullelse 0end as liver-- Cardiovascular, casewhen rate_dopamine > 15 or rate_epinephrine > 0.1 or rate_norepinephrine > 0.1 then 4when rate_dopamine > 5 or rate_epinephrine <= 0.1 or rate_norepinephrine <= 0.1 then 3when rate_dopamine > 0 or rate_dobutamine > 0 then 2when meanbp_min < 70 then 1when coalesce(meanbp_min, rate_dopamine, rate_dobutamine, rate_epinephrine, rate_norepinephrine) is null then nullelse 0end as cardiovascular-- Neurological failure (GCS), casewhen (gcs_min >= 13 and gcs_min <= 14) then 1when (gcs_min >= 10 and gcs_min <= 12) then 2when (gcs_min >= 6 and gcs_min <= 9) then 3when gcs_min < 6 then 4when gcs_min is null then nullelse 0end as cns-- Renal failure - high creatinine or low urine output, casewhen (creatinine_max >= 5.0) then 4when uo_24hr < 200 then 4when (creatinine_max >= 3.5 and creatinine_max < 5.0) then 3when uo_24hr < 500 then 3when (creatinine_max >= 2.0 and creatinine_max < 3.5) then 2when (creatinine_max >= 1.2 and creatinine_max < 2.0) then 1when coalesce (uo_24hr, creatinine_max) is null then nullelse 0 end as renalfrom scorecomp
)
, score_final as
(select s.*-- Combine all the scores to get SOFA-- Impute 0 if the score is missing-- the window function takes the max over the last 24 hours, coalesce(MAX(respiration) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0) as respiration_24hours, coalesce(MAX(coagulation) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0) as coagulation_24hours, coalesce(MAX(liver) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0) as liver_24hours, coalesce(MAX(cardiovascular) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0) as cardiovascular_24hours, coalesce(MAX(cns) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0) as cns_24hours, coalesce(MAX(renal) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0) as renal_24hours-- sum together data for final SOFA, coalesce(MAX(respiration) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0)+ coalesce(MAX(coagulation) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0)+ coalesce(MAX(liver) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0)+ coalesce(MAX(cardiovascular) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0)+ coalesce(MAX(cns) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0)+ coalesce(MAX(renal) OVER (PARTITION BY stay_id ORDER BY HRROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING),0)as sofa_24hoursfrom scorecalc sWINDOW W as(PARTITION BY stay_idORDER BY hrROWS BETWEEN 23 PRECEDING AND 0 FOLLOWING)
)
select * from score_final
where hr >= 0;
只是简单列举了几个常用表格,具体的表格说明可以查询github上官方源代码中的concept文件夹内的代码和注释。
三、衍生表格配置
1.参考前面数据安装步骤,打开命令界面,进入mimiciv数据库,配置数据库函数。
set search_path to mimic_derived;
2.运行shell脚本生成衍生表格数据文件夹postgres(windows运行shell脚本可通过安装git运行,可自行百度安装)。
3.衍生表格数据。
4.修改postgres-make-concepts.sql文件。
5.运行postgres-make-concepts.sql文件(注意替换为自己的文件路径)。
set search_path to mimic_derived;\encoding 'UTF-8';\i F:/mimic/mimic-code-main/mimic-iv/concepts/postgres/postgres-make-concepts.sql;
运行时间有点长,耐心等待。
6.表格生成成功,一共54个表格。
想获取mimiciv数据文件或者有问题的小伙伴可私信,谢谢!