Skip to content

Commit 406f098

Browse files
committed
Add a contrib etl
This makes it possible to run the core ETL without any private data such as: - note_nlp - cohort - derived measurements - and so on
1 parent b28d66d commit 406f098

File tree

7 files changed

+151
-135
lines changed

7 files changed

+151
-135
lines changed

Diff for: Makefile

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
MIMIC_SCHEMA=mimic
1+
MIMIC_SCHEMA=mimicdemo
22
HOST_OMOP=localhost
33
concept:
44
Rscript --vanilla etl/ConceptTables/loadTables.R $(MIMIC_SCHEMA)
55
sequence:
66
psql --set=mimicschema="$(MIMIC_SCHEMA)" -h $(HOST_OMOP) -d mimic postgres -f etl/etl_sequence.sql
77
load:
88
psql --set=mimicschema="$(MIMIC_SCHEMA)" -h $(HOST_OMOP) -d mimic postgres -f etl/etl.sql
9+
contrib:
10+
psql --set=mimicschema="$(MIMIC_SCHEMA)" -h $(HOST_OMOP) -d mimic postgres -f etl/etl_contrib.sql
911
check:
1012
psql --set=mimicschema="$(MIMIC_SCHEMA)" -h $(HOST_OMOP) -d mimic postgres -f etl/check_etl.sql
1113
export:
@@ -16,5 +18,6 @@ export:
1618
cp etl/import_mimic_omop.sql etl/Result/ &&\
1719
cp omop/build-omop/postgresql/* etl/Result/
1820
# tar -cf $(MIMIC_SCHEMA)-omop.tar etl/Result/
19-
runetl: sequence concept load export
21+
runetl: sequence concept load export
22+
runetlwithcontrib: sequence concept load contrib export
2023
runetllight: concept load

Diff for: etl/StandardizedClinicalDataTables/MEASUREMENT/etl.sql

-125
Original file line numberDiff line numberDiff line change
@@ -734,74 +734,6 @@ OR -- middle
734734
(is_last IS FALSE AND is_first IS FALSE AND row_to_insert.measurement_datetime > visit_detail_assign.visit_start_datetime AND row_to_insert.measurement_datetime <= visit_detail_assign.visit_end_datetime)
735735
);
736736

737-
-- Derived values from labeevent
738-
739-
with
740-
"patients" AS (SELECT mimic_id AS person_id, subject_id FROM patients),
741-
"admissions" AS (SELECT mimic_id AS visit_occurrence_id, hadm_id FROM admissions),
742-
"gcpt_lab_unit_to_concept" AS (SELECT unit as unit_source_value, concept_id as unit_concept_id FROM gcpt_lab_unit_to_concept),
743-
"gcpt_derived_to_concept" as (select measurement_source_value, itemid, mimic_id as measurement_source_concept_id, concept_id as measurement_concept_id from gcpt_derived_to_concept),
744-
"row_to_insert" as (
745-
SELECT
746-
nextval('mimic_id_seq') as measurement_id
747-
, person_id
748-
, coalesce(measurement_concept_id, 0) as measurement_concept_id -- mapped
749-
, charttime::date as measurement_date
750-
, charttime::timestamp as measurement_datetime
751-
, 45754907 as measurement_type_concept_id --derived value
752-
, 4172703 as operator_concept_id -- =
753-
, valuenum as value_as_number
754-
, CASE WHEN flag = 'abnormal' THEN 45878745 --abnormal
755-
ELSE NULL END as value_as_concept_id -- this shouldn't actually be here, no way to put this information into range too
756-
, unit_concept_id
757-
, null::numeric as range_low
758-
, null::numeric as range_high
759-
, null::integer as provider_id
760-
, visit_occurrence_id
761-
, gcpt_derived_to_concept.measurement_source_value
762-
, gcpt_derived_to_concept.measurement_source_concept_id
763-
, valueuom as unit_source_value
764-
, null::text as value_source_value
765-
FROM mimiciii.gcpt_derived_values
766-
JOIN patients using(subject_id)
767-
left join admissions using(hadm_id)
768-
left join gcpt_lab_unit_to_concept on valueuom = unit_source_value
769-
left join gcpt_derived_to_concept using(itemid)
770-
)
771-
INSERT INTO omop.measurement
772-
SELECT
773-
row_to_insert.measurement_id
774-
, row_to_insert.person_id
775-
, row_to_insert.measurement_concept_id
776-
, row_to_insert.measurement_date
777-
, row_to_insert.measurement_datetime
778-
, row_to_insert.measurement_type_concept_id
779-
, row_to_insert.operator_concept_id
780-
, row_to_insert.value_as_number
781-
, row_to_insert.value_as_concept_id
782-
, row_to_insert.unit_concept_id
783-
, row_to_insert.range_low
784-
, row_to_insert.range_high
785-
, row_to_insert.provider_id
786-
, row_to_insert.visit_occurrence_id
787-
, visit_detail_assign.visit_detail_id
788-
, row_to_insert.measurement_source_value
789-
, row_to_insert.measurement_source_concept_id
790-
, row_to_insert.unit_source_value
791-
, row_to_insert.value_source_value
792-
FROM row_to_insert
793-
LEFT JOIN omop.visit_detail_assign
794-
ON row_to_insert.visit_occurrence_id = visit_detail_assign.visit_occurrence_id
795-
AND
796-
(--only one visit_detail
797-
(is_first IS TRUE AND is_last IS TRUE)
798-
OR -- first
799-
(is_first IS TRUE AND is_last IS FALSE AND row_to_insert.measurement_datetime <= visit_detail_assign.visit_end_datetime)
800-
OR -- last
801-
(is_last IS TRUE AND is_first IS FALSE AND row_to_insert.measurement_datetime > visit_detail_assign.visit_start_datetime)
802-
OR -- middle
803-
(is_last IS FALSE AND is_first IS FALSE AND row_to_insert.measurement_datetime > visit_detail_assign.visit_start_datetime AND row_to_insert.measurement_datetime <= visit_detail_assign.visit_end_datetime)
804-
);
805737

806738
-- weight from inputevent_mv
807739

@@ -872,60 +804,3 @@ OR -- middle
872804
)
873805
;
874806

875-
-- Derived values from noteevents
876-
877-
with
878-
"patients" AS (SELECT mimic_id AS person_id, subject_id FROM patients),
879-
"admissions" AS (SELECT mimic_id AS visit_occurrence_id, hadm_id FROM admissions),
880-
"gcpt_derived_to_concept" as (select measurement_source_value, itemid, mimic_id as measurement_source_concept_id, concept_id as measurement_concept_id from gcpt_derived_to_concept),
881-
"row_to_insert" as (
882-
SELECT
883-
nextval('mimic_id_seq') as measurement_id
884-
, person_id
885-
, coalesce(measurement_concept_id, 0) as measurement_concept_id -- mapped
886-
, charttime::date as measurement_date
887-
, charttime::timestamp as measurement_datetime
888-
, 45754907 as measurement_type_concept_id --derived value
889-
, CASE WHEN exact_value IS NOT NULL THEN 4172703 --=
890-
WHEN inf_egal_value IS NOT NULL THEN 4171756 --<
891-
WHEN sup_egal_value IS NOT NULL THEN 4172704 END -->
892-
as operator_concept_id
893-
, coalesce(exact_value, inf_egal_value, sup_egal_value) as value_as_number
894-
, null::integer as value_as_concept_id
895-
, 8554 as unit_concept_id -- percent
896-
, null::numeric as range_low
897-
, null::numeric as range_high
898-
, null::integer as provider_id
899-
, visit_occurrence_id
900-
, gcpt_derived_to_concept.measurement_source_value
901-
, gcpt_derived_to_concept.measurement_source_concept_id
902-
, '%' as unit_source_value
903-
, null::text as value_source_value
904-
FROM gcpt_derived_fevg
905-
JOIN patients using(subject_id)
906-
left join admissions using(hadm_id)
907-
left join gcpt_derived_to_concept on 'LVEF from noteevents' = measurement_source_value
908-
WHERE coalesce(exact_value, inf_egal_value, sup_egal_value) IS NOT NULL
909-
)
910-
INSERT INTO omop.measurement
911-
SELECT
912-
row_to_insert.measurement_id
913-
, row_to_insert.person_id
914-
, row_to_insert.measurement_concept_id
915-
, row_to_insert.measurement_date
916-
, row_to_insert.measurement_datetime
917-
, row_to_insert.measurement_type_concept_id
918-
, row_to_insert.operator_concept_id
919-
, row_to_insert.value_as_number
920-
, row_to_insert.value_as_concept_id
921-
, row_to_insert.unit_concept_id
922-
, row_to_insert.range_low
923-
, row_to_insert.range_high
924-
, row_to_insert.provider_id
925-
, row_to_insert.visit_occurrence_id
926-
, null as visit_detail_id
927-
, row_to_insert.measurement_source_value
928-
, row_to_insert.measurement_source_concept_id
929-
, row_to_insert.unit_source_value
930-
, row_to_insert.value_source_value
931-
FROM row_to_insert;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
-- Derived values from labevents
2+
3+
with
4+
"patients" AS (SELECT mimic_id AS person_id, subject_id FROM patients),
5+
"admissions" AS (SELECT mimic_id AS visit_occurrence_id, hadm_id FROM admissions),
6+
"gcpt_lab_unit_to_concept" AS (SELECT unit as unit_source_value, concept_id as unit_concept_id FROM gcpt_lab_unit_to_concept),
7+
"gcpt_derived_to_concept" as (select measurement_source_value, itemid, mimic_id as measurement_source_concept_id, concept_id as measurement_concept_id from gcpt_derived_to_concept),
8+
"row_to_insert" as (
9+
SELECT
10+
nextval('mimic_id_seq') as measurement_id
11+
, person_id
12+
, coalesce(measurement_concept_id, 0) as measurement_concept_id -- mapped
13+
, charttime::date as measurement_date
14+
, charttime::timestamp as measurement_datetime
15+
, 45754907 as measurement_type_concept_id --derived value
16+
, 4172703 as operator_concept_id -- =
17+
, valuenum as value_as_number
18+
, CASE WHEN flag = 'abnormal' THEN 45878745 --abnormal
19+
ELSE NULL END as value_as_concept_id -- this shouldn't actually be here, no way to put this information into range too
20+
, unit_concept_id
21+
, null::numeric as range_low
22+
, null::numeric as range_high
23+
, null::integer as provider_id
24+
, visit_occurrence_id
25+
, gcpt_derived_to_concept.measurement_source_value
26+
, gcpt_derived_to_concept.measurement_source_concept_id
27+
, valueuom as unit_source_value
28+
, null::text as value_source_value
29+
FROM mimiciii.gcpt_derived_values
30+
JOIN patients using(subject_id)
31+
left join admissions using(hadm_id)
32+
left join gcpt_lab_unit_to_concept on valueuom = unit_source_value
33+
left join gcpt_derived_to_concept using(itemid)
34+
)
35+
INSERT INTO omop.measurement
36+
SELECT
37+
row_to_insert.measurement_id
38+
, row_to_insert.person_id
39+
, row_to_insert.measurement_concept_id
40+
, row_to_insert.measurement_date
41+
, row_to_insert.measurement_datetime
42+
, row_to_insert.measurement_type_concept_id
43+
, row_to_insert.operator_concept_id
44+
, row_to_insert.value_as_number
45+
, row_to_insert.value_as_concept_id
46+
, row_to_insert.unit_concept_id
47+
, row_to_insert.range_low
48+
, row_to_insert.range_high
49+
, row_to_insert.provider_id
50+
, row_to_insert.visit_occurrence_id
51+
, visit_detail_assign.visit_detail_id
52+
, row_to_insert.measurement_source_value
53+
, row_to_insert.measurement_source_concept_id
54+
, row_to_insert.unit_source_value
55+
, row_to_insert.value_source_value
56+
FROM row_to_insert
57+
LEFT JOIN omop.visit_detail_assign
58+
ON row_to_insert.visit_occurrence_id = visit_detail_assign.visit_occurrence_id
59+
AND
60+
(--only one visit_detail
61+
(is_first IS TRUE AND is_last IS TRUE)
62+
OR -- first
63+
(is_first IS TRUE AND is_last IS FALSE AND row_to_insert.measurement_datetime <= visit_detail_assign.visit_end_datetime)
64+
OR -- last
65+
(is_last IS TRUE AND is_first IS FALSE AND row_to_insert.measurement_datetime > visit_detail_assign.visit_start_datetime)
66+
OR -- middle
67+
(is_last IS FALSE AND is_first IS FALSE AND row_to_insert.measurement_datetime > visit_detail_assign.visit_start_datetime AND row_to_insert.measurement_datetime <= visit_detail_assign.visit_end_datetime)
68+
);
69+
70+
-- Derived values from noteevents
71+
72+
with
73+
"patients" AS (SELECT mimic_id AS person_id, subject_id FROM patients),
74+
"admissions" AS (SELECT mimic_id AS visit_occurrence_id, hadm_id FROM admissions),
75+
"gcpt_derived_to_concept" as (select measurement_source_value, itemid, mimic_id as measurement_source_concept_id, concept_id as measurement_concept_id from gcpt_derived_to_concept),
76+
"row_to_insert" as (
77+
SELECT
78+
nextval('mimic_id_seq') as measurement_id
79+
, person_id
80+
, coalesce(measurement_concept_id, 0) as measurement_concept_id -- mapped
81+
, charttime::date as measurement_date
82+
, charttime::timestamp as measurement_datetime
83+
, 45754907 as measurement_type_concept_id --derived value
84+
, CASE WHEN exact_value IS NOT NULL THEN 4172703 --=
85+
WHEN inf_egal_value IS NOT NULL THEN 4171756 --<
86+
WHEN sup_egal_value IS NOT NULL THEN 4172704 END -->
87+
as operator_concept_id
88+
, coalesce(exact_value, inf_egal_value, sup_egal_value) as value_as_number
89+
, null::integer as value_as_concept_id
90+
, 8554 as unit_concept_id -- percent
91+
, null::numeric as range_low
92+
, null::numeric as range_high
93+
, null::integer as provider_id
94+
, visit_occurrence_id
95+
, gcpt_derived_to_concept.measurement_source_value
96+
, gcpt_derived_to_concept.measurement_source_concept_id
97+
, '%' as unit_source_value
98+
, null::text as value_source_value
99+
FROM gcpt_derived_fevg
100+
JOIN patients using(subject_id)
101+
left join admissions using(hadm_id)
102+
left join gcpt_derived_to_concept on 'LVEF from noteevents' = measurement_source_value
103+
WHERE coalesce(exact_value, inf_egal_value, sup_egal_value) IS NOT NULL
104+
)
105+
INSERT INTO omop.measurement
106+
SELECT
107+
row_to_insert.measurement_id
108+
, row_to_insert.person_id
109+
, row_to_insert.measurement_concept_id
110+
, row_to_insert.measurement_date
111+
, row_to_insert.measurement_datetime
112+
, row_to_insert.measurement_type_concept_id
113+
, row_to_insert.operator_concept_id
114+
, row_to_insert.value_as_number
115+
, row_to_insert.value_as_concept_id
116+
, row_to_insert.unit_concept_id
117+
, row_to_insert.range_low
118+
, row_to_insert.range_high
119+
, row_to_insert.provider_id
120+
, row_to_insert.visit_occurrence_id
121+
, null as visit_detail_id
122+
, row_to_insert.measurement_source_value
123+
, row_to_insert.measurement_source_concept_id
124+
, row_to_insert.unit_source_value
125+
, row_to_insert.value_source_value
126+
FROM row_to_insert;

Diff for: etl/StandardizedVocabularies/CONCEPT/etl.sql

+5-5
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,12 @@ SELECT
9191
, 226719 --Last menses
9292
, 225279 --Date of Admission to Hospital
9393
, 225059 --Past medical history
94-
, 916 --Allergy 1
95-
, 927 --Allergy 2
96-
, 935 --Allergy 3
97-
, 925 --Marital Status
94+
, 916 --Allergy 1
95+
, 927 --Allergy 2
96+
, 935 --Allergy 3
97+
, 925 --Marital Status
9898
, 226381 --Marital Status
99-
, 926 --Religion
99+
, 926 --Religion
100100
, 226543 --Religion
101101
) THEN 'Observation'::Text
102102
ELSE 'Measurement'::Text END as domain_id

Diff for: etl/etl.sql

-3
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,6 @@ TRUNCATE TABLE omop.fact_relationship CASCADE;
2828

2929
\i etl/pg_function.sql
3030
\i etl/StandardizedVocabularies/CONCEPT/etl.sql -- SHALL be first loaded table
31-
\i etl/StandardizedVocabularies/COHORT_DEFINITION/etl.sql
32-
\i etl/StandardizedVocabularies/ATTRIBUTE_DEFINITION/etl.sql
33-
\i etl/StandardizedDerivedElements/COHORT_ATTRIBUTE/etl.sql
3431
\i etl/StandardizedHealthSystemDataTables/CARE_SITE/etl.sql
3532
\i etl/StandardizedHealthSystemDataTables/PROVIDER/etl.sql
3633
\i etl/StandardizedClinicalDataTables/PERSON/etl.sql

Diff for: etl/etl_contrib.sql

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
\set ON_ERROR_STOP true
2+
set search_path to :'mimicschema';
3+
\timing
4+
5+
\i etl/StandardizedVocabularies/COHORT_DEFINITION/etl.sql
6+
\i etl/StandardizedDerivedElements/COHORT_ATTRIBUTE/etl.sql
7+
\i etl/StandardizedClinicalDataTables/MEASUREMENT/etl_contrib.sql
8+
\i etl/StandardizedVocabularies/ATTRIBUTE_DEFINITION/etl.sql
9+

Diff for: mimic/build-mimic/build-mimicdemo.sql

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- MIMIC-III demo (builds the mimicdemo schema)
2+
DROP SCHEMA IF EXISTS mimicdemo CASCADE; -- IF EXISTS: do not error on a fresh database where the schema was never created
3+
CREATE SCHEMA mimicdemo;
4+
SET search_path TO mimicdemo;
5+
\i 'postgres_create_tables.sql'
6+
\i 'postgres_load_data.sql'

0 commit comments

Comments
 (0)