-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaddDataPoint.py
305 lines (274 loc) · 14.7 KB
/
addDataPoint.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
# Courses we don't want to include in our model, for the reasons listed.
_NON_INCLUDED_COURSES = frozenset([
    'ENGR3520A',     # Project that went along with FOCS one year
    'OIE1000',       # OIE
    'OIP1000',       # The Olin Internship Practicum
    'AHSE11BA',      # Babson Cross Registration
    'BAB5001',       # Babson Cross Registration
    'BAB1001',       # Babson Cross Registration
    'ENGR1510',      # Introductory Programming (no equivalent down the line)
    'CD1097',        # Curriculum Development Activity
    'E! CAP SPR',    # Entrepreneurship Capstone Spring Pre-registration
    'SEM 401',       # Seminars
    'SEM 301',
    'SEM 302',
    'AHSE2141',      # Disregard AHS part of Engineering for Humanity
    'AHSE CAP SPR',  # AHS Capstone Spring Pre-registration
    'SCI10WE',       # Wellesley Cross Registration
    'ENGR3425',      # Analog VLSI didn't have an easy later equivalent
    'ICB2',          # Lecture component of ICB2, already accounted for in course numbers
    'ICB1',          # Lecture component of ICB1, already accounted for in course numbers
    'MTH3130',       # Mathematical Analysis (2 cr) - 16 people - doesn't fit what came after well
])

# Title that marks the AHS Capstone pre-registration rows, which are skipped too.
_AHS_CAPSTONE_PREREG_TITLE = 'AHS CapstoneSpring Pre-registration'

# Base semester number for each class-standing code; 'TF' is a less common
# label for the first freshman semester and is treated exactly like 'FF'.
# Any code not listed here falls back to 0.
_SEMESTER_BASE = {
    'TF': 0,
    'FF': 0,
    'FR': 1,
    'SO': 2,
    'JR': 4,
    'SR': 6,
}

# Old course number -> equivalent current course number.
# The KEY is the number found in the data, the VALUE is what it becomes.
_EQUIVALENT_COURSES = {
    'FND2490': 'ENGR2250',    # FND UOCD
    'FND1310': 'MTH1110',     # FND Calculus - 2006
    'FND1312': 'MTH1110',     # FND Calculus - 2007
    'FND2510': 'ENGR2410',    # FND Sig Sys
    'FND1510': 'ENGR1110',    # FND Mod Con
    'FND2610': 'AHSE1500',    # FND FBE
    'FND2240': 'SCI1410',     # FND Mat Sci
    'FND1210': 'SCI1111',     # FND Physics side of Mod Sim (check on this, was called 'Physical Foundations of Engineering I')
    'FND1311': 'MTH2140',     # FND Diff Eq
    'FND1410': 'ENGR1200',    # FND precursor to Design Nature
    'FND1420': 'ENGR1121',    # FND closest thing we could get was Real World Measurements
    'FND2350': 'MTH2130',     # FND closest thing is Prob Stat (Applied Mathematical Methods)
    'FND2710': 'SCI1210',     # FND Mod Bio
    'AHS1110': 'AHSE1100',    # AHS -> AHSE
    'AHS1111': 'AHSE2131',    # AHS -> AHSE
    'AHS1140': 'AHSE2120',    # AHS -> AHSE
    'ELE1050': 'ENGR2510',    # Software design (before it was Soft Des)
    'ENG1510': 'ENGR2510',    # Software design (before it was Soft Des)
    'MTH3198': 'MTH4198',     # Consolidating the 2 OSS in Mathematics
    'ISR1300': 'MTH0098',     # IS in Mathematics
    'ISR1100': 'AHSE0198',    # IS in Arts Humanities Social Science
    'AHSE3198': 'AHSE0198',   # IS in Arts Humanities Social Science
    'AHSE1198': 'AHSE0198',   # IS in Arts Humanities Social Science
    'SCI3098': 'SCI0098',     # IS in the Sciences
    'SCI1098': 'SCI0098',     # IS in the Sciences
    'ENGR3098': 'ENGR0098',   # IS in Engineering
    'ISR1500': 'ENGR0098',    # IS/Research in Computing, Electrical or Systems -> IS in Engineering
    'ISR2900': 'ENGR0098',    # IS & Research Technical Concepts -> IS in Engineering
    'ISR1200': 'SCI0098',     # IS & Research: Physical Concepts -> IS in the Sciences
    'ISR1020': 'AHSE0198',    # IS & Research Musical Concepts -> IS in Arts Humanities Social Science
    'ISR1030': 'ENGR0098',    # IS/Research in Design Concepts -> IS in Engineering
    'ISR1900': 'ENGR0098',    # IS & Research Technical Concepts -> IS in Engineering
    'SCI1410A': 'SCI1410',    # Mat Sci
    'ELE2715': 'SCI2320',     # Applied Organic Chemistry -> Organic Chemistry with Lab
    'SCI1110': 'SCI1130',     # Mechanics
    'MEC1915': 'ENGR2320',    # Mech Solids
    'ENGR3320': 'ENGR2320',   # Mech Solids
    'SCI2220': 'ENGR2620',    # Biomechanics
    'SCI3110': 'SCI2130',     # Modern Physics
    'ENGR3812': 'SCI3120',    # Solid State Physics
    'SCI1121A': 'SCI1121',    # E&M
    'SCI1120': 'SCI1121',     # E&M
    'ENGR3290': 'ENGR4290',   # ADE
    'ENGR1120': 'ENGR1121',   # Real World Measurements
    'ENGR3380': 'ENGR3260',   # DFM
    'ENGR3340': 'ENGR2340',   # Dynamics
    'MEC2910': 'ENGR2350',    # Thermodynamics
    'ENGR3350': 'ENGR2350',   # Thermodynamics
    'MTH2310': 'MTH2110',     # Discrete
    'ECE2910': 'ENGR2420',    # Circuits
    'MTH3140': 'ENGR3140',    # Error control codes
    'MTH1097': 'MTH0097',     # Undergraduate Research in Mathematics
    'SCI1097': 'SCI0097',     # Undergraduate Research in the Sciences
    'ENGR1097': 'ENGR0097',   # Undergraduate Research in Engineering
    'AHSE1197': 'AHSE0197',   # Undergraduate Research in Arts, Humanities, Social Science
    'MTH1000': 'MTH1110',     # Calculus
    'MEC1000': 'ENGR1330',    # Fundamentals of Machine Shop Operations
    'AHSE1120': 'AHSE1100',   # History of Tech
    'AHSE3500': 'AHSE3599',   # Entrepreneurship: Real Time Case Study -> Special Topics in Business and Entrepreneurship
    'AHSE1599': 'AHSE1500',   # Entrepreneurship Foundation Topic -> FBE
    'AHSE1140': 'AHSE1145',   # Anthropology Foundation
    'AHSE2140': 'AHSE1145',   # Anthropology Foundation
    'ENGR3430': 'ENGR3426',   # Digital VLSI -> Mixed Analog-Digital VLSI I
    'AHSE1135': 'AHSE1130',   # Seeing and Hearing
    'ELE1010': 'AHSE2131',    # Responsive Drawing and Visual Thinking
    'MTH2150': 'MTH2130',     # Applied Mathematical Methods -> Prob Stat
    'AHSE3100': 'AHSE3199',   # Leadership and Ethics
    'ENGR1199A': 'ENGR1199',  # Energy Systems in Urban Design
    'ENGR3299A': 'ENGR3270',  # Real Products, Real Markets
    'ENGR3699': 'ENGR3630',   # Transport in Biological Systems
    'SCI2099B': 'SCI2099',    # Special Topics: Art of Approximation
    'SCI2199': 'ENGR3355',    # Renewable Energy
    'SCI3199': 'SCI2145',     # High Energy Astrophysics
    'MTH3199A': 'MTH3160',    # Intro to Complex Variables
    'ELE1025': 'AHSE1122',    # Wired Ensemble
    'ELE1020': 'AHSE1122',    # Wired Ensemble
}

# AHSE1199 (AHS Foundation Topic) was reused for unrelated subjects.  Each pair
# is (text looked for in the section title, course number to assign); the
# pairs are tried in order and the first hit wins.
_AHSE1199_SECTIONS = (
    ('Art Since 1945', 'AHSE1199A'),       # Art Since 1945: Movmt Theme Cntx
    ('Creative Wr', 'AHSE1199B'),          # Creative Writing Workshop
    ('How Supreme Court', 'AHSE1199C'),    # How Supreme Court Shapes Amer
    ('Islam', 'AHSE1199D'),                # Islam and the West: Politic/Cult
    ('Media Revolution', 'AHSE1199E'),     # Media Revolution:Activism & Tech
    ('Globalzatn', 'AHSE1199F'),           # Globalzatn: Culture Econ Politic
    ('Robots, Mutants', 'AHSE1199G'),      # Robots, Mutants & Monsters: Envi
    ('Shakespeare', 'AHSE1199H'),          # The Play's the Thing:Shakespeare
    ('Human Connection', 'AHSE1145'),      # Anthropology
    ('Identity', 'AHSE1155'),              # Identity from the Mind and Brain
    # Environment and Health.
    # NOTE(review): 'Heath' may be a typo for 'Health'; kept as-is because the
    # literal decides which rows match - confirm against the raw data.
    ('Heath and the Urban', 'ELE1090'),
)

# Final course number -> the one title every row with that number should carry.
_TITLE_CHANGES = {
    'AHSE1100': 'History of Technology:A Cultural & Contextual Approach',
    'ENGR4190': 'Senior Capstone Program inEngineering (SCOPE)',
    'ENGR3426': 'Mixed Analog-Digital VLSI I',
    'ENGR2510': 'Software Design',
    'SCI2320': 'Applied Organic Chemistry',
    'SCI1111': 'Modeling and Simulation of thePhysical World',
    'MTH2140': 'Differential Equations',
    'MTH1110': 'Calculus',
    'AHSE0198': 'Independent Study in Arts, Humanities, Social Science',
    'SCI1121': 'Electricity and Magnetism',
    'MTH2130': 'Probability and Statistics',
    'AHSE1500': 'Foundations of Business andEntrepreneurship',
    'AHSE1130': 'Seeing and Hearing:Communicating with Photographs,Video and Sound',
    'ENGR2420': 'Intro Microelectronic Circuits',
    'AHSE1122': 'The Wired Ensemble -Instruments, Voices, Players',
    'SCI0098': 'Independent Study inScience',
    'SCI1410': 'Materials Science and SolidState Chemistry with lab',
    'ENGR1200': 'Design Nature',
    'ENGR0098': 'Independent Study in Engineering',
    'ENGR1121': 'Real World Measurements',
}


def _student_semester_number(student_semester_str, course_semester):
    """Combine class standing and course term into one semester number.

    The class-standing code picks a base value (see _SEMESTER_BASE; unknown
    codes count as 0) and one is added when the course term contains 'SP'.
    """
    semester_no = _SEMESTER_BASE.get(student_semester_str, 0)
    # NOTE(review): 'FR' starts at 1, so an 'FR' row in an 'SP' term yields 2,
    # the same value as 'SO' outside 'SP' - confirm this is intended.
    if 'SP' in course_semester:
        semester_no += 1
    return semester_no


def _resolve_special_cases(course_number, course_title, section_title):
    """Apply the fixes that need more than a one-to-one course number swap.

    At most one rule applies per call.  Returns the resulting
    (course_number, course_title, section_title) tuple; input that matches no
    rule comes back unchanged.
    """
    # Digital Signal Processing used to be a Special Topics offering
    if course_number == 'ENGR3499B' and 'Digital Signal Processing' in section_title:
        return 'ENGR3415', 'Digital Signal Processing', ''

    # Preferred title for AHSE1102
    if course_number == 'AHSE1102':
        return course_number, 'Arts and Humanities: Self-Explored in Art and Philosp', section_title

    # Foundation Topic in Physics
    if course_number == 'SCI1199':
        energy_title = 'Phys of Conserv Laws: Energy Foc'
        # The substring test runs both ways, so a title that is only a piece
        # of the phrase still counts.
        # NOTE(review): an empty title is a substring of anything and
        # therefore lands in this branch as well.
        if course_title in energy_title or energy_title in course_title:
            # Phys of Conserv Laws: Energy Foc by Mechtenberg, Abigail
            return 'SCI1199A', course_title, section_title
        # Phys of Conserv Laws: Waves
        return 'SCI1199B', course_title, section_title

    # Linearity 1 and 2
    if course_number == 'MTH2188':
        # Same two-way substring test as above (empty section title matches).
        if section_title in 'Linearity 1' or 'Linearity 1' in section_title:
            # Linearity 1's equivalent is Linear Algebra and Differential
            # Equations for our purposes
            return 'MTH2188A', course_title, section_title
        # Linearity 2's equivalent is Vector Calculus for our purposes
        return 'MTH1120', course_title, section_title

    # Two courses with ENGR3345
    if course_number == 'ENGR3345':
        aero_title = 'Mechanical and Aerospace Systems'
        # NOTE(review): the first half tests course_title while the second
        # half tests section_title; kept exactly as found.
        if course_title in aero_title or aero_title in section_title:
            # Mechanical and Aerospace Systems
            return 'ENGR3345A', course_title, section_title
        # Dynamic Systems
        return 'ENGR3345B', course_title, section_title

    # Some of the 'Heroes for the Renaissance Engineer: Leonardo, Nabokov,
    # Bach and Borodin' classes are misnumbered
    if course_number == 'AHSE1145':
        if 'RenaissanceEngineer' in course_title:
            course_number = 'AHSE2120'
        return course_number, course_title, section_title

    # Modern Physics had a previous number (same as Quantum Physics, oops!)
    if course_number == 'SCI2130':
        if course_title == 'Modern Physics':
            return 'SCI2130A', course_title, section_title
        # Quantum Physics
        return 'SCI2130B', course_title, section_title

    # Mod Con was previously called 'Engineering of Compartment Systems'
    if course_number == 'ENGR1110':
        return course_number, 'Modeling and Control', section_title

    # Arts, Humanities, Social Science Foundation Topic: several different
    # subjects shared this number, so each gets its own course number.
    if course_number == 'AHSE1199':
        for keyword, new_number in _AHSE1199_SECTIONS:
            if keyword in section_title:
                return new_number, course_title, section_title

    return course_number, course_title, section_title


def addDataPoint(row):
    """Clean one raw enrollment row into the record used by the model.

    Args:
        row: sequence of 13 strings, read by position as academic status,
            graduation year, student id, course semester, gender, class
            standing when the course was taken (e.g. FF, FR, SO, ...), major,
            concentration, course number, section number, course title,
            section title and professor name.  Every field is stripped of
            surrounding whitespace.

    Returns:
        None when the row is empty, is the header row, or belongs to a course
        that is left out (a course number containing ' L', a number in
        _NON_INCLUDED_COURSES, or the AHS Capstone pre-registration title).
        Otherwise a 12-item list:
        [academic_status, grad_year, stud_id, gender, student_semester_no,
        major, concentration, course_number, section_no, course_title,
        section_title, professor_name], where student_semester_no is an int
        and the course number/title have been mapped to their equivalents.

    Raises:
        IndexError: if a non-empty row has fewer than 13 columns.
    """
    # An empty row (for example from a blank line in the input) carries no
    # data; skip it the same way the header row is skipped.
    if not row:
        return None

    academic_status = row[0].strip()
    grad_year = row[1].strip()
    stud_id = row[2].strip()
    course_semester = row[3].strip()
    gender = row[4].strip()
    student_semester_str = row[5].strip()  # standing when the course was taken (eg FF, FR, SO, ..)
    major = row[6].strip()
    concentration = row[7].strip()
    course_number = row[8].strip()
    section_no = row[9].strip()
    course_title = row[10].strip()
    section_title = row[11].strip()
    professor_name = row[12].strip()

    # Get rid of the header row with column titles
    if academic_status == 'Academic Status Code':
        return None

    # Don't include the excluded courses, lab classes or AHS Cap pre-reg
    if (' L' in course_number
            or course_number in _NON_INCLUDED_COURSES
            or course_title == _AHS_CAPSTONE_PREREG_TITLE):
        return None

    student_semester_no = _student_semester_number(student_semester_str, course_semester)

    # Cleaning course data by setting equivalent course information.
    # The raw number is checked against the table BEFORE the AHS -> AHSE
    # prefix rewrite: rewriting first turned e.g. 'AHS1110' into 'AHSE1110',
    # so the explicit 'AHS....' table entries could never match.
    if (course_number not in _EQUIVALENT_COURSES
            and 'AHS' in course_number and 'AHSE' not in course_number):
        # AHS vs. AHSE problem, change everything to AHSE
        course_number = course_number[:3] + 'E' + course_number[3:]
    course_number = _EQUIVALENT_COURSES.get(course_number, course_number)

    course_number, course_title, section_title = _resolve_special_cases(
        course_number, course_title, section_title)

    # Settle on one title per course number (checked before the 'X' suffix is
    # removed, so 'X'-suffixed numbers keep their own title).
    course_title = _TITLE_CHANGES.get(course_number, course_title)

    # Removing the X on the end of course numbers (for an IS)
    if course_number.endswith('X'):
        course_number = course_number[:-1]

    # NOTE(review): earlier code prepared a split of MTH2188A into
    # MTH2120 'Linear Algebra' + MTH2140 'Differential Equations', and of
    # FND1320 into MTH1120 'Vector Calculus' + MTH2120 'Linear Algebra', but
    # never used the result.  Those rows are still returned once, under their
    # own number; emitting two rows would change the return shape.
    return [academic_status, grad_year, stud_id, gender, student_semester_no,
            major, concentration, course_number, section_no, course_title,
            section_title, professor_name]