2
2
"cells" : [
3
3
{
4
4
"cell_type" : " code" ,
5
- "execution_count" : 1 ,
6
- "id" : " 1a6cdb3f-759a-4e8c-a351-79934a29c1b4" ,
5
+ "execution_count" : null ,
7
6
"metadata" : {},
8
7
"outputs" : [],
9
8
"source" : [
10
9
" # Imports\n " ,
11
10
" from bs4 import BeautifulSoup\n " ,
12
- " from requests import Session"
11
+ " from requests import Session\n " ,
12
+ " from urllib.parse import urlparse"
13
13
]
14
14
},
15
15
{
16
16
"cell_type" : " code" ,
17
- "execution_count" : 3 ,
18
- "id" : " 841f5dcf-80da-4fea-b866-94af3c490fe4" ,
17
+ "execution_count" : null ,
19
18
"metadata" : {},
20
19
"outputs" : [],
21
20
"source" : [
116
115
},
117
116
{
118
117
"cell_type" : " code" ,
119
- "execution_count" : 58 ,
120
- "id" : " 716bc3dd-1d51-447b-ac13-61da1d859116" ,
118
+ "execution_count" : null ,
121
119
"metadata" : {},
122
120
"outputs" : [],
123
121
"source" : [
126
124
},
127
125
{
128
126
"cell_type" : " code" ,
129
- "execution_count" : 59 ,
130
- "id" : " 816e5cdc-fd61-41f0-8c11-d72cea22d1a4" ,
127
+ "execution_count" : null ,
131
128
"metadata" : {},
132
129
"outputs" : [],
133
130
"source" : [
136
133
},
137
134
{
138
135
"cell_type" : " code" ,
139
- "execution_count" : 60 ,
140
- "id" : " 36610e53-f418-4ee5-a9ab-07a68e73f65f" ,
136
+ "execution_count" : null ,
141
137
"metadata" : {},
142
138
"outputs" : [],
143
139
"source" : [
147
143
},
148
144
{
149
145
"cell_type" : " code" ,
150
- "execution_count" : 61 ,
151
- "id" : " 9d29e929-92d7-4024-9601-b04c384f9435" ,
146
+ "execution_count" : null ,
152
147
"metadata" : {},
153
148
"outputs" : [],
154
149
"source" : [
157
152
},
158
153
{
159
154
"cell_type" : " code" ,
160
- "execution_count" : 120 ,
161
- "id" : " 002dde50-0148-4278-89b0-00d9e8e0d1c2" ,
155
+ "execution_count" : null ,
162
156
"metadata" : {},
163
157
"outputs" : [],
164
158
"source" : [
167
161
},
168
162
{
169
163
"cell_type" : " code" ,
170
- "execution_count" : 121 ,
171
- "id" : " ba80d72f-618f-4124-b7fc-c7b8c962afcf" ,
164
+ "execution_count" : null ,
172
165
"metadata" : {},
173
166
"outputs" : [],
174
167
"source" : [
185
178
},
186
179
{
187
180
"cell_type" : " code" ,
188
- "execution_count" : 122 ,
189
- "id" : " fedb9f43-c929-42e6-a8f5-0c2e41c7849e" ,
181
+ "execution_count" : null ,
190
182
"metadata" : {},
191
183
"outputs" : [],
192
184
"source" : [
195
187
},
196
188
{
197
189
"cell_type" : " code" ,
198
- "execution_count" : 141 ,
199
- "id" : " b03b0e44-3364-4180-b6ad-b8ec3ed3d46a" ,
190
+ "execution_count" : null ,
200
191
"metadata" : {},
201
192
"outputs" : [],
202
193
"source" : [
206
197
},
207
198
{
208
199
"cell_type" : " code" ,
209
- "execution_count" : 142 ,
210
- "id" : " fff3107b-dddf-4fab-b3f3-4807510d63ea" ,
200
+ "execution_count" : null ,
201
+ "metadata" : {},
202
+ "outputs" : [],
203
+ "source" : [
204
+ " url = urlparse(url).geturl().replace(\"\\\\\" , \" /\" )"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type" : " code" ,
209
+ "execution_count" : null ,
211
210
"metadata" : {},
212
211
"outputs" : [],
213
212
"source" : [
216
215
},
217
216
{
218
217
"cell_type" : " code" ,
219
- "execution_count" : 143 ,
220
- "id" : " 2642b64b-c216-4ff9-b2db-359037d28582" ,
218
+ "execution_count" : null ,
221
219
"metadata" : {},
222
220
"outputs" : [],
223
221
"source" : [
224
- " soup = BeautifulSoup(res2.text)"
222
+ " soup = BeautifulSoup(res2.text, \" html.parser \" )"
225
223
]
226
224
},
227
225
{
228
226
"cell_type" : " code" ,
229
- "execution_count" : 147 ,
230
- "id" : " 8e9736cf-382a-43f7-a404-282b23b29b6d" ,
227
+ "execution_count" : null ,
231
228
"metadata" : {},
232
- "outputs" : [
233
- {
234
- "data" : {
235
- "text/plain" : [
236
- " 'Raised when indentation contains an inconsistent use of tabs and spaces.\\ nThis is a subclass of'"
237
- ]
238
- },
239
- "execution_count" : 147 ,
240
- "metadata" : {},
241
- "output_type" : " execute_result"
242
- }
243
- ],
229
+ "outputs" : [],
244
230
"source" : [
245
- " e = soup.find(id= id_) \n " ,
246
- " e .parent.p.contents[0].strip() "
231
+ " base_element = soup.find(\" dt \" , {'id' : id_}) \r \n" ,
232
+ " base_parent = base_element .parent"
247
233
]
248
234
},
249
235
{
250
236
"cell_type" : " code" ,
251
- "execution_count" : 148 ,
252
- "id" : " 5fda45f4-b36e-4ca7-8921-1a30ae26849f" ,
237
+ "execution_count" : null ,
253
238
"metadata" : {},
254
- "outputs" : [
255
- {
256
- "data" : {
257
- "text/plain" : [
258
- " 'TabError'"
259
- ]
260
- },
261
- "execution_count" : 148 ,
262
- "metadata" : {},
263
- "output_type" : " execute_result"
264
- }
265
- ],
239
+ "outputs" : [],
266
240
"source" : [
267
- " id_ "
241
+ " para = base_parent.find( \" dd \" ).text "
268
242
]
269
243
},
270
244
{
271
245
"cell_type" : " code" ,
272
- "execution_count" : 149 ,
273
- "id" : " e6ebee8c-3ce6-4f2e-80e1-4d4002066a58" ,
246
+ "execution_count" : null ,
274
247
"metadata" : {},
275
- "outputs" : [
276
- {
277
- "data" : {
278
- "text/plain" : [
279
- " 'Raised when indentation contains an inconsistent use of tabs and spaces.\\ nThis is a subclass of IndentationError.'"
280
- ]
281
- },
282
- "execution_count" : 149 ,
283
- "metadata" : {},
284
- "output_type" : " execute_result"
285
- }
286
- ],
248
+ "outputs" : [],
287
249
"source" : [
288
- " e.parent.p.text "
250
+ " print(para) "
289
251
]
290
252
}
291
253
],
292
254
"metadata" : {
255
+ "interpreter" : {
256
+ "hash" : " 63fd5069d213b44bf678585dea6b12cceca9941eaf7f819626cde1f2670de90d"
257
+ },
293
258
"kernelspec" : {
294
- "display_name" : " Python 3" ,
295
- "language" : " python" ,
259
+ "display_name" : " Python 3.9.5 64-bit" ,
296
260
"name" : " python3"
297
261
},
298
262
"language_info" : {
299
- "codemirror_mode" : {
300
- "name" : " ipython" ,
301
- "version" : 3
302
- },
303
- "file_extension" : " .py" ,
304
- "mimetype" : " text/x-python" ,
305
263
"name" : " python" ,
306
- "nbconvert_exporter" : " python" ,
307
- "pygments_lexer" : " ipython3" ,
308
- "version" : " 3.9.5"
264
+ "version" : " "
265
+ },
266
+ "metadata" : {
267
+ "interpreter" : {
268
+ "hash" : " 63fd5069d213b44bf678585dea6b12cceca9941eaf7f819626cde1f2670de90d"
269
+ }
309
270
}
310
271
},
311
272
"nbformat" : 4 ,
312
273
"nbformat_minor" : 5
313
- }
274
+ }
0 commit comments