Useful utilities for data munging.
"""
import warnings
+
import pandas as pd
import requests
+
import dataretrieval
from dataretrieval.codes import tz

+
def to_str(listlike, delimiter=','):
    """Translates list-like objects into strings.
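# Illustrative sketch (not part of the diff): to_str joins list-like input
# into a single delimited string, which query() below uses to serialize URL
# parameters. Expected behavior, assuming the signature above:
#
#     >>> to_str(['01585200', '01646500'])
#     '01585200,01646500'
#     >>> to_str(['01585200', '01646500'], delimiter=';')
#     '01585200;01646500'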
@@ -74,23 +77,25 @@ def format_datetime(df, date_field, time_field, tz_field):
    # create a datetime index from the columns in qwdata response
    df[tz_field] = df[tz_field].map(tz)

-    df['datetime'] = pd.to_datetime(df[date_field] + ' ' +
-                                    df[time_field] + ' ' +
-                                    df[tz_field],
-                                    format='ISO8601',
-                                    utc=True)
+    df['datetime'] = pd.to_datetime(
+        df[date_field] + ' ' + df[time_field] + ' ' + df[tz_field],
+        format='ISO8601',
+        utc=True,
+    )

    # if there are any incomplete dates, warn the user
-    if any(pd.isna(df['datetime'])):
-        count = sum(pd.isna(df['datetime']) == True)
+    if df['datetime'].isna().any():
+        count = df['datetime'].isna().sum()
        warnings.warn(
-            f'Warning: {count} incomplete dates found, ' +
-            'consider setting datetime_index to False.', UserWarning)
+            f'Warning: {count} incomplete dates found, '
+            + 'consider setting datetime_index to False.',
+            UserWarning,
+        )

    return df
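# Illustrative sketch (not part of the diff): format_datetime combines
# separate date, time, and timezone-code columns into one UTC 'datetime'
# column. The qwdata-style column names below are assumptions:
#
#     import pandas as pd
#     df = pd.DataFrame({
#         'sample_dt': ['2020-01-01'],
#         'sample_tm': ['12:30'],
#         'sample_start_time_datum_cd': ['EST'],
#     })
#     df = format_datetime(df, 'sample_dt', 'sample_tm',
#                          'sample_start_time_datum_cd')
#     df['datetime']  # tz-aware timestamps in UTC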

-#This function may be deprecated once pandas.update support joins besides left.
+# This function may be deprecated once pandas.update supports joins besides left.
def update_merge(left, right, na_only=False, on=None, **kwargs):
    """Performs a combination update and merge.
@@ -113,30 +118,30 @@ def update_merge(left, right, na_only=False, on=None, **kwargs):
    add na_only parameter support

    """
-    #df = left.merge(right, how='outer',
+    # df = left.merge(right, how='outer',
    #                left_index=True, right_index=True)
    df = left.merge(right, how='outer', on=on, **kwargs)

-
    # check for column overlap and resolve update
    for column in df.columns:
-        #if duplicated column, use the value from right
+        # if duplicated column, use the value from right
        if column[-2:] == '_x':
-            name = column[:-2] # find column name
+            name = column[:-2]  # find column name

            if na_only:
-                df[name] = df[name+'_x'].fillna(df[name+'_y'])
+                df[name] = df[name + '_x'].fillna(df[name + '_y'])

            else:
-                df[name] = df[name+'_x'].update(df[name+'_y'])
+                # Series.update works in place and returns None
+                df[name + '_x'].update(df[name + '_y'])
+                df[name] = df[name + '_x']

            df.drop([name + '_x', name + '_y'], axis=1, inplace=True)

    return df
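# Illustrative sketch (not part of the diff): update_merge overlays values
# from `right` onto `left`; with na_only=True only missing values are
# filled. The frames and key column here are made up:
#
#     import pandas as pd
#     left = pd.DataFrame({'site': ['A', 'B'], 'flow': [1.0, None]})
#     right = pd.DataFrame({'site': ['A', 'B'], 'flow': [9.0, 2.0]})
#     merged = update_merge(left, right, na_only=True, on='site')
#     # merged['flow'] -> [1.0, 2.0]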
+
class BaseMetadata:
    """Base class for metadata.
-
+
    Attributes
    ----------
    url : str
@@ -145,9 +150,9 @@ class BaseMetadata:
        Response elapsed time
    header: requests.structures.CaseInsensitiveDict
        Response headers
-
+
    """
-
+
    def __init__(self, response) -> None:
        """Generates a standard set of metadata informed by the response.
@@ -168,30 +173,29 @@ def __init__(self, response) -> None:
        self.query_time = response.elapsed
        self.header = response.headers
        self.comment = None
-
+
        # # not sure what statistic_info is
        # self.statistic_info = None
-
+
        # # disclaimer seems to be only part of importWaterML1
        # self.disclaimer = None
-
+
    # These properties are to be set by `nwis` or `wqp`-specific metadata classes.
    @property
    def site_info(self):
        raise NotImplementedError(
-            "site_info must be implemented by utils.BaseMetadata children"
+            'site_info must be implemented by utils.BaseMetadata children'
        )
-
+
    @property
    def variable_info(self):
        raise NotImplementedError(
-            "variable_info must be implemented by utils.BaseMetadata children"
+            'variable_info must be implemented by utils.BaseMetadata children'
        )

-
    def __repr__(self) -> str:
-        return f"{type(self).__name__}(url={self.url})"
-
+        return f'{type(self).__name__}(url={self.url})'
+
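# Illustrative sketch (not part of the diff): subclasses fill in the
# properties BaseMetadata leaves unimplemented. The class name and bodies
# here are hypothetical:
#
#     class _DemoMetadata(BaseMetadata):
#         @property
#         def site_info(self):
#             return None  # e.g. query and parse a site-info endpoint
#
#         @property
#         def variable_info(self):
#             return None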
def query(url, payload, delimiter=',', ssl_check=True):
    """Send a query.
@@ -219,37 +223,40 @@ def query(url, payload, delimiter=',', ssl_check=True):
    for key, value in payload.items():
        payload[key] = to_str(value, delimiter)
-    #for index in range(len(payload)):
+    # for index in range(len(payload)):
    #    key, value = payload[index]
    #    payload[index] = (key, to_str(value))

    # define the user agent for the query
-    user_agent = {
-        'user-agent': f"python-dataretrieval/{dataretrieval.__version__}"}
+    user_agent = {'user-agent': f'python-dataretrieval/{dataretrieval.__version__}'}

-    response = requests.get(url, params=payload,
-                            headers=user_agent, verify=ssl_check)
+    response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check)

    if response.status_code == 400:
-        raise ValueError("Bad Request, check that your parameters are correct. URL: {}".format(response.url))
+        raise ValueError(
+            f'Bad Request, check that your parameters are correct. URL: {response.url}'
+        )
    elif response.status_code == 404:
        raise ValueError(
-            "Page Not Found Error. May be the result of an empty query. " +
-            f"URL: {response.url}")
+            'Page Not Found Error. May be the result of an empty query. '
+            + f'URL: {response.url}'
+        )
    elif response.status_code == 414:
        _reason = response.reason
        _example = """
-        split_list = np.array_split(site_list, n)  # n is number of chunks to divide query into \n
+        # n is the number of chunks to divide the query into \n
+        split_list = np.array_split(site_list, n)
        data_list = []  # list to store chunk results in \n
        # loop through chunks and make requests \n
        for site_list in split_list: \n
-            data = nwis.get_record(sites=site_list, service='dv', start=start, end=end) \n
+            data = nwis.get_record(sites=site_list, service='dv', \n
+                                   start=start, end=end) \n
            data_list.append(data)  # append results to list"""
        raise ValueError(
-            "Request URL too long. Modify your query to use fewer sites. " +
-            f"API response reason: {_reason}. Pseudo-code example of how to " +
-            f"split your query: \n {_example}"
-        )
+            'Request URL too long. Modify your query to use fewer sites. '
+            + f'API response reason: {_reason}. Pseudo-code example of how to '
+            + f'split your query: \n {_example}'
+        )

    if response.text.startswith('No sites/data'):
        raise NoSitesError(response.url)
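# Illustrative sketch (not part of the diff): query() serializes list-valued
# parameters with to_str() and returns the server response. The URL and
# parameters below are examples only:
#
#     url = 'https://waterservices.usgs.gov/nwis/site/'
#     payload = {'sites': ['01585200', '01646500'], 'format': 'rdb'}
#     response = query(url, payload)
#     print(response.status_code, len(response.text))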
@@ -258,11 +265,13 @@ def query(url, payload, delimiter=',', ssl_check=True):
class NoSitesError(Exception):
-    """Custom error class used when selection criteria returns no sites/data.
-    """
+    """Custom error class used when selection criteria return no sites/data."""
+
    def __init__(self, url):
        self.url = url

    def __str__(self):
-        return "No sites/data found using the selection criteria specified in url: {}".format(self.url)
-
+        return (
+            'No sites/data found using the selection criteria specified in url: '
+            '{url}'
+        ).format(url=self.url)