@@ -39,6 +39,8 @@ def process_item(self, item):
39
39
district = item .xpath ("string(../../td[1])" )
40
40
party = item .xpath ("string(../../td[2])" )
41
41
leg_url = item .get ("href" )
42
+ if not district or not party or not leg_url :
43
+ self .skip ()
42
44
43
45
return SenDetail (
44
46
PartialPerson (name = name , party = party , district = district , url = leg_url )
@@ -50,15 +52,15 @@ class SenDetail(HtmlPage):
50
52
input_type = PartialPerson
51
53
52
54
def get_source_from_input(self):
    """Return the request source for the page named by this page's input.

    Returns:
        A ``URL`` built from ``self.input.url`` with ``timeout=30``.
    """
    # 30 is the value on the added side of the change; the removed line
    # used timeout=10.
    return URL(self.input.url, timeout=30)
54
56
55
57
def process_page (self ):
56
58
email = (
57
59
self .root .xpath ('//a[contains(@href, "mailto:")]' )[0 ]
58
60
.get ("href" )
59
61
.split (":" )[- 1 ]
60
62
)
61
-
63
+ print ( self . input )
62
64
p = ScrapePerson (
63
65
state = "fl" ,
64
66
chamber = "upper" ,
@@ -162,12 +164,15 @@ class Representatives(HtmlListPage):
162
164
163
165
def process_item (self , item ):
164
166
name = item .xpath ("./a/div[@class='team-txt']/h5/text()" )[0 ].strip ()
167
+ if name == "Pending, Election" :
168
+ self .skip ()
165
169
party = item .xpath ("./a/div[@class='team-txt']/p[1]/text()" )[0 ].split ()[0 ]
166
170
district = item .xpath ("./a/div[@class='team-txt']/p[1]/span/text()" )[0 ].split ()[
167
171
- 1
168
172
]
169
173
image = self .IMAGE_BASE + item .xpath (".//img" )[0 ].attrib ["data-src" ]
170
174
link = str (item .xpath ("./a/@href" )[0 ])
175
+ print (name , party , district )
171
176
172
177
return RepContact (
173
178
PartialPerson (
0 commit comments