Skip to content

Commit 3c94a01

Browse files
authored
AR: move off FTP (#5278)
1 parent ad5b991 commit 3c94a01

File tree

1 file changed

+4 additions, -59 deletions (lines changed)

1 file changed

+4 additions, -59 deletions (lines changed)

scrapers/ar/bills.py

Lines changed: 4 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,8 @@
11
import csv
22
import re
3-
import io
43
import datetime
54
import pytz
6-
import os
7-
import ssl
8-
import ftplib
9-
import tempfile
10-
5+
import urllib.parse
116

127
from openstates.scrape import Scraper, Bill, VoteEvent
138
from openstates.exceptions import EmptyScrape
@@ -24,28 +19,6 @@
2419
]
2520

2621

27-
# Needed because they're using a port python doesn't expect
28-
# https://stackoverflow.com/questions/12164470/python-ftp-implicit-tls-connection-issue
29-
class ImplicitFTP_TLS(ftplib.FTP_TLS):
30-
"""FTP_TLS subclass that automatically wraps sockets in SSL to support implicit FTPS."""
31-
32-
def __init__(self, *args, **kwargs):
33-
super().__init__(*args, **kwargs)
34-
self._sock = None
35-
36-
@property
37-
def sock(self):
38-
"""Return the socket."""
39-
return self._sock
40-
41-
@sock.setter
42-
def sock(self, value):
43-
"""When modifying the socket, ensure that it is ssl wrapped."""
44-
if value is not None and not isinstance(value, ssl.SSLSocket):
45-
value = self.context.wrap_socket(value)
46-
self._sock = value
47-
48-
4922
class ARBillScraper(Scraper):
5023
ftp_user = ""
5124
ftp_pass = ""
@@ -54,13 +27,6 @@ class ARBillScraper(Scraper):
5427

5528
def scrape(self, chamber=None, session=None):
5629

57-
self.ftp_user = os.environ.get("AR_FTP_USER")
58-
self.ftp_pass = os.environ.get("AR_FTP_PASSWORD")
59-
60-
if not self.ftp_user or not self.ftp_pass:
61-
self.error("AR_FTP_USER and AR_FTP_PASSWORD env variables are required.")
62-
raise EmptyScrape
63-
6430
self.slug = get_slug_for_session(session)
6531

6632
for i in self.jurisdiction.legislative_sessions:
@@ -506,27 +472,6 @@ def decode_ar_utf16(self, data) -> str:
506472
return data
507473

508474
def get_utf_16_ftp_content(self, filename):
509-
self.info(f"GET from ftp: {filename}")
510-
ftp_client = ImplicitFTP_TLS()
511-
ftp_client.connect(host="secureftp.arkleg.state.ar.us", port=990)
512-
ftp_client.login(user=self.ftp_user, passwd=self.ftp_pass)
513-
ftp_client.prot_p()
514-
ftp_client.cwd("SessionInformation")
515-
raw = tempfile.NamedTemporaryFile()
516-
517-
with open(raw.name, "wb") as f:
518-
ftp_client.retrbinary("RETR " + filename, raw.write)
519-
520-
# 2025: we've seen encoding issues oscillate on this file
521-
# so try both the "old" and "new" methods to decode
522-
# as necessary
523-
try:
524-
with io.open(raw.name, "r", encoding="utf-16-le") as f:
525-
text = f.read()
526-
text = text.replace("\ufeff", "")
527-
text = text.replace("\x00", "").strip()
528-
return text
529-
except UnicodeDecodeError:
530-
with open(raw.name, "rb") as f:
531-
text = self.decode_ar_utf16(f.read())
532-
return text
475+
path = urllib.parse.quote_plus(f"/SessionInformation/{filename}")
476+
url = f"https://arkleg.state.ar.us/Home/FTPDocument?path={path}"
477+
return self.decode_ar_utf16(self.get(url).content)

0 commit comments

Comments
 (0)