Skip to content

Commit

Permalink
AR: move off FTP (#5278)
Browse files Browse the repository at this point in the history
  • Loading branch information
showerst authored Feb 7, 2025
1 parent ad5b991 commit 3c94a01
Showing 1 changed file with 4 additions and 59 deletions.
63 changes: 4 additions & 59 deletions scrapers/ar/bills.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
import csv
import re
import io
import datetime
import pytz
import os
import ssl
import ftplib
import tempfile

import urllib.parse

from openstates.scrape import Scraper, Bill, VoteEvent
from openstates.exceptions import EmptyScrape
Expand All @@ -24,28 +19,6 @@
]


# Needed because they're using a port python doesn't expect
# https://stackoverflow.com/questions/12164470/python-ftp-implicit-tls-connection-issue
class ImplicitFTP_TLS(ftplib.FTP_TLS):
    """FTP_TLS subclass that automatically wraps sockets in SSL to support implicit FTPS.

    ``ftplib.FTP_TLS`` implements explicit FTPS: it connects in plain text
    and upgrades the session afterwards. Implicit FTPS servers expect TLS
    from the first byte, so every plain socket assigned to ``sock`` is
    wrapped with ``self.context`` before it is stored.
    """

    def __init__(self, *args, **kwargs):
        # Create the backing attribute *before* delegating. When a host is
        # passed to the constructor the base class connects from inside its
        # own __init__ and assigns ``self.sock``; resetting ``_sock``
        # afterwards would silently discard that connected socket.
        self._sock = None
        super().__init__(*args, **kwargs)

    @property
    def sock(self):
        """Return the socket."""
        return self._sock

    @sock.setter
    def sock(self, value):
        """When modifying the socket, ensure that it is ssl wrapped."""
        # None (disconnect) and already-wrapped sockets are stored as-is.
        if value is not None and not isinstance(value, ssl.SSLSocket):
            value = self.context.wrap_socket(value)
        self._sock = value


class ARBillScraper(Scraper):
ftp_user = ""
ftp_pass = ""
Expand All @@ -54,13 +27,6 @@ class ARBillScraper(Scraper):

def scrape(self, chamber=None, session=None):

self.ftp_user = os.environ.get("AR_FTP_USER")
self.ftp_pass = os.environ.get("AR_FTP_PASSWORD")

if not self.ftp_user or not self.ftp_pass:
self.error("AR_FTP_USER and AR_FTP_PASSWORD env variables are required.")
raise EmptyScrape

self.slug = get_slug_for_session(session)

for i in self.jurisdiction.legislative_sessions:
Expand Down Expand Up @@ -506,27 +472,6 @@ def decode_ar_utf16(self, data) -> str:
return data

def get_utf_16_ftp_content(self, filename):
# Return the decoded text of the session data file named `filename`.
# NOTE(review): indentation is not preserved in this view, and the statements
# below look like two implementations shown back to back: an FTP download
# that ends at the `except UnicodeDecodeError` branch, followed by an HTTPS
# download in the last three statements. Presumably only one of them is meant
# to be live; confirm against the repository before changing anything here.
self.info(f"GET from ftp: {filename}")
# Connect to the FTPS host on port 990 and log in with the credentials
# stored on the scraper instance.
ftp_client = ImplicitFTP_TLS()
ftp_client.connect(host="secureftp.arkleg.state.ar.us", port=990)
ftp_client.login(user=self.ftp_user, passwd=self.ftp_pass)
# prot_p() sets up a secure data connection for the transfer that follows.
ftp_client.prot_p()
ftp_client.cwd("SessionInformation")
# Scratch file that receives the downloaded bytes.
raw = tempfile.NamedTemporaryFile()

# NOTE(review): the temp file is reopened by name as `f`, but the download
# callback is `raw.write`, so the bytes go through the `raw` handle and `f`
# is never written to. Nothing flushes or closes `raw` before the file is
# read back below; verify no data is left in the write buffer.
with open(raw.name, "wb") as f:
ftp_client.retrbinary("RETR " + filename, raw.write)

# 2025: we've seen encoding issues oscillate on this file
# so try both the "old" and "new" methods to decode
# as necessary
try:
# First attempt: read as UTF-16-LE text, then drop BOM characters and NUL
# characters and trim surrounding whitespace.
with io.open(raw.name, "r", encoding="utf-16-le") as f:
text = f.read()
text = text.replace("\ufeff", "")
text = text.replace("\x00", "").strip()
return text
except UnicodeDecodeError:
# Fallback: re-read the raw bytes and hand them to decode_ar_utf16.
with open(raw.name, "rb") as f:
text = self.decode_ar_utf16(f.read())
return text
# HTTPS variant: percent-encode the server-side path (quote_plus also encodes
# "/" and turns spaces into "+") and pass it as the `path` query parameter.
path = urllib.parse.quote_plus(f"/SessionInformation/{filename}")
url = f"https://arkleg.state.ar.us/Home/FTPDocument?path={path}"
# Fetch through the scraper's own `get` and decode the response body with
# decode_ar_utf16.
return self.decode_ar_utf16(self.get(url).content)

0 comments on commit 3c94a01

Please sign in to comment.