MAINT: Fix automatically detected issues

bashtage · bashtage · commit f4fb6a2dd1f3 · 2022-04-24T08:26:27.000+01:00
Fix a number of small issues found with an automated code analysis tool
diff --git a/archive/docs/fix_longtable.py b/archive/docs/fix_longtable.py
@@ -7,8 +7,8 @@
 read_file_path = os.path.join(BUILDDIR,'latex','statsmodels.tex')
 write_file_path = os.path.join(BUILDDIR, 'latex','statsmodels_tmp.tex')
 
-read_file = open(read_file_path,'r')
-write_file = open(write_file_path, 'w')
+read_file = open(read_file_path, 'r', encoding="utf-8")
+write_file = open(write_file_path, 'w', encoding="utf-8")
 
 for line in read_file:
     if 'longtable}{LL' in line:
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -405,7 +405,7 @@
 # ghissue config
 github_project_url = 'https://github.com/statsmodels/statsmodels'
 
-example_context = yaml.safe_load(open('examples/landing.yml'))
+example_context = yaml.safe_load(open('examples/landing.yml', encoding="utf-8"))
 html_context.update({'examples': example_context})
 
 # --------------- DOCTEST -------------------
diff --git a/examples/python/generic_mle.py b/examples/python/generic_mle.py
@@ -130,7 +130,7 @@ def nloglikeobs(self, params):
     def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
         # we have one additional parameter and we need to add it for summary
         self.exog_names.append('alpha')
-        if start_params == None:
+        if start_params is None:
             # Reasonable starting values
             start_params = np.append(np.zeros(self.exog.shape[1]), .5)
             # intercept
diff --git a/examples/run_all.py b/examples/run_all.py
@@ -28,7 +28,7 @@ def no_show(*args):
     EXAMPLE_FILES = glob.glob('python/*.py')
     for example in EXAMPLE_FILES:
         KNOWN_BAD_FILE = any([bf in example for bf in BAD_FILES])
-        with open(example, 'r') as pyfile:
+        with open(example, 'r', encoding="utf-8") as pyfile:
             code = pyfile.read()
             try:
                 sys.stdout = REDIRECT_STDOUT
diff --git a/setup.py b/setup.py
@@ -50,7 +50,7 @@
 # These are strictly installation requirements. Builds requirements are
 # managed in pyproject.toml
 INSTALL_REQUIRES = []
-with open("requirements.txt") as req:
+with open("requirements.txt", encoding="utf-8") as req:
     for line in req.readlines():
         INSTALL_REQUIRES.append(line.split("#")[0].strip())
 
@@ -266,11 +266,11 @@ def check_source(source_name):
 def process_tempita(source_name):
     """Runs pyx.in files through tempita is needed"""
     if source_name.endswith("pyx.in"):
-        with open(source_name, "r") as templated:
+        with open(source_name, "r", encoding="utf-8") as templated:
             pyx_template = templated.read()
         pyx = Tempita.sub(pyx_template)
         pyx_filename = source_name[:-3]
-        with open(pyx_filename, "w") as pyx_file:
+        with open(pyx_filename, "w", encoding="utf-8") as pyx_file:
             pyx_file.write(pyx)
         file_stats = os.stat(source_name)
         try:
diff --git a/statsmodels/base/tests/test_penalized.py b/statsmodels/base/tests/test_penalized.py
@@ -67,7 +67,7 @@ def setup_class(cls):
         cls._initialize()
 
     @classmethod
-    def _generate_endog(self, linpred):
+    def _generate_endog(cls, linpred):
         mu = np.exp(linpred)
         np.random.seed(999)
         y = np.random.poisson(mu)
@@ -404,7 +404,7 @@ def test_cov_type(self):
 class CheckPenalizedLogit(CheckPenalizedPoisson):
 
     @classmethod
-    def _generate_endog(self, linpred):
+    def _generate_endog(cls, linpred):
         mu = 1 / (1 + np.exp(-linpred + linpred.mean() - 0.5))
         np.random.seed(999)
         y = np.random.rand(len(mu)) < mu
@@ -517,7 +517,7 @@ def test_zeros(self):
 class CheckPenalizedBinomCount(CheckPenalizedPoisson):
 
     @classmethod
-    def _generate_endog(self, linpred):
+    def _generate_endog(cls, linpred):
         mu = 1 / (1 + np.exp(-linpred + linpred.mean() - 0.5))
         np.random.seed(999)
         n_trials = 5 * np.ones(len(mu), int)
@@ -614,7 +614,7 @@ def _initialize(cls):
 class CheckPenalizedGaussian(CheckPenalizedPoisson):
 
     @classmethod
-    def _generate_endog(self, linpred):
+    def _generate_endog(cls, linpred):
         sig_e = np.sqrt(0.1)
         np.random.seed(999)
         y = linpred + sig_e * np.random.rand(len(linpred))
diff --git a/statsmodels/datasets/utils.py b/statsmodels/datasets/utils.py
@@ -119,7 +119,8 @@ def _get_cache(cache):
 
 def _cache_it(data, cache_path):
     import zlib
-    open(cache_path, "wb").write(zlib.compress(data))
+    with open(cache_path, "wb") as zf:
+        zf.write(zlib.compress(data))
 
 
 def _open_cache(cache_path):
diff --git a/statsmodels/discrete/tests/test_discrete.py b/statsmodels/discrete/tests/test_discrete.py
@@ -1617,7 +1617,7 @@ def test_issue_339():
     smry = "\n".join(res1.summary().as_text().split('\n')[9:])
     cur_dir = os.path.dirname(os.path.abspath(__file__))
     test_case_file = os.path.join(cur_dir, 'results', 'mn_logit_summary.txt')
-    with open(test_case_file, 'r') as fd:
+    with open(test_case_file, 'r', encoding="utf-8") as fd:
         test_case = fd.read()
     np.testing.assert_equal(smry, test_case[:-1])
     # smoke test for summary2
diff --git a/statsmodels/examples/ex_lowess.py b/statsmodels/examples/ex_lowess.py
@@ -65,7 +65,7 @@
 rpath = os.path.split(statsmodels.nonparametric.tests.results.__file__)[0]
 rfile = os.path.join(rpath, 'test_lowess_frac.csv')
 test_data = np.genfromtxt(open(rfile, 'rb'),
-                                  delimiter = ',', names = True)
+                          delimiter=',', names=True)
 expected_lowess_23 = np.array([test_data['x'], test_data['out_2_3']]).T
 expected_lowess_15 = np.array([test_data['x'], test_data['out_1_5']]).T
 
diff --git a/statsmodels/examples/run_all.py b/statsmodels/examples/run_all.py
@@ -49,7 +49,7 @@ def noop(*args):
         try:
             print("\n\nExecuting example file", run_all_f)
             print("-----------------------" + "-"*len(run_all_f))
-            exec(open(run_all_f).read())
+            exec(open(run_all_f, encoding="utf-8").read())
         except:
             #f might be overwritten in the executed file
             print("**********************" + "*"*len(run_all_f))
diff --git a/statsmodels/genmod/generalized_estimating_equations.py b/statsmodels/genmod/generalized_estimating_equations.py
@@ -1515,7 +1515,7 @@ class make sense when the model has been fit with regularization.
             update, hm = self._update_regularized(
                               mean_params, pen_wt, scad_param, eps)
             if update is None:
-                msg = "Singular matrix encountered in regularized GEE update",
+                msg = "Singular matrix encountered in regularized GEE update"
                 warnings.warn(msg, ConvergenceWarning)
                 break
             if itr > miniter and np.sqrt(np.sum(update**2)) < ctol:
diff --git a/statsmodels/genmod/tests/gee_categorical_simulation_check.py b/statsmodels/genmod/tests/gee_categorical_simulation_check.py
@@ -187,7 +187,7 @@ def gendat_nominal():
 
     nrep = 100
 
-    OUT = open("gee_categorical_simulation_check.txt", "w")
+    OUT = open("gee_categorical_simulation_check.txt", "w", encoding="utf-8")
 
     np.set_printoptions(formatter={'all': lambda x: "%8.3f" % x},
                         suppress=True)
diff --git a/statsmodels/genmod/tests/gee_gaussian_simulation_check.py b/statsmodels/genmod/tests/gee_gaussian_simulation_check.py
@@ -245,7 +245,7 @@ def gendat_nested1():
         # older numpy versions do not have formatter option
         pass
 
-    OUT = open("gee_gaussian_simulation_check.txt", "w")
+    OUT = open("gee_gaussian_simulation_check.txt", "w", encoding="utf-8")
 
     nrep = 100
 
diff --git a/statsmodels/genmod/tests/gee_poisson_simulation_check.py b/statsmodels/genmod/tests/gee_poisson_simulation_check.py
@@ -178,7 +178,7 @@ def gendat_overdispersed():
     np.set_printoptions(formatter={'all': lambda x: "%8.3f" % x},
                         suppress=True)
 
-    OUT = open("gee_poisson_simulation_check.txt", "w")
+    OUT = open("gee_poisson_simulation_check.txt", "w", encoding="utf-8")
 
     nrep = 100
 
diff --git a/statsmodels/genmod/tests/gee_simulation_check.py b/statsmodels/genmod/tests/gee_simulation_check.py
@@ -17,7 +17,7 @@
                     suppress=True)
 
 
-OUT = open("gee_simulation_check.txt", "w")
+OUT = open("gee_simulation_check.txt", "w", encoding="utf-8")
 
 class GEE_simulator(object):
 
diff --git a/statsmodels/genmod/tests/results/gee_generate_tests.py b/statsmodels/genmod/tests/results/gee_generate_tests.py
@@ -21,7 +21,7 @@ def generate_logistic():
 
     p = len(beta)
 
-    OUT = open("gee_logistic_1.csv", "w")
+    OUT = open("gee_logistic_1.csv", "w", encoding="utf-8")
 
     for i in range(nclust):
 
@@ -62,7 +62,7 @@ def generate_linear():
 
     p = len(beta)
 
-    OUT = open("gee_linear_1.csv", "w")
+    OUT = open("gee_linear_1.csv", "w", encoding="utf-8")
 
     for i in range(nclust):
 
@@ -102,7 +102,7 @@ def generate_nested_linear():
 
     p = len(beta)
 
-    OUT = open("gee_nested_linear_1.csv", "w")
+    OUT = open("gee_nested_linear_1.csv", "w", encoding="utf-8")
 
     for i in range(nclust):
 
@@ -130,7 +130,7 @@ def generate_ordinal():
 
     rz = 0.5
 
-    OUT = open("gee_ordinal_1.csv", "w")
+    OUT = open("gee_ordinal_1.csv", "w", encoding="utf-8")
 
     for i in range(200):
 
@@ -165,7 +165,7 @@ def generate_nominal():
 
     rz = 0.5
 
-    OUT = open("gee_nominal_1.csv", "w")
+    OUT = open("gee_nominal_1.csv", "w", encoding="utf-8")
 
     for i in range(200):
 
@@ -203,7 +203,7 @@ def generate_poisson():
 
     nclust = 100
 
-    OUT = open("gee_poisson_1.csv", "w")
+    OUT = open("gee_poisson_1.csv", "w", encoding="utf-8")
 
     for i in range(nclust):
 
diff --git a/statsmodels/genmod/tests/results/results_glm.py b/statsmodels/genmod/tests/results/results_glm.py
@@ -1193,7 +1193,7 @@ def __init__(self):
         # set up data #
         filename = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 "inv_gaussian.csv")
-        with open(filename, 'r') as fd:
+        with open(filename, 'r', encoding="utf-8") as fd:
             data = np.genfromtxt(fd, delimiter=",", dtype=float)[1:]
         self.endog = data[:5000, 0]
         self.exog = data[:5000, 1:]
diff --git a/statsmodels/iolib/table.py b/statsmodels/iolib/table.py
@@ -97,7 +97,7 @@ def csv2st(csvfile, headers=False, stubs=False, title=None):
     Can also supply headers and stubs as tuples of strings.
     """
     rows = list()
-    with open(csvfile, 'r') as fh:
+    with open(csvfile, 'r', encoding="utf-8") as fh:
         reader = csv.reader(fh)
         if headers is True:
             headers = next(reader)
diff --git a/statsmodels/nonparametric/tests/test_kernel_regression.py b/statsmodels/nonparametric/tests/test_kernel_regression.py
@@ -64,7 +64,7 @@ def write2file(self, file_name, data):  # pragma: no cover
         """Write some data to a csv file.  Only use for debugging!"""
         import csv
 
-        data_file = csv.writer(open(file_name, "w"))
+        data_file = csv.writer(open(file_name, "w", encoding="utf-8"))
         data = np.column_stack(data)
         nobs = max(np.shape(data))
         K = min(np.shape(data))
diff --git a/statsmodels/othermod/tests/test_beta.py b/statsmodels/othermod/tests/test_beta.py
@@ -67,18 +67,18 @@ def assert_close(a, b, eps):
 class TestBetaModel(object):
 
     @classmethod
-    def setup_class(self):
+    def setup_class(cls):
         model = "I(food/income) ~ income + persons"
-        self.income_fit = BetaModel.from_formula(model, income).fit()
+        cls.income_fit = BetaModel.from_formula(model, income).fit()
 
-        model = self.model = "methylation ~ gender + CpG"
-        Z = self.Z = patsy.dmatrix("~ age", methylation)
+        model = cls.model = "methylation ~ gender + CpG"
+        Z = cls.Z = patsy.dmatrix("~ age", methylation)
         mod = BetaModel.from_formula(model, methylation, exog_precision=Z,
                                      link_precision=links.identity())
-        self.meth_fit = mod.fit()
+        cls.meth_fit = mod.fit()
         mod = BetaModel.from_formula(model, methylation, exog_precision=Z,
                                      link_precision=links.Log())
-        self.meth_log_fit = mod.fit()
+        cls.meth_log_fit = mod.fit()
 
     def test_income_coefficients(self):
         rslt = self.income_fit
diff --git a/statsmodels/regression/tests/test_lme.py b/statsmodels/regression/tests/test_lme.py
@@ -58,7 +58,7 @@ def __init__(self, meth, irfs, ds_ix):
         cur_dir = os.path.dirname(os.path.abspath(__file__))
         rdir = os.path.join(cur_dir, 'results')
         fname = os.path.join(rdir, "lme%02d.csv" % ds_ix)
-        with open(fname) as fid:
+        with open(fname, encoding="utf-8") as fid:
             rdr = csv.reader(fid)
             header = next(rdr)
             data = [[float(x) for x in line] for line in rdr]
diff --git a/statsmodels/sandbox/distributions/tests/test_norm_expan.py b/statsmodels/sandbox/distributions/tests/test_norm_expan.py
@@ -60,29 +60,29 @@ class TestExpandNormMom(CheckExpandNorm):
     # compare with normal, skew=0, excess_kurtosis=0
 
     @classmethod
-    def setup_class(kls):
-        kls.scale = 2
-        kls.dist1 = stats.norm(1, 2)
-        kls.mvsk = [1., 2**2, 0, 0]
-        kls.dist2 = NormExpan_gen(kls.mvsk, mode='mvsk')
+    def setup_class(cls):
+        cls.scale = 2
+        cls.dist1 = stats.norm(1, 2)
+        cls.mvsk = [1., 2**2, 0, 0]
+        cls.dist2 = NormExpan_gen(cls.mvsk, mode='mvsk')
 
 
 class TestExpandNormSample(object):
     # do not subclass CheckExpandNorm,
     # precision not high enough because of mvsk from data
 
     @classmethod
-    def setup_class(kls):
-        kls.dist1 = dist1 = stats.norm(1, 2)
+    def setup_class(cls):
+        cls.dist1 = dist1 = stats.norm(1, 2)
         np.random.seed(5999)
-        kls.rvs = dist1.rvs(size=200)
+        cls.rvs = dist1.rvs(size=200)
         #rvs = np.concatenate([rvs, -rvs])
         # fix mean and std of sample
         #rvs = (rvs - rvs.mean())/rvs.std(ddof=1) * np.sqrt(2) + 1
-        kls.dist2 = NormExpan_gen(kls.rvs, mode='sample')
+        cls.dist2 = NormExpan_gen(cls.rvs, mode='sample')
 
-        kls.scale = 2
-        kls.atol_pdf = 1e-3
+        cls.scale = 2
+        cls.atol_pdf = 1e-3
 
     def test_ks(self):
         # cdf is slow
diff --git a/statsmodels/sandbox/examples/run_all.py b/statsmodels/sandbox/examples/run_all.py
@@ -23,7 +23,7 @@
         try:
             print("Executing example file", run_all_f)
             print("-----------------------" + "-"*len(run_all_f))
-            exec(open(run_all_f).read())
+            exec(open(run_all_f, encoding="utf-8").read())
         except:
             #f might be overwritten in the executed file
             print("*********************")
diff --git a/statsmodels/sandbox/regression/anova_nistcertified.py b/statsmodels/sandbox/regression/anova_nistcertified.py
@@ -24,7 +24,7 @@
 def getnist(filename):
     here = os.path.dirname(__file__)
     fname = os.path.abspath(os.path.join(here, 'data', filename))
-    with open(fname, 'r') as fd:
+    with open(fname, 'r', encoding="utf-8") as fd:
         content = fd.read().split('\n')
 
     data = [line.split() for line in content[60:]]
diff --git a/statsmodels/sandbox/tests/maketests_mlabwrap.py b/statsmodels/sandbox/tests/maketests_mlabwrap.py
@@ -139,7 +139,7 @@ def save(self, what=None, filename=None, header=True, useinstant=True,
             txt.append('%s%s = %s' % (prefix, x, repr(getattr(self,x))))
         txt.extend(['','']) #add empty lines at end
         if filename is not None:
-            with open(filename, 'a+') as fd:
+            with open(filename, 'a+', encoding="utf-8") as fd:
                 fd.write('\n'.join(txt))
         return txt
 
diff --git a/statsmodels/stats/libqsturng/tests/test_qsturng.py b/statsmodels/stats/libqsturng/tests/test_qsturng.py
@@ -19,7 +19,7 @@
 
 
 def read_ch(fname):
-    with open(fname) as f:
+    with open(fname, encoding="utf-8") as f:
         lines = f.readlines()
     ps,rs,vs,qs = lzip(*[L.split(',') for L in lines])
     return lmap(float, ps), lmap(float, rs),lmap(float, vs), lmap(float, qs)
diff --git a/statsmodels/stats/tests/results/lilliefors_critical_value_simulation.py b/statsmodels/stats/tests/results/lilliefors_critical_value_simulation.py
@@ -139,7 +139,7 @@ def simulations(sim_type, save=False):
 
 """
     cv_filename = '../../_lilliefors_critical_values.py'
-    with io.open(cv_filename, 'w', newline='\n') as cv:
+    with io.open(cv_filename, 'w', newline='\n', encoding="utf-8") as cv:
         cv.write(FormatCode(header)[0])
         cv.write(FormatCode(normal)[0])
         cv.write('\n\n')
diff --git a/statsmodels/stats/tests/test_deltacov.py b/statsmodels/stats/tests/test_deltacov.py
@@ -15,12 +15,13 @@
 
 class TestDeltacovOLS(object):
 
-    def setup_class(self):
+    @classmethod
+    def setup_class(cls):
         nobs, k_vars = 100, 4
         x = np.random.randn(nobs, k_vars)
         x[:, 0] = 1
         y = x[:, :-1].sum(1) + np.random.randn(nobs)
-        self.res = OLS(y, x).fit()
+        cls.res = OLS(y, x).fit()
 
     def test_method(self):
         # test Results.method is same as calling function/class
diff --git a/statsmodels/stats/tests/test_diagnostic.py b/statsmodels/stats/tests/test_diagnostic.py
diff --git a/statsmodels/stats/tests/test_diagnostic_other.py b/statsmodels/stats/tests/test_diagnostic_other.py
diff --git a/statsmodels/tsa/ardl/_pss_critical_values/pss-process.py b/statsmodels/tsa/ardl/_pss_critical_values/pss-process.py
diff --git a/statsmodels/tsa/statespace/tests/test_dynamic_factor_mq.py b/statsmodels/tsa/statespace/tests/test_dynamic_factor_mq.py
diff --git a/statsmodels/tsa/tests/test_exponential_smoothing.py b/statsmodels/tsa/tests/test_exponential_smoothing.py
diff --git a/statsmodels/tsa/vector_ar/tests/JMulTi_results/parse_jmulti_var_output.py b/statsmodels/tsa/vector_ar/tests/JMulTi_results/parse_jmulti_var_output.py
diff --git a/statsmodels/tsa/vector_ar/tests/JMulTi_results/parse_jmulti_vecm_output.py b/statsmodels/tsa/vector_ar/tests/JMulTi_results/parse_jmulti_vecm_output.py
diff --git a/statsmodels/tsa/x13.py b/statsmodels/tsa/x13.py
diff --git a/tools/dataset_rst.py b/tools/dataset_rst.py
diff --git a/tools/generate_formula_api.py b/tools/generate_formula_api.py
diff --git a/tools/releasing/enumerate-api.py b/tools/releasing/enumerate-api.py