From 014370b913ede5fb64ec917217f40641ac841e13 Mon Sep 17 00:00:00 2001 From: San <99511815+sanowl@users.noreply.github.com> Date: Sun, 16 Jun 2024 13:45:57 +0300 Subject: [PATCH 1/4] "Refactored DIFOutputJSON for improved readability and error handling" --- lib/JsonDIF.py | 480 +++++++++---------------------------------------- 1 file changed, 81 insertions(+), 399 deletions(-) diff --git a/lib/JsonDIF.py b/lib/JsonDIF.py index 5c0d033d..a4281751 100755 --- a/lib/JsonDIF.py +++ b/lib/JsonDIF.py @@ -1,407 +1,89 @@ -'''This file is for get JSON output for Collection DIF data''' +""" +This file is for generating JSON output for Collection DIF data. +""" -class DIFOutputJSON(): - def __init__(self,checkerRules,wrap): - self.checkerRules = checkerRules +class DIFOutputJSON: + def __init__(self, checker_rules, wrap): + self.checker_rules = checker_rules self.wrap = wrap - def checkAll(self, metadata): + def check_all(self, metadata): result = {} - #======================================= - str = 'Entry_Title' - try: - result[str] = self.checkerRules.check_Entry_Title(metadata) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Citation.Dataset_Release_Date' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Dataset_Citation_Dataset_Release_Date,str) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Citation.Persistent_Identifier.Type' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Dataset_Citation_Persistent_Identifier_Type,'Dataset_Citation.Persistent_Identifier.Type') - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Citation.Persistent_Identifier.Identifier' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Dataset_Citation_Persistent_Identifier_Identifier,str) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Citation.Online_Resource' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Dataset_Citation_Online_Resource,'Dataset_Citation.Online_Resource') - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Role' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Role_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Person.Email' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Personnel_Contact_Person_Email_item, str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Person.Phone.Number' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Personnel_Contact_Person_phone_item, str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Person.Phone.Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Contact_Person_Phone_Type_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Group.Email' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Contact_Group_Email_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Group.Phone.Number' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Contact_Group_Phone_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Group.Phone.Type' 
- try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Contact_Group_Phone_Type_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Category' - try: - result[str] = self.wrap(metadata,self.checkerRules.science_Keywords_item_Category,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Topic' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_science_Keywords_item_topic,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Term' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_science_Keywords_item_Term,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Variable_Level_1' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_science_Keywords_item_Variable_1,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Variable_Level_2' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_science_Keywords_item_Variable_2,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Variable_Level_3' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_science_Keywords_item_Variable_3,str) - except: - result[str] = 'np' - # ====================================== - str = 'ISO_Topic_Category' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_ISO_Topic_Category,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Type' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Platform_item_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Short_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Short_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Long_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Long_Name,str) - except: - result[str] = 'np' + checks = [ + ('Entry_Title', self.checker_rules.check_Entry_Title), + ('Dataset_Citation.Dataset_Release_Date', self.checker_rules.check_Dataset_Citation_Dataset_Release_Date), + ('Dataset_Citation.Persistent_Identifier.Type', self.checker_rules.check_Dataset_Citation_Persistent_Identifier_Type), + ('Dataset_Citation.Persistent_Identifier.Identifier', self.checker_rules.check_Dataset_Citation_Persistent_Identifier_Identifier), + ('Dataset_Citation.Online_Resource', self.checker_rules.check_Dataset_Citation_Online_Resource), + ('Personnel.Role', self.checker_rules.check_Personnel_Role_item), + ('Personnel.Contact_Person.Email', self.checker_rules.check_Personnel_Contact_Person_Email_item), + ('Personnel.Contact_Person.Phone.Number', self.checker_rules.check_Personnel_Contact_Person_phone_item), + ('Personnel.Contact_Person.Phone.Type', self.checker_rules.check_Personnel_Contact_Person_Phone_Type_item), + ('Personnel.Contact_Group.Email', self.checker_rules.check_Personnel_Contact_Group_Email_item), + ('Personnel.Contact_Group.Phone.Number', self.checker_rules.check_Personnel_Contact_Group_Phone_item), + ('Personnel.Contact_Group.Phone.Type', self.checker_rules.check_Personnel_Contact_Group_Phone_Type_item), + ('Science_Keywords.Category', self.checker_rules.science_Keywords_item_Category), + ('Science_Keywords.Topic', 
self.checker_rules.check_science_Keywords_item_topic), + ('Science_Keywords.Term', self.checker_rules.check_science_Keywords_item_Term), + ('Science_Keywords.Variable_Level_1', self.checker_rules.check_science_Keywords_item_Variable_1), + ('Science_Keywords.Variable_Level_2', self.checker_rules.check_science_Keywords_item_Variable_2), + ('Science_Keywords.Variable_Level_3', self.checker_rules.check_science_Keywords_item_Variable_3), + ('ISO_Topic_Category', self.checker_rules.check_ISO_Topic_Category), + ('Platform.Type', self.checker_rules.check_Platform_item_Type), + ('Platform.Short_Name', self.checker_rules.check_Platform_item_Short_Name), + ('Platform.Long_Name', self.checker_rules.check_Platform_item_Long_Name), + ('Platform.Instrument.Short_Name', self.checker_rules.check_Platform_item_Instrument_item_shortname), + ('Platform.Instrument.Long_Name', self.checker_rules.check_Platform_item_Instrument_item_longname), + ('Platform.Instrument', self.checker_rules.check_Platform_item_Instrument_sensor_shortname), + ('Platform.Instrument', self.checker_rules.check_Platform_item_Instrument_sensor_longname), + ('Temporal_Coverage.Range_DateTime.Beginning_Date_Time', self.checker_rules.check_Temporal_Coverage_item_Begin_Date_Time), + ('Temporal_Coverage.Range_DateTime.Ending_Date_Time', self.checker_rules.check_Temporal_Coverage_item_end_Date_Time), + ('Dataset_Progress', self.checker_rules.check_dataset_progress), + ('Spatial_Coverage.Granule_Spatial_Representation', self.checker_rules.check_Spatial_Coverage_Granule_Spatial_Representation), + ('Spatial_Coverage.Geometry.Coordinate_System', self.checker_rules.check_Spatial_Coverage_Geometry_Coordinate_System), + ('Spatial_Coverage.Geometry.Bounding_Rectangle', self.checker_rules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Southernmost_Latitude), + ('Spatial_Coverage.Geometry.Bounding_Rectangle', self.checker_rules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Northernmost_Latitude), + ('Spatial_Coverage.Geometry.Bounding_Rectangle', self.checker_rules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Westernmost_Longitude), + ('Spatial_Coverage.Geometry.Bounding_Rectangle', self.checker_rules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Easternmost_Longitude), + ('Location.Location_Category', self.checker_rules.check_Location_Location_Category), + ('Location.Location_Type', self.checker_rules.check_Location_Location_Type), + ('Location.Location_Subregion1', self.checker_rules.check_Location_Subregion1), + ('Location.Location_Subregion2', self.checker_rules.check_Location_Subregion2), + ('Location.Location_Subregion3', self.checker_rules.check_Location_Subregion3), + ('Data_Resolution.Horizontal_Resolution_Range', self.checker_rules.check_Horizontal_Resolution_Range), + ('Data_Resolution.Vertical_Resolution_Range', self.checker_rules.check_Vertical_Resolution_Range), + ('Data_Resolution.Temporal_Resolution_Range', self.checker_rules.check_Temporal_Resolution_Range), + ('Project.Short_Name', self.checker_rules.check_Project_Short_Name), + ('Project.Long_Name', self.checker_rules.check_Project_Long_Name), + ('Quality', self.checker_rules.check_Quality), + ('Dataset_Language', self.checker_rules.check_Dataset_Language), + ('Organization.Organization_Type', self.checker_rules.check_Organization_Organization_Type), + ('Organization.Organization_Name.Short_Name', self.checker_rules.check_Organization_Name_Short_Name), + ('Organization.Organization_Name.Long_Name', self.checker_rules.check_Organization_Name_Long_Name), + 
('Organization.Personnel.Contact_Person.Phone.Type', self.checker_rules.check_Organization_Personnel_Contact_Person_Phone_Type), + ('Organization.Personnel.Contact_Group.Phone.Type', self.checker_rules.check_Organization_Personnel_Contact_Person_Phone_Type), + ('Distribution.Distribution_Format', self.checker_rules.check_Distribution_Distribution_Format), + ('Multimedia_Sample.URL', self.checker_rules.check_Multimedia_Sample_URL), + ('Summary.Abstract', self.checker_rules.check_summary_abstract), + ('Related_URL.URL_Content_Type.Type', self.checker_rules.check_Related_URL_item_Content_Type), + ('Related_URL.URL_Content_Type.Subtype', self.checker_rules.check_Related_URL_Content_Type_SubType), + ('Related_URL.Description', self.checker_rules.check_Related_URL_Description_Item), + ('Related_URL', self.checker_rules.check_Related_URL_Mime_Type), + ('Product_Level_Id', self.checker_rules.check_Product_Level_ID), + ('Collection_Data_Type', self.checker_rules.check_Collection_Data_Type), + ('Metadata_Dates.Metadata_Creation', self.checker_rules.check_Metadata_Dates_Creation), + ('Metadata_Dates.Metadata_Last_Revision', self.checker_rules.check_Metadata_last_revision), + ('Metadata_Dates.Data_Creation', self.checker_rules.check_Metadata_data_creation), + ('Metadata_Dates.Data_Last_Revision', self.checker_rules.check_Metadata_data_latest_revision), + ] - # ====================================== - str = 'Platform.Instrument.Short_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Instrument_item_shortname,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Instrument.Long_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Instrument_item_longname,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Instrument' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Instrument_sensor_shortname,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Instrument' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Instrument_sensor_longname,str) - except: - result[str] = 'np' + for key, check_function in checks: + result[key] = self.safe_wrap(metadata, check_function, key) - # ====================================== - str = 'Temporal_Coverage.Range_DateTime.Beginning_Date_Time' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Temporal_Coverage_item_Begin_Date_Time,str) - except: - result[str] = 'np' - # ====================================== - str = 'Temporal_Coverage.Range_DateTime.Ending_Date_Time' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Temporal_Coverage_item_end_Date_Time,str) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Progress' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_dataset_progress,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Granule_Spatial_Representation' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Granule_Spatial_Representation,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Geometry.Coordinate_System' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Coordinate_System,str) - except: - result[str] = 'np' - # 
====================================== - str = 'Spatial_Coverage.Geometry.Bounding_Rectangle' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Southernmost_Latitude,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Geometry.Bounding_Rectangle' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Northernmost_Latitude,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Geometry.Bounding_Rectangle' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Westernmost_Longitude,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Geometry.Bounding_Rectangle' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Easternmost_Longitude,str) - except: - result[str] = 'np' + return result - # ====================================== - str = 'Location.Location_Category' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Location_Location_Category,str) - except: - result[str] = 'np' - # ====================================== - str = 'Location.Location_Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Location_Location_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Location.Location_Subregion1' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Location_Subregion1,str) - except: - result[str] = 'np' - # ====================================== - str = 'Location.Location_Subregion2' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Location_Subregion2,str) - except: - result[str] = 'np' - # ====================================== - str = 'Location.Location_Subregion3' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Location_Subregion3, str) - except: - result[str] = 'np' - # ====================================== - str = 'Data_Resolution.Horizontal_Resolution_Range' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Horizontal_Resolution_Range,str) - except: - result[str] = 'np' - # ====================================== - str = 'Data_Resolution.Vertical_Resolution_Range' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Vertical_Resolution_Range,str) - except: - result[str] = 'np' - # ====================================== - str = 'Data_Resolution.Temporal_Resolution_Range' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Temporal_Resolution_Range,str) - except: - result[str] = 'np' - # ====================================== - str = 'Project.Short_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Project_Short_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Project.Long_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Project_Long_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Quality' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Quality,str) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Language' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Dataset_Language,str) - except: - result[str] = 'np' - # 
====================================== - str = 'Organization.Organization_Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Organization_Organization_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Organization.Organization_Name.Short_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Organization_Name_Short_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Organization.Organization_Name.Long_Name' + def safe_wrap(self, metadata, check_function, key): try: - result[str] = self.wrap(metadata,self.checkerRules.check_Organization_Name_Long_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Organization.Personnel.Contact_Person.Phone.Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Organization_Personnel_Contact_Person_Phone_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Organization.Personnel.Contact_Group.Phone.Type' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Organization_Personnel_Contact_Person_Phone_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Distribution.Distribution_Format' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Distribution_Distribution_Format,str) - except: - result[str] = 'np' - # ====================================== - str = 'Multimedia_Sample.URL' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Multimedia_Sample_URL,str) - except: - result[str] = 'np' - # ====================================== - str = 'Summary.Abstract' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_summary_abstract,str) - except: - result[str] = 'np' - # ====================================== - str = 'Related_URL.URL_Content_Type.Type' - try: - temp = self.wrap(metadata,self.checkerRules.check_Related_URL_item_Content_Type,str) - result[str] = self.checkerRules.check_Related_URL_Content_Type(temp) - except: - result[str] = 'np' - # ====================================== - str = 'Related_URL.URL_Content_Type.Subtype' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Related_URL_Content_Type_SubType,str) - except: - result[str] = 'np' - # ====================================== - str = 'Related_URL.Description' - try: - temp = self.wrap(metadata,self.checkerRules.check_Related_URL_Description_Item,str) - result[str] += self.checkerRules.check_Related_URL_Description(temp) - except: - result[str] = 'np' - # ====================================== - str = 'Related_URL' - try: - temp = self.wrap(metadata,self.checkerRules.check_Related_URL_Mime_Type,str) - result[str] = self.checkerRules.convertMimeType(temp) - except: - result[str] = 'np' - # ====================================== - str = 'Product_Level_Id' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Product_Level_ID,str) - except: - result[str] = 'np' - # ====================================== - str = 'Collection_Data_Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Collection_Data_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Metadata_Dates.Metadata_Creation' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Metadata_Dates_Creation,str) - except: - result[str] = 'np' - # ====================================== - str = 'Metadata_Dates.Metadata_Last_Revision' - try: - 
result[str] = self.wrap(metadata,self.checkerRules.check_Metadata_last_revision,str)
-        except:
-            result[str] = 'np'
-        # ======================================
-        str = 'Metadata_Dates.Data_Creation'
-        try:
-            result[str] = self.wrap(metadata, self.checkerRules.check_Metadata_data_creation,str)
-        except:
-            result[str] = 'np'
-        # ======================================
-        str = 'Metadata_Dates.Data_Last_Revision'
-        try:
-            result[str] = self.wrap(metadata,self.checkerRules.check_Metadata_data_latest_revision,str)
-        except:
-            result[str] = 'np'
-
-        return result
\ No newline at end of file
+            return self.wrap(metadata, check_function, key)
+        except Exception:
+            return 'np'

From b3ebdb6be2a57c77ae218198362bb218d45072c1 Mon Sep 17 00:00:00 2001
From: San <99511815+sanowl@users.noreply.github.com>
Date: Sun, 16 Jun 2024 14:15:26 +0300
Subject: [PATCH 2/4] Refactored GranuleOutputJSON for improved readability and error handling

---
 lib/JsonGranule.py | 354 +++++++++++++++------------------------------
 1 file changed, 117 insertions(+), 237 deletions(-)

diff --git a/lib/JsonGranule.py b/lib/JsonGranule.py
index 57053a83..90584b1e 100755
--- a/lib/JsonGranule.py
+++ b/lib/JsonGranule.py
@@ -1,280 +1,160 @@
-'''This file is for get CSV output for Collection DIF data'''
+"""
+This file is for generating JSON output for Granule metadata.
+"""

-class GranuleOutputJSON():
-    def __init__(self,checkerRules,fetchAllInstrs):
-        self.checkerRules = checkerRules
-        self.fetchAllInstrs = fetchAllInstrs
+class GranuleOutputJSON:
+    def __init__(self, checker_rules, fetch_all_instrs):
+        self.checker_rules = checker_rules
+        self.fetch_all_instrs = fetch_all_instrs

-    def checkAll(self, metadata):
+    def check_all(self, metadata):
         result = {}
-        # ================
-        str = 'InsertTime'
-        try:
-            result[str] = self.checkerRules.checkInsertTime(metadata[str])
-        except:
-            result[str] = "np"
-        # ================
-        str = 'LastUpdate'
-        try:
-            result[str] = self.checkerRules.checkLastUpdate(metadata['LastUpdate'],metadata['DataGranule']['ProductionDateTime'])
-        except:
-            result[str] = "np"
-        # ================
-        str = 'DeleteTime'
-        try:
-            result[str] = self.checkerRules.checkDeleteTime(metadata['DeleteTime'],
-                                                            metadata['DataGranule']['ProductionDateTime'])
-        except:
-            result[str] = "np"
-        # ================
-        str = 'Collection/ShortName'
-        try:
-            result[str] = self.checkerRules.checkCollectionShortName(metadata['Collection']['ShortName'])
-        except KeyError:
-            result[str] = "np - Ensure the DataSetId field is provided."
-        except:
-            result[str] = "np"
-        # =================
-        str = 'Collection/VersionId'
-        try:
-            result[str] = self.checkerRules.checkCollectionVersionID(metadata['Collection']['VersionId'])
-        except KeyError:
-            result[str] = "np - Ensure the DataSetId field is provided."
-        except:
-            result[str] = "np"
-        # ================
-        str = 'Collection/DataSetId'
-        try:
-            result[str] = self.checkerRules.checkDataSetId(metadata['Collection']['DataSetId'])
-        except KeyError:
-            result[str] = "np - Ensure that the ShortName and VersionId fields are provided."
-        except:
-            result[str] = "np"
-        # ================
-        str = 'DataGranule/SizeMBDataGranule'
-        try:
-            result[str] = self.checkerRules.checkSizeMBDataGranule(
-                metadata['DataGranule']['SizeMBDataGranule'])
-        except KeyError:
-            result[str] = "Granule file size not provided. 
Recommend providing a value for this field in the metadata" - except: - result[str] = "np" - # ================ - str = 'DataGranule/DayNightFlag' - try: - result[str] = self.checkerRules.checkDayNightFlag(metadata['DataGranule']['DayNightFlag']) - except: - result[str] = "np" - # ================ - str = 'DataGranule/ProductionDateTime' - try: - result[str] = self.checkerRules.checkProductionDateTime(metadata['DataGranule']['ProductionDateTime'],metadata['InsertTime']) - except: - result[str] = "np" - # ================ - str = 'Temporal/RangeDateTime/SingleDateTime' - try: - result[str] = self.checkerRules.checkTemporalSingleTime(metadata['Temporal']['RangeDateTime']['SingleDateTime']) - except: - result[str] = "np" - # ================ - str = 'Temporal/RangeDateTime/BeginningDateTime' - try: - result[str] = self.checkerRules.checkTemporalBeginningTime(metadata['Temporal']['RangeDateTime']['BeginningDateTime']) - except: - result[str] = "np" - # ================ - str = 'Temporal/RangeDateTime/EndingDateTime' - try: - result[str] = self.checkerRules.checkTemporalEndingTime(metadata['Temporal']['RangeDateTime']['EndingDateTime']) - except KeyError: - result[str] = "np" - # ================ - str = 'Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle' - try: - result[str] = self.checkerRules.checkBoundingRectangle( - metadata['Spatial']['HorizontalSpatialDomain']['Geometry'][ - 'BoundingRectangle']) - except: - result[str] = "np, np, np, np" - # ================ - str = 'OrbitCalculatedSpatialDomains/OrbitCalculatedSpatialDomain/EquatorCrossingDateTime' + checks = [ + ('InsertTime', self.checker_rules.checkInsertTime), + ('LastUpdate', lambda md: self.checker_rules.checkLastUpdate(md['LastUpdate'], md['DataGranule']['ProductionDateTime'])), + ('DeleteTime', lambda md: self.checker_rules.checkDeleteTime(md['DeleteTime'], md['DataGranule']['ProductionDateTime'])), + ('Collection/ShortName', self.checker_rules.checkCollectionShortName, KeyError, "np - Ensure the DataSetId field is provided."), + ('Collection/VersionId', self.checker_rules.checkCollectionVersionID, KeyError, "np - Ensure the DataSetId field is provided."), + ('Collection/DataSetId', self.checker_rules.checkDataSetId, KeyError, "np - Ensure that the ShortName and VersionId fields are provided."), + ('DataGranule/SizeMBDataGranule', self.checker_rules.checkSizeMBDataGranule, KeyError, "Granule file size not provided. 
Recommend providing a value for this field in the metadata"), + ('DataGranule/DayNightFlag', self.checker_rules.checkDayNightFlag), + ('DataGranule/ProductionDateTime', lambda md: self.checker_rules.checkProductionDateTime(md['DataGranule']['ProductionDateTime'], md['InsertTime'])), + ('Temporal/RangeDateTime/SingleDateTime', self.checker_rules.checkTemporalSingleTime), + ('Temporal/RangeDateTime/BeginningDateTime', self.checker_rules.checkTemporalBeginningTime), + ('Temporal/RangeDateTime/EndingDateTime', self.checker_rules.checkTemporalEndingTime, KeyError, "np"), + ('Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle', self.checker_rules.checkBoundingRectangle, None, "np, np, np, np"), + ('OrbitCalculatedSpatialDomains/OrbitCalculatedSpatialDomain/EquatorCrossingDateTime', self.checker_rules.checkEquatorCrossingTime, TypeError, "np", self.check_equator_crossing_time), + ('Platforms/Platform/ShortName', self.checker_rules.checkPlatformShortName, TypeError, "np", self.check_platform_short_name), + ('Platforms/Platform/Instruments/Instrument/ShortName', self.check_instruments_short_name), + ('Platforms/Platform/Instruments/Instrument/Sensors/Sensor/ShortName', self.check_sensor_short_name), + ('Campaigns/', self.check_campaign_short_name), + ('OnlineAccessURLs/OnlineAccessURL/URL', self.check_online_access_url, TypeError, "No Online Access URL is provided"), + ('OnlineAccessURLs/OnlineAccessURL/URLDescription', self.check_online_access_url_desc, TypeError, "Recommend providing a brief URL description"), + ('OnlineResources/OnlineResource/URL', self.check_online_resource_url), + ('OnlineResource/OnlineResource/Description', self.check_online_resource_desc), + ('OnlineResources/OnlineResource/Type', self.check_online_resource_type), + ('Orderable', self.checker_rules.checkOrderable), + ('DataFormat', self.checker_rules.checkDataFormat, KeyError, "Recommend providing the data format for the associated granule"), + ('Visible', self.checker_rules.checkVisible), + ] + + for key, check_function, *exception_handling in checks: + result[key] = self.safe_wrap(metadata, key, check_function, *exception_handling) + + return result + + def safe_wrap(self, metadata, key, check_function, specific_exception=None, specific_message=None, alternative_function=None): + try: + if alternative_function: + return alternative_function(metadata, check_function) + return check_function(metadata[key]) + except specific_exception: + return specific_message + except Exception: + return "np" + + def check_equator_crossing_time(self, metadata, check_function): try: - result[str] = self.checkerRules.checkEquatorCrossingTime( - metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain'][ - 'EquatorCrossingDateTime'], 1) + return check_function(metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain']['EquatorCrossingDateTime'], 1) except TypeError: - if metadata['OrbitCalculatedSpatialDomains'] != None and metadata['OrbitCalculatedSpatialDomains'][ - 'OrbitCalculatedSpatialDomain'] != None: + if metadata['OrbitCalculatedSpatialDomains'] and metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain']: length = len(metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain']) try: - result[str] = self.checkerRules.checkEquatorCrossingTime( - metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain'][ - 'EquatorCrossingDateTime'], length) + return 
check_function(metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain']['EquatorCrossingDateTime'], length) except: - result[str]= "np" - else: - result[str] = "np" - except: - result[str]= "np" - # ================ - str = 'Platforms/Platform/ShortName' + return "np" + return "np" + + def check_platform_short_name(self, metadata, check_function): try: - result[str] = self.checkerRules.checkPlatformShortName(metadata['Platforms']['Platform']['ShortName'],1) + return check_function(metadata['Platforms']['Platform']['ShortName'], 1) except TypeError: - if metadata['Platforms'] != None and metadata['Platforms']['Platform'] != None: + if metadata['Platforms'] and metadata['Platforms']['Platform']: length = len(metadata['Platforms']['Platform']) - result[str] = self.checkerRules.checkPlatformShortName(metadata['Platforms']['Platform'], length) - else: - result[str] = "np" - except: - result[str] = "np" - # ================ - # try: - # metadata['Platforms']['Platform']['ShortName'] - # platform_num = 1 - # result += self.checkInstrShortName(metadata['Platforms']['Platform'], platform_num) + ', , , ' - # except TypeError: - # if metadata['Platforms'] != None and metadata['Platforms']['Platform'] != None: - # platform_num = len(metadata['Platforms']['Platform']) - # result += self.checkInstrShortName(metadata['Platforms']['Platform'], platform_num) + ', , , ' - # else: - # result += "np" + ', , , ' - # except KeyError: - # result += "np" + ', , , ' - # ================ - str = 'Platforms/Platform/Instruments/Instrument/ShortName' - instruments = self.fetchAllInstrs - sensorShortResult = '' + return check_function(metadata['Platforms']['Platform'], length) + return "np" + + def check_instruments_short_name(self, metadata, check_function): + sensor_short_result = '' try: metadata['Platforms']['Platform']['ShortName'] platform_num = 1 - ret, sensorShortResult = self.checkerRules.checkInstrShortName(metadata['Platforms']['Platform'], - platform_num, instruments) - result[str] = ret + ret, sensor_short_result = check_function(metadata['Platforms']['Platform'], platform_num, self.fetch_all_instrs) + return ret except TypeError: - if metadata['Platforms'] != None and metadata['Platforms']['Platform'] != None: + if metadata['Platforms'] and metadata['Platforms']['Platform']: platform_num = len(metadata['Platforms']['Platform']) - ret, sensorShortResult = self.checkerRules.checkInstrShortName(metadata['Platforms']['Platform'], - platform_num, instruments) - result[str] = ret - else: - result[str] = 'np' - except KeyError: - result[str] = 'np' - # ================ - str = 'Platforms/Platform/Instruments/Instrument/Sensors/Sensor/ShortName' - if len(sensorShortResult) == 0: - result[str] = 'np' - else: - result[str] = sensorShortResult - # ================ - str = 'Campaigns/' + ret, sensor_short_result = check_function(metadata['Platforms']['Platform'], platform_num, self.fetch_all_instrs) + return ret + return 'np' + + def check_sensor_short_name(self, metadata, check_function): + sensor_short_result = self.check_instruments_short_name(metadata, check_function) + if len(sensor_short_result) == 0: + return 'np' + return sensor_short_result + + def check_campaign_short_name(self, metadata, check_function): try: campaign_num = 1 - result[str] = self.checkerRules.checkCampaignShortName(metadata['Campaigns']['Campaign']['ShortName'],campaign_num) + return check_function(metadata['Campaigns']['Campaign']['ShortName'], campaign_num) except TypeError: - if metadata['Campaigns'] != None and 
metadata['Campaigns']['Campaign'] != None: + if metadata['Campaigns'] and metadata['Campaigns']['Campaign']: campaign_num = len(metadata['Campaigns']) - result[str] = self.checkerRules.checkCampaignShortName(metadata['Campaigns'],campaign_num) + return check_function(metadata['Campaigns'], campaign_num) except: - result[str] = "np" - # ================ - str = 'OnlineAccessURLs/OnlineAccessURL/URL' + return "np" + + def check_online_access_url(self, metadata, check_function): try: - result[str] = self.checkerRules.checkOnlineAccessURL(metadata['OnlineAccessURLs']['OnlineAccessURL']['URL'],1) + return check_function(metadata['OnlineAccessURLs']['OnlineAccessURL']['URL'], 1) except TypeError: - if metadata['OnlineAccessURLs'] != None: + if metadata['OnlineAccessURLs']: length = len(metadata['OnlineAccessURLs']['OnlineAccessURL']) - result[str] = self.checkerRules.checkOnlineAccessURL(metadata['OnlineAccessURLs']['OnlineAccessURL'], - length) - else: - result[str] = "No Online Access URL is provided" + return check_function(metadata['OnlineAccessURLs']['OnlineAccessURL'], length) except KeyError: - result[str] = "No Online Access URL is provided" + return "No Online Access URL is provided" except: - result[str] = "np" - # ================ - str = 'OnlineAccessURLs/OnlineAccessURL/URLDescription' + return "np" + + def check_online_access_url_desc(self, metadata, check_function): try: - result[str]= self.checkerRules.checkOnlineAccessURLDesc( - metadata['OnlineAccessURLs']['OnlineAccessURL']['URLDescription'], 1) + return check_function(metadata['OnlineAccessURLs']['OnlineAccessURL']['URLDescription'], 1) except TypeError: - if metadata['OnlineAccessURLs'] != None: + if metadata['OnlineAccessURLs']: length = len(metadata['OnlineAccessURLs']['OnlineAccessURL']) - result[str] = self.checkerRules.checkOnlineAccessURLDesc(metadata['OnlineAccessURLs']['OnlineAccessURL'], length) - else: - result[str] = "Recommend providing a brief URL description" + return check_function(metadata['OnlineAccessURLs']['OnlineAccessURL'], length) except KeyError: - result[str] = "Recommend providing a brief URL description" + return "Recommend providing a brief URL description" except: - result[str] = "np" - # ================ - str = 'OnlineResources/OnlineResource/URL' - OnlineResourceURL_Cnt = 0 + return "np" + + def check_online_resource_url(self, metadata, check_function): try: - result[str] = self.checkerRules.checkOnlineResourceURL(metadata['OnlineResources']['OnlineResource']['URL'], - 1) - OnlineResourceURL_Cnt = 1 + return check_function(metadata['OnlineResources']['OnlineResource']['URL'], 1) except TypeError: - if metadata['OnlineResources'] != None: + if metadata['OnlineResources']: length = len(metadata['OnlineResources']['OnlineResource']) - OnlineResourceURL_Cnt = length - result[str] = self.checkerRules.checkOnlineResourceURL(metadata['OnlineResources']['OnlineResource'], - length) - else: - result[str] = "np" + return check_function(metadata['OnlineResources']['OnlineResource'], length) except: - result[str] = "np" - # ================ - str = 'OnlineResource/OnlineResource/Description' + return "np" + + def check_online_resource_desc(self, metadata, check_function): try: - result[str] = self.checkerRules.checkOnlineResourceDesc( - metadata['OnlineResources']['OnlineResource']['Description'], 1) + return check_function(metadata['OnlineResources']['OnlineResource']['Description'], 1) except TypeError: - if metadata['OnlineResources'] != None: + if metadata['OnlineResources']: length = 
len(metadata['OnlineResources']['OnlineResource']) - if length < OnlineResourceURL_Cnt: - result[str] = "Recommend providing descriptions for all Online Resource URLs." - else: - result[str] = self.checkerRules.checkOnlineResourceDesc( - metadata['OnlineResources']['OnlineResource'], length) - else: - result[str] = "np" + return check_function(metadata['OnlineResources']['OnlineResource'], length) except: - result[str] = "np" - # ================ - str = 'OnlineResources/OnlineResource/Type' + return "np" + + def check_online_resource_type(self, metadata, check_function): try: - result[str] = self.checkerRules.checkOnlineResourceType( - metadata['OnlineResources']['OnlineResource']['Type'], 1) + return check_function(metadata['OnlineResources']['OnlineResource']['Type'], 1) except TypeError: - if metadata['OnlineResources'] != None: + if metadata['OnlineResources']: length = len(metadata['OnlineResources']['OnlineResource']) - result[str] = self.checkerRules.checkOnlineResourceType(metadata['OnlineResources']['OnlineResource'], - length) - else: - result[str] = "np" - except: - result[str] = "np" - # ================ - str = "Orderable" - try: - result[str] = self.checkerRules.checkOrderable(metadata["Orderable"]) + return check_function(metadata['OnlineResources']['OnlineResource'], length) except: - result[str] = "np" - # ================ - str = 'DataFormat' - try: - result[str] = self.checkerRules.checkDataFormat(metadata["DataFormat"]) - except KeyError: - result[str] = "Recommend providing the data format for the associated granule" - except: - result[str] = "np" - # ================ - str = 'Visible' - try: - result[str] = self.checkerRules.checkVisible(metadata["Visible"]) - except: - result[str] = "np" - return result + return "np" From 309f57e83ca835781dd0655478aea2c5fee60c68 Mon Sep 17 00:00:00 2001 From: San <99511815+sanowl@users.noreply.github.com> Date: Sun, 16 Jun 2024 14:24:20 +0300 Subject: [PATCH 3/4] Refactor CollectionsControllerTest for Improved Readability and Maintainability --- .../collections_controller_test.rb | 487 ++++++------------ 1 file changed, 144 insertions(+), 343 deletions(-) diff --git a/test/controllers/collections_controller_test.rb b/test/controllers/collections_controller_test.rb index 381dd380..c2125869 100644 --- a/test/controllers/collections_controller_test.rb +++ b/test/controllers/collections_controller_test.rb @@ -1,5 +1,5 @@ require 'test_helper' -Dir[Rails.root.join("test/**/*.rb")].each {|f| require f} +Dir[Rails.root.join("test/**/*.rb")].each { |f| require f } class CollectionsControllerTest < ActionDispatch::IntegrationTest include Devise::Test::IntegrationHelpers @@ -12,33 +12,22 @@ class CollectionsControllerTest < ActionDispatch::IntegrationTest end describe 'GET #search' do - it 'it returns modis results' do + it 'returns MODIS results' do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) - stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?keyword=*modis*&page_num=1&page_size=10&provider=ORNL_CLOUD"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub('modis-search.xml'), headers: {}) - - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/collections.echo10?keyword=*modis*&page_num=1&page_size=10&provider=GESDISCCLD"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). 
- to_return(status: 200, body: get_stub('modis-search.xml'), headers: {}) - - get '/collections_search', params: { provider: 'DAAC: ANY', free_text: 'modis', curr_page:1 } - count = assigns(:collection_count) - search_iterator = assigns(:search_iterator) - assert(113, count) - assert('C1200019523-OB_DAAC', search_iterator[0]['concept_id']) + stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?keyword=*modis*&page_num=1&page_size=10&provider=ORNL_CLOUD") + .with(headers: default_headers) + .to_return(status: 200, body: get_stub('modis-search.xml')) + + stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/collections.echo10?keyword=*modis*&page_num=1&page_size=10&provider=GESDISCCLD") + .with(headers: default_headers) + .to_return(status: 200, body: get_stub('modis-search.xml')) + + get '/collections_search', params: { provider: 'DAAC: ANY', free_text: 'modis', curr_page: 1 } + + assert_equal 113, assigns(:collection_count) + assert_equal 'C1200019523-OB_DAAC', assigns(:search_iterator)[0]['concept_id'] end end @@ -46,378 +35,190 @@ class CollectionsControllerTest < ActionDispatch::IntegrationTest it "loads the correct collection on show" do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) - - #stubbing all requests for raw_data - stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_collection_C1000000020-LANCEAMSR2.xml"), :headers => {"date"=>["Tue, 21 Feb 2017 16:02:46 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["10554"], "cmr-took"=>["40"], "cmr-request-id"=>["5b0c8426-3a23-4025-a4d3-6d1c9024153a"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]}) - - stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - - }). - to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.xml'), headers: {}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). 
- to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.json'), headers: {}) + setup_show_stubs get '/collections/1', params: { record_id: 1 } - collection_records = assigns(:collection_records) - assert_equal(6, collection_records.length) + + assert_equal 6, assigns(:collection_records).length end it "redirects when no concept id is provided" do - #redirects no record_id sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) - get '/collections/1', params: { } - assert_equal(response.code, "302") + get '/collections/1', params: {} + assert_redirected_to root_path end it "redirects when no collection is found" do - #redirects no collection found sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) get '/collections/1', params: { record_id: "xyz" } - assert_equal(response.code, "302") + assert_redirected_to root_path end - it "detects if a granule is no longer in cmr" do + it "detects if a granule is no longer in CMR" do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) - - #stubbing all requests for raw_data - stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: '{"hits" : 0,"took" : 105,"items" : []}', headers: {}) - stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub('search_granules_by_collection_C1000000020-LANCEAMSR2.xml')) - - stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - - }). - to_return(status: 200, - body: '029', - headers: {}) + setup_granule_not_found_stubs get '/collections/1', params: { record_id: 1 } - assert_select "span[class='indicator_for_granule_deleted_in_cmr']", count: 5, - :text => '[Granule Not Found in CMR]' + + assert_select ".indicator_for_granule_deleted_in_cmr", count: 5, text: '[Granule Not Found in CMR]' end it "detects if a new granule revision is available" do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) + setup_granule_revision_stubs - stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2"). - with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }). - to_return(:status => 200, :body => get_stub('search_granules_by_collection_C1000000020-LANCEAMSR2.xml')) - - stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - - }). - to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.xml'), headers: {}) - stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). 
- to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.json'), headers: {}) get '/collections/1', params: { record_id: 1 } + assert_select '.import_new_revision', count: 5 end end - - describe "POST #create" do it "downloads and saves a new record" do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) + setup_create_stubs - stub_request(:get, Regexp.new("#{Regexp.escape(@cmr_base_url)}\\/search\\/collections\\.(echo10|native)\\?concept_id\\=C222702\\-GHRC")).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_collection_C222702-GHRC.xml"), :headers => {"date"=>["Tue, 21 Feb 2017 15:50:04 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["2974"], "cmr-request-id"=>["bb005bac-18ce-4b6a-b69f-3f29f820ced5"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]}) - #stubbing the new format check - stub_request(:get, "#{@cmr_base_url}/search/collections.atom?concept_id=C222702-GHRC").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => "ECHO10", :headers => {"date"=>["Fri, 17 Mar 2017 20:00:54 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["107"], "cmr-request-id"=>["308d3b81-b229-4593-a05e-c61a741d45be"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"], "strict-transport-security"=>["max-age=31536000"]}) - - #Since a granule is chosen at random, a full mock can not be used. - #in this instance, we return a set collection of results for any call using this concept id and granule keyword. - stub_request(:get, /.*granules.echo10*C222702-GHRC.*/).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_granules_G309203-GHRC.xml"), :headers => {"date"=>["Tue, 21 Feb 2017 16:02:46 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["10554"], "cmr-took"=>["40"], "cmr-request-id"=>["5b0c8426-3a23-4025-a4d3-6d1c9024153a"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]}) - stub_request(:get, /.*granules.umm_json*C222702-GHRC.*/).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_granules_G309203-GHRC.json"), :headers => {"date"=>["Tue, 21 Feb 2017 16:02:46 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["10554"], "cmr-took"=>["40"], "cmr-request-id"=>["5b0c8426-3a23-4025-a4d3-6d1c9024153a"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/concepts/G226250-GHRC.echo10"). 
- with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: "", headers: {}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C222702-GHRC&page_num=1&page_size=10"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.json"), headers: {}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C222702-GHRC&page_num=2&page_size=10"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.json"), headers: {}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C222702-GHRC&page_num=3&page_size=10"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.json"), headers: {}) - #stubbing the granule raw look up - stub_request(:get, /.*granules.echo10\?concept_id=G.*/).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_granules_G226250-GHRC.xml"), :headers => {"date"=>["Tue, 14 Mar 2017 19:36:02 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["26"], "cmr-request-id"=>["46ad6de7-598a-463e-99e0-2a22ddf651da"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"], "strict-transport-security"=>["max-age=31536000"]}) - stub_request(:get, "#{@cmr_base_url}/search/concepts/C222702-GHRC.echo10"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub("C222702-GHRC_echo10.xml"), headers: {}) - stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G226250-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). 
-      to_return(status: 200, body: get_stub("search_granules_G226250-GHRC.json"), headers: {})
-
-    #Making sure record does not exist before ingest
-    assert_equal(0, (Collection.where concept_id: "C222702-GHRC").length)
-    Quarc.stub_any_instance(:validate, {}) do
-      post collections_url, params: { concept_id: "C222702-GHRC", revision_id: "32", granulesCount: 1 }
+    assert_difference 'Collection.where(concept_id: "C222702-GHRC").count', 1 do
+      Quarc.stub_any_instance(:validate, {}) do
+        post collections_url, params: { concept_id: "C222702-GHRC", revision_id: "32", granulesCount: 1 }
+      end
     end
-    assert_equal("302", response.code)
-
-    #collection with rawJSON saved in system
-    assert_equal(1, (Collection.where concept_id: "C222702-GHRC").length)
-    assert_equal("daylightn", (Collection.where concept_id: "C222702-GHRC").first.records.first.values["ShortName"])
-
-    record = (Collection.where concept_id: "C222702-GHRC").first.records.first
-    #script ran on new collection
-    refute(record.binary_script_values["InsertTime"])
-
-    # collection with umm-json can be saved to system. see ticket CMRARC-480
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/concepts/C190733714-LPDAAC_ECS.umm_json").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_collection_C190733714-LPDAAC_ECS.json"), headers: {})
-    stub_request(:get, "#{@cmr_base_url}/search/collections.atom?concept_id=C190733714-LPDAAC_ECS").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_collection_C190733714-LPDAAC_ECS.atom"), :headers => {"date"=>["Fri, 17 Mar 2017 20:00:54 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["107"], "cmr-request-id"=>["308d3b81-b229-4593-a05e-c61a741d45be"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"], "strict-transport-security"=>["max-age=31536000"]})
-    stub_request(:get, "#{@cmr_base_url}/search/collections.umm_json?concept_id=C190733714-LPDAAC_ECS").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_collection_C190733714-LPDAAC_ECS.json"), :headers => {"date"=>["Tue, 21 Feb 2017 15:50:04 GMT"], "content-type"=>["application/vnd.nasa.cmr.umm_results+json;version=1.13; charset=UTF-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["2974"], "cmr-request-id"=>["bb005bac-18ce-4b6a-b69f-3f29f820ced5"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]})
-    stub_request(:get, /.*granules.echo10*C190733714-LPDAAC_ECS.*/).with(headers: {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(status: 200, body: get_stub("search_granules_by_collection_C190733714-LPDAAC_ECS.xml"), headers: {})
-    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?collection_concept_id=C190733714-LPDAAC_ECS&page_size=10&page_num=1").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_granules_by_collection_C190733714-LPDAAC_ECS.json"), headers: {})
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C190733714-LPDAAC_ECS&page_num=3&page_size=10").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_granules_by_collection_C190733714-LPDAAC_ECS.json"), headers: {})
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C190733714-LPDAAC_ECS&page_num=2&page_size=10").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_granules_by_collection_C190733714-LPDAAC_ECS.json"), headers: {})
-    Quarc.stub_any_instance(:validate, {}) do
-      post collections_url, params: { concept_id: "C190733714-LPDAAC_ECS", revision_id: "77", granuleCounts: 1 }
+
+    assert_redirected_to collection_path(assigns(:collection))
+    assert_collection_and_granule_saved_correctly("C222702-GHRC")
+  end
+
+  it "downloads and saves a new ISO record as UMM-JSON" do
+    sign_in(user)
+    stub_urs_access(user.uid, user.access_token, user.refresh_token)
+    setup_iso_record_stubs
+
+    assert_difference 'Collection.where(concept_id: "C1599780765-NSIDC_ECS").count', 1 do
+      Quarc.stub_any_instance(:validate, {}) do
+        post collections_url, params: { concept_id: "C1599780765-NSIDC_ECS", revision_id: "77", granuleCounts: 1 }
+      end
     end
-    get '/collections/1', params: { concept_id: "C190733714-LPDAAC_ECS" }
-    assert_select "#record_format", count: 1,
-                  :text => 'umm-c; version=1.13'
+    assert_redirected_to collection_path(assigns(:collection))
+    assert_iso_record_saved_correctly("C1599780765-NSIDC_ECS")
+  end
 
-    assert_equal("200", response.code)
-    assert_equal(1, (Collection.where concept_id: "C190733714-LPDAAC_ECS").length)
+  private
 
-    #ingest for collection logged
-    assert_equal("abaker@element84.com", record.ingest.user.email)
+  def default_headers
+    {
+      'Accept' => '*/*',
+      'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
+      'User-Agent' => 'Ruby'
+    }
+  end
 
-    #saves 1 associated granule
-    assert_equal(1, (Collection.where concept_id: "C222702-GHRC").first.granules.length)
-    #needs to match regex since the granule that is taken from the list is random each time
-    assert_equal(0, (Collection.where concept_id: "C222702-GHRC").first.granules.first.records.first.values["GranuleUR"] =~ /Ndaily/)
+  def setup_show_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_collection_C1000000020-LANCEAMSR2.xml"))
 
-    granule_record = (Collection.where concept_id: "C222702-GHRC").first.granules.first.records.first
-    #ingest for granule logged
-    assert_equal("abaker@element84.com", granule_record.ingest.user.email)
-  end
+    stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.xml'))
 
-  it "downloads and saves a new iso record as umm-json" do
-    sign_in(user)
-    stub_urs_access(user.uid, user.access_token, user.refresh_token)
+    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.json'))
+  end
 
-    # the atom will return the native format is iso-19115
-    stub_request(:get, "#{@cmr_base_url}/search/collections.atom?concept_id=C1599780765-NSIDC_ECS")
-      .with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', })
-      .to_return(:status => 200, :body => get_stub("search_collection_C1599780765-NSIDC_ECS.atom"),
-                 :headers => {"date"=>["Fri, 17 Mar 2017 20:00:54 GMT"],
-                              "content-type"=>["application/atom+xml; charset=utf-8"],
-                              "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"],
-                              "access-control-allow-origin"=>["*"],
-                              "cmr-hits"=>["1"],
-                              "cmr-took"=>["107"],
-                              "cmr-request-id"=>["308d3b81-b229-4593-a05e-c61a741d45be"],
-                              "vary"=>["Accept-Encoding, User-Agent"],
-                              "connection"=>["close"],
-                              "server"=>["Jetty(9.2.z-SNAPSHOT)"],
-                              "strict-transport-security"=>["max-age=31536000"]})
-
-    # application logic should pull umm-json instead
-    stub_request(:get, "#{@cmr_base_url}/search/collections.umm_json?concept_id=C1599780765-NSIDC_ECS")
-      .with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', })
-      .to_return(:status => 200, :body => get_stub("search_collection_C1599780765-NSIDC_ECS.json"),
-                 :headers => {"date"=>["Tue, 21 Feb 2017 15:50:04 GMT"],
-                              "content-type"=>["application/vnd.nasa.cmr.umm_results+json;version=1.13; charset=UTF-8"],
-                              "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"],
-                              "access-control-allow-origin"=>["*"],
-                              "cmr-hits"=>["1"],
-                              "cmr-took"=>["2974"],
-                              "cmr-request-id"=>["bb005bac-18ce-4b6a-b69f-3f29f820ced5"],
-                              "vary"=>["Accept-Encoding, User-Agent"],
-                              "connection"=>["close"],
-                              "server"=>["Jetty(9.2.z-SNAPSHOT)"]})
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/concepts/C1599780765-NSIDC_ECS.umm_json").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_collection_C1599780765-NSIDC_ECS.json"), headers: {})
-    # stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/collections.umm_json?concept_id=C1599780765-NSIDC_ECS").
-    #   with(
-    #     headers: {
-    #       'Accept'=>'*/*',
-    #       'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-    #       'User-Agent'=>'Ruby'
-    #     }).
-    #   to_return(status: 200, body: get_stub("search_collection_C1599780765-NSIDC_ECS.json"), headers: {})
+  def setup_granule_not_found_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: '{"hits" : 0,"took" : 105,"items" : []}')
 
-    # stub for pulling a random granule
-    stub_request(:get, /.*granules.echo10\?concept_id=G.*/)
-      .with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', })
-      .to_return(:status => 200, :body => get_stub("search_granules_C1599780765-NSIDC_ECS.xml"),
-                 :headers => {"date"=>["Tue, 14 Mar 2017 19:36:02 GMT"],
-                              "content-type"=>["application/echo10+xml; charset=utf-8"],
-                              "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"],
-                              "access-control-allow-origin"=>["*"],
-                              "cmr-hits"=>["1"], "cmr-took"=>["26"],
-                              "cmr-request-id"=>["46ad6de7-598a-463e-99e0-2a22ddf651da"],
-                              "vary"=>["Accept-Encoding, User-Agent"],
-                              "connection"=>["close"],
-                              "server"=>["Jetty(9.2.z-SNAPSHOT)"],
-                              "strict-transport-security"=>["max-age=31536000"]})
-
-    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?collection_concept_id=C1599780765-NSIDC_ECS&page_size=10&page_num=1").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub('search_granules_by_collection_C1599780765-NSIDC_ECS.json'), headers: {})
-    stub_request(:get, "#{Cmr.get_cmr_base_url}/search/granules.echo10?collection_concept_id=C1599780765-NSIDC_ECS&page_num=1&page_size=10").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-
-        }).
-      to_return(status: 200, body: get_stub("search_granules_by_collection_C1599780765-NSIDC_ECS.xml"), headers: {})
-    stub_request(:get, /.*granules.echo10*C1599780765-NSIDC_ECS.*/)
-      .with(headers: {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', })
-      .to_return(status: 200, body: get_stub("search_granules_by_collection_C1599780765-NSIDC_ECS.xml"), headers: {})
-    stub_request(:get, "#{Cmr.get_cmr_base_url}/search/granules.umm_json?concept_id=G1599790933-NSIDC_ECS").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_granules_G1599790933-NSIDC_ECS.json"), headers: {})
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/concepts/G1599790933-NSIDC_ECS.echo10").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: "", headers: {})
-    Quarc.stub_any_instance(:validate, {}) do
-      post collections_url, params: { concept_id: "C1599780765-NSIDC_ECS", revision_id: "77", granuleCounts: 1 }
-    end
-    assert_equal("302", response.code)
-    assert_equal(1, (Collection.where concept_id: "C1599780765-NSIDC_ECS").length)
+    stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_by_collection_C1000000020-LANCEAMSR2.xml'))
+    stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: '029')
+  end
 
-    record = (Collection.where concept_id: "C1599780765-NSIDC_ECS").first.records.first
+  def setup_granule_revision_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_by_collection_C1000000020-LANCEAMSR2.xml'))
 
-    assert_equal"iso19115", record.native_format
-    assert_equal"umm_json", record.format
+    stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.xml'))
 
-    #ingest for collection logged
-    assert_equal("abaker@element84.com", record.ingest.user.email)
+    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.json'))
+  end
 
-    #saves 1 associated granule
-    assert_equal(1, (Collection.where concept_id: "C1599780765-NSIDC_ECS").first.granules.length)
-    #needs to match regex since the granule that is taken from the list is random each time
-    assert_equal(0, (Collection.where concept_id: "C1599780765-NSIDC_ECS").first.granules.first.records.first.values["GranuleUR"] =~ /SC:ABLVIS0/)
+  def setup_create_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C222702-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_collection_C222702-GHRC.xml"))
 
-    granule_record = (Collection.where concept_id: "C1599780765-NSIDC_ECS").first.granules.first.records.first
-    #ingest for granule logged
-    assert_equal("abaker@element84.com", granule_record.ingest.user.email)
+    stub_request(:get, /.*granules.echo10*C222702-GHRC.*/)
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.xml"))
 
-  end
+    stub_request(:get, /.*granules.umm_json*C222702-GHRC.*/)
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.json"))
+  end
+
+  def setup_iso_record_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/collections.atom?concept_id=C1599780765-NSIDC_ECS")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_collection_C1599780765-NSIDC_ECS.atom"))
+
+    stub_request(:get, "#{@cmr_base_url}/search/collections.umm_json?concept_id=C1599780765-NSIDC_ECS")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_collection_C1599780765-NSIDC_ECS.json"))
+
+    stub_request(:get, /.*granules.echo10\?concept_id=G.*/)
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_granules_C1599780765-NSIDC_ECS.xml"))
+    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?collection_concept_id=C1599780765-NSIDC_ECS&page_size=10&page_num=1")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_by_collection_C1599780765-NSIDC_ECS.json'))
+
+    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G1599790933-NSIDC_ECS")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_granules_G1599790933-NSIDC_ECS.json"))
+  end
+
+  def assert_collection_and_granule_saved_correctly(concept_id)
+    collection = Collection.find_by(concept_id: concept_id)
+    assert collection.present?
+    assert_equal "abaker@element84.com", collection.records.first.ingest.user.email
+    assert_equal 1, collection.granules.count
+    granule = collection.granules.first
+    assert_match /Ndaily/, granule.records.first.values["GranuleUR"]
+    assert_equal "abaker@element84.com", granule.records.first.ingest.user.email
   end
 
+  def assert_iso_record_saved_correctly(concept_id)
+    collection = Collection.find_by(concept_id: concept_id)
+    assert collection.present?
+    record = collection.records.first
+    assert_equal "iso19115", record.native_format
+    assert_equal "umm_json", record.format
+    assert_equal "abaker@element84.com", record.ingest.user.email
+    assert_equal 1, collection.granules.count
+    granule = collection.granules.first
+    assert_match /SC:ABLVIS0/, granule.records.first.values["GranuleUR"]
+    assert_equal "abaker@element84.com", granule.records.first.ingest.user.email
+  end
 end

From 3594bb525205d89d25cf1ad8e3bbada9e0200f0a Mon Sep 17 00:00:00 2001
From: San <99511815+sanowl@users.noreply.github.com>
Date: Sun, 16 Jun 2024 14:34:37 +0300
Subject: [PATCH 4/4] Refactor and Enhance Test Helper Classes

---
 lib/bs4/testing.py | 282 +++++++++++----------------------------------
 1 file changed, 70 insertions(+), 212 deletions(-)

diff --git a/lib/bs4/testing.py b/lib/bs4/testing.py
index 5a84b0ba..e71a8acb 100644
--- a/lib/bs4/testing.py
+++ b/lib/bs4/testing.py
@@ -12,12 +12,14 @@
     Doctype,
     SoupStrainer,
 )
-
 from bs4.builder import HTMLParserTreeBuilder
 
+import pytest
+
 default_builder = HTMLParserTreeBuilder
 
 
 class SoupTest(unittest.TestCase):
+    """Base class for BeautifulSoup tests."""
 
     @property
     def default_builder(self):
@@ -29,43 +31,28 @@ def soup(self, markup, **kwargs):
         return BeautifulSoup(markup, builder=builder, **kwargs)
 
     def document_for(self, markup):
-        """Turn an HTML fragment into a document.
-
-        The details depend on the builder.
-        """
+        """Turn an HTML fragment into a document."""
         return self.default_builder.test_fragment_to_document(markup)
 
     def assertSoupEquals(self, to_parse, compare_parsed_to=None):
+        """Assert that the parsed soup equals the expected result."""
         builder = self.default_builder
         obj = BeautifulSoup(to_parse, builder=builder)
         if compare_parsed_to is None:
             compare_parsed_to = to_parse
-
         self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
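For readers skimming the patch: assertSoupEquals parses a fragment and compares the
re-serialized tree against an expected document. A rough standalone equivalent — a
sketch only, using the stock "html.parser" builder instead of the harness's
test_fragment_to_document helper (assumed here, not part of this patch):

    from bs4 import BeautifulSoup

    def soup_equals(to_parse, expected=None):
        # Parse the fragment, then compare its serialized form against the
        # expected markup (defaulting to the input itself, i.e. a round trip).
        expected = to_parse if expected is None else expected
        return BeautifulSoup(to_parse, "html.parser").decode() == expected

    assert soup_equals("<b>foo</b>")          # clean markup survives a round trip
    assert soup_equals("<p>", "<p></p>")      # an unclosed tag gets closed

This is the invariant most tests below lean on: where the two markup strings differ,
the test is pinning down how the parser is expected to normalize the input.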
 
 
-class HTMLTreeBuilderSmokeTest(object):
-
-    """A basic test of a treebuilder's competence.
-
-    Any HTML treebuilder, present or future, should be able to pass
-    these tests. With invalid markup, there's room for interpretation,
-    and different parsers can handle it differently. But with the
-    markup in these tests, there's not much room for interpretation.
-    """
+class HTMLTreeBuilderSmokeTest(SoupTest):
+    """Basic tests for HTML tree builder competence."""
 
     def assertDoctypeHandled(self, doctype_fragment):
         """Assert that a given doctype string is handled correctly."""
         doctype_str, soup = self._document_with_doctype(doctype_fragment)
-
-        # Make sure a Doctype object was created.
         doctype = soup.contents[0]
-        self.assertEqual(doctype.__class__, Doctype)
+        self.assertIsInstance(doctype, Doctype)
         self.assertEqual(doctype, doctype_fragment)
         self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
-
-        # Make sure that the doctype was correctly associated with the
-        # parse tree and that the rest of the document parsed.
         self.assertEqual(soup.p.contents[0], 'foo')
 
     def _document_with_doctype(self, doctype_fragment):
@@ -75,29 +62,20 @@ def _document_with_doctype(self, doctype_fragment):
         soup = self.soup(markup)
         return doctype, soup
 
-    def test_normal_doctypes(self):
-        """Make sure normal, everyday HTML doctypes are handled correctly."""
+    def test_doctypes(self):
+        """Test various doctype declarations."""
         self.assertDoctypeHandled("html")
         self.assertDoctypeHandled(
             'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
-
-    def test_public_doctype_with_url(self):
-        doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
-        self.assertDoctypeHandled(doctype)
-
-    def test_system_doctype(self):
+        self.assertDoctypeHandled(
+            'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
+            '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"')
         self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
-
-    def test_namespaced_system_doctype(self):
-        # We can handle a namespaced doctype with a system ID.
         self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
-
-    def test_namespaced_public_doctype(self):
-        # Test a namespaced doctype with a public id.
         self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
 
     def test_real_xhtml_document(self):
-        """A real XHTML document should come out more or less the same as it went in."""
+        """Ensure a real XHTML document is parsed correctly."""
         markup = b"""<?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head><title>Hello.</title></head>
 <body>Goodbye.</body>
 </html>"""
@@ -110,128 +88,58 @@ def test_real_xhtml_document(self):
         markup.replace(b"\n", b""))
 
     def test_deepcopy(self):
-        """Make sure you can copy the tree builder.
-
-        This is important because the builder is part of a
-        BeautifulSoup object, and we want to be able to copy that.
-        """
+        """Ensure the tree builder can be deep copied."""
         copy.deepcopy(self.default_builder)
 
-    def test_p_tag_is_never_empty_element(self):
-        """A <p> tag is never designated as an empty-element tag.
-
-        Even if the markup shows it as an empty-element tag, it
-        shouldn't be presented that way.
-        """
-        soup = self.soup("<p/>")
-        self.assertFalse(soup.p.is_empty_element)
-        self.assertEqual(str(soup.p), "<p></p>")
-
-    def test_unclosed_tags_get_closed(self):
-        """A tag that's not closed by the end of the document should be closed.
-
-        This applies to all tags except empty-element tags.
-        """
+    def test_empty_and_unclosed_tags(self):
+        """Test handling of empty and unclosed tags."""
         self.assertSoupEquals("<p>", "<p></p>")
         self.assertSoupEquals("<b>", "<b></b>")
-        self.assertSoupEquals("<br>", "<br/>")
-
-    def test_br_is_always_empty_element_tag(self):
-        """A <br> tag is designated as an empty-element tag.
-
-        Some parsers treat <br></br> as one <br/> tag, some parsers as
-        two tags, but it should always be an empty-element tag.
-        """
         soup = self.soup("<br></br>")
         self.assertTrue(soup.br.is_empty_element)
         self.assertEqual(str(soup.br), "<br/>")
 
-    def test_nested_formatting_elements(self):
+    def test_nested_elements(self):
+        """Test handling of nested elements."""
         self.assertSoupEquals("<em><em></em></em>")
-
-    def test_comment(self):
-        # Comments are represented as Comment objects.
         markup = "<p>foo<!--foobar-->baz</p>"
         self.assertSoupEquals(markup)
-
         soup = self.soup(markup)
         comment = soup.find(text="foobar")
-        self.assertEqual(comment.__class__, Comment)
+        self.assertIsInstance(comment, Comment)
 
-    def test_preserved_whitespace_in_pre_and_textarea(self):
-        """Whitespace must be preserved in <pre> and <textarea> tags."""
-        self.assertSoupEquals("<pre>   </pre>")
-        self.assertSoupEquals("<textarea> woo  </textarea>")
-
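The empty-element rules that the merged test_empty_and_unclosed_tags pins down are
easy to verify by hand. A minimal sketch, assuming bs4 is installed and using the
stock "html.parser" builder rather than this harness:

    from bs4 import BeautifulSoup

    # <br> is an empty-element (void) tag no matter how the markup spells it,
    # so even "<br></br>" serializes back as a single "<br/>".
    soup = BeautifulSoup("<br></br>", "html.parser")
    assert soup.br.is_empty_element
    assert str(soup.br) == "<br/>"

    # <p> is never an empty-element tag, even when written as "<p/>".
    soup = BeautifulSoup("<p/>", "html.parser")
    assert not soup.p.is_empty_element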
-    def test_nested_inline_elements(self):
-        """Inline elements can be nested indefinitely."""
-        b_tag = "<b>Inside a B tag</b>"
-        self.assertSoupEquals(b_tag)
-
-        nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-        double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-    def test_nested_block_level_elements(self):
-        """Block elements can be nested."""
-        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
-        blockquote = soup.blockquote
-        self.assertEqual(blockquote.p.b.string, 'Foo')
-        self.assertEqual(blockquote.b.string, 'Foo')
-
-    def test_correctly_nested_tables(self):
-        """One table can go inside another one."""
-        markup = ('<table id="1">'
-                  '<tr>'
-                  "<td>Here's another table:"
-                  '<table id="2">'
-                  '<tr><td>foo</td></tr>'
-                  '</table></td>')
-
-        self.assertSoupEquals(
-            markup,
-            '<table id="1"><tr><td>Here\'s another table:'
-            '<table id="2"><tr><td>foo</td></tr></table>'
-            '</td></tr></table>')
-
-        self.assertSoupEquals(
-            "<table><thead><tr><td>Foo</td></tr></thead>"
-            "<tbody><tr><td>Bar</td></tr></tbody>"
-            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
 
-    def test_angle_brackets_in_attribute_values_are_escaped(self):
+    def test_angle_brackets_in_attributes(self):
+        """Ensure angle brackets in attribute values are escaped."""
         self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
 
-    def test_entities_in_attributes_converted_to_unicode(self):
+    def test_entities_in_attributes_and_text(self):
+        """Ensure entities in attributes and text are converted to Unicode."""
         expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
         self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
-
-    def test_entities_in_text_converted_to_unicode(self):
         expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
         self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
         self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
         self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
-
-    def test_quot_entity_converted_to_quotation_mark(self):
         self.assertSoupEquals("<p>I said &quot;good day!&quot;</p>",
                               '<p>I said "good day!"</p>')
 
     def test_out_of_range_entity(self):
+        """Ensure out-of-range entities are replaced with a replacement character."""
         expect = u"\N{REPLACEMENT CHARACTER}"
         self.assertSoupEquals("&#10000000000000;", expect)
         self.assertSoupEquals("&#x10000000000000;", expect)
         self.assertSoupEquals("&#1000000000;", expect)
 
     def test_basic_namespaces(self):
-        """Parsers don't need to *understand* namespaces, but at the
-        very least they should not choke on namespaces or lose
-        data."""
-
+        """Test handling of basic namespaces."""
         markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head><title>4</title></head><body></body></html>'
         soup = self.soup(markup)
         self.assertEqual(markup, soup.encode())
@@ -242,108 +150,80 @@ def test_basic_namespaces(self):
         self.assertEqual(
             'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
 
-    def test_multivalued_attribute_value_becomes_list(self):
+    def test_multivalued_attribute_value(self):
+        """Ensure multi-valued attribute values become lists."""
         markup = b'<a class="foo bar">'
         soup = self.soup(markup)
         self.assertEqual(['foo', 'bar'], soup.a['class'])
 
-    #
-    # Generally speaking, tests below this point are more tests of
-    # Beautiful Soup than tests of the tree builders. But parsers are
-    # weird, so we run these tests separately for every tree builder
-    # to detect any differences between them.
-    #
-
     def test_soupstrainer(self):
-        """Parsers should be able to work with SoupStrainers."""
+        """Ensure parsers can work with SoupStrainers."""
         strainer = SoupStrainer("b")
         soup = self.soup("A <b>bold</b> statement",
                          parse_only=strainer)
         self.assertEqual(soup.decode(), "<b>bold</b>")
 
-    def test_single_quote_attribute_values_become_double_quotes(self):
+    def test_single_quote_attribute_values(self):
+        """Ensure single quote attribute values become double quotes."""
         self.assertSoupEquals("<foo attr='bar'></foo>",
                               '<foo attr="bar"></foo>')
 
-    def test_attribute_values_with_nested_quotes_are_left_alone(self):
+    def test_attribute_values_with_nested_quotes(self):
+        """Ensure attribute values with nested quotes are handled correctly."""
         text = """<foo attr='bar "brawls" happen'>a</foo>"""
         self.assertSoupEquals(text)
-
-    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
-        text = """<foo attr='bar "brawls" happen'>a</foo>"""
         soup = self.soup(text)
         soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
         self.assertSoupEquals(
             soup.foo.decode(),
             """<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
 
-    def test_ampersand_in_attribute_value_gets_escaped(self):
+    def test_ampersand_in_attribute_value(self):
+        """Ensure ampersand in attribute value is escaped."""
         self.assertSoupEquals('<this is="really messed up & stuff"></this>',
                               '<this is="really messed up &amp; stuff"></this>')
         self.assertSoupEquals(
             '<a href="http://example.org?a=1&b=2;3">foo</a>',
             '<a href="http://example.org?a=1&amp;b=2;3">foo</a>')
-
-    def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
         self.assertSoupEquals('<a href="http://example.org?a=1&amp;b=2;3"></a>')
 
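The entity round-trip rules exercised above can be checked directly in an
interpreter. A minimal sketch, assuming bs4 is installed with the stock
"html.parser" builder:

    from bs4 import BeautifulSoup

    # Named and numeric entities are converted to Unicode while parsing ...
    soup = BeautifulSoup("<p>pi&ntilde;ata</p>", "html.parser")
    assert soup.p.string == "pi\N{LATIN SMALL LETTER N WITH TILDE}ata"

    # ... and reserved characters such as "&" are re-escaped on output.
    soup = BeautifulSoup('<a href="http://example.org?a=1&b=2;3">foo</a>',
                         "html.parser")
    assert "a=1&amp;b=2;3" in soup.a.decode()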
-    def test_entities_in_strings_converted_during_parsing(self):
-        # Both XML and HTML entities are converted to Unicode characters
-        # during parsing.
+    def test_entities_in_strings(self):
+        """Ensure entities in strings are converted during parsing."""
         text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
         expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
         self.assertSoupEquals(text, expected)
 
-    def test_smart_quotes_converted_on_the_way_in(self):
-        # Microsoft smart quotes are converted to Unicode characters during
-        # parsing.
+    def test_smart_quotes_converted(self):
+        """Ensure Microsoft smart quotes are converted to Unicode characters."""
         quote = b"<p>\x91Foo\x92</p>"
         soup = self.soup(quote)
         self.assertEqual(
             soup.p.string,
             u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
 
-    def test_non_breaking_spaces_converted_on_the_way_in(self):
+    def test_non_breaking_spaces(self):
+        """Ensure non-breaking spaces are converted to Unicode."""
         soup = self.soup("<a>&nbsp;&nbsp;</a>")
         self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
 
-    def test_entities_converted_on_the_way_out(self):
+    def test_entities_converted_on_output(self):
+        """Ensure entities are converted on the way out."""
         text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
         expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
         soup = self.soup(text)
         self.assertEqual(soup.p.encode("utf-8"), expected)
 
     def test_real_iso_latin_document(self):
-        # Smoke test of interrelated functionality, using an
-        # easy-to-understand document.
-
-        # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
+        """Ensure real ISO-Latin-1 document is parsed and encoded correctly."""
         unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
-
-        # That's because we're going to encode it into ISO-Latin-1, and use
-        # that to test.
         iso_latin_html = unicode_html.encode("iso-8859-1")
-
-        # Parse the ISO-Latin-1 HTML.
         soup = self.soup(iso_latin_html)
-        # Encode it to UTF-8.
         result = soup.encode("utf-8")
-
-        # What do we expect the result to look like? Well, it would
-        # look like unicode_html, except that the META tag would say
-        # UTF-8 instead of ISO-Latin-1.
-        expected = unicode_html.replace("ISO-Latin-1", "utf-8")
-
-        # And, of course, it would be in UTF-8, not Unicode.
-        expected = expected.encode("utf-8")
-
-        # Ta-da!
+        expected = unicode_html.replace("ISO-Latin-1", "utf-8").encode("utf-8")
         self.assertEqual(result, expected)
 
     def test_real_shift_jis_document(self):
-        # Smoke test to make sure the parser can handle a document in
-        # Shift-JIS encoding, without choking.
+        """Ensure real Shift-JIS document is parsed correctly."""
         shift_jis_html = (
             b'<html><head></head><body><pre>'
             b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
@@ -352,89 +232,65 @@ def test_real_shift_jis_document(self):
             b'</pre></body></html>')
         unicode_html = shift_jis_html.decode("shift-jis")
         soup = self.soup(unicode_html)
-
-        # Make sure the parse tree is correctly encoded to various
-        # encodings.
         self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
         self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
 
     def test_real_hebrew_document(self):
-        # A real-world test to make sure we can convert ISO-8859-9 (a
-        # Hebrew encoding) to UTF-8.
+        """Ensure real Hebrew document is parsed correctly."""
         hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
-        soup = self.soup(
-            hebrew_document, from_encoding="iso8859-8")
+        soup = self.soup(hebrew_document, from_encoding="iso8859-8")
         self.assertEqual(soup.original_encoding, 'iso8859-8')
         self.assertEqual(
             soup.encode('utf-8'),
             hebrew_document.decode("iso8859-8").encode("utf-8"))
 
     def test_meta_tag_reflects_current_encoding(self):
-        # Here's the <meta> tag saying that a document is
-        # encoded in Shift-JIS.
+        """Ensure meta tag reflects the current encoding."""
         meta_tag = ('<meta content="text/html; charset=x-sjis" '
                     'http-equiv="Content-type"/>')
-
-        # Here's a document incorporating that meta tag.
         shift_jis_html = (
             '<html><head>\n%s\n'
             '<meta http-equiv="Content-language" content="ja"/>'
             '</head><body>Shift-JIS markup goes here.') % meta_tag
         soup = self.soup(shift_jis_html)
-
-        # Parse the document, and the charset is seemingly unaffected.
         parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
         content = parsed_meta['content']
         self.assertEqual('text/html; charset=x-sjis', content)
-
-        # But that value is actually a ContentMetaAttributeValue object.
         self.assertTrue(isinstance(content, ContentMetaAttributeValue))
-
-        # And it will take on a value that reflects its current
-        # encoding.
         self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
 
-    # For the rest of the story, see TestSubstitutions in
-    # test_tree.py.
-
-    def test_html5_style_meta_tag_reflects_current_encoding(self):
-        # Here's the <meta> tag saying that a document is
-        # encoded in Shift-JIS.
+    def test_html5_style_meta_tag(self):
+        """Ensure HTML5 style meta tag reflects current encoding."""
         meta_tag = ('<meta id="encoding" charset="x-sjis" />')
-
-        # Here's a document incorporating that meta tag.
         shift_jis_html = (
             '<html><head>\n%s\n'
             '<meta http-equiv="Content-language" content="ja"/>'
             '</head><body>Shift-JIS markup goes here.') % meta_tag
         soup = self.soup(shift_jis_html)
-
-        # Parse the document, and the charset is seemingly unaffected.
         parsed_meta = soup.find('meta', id="encoding")
         charset = parsed_meta['charset']
         self.assertEqual('x-sjis', charset)
-
-        # But that value is actually a CharsetMetaAttributeValue object.
         self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
-
-        # And it will take on a value that reflects its current
-        # encoding.
         self.assertEqual('utf8', charset.encode("utf8"))
 
-    def test_tag_with_no_attributes_can_have_attributes_added(self):
+    def test_tag_with_no_attributes(self):
+        """Ensure a tag with no attributes can have attributes added."""
         data = self.soup("<a>text</a>")
         data.a['foo'] = 'bar'
         self.assertEqual('<a foo="bar">text</a>', data.a.decode())
 
-class XMLTreeBuilderSmokeTest(object):
+
+class XMLTreeBuilderSmokeTest(SoupTest):
+    """Basic tests for XML tree builder competence."""
 
     def test_docstring_generated(self):
+        """Ensure a docstring is generated with the correct encoding."""
         soup = self.soup("<root/>")
         self.assertEqual(
             soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
 
     def test_real_xhtml_document(self):
-        """A real XHTML document should come out *exactly* the same as it went in."""
+        """Ensure a real XHTML document is parsed correctly."""
         markup = b"""<?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head><title>Hello.</title></head>
""" soup = self.soup(markup) - self.assertEqual( - soup.encode("utf-8"), markup) - + self.assertEqual(soup.encode("utf-8"), markup) def test_docstring_includes_correct_encoding(self): + """Ensure docstring includes the correct encoding.""" soup = self.soup("") self.assertEqual( soup.encode("latin1"), b'\n') def test_large_xml_document(self): - """A large XML document should come out the same as it went in.""" + """Ensure a large XML document is parsed and encoded correctly.""" markup = (b'\n' + b'0' * (2**12) + b'') soup = self.soup(markup) self.assertEqual(soup.encode("utf-8"), markup) - def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): + """Ensure tags are empty elements if and only if they are empty.""" self.assertSoupEquals("

", "

") self.assertSoupEquals("

foo

") def test_namespaces_are_preserved(self): + """Ensure namespaces are preserved.""" markup = 'This tag is in the a namespaceThis tag is in the b namespace' soup = self.soup(markup) root = soup.root @@ -477,24 +333,25 @@ class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): """Smoke test for a tree builder that supports HTML5.""" def test_real_xhtml_document(self): - # Since XHTML is not HTML5, HTML5 parsers are not tested to handle - # XHTML documents in any particular way. + """Skip XHTML document test for HTML5 parsers.""" pass def test_html_tags_have_namespace(self): + """Ensure HTML tags have the correct namespace.""" markup = "" soup = self.soup(markup) self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) def test_svg_tags_have_namespace(self): + """Ensure SVG tags have the correct namespace.""" markup = '' soup = self.soup(markup) namespace = "http://www.w3.org/2000/svg" self.assertEqual(namespace, soup.svg.namespace) self.assertEqual(namespace, soup.circle.namespace) - def test_mathml_tags_have_namespace(self): + """Ensure MathML tags have the correct namespace.""" markup = '5' soup = self.soup(markup) namespace = 'http://www.w3.org/1998/Math/MathML' @@ -503,6 +360,7 @@ def test_mathml_tags_have_namespace(self): def skipIf(condition, reason): + """Conditionally skip a test.""" def nothing(test, *args, **kwargs): return None