From 014370b913ede5fb64ec917217f40641ac841e13 Mon Sep 17 00:00:00 2001 From: San <99511815+sanowl@users.noreply.github.com> Date: Sun, 16 Jun 2024 13:45:57 +0300 Subject: [PATCH 1/4] "Refactored DIFOutputJSON for improved readability and error handling" --- lib/JsonDIF.py | 480 +++++++++---------------------------------------- 1 file changed, 81 insertions(+), 399 deletions(-) diff --git a/lib/JsonDIF.py b/lib/JsonDIF.py index 5c0d033d..a4281751 100755 --- a/lib/JsonDIF.py +++ b/lib/JsonDIF.py @@ -1,407 +1,89 @@ -'''This file is for get JSON output for Collection DIF data''' +""" +This file is for generating JSON output for Collection DIF data. +""" -class DIFOutputJSON(): - def __init__(self,checkerRules,wrap): - self.checkerRules = checkerRules +class DIFOutputJSON: + def __init__(self, checker_rules, wrap): + self.checker_rules = checker_rules self.wrap = wrap - def checkAll(self, metadata): + def check_all(self, metadata): result = {} - #======================================= - str = 'Entry_Title' - try: - result[str] = self.checkerRules.check_Entry_Title(metadata) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Citation.Dataset_Release_Date' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Dataset_Citation_Dataset_Release_Date,str) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Citation.Persistent_Identifier.Type' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Dataset_Citation_Persistent_Identifier_Type,'Dataset_Citation.Persistent_Identifier.Type') - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Citation.Persistent_Identifier.Identifier' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Dataset_Citation_Persistent_Identifier_Identifier,str) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Citation.Online_Resource' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Dataset_Citation_Online_Resource,'Dataset_Citation.Online_Resource') - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Role' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Role_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Person.Email' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Personnel_Contact_Person_Email_item, str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Person.Phone.Number' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Personnel_Contact_Person_phone_item, str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Person.Phone.Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Contact_Person_Phone_Type_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Group.Email' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Contact_Group_Email_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Group.Phone.Number' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Contact_Group_Phone_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Personnel.Contact_Group.Phone.Type' 
- try: - result[str] = self.wrap(metadata,self.checkerRules.check_Personnel_Contact_Group_Phone_Type_item,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Category' - try: - result[str] = self.wrap(metadata,self.checkerRules.science_Keywords_item_Category,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Topic' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_science_Keywords_item_topic,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Term' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_science_Keywords_item_Term,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Variable_Level_1' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_science_Keywords_item_Variable_1,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Variable_Level_2' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_science_Keywords_item_Variable_2,str) - except: - result[str] = 'np' - # ====================================== - str = 'Science_Keywords.Variable_Level_3' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_science_Keywords_item_Variable_3,str) - except: - result[str] = 'np' - # ====================================== - str = 'ISO_Topic_Category' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_ISO_Topic_Category,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Type' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Platform_item_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Short_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Short_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Long_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Long_Name,str) - except: - result[str] = 'np' + checks = [ + ('Entry_Title', self.checker_rules.check_Entry_Title), + ('Dataset_Citation.Dataset_Release_Date', self.checker_rules.check_Dataset_Citation_Dataset_Release_Date), + ('Dataset_Citation.Persistent_Identifier.Type', self.checker_rules.check_Dataset_Citation_Persistent_Identifier_Type), + ('Dataset_Citation.Persistent_Identifier.Identifier', self.checker_rules.check_Dataset_Citation_Persistent_Identifier_Identifier), + ('Dataset_Citation.Online_Resource', self.checker_rules.check_Dataset_Citation_Online_Resource), + ('Personnel.Role', self.checker_rules.check_Personnel_Role_item), + ('Personnel.Contact_Person.Email', self.checker_rules.check_Personnel_Contact_Person_Email_item), + ('Personnel.Contact_Person.Phone.Number', self.checker_rules.check_Personnel_Contact_Person_phone_item), + ('Personnel.Contact_Person.Phone.Type', self.checker_rules.check_Personnel_Contact_Person_Phone_Type_item), + ('Personnel.Contact_Group.Email', self.checker_rules.check_Personnel_Contact_Group_Email_item), + ('Personnel.Contact_Group.Phone.Number', self.checker_rules.check_Personnel_Contact_Group_Phone_item), + ('Personnel.Contact_Group.Phone.Type', self.checker_rules.check_Personnel_Contact_Group_Phone_Type_item), + ('Science_Keywords.Category', self.checker_rules.science_Keywords_item_Category), + ('Science_Keywords.Topic', 
self.checker_rules.check_science_Keywords_item_topic), + ('Science_Keywords.Term', self.checker_rules.check_science_Keywords_item_Term), + ('Science_Keywords.Variable_Level_1', self.checker_rules.check_science_Keywords_item_Variable_1), + ('Science_Keywords.Variable_Level_2', self.checker_rules.check_science_Keywords_item_Variable_2), + ('Science_Keywords.Variable_Level_3', self.checker_rules.check_science_Keywords_item_Variable_3), + ('ISO_Topic_Category', self.checker_rules.check_ISO_Topic_Category), + ('Platform.Type', self.checker_rules.check_Platform_item_Type), + ('Platform.Short_Name', self.checker_rules.check_Platform_item_Short_Name), + ('Platform.Long_Name', self.checker_rules.check_Platform_item_Long_Name), + ('Platform.Instrument.Short_Name', self.checker_rules.check_Platform_item_Instrument_item_shortname), + ('Platform.Instrument.Long_Name', self.checker_rules.check_Platform_item_Instrument_item_longname), + ('Platform.Instrument', self.checker_rules.check_Platform_item_Instrument_sensor_shortname), + ('Platform.Instrument', self.checker_rules.check_Platform_item_Instrument_sensor_longname), + ('Temporal_Coverage.Range_DateTime.Beginning_Date_Time', self.checker_rules.check_Temporal_Coverage_item_Begin_Date_Time), + ('Temporal_Coverage.Range_DateTime.Ending_Date_Time', self.checker_rules.check_Temporal_Coverage_item_end_Date_Time), + ('Dataset_Progress', self.checker_rules.check_dataset_progress), + ('Spatial_Coverage.Granule_Spatial_Representation', self.checker_rules.check_Spatial_Coverage_Granule_Spatial_Representation), + ('Spatial_Coverage.Geometry.Coordinate_System', self.checker_rules.check_Spatial_Coverage_Geometry_Coordinate_System), + ('Spatial_Coverage.Geometry.Bounding_Rectangle', self.checker_rules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Southernmost_Latitude), + ('Spatial_Coverage.Geometry.Bounding_Rectangle', self.checker_rules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Northernmost_Latitude), + ('Spatial_Coverage.Geometry.Bounding_Rectangle', self.checker_rules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Westernmost_Longitude), + ('Spatial_Coverage.Geometry.Bounding_Rectangle', self.checker_rules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Easternmost_Longitude), + ('Location.Location_Category', self.checker_rules.check_Location_Location_Category), + ('Location.Location_Type', self.checker_rules.check_Location_Location_Type), + ('Location.Location_Subregion1', self.checker_rules.check_Location_Subregion1), + ('Location.Location_Subregion2', self.checker_rules.check_Location_Subregion2), + ('Location.Location_Subregion3', self.checker_rules.check_Location_Subregion3), + ('Data_Resolution.Horizontal_Resolution_Range', self.checker_rules.check_Horizontal_Resolution_Range), + ('Data_Resolution.Vertical_Resolution_Range', self.checker_rules.check_Vertical_Resolution_Range), + ('Data_Resolution.Temporal_Resolution_Range', self.checker_rules.check_Temporal_Resolution_Range), + ('Project.Short_Name', self.checker_rules.check_Project_Short_Name), + ('Project.Long_Name', self.checker_rules.check_Project_Long_Name), + ('Quality', self.checker_rules.check_Quality), + ('Dataset_Language', self.checker_rules.check_Dataset_Language), + ('Organization.Organization_Type', self.checker_rules.check_Organization_Organization_Type), + ('Organization.Organization_Name.Short_Name', self.checker_rules.check_Organization_Name_Short_Name), + ('Organization.Organization_Name.Long_Name', self.checker_rules.check_Organization_Name_Long_Name), + 
('Organization.Personnel.Contact_Person.Phone.Type', self.checker_rules.check_Organization_Personnel_Contact_Person_Phone_Type), + ('Organization.Personnel.Contact_Group.Phone.Type', self.checker_rules.check_Organization_Personnel_Contact_Person_Phone_Type), + ('Distribution.Distribution_Format', self.checker_rules.check_Distribution_Distribution_Format), + ('Multimedia_Sample.URL', self.checker_rules.check_Multimedia_Sample_URL), + ('Summary.Abstract', self.checker_rules.check_summary_abstract), + ('Related_URL.URL_Content_Type.Type', self.checker_rules.check_Related_URL_item_Content_Type), + ('Related_URL.URL_Content_Type.Subtype', self.checker_rules.check_Related_URL_Content_Type_SubType), + ('Related_URL.Description', self.checker_rules.check_Related_URL_Description_Item), + ('Related_URL', self.checker_rules.check_Related_URL_Mime_Type), + ('Product_Level_Id', self.checker_rules.check_Product_Level_ID), + ('Collection_Data_Type', self.checker_rules.check_Collection_Data_Type), + ('Metadata_Dates.Metadata_Creation', self.checker_rules.check_Metadata_Dates_Creation), + ('Metadata_Dates.Metadata_Last_Revision', self.checker_rules.check_Metadata_last_revision), + ('Metadata_Dates.Data_Creation', self.checker_rules.check_Metadata_data_creation), + ('Metadata_Dates.Data_Last_Revision', self.checker_rules.check_Metadata_data_latest_revision), + ] - # ====================================== - str = 'Platform.Instrument.Short_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Instrument_item_shortname,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Instrument.Long_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Instrument_item_longname,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Instrument' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Instrument_sensor_shortname,str) - except: - result[str] = 'np' - # ====================================== - str = 'Platform.Instrument' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Platform_item_Instrument_sensor_longname,str) - except: - result[str] = 'np' + for key, check_function in checks: + result[key] = self.safe_wrap(metadata, check_function, key) - # ====================================== - str = 'Temporal_Coverage.Range_DateTime.Beginning_Date_Time' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Temporal_Coverage_item_Begin_Date_Time,str) - except: - result[str] = 'np' - # ====================================== - str = 'Temporal_Coverage.Range_DateTime.Ending_Date_Time' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Temporal_Coverage_item_end_Date_Time,str) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Progress' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_dataset_progress,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Granule_Spatial_Representation' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Granule_Spatial_Representation,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Geometry.Coordinate_System' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Coordinate_System,str) - except: - result[str] = 'np' - # 
====================================== - str = 'Spatial_Coverage.Geometry.Bounding_Rectangle' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Southernmost_Latitude,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Geometry.Bounding_Rectangle' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Northernmost_Latitude,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Geometry.Bounding_Rectangle' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Westernmost_Longitude,str) - except: - result[str] = 'np' - # ====================================== - str = 'Spatial_Coverage.Geometry.Bounding_Rectangle' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Spatial_Coverage_Geometry_Bounding_Rectangle_Easternmost_Longitude,str) - except: - result[str] = 'np' + return result - # ====================================== - str = 'Location.Location_Category' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Location_Location_Category,str) - except: - result[str] = 'np' - # ====================================== - str = 'Location.Location_Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Location_Location_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Location.Location_Subregion1' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Location_Subregion1,str) - except: - result[str] = 'np' - # ====================================== - str = 'Location.Location_Subregion2' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Location_Subregion2,str) - except: - result[str] = 'np' - # ====================================== - str = 'Location.Location_Subregion3' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Location_Subregion3, str) - except: - result[str] = 'np' - # ====================================== - str = 'Data_Resolution.Horizontal_Resolution_Range' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Horizontal_Resolution_Range,str) - except: - result[str] = 'np' - # ====================================== - str = 'Data_Resolution.Vertical_Resolution_Range' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Vertical_Resolution_Range,str) - except: - result[str] = 'np' - # ====================================== - str = 'Data_Resolution.Temporal_Resolution_Range' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Temporal_Resolution_Range,str) - except: - result[str] = 'np' - # ====================================== - str = 'Project.Short_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Project_Short_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Project.Long_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Project_Long_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Quality' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Quality,str) - except: - result[str] = 'np' - # ====================================== - str = 'Dataset_Language' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Dataset_Language,str) - except: - result[str] = 'np' - # 
====================================== - str = 'Organization.Organization_Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Organization_Organization_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Organization.Organization_Name.Short_Name' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Organization_Name_Short_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Organization.Organization_Name.Long_Name' + def safe_wrap(self, metadata, check_function, key): try: - result[str] = self.wrap(metadata,self.checkerRules.check_Organization_Name_Long_Name,str) - except: - result[str] = 'np' - # ====================================== - str = 'Organization.Personnel.Contact_Person.Phone.Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Organization_Personnel_Contact_Person_Phone_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Organization.Personnel.Contact_Group.Phone.Type' - try: - result[str] = self.wrap(metadata, self.checkerRules.check_Organization_Personnel_Contact_Person_Phone_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Distribution.Distribution_Format' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Distribution_Distribution_Format,str) - except: - result[str] = 'np' - # ====================================== - str = 'Multimedia_Sample.URL' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Multimedia_Sample_URL,str) - except: - result[str] = 'np' - # ====================================== - str = 'Summary.Abstract' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_summary_abstract,str) - except: - result[str] = 'np' - # ====================================== - str = 'Related_URL.URL_Content_Type.Type' - try: - temp = self.wrap(metadata,self.checkerRules.check_Related_URL_item_Content_Type,str) - result[str] = self.checkerRules.check_Related_URL_Content_Type(temp) - except: - result[str] = 'np' - # ====================================== - str = 'Related_URL.URL_Content_Type.Subtype' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Related_URL_Content_Type_SubType,str) - except: - result[str] = 'np' - # ====================================== - str = 'Related_URL.Description' - try: - temp = self.wrap(metadata,self.checkerRules.check_Related_URL_Description_Item,str) - result[str] += self.checkerRules.check_Related_URL_Description(temp) - except: - result[str] = 'np' - # ====================================== - str = 'Related_URL' - try: - temp = self.wrap(metadata,self.checkerRules.check_Related_URL_Mime_Type,str) - result[str] = self.checkerRules.convertMimeType(temp) - except: - result[str] = 'np' - # ====================================== - str = 'Product_Level_Id' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Product_Level_ID,str) - except: - result[str] = 'np' - # ====================================== - str = 'Collection_Data_Type' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Collection_Data_Type,str) - except: - result[str] = 'np' - # ====================================== - str = 'Metadata_Dates.Metadata_Creation' - try: - result[str] = self.wrap(metadata,self.checkerRules.check_Metadata_Dates_Creation,str) - except: - result[str] = 'np' - # ====================================== - str = 'Metadata_Dates.Metadata_Last_Revision' - try: - 
result[str] = self.wrap(metadata,self.checkerRules.check_Metadata_last_revision,str)
-        except:
-            result[str] = 'np'
-        # ======================================
-        str = 'Metadata_Dates.Data_Creation'
-        try:
-            result[str] = self.wrap(metadata, self.checkerRules.check_Metadata_data_creation,str)
-        except:
-            result[str] = 'np'
-        # ======================================
-        str = 'Metadata_Dates.Data_Last_Revision'
-        try:
-            result[str] = self.wrap(metadata,self.checkerRules.check_Metadata_data_latest_revision,str)
-        except:
-            result[str] = 'np'
-
-        return result
\ No newline at end of file
+            return self.wrap(metadata, check_function, key)
+        except Exception:
+            return 'np'

From b3ebdb6be2a57c77ae218198362bb218d45072c1 Mon Sep 17 00:00:00 2001
From: San <99511815+sanowl@users.noreply.github.com>
Date: Sun, 16 Jun 2024 14:15:26 +0300
Subject: [PATCH 2/4] Refactored GranuleOutputJSON for improved readability and error handling

---
 lib/JsonGranule.py | 354 +++++++++++++++------------------------------
 1 file changed, 117 insertions(+), 237 deletions(-)

diff --git a/lib/JsonGranule.py b/lib/JsonGranule.py
index 57053a83..90584b1e 100755
--- a/lib/JsonGranule.py
+++ b/lib/JsonGranule.py
@@ -1,280 +1,160 @@
-'''This file is for get CSV output for Collection DIF data'''
+"""
+This file is for generating JSON output for Granule metadata.
+"""

-class GranuleOutputJSON():
-    def __init__(self,checkerRules,fetchAllInstrs):
-        self.checkerRules = checkerRules
-        self.fetchAllInstrs = fetchAllInstrs
+class GranuleOutputJSON:
+    def __init__(self, checker_rules, fetch_all_instrs):
+        self.checker_rules = checker_rules
+        self.fetch_all_instrs = fetch_all_instrs

-    def checkAll(self, metadata):
+    def check_all(self, metadata):
         result = {}
-        # ================
-        str = 'InsertTime'
-        try:
-            result[str] = self.checkerRules.checkInsertTime(metadata[str])
-        except:
-            result[str] = "np"
-        # ================
-        str = 'LastUpdate'
-        try:
-            result[str] = self.checkerRules.checkLastUpdate(metadata['LastUpdate'],metadata['DataGranule']['ProductionDateTime'])
-        except:
-            result[str] = "np"
-        # ================
-        str = 'DeleteTime'
-        try:
-            result[str] = self.checkerRules.checkDeleteTime(metadata['DeleteTime'],
-                                                            metadata['DataGranule']['ProductionDateTime'])
-        except:
-            result[str] = "np"
-        # ================
-        str = 'Collection/ShortName'
-        try:
-            result[str] = self.checkerRules.checkCollectionShortName(metadata['Collection']['ShortName'])
-        except KeyError:
-            result[str] = "np - Ensure the DataSetId field is provided."
-        except:
-            result[str] = "np"
-        # =================
-        str = 'Collection/VersionId'
-        try:
-            result[str] = self.checkerRules.checkCollectionVersionID(metadata['Collection']['VersionId'])
-        except KeyError:
-            result[str] = "np - Ensure the DataSetId field is provided."
-        except:
-            result[str] = "np"
-        # ================
-        str = 'Collection/DataSetId'
-        try:
-            result[str] = self.checkerRules.checkDataSetId(metadata['Collection']['DataSetId'])
-        except KeyError:
-            result[str] = "np - Ensure that the ShortName and VersionId fields are provided."
-        except:
-            result[str] = "np"
-        # ================
-        str = 'DataGranule/SizeMBDataGranule'
-        try:
-            result[str] = self.checkerRules.checkSizeMBDataGranule(
-                metadata['DataGranule']['SizeMBDataGranule'])
-        except KeyError:
-            result[str] = "Granule file size not provided. 
Recommend providing a value for this field in the metadata" - except: - result[str] = "np" - # ================ - str = 'DataGranule/DayNightFlag' - try: - result[str] = self.checkerRules.checkDayNightFlag(metadata['DataGranule']['DayNightFlag']) - except: - result[str] = "np" - # ================ - str = 'DataGranule/ProductionDateTime' - try: - result[str] = self.checkerRules.checkProductionDateTime(metadata['DataGranule']['ProductionDateTime'],metadata['InsertTime']) - except: - result[str] = "np" - # ================ - str = 'Temporal/RangeDateTime/SingleDateTime' - try: - result[str] = self.checkerRules.checkTemporalSingleTime(metadata['Temporal']['RangeDateTime']['SingleDateTime']) - except: - result[str] = "np" - # ================ - str = 'Temporal/RangeDateTime/BeginningDateTime' - try: - result[str] = self.checkerRules.checkTemporalBeginningTime(metadata['Temporal']['RangeDateTime']['BeginningDateTime']) - except: - result[str] = "np" - # ================ - str = 'Temporal/RangeDateTime/EndingDateTime' - try: - result[str] = self.checkerRules.checkTemporalEndingTime(metadata['Temporal']['RangeDateTime']['EndingDateTime']) - except KeyError: - result[str] = "np" - # ================ - str = 'Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle' - try: - result[str] = self.checkerRules.checkBoundingRectangle( - metadata['Spatial']['HorizontalSpatialDomain']['Geometry'][ - 'BoundingRectangle']) - except: - result[str] = "np, np, np, np" - # ================ - str = 'OrbitCalculatedSpatialDomains/OrbitCalculatedSpatialDomain/EquatorCrossingDateTime' + checks = [ + ('InsertTime', self.checker_rules.checkInsertTime), + ('LastUpdate', lambda md: self.checker_rules.checkLastUpdate(md['LastUpdate'], md['DataGranule']['ProductionDateTime'])), + ('DeleteTime', lambda md: self.checker_rules.checkDeleteTime(md['DeleteTime'], md['DataGranule']['ProductionDateTime'])), + ('Collection/ShortName', self.checker_rules.checkCollectionShortName, KeyError, "np - Ensure the DataSetId field is provided."), + ('Collection/VersionId', self.checker_rules.checkCollectionVersionID, KeyError, "np - Ensure the DataSetId field is provided."), + ('Collection/DataSetId', self.checker_rules.checkDataSetId, KeyError, "np - Ensure that the ShortName and VersionId fields are provided."), + ('DataGranule/SizeMBDataGranule', self.checker_rules.checkSizeMBDataGranule, KeyError, "Granule file size not provided. 
Recommend providing a value for this field in the metadata"), + ('DataGranule/DayNightFlag', self.checker_rules.checkDayNightFlag), + ('DataGranule/ProductionDateTime', lambda md: self.checker_rules.checkProductionDateTime(md['DataGranule']['ProductionDateTime'], md['InsertTime'])), + ('Temporal/RangeDateTime/SingleDateTime', self.checker_rules.checkTemporalSingleTime), + ('Temporal/RangeDateTime/BeginningDateTime', self.checker_rules.checkTemporalBeginningTime), + ('Temporal/RangeDateTime/EndingDateTime', self.checker_rules.checkTemporalEndingTime, KeyError, "np"), + ('Spatial/HorizontalSpatialDomain/Geometry/BoundingRectangle', self.checker_rules.checkBoundingRectangle, None, "np, np, np, np"), + ('OrbitCalculatedSpatialDomains/OrbitCalculatedSpatialDomain/EquatorCrossingDateTime', self.checker_rules.checkEquatorCrossingTime, TypeError, "np", self.check_equator_crossing_time), + ('Platforms/Platform/ShortName', self.checker_rules.checkPlatformShortName, TypeError, "np", self.check_platform_short_name), + ('Platforms/Platform/Instruments/Instrument/ShortName', self.check_instruments_short_name), + ('Platforms/Platform/Instruments/Instrument/Sensors/Sensor/ShortName', self.check_sensor_short_name), + ('Campaigns/', self.check_campaign_short_name), + ('OnlineAccessURLs/OnlineAccessURL/URL', self.check_online_access_url, TypeError, "No Online Access URL is provided"), + ('OnlineAccessURLs/OnlineAccessURL/URLDescription', self.check_online_access_url_desc, TypeError, "Recommend providing a brief URL description"), + ('OnlineResources/OnlineResource/URL', self.check_online_resource_url), + ('OnlineResource/OnlineResource/Description', self.check_online_resource_desc), + ('OnlineResources/OnlineResource/Type', self.check_online_resource_type), + ('Orderable', self.checker_rules.checkOrderable), + ('DataFormat', self.checker_rules.checkDataFormat, KeyError, "Recommend providing the data format for the associated granule"), + ('Visible', self.checker_rules.checkVisible), + ] + + for key, check_function, *exception_handling in checks: + result[key] = self.safe_wrap(metadata, key, check_function, *exception_handling) + + return result + + def safe_wrap(self, metadata, key, check_function, specific_exception=None, specific_message=None, alternative_function=None): + try: + if alternative_function: + return alternative_function(metadata, check_function) + return check_function(metadata[key]) + except specific_exception: + return specific_message + except Exception: + return "np" + + def check_equator_crossing_time(self, metadata, check_function): try: - result[str] = self.checkerRules.checkEquatorCrossingTime( - metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain'][ - 'EquatorCrossingDateTime'], 1) + return check_function(metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain']['EquatorCrossingDateTime'], 1) except TypeError: - if metadata['OrbitCalculatedSpatialDomains'] != None and metadata['OrbitCalculatedSpatialDomains'][ - 'OrbitCalculatedSpatialDomain'] != None: + if metadata['OrbitCalculatedSpatialDomains'] and metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain']: length = len(metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain']) try: - result[str] = self.checkerRules.checkEquatorCrossingTime( - metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain'][ - 'EquatorCrossingDateTime'], length) + return 
check_function(metadata['OrbitCalculatedSpatialDomains']['OrbitCalculatedSpatialDomain']['EquatorCrossingDateTime'], length) except: - result[str]= "np" - else: - result[str] = "np" - except: - result[str]= "np" - # ================ - str = 'Platforms/Platform/ShortName' + return "np" + return "np" + + def check_platform_short_name(self, metadata, check_function): try: - result[str] = self.checkerRules.checkPlatformShortName(metadata['Platforms']['Platform']['ShortName'],1) + return check_function(metadata['Platforms']['Platform']['ShortName'], 1) except TypeError: - if metadata['Platforms'] != None and metadata['Platforms']['Platform'] != None: + if metadata['Platforms'] and metadata['Platforms']['Platform']: length = len(metadata['Platforms']['Platform']) - result[str] = self.checkerRules.checkPlatformShortName(metadata['Platforms']['Platform'], length) - else: - result[str] = "np" - except: - result[str] = "np" - # ================ - # try: - # metadata['Platforms']['Platform']['ShortName'] - # platform_num = 1 - # result += self.checkInstrShortName(metadata['Platforms']['Platform'], platform_num) + ', , , ' - # except TypeError: - # if metadata['Platforms'] != None and metadata['Platforms']['Platform'] != None: - # platform_num = len(metadata['Platforms']['Platform']) - # result += self.checkInstrShortName(metadata['Platforms']['Platform'], platform_num) + ', , , ' - # else: - # result += "np" + ', , , ' - # except KeyError: - # result += "np" + ', , , ' - # ================ - str = 'Platforms/Platform/Instruments/Instrument/ShortName' - instruments = self.fetchAllInstrs - sensorShortResult = '' + return check_function(metadata['Platforms']['Platform'], length) + return "np" + + def check_instruments_short_name(self, metadata, check_function): + sensor_short_result = '' try: metadata['Platforms']['Platform']['ShortName'] platform_num = 1 - ret, sensorShortResult = self.checkerRules.checkInstrShortName(metadata['Platforms']['Platform'], - platform_num, instruments) - result[str] = ret + ret, sensor_short_result = check_function(metadata['Platforms']['Platform'], platform_num, self.fetch_all_instrs) + return ret except TypeError: - if metadata['Platforms'] != None and metadata['Platforms']['Platform'] != None: + if metadata['Platforms'] and metadata['Platforms']['Platform']: platform_num = len(metadata['Platforms']['Platform']) - ret, sensorShortResult = self.checkerRules.checkInstrShortName(metadata['Platforms']['Platform'], - platform_num, instruments) - result[str] = ret - else: - result[str] = 'np' - except KeyError: - result[str] = 'np' - # ================ - str = 'Platforms/Platform/Instruments/Instrument/Sensors/Sensor/ShortName' - if len(sensorShortResult) == 0: - result[str] = 'np' - else: - result[str] = sensorShortResult - # ================ - str = 'Campaigns/' + ret, sensor_short_result = check_function(metadata['Platforms']['Platform'], platform_num, self.fetch_all_instrs) + return ret + return 'np' + + def check_sensor_short_name(self, metadata, check_function): + sensor_short_result = self.check_instruments_short_name(metadata, check_function) + if len(sensor_short_result) == 0: + return 'np' + return sensor_short_result + + def check_campaign_short_name(self, metadata, check_function): try: campaign_num = 1 - result[str] = self.checkerRules.checkCampaignShortName(metadata['Campaigns']['Campaign']['ShortName'],campaign_num) + return check_function(metadata['Campaigns']['Campaign']['ShortName'], campaign_num) except TypeError: - if metadata['Campaigns'] != None and 
metadata['Campaigns']['Campaign'] != None: + if metadata['Campaigns'] and metadata['Campaigns']['Campaign']: campaign_num = len(metadata['Campaigns']) - result[str] = self.checkerRules.checkCampaignShortName(metadata['Campaigns'],campaign_num) + return check_function(metadata['Campaigns'], campaign_num) except: - result[str] = "np" - # ================ - str = 'OnlineAccessURLs/OnlineAccessURL/URL' + return "np" + + def check_online_access_url(self, metadata, check_function): try: - result[str] = self.checkerRules.checkOnlineAccessURL(metadata['OnlineAccessURLs']['OnlineAccessURL']['URL'],1) + return check_function(metadata['OnlineAccessURLs']['OnlineAccessURL']['URL'], 1) except TypeError: - if metadata['OnlineAccessURLs'] != None: + if metadata['OnlineAccessURLs']: length = len(metadata['OnlineAccessURLs']['OnlineAccessURL']) - result[str] = self.checkerRules.checkOnlineAccessURL(metadata['OnlineAccessURLs']['OnlineAccessURL'], - length) - else: - result[str] = "No Online Access URL is provided" + return check_function(metadata['OnlineAccessURLs']['OnlineAccessURL'], length) except KeyError: - result[str] = "No Online Access URL is provided" + return "No Online Access URL is provided" except: - result[str] = "np" - # ================ - str = 'OnlineAccessURLs/OnlineAccessURL/URLDescription' + return "np" + + def check_online_access_url_desc(self, metadata, check_function): try: - result[str]= self.checkerRules.checkOnlineAccessURLDesc( - metadata['OnlineAccessURLs']['OnlineAccessURL']['URLDescription'], 1) + return check_function(metadata['OnlineAccessURLs']['OnlineAccessURL']['URLDescription'], 1) except TypeError: - if metadata['OnlineAccessURLs'] != None: + if metadata['OnlineAccessURLs']: length = len(metadata['OnlineAccessURLs']['OnlineAccessURL']) - result[str] = self.checkerRules.checkOnlineAccessURLDesc(metadata['OnlineAccessURLs']['OnlineAccessURL'], length) - else: - result[str] = "Recommend providing a brief URL description" + return check_function(metadata['OnlineAccessURLs']['OnlineAccessURL'], length) except KeyError: - result[str] = "Recommend providing a brief URL description" + return "Recommend providing a brief URL description" except: - result[str] = "np" - # ================ - str = 'OnlineResources/OnlineResource/URL' - OnlineResourceURL_Cnt = 0 + return "np" + + def check_online_resource_url(self, metadata, check_function): try: - result[str] = self.checkerRules.checkOnlineResourceURL(metadata['OnlineResources']['OnlineResource']['URL'], - 1) - OnlineResourceURL_Cnt = 1 + return check_function(metadata['OnlineResources']['OnlineResource']['URL'], 1) except TypeError: - if metadata['OnlineResources'] != None: + if metadata['OnlineResources']: length = len(metadata['OnlineResources']['OnlineResource']) - OnlineResourceURL_Cnt = length - result[str] = self.checkerRules.checkOnlineResourceURL(metadata['OnlineResources']['OnlineResource'], - length) - else: - result[str] = "np" + return check_function(metadata['OnlineResources']['OnlineResource'], length) except: - result[str] = "np" - # ================ - str = 'OnlineResource/OnlineResource/Description' + return "np" + + def check_online_resource_desc(self, metadata, check_function): try: - result[str] = self.checkerRules.checkOnlineResourceDesc( - metadata['OnlineResources']['OnlineResource']['Description'], 1) + return check_function(metadata['OnlineResources']['OnlineResource']['Description'], 1) except TypeError: - if metadata['OnlineResources'] != None: + if metadata['OnlineResources']: length = 
len(metadata['OnlineResources']['OnlineResource']) - if length < OnlineResourceURL_Cnt: - result[str] = "Recommend providing descriptions for all Online Resource URLs." - else: - result[str] = self.checkerRules.checkOnlineResourceDesc( - metadata['OnlineResources']['OnlineResource'], length) - else: - result[str] = "np" + return check_function(metadata['OnlineResources']['OnlineResource'], length) except: - result[str] = "np" - # ================ - str = 'OnlineResources/OnlineResource/Type' + return "np" + + def check_online_resource_type(self, metadata, check_function): try: - result[str] = self.checkerRules.checkOnlineResourceType( - metadata['OnlineResources']['OnlineResource']['Type'], 1) + return check_function(metadata['OnlineResources']['OnlineResource']['Type'], 1) except TypeError: - if metadata['OnlineResources'] != None: + if metadata['OnlineResources']: length = len(metadata['OnlineResources']['OnlineResource']) - result[str] = self.checkerRules.checkOnlineResourceType(metadata['OnlineResources']['OnlineResource'], - length) - else: - result[str] = "np" - except: - result[str] = "np" - # ================ - str = "Orderable" - try: - result[str] = self.checkerRules.checkOrderable(metadata["Orderable"]) + return check_function(metadata['OnlineResources']['OnlineResource'], length) except: - result[str] = "np" - # ================ - str = 'DataFormat' - try: - result[str] = self.checkerRules.checkDataFormat(metadata["DataFormat"]) - except KeyError: - result[str] = "Recommend providing the data format for the associated granule" - except: - result[str] = "np" - # ================ - str = 'Visible' - try: - result[str] = self.checkerRules.checkVisible(metadata["Visible"]) - except: - result[str] = "np" - return result + return "np" From 309f57e83ca835781dd0655478aea2c5fee60c68 Mon Sep 17 00:00:00 2001 From: San <99511815+sanowl@users.noreply.github.com> Date: Sun, 16 Jun 2024 14:24:20 +0300 Subject: [PATCH 3/4] Refactor CollectionsControllerTest for Improved Readability and Maintainability --- .../collections_controller_test.rb | 487 ++++++------------ 1 file changed, 144 insertions(+), 343 deletions(-) diff --git a/test/controllers/collections_controller_test.rb b/test/controllers/collections_controller_test.rb index 381dd380..c2125869 100644 --- a/test/controllers/collections_controller_test.rb +++ b/test/controllers/collections_controller_test.rb @@ -1,5 +1,5 @@ require 'test_helper' -Dir[Rails.root.join("test/**/*.rb")].each {|f| require f} +Dir[Rails.root.join("test/**/*.rb")].each { |f| require f } class CollectionsControllerTest < ActionDispatch::IntegrationTest include Devise::Test::IntegrationHelpers @@ -12,33 +12,22 @@ class CollectionsControllerTest < ActionDispatch::IntegrationTest end describe 'GET #search' do - it 'it returns modis results' do + it 'returns MODIS results' do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) - stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?keyword=*modis*&page_num=1&page_size=10&provider=ORNL_CLOUD"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub('modis-search.xml'), headers: {}) - - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/collections.echo10?keyword=*modis*&page_num=1&page_size=10&provider=GESDISCCLD"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). 
- to_return(status: 200, body: get_stub('modis-search.xml'), headers: {}) - - get '/collections_search', params: { provider: 'DAAC: ANY', free_text: 'modis', curr_page:1 } - count = assigns(:collection_count) - search_iterator = assigns(:search_iterator) - assert(113, count) - assert('C1200019523-OB_DAAC', search_iterator[0]['concept_id']) + stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?keyword=*modis*&page_num=1&page_size=10&provider=ORNL_CLOUD") + .with(headers: default_headers) + .to_return(status: 200, body: get_stub('modis-search.xml')) + + stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/collections.echo10?keyword=*modis*&page_num=1&page_size=10&provider=GESDISCCLD") + .with(headers: default_headers) + .to_return(status: 200, body: get_stub('modis-search.xml')) + + get '/collections_search', params: { provider: 'DAAC: ANY', free_text: 'modis', curr_page: 1 } + + assert_equal 113, assigns(:collection_count) + assert_equal 'C1200019523-OB_DAAC', assigns(:search_iterator)[0]['concept_id'] end end @@ -46,378 +35,190 @@ class CollectionsControllerTest < ActionDispatch::IntegrationTest it "loads the correct collection on show" do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) - - #stubbing all requests for raw_data - stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_collection_C1000000020-LANCEAMSR2.xml"), :headers => {"date"=>["Tue, 21 Feb 2017 16:02:46 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["10554"], "cmr-took"=>["40"], "cmr-request-id"=>["5b0c8426-3a23-4025-a4d3-6d1c9024153a"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]}) - - stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - - }). - to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.xml'), headers: {}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). 
- to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.json'), headers: {}) + setup_show_stubs get '/collections/1', params: { record_id: 1 } - collection_records = assigns(:collection_records) - assert_equal(6, collection_records.length) + + assert_equal 6, assigns(:collection_records).length end it "redirects when no concept id is provided" do - #redirects no record_id sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) - get '/collections/1', params: { } - assert_equal(response.code, "302") + get '/collections/1', params: {} + assert_redirected_to root_path end it "redirects when no collection is found" do - #redirects no collection found sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) get '/collections/1', params: { record_id: "xyz" } - assert_equal(response.code, "302") + assert_redirected_to root_path end - it "detects if a granule is no longer in cmr" do + it "detects if a granule is no longer in CMR" do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) - - #stubbing all requests for raw_data - stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: '{"hits" : 0,"took" : 105,"items" : []}', headers: {}) - stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub('search_granules_by_collection_C1000000020-LANCEAMSR2.xml')) - - stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - - }). - to_return(status: 200, - body: '029', - headers: {}) + setup_granule_not_found_stubs get '/collections/1', params: { record_id: 1 } - assert_select "span[class='indicator_for_granule_deleted_in_cmr']", count: 5, - :text => '[Granule Not Found in CMR]' + + assert_select ".indicator_for_granule_deleted_in_cmr", count: 5, text: '[Granule Not Found in CMR]' end it "detects if a new granule revision is available" do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) + setup_granule_revision_stubs - stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2"). - with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }). - to_return(:status => 200, :body => get_stub('search_granules_by_collection_C1000000020-LANCEAMSR2.xml')) - - stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - - }). - to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.xml'), headers: {}) - stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G309210-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). 
- to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.json'), headers: {}) get '/collections/1', params: { record_id: 1 } + assert_select '.import_new_revision', count: 5 end end - - describe "POST #create" do it "downloads and saves a new record" do sign_in(user) stub_urs_access(user.uid, user.access_token, user.refresh_token) + setup_create_stubs - stub_request(:get, Regexp.new("#{Regexp.escape(@cmr_base_url)}\\/search\\/collections\\.(echo10|native)\\?concept_id\\=C222702\\-GHRC")).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_collection_C222702-GHRC.xml"), :headers => {"date"=>["Tue, 21 Feb 2017 15:50:04 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["2974"], "cmr-request-id"=>["bb005bac-18ce-4b6a-b69f-3f29f820ced5"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]}) - #stubbing the new format check - stub_request(:get, "#{@cmr_base_url}/search/collections.atom?concept_id=C222702-GHRC").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => "ECHO10", :headers => {"date"=>["Fri, 17 Mar 2017 20:00:54 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["107"], "cmr-request-id"=>["308d3b81-b229-4593-a05e-c61a741d45be"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"], "strict-transport-security"=>["max-age=31536000"]}) - - #Since a granule is chosen at random, a full mock can not be used. - #in this instance, we return a set collection of results for any call using this concept id and granule keyword. - stub_request(:get, /.*granules.echo10*C222702-GHRC.*/).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_granules_G309203-GHRC.xml"), :headers => {"date"=>["Tue, 21 Feb 2017 16:02:46 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["10554"], "cmr-took"=>["40"], "cmr-request-id"=>["5b0c8426-3a23-4025-a4d3-6d1c9024153a"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]}) - stub_request(:get, /.*granules.umm_json*C222702-GHRC.*/).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_granules_G309203-GHRC.json"), :headers => {"date"=>["Tue, 21 Feb 2017 16:02:46 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["10554"], "cmr-took"=>["40"], "cmr-request-id"=>["5b0c8426-3a23-4025-a4d3-6d1c9024153a"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/concepts/G226250-GHRC.echo10"). 
- with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: "", headers: {}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C222702-GHRC&page_num=1&page_size=10"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.json"), headers: {}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C222702-GHRC&page_num=2&page_size=10"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.json"), headers: {}) - stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C222702-GHRC&page_num=3&page_size=10"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.json"), headers: {}) - #stubbing the granule raw look up - stub_request(:get, /.*granules.echo10\?concept_id=G.*/).with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_granules_G226250-GHRC.xml"), :headers => {"date"=>["Tue, 14 Mar 2017 19:36:02 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["26"], "cmr-request-id"=>["46ad6de7-598a-463e-99e0-2a22ddf651da"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"], "strict-transport-security"=>["max-age=31536000"]}) - stub_request(:get, "#{@cmr_base_url}/search/concepts/C222702-GHRC.echo10"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). - to_return(status: 200, body: get_stub("C222702-GHRC_echo10.xml"), headers: {}) - stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G226250-GHRC"). - with( - headers: { - 'Accept'=>'*/*', - 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', - 'User-Agent'=>'Ruby' - }). 
-      to_return(status: 200, body: get_stub("search_granules_G226250-GHRC.json"), headers: {})
-
-    #Making sure record does not exist before ingest
-    assert_equal(0, (Collection.where concept_id: "C222702-GHRC").length)
-    Quarc.stub_any_instance(:validate, {}) do
-      post collections_url, params: { concept_id: "C222702-GHRC", revision_id: "32", granulesCount: 1 }
+    assert_difference 'Collection.where(concept_id: "C222702-GHRC").count', 1 do
+      Quarc.stub_any_instance(:validate, {}) do
+        post collections_url, params: { concept_id: "C222702-GHRC", revision_id: "32", granulesCount: 1 }
+      end
     end
-    assert_equal("302", response.code)
-
-    #collection with rawJSON saved in system
-    assert_equal(1, (Collection.where concept_id: "C222702-GHRC").length)
-    assert_equal("daylightn", (Collection.where concept_id: "C222702-GHRC").first.records.first.values["ShortName"])
-
-    record = (Collection.where concept_id: "C222702-GHRC").first.records.first
-    #script ran on new collection
-    refute(record.binary_script_values["InsertTime"])
-
-    # collection with umm-json can be saved to system. see ticket CMRARC-480
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/concepts/C190733714-LPDAAC_ECS.umm_json").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_collection_C190733714-LPDAAC_ECS.json"), headers: {})
-    stub_request(:get, "#{@cmr_base_url}/search/collections.atom?concept_id=C190733714-LPDAAC_ECS").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_collection_C190733714-LPDAAC_ECS.atom"), :headers => {"date"=>["Fri, 17 Mar 2017 20:00:54 GMT"], "content-type"=>["application/echo10+xml; charset=utf-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["107"], "cmr-request-id"=>["308d3b81-b229-4593-a05e-c61a741d45be"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"], "strict-transport-security"=>["max-age=31536000"]})
-    stub_request(:get, "#{@cmr_base_url}/search/collections.umm_json?concept_id=C190733714-LPDAAC_ECS").with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(:status => 200, :body => get_stub("search_collection_C190733714-LPDAAC_ECS.json"), :headers => {"date"=>["Tue, 21 Feb 2017 15:50:04 GMT"], "content-type"=>["application/vnd.nasa.cmr.umm_results+json;version=1.13; charset=UTF-8"], "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"], "access-control-allow-origin"=>["*"], "cmr-hits"=>["1"], "cmr-took"=>["2974"], "cmr-request-id"=>["bb005bac-18ce-4b6a-b69f-3f29f820ced5"], "vary"=>["Accept-Encoding, User-Agent"], "connection"=>["close"], "server"=>["Jetty(9.2.z-SNAPSHOT)"]})
-    stub_request(:get, /.*granules.echo10*C190733714-LPDAAC_ECS.*/).with(headers: {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', }).to_return(status: 200, body: get_stub("search_granules_by_collection_C190733714-LPDAAC_ECS.xml"), headers: {})
-    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?collection_concept_id=C190733714-LPDAAC_ECS&page_size=10&page_num=1").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_granules_by_collection_C190733714-LPDAAC_ECS.json"), headers: {})
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C190733714-LPDAAC_ECS&page_num=3&page_size=10").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_granules_by_collection_C190733714-LPDAAC_ECS.json"), headers: {})
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?collection_concept_id=C190733714-LPDAAC_ECS&page_num=2&page_size=10").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_granules_by_collection_C190733714-LPDAAC_ECS.json"), headers: {})
-    Quarc.stub_any_instance(:validate, {}) do
-      post collections_url, params: { concept_id: "C190733714-LPDAAC_ECS", revision_id: "77", granuleCounts: 1 }
+
+    assert_redirected_to collection_path(assigns(:collection))
+    assert_collection_and_granule_saved_correctly("C222702-GHRC")
+  end
+
+  it "downloads and saves a new ISO record as UMM-JSON" do
+    sign_in(user)
+    stub_urs_access(user.uid, user.access_token, user.refresh_token)
+    setup_iso_record_stubs
+
+    assert_difference 'Collection.where(concept_id: "C1599780765-NSIDC_ECS").count', 1 do
+      Quarc.stub_any_instance(:validate, {}) do
+        post collections_url, params: { concept_id: "C1599780765-NSIDC_ECS", revision_id: "77", granuleCounts: 1 }
+      end
     end
-    get '/collections/1', params: { concept_id: "C190733714-LPDAAC_ECS" }
-    assert_select "#record_format", count: 1,
-                  :text => 'umm-c; version=1.13'
+    assert_redirected_to collection_path(assigns(:collection))
+    assert_iso_record_saved_correctly("C1599780765-NSIDC_ECS")
+  end
 
-    assert_equal("200", response.code)
-    assert_equal(1, (Collection.where concept_id: "C190733714-LPDAAC_ECS").length)
+  private
 
-    #ingest for collection logged
-    assert_equal("abaker@element84.com", record.ingest.user.email)
+  def default_headers
+    {
+      'Accept' => '*/*',
+      'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
+      'User-Agent' => 'Ruby'
+    }
+  end
 
-    #saves 1 associated granule
-    assert_equal(1, (Collection.where concept_id: "C222702-GHRC").first.granules.length)
-    #needs to match regex since the granule that is taken from the list is random each time
-    assert_equal(0, (Collection.where concept_id: "C222702-GHRC").first.granules.first.records.first.values["GranuleUR"] =~ /Ndaily/)
+  def setup_show_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_collection_C1000000020-LANCEAMSR2.xml"))
 
-    granule_record = (Collection.where concept_id: "C222702-GHRC").first.granules.first.records.first
-    #ingest for granule logged
-    assert_equal("abaker@element84.com", granule_record.ingest.user.email)
-  end
+    stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.xml'))
 
-  it "downloads and saves a new iso record as umm-json" do
-    sign_in(user)
-    stub_urs_access(user.uid, user.access_token, user.refresh_token)
+    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/granules.umm_json?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.json'))
+  end
 
-    # the atom will return the native format is iso-19115
-    stub_request(:get, "#{@cmr_base_url}/search/collections.atom?concept_id=C1599780765-NSIDC_ECS")
-      .with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', })
-      .to_return(:status => 200, :body => get_stub("search_collection_C1599780765-NSIDC_ECS.atom"),
-                 :headers => {"date"=>["Fri, 17 Mar 2017 20:00:54 GMT"],
-                              "content-type"=>["application/atom+xml; charset=utf-8"],
-                              "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"],
-                              "access-control-allow-origin"=>["*"],
-                              "cmr-hits"=>["1"],
-                              "cmr-took"=>["107"],
-                              "cmr-request-id"=>["308d3b81-b229-4593-a05e-c61a741d45be"],
-                              "vary"=>["Accept-Encoding, User-Agent"],
-                              "connection"=>["close"],
-                              "server"=>["Jetty(9.2.z-SNAPSHOT)"],
-                              "strict-transport-security"=>["max-age=31536000"]})
-
-    # application logic should pull umm-json instead
-    stub_request(:get, "#{@cmr_base_url}/search/collections.umm_json?concept_id=C1599780765-NSIDC_ECS")
-      .with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', })
-      .to_return(:status => 200, :body => get_stub("search_collection_C1599780765-NSIDC_ECS.json"),
-                 :headers => {"date"=>["Tue, 21 Feb 2017 15:50:04 GMT"],
-                              "content-type"=>["application/vnd.nasa.cmr.umm_results+json;version=1.13; charset=UTF-8"],
-                              "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"],
-                              "access-control-allow-origin"=>["*"],
-                              "cmr-hits"=>["1"],
-                              "cmr-took"=>["2974"],
-                              "cmr-request-id"=>["bb005bac-18ce-4b6a-b69f-3f29f820ced5"],
-                              "vary"=>["Accept-Encoding, User-Agent"],
-                              "connection"=>["close"],
-                              "server"=>["Jetty(9.2.z-SNAPSHOT)"]})
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/concepts/C1599780765-NSIDC_ECS.umm_json").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_collection_C1599780765-NSIDC_ECS.json"), headers: {})
-    # stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/collections.umm_json?concept_id=C1599780765-NSIDC_ECS").
-    #   with(
-    #     headers: {
-    #       'Accept'=>'*/*',
-    #       'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-    #       'User-Agent'=>'Ruby'
-    #     }).
-    #   to_return(status: 200, body: get_stub("search_collection_C1599780765-NSIDC_ECS.json"), headers: {})
+  def setup_granule_not_found_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: '{"hits" : 0,"took" : 105,"items" : []}')
 
-    # stub for pulling a random granule
-    stub_request(:get, /.*granules.echo10\?concept_id=G.*/)
-      .with(:headers => {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', })
-      .to_return(:status => 200, :body => get_stub("search_granules_C1599780765-NSIDC_ECS.xml"),
-                 :headers => {"date"=>["Tue, 14 Mar 2017 19:36:02 GMT"],
-                              "content-type"=>["application/echo10+xml; charset=utf-8"],
-                              "access-control-expose-headers"=>["CMR-Hits, CMR-Request-Id"],
-                              "access-control-allow-origin"=>["*"],
-                              "cmr-hits"=>["1"], "cmr-took"=>["26"],
-                              "cmr-request-id"=>["46ad6de7-598a-463e-99e0-2a22ddf651da"],
-                              "vary"=>["Accept-Encoding, User-Agent"],
-                              "connection"=>["close"],
-                              "server"=>["Jetty(9.2.z-SNAPSHOT)"],
-                              "strict-transport-security"=>["max-age=31536000"]})
-
-    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?collection_concept_id=C1599780765-NSIDC_ECS&page_size=10&page_num=1").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub('search_granules_by_collection_C1599780765-NSIDC_ECS.json'), headers: {})
-    stub_request(:get, "#{Cmr.get_cmr_base_url}/search/granules.echo10?collection_concept_id=C1599780765-NSIDC_ECS&page_num=1&page_size=10").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-
-        }).
-      to_return(status: 200, body: get_stub("search_granules_by_collection_C1599780765-NSIDC_ECS.xml"), headers: {})
-    stub_request(:get, /.*granules.echo10*C1599780765-NSIDC_ECS.*/)
-      .with(headers: {'Accept'=>'*/*', 'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', })
-      .to_return(status: 200, body: get_stub("search_granules_by_collection_C1599780765-NSIDC_ECS.xml"), headers: {})
-    stub_request(:get, "#{Cmr.get_cmr_base_url}/search/granules.umm_json?concept_id=G1599790933-NSIDC_ECS").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: get_stub("search_granules_G1599790933-NSIDC_ECS.json"), headers: {})
-    stub_request(:get, "https://cmr.sit.earthdata.nasa.gov/search/concepts/G1599790933-NSIDC_ECS.echo10").
-      with(
-        headers: {
-          'Accept'=>'*/*',
-          'Accept-Encoding'=>'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
-          'User-Agent'=>'Ruby'
-        }).
-      to_return(status: 200, body: "", headers: {})
-    Quarc.stub_any_instance(:validate, {}) do
-      post collections_url, params: { concept_id: "C1599780765-NSIDC_ECS", revision_id: "77", granuleCounts: 1 }
-    end
-    assert_equal("302", response.code)
-    assert_equal(1, (Collection.where concept_id: "C1599780765-NSIDC_ECS").length)
+    stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_by_collection_C1000000020-LANCEAMSR2.xml'))
+    stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: '029')
+  end
 
-    record = (Collection.where concept_id: "C1599780765-NSIDC_ECS").first.records.first
+  def setup_granule_revision_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C1000000020-LANCEAMSR2")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_by_collection_C1000000020-LANCEAMSR2.xml'))
 
-    assert_equal"iso19115", record.native_format
-    assert_equal"umm_json", record.format
+    stub_request(:get, "#{@cmr_base_url}/search/granules.echo10?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.xml'))
 
-    #ingest for collection logged
-    assert_equal("abaker@element84.com", record.ingest.user.email)
+    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G309210-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_G309210-GHRC.json'))
+  end
 
-    #saves 1 associated granule
-    assert_equal(1, (Collection.where concept_id: "C1599780765-NSIDC_ECS").first.granules.length)
-    #needs to match regex since the granule that is taken from the list is random each time
-    assert_equal(0, (Collection.where concept_id: "C1599780765-NSIDC_ECS").first.granules.first.records.first.values["GranuleUR"] =~ /SC:ABLVIS0/)
+  def setup_create_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/collections.echo10?concept_id=C222702-GHRC")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_collection_C222702-GHRC.xml"))
 
-    granule_record = (Collection.where concept_id: "C1599780765-NSIDC_ECS").first.granules.first.records.first
-    #ingest for granule logged
-    assert_equal("abaker@element84.com", granule_record.ingest.user.email)
+    stub_request(:get, /.*granules.echo10*C222702-GHRC.*/)
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.xml"))
 
-  end
+    stub_request(:get, /.*granules.umm_json*C222702-GHRC.*/)
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_granules_G309203-GHRC.json"))
+  end
+
+  def setup_iso_record_stubs
+    stub_request(:get, "#{@cmr_base_url}/search/collections.atom?concept_id=C1599780765-NSIDC_ECS")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_collection_C1599780765-NSIDC_ECS.atom"))
+
+    stub_request(:get, "#{@cmr_base_url}/search/collections.umm_json?concept_id=C1599780765-NSIDC_ECS")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_collection_C1599780765-NSIDC_ECS.json"))
+
+    stub_request(:get, /.*granules.echo10\?concept_id=G.*/)
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_granules_C1599780765-NSIDC_ECS.xml"))
+    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?collection_concept_id=C1599780765-NSIDC_ECS&page_size=10&page_num=1")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub('search_granules_by_collection_C1599780765-NSIDC_ECS.json'))
+
+    stub_request(:get, "#{@cmr_base_url}/search/granules.umm_json?concept_id=G1599790933-NSIDC_ECS")
+      .with(headers: default_headers)
+      .to_return(status: 200, body: get_stub("search_granules_G1599790933-NSIDC_ECS.json"))
+  end
+
+  def assert_collection_and_granule_saved_correctly(concept_id)
+    collection = Collection.find_by(concept_id: concept_id)
+    assert collection.present?
+    assert_equal "abaker@element84.com", collection.records.first.ingest.user.email
+    assert_equal 1, collection.granules.count
+    granule = collection.granules.first
+    assert_match /Ndaily/, granule.records.first.values["GranuleUR"]
+    assert_equal "abaker@element84.com", granule.records.first.ingest.user.email
   end
 
+  def assert_iso_record_saved_correctly(concept_id)
+    collection = Collection.find_by(concept_id: concept_id)
+    assert collection.present?
+    record = collection.records.first
+    assert_equal "iso19115", record.native_format
+    assert_equal "umm_json", record.format
+    assert_equal "abaker@element84.com", record.ingest.user.email
+    assert_equal 1, collection.granules.count
+    granule = collection.granules.first
+    assert_match /SC:ABLVIS0/, granule.records.first.values["GranuleUR"]
+    assert_equal "abaker@element84.com", granule.records.first.ingest.user.email
+  end
 end

From 3594bb525205d89d25cf1ad8e3bbada9e0200f0a Mon Sep 17 00:00:00 2001
From: San <99511815+sanowl@users.noreply.github.com>
Date: Sun, 16 Jun 2024 14:34:37 +0300
Subject: [PATCH 4/4] Refactor and Enhance Test Helper Classes

---
 lib/bs4/testing.py | 282 +++++++++++----------------------------------
 1 file changed, 70 insertions(+), 212 deletions(-)

diff --git a/lib/bs4/testing.py b/lib/bs4/testing.py
index 5a84b0ba..e71a8acb 100644
--- a/lib/bs4/testing.py
+++ b/lib/bs4/testing.py
@@ -12,12 +12,14 @@
     Doctype,
     SoupStrainer,
 )
-
 from bs4.builder import HTMLParserTreeBuilder
 
+import pytest
+
 default_builder = HTMLParserTreeBuilder
 
 
 class SoupTest(unittest.TestCase):
+    """Base class for BeautifulSoup tests."""
 
     @property
     def default_builder(self):
@@ -29,43 +31,28 @@ def soup(self, markup, **kwargs):
         return BeautifulSoup(markup, builder=builder, **kwargs)
 
     def document_for(self, markup):
-        """Turn an HTML fragment into a document.
-
-        The details depend on the builder.
-        """
+        """Turn an HTML fragment into a document."""
         return self.default_builder.test_fragment_to_document(markup)
 
     def assertSoupEquals(self, to_parse, compare_parsed_to=None):
+        """Assert that the parsed soup equals the expected result."""
         builder = self.default_builder
         obj = BeautifulSoup(to_parse, builder=builder)
         if compare_parsed_to is None:
             compare_parsed_to = to_parse
-
         self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
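For readers skimming the patch: assertSoupEquals parses a fragment and compares the
re-serialized tree against an expected document. A rough standalone equivalent — a
sketch only, using the stock "html.parser" builder instead of the harness's
test_fragment_to_document helper (assumed here, not part of this patch):

    from bs4 import BeautifulSoup

    def soup_equals(to_parse, expected=None):
        # Parse the fragment, then compare its serialized form against the
        # expected markup (defaulting to the input itself, i.e. a round trip).
        expected = to_parse if expected is None else expected
        return BeautifulSoup(to_parse, "html.parser").decode() == expected

    assert soup_equals("<b>foo</b>")          # clean markup survives a round trip
    assert soup_equals("<p>", "<p></p>")      # an unclosed tag gets closed

This is the invariant most tests below lean on: where the two markup strings differ,
the test is pinning down how the parser is expected to normalize the input.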
 
 
-class HTMLTreeBuilderSmokeTest(object):
-
-    """A basic test of a treebuilder's competence.
-
-    Any HTML treebuilder, present or future, should be able to pass
-    these tests. With invalid markup, there's room for interpretation,
-    and different parsers can handle it differently. But with the
-    markup in these tests, there's not much room for interpretation.
-    """
+class HTMLTreeBuilderSmokeTest(SoupTest):
+    """Basic tests for HTML tree builder competence."""
 
     def assertDoctypeHandled(self, doctype_fragment):
         """Assert that a given doctype string is handled correctly."""
         doctype_str, soup = self._document_with_doctype(doctype_fragment)
-
-        # Make sure a Doctype object was created.
         doctype = soup.contents[0]
-        self.assertEqual(doctype.__class__, Doctype)
+        self.assertIsInstance(doctype, Doctype)
         self.assertEqual(doctype, doctype_fragment)
         self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
-
-        # Make sure that the doctype was correctly associated with the
-        # parse tree and that the rest of the document parsed.
         self.assertEqual(soup.p.contents[0], 'foo')
 
     def _document_with_doctype(self, doctype_fragment):
@@ -75,29 +62,20 @@ def _document_with_doctype(self, doctype_fragment):
         soup = self.soup(markup)
         return doctype, soup
 
-    def test_normal_doctypes(self):
-        """Make sure normal, everyday HTML doctypes are handled correctly."""
+    def test_doctypes(self):
+        """Test various doctype declarations."""
         self.assertDoctypeHandled("html")
         self.assertDoctypeHandled(
             'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
-
-    def test_public_doctype_with_url(self):
-        doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
-        self.assertDoctypeHandled(doctype)
-
-    def test_system_doctype(self):
+        self.assertDoctypeHandled(
+            'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
+            '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"')
         self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
-
-    def test_namespaced_system_doctype(self):
-        # We can handle a namespaced doctype with a system ID.
         self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
-
-    def test_namespaced_public_doctype(self):
-        # Test a namespaced doctype with a public id.
         self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
 
     def test_real_xhtml_document(self):
-        """A real XHTML document should come out more or less the same as it went in."""
+        """Ensure a real XHTML document is parsed correctly."""
         markup = b"""<?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head><title>Hello.</title></head>
 <body>Goodbye.</body>
 </html>"""
@@ -110,128 +88,58 @@ def test_real_xhtml_document(self):
         markup.replace(b"\n", b""))
 
     def test_deepcopy(self):
-        """Make sure you can copy the tree builder.
-
-        This is important because the builder is part of a
-        BeautifulSoup object, and we want to be able to copy that.
-        """
+        """Ensure the tree builder can be deep copied."""
         copy.deepcopy(self.default_builder)
 
-    def test_p_tag_is_never_empty_element(self):
-        """A <p> tag is never designated as an empty-element tag.
-
-        Even if the markup shows it as an empty-element tag, it
-        shouldn't be presented that way.
-        """
-        soup = self.soup("<p/>")
-        self.assertFalse(soup.p.is_empty_element)
-        self.assertEqual(str(soup.p), "<p></p>")
-
-    def test_unclosed_tags_get_closed(self):
-        """A tag that's not closed by the end of the document should be closed.
-
-        This applies to all tags except empty-element tags.
-        """
+    def test_empty_and_unclosed_tags(self):
+        """Test handling of empty and unclosed tags."""
         self.assertSoupEquals("<p>", "<p></p>")
         self.assertSoupEquals("<b>", "<b></b>")
-        self.assertSoupEquals("<br>", "<br/>")
-
-    def test_br_is_always_empty_element_tag(self):
-        """A <br> tag is designated as an empty-element tag.
-
-        Some parsers treat <br></br> as one <br/> tag, some parsers as
-        two tags, but it should always be an empty-element tag.
-        """
         soup = self.soup("<br></br>")
         self.assertTrue(soup.br.is_empty_element)
         self.assertEqual(str(soup.br), "<br/>")
 
-    def test_nested_formatting_elements(self):
+    def test_nested_elements(self):
+        """Test handling of nested elements."""
         self.assertSoupEquals("<em><em></em></em>")
-
-    def test_comment(self):
-        # Comments are represented as Comment objects.
         markup = "<p>foo<!--foobar-->baz</p>"
         self.assertSoupEquals(markup)
-
         soup = self.soup(markup)
         comment = soup.find(text="foobar")
-        self.assertEqual(comment.__class__, Comment)
+        self.assertIsInstance(comment, Comment)
 
-    def test_preserved_whitespace_in_pre_and_textarea(self):
-        """Whitespace must be preserved in <pre> and <textarea> tags."""
-        self.assertSoupEquals("<pre>   </pre>")
-        self.assertSoupEquals("<textarea> woo  </textarea>")
-
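The empty-element rules that the merged test_empty_and_unclosed_tags pins down are
easy to verify by hand. A minimal sketch, assuming bs4 is installed and using the
stock "html.parser" builder rather than this harness:

    from bs4 import BeautifulSoup

    # <br> is an empty-element (void) tag no matter how the markup spells it,
    # so even "<br></br>" serializes back as a single "<br/>".
    soup = BeautifulSoup("<br></br>", "html.parser")
    assert soup.br.is_empty_element
    assert str(soup.br) == "<br/>"

    # <p> is never an empty-element tag, even when written as "<p/>".
    soup = BeautifulSoup("<p/>", "html.parser")
    assert not soup.p.is_empty_element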
-    def test_nested_inline_elements(self):
-        """Inline elements can be nested indefinitely."""
-        b_tag = "<b>Inside a B tag</b>"
-        self.assertSoupEquals(b_tag)
-
-        nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-        double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-    def test_nested_block_level_elements(self):
-        """Block elements can be nested."""
-        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
-        blockquote = soup.blockquote
-        self.assertEqual(blockquote.p.b.string, 'Foo')
-        self.assertEqual(blockquote.b.string, 'Foo')
-
-    def test_correctly_nested_tables(self):
-        """One table can go inside another one."""
-        markup = ('<table id="1">'
-                  '<tr>'
-                  "<td>Here's another table:"
-                  '<table id="2">'
-                  '<tr><td>foo</td></tr>'
-                  '</table></td>')
-
-        self.assertSoupEquals(
-            markup,
-            '<table id="1"><tr><td>Here\'s another table:'
-            '<table id="2"><tr><td>foo</td></tr></table>'
-            '</td></tr></table>')
-
-        self.assertSoupEquals(
-            "<table><thead><tr><td>Foo</td></tr></thead>"
-            "<tbody><tr><td>Bar</td></tr></tbody>"
-            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
 
-    def test_angle_brackets_in_attribute_values_are_escaped(self):
+    def test_angle_brackets_in_attributes(self):
+        """Ensure angle brackets in attribute values are escaped."""
         self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
 
-    def test_entities_in_attributes_converted_to_unicode(self):
+    def test_entities_in_attributes_and_text(self):
+        """Ensure entities in attributes and text are converted to Unicode."""
         expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
         self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
         self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
-
-    def test_entities_in_text_converted_to_unicode(self):
         expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
         self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
         self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
         self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
-
-    def test_quot_entity_converted_to_quotation_mark(self):
         self.assertSoupEquals("<p>I said &quot;good day!&quot;</p>",
                               '<p>I said "good day!"</p>')
 
     def test_out_of_range_entity(self):
+        """Ensure out-of-range entities are replaced with a replacement character."""
         expect = u"\N{REPLACEMENT CHARACTER}"
         self.assertSoupEquals("&#10000000000000;", expect)
         self.assertSoupEquals("&#x10000000000000;", expect)
         self.assertSoupEquals("&#1000000000;", expect)
 
     def test_basic_namespaces(self):
-        """Parsers don't need to *understand* namespaces, but at the
-        very least they should not choke on namespaces or lose
-        data."""
-
+        """Test handling of basic namespaces."""
         markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head><title>4</title></head><body></body></html>'
         soup = self.soup(markup)
         self.assertEqual(markup, soup.encode())
@@ -242,108 +150,80 @@ def test_basic_namespaces(self):
         self.assertEqual(
             'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
 
-    def test_multivalued_attribute_value_becomes_list(self):
+    def test_multivalued_attribute_value(self):
+        """Ensure multi-valued attribute values become lists."""
         markup = b'<a class="foo bar">'
         soup = self.soup(markup)
         self.assertEqual(['foo', 'bar'], soup.a['class'])
 
-    #
-    # Generally speaking, tests below this point are more tests of
-    # Beautiful Soup than tests of the tree builders. But parsers are
-    # weird, so we run these tests separately for every tree builder
-    # to detect any differences between them.
-    #
-
     def test_soupstrainer(self):
-        """Parsers should be able to work with SoupStrainers."""
+        """Ensure parsers can work with SoupStrainers."""
         strainer = SoupStrainer("b")
         soup = self.soup("A <b>bold</b> statement",
                          parse_only=strainer)
         self.assertEqual(soup.decode(), "<b>bold</b>")
 
-    def test_single_quote_attribute_values_become_double_quotes(self):
+    def test_single_quote_attribute_values(self):
+        """Ensure single quote attribute values become double quotes."""
         self.assertSoupEquals("<foo attr='bar'></foo>",
                               '<foo attr="bar"></foo>')
 
-    def test_attribute_values_with_nested_quotes_are_left_alone(self):
+    def test_attribute_values_with_nested_quotes(self):
+        """Ensure attribute values with nested quotes are handled correctly."""
         text = """<foo attr='bar "brawls" happen'>a</foo>"""
         self.assertSoupEquals(text)
-
-    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
-        text = """<foo attr='bar "brawls" happen'>a</foo>"""
         soup = self.soup(text)
         soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
         self.assertSoupEquals(
             soup.foo.decode(),
             """<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
 
-    def test_ampersand_in_attribute_value_gets_escaped(self):
+    def test_ampersand_in_attribute_value(self):
+        """Ensure ampersand in attribute value is escaped."""
         self.assertSoupEquals('<this is="really messed up & stuff"></this>',
                               '<this is="really messed up &amp; stuff"></this>')
         self.assertSoupEquals(
             '<a href="http://example.org?a=1&b=2;3">foo</a>',
             '<a href="http://example.org?a=1&amp;b=2;3">foo</a>')
-
-    def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
         self.assertSoupEquals('<a href="http://example.org?a=1&amp;b=2;3"></a>')
 
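The entity round-trip rules exercised above can be checked directly in an
interpreter. A minimal sketch, assuming bs4 is installed with the stock
"html.parser" builder:

    from bs4 import BeautifulSoup

    # Named and numeric entities are converted to Unicode while parsing ...
    soup = BeautifulSoup("<p>pi&ntilde;ata</p>", "html.parser")
    assert soup.p.string == "pi\N{LATIN SMALL LETTER N WITH TILDE}ata"

    # ... and reserved characters such as "&" are re-escaped on output.
    soup = BeautifulSoup('<a href="http://example.org?a=1&b=2;3">foo</a>',
                         "html.parser")
    assert "a=1&amp;b=2;3" in soup.a.decode()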
-    def test_entities_in_strings_converted_during_parsing(self):
-        # Both XML and HTML entities are converted to Unicode characters
-        # during parsing.
+    def test_entities_in_strings(self):
+        """Ensure entities in strings are converted during parsing."""
         text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
         expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
         self.assertSoupEquals(text, expected)
 
-    def test_smart_quotes_converted_on_the_way_in(self):
-        # Microsoft smart quotes are converted to Unicode characters during
-        # parsing.
+    def test_smart_quotes_converted(self):
+        """Ensure Microsoft smart quotes are converted to Unicode characters."""
         quote = b"<p>\x91Foo\x92</p>"
         soup = self.soup(quote)
         self.assertEqual(
             soup.p.string,
             u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
 
-    def test_non_breaking_spaces_converted_on_the_way_in(self):
+    def test_non_breaking_spaces(self):
+        """Ensure non-breaking spaces are converted to Unicode."""
         soup = self.soup("<a>&nbsp;&nbsp;</a>")
         self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
 
-    def test_entities_converted_on_the_way_out(self):
+    def test_entities_converted_on_output(self):
+        """Ensure entities are converted on the way out."""
         text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
         expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
         soup = self.soup(text)
         self.assertEqual(soup.p.encode("utf-8"), expected)
 
     def test_real_iso_latin_document(self):
-        # Smoke test of interrelated functionality, using an
-        # easy-to-understand document.
-
-        # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
+        """Ensure real ISO-Latin-1 document is parsed and encoded correctly."""
         unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
-
-        # That's because we're going to encode it into ISO-Latin-1, and use
-        # that to test.
         iso_latin_html = unicode_html.encode("iso-8859-1")
-
-        # Parse the ISO-Latin-1 HTML.
         soup = self.soup(iso_latin_html)
-        # Encode it to UTF-8.
         result = soup.encode("utf-8")
-
-        # What do we expect the result to look like? Well, it would
-        # look like unicode_html, except that the META tag would say
-        # UTF-8 instead of ISO-Latin-1.
-        expected = unicode_html.replace("ISO-Latin-1", "utf-8")
-
-        # And, of course, it would be in UTF-8, not Unicode.
-        expected = expected.encode("utf-8")
-
-        # Ta-da!
+        expected = unicode_html.replace("ISO-Latin-1", "utf-8").encode("utf-8")
         self.assertEqual(result, expected)
 
     def test_real_shift_jis_document(self):
-        # Smoke test to make sure the parser can handle a document in
-        # Shift-JIS encoding, without choking.
+        """Ensure real Shift-JIS document is parsed correctly."""
         shift_jis_html = (
             b'<html><head></head><body><pre>'
             b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
@@ -352,89 +232,65 @@ def test_real_shift_jis_document(self):
             b'</pre></body></html>')
         unicode_html = shift_jis_html.decode("shift-jis")
         soup = self.soup(unicode_html)
-
-        # Make sure the parse tree is correctly encoded to various
-        # encodings.
         self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
         self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
 
     def test_real_hebrew_document(self):
-        # A real-world test to make sure we can convert ISO-8859-9 (a
-        # Hebrew encoding) to UTF-8.
+        """Ensure real Hebrew document is parsed correctly."""
         hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
-        soup = self.soup(
-            hebrew_document, from_encoding="iso8859-8")
+        soup = self.soup(hebrew_document, from_encoding="iso8859-8")
         self.assertEqual(soup.original_encoding, 'iso8859-8')
         self.assertEqual(
             soup.encode('utf-8'),
             hebrew_document.decode("iso8859-8").encode("utf-8"))
 
     def test_meta_tag_reflects_current_encoding(self):
-        # Here's the <meta> tag saying that a document is
-        # encoded in Shift-JIS.
+        """Ensure meta tag reflects the current encoding."""
         meta_tag = ('<meta content="text/html; charset=x-sjis" '
                     'http-equiv="Content-type"/>')
-
-        # Here's a document incorporating that meta tag.
         shift_jis_html = (
             '<html><head>\n%s\n'
             '<meta http-equiv="Content-language" content="ja"/>'
             '</head><body>Shift-JIS markup goes here.') % meta_tag
         soup = self.soup(shift_jis_html)
-
-        # Parse the document, and the charset is seemingly unaffected.
         parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
         content = parsed_meta['content']
         self.assertEqual('text/html; charset=x-sjis', content)
-
-        # But that value is actually a ContentMetaAttributeValue object.
         self.assertTrue(isinstance(content, ContentMetaAttributeValue))
-
-        # And it will take on a value that reflects its current
-        # encoding.
         self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
 
-    # For the rest of the story, see TestSubstitutions in
-    # test_tree.py.
-
-    def test_html5_style_meta_tag_reflects_current_encoding(self):
-        # Here's the <meta> tag saying that a document is
-        # encoded in Shift-JIS.
+    def test_html5_style_meta_tag(self):
+        """Ensure HTML5 style meta tag reflects current encoding."""
         meta_tag = ('<meta id="encoding" charset="x-sjis" />')
-
-        # Here's a document incorporating that meta tag.
         shift_jis_html = (
             '<html><head>\n%s\n'
             '<meta http-equiv="Content-language" content="ja"/>'
             '</head><body>Shift-JIS markup goes here.') % meta_tag
         soup = self.soup(shift_jis_html)
-
-        # Parse the document, and the charset is seemingly unaffected.
         parsed_meta = soup.find('meta', id="encoding")
         charset = parsed_meta['charset']
         self.assertEqual('x-sjis', charset)
-
-        # But that value is actually a CharsetMetaAttributeValue object.
         self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
-
-        # And it will take on a value that reflects its current
-        # encoding.
         self.assertEqual('utf8', charset.encode("utf8"))
 
-    def test_tag_with_no_attributes_can_have_attributes_added(self):
+    def test_tag_with_no_attributes(self):
+        """Ensure a tag with no attributes can have attributes added."""
         data = self.soup("<a>text</a>")
         data.a['foo'] = 'bar'
         self.assertEqual('<a foo="bar">text</a>', data.a.decode())
 
-class XMLTreeBuilderSmokeTest(object):
+
+class XMLTreeBuilderSmokeTest(SoupTest):
+    """Basic tests for XML tree builder competence."""
 
     def test_docstring_generated(self):
+        """Ensure a docstring is generated with the correct encoding."""
         soup = self.soup("<root/>")
         self.assertEqual(
             soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
 
     def test_real_xhtml_document(self):
-        """A real XHTML document should come out *exactly* the same as it went in."""
+        """Ensure a real XHTML document is parsed correctly."""
         markup = b"""<?xml version="1.0" encoding="utf-8"?>
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head><title>Hello.</title></head>
""" soup = self.soup(markup) - self.assertEqual( - soup.encode("utf-8"), markup) - + self.assertEqual(soup.encode("utf-8"), markup) def test_docstring_includes_correct_encoding(self): + """Ensure docstring includes the correct encoding.""" soup = self.soup("") self.assertEqual( soup.encode("latin1"), b'\n') def test_large_xml_document(self): - """A large XML document should come out the same as it went in.""" + """Ensure a large XML document is parsed and encoded correctly.""" markup = (b'\n' + b'0' * (2**12) + b'') soup = self.soup(markup) self.assertEqual(soup.encode("utf-8"), markup) - def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): + """Ensure tags are empty elements if and only if they are empty.""" self.assertSoupEquals("

", "

") self.assertSoupEquals("

foo

") def test_namespaces_are_preserved(self): + """Ensure namespaces are preserved.""" markup = 'This tag is in the a namespaceThis tag is in the b namespace' soup = self.soup(markup) root = soup.root @@ -477,24 +333,25 @@ class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): """Smoke test for a tree builder that supports HTML5.""" def test_real_xhtml_document(self): - # Since XHTML is not HTML5, HTML5 parsers are not tested to handle - # XHTML documents in any particular way. + """Skip XHTML document test for HTML5 parsers.""" pass def test_html_tags_have_namespace(self): + """Ensure HTML tags have the correct namespace.""" markup = "" soup = self.soup(markup) self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) def test_svg_tags_have_namespace(self): + """Ensure SVG tags have the correct namespace.""" markup = '' soup = self.soup(markup) namespace = "http://www.w3.org/2000/svg" self.assertEqual(namespace, soup.svg.namespace) self.assertEqual(namespace, soup.circle.namespace) - def test_mathml_tags_have_namespace(self): + """Ensure MathML tags have the correct namespace.""" markup = '5' soup = self.soup(markup) namespace = 'http://www.w3.org/1998/Math/MathML' @@ -503,6 +360,7 @@ def test_mathml_tags_have_namespace(self): def skipIf(condition, reason): + """Conditionally skip a test.""" def nothing(test, *args, **kwargs): return None