# Fixed-width field layout of the APT file, keyed by record type and then by
# output column name.
# Each value appears to be a (field length, start column) pair: for adjacent
# entries such as FAC_TYPE -> LOC_IDENTIFIER -> EFF_DATE, start + length equals
# the next start.  NOTE(review): whether the start column is 1-based depends on
# parse_line, which is defined elsewhere - confirm.
apt_fields = {
    'APT': {
        'FAC_TYPE': (13, 15),
        'LOC_IDENTIFIER': (4, 28),
        'EFF_DATE': (10, 32),
        'CITY_NAME': (40, 94),
        'OFFICIAL_NAME': (50, 134),
        'ARP_LAT': (15, 524),
        'ARP_LON': (15, 551),
        'ARP_ELEV': (7, 579),
        'MAG_VAR': (3, 587),
        'MAG_VAR_EPOCH_YEAR': (4, 590),
        'ARFF_TYPE_DATE': (15, 843),
        'FUEL_TYPE': (40, 901),
        'REPAIR_SERVICES': (5, 941),
        'BOTTLED_OXYGEN': (8, 951),
        'BULK_OXYGEN': (8, 959),
    },
    'ATT': {
        'ATTENDENCE_SCHEDULE': (108, 19),
    },
}
The APTParser class is responsible for parsing the APT file line by line, but only those lines whose record type is 'APT' or 'ATT'. In the constructor, let's add three additional keys that will store the attendance schedule and the latitude and longitude in DD (decimal degrees) format, respectively:
class APTParser(NASRLineBaseParser):
    """Line parser for the APT file that works on 'APT' and 'ATT' records."""

    # Site number (raw_line[3:11]) of the most recently parsed 'APT' record;
    # data2csv compares it with the site number of following 'ATT' records.
    prev_site_number = ''

    def __init__(self, fields_dict):
        """Initialise the base parser and add the extra output columns.

        Args:
            fields_dict: Field layout handed unchanged to
                NASRLineBaseParser.__init__.
        """
        super().__init__(fields_dict)
        # Extra columns that do not come from the field layout: the attendance
        # schedule plus latitude/longitude in DD format.  They are stored on
        # the class-level dictionary, not on the instance.
        for extra_key in ('ATTENDENCE_SCHEDULE', 'LAT_DD', 'LON_DD'):
            APTParser.extracted_data[extra_key] = ''
The data2csv method extracts the data and saves it to a CSV file (as the name suggests). First, we need to check which type of record the APT file line contains:
# Method of APTParser (note the self parameter); indent it under the class
# header when assembling the snippets into one module.
def data2csv(self, raw_line, writer):
    """Parse one raw line of the APT file and write a CSV row for it.

    Only lines whose first three characters are 'APT' or 'ATT' are acted on.
    An 'APT' line rebinds the class-level ``APTParser.extracted_data`` to the
    value returned by ``parse_line`` and remembers the line's site number
    (``raw_line[3:11]``).  An 'ATT' line whose site number equals the
    remembered one supplies the ATTENDENCE_SCHEDULE value; the ARP
    latitude/longitude are then converted and the row is written.

    Args:
        raw_line: One line read from the APT file.
        writer: Object with a ``writerow`` method that accepts the
            extracted-data dictionary (``apt2csv`` passes a csv.DictWriter).
    """
    rec_type = raw_line[0:3]

    if rec_type == 'APT':  # Parse for key 'APT'
        APTParser.extracted_data = self.parse_line(raw_line, apt_fields[rec_type])
        current_site_number = raw_line[3:11]
        # Remember which facility the following 'ATT' records should belong to.
        APTParser.prev_site_number = current_site_number

    if rec_type == 'ATT':  # Parse for key 'ATT'
        current_site_number = raw_line[3:11]
        # Only accept an 'ATT' record that belongs to the last parsed 'APT' record.
        if APTParser.prev_site_number == current_site_number:
            att_data = self.parse_line(raw_line, apt_fields[rec_type])
            APTParser.extracted_data['ATTENDENCE_SCHEDULE'] = att_data.get('ATTENDENCE_SCHEDULE')

            # NOTE(review): the original indentation of this snippet was lost.
            # The conversion/write step is placed inside the matching-'ATT'
            # branch (one row per accepted 'ATT' record) as the most plausible
            # reading - confirm against the author's source.
            #
            # Before we pass the dictionary with data to the writerow method,
            # we need to calculate the DD format of the DMSH values stored in
            # the APT file:
            arp_lat_dd = dmsh_hyphens_delimited2dd(APTParser.extracted_data.get('ARP_LAT'))
            arp_lon_dd = dmsh_hyphens_delimited2dd(APTParser.extracted_data.get('ARP_LON'))
            APTParser.extracted_data['LAT_DD'] = arp_lat_dd
            APTParser.extracted_data['LON_DD'] = arp_lon_dd
            writer.writerow(APTParser.extracted_data)
Finally, we are ready to write the function that parses the APT file and saves the extracted data to a CSV file:
def apt2csv(input_file, output_file):
    """Convert the APT file into a semicolon-delimited CSV file.

    Every line of ``input_file`` is handed to ``APTParser.data2csv``, which
    decides whether a row is written.

    Args:
        input_file: Path of the APT file to read.
        output_file: Path of the CSV file to create (overwritten if present).
    """
    parser = APTParser(apt_fields['APT'])
    # Column names are read after the parser is constructed, because the
    # constructor adds keys to the class-level dictionary.
    column_names = list(APTParser.extracted_data)

    with open(input_file, 'r') as source, \
            open(output_file, 'w', newline='') as target:
        csv_writer = csv.DictWriter(target, fieldnames=column_names, delimiter=';')
        csv_writer.writeheader()
        for raw_line in source:
            parser.data2csv(raw_line, csv_writer)
No comments:
Post a Comment