Tuesday 30 April 2019

Parsing NASR data: APT file (landing facility data)

The dictionary that stores the field boundaries of the attributes we want to extract looks as follows:

# Field layouts per NASR record type. Each attribute name maps to a pair of
# integers; judging by consecutive entries (e.g. FAC_TYPE (13, 15) followed by
# LOC_IDENTIFIER (4, 28)) the pair looks like (field length, start column) --
# TODO confirm against the parse_line implementation.
_APT_RECORD_FIELDS = {
    'FAC_TYPE': (13, 15),
    'LOC_IDENTIFIER': (4, 28),
    'EFF_DATE': (10, 32),
    'CITY_NAME': (40, 94),
    'OFFICIAL_NAME': (50, 134),
    'ARP_LAT': (15, 524),
    'ARP_LON': (15, 551),
    'ARP_ELEV': (7, 579),
    'MAG_VAR': (3, 587),
    'MAG_VAR_EPOCH_YEAR': (4, 590),
    'ARFF_TYPE_DATE': (15, 843),
    'FUEL_TYPE': (40, 901),
    'REPAIR_SERVICES': (5, 941),
    'BOTTLED_OXYGEN': (8, 951),
    'BULK_OXYGEN': (8, 959),
}

_ATT_RECORD_FIELDS = {
    'ATTENDENCE_SCHEDULE': (108, 19),
}

# Lookup keyed by the three-character record type found at the start of a line.
apt_fields = {
    'APT': _APT_RECORD_FIELDS,
    'ATT': _ATT_RECORD_FIELDS,
}


The APTParser class is responsible for parsing the APT file line by line, but only those lines whose record type is 'APT' or 'ATT'. In the constructor, let's add three additional keys that will store the attendance schedule and the latitude and longitude in DD (decimal degrees) format, respectively:

class APTParser(NASRLineBaseParser):
    """Line parser for the NASR APT file, built on NASRLineBaseParser.

    Besides the attributes handled by the base class, the shared
    ``extracted_data`` dictionary gets three extra, initially empty entries:
    ``ATTENDENCE_SCHEDULE``, ``LAT_DD`` and ``LON_DD``.
    """

    # Class-level attribute, empty until data2csv stores the site number of
    # an 'APT' record in it.
    prev_site_number = ''

    def __init__(self, fields_dict):
        """Initialise the base parser with *fields_dict* and add the extra keys."""
        super().__init__(fields_dict)
        # The extra keys are written to the class-level dictionary, exactly
        # like the rest of the parser state.
        for extra_key in ('ATTENDENCE_SCHEDULE', 'LAT_DD', 'LON_DD'):
            APTParser.extracted_data[extra_key] = ''

The data2csv method will extract the data and save it to a CSV file (as the name suggests). First, we need to check which type of record the APT file line is:

def data2csv(self, raw_line, writer):
    """Handle one raw line of the APT file, writing data through *writer*.

    The record type is read from the first three characters of *raw_line*.
    """
    rec_type = raw_line[:3]


# 'APT' record: parse it with the 'APT' field layout and store the result on
# the class, then remember raw_line[3:11] as the site number of the latest
# 'APT' record so that following 'ATT' records can be compared against it.
if rec_type == 'APT':  # Parse for key 'APT'
    # NOTE(review): this rebinds APTParser.extracted_data to whatever
    # parse_line returns; whether the keys added in __init__ survive depends
    # on parse_line, which is not shown here -- confirm.
    APTParser.extracted_data = self.parse_line(raw_line, apt_fields[rec_type])
    current_site_number = raw_line[3:11]
    APTParser.prev_site_number = current_site_number


# 'ATT' record: only used when its raw_line[3:11] equals the site number
# remembered from the latest 'APT' record; in that case the value parsed for
# 'ATTENDENCE_SCHEDULE' is copied into the data extracted so far.
if rec_type == 'ATT':  # Parse for key 'ATT'
    current_site_number = raw_line[3:11]
    if APTParser.prev_site_number == current_site_number:
        att_data = self.parse_line(raw_line, apt_fields[rec_type])
        # Presumably att_data is a dict, so .get() yields None when the key
        # is missing -- verify against parse_line.
        APTParser.extracted_data['ATTENDENCE_SCHEDULE'] = att_data.get('ATTENDENCE_SCHEDULE')

Before we pass the dictionary with the data to the writerow method, we need to calculate the DD format of the DMSH values stored in the APT file:

# Convert the 'ARP_LAT' / 'ARP_LON' values of the extracted data with
# dmsh_hyphens_delimited2dd.
# NOTE(review): .get() hands None to the converter when a key is absent; how
# the helper treats None is not visible here -- confirm.
arp_lat_dd = dmsh_hyphens_delimited2dd(APTParser.extracted_data.get('ARP_LAT'))
arp_lon_dd = dmsh_hyphens_delimited2dd(APTParser.extracted_data.get('ARP_LON'))

# Store the converted values under the additional 'LAT_DD' / 'LON_DD' keys.
APTParser.extracted_data['LAT_DD'] = arp_lat_dd
APTParser.extracted_data['LON_DD'] = arp_lon_dd

# Write the current data dictionary through the supplied writer.
writer.writerow(APTParser.extracted_data)

Finally, we are ready to write the function that will parse the APT file and save the extracted data to a CSV file:

def apt2csv(input_file, output_file):
    """Parse the APT file *input_file* and write the result to *output_file*.

    The output is a semicolon-delimited CSV with a header row. Every line of
    the input is handed to ``APTParser.data2csv`` together with the CSV
    writer.
    """
    parser = APTParser(apt_fields['APT'])

    # The CSV columns follow the key order of the parser's data dictionary.
    header = list(APTParser.extracted_data)

    with open(input_file, 'r') as source, \
            open(output_file, 'w', newline='') as target:
        csv_writer = csv.DictWriter(target, fieldnames=header, delimiter=';')
        csv_writer.writeheader()

        for raw_line in source:
            parser.data2csv(raw_line, csv_writer)

No comments:

Post a Comment