RBA Exchange Rate Historical Data ETL into Azure
The Reserve Bank Of Australia maintains an historical exchange rate data page. I have downloaded Exchange Rates ‐ Daily ‐ 2023 to Current. I will:
The source file has the Exchange Date on the vertical axis and the Exchange Rate for each currency on the horizontal access (as well as the TWI):
Firstly I will create a table to hold the final data in Azure:
- create table forex
- (
- exchg_id int not null identity,
- exchg_date date not null,
- exchg_currency varchar(4) not null,
- exchg_rate decimal(19,4) null,
- constraint PK_forex primary key (exchg_id),
- constraint UQ_EXCHG_DATE_CURRENCY unique(exchg_date, exchg_currency)
- )
The Unique Constraint UQ_EXCHG_DATE_CURRENCY not only ensures data integrity and 3rd normal form but will also act as an index and will be very useful where results are ordered by exchg_date, exchg_currency.
The following Python code takes the source CSV and transforms it into an output file with three columns: Date, Currency and Exchange Rate. The third parameter, format_for_sql determines whether the output file is a SQL select statement which could be run directly in SQL Server Manegement Studio or a flat file for use in ETL.
- import os
- import sys
- CLI_SEPARATOR = "======================================================================\n"
- CLI_HEADER = "\nprocess_forex_csv\nProcesses CSV version of RBA Exchange Rate history:\nhttps://www.rba.gov.au/statistics/historical-data.html#exchange-rates\nfrom supplied format to a format ready for ETL"
- CLI_USAGE = "Usage:\nprocess_forex_csv.py [input_filename] [output_filename] [format_as_sql (y/n)] [append_to_output]\n"
- OUTPUT_HEADER = "Date,Currency,Rate\n"
- FILE_HEADER_1 = "EXCHANGE RATES ,,,,,,,,,,,,,,,,,,,,,,,"
- FILE_HEADER_2 = "HISTORICAL DAILY EXCHANGE RATES OF THE AUSTRALIAN DOLLAR AGAINST:"
- FOREX_HEADER = "Series ID"
- months = {
- "jan": "01",
- "feb": "02",
- "mar": "03",
- "apr": "04",
- "may": "05",
- "jun": "06",
- "jul": "07",
- "aug": "08",
- "sep": "09",
- "oct": "10",
- "nov": "11",
- "dec": "12"
- }
- def isNull(val): return True if str(val).upper() == 'NULL' else False
- class ProcessForexCSV:
- def __init__(self, input_filename, output_filename, format_for_sql, append_to_output):
- self.input_filename = input_filename
- self.output_filename = output_filename
- self.format_for_sql = format_for_sql
- self.append_to_output = append_to_output
- self.currencies = []
- def process_file(self):
- input_file_path = os.path.join(os.getcwd(), self.input_filename)
- output_file_path = os.path.join(os.getcwd(), self.output_filename)
- with open(input_file_path, "rt") as f:
- new_lines = ""
- line = f.readline()
- if line.find(FILE_HEADER_1) < 0 and line.find(FILE_HEADER_2) < 0:
- print("Invalid file format.")
- return
- line = f.readline()
- while line.find(FOREX_HEADER) < 0:
- line = f.readline()
- tokens = line.split(",")
- i = 0
- for token in tokens:
- if i > 0:
- self.currencies.append(token[len(token)-3:].strip())
- i += 1
- print("Currencies:\n{0}".format(CLI_SEPARATOR))
- for currency in self.currencies:
- print("{0}".format(currency))
- line = f.readline().strip()
- line_count = 0
- while line != "":
- line_count += 1
- line_tokens = line.split(",")
- token_count = len(line_tokens)
- exchg_date = line_tokens[0]
- original_exchg_date = exchg_date
- date_tokens = exchg_date.split("-")
- exchg_date = date_tokens[2] + months[date_tokens[1].lower()].zfill(2) + date_tokens[0].zfill(2)
- i = 1
- if self.format_for_sql and line_count == 1:
- line = "VALUES\n"
- new_lines += line
- while i < token_count:
- if line_tokens[i] == "":
- line_tokens[i] = "NULL"
- exchg_line = ""
- if not self.format_for_sql:
- exchg_line = "{0},{1},{2}\n".format(original_exchg_date, self.currencies[i-1], line_tokens[i])
- else:
- exchg_line = " ('" + exchg_date + "','" + self.currencies[i-1] + "'," + line_tokens[i] + "),\n"
- i += 1
- if exchg_line != "":
- #print(exchg_line)
- new_lines += exchg_line
- line = f.readline().strip()
- f.close()
- # get rid of the final "union all"
- if self.format_for_sql:
- new_lines = new_lines[:len(new_lines)-len(",\n")]
- else:
- new_lines = new_lines[:len(new_lines)-len("\n")]
- #print(new_lines)
- file_mode = "at" if self.append_to_output else "wt"
- with open(output_file_path, file_mode) as f2:
- if not self.format_for_sql and not self.append_to_output:
- f2.write(OUTPUT_HEADER)
- f2.write(new_lines)
- f2.close()
- print("{0}\n{1} exchange rates {2} to {3}\n{4}".format(CLI_SEPARATOR, line_count, "appended" if self.append_to_output else "written", self.output_filename, CLI_SEPARATOR))
- if __name__ == "__main__":
- print(CLI_HEADER)
- print(CLI_SEPARATOR)
- print(CLI_USAGE)
-
- if len(sys.argv) == 5:
- input_filename = sys.argv[1]
- output_filename = sys.argv[2]
- format_for_sql = True if str(sys.argv[3]).lower() == "y" else False
- append_to_output = True if str(sys.argv[4]).lower() == "y" else False
- process = ProcessForexCSV(input_filename, output_filename, format_for_sql, append_to_output)
- process.process_file()
- else:
- print("Invalid number of parameters.")
If format_for_sql is 'y' an SQL statement with 18,009 VALUES is produced. You only need add the insert at the top:
VALUES
('20230103','USD','0.6828'),
('20230103','TWI','61.40'),
('20230103','RCR','4.6994'),
('20230103','RJY','88.48'),
('20230103','EUR','0.6400'),
('20230103','SKW','867.16'),
('20230103','KPS','0.5656'),
('20230103','RSD','0.9134'),
('20230103','IRE','56.51'),
('20230103','RTB','23.47'),
('20230103','NZD','1.0760'),
('20230103','NTD','20.97'),
('20230103','RMR','2.9975'),
('20230103','RIR','10635'),
('20230103','RVD','16039'),
('20230103','AED',NULL),
('20230103','NGK',NULL),
('20230103','HKD','5.3316'),
('20230103','RCD',NULL),
('20230103','ARD',NULL),
('20230103','RSF','0.6310'),
('20230103','PHP','38.07'),
('20230103','DR','0.5131'),
...
('20260213','USD','0.7086'),
('20260213','TWI','65.10'),
('20260213','RCR','4.8940'),
('20260213','RJY','108.59'),
('20260213','EUR','0.5973'),
('20260213','SKW','1022.76'),
('20260213','KPS','0.5206'),
('20260213','RSD','0.8952'),
('20260213','IRE','64.27'),
('20260213','RTB','22.03'),
('20260213','NZD','1.1743'),
('20260213','NTD','22.29'),
('20260213','RMR','2.7628'),
('20260213','RIR','11936'),
('20260213','RVD','18402'),
('20260213','AED',NULL),
('20260213','NGK','3.0386'),
('20260213','HKD','5.5388'),
('20260213','RCD','0.9650'),
('20260213','ARD',NULL),
('20260213','RSF','0.5456'),
('20260213','PHP','41.09'),
('20260213','DR','0.5135')
If format_as_sql is 'n' a flat CSV file is produced:
Date,Currency,Rate
03-Jan-2023,USD,0.6828
03-Jan-2023,TWI,61.40
03-Jan-2023,RCR,4.6994
03-Jan-2023,RJY,88.48
03-Jan-2023,EUR,0.6400
03-Jan-2023,SKW,867.16
03-Jan-2023,KPS,0.5656
03-Jan-2023,RSD,0.9134
03-Jan-2023,IRE,56.51
03-Jan-2023,RTB,23.47
03-Jan-2023,NZD,1.0760
03-Jan-2023,NTD,20.97
03-Jan-2023,RMR,2.9975
03-Jan-2023,RIR,10635
03-Jan-2023,RVD,16039
03-Jan-2023,AED,NULL
03-Jan-2023,NGK,NULL
03-Jan-2023,HKD,5.3316
03-Jan-2023,RCD,NULL
03-Jan-2023,ARD,NULL
03-Jan-2023,RSF,0.6310
03-Jan-2023,PHP,38.07
03-Jan-2023,DR,0.5131
...
13-Feb-2026,USD,0.7086
13-Feb-2026,TWI,65.10
13-Feb-2026,RCR,4.8940
13-Feb-2026,RJY,108.59
13-Feb-2026,EUR,0.5973
13-Feb-2026,SKW,1022.76
13-Feb-2026,KPS,0.5206
13-Feb-2026,RSD,0.8952
13-Feb-2026,IRE,64.27
13-Feb-2026,RTB,22.03
13-Feb-2026,NZD,1.1743
13-Feb-2026,NTD,22.29
13-Feb-2026,RMR,2.7628
13-Feb-2026,RIR,11936
13-Feb-2026,RVD,18402
13-Feb-2026,AED,NULL
13-Feb-2026,NGK,3.0386
13-Feb-2026,HKD,5.5388
13-Feb-2026,RCD,0.9650
13-Feb-2026,ARD,NULL
13-Feb-2026,RSF,0.5456
13-Feb-2026,PHP,41.09
13-Feb-2026,DR,0.5135
It actually works out a lot easier to use the SQL statements rather than construct an ETL package. Just need to add one line at the top. Though for a more complex ETL project we would create staging tables and use an ETL platform such as SSIS. This will be the subject of my next SQL Blog.
- insert forex(exchg_date, exchg_currency, exchg_rate)
- VALUES
- ('20230103','USD','0.6828'),
- ('20230103','TWI','61.40'),
- ('20230103','RCR','4.6994'),
- ('20230103','RJY','88.48'),
- ('20230103','EUR','0.6400'),
- ('20230103','SKW','867.16'),
- ('20230103','KPS','0.5656'),
- ('20230103','RSD','0.9134'),
- ('20230103','IRE','56.51'),
- ('20230103','RTB','23.47'),
- ('20230103','NZD','1.0760'),
- ('20230103','NTD','20.97'),
- ('20230103','RMR','2.9975'),
- ('20230103','RIR','10635'),
- ('20230103','RVD','16039'),
- ('20230103','AED',NULL),
- ('20230103','NGK',NULL),
- ('20230103','HKD','5.3316'),
- ('20230103','RCD',NULL),
- ('20230103','ARD',NULL),
- ('20230103','RSF','0.6310'),
- ('20230103','PHP','38.07'),
- ('20230103','DR','0.5131'),
- ...
18009 rows affected
Select USD Exchange Rate ordered by date:
You can visualise the data from the Forex menu above.