''' cities2mysqlbetter.py A non-modularized script for converting some city population data into SQL for use in MySQL. ''' import sys import csv _tableCreator = ''' DROP TABLE IF EXISTS populations; CREATE TABLE populations ( city text, year int, population int ); DROP TABLE IF EXISTS betterpopulations; CREATE TABLE betterpopulations ( city_id int, year int, population int ); DROP TABLE IF EXISTS cities; CREATE TABLE cities ( id int, name text ); ''' # Extract city data from a CSV file whose first row consists of titles # (misc text, 1790, 1800,...) where the second and later titles are year integers. # The remaining rows consist of a city name followed by populations (in thousands) # for each of the years in the title row. reader = csv.reader(sys.stdin) titleRow = reader.next()[1:] cities = {} for row in reader: row = map(str.strip, row) assert len(row) == len(titleRow) + 1 cityName = row[0] assert cityName not in cities populations = row[1:] cities[cityName] = {'id':len(cities), 'populations':populations} # Print the output. print _tableCreator for cityName in cities: cityID = cities[cityName]['id'] populationRow = cities[cityName]['populations'] # Fill the naive "populations" table. for k in range(len(titleRow)): yearString = titleRow[k] populationString = populationRow[k] if populationString == '': populationString = '0' query = "INSERT INTO populations (city, year, population)" query += " VALUES ('%s', %s, %s);" % (cityName, yearString, populationString) print query # Fill the "cities" table. print print "INSERT INTO cities (id, name) VALUES (%d, '%s');" % (cityID, cityName) # Fill the smarter "betterpopulations" table. print for k in range(len(titleRow)): yearString = titleRow[k] populationString = populationRow[k] if populationString == '': populationString = '0' query = "INSERT INTO betterpopulations (city_id, year, population)" query += " VALUES (%d, %s, %s);" % (cityID, yearString, populationString) print query