import polars as pl
900)
pl.Config.set_fmt_str_lengths(900) pl.Config.set_tbl_width_chars(
polars.config.Config
Just increasing the size of the output to be able to read it easier.
In [161]:
I manually input the data from the placemat into the list of dictionaries below. Where I made edits I also commented why the edits were made.
In [162]:
placement_data = [
# 1998
{"tasting_year": 1998, "rank": 1, "name": "Macallan (12)"},
{"tasting_year": 1998, "rank": 2, "name": "Glenrothes (15)"},
{"tasting_year": 1998, "rank": 3, "name": "Bowmore (15)"},
{"tasting_year": 1998, "rank": 4, "name": "Glenkinchie (10)"},
{"tasting_year": 1998, "rank": 5, "name": "Talisker (10)"},
# 1999
{"tasting_year": 1999, "rank": 1, "name": "Glenmorangie (18)"},
{"tasting_year": 1999, "rank": 2, "name": "Macallan (18)"},
{"tasting_year": 1999, "rank": 3, "name": "Tobermory (Ledaig 1974)"},
{"tasting_year": 1999, "rank": 4, "name": "Scapa (12)"},
{"tasting_year": 1999, "rank": 5, "name": "Bunnahabhain (12)"},
# 2000
{"tasting_year": 2000, "rank": 1, "name": "Glenmorangie (1971)"},
{"tasting_year": 2000, "rank": 2, "name": "Highland Park (12)"},
{"tasting_year": 2000, "rank": 3, "name": "Balvenie (15)"},
{"tasting_year": 2000, "rank": 4, "name": "Dalwhinnie (15)"},
# Assume Bowmaore was a typo and should be Bowmore.
{"tasting_year": 2000, "rank": 5, "name": "Bowmore (12)"},
# 2001
{"tasting_year": 2001, "rank": 1, "name": "Macallan (25)"},
{"tasting_year": 2001, "rank": 2, "name": "Glengoyne (17)"},
{"tasting_year": 2001, "rank": 3, "name": "Knockando (12)"},
{"tasting_year": 2001, "rank": 4, "name": "Oban (14)"},
{"tasting_year": 2001, "rank": 5, "name": "Talisker (10)"},
# 2002
{"tasting_year": 2002, "rank": 1, "name": "Highland Park (1977)"},
{"tasting_year": 2002, "rank": 2, "name": "Coleburn (21)"},
{"tasting_year": 2002, "rank": 3, "name": "Glenglassaugh (1974)"},
{"tasting_year": 2002, "rank": 4, "name": "Lagavulin (16)"},
{"tasting_year": 2002, "rank": 5, "name": "Glenkinchie (Distiller's Edition)"},
# 2003
{"tasting_year": 2003, "rank": 1, "name": "Glenfiddich (21)"},
{"tasting_year": 2003, "rank": 2, "name": "Teaninich (21)"},
{"tasting_year": 2003, "rank": 3, "name": "St. Magdalene (25)"},
{"tasting_year": 2003, "rank": 4, "name": "Ardbeg (17)"},
{"tasting_year": 2003, "rank": 5, "name": "Linkwood (25)"},
# 2004
{"tasting_year": 2004, "rank": 1, "name": "Balvenie (25)"},
{"tasting_year": 2004, "rank": 2, "name": "Macallan (1841 Replica)"},
{"tasting_year": 2004, "rank": 3, "name": "Provenance (10)"},
{"tasting_year": 2004, "rank": 4, "name": "Bowmore (10)"},
{"tasting_year": 2004, "rank": 5, "name": "Old Pulteney (12)"},
# 2005
{"tasting_year": 2005, "rank": 1, "name": "Highland Park (25)"},
{"tasting_year": 2005, "rank": 2, "name": "Glen Ord (12)"},
{"tasting_year": 2005, "rank": 3, "name": "Inverleven (1986)"},
{"tasting_year": 2005, "rank": 4, "name": "Tobermory (Mull Village 10)"},
{"tasting_year": 2005, "rank": 5, "name": "Caol Ila (Cask Strength)"},
# 2006
{"tasting_year": 2006, "rank": 1, "name": "Poit Dhubh (12)"},
{"tasting_year": 2006, "rank": 2, "name": "Balblair (16)"},
{"tasting_year": 2006, "rank": 3, "name": "Glenburgie (Glencraig 29)"},
{"tasting_year": 2006, "rank": 4, "name": "Auchentoshan (21)"},
{"tasting_year": 2006, "rank": 5, "name": "Fettercairn (13)"},
# 2007
{"tasting_year": 2007, "rank": 1, "name": "Macallan (35)"},
{"tasting_year": 2007, "rank": 2, "name": "Jura (Superstition ?)"},
{"tasting_year": 2007, "rank": 3, "name": "Highland Park (30)"},
{"tasting_year": 2007, "rank": 4, "name": "Glen Ord (25)"},
{"tasting_year": 2007, "rank": 5, "name": "St. Magdalene (1975)"},
# 2008
{"tasting_year": 2008, "rank": 1, "name": "Glenfarclas (30)"},
{"tasting_year": 2008, "rank": 2, "name": "Scapa (26)"},
{"tasting_year": 2008, "rank": 3, "name": "Pittyvaich (1976)"},
{"tasting_year": 2008, "rank": 4, "name": "Auchentoshan (18)"},
{"tasting_year": 2008, "rank": 5, "name": "Arran (?)"},
# 2009
{"tasting_year": 2009, "rank": 1, "name": "Glenmorangie (25)"},
# TODO: Is Glenrothes and Glenrothes the same?
{"tasting_year": 2009, "rank": 2, "name": "Glenrothes (18)"},
{"tasting_year": 2009, "rank": 3, "name": "Brora (24)"},
{"tasting_year": 2009, "rank": 4, "name": "Glenfarclas (Blairfindy 31)"},
{"tasting_year": 2009, "rank": 5, "name": "Arran (10 Burns' 250th Anniversary)"},
# 2010
{"tasting_year": 2010, "rank": 1, "name": "Glenlivet (XXV)"},
{"tasting_year": 2010, "rank": 2, "name": "Tamdhu (18)"},
{"tasting_year": 2010, "rank": 3, "name": "Rosebank (18)"},
{"tasting_year": 2010, "rank": 4, "name": "Port Ellen (1982)"},
{"tasting_year": 2010, "rank": 5, "name": "Old Pulteney (17)"},
# 2011
{"tasting_year": 2011, "rank": 1, "name": "Bruichladdich (16)"},
{"tasting_year": 2011, "rank": 2, "name": "Glen Ord (Singleton 12)"},
{"tasting_year": 2011, "rank": 3, "name": "Glenfarclas (40)"},
{"tasting_year": 2011, "rank": 4, "name": "Scapa (16)"},
{"tasting_year": 2011, "rank": 5, "name": "Bladnoch (1992)"},
# 2012
{"tasting_year": 2012, "rank": 1, "name": "Macallan (1969)"},
{"tasting_year": 2012, "rank": 2, "name": "Balblair (1989)"},
{"tasting_year": 2012, "rank": 3, "name": "Balvenie (21)"},
{"tasting_year": 2012, "rank": 4, "name": "Highland Park (12)"},
{"tasting_year": 2012, "rank": 5, "name": "Tobermory (15)"},
# 2013
{"tasting_year": 2013, "rank": 1, "name": "Glengoyne (21)"},
{"tasting_year": 2013, "rank": 2, "name": "Glen Keith (1968)"},
{"tasting_year": 2013, "rank": 3, "name": "Glen Grant (1974)"},
{"tasting_year": 2013, "rank": 4, "name": "Jura (Prophency ?)"},
{"tasting_year": 2013, "rank": 5, "name": "Linkwood (12)"},
# 2014
{"tasting_year": 2014, "rank": 1, "name": "Glenfiddich (30)"},
{"tasting_year": 2014, "rank": 2, "name": "Highland Park (Loki)"},
{"tasting_year": 2014, "rank": 3, "name": "Littlemill (21)"},
{"tasting_year": 2014, "rank": 4, "name": "Mackinlay Shackleton"},
{"tasting_year": 2014, "rank": 5, "name": "Kilchoman (3)"},
# 2015
{"tasting_year": 2015, "rank": 1, "name": "Arbelour (18)"},
{"tasting_year": 2015, "rank": 2, "name": "Glenmorangie (Signet)"},
{"tasting_year": 2015, "rank": 3, "name": "Macallan (Ruby)"},
{"tasting_year": 2015, "rank": 4, "name": "Tomatin (1989)"},
{"tasting_year": 2015, "rank": 5, "name": "Bunnahabhain (XXV)"},
# 2016
{"tasting_year": 2016, "rank": 1, "name": "Dalmore (King Alex III)"},
{"tasting_year": 2016, "rank": 2, "name": "Mortlach (Rare Old)"},
{"tasting_year": 2016, "rank": 3, "name": "Lismore (21)"},
{"tasting_year": 2016, "rank": 4, "name": "Cragganmore (25)"},
{"tasting_year": 2016, "rank": 5, "name": "Ardbeg (Corryvrechan)"},
# 2017
{"tasting_year": 2017, "rank": 1, "name": "Glendronach (21)"},
{"tasting_year": 2017, "rank": 2, "name": "Bowmore (23)"},
{"tasting_year": 2017, "rank": 3, "name": "Glenfarclas (12)"},
{"tasting_year": 2017, "rank": 4, "name": "Talisker (25)"},
{"tasting_year": 2017, "rank": 5, "name": "Tomintoul (16)"},
# 2018
{"tasting_year": 2018, "rank": 1, "name": "Craigellachie (23)"},
{"tasting_year": 2018, "rank": 2, "name": "Royal Brackla (21)"},
# Singleton, not Singletone
{"tasting_year": 2018, "rank": 3, "name": "Glen Ord (Singleton Tailfire)"},
{"tasting_year": 2018, "rank": 4, "name": "Benriach (1997)"},
{"tasting_year": 2018, "rank": 5, "name": "Bruichladdich (Black Art)"},
# 2019
{"tasting_year": 2019, "rank": 1, "name": "Blair Athol (23)"},
{"tasting_year": 2019, "rank": 2, "name": "Longmorn (16)"},
{"tasting_year": 2019, "rank": 3, "name": "Arran (Sauternes)"},
{"tasting_year": 2019, "rank": 4, "name": "Glenfiddich (Winter Storm)"},
{"tasting_year": 2019, "rank": 5, "name": "Kilkerran (12)"},
# 2020 - COVID
# 2021 - COVID
# 2022
{"tasting_year": 2022, "rank": 1, "name": "Tullibardine (20)"},
{"tasting_year": 2022, "rank": 2, "name": "Aberfeldy (19)"},
{"tasting_year": 2022, "rank": 3, "name": "Deanston (18)"},
{"tasting_year": 2022, "rank": 4, "name": "Jura (Tastival 2017)"},
{"tasting_year": 2022, "rank": 5, "name": "Glen Grant (18)"},
# 2023
{"tasting_year": 2023, "rank": 1, "name": "Glenfiddich (23)"},
{"tasting_year": 2023, "rank": 2, "name": "Bladnoch (Talia)"},
{"tasting_year": 2023, "rank": 3, "name": "Bunnahabhain (12)"},
{"tasting_year": 2023, "rank": 4, "name": "Highland Park (15)"},
{"tasting_year": 2023, "rank": 5, "name": "Ben Nevis (6)"},
]
In [163]:
In [164]:
# Extract more information from the column.
scotch_yearly_rankings = scotch_yearly_rankings.with_columns(
pl.col("name").str.extract(r"([\.\w\s]+) ", 1).alias("distillery"),
pl.col("name")
.str.extract(r"([1-4]?[0-9])", 1)
.cast(pl.Int32)
.alias("age_statement"),
pl.col("name").str.extract(r"(\d{4})", 1).cast(pl.Int32).alias("bottling_year"),
pl.col("name").str.extract(r"(\(.*\))", 1).alias("metadata"),
(6 - pl.col("rank")).alias("stars"),
)
We want more information about each distillery for our analysis. Wikipedia includes a table which of existing and past dead Scotch distilleries. It’s not a complete list, but it’s enough to get started with.
In [165]:
In [166]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")
distilleries_table = soup.find("table", {"class": "wikitable"})
distilleries_rows = distilleries_table.find("tbody").find_all("tr")
distillery_data = []
for row in distilleries_rows:
cells = row.find_all("td")
if not cells:
continue
distillery_data.append(
{
"distillery": cells[0].text.strip(),
"location": cells[1].text.strip(),
"region": cells[2].text.strip(),
"founded": cells[3].text.strip(),
"owner": cells[4].text.strip(),
}
)
In [167]:
# Some distilleries are missing from the Wikipedia page. For those, I manually searched
# online to find the missing information.
manually_added_distilleries = [
{
"distillery": "Old Pulteney",
"location": "Wick",
"region": "Highland",
"founded": "1826",
"owner": "Inver House Distillers",
},
{"distillery": "Poit Dhubh", "location": "Isle of Skye", "region": "Island"},
{
"distillery": "Lismore",
"region": "Speyside",
},
{
"distillery": "Provenance",
"region": "Unknown",
},
{
"distillery": "Mull Village",
"region": "Island",
},
{
"distillery": "Mackinlay",
"region": "Highland",
"founded": "1847",
},
{
"distillery": "Arbelour",
"location": "Arbelour",
"region": "Speyside",
"founded": "1879",
"owner": "Chivas Brothers",
},
{
"distillery": "Benriach",
"location": "Moray",
"region": "Speyside",
"founded": "1898",
"owner": "Brown-Forman",
},
{
"distillery": "Kilkerran",
"location": "Campbeltown",
"region": "Campbeltown",
"founded": "1872",
"owner": "Mitchell's Glengyle Ltd",
},
{
"distillery": "Aberfeldy",
"location": "Aberfeldy",
"region": "Highland",
"founded": "1896",
"owner": "Bacardi",
},
]
In [168]:
dead_distilleries = []
soup = BeautifulSoup(response.text, "html.parser")
dead_distilleries_table = soup.find_all("table", {"class": "wikitable"})[2]
dead_distillery_rows = dead_distilleries_table.find("tbody").find_all("tr")
for row in dead_distillery_rows:
cells = row.find_all("td")
if not cells:
continue
dead_distilleries.append(
{
"distillery": cells[0].text.strip(),
"location": cells[1].text.strip(),
"region": cells[2].text.strip(),
"year_closed": cells[3].text.strip(),
}
)
In [169]:
With the distillery data, we can now join it with the data from the placemat.
This is the fun part! Used seaborn to create the plots.
In [172]:
region_aggregate = (
enhanced_df.groupby("region")
.agg(
pl.col("distillery").count().alias("count"),
pl.col("stars").mean().alias("stars_mean"),
pl.col("stars").sum().alias("stars_total"),
)
.sort("stars_mean", descending=True)
)
/var/folders/4l/q3fxbjh106d0tz36zw1smt480000gn/T/ipykernel_83160/1441878289.py:2: DeprecationWarning: `groupby` is deprecated. It has been renamed to `group_by`.
enhanced_df.groupby("region")
In [173]:
In [174]: