import pandas as pd
from typing import AnyIn [1]:
Extract data from placemat
I manually input the data from the placemat into the list of dictionaries below. Where I made edits I also commented why the edits were made.
In [2]:
placement_data: list[dict[str, Any]] = [
# 1998
{"tasting_year": 1998, "rank": 1, "name": "Macallan (12)"},
{"tasting_year": 1998, "rank": 2, "name": "Glenrothes (15)"},
{"tasting_year": 1998, "rank": 3, "name": "Bowmore (15)"},
{"tasting_year": 1998, "rank": 4, "name": "Glenkinchie (10)"},
{"tasting_year": 1998, "rank": 5, "name": "Talisker (10)"},
# 1999
{"tasting_year": 1999, "rank": 1, "name": "Glenmorangie (18)"},
{"tasting_year": 1999, "rank": 2, "name": "Macallan (18)"},
{"tasting_year": 1999, "rank": 3, "name": "Tobermory (Ledaig 1974)"},
{"tasting_year": 1999, "rank": 4, "name": "Scapa (12)"},
{"tasting_year": 1999, "rank": 5, "name": "Bunnahabhain (12)"},
# 2000
{"tasting_year": 2000, "rank": 1, "name": "Glenmorangie (1971)"},
{"tasting_year": 2000, "rank": 2, "name": "Highland Park (12)"},
{"tasting_year": 2000, "rank": 3, "name": "Balvenie (15)"},
{"tasting_year": 2000, "rank": 4, "name": "Dalwhinnie (15)"},
# Assume Bowmaore was a typo and should be Bowmore.
{"tasting_year": 2000, "rank": 5, "name": "Bowmore (12)"},
# 2001
{"tasting_year": 2001, "rank": 1, "name": "Macallan (25)"},
{"tasting_year": 2001, "rank": 2, "name": "Glengoyne (17)"},
{"tasting_year": 2001, "rank": 3, "name": "Knockando (12)"},
{"tasting_year": 2001, "rank": 4, "name": "Oban (14)"},
{"tasting_year": 2001, "rank": 5, "name": "Talisker (10)"},
# 2002
{"tasting_year": 2002, "rank": 1, "name": "Highland Park (1977)"},
{"tasting_year": 2002, "rank": 2, "name": "Coleburn (21)"},
{"tasting_year": 2002, "rank": 3, "name": "Glenglassaugh (1974)"},
{"tasting_year": 2002, "rank": 4, "name": "Lagavulin (16)"},
{"tasting_year": 2002, "rank": 5, "name": "Glenkinchie (Distiller's Edition)"},
# 2003
{"tasting_year": 2003, "rank": 1, "name": "Glenfiddich (21)"},
{"tasting_year": 2003, "rank": 2, "name": "Teaninich (21)"},
{"tasting_year": 2003, "rank": 3, "name": "St. Magdalene (25)"},
{"tasting_year": 2003, "rank": 4, "name": "Ardbeg (17)"},
{"tasting_year": 2003, "rank": 5, "name": "Linkwood (25)"},
# 2004
{"tasting_year": 2004, "rank": 1, "name": "Balvenie (25)"},
{"tasting_year": 2004, "rank": 2, "name": "Macallan (1841 Replica)"},
{"tasting_year": 2004, "rank": 3, "name": "Provenance (10)"},
{"tasting_year": 2004, "rank": 4, "name": "Bowmore (10)"},
{"tasting_year": 2004, "rank": 5, "name": "Old Pulteney (12)"},
# 2005
{"tasting_year": 2005, "rank": 1, "name": "Highland Park (25)"},
{"tasting_year": 2005, "rank": 2, "name": "Glen Ord (12)"},
{"tasting_year": 2005, "rank": 3, "name": "Inverleven (1986)"},
{"tasting_year": 2005, "rank": 4, "name": "Tobermory (Mull Village 10)"},
{"tasting_year": 2005, "rank": 5, "name": "Caol Ila (Cask Strength)"},
# 2006
{"tasting_year": 2006, "rank": 1, "name": "Poit Dhubh (12)"},
{"tasting_year": 2006, "rank": 2, "name": "Balblair (16)"},
{"tasting_year": 2006, "rank": 3, "name": "Glenburgie (Glencraig 29)"},
{"tasting_year": 2006, "rank": 4, "name": "Auchentoshan (21)"},
{"tasting_year": 2006, "rank": 5, "name": "Fettercairn (13)"},
# 2007
{"tasting_year": 2007, "rank": 1, "name": "Macallan (35)"},
{"tasting_year": 2007, "rank": 2, "name": "Jura (Superstition ?)"},
{"tasting_year": 2007, "rank": 3, "name": "Highland Park (30)"},
{"tasting_year": 2007, "rank": 4, "name": "Glen Ord (25)"},
{"tasting_year": 2007, "rank": 5, "name": "St. Magdalene (1975)"},
# 2008
{"tasting_year": 2008, "rank": 1, "name": "Glenfarclas (30)"},
{"tasting_year": 2008, "rank": 2, "name": "Scapa (26)"},
{"tasting_year": 2008, "rank": 3, "name": "Pittyvaich (1976)"},
{"tasting_year": 2008, "rank": 4, "name": "Auchentoshan (18)"},
{"tasting_year": 2008, "rank": 5, "name": "Arran (?)"},
# 2009
{"tasting_year": 2009, "rank": 1, "name": "Glenmorangie (25)"},
# TODO: Is Glenrothes and Glenrothes the same?
{"tasting_year": 2009, "rank": 2, "name": "Glenrothes (18)"},
{"tasting_year": 2009, "rank": 3, "name": "Brora (24)"},
{"tasting_year": 2009, "rank": 4, "name": "Glenfarclas (Blairfindy 31)"},
{"tasting_year": 2009, "rank": 5, "name": "Arran (10 Burns' 250th Anniversary)"},
# 2010
{"tasting_year": 2010, "rank": 1, "name": "Glenlivet (XXV)"},
{"tasting_year": 2010, "rank": 2, "name": "Tamdhu (18)"},
{"tasting_year": 2010, "rank": 3, "name": "Rosebank (18)"},
{"tasting_year": 2010, "rank": 4, "name": "Port Ellen (1982)"},
{"tasting_year": 2010, "rank": 5, "name": "Old Pulteney (17)"},
# 2011
{"tasting_year": 2011, "rank": 1, "name": "Bruichladdich (16)"},
{"tasting_year": 2011, "rank": 2, "name": "Glen Ord (Singleton 12)"},
{"tasting_year": 2011, "rank": 3, "name": "Glenfarclas (40)"},
{"tasting_year": 2011, "rank": 4, "name": "Scapa (16)"},
{"tasting_year": 2011, "rank": 5, "name": "Bladnoch (1992)"},
# 2012
{"tasting_year": 2012, "rank": 1, "name": "Macallan (1969)"},
{"tasting_year": 2012, "rank": 2, "name": "Balblair (1989)"},
{"tasting_year": 2012, "rank": 3, "name": "Balvenie (21)"},
{"tasting_year": 2012, "rank": 4, "name": "Highland Park (12)"},
{"tasting_year": 2012, "rank": 5, "name": "Tobermory (15)"},
# 2013
{"tasting_year": 2013, "rank": 1, "name": "Glengoyne (21)"},
{"tasting_year": 2013, "rank": 2, "name": "Glen Keith (1968)"},
{"tasting_year": 2013, "rank": 3, "name": "Glen Grant (1974)"},
{"tasting_year": 2013, "rank": 4, "name": "Jura (Prophency ?)"},
{"tasting_year": 2013, "rank": 5, "name": "Linkwood (12)"},
# 2014
{"tasting_year": 2014, "rank": 1, "name": "Glenfiddich (30)"},
{"tasting_year": 2014, "rank": 2, "name": "Highland Park (Loki)"},
{"tasting_year": 2014, "rank": 3, "name": "Littlemill (21)"},
{"tasting_year": 2014, "rank": 4, "name": "Mackinlay Shackleton"},
{"tasting_year": 2014, "rank": 5, "name": "Kilchoman (3)"},
# 2015
{"tasting_year": 2015, "rank": 1, "name": "Arbelour (18)"},
{"tasting_year": 2015, "rank": 2, "name": "Glenmorangie (Signet)"},
{"tasting_year": 2015, "rank": 3, "name": "Macallan (Ruby)"},
{"tasting_year": 2015, "rank": 4, "name": "Tomatin (1989)"},
{"tasting_year": 2015, "rank": 5, "name": "Bunnahabhain (XXV)"},
# 2016
{"tasting_year": 2016, "rank": 1, "name": "Dalmore (King Alex III)"},
{"tasting_year": 2016, "rank": 2, "name": "Mortlach (Rare Old)"},
{"tasting_year": 2016, "rank": 3, "name": "Lismore (21)"},
{"tasting_year": 2016, "rank": 4, "name": "Cragganmore (25)"},
{"tasting_year": 2016, "rank": 5, "name": "Ardbeg (Corryvrechan)"},
# 2017
{"tasting_year": 2017, "rank": 1, "name": "Glendronach (21)"},
{"tasting_year": 2017, "rank": 2, "name": "Bowmore (23)"},
{"tasting_year": 2017, "rank": 3, "name": "Glenfarclas (12)"},
{"tasting_year": 2017, "rank": 4, "name": "Talisker (25)"},
{"tasting_year": 2017, "rank": 5, "name": "Tomintoul (16)"},
# 2018
{"tasting_year": 2018, "rank": 1, "name": "Craigellachie (23)"},
{"tasting_year": 2018, "rank": 2, "name": "Royal Brackla (21)"},
# Singleton, not Singletone
{"tasting_year": 2018, "rank": 3, "name": "Glen Ord (Singleton Tailfire)"},
{"tasting_year": 2018, "rank": 4, "name": "Benriach (1997)"},
{"tasting_year": 2018, "rank": 5, "name": "Bruichladdich (Black Art)"},
# 2019
{"tasting_year": 2019, "rank": 1, "name": "Blair Athol (23)"},
{"tasting_year": 2019, "rank": 2, "name": "Longmorn (16)"},
{"tasting_year": 2019, "rank": 3, "name": "Arran (Sauternes)"},
{"tasting_year": 2019, "rank": 4, "name": "Glenfiddich (Winter Storm)"},
{"tasting_year": 2019, "rank": 5, "name": "Kilkerran (12)"},
# 2020 - COVID
# 2021 - COVID
# 2022
{"tasting_year": 2022, "rank": 1, "name": "Tullibardine (20)"},
{"tasting_year": 2022, "rank": 2, "name": "Aberfeldy (19)"},
{"tasting_year": 2022, "rank": 3, "name": "Deanston (18)"},
{"tasting_year": 2022, "rank": 4, "name": "Jura (Tastival 2017)"},
{"tasting_year": 2022, "rank": 5, "name": "Glen Grant (18)"},
# 2023
{"tasting_year": 2023, "rank": 1, "name": "Glenfiddich (23)"},
{"tasting_year": 2023, "rank": 2, "name": "Bladnoch (Talia)"},
{"tasting_year": 2023, "rank": 3, "name": "Bunnahabhain (12)"},
{"tasting_year": 2023, "rank": 4, "name": "Highland Park (15)"},
{"tasting_year": 2023, "rank": 5, "name": "Ben Nevis (6)"},
]In [3]:
# Convert the list of dictionaries to a DataFrame.
scotch_yearly_rankings = pd.DataFrame(placement_data)In [4]:
# Extract more information from the column.
scotch_yearly_rankings["distillery"] = scotch_yearly_rankings["name"].str.extract(
r"([\.\w\s]+) ", 1
)
scotch_yearly_rankings["age_statement"] = (
scotch_yearly_rankings["name"].str.extract(r"([1-4]?[0-9])", 1).astype("Int64")
)
scotch_yearly_rankings["bottling_year"] = (
scotch_yearly_rankings["name"].str.extract(r"(\d{4})", 1).astype("Int64")
)
scotch_yearly_rankings["metadata"] = scotch_yearly_rankings["name"].str.extract(
r"(\(.*\))", 1
)
scotch_yearly_rankings["stars"] = 6 - scotch_yearly_rankings["rank"]
scotch_yearly_rankings| tasting_year | rank | name | distillery | age_statement | bottling_year | metadata | stars | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1998 | 1 | Macallan (12) | Macallan | 12 | <NA> | (12) | 5 |
| 1 | 1998 | 2 | Glenrothes (15) | Glenrothes | 15 | <NA> | (15) | 4 |
| 2 | 1998 | 3 | Bowmore (15) | Bowmore | 15 | <NA> | (15) | 3 |
| 3 | 1998 | 4 | Glenkinchie (10) | Glenkinchie | 10 | <NA> | (10) | 2 |
| 4 | 1998 | 5 | Talisker (10) | Talisker | 10 | <NA> | (10) | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115 | 2023 | 1 | Glenfiddich (23) | Glenfiddich | 23 | <NA> | (23) | 5 |
| 116 | 2023 | 2 | Bladnoch (Talia) | Bladnoch | <NA> | <NA> | (Talia) | 4 |
| 117 | 2023 | 3 | Bunnahabhain (12) | Bunnahabhain | 12 | <NA> | (12) | 3 |
| 118 | 2023 | 4 | Highland Park (15) | Highland Park | 15 | <NA> | (15) | 2 |
| 119 | 2023 | 5 | Ben Nevis (6) | Ben Nevis | 6 | <NA> | (6) | 1 |
120 rows × 8 columns
Extract distillery info from Wikipedia
We want more information about each distillery for our analysis. Wikipedia includes a table which of existing and past dead Scotch distilleries. It’s not a complete list, but it’s enough to get started with.
In [5]:
from urllib.parse import urljoin
import requests
base_url = "https://en.wikipedia.org/"
path = "/wiki/List_of_whisky_distilleries_in_Scotland"
headers = {
"User-Agent": f"Bot/0.0 (https://lennoxstevenson.com lennox.stevenson@gmail.com) requests/{requests.__version__}"
}
response = requests.get(urljoin(base_url, path), headers=headers)In [6]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")
distilleries_table = soup.find("table", {"class": "wikitable"})
distilleries_rows = distilleries_table.find("tbody").find_all("tr")
distillery_data: list[dict[str, Any]] = []
for row in distilleries_rows:
cells = row.find_all("td")
if not cells:
continue
distillery_data.append(
{
"distillery": cells[0].text.strip(),
"location": cells[1].text.strip(),
"region": cells[2].text.strip(),
"founded": cells[3].text.strip(),
"owner": cells[4].text.strip(),
}
)In [7]:
# Some distilleries are missing from the Wikipedia page. For those, I manually searched
# online to find the missing information.
manually_added_distilleries = [
{
"distillery": "Old Pulteney",
"location": "Wick",
"region": "Highland",
"founded": "1826",
"owner": "Inver House Distillers",
},
{"distillery": "Poit Dhubh", "location": "Isle of Skye", "region": "Island"},
{
"distillery": "Lismore",
"region": "Speyside",
},
{
"distillery": "Provenance",
"region": "Unknown",
},
{
"distillery": "Mull Village",
"region": "Island",
},
{
"distillery": "Mackinlay",
"region": "Highland",
"founded": "1847",
},
{
"distillery": "Arbelour",
"location": "Arbelour",
"region": "Speyside",
"founded": "1879",
"owner": "Chivas Brothers",
},
{
"distillery": "Benriach",
"location": "Moray",
"region": "Speyside",
"founded": "1898",
"owner": "Brown-Forman",
},
{
"distillery": "Kilkerran",
"location": "Campbeltown",
"region": "Campbeltown",
"founded": "1872",
"owner": "Mitchell's Glengyle Ltd",
},
{
"distillery": "Aberfeldy",
"location": "Aberfeldy",
"region": "Highland",
"founded": "1896",
"owner": "Bacardi",
},
]In [8]:
dead_distilleries: list[dict[str, Any]] = []
soup = BeautifulSoup(response.text, "html.parser")
dead_distilleries_table = soup.find_all("table", {"class": "wikitable"})[2]
dead_distillery_rows = dead_distilleries_table.find("tbody").find_all("tr")
for row in dead_distillery_rows:
cells = row.find_all("td")
if not cells:
continue
dead_distilleries.append(
{
"distillery": cells[0].text.strip(),
"location": cells[1].text.strip(),
"region": cells[2].text.strip(),
"year_closed": cells[3].text.strip(),
}
)In [9]:
# Combine all the distillery data into a single DataFrame.
distilleries_df = pd.DataFrame(
distillery_data + dead_distilleries + manually_added_distilleries
)
distilleries_df| distillery | location | region | founded | owner | year_closed | |
|---|---|---|---|---|---|---|
| 0 | Aberargie | Aberargie | Lowland | 2017 | Perth Distilling Co | NaN |
| 1 | Aberfeldy | Aberfeldy | Highland | 1896 | John Dewar & Sons | NaN |
| 2 | Aberlour | Aberlour | Speyside | 1879 | Chivas Brothers | NaN |
| 3 | Abhainn Dearg | Uig, Isle of Lewis | Island | 2008 | Mark Tayburn | NaN |
| 4 | Ailsa Bay | Girvan | Lowland | 2009 | William Grant & Sons | NaN |
| ... | ... | ... | ... | ... | ... | ... |
| 247 | Mackinlay | NaN | Highland | 1847 | NaN | NaN |
| 248 | Arbelour | Arbelour | Speyside | 1879 | Chivas Brothers | NaN |
| 249 | Benriach | Moray | Speyside | 1898 | Brown-Forman | NaN |
| 250 | Kilkerran | Campbeltown | Campbeltown | 1872 | Mitchell's Glengyle Ltd | NaN |
| 251 | Aberfeldy | Aberfeldy | Highland | 1896 | Bacardi | NaN |
252 rows × 6 columns
Join distillery info with placemat data
With the distillery data, we can now join it with the data from the placemat.
In [10]:
enhanced_df = scotch_yearly_rankings.merge(distilleries_df, on="distillery", how="left")
enhanced_df| tasting_year | rank | name | distillery | age_statement | bottling_year | metadata | stars | location | region | founded | owner | year_closed | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1998 | 1 | Macallan (12) | Macallan | 12 | <NA> | (12) | 5 | Craigellachie | Speyside | 1824 | Edrington | NaN |
| 1 | 1998 | 2 | Glenrothes (15) | Glenrothes | 15 | <NA> | (15) | 4 | Rothes | Speyside | 1879 | Edrington | NaN |
| 2 | 1998 | 3 | Bowmore (15) | Bowmore | 15 | <NA> | (15) | 3 | Bowmore | Islay | 1779 | Suntory Global Spirits | NaN |
| 3 | 1998 | 4 | Glenkinchie (10) | Glenkinchie | 10 | <NA> | (10) | 2 | Pencaitland | Lowland | 1837 | Diageo | NaN |
| 4 | 1998 | 5 | Talisker (10) | Talisker | 10 | <NA> | (10) | 1 | Carbost, Isle of Skye | Island | 1830 | Diageo | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 116 | 2023 | 1 | Glenfiddich (23) | Glenfiddich | 23 | <NA> | (23) | 5 | Dufftown | Speyside | 1886 | William Grant & Sons | NaN |
| 117 | 2023 | 2 | Bladnoch (Talia) | Bladnoch | <NA> | <NA> | (Talia) | 4 | Wigtown | Lowland | 1817 | David Prior | NaN |
| 118 | 2023 | 3 | Bunnahabhain (12) | Bunnahabhain | 12 | <NA> | (12) | 3 | Port Askaig | Islay | 1881 | Distell | NaN |
| 119 | 2023 | 4 | Highland Park (15) | Highland Park | 15 | <NA> | (15) | 2 | Kirkwall | Island | 1798 | Edrington | NaN |
| 120 | 2023 | 5 | Ben Nevis (6) | Ben Nevis | 6 | <NA> | (6) | 1 | Fort William | Highland | 1825 | Nikka Whisky Distilling Co Ltd | NaN |
121 rows × 13 columns
Start exploring the data
This is the fun part! Used seaborn to create the plots.
In [11]:
import seaborn as sns
sns.set_theme()In [12]:
region_aggregate = (
enhanced_df.groupby("region", as_index=False)
.agg(
count=("distillery", "count"),
stars_mean=("stars", "mean"),
stars_total=("stars", "sum"),
)
.sort_values("stars_mean", ascending=False)
)
region_aggregate| region | count | stars_mean | stars_total | |
|---|---|---|---|---|
| 5 | Speyside | 39 | 3.538462 | 138 |
| 1 | Highland | 31 | 3.419355 | 106 |
| 6 | Unknown | 1 | 3.000000 | 3 |
| 2 | Island | 20 | 2.800000 | 56 |
| 4 | Lowland | 11 | 2.272727 | 25 |
| 3 | Islay | 15 | 2.000000 | 30 |
| 0 | Campbeltown | 1 | 1.000000 | 1 |
In [13]:
ax = sns.barplot(data=region_aggregate, x="region", y="stars_mean")
ax.set_ylim(0, 5)
ax.set_title("Average stars per region")
for i in ax.containers:
ax.bar_label(i)
In [14]:
distillery_aggregate = enhanced_df.groupby("distillery", as_index=False).agg(
stars_mean=("stars", "mean"),
stars_total=("stars", "sum"),
count=("stars", "count"),
)
distillery_aggregate| distillery | stars_mean | stars_total | count | |
|---|---|---|---|---|
| 0 | Aberfeldy | 4.000000 | 8 | 2 |
| 1 | Arbelour | 5.000000 | 5 | 1 |
| 2 | Ardbeg | 1.500000 | 3 | 2 |
| 3 | Arran | 1.666667 | 5 | 3 |
| 4 | Auchentoshan | 2.000000 | 4 | 2 |
| ... | ... | ... | ... | ... |
| 62 | Teaninich | 4.000000 | 4 | 1 |
| 63 | Tobermory | 2.000000 | 6 | 3 |
| 64 | Tomatin | 2.000000 | 2 | 1 |
| 65 | Tomintoul | 1.000000 | 1 | 1 |
| 66 | Tullibardine | 5.000000 | 5 | 1 |
67 rows × 4 columns
In [15]:
top_5_distilleries_by_stars_total = distillery_aggregate.sort_values(
"stars_total", ascending=False
).head(5)
ax = sns.barplot(
data=top_5_distilleries_by_stars_total, x="distillery", y="stars_total"
)
ax.set_title("Sum of stars for each distillery")
for i in ax.containers:
ax.bar_label(i)