Dessert Flavor Pairings — Chord Diagram using Python

Max Bade
3 min readJun 3, 2020

--

VISUALIZING THE INTER-RELATIONSHIPS BETWEEN DATA IN A MATRIX.

Chord Diagram using Python Library: Chord

An easy way to interact with and analyze matrix data. The image above (created in JupyterLab) shows over 7,000 dessert recipe flavor pairings. Each flavor's segment on the outer rim grows with the number of pairings it appears in.

WHERE’S THE CODE?!

import pandas as pd
import numpy as np
import sqlalchemy as sq
import datetime
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as py
from plotly.subplots import make_subplots
import geopandas as gp
import datetime as dt
import itertools
%matplotlib inline
import ssl
from chord import Chord
# SECURITY NOTE(review): this turns off TLS certificate verification for every
# default HTTPS context created in this process. Nothing visible in this
# script needs it (the data is read from a local CSV), so consider removing it.
ssl._create_default_https_context = ssl._create_unverified_context

# Notebook display settings: show up to 999 rows, floats with 5 decimals.
pd.options.display.max_rows = 999
pd.set_option("display.float_format", lambda x: "%.5f" % x)

# Load the raw recipes and report the table size.
df = pd.read_csv("recipes.csv")
print(df.shape)

# Missing values per column (rendered when run as a notebook cell).
df.isna().sum()

# Drop every row that has at least one missing value.
df = df.dropna()

# Split the whitespace-separated "flavors" string into a sorted list of
# unique tokens.
df["flavor_list"] = df["flavors"].apply(lambda x: sorted(set(x.split())))

# Quick look at the distributions that drive the filters below.
df.rating.hist()
df.success.hist()

# Keep recipes rated above 2 with a success value of at least 0.1.
# NOTE(review): the original comment describes the second threshold as "at
# least 1 person having reported success making it" - confirm the units of
# the `success` column.
df = df[df.rating > 2]
print(df.shape)
# .copy() detaches the filtered frame so the column assignments that follow
# write to a real DataFrame rather than to a slice of the unfiltered one.
df = df[df.success >= 0.1].copy()
print(df.shape)
# Clean up: break each recipe name into words (hyphens count as spaces),
# keep each word once in sorted order, then lowercase the words.
recipe_tokens = df["recipe_name"].apply(
    lambda name: sorted(set(name.replace("-", " ").split()))
)
df["recipe_list"] = recipe_tokens.apply(
    lambda words: [word.lower() for word in words]
)

# Words from the recipe name also count as flavors (like caramel), so merge
# them with the declared flavors and de-duplicate the combined list.
combined_tokens = df["flavor_list"] + df["recipe_list"]
df["flavor_list2"] = combined_tokens.apply(lambda tokens: sorted(set(tokens)))
# Next step: remove bad flavor values from flavor_list2.
# Tokens that are dropped when the final per-recipe flavor lists are built.
REMOVE_FLAVORS = [
    "with", "and", "bundt", "cake", "cream",
    "pie", "ice", "sauce", "cookies", "tart",
    "pudding", "allspice", "butter", "oat", "cheesecake",
    "white", "fruit", "graham", "raisin",
]
# Misspellings and synonyms that are folded into one canonical flavor name.
_FLAVOR_ALIASES = {
    "expresso": "espresso",
    "cocoa": "chocolate",
    "peanut": "peanut butter",
}


def _normalize_flavors(raw_flavors):
    """Return the cleaned flavor list for one recipe.

    Aliased tokens are replaced by their canonical name, tokens listed in
    REMOVE_FLAVORS are dropped, and the result is de-duplicated while keeping
    first-seen order.

    The de-duplication matters because aliasing can create repeats (a recipe
    tagged with both "cocoa" and "chocolate" would otherwise list "chocolate"
    twice), which would inflate that flavor's count and produce a
    ("chocolate", "chocolate") self-pair in the co-occurrence step.
    """
    canonical = (
        _FLAVOR_ALIASES.get(flavor, flavor)
        for flavor in raw_flavors
        # Alias keys are always kept (as their canonical name); everything
        # else is kept only when it is not a removed token.
        if flavor in _FLAVOR_ALIASES or flavor not in REMOVE_FLAVORS
    )
    return list(dict.fromkeys(canonical))


all_f_lists = [_normalize_flavors(f_list2) for f_list2 in df["flavor_list2"]]
df["flavor_list3"] = all_f_lists
df.head(2)

# Now get value_count (rank) of each flavor: flatten every recipe's flavor
# list into one long sequence (chain avoids the quadratic cost of repeated
# list concatenation).
flavors = list(itertools.chain.from_iterable(df["flavor_list3"]))
flavor_series = pd.Series(flavors)
flavor_series.nunique()
# Look at top dessert flavors
# Rank the flavors: one row per flavor with the number of times it occurs in
# flavor_series, most frequent first.
flavor_cdf = pd.DataFrame(flavor_series.value_counts()).reset_index()
flavor_cdf.columns = ["flavor", "recipe_counts"]
# flavor_cdf.isna().sum()

# Sanity check on the alias handling: show the rows for the raw and canonical
# spellings. "peanut butter" (with a space) is the name the normalization
# step produces, so it is included alongside the original "peanut_butter".
flavor_cdf[
    flavor_cdf["flavor"].isin(
        [
            "peanut butter",
            "peanut_butter",
            "peanut",
            "espresso",
            "cocoa",
            "expresso",
        ]
    )
]

# Number of most-frequent flavors kept for the chart and the chord diagram.
TOP_NUM_FLAVORS = 37
flavor_cdf.head(TOP_NUM_FLAVORS)

# Bar chart of the top flavors by count.
top_flavor_counts = flavor_cdf.head(TOP_NUM_FLAVORS)
fig = go.Figure(
    [
        go.Bar(
            x=top_flavor_counts.flavor,
            y=top_flavor_counts.recipe_counts,
        )
    ]
)
# `py` is already plotly.offline (see the imports), so call iplot on it
# directly instead of reaching through the inner `offline` submodule.
py.iplot(fig)
# Save only top flavors
top_flavors = list(flavor_cdf.head(TOP_NUM_FLAVORS).flavor.values)

## Chord Diagram
# Take the original dataframe and calculate the flavor co-occurrence pairs.
adf = df[["recipe_name", "flavor_list3"]]
adf.head()

# Worked example on the first recipe: its flavor list and every unordered
# pair of flavors drawn from it.
f_list = adf["flavor_list3"].iloc[0]
f_list
list(itertools.combinations(f_list, 2))

# Every unordered flavor pair from every recipe, gathered in one pass
# (a single comprehension instead of repeated list concatenation).
flavor_pairs_list = [
    pair
    for recipe_flavors in adf["flavor_list3"]
    for pair in itertools.combinations(recipe_flavors, 2)
]
flavor_pdf = pd.DataFrame(flavor_pairs_list, columns=["flavor1", "flavor2"])
print(flavor_pdf.shape)
flavor_pdf.head(2)
# Keep only the pairs where both flavors are among the top flavors.
flavor_top_pdf = flavor_pdf[
    flavor_pdf.flavor1.isin(top_flavors) & flavor_pdf.flavor2.isin(top_flavors)
]
print(flavor_top_pdf.shape)
flavor_top_pdf.head(2)

# Emit each pair in both directions, (a, b) and (b, a), so the resulting
# count matrix is symmetric.
data = list(
    itertools.chain.from_iterable((i, i[::-1]) for i in flavor_top_pdf.values)
)

# Square co-occurrence table: rows and columns are flavors, cells count how
# often the two flavors were paired; unseen combinations are filled with 0.
pivot = pd.pivot_table(
    pd.DataFrame(data), index=0, columns=1, aggfunc="size", fill_value=0
)
matrix = pivot.values.tolist()
mdf = pd.DataFrame(matrix)
print(mdf.shape)
mdf.head(2)
mdf.sum()

# Take the labels from the pivot's own index so they are guaranteed to be in
# the same order as the matrix rows.
names = pivot.index.tolist()
name_df = pd.DataFrame(names, columns=["flavor_name"])
name_df
# Uncolored variant of the diagram, kept for reference:
# Chord(matrix, names, wrap_labels=False).show()

# Hex color for each flavor name, used to color that flavor in the chord
# diagram. Every value must be a bare "#RRGGBB" string with no surrounding
# whitespace.
color_dic = {
    'almond': "#b06e31",
    'apricot': "#fbceb1",
    'apple': "#cf350e",
    'banana': "#ffe135",
    'blackberry': "#43182f",
    'blueberry': "#4f86f7",
    'brandy': "#87413f",
    'caramel': "#C68E17",
    'cardamom': "#958d34",
    'cherry': "#8f0b0b",
    'chocolate': "#622A0F",
    'cinnamon': "#D2691E",
    'clove': "#B5651D",
    'coconut': "#E1DABB",
    'coffee': "#3B270C",
    'cranberry': "#950714",
    'ginger': "#b06500",
    'hazelnut': "#ae9f80",
    'honey': "#ebb028",
    'lemon': "#e8ce25",
    'lime': "#5ce825",
    'maple': "#bb9351",
    'mint': "#98ff98",
    'nutmeg': "#7e4a3b",
    'orange': "#ffa500",
    'peach': "#ffe5b4",
    'peanut butter': "#cd9141",
    'pear': "#d1e231",
    'pecan': "#48260D",
    'pineapple': "#e6ae25",
    'pistachio': "#93c572",
    'pumpkin': "#ff7518",
    'raspberry': "#db1f1f",
    'rum': "#D7C5A9",
    'strawberry': "#FC5A8D",
    'vanilla': "#F9E5BC",
    'walnut': "#43270F",
}
# Look up each flavor's color. A flavor that has no entry in color_dic would
# map to NaN, which is not a usable color, so fall back to a neutral grey.
name_df["color"] = name_df["flavor_name"].map(color_dic).fillna("#999999")
name_df
colors = name_df["color"].tolist()

# Build the diagram once, render it inline, then export it.
# NOTE(review): to_html() is called without a filename, so the output
# location is whatever the chord library defaults to - confirm.
chord = Chord(matrix, names, colors=colors, wrap_labels=False)
chord.show()
chord.to_html()

HOW ELSE COULD THIS BE USEFUL?

There are a million ways this could be used, but one that comes to mind, from a fraud/risk perspective, is identifying the co-occurrence of high-volume and heavily weighted rules.

How do you use Chord Diagrams?

Social Media:

Instagram: @maxbade

Github: @supercoolgetsallthegirlsmax

--

--

Max Bade
Max Bade

Written by Max Bade

Data Science and Analytics Consultant. Email: Maxbade@yahoo.com. linkedIn:www.linkedin.com/in/maxbade github:https://github.com/maxwellbade

No responses yet