C:\Users\Soham Das\Data-analysis-projects\Hotel-Booking-Data-Analysis/hotel-bookings.csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df=pd.read_csv(r'C:\Users\Soham Das\Data-analysis-projects\Hotel-Booking-Data-Analysis/hotel-bookings.csv')
df.head()
#this is the dataframe which we would analyse
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | No Deposit | NaN | NaN | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | NaN | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | No Deposit | 304.0 | NaN | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | No Deposit | 240.0 | NaN | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
5 rows × 32 columns
df.shape
#no. of rows and column
(119390, 32)
df.isnull().values.any()#if there is any missing values in data
True
df.isnull().sum() #will show the missing values of the data
# Replace missing values:
# agent: If no agency is given, booking was most likely made without one.
# company: If none given, it was most likely private.
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 4 babies 0 meal 0 country 488 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 16340 company 112593 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
df.fillna(0,inplace=True)
df.isnull().sum()
#now there are no missing values in the data
#so we are good to go with the analysis
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 0 babies 0 meal 0 country 0 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 0 company 0 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
df['meal'].value_counts()#count of each type of meal
BB 92310 HB 14463 SC 10650 Undefined 1169 FB 798 Name: meal, dtype: int64
df['children'].unique()#unique children in our data frame
array([ 0., 1., 2., 10., 3.])
df['adults'].unique()
array([ 2, 1, 3, 4, 40, 26, 50, 27, 55, 0, 20, 6, 5, 10], dtype=int64)
df['babies'].unique()
#seems to have some dirtiness in data as
#Adults,babies & children cant be zero at a same time
array([ 0, 1, 2, 10, 9], dtype=int64)
pd.set_option('display.max_columns',32)#to display all the coloumns
#Since count of children, adults and babies cant be 0 at same time
filter=(df['children']==0) & (df['adults']==0) & (df['babies']==0)
df[filter]
#these are wrong entries in our dataframe
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2224 | Resort Hotel | 0 | 1 | 2015 | October | 41 | 6 | 0 | 3 | 0 | 0.0 | 0 | SC | PRT | Corporate | Corporate | 0 | 0 | 0 | A | I | 1 | No Deposit | 0.0 | 174.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 10/6/2015 |
2409 | Resort Hotel | 0 | 0 | 2015 | October | 42 | 12 | 0 | 0 | 0 | 0.0 | 0 | SC | PRT | Corporate | Corporate | 0 | 0 | 0 | A | I | 0 | No Deposit | 0.0 | 174.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 10/12/2015 |
3181 | Resort Hotel | 0 | 36 | 2015 | November | 47 | 20 | 1 | 2 | 0 | 0.0 | 0 | SC | ESP | Groups | TA/TO | 0 | 0 | 0 | A | C | 0 | No Deposit | 38.0 | 0.0 | 0 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 11/23/2015 |
3684 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 1 | 4 | 0 | 0.0 | 0 | SC | PRT | Groups | TA/TO | 0 | 0 | 0 | A | A | 1 | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/4/2016 |
3708 | Resort Hotel | 0 | 165 | 2015 | December | 53 | 30 | 2 | 4 | 0 | 0.0 | 0 | SC | PRT | Groups | TA/TO | 0 | 0 | 0 | A | C | 1 | No Deposit | 308.0 | 0.0 | 122 | Transient-Party | 0.00 | 0 | 0 | Check-Out | 1/5/2016 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
115029 | City Hotel | 0 | 107 | 2017 | June | 26 | 27 | 0 | 3 | 0 | 0.0 | 0 | BB | CHE | Online TA | TA/TO | 0 | 0 | 0 | A | A | 1 | No Deposit | 7.0 | 0.0 | 0 | Transient | 100.80 | 0 | 0 | Check-Out | 6/30/2017 |
115091 | City Hotel | 0 | 1 | 2017 | June | 26 | 30 | 0 | 1 | 0 | 0.0 | 0 | SC | PRT | Complementary | Direct | 0 | 0 | 0 | E | K | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.00 | 1 | 1 | Check-Out | 7/1/2017 |
116251 | City Hotel | 0 | 44 | 2017 | July | 28 | 15 | 1 | 1 | 0 | 0.0 | 0 | SC | SWE | Online TA | TA/TO | 0 | 0 | 0 | A | K | 2 | No Deposit | 425.0 | 0.0 | 0 | Transient | 73.80 | 0 | 0 | Check-Out | 7/17/2017 |
116534 | City Hotel | 0 | 2 | 2017 | July | 28 | 15 | 2 | 5 | 0 | 0.0 | 0 | SC | RUS | Online TA | TA/TO | 0 | 0 | 0 | A | K | 1 | No Deposit | 9.0 | 0.0 | 0 | Transient-Party | 22.86 | 0 | 1 | Check-Out | 7/22/2017 |
117087 | City Hotel | 0 | 170 | 2017 | July | 30 | 27 | 0 | 2 | 0 | 0.0 | 0 | BB | BRA | Offline TA/TO | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 52.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/29/2017 |
180 rows × 32 columns
data=df[~filter]#to remove the filtered data i.e the wrong ones
data.head()#we will analyse this dataframe
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
resort = data[(data["hotel"] == "Resort Hotel") & (data["is_canceled"] == 0)]
#data of Resort Hotel and making sure booking is not being cancelled
city = data[(data["hotel"] == "City Hotel") & (data["is_canceled"] == 0)]
#data of City Hotel and making sure booking is not being cancelled
resort.head()#Dataframe head for Resort Hotels
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
city.head()#Dataframe head for City Hotels
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
40060 | City Hotel | 0 | 6 | 2015 | July | 27 | 1 | 0 | 2 | 1 | 0.0 | 0 | HB | PRT | Offline TA/TO | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 6.0 | 0.0 | 0 | Transient | 0.00 | 0 | 0 | Check-Out | 7/3/2015 |
40066 | City Hotel | 0 | 3 | 2015 | July | 27 | 2 | 0 | 3 | 1 | 0.0 | 0 | HB | PRT | Groups | TA/TO | 0 | 0 | 0 | A | A | 1 | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 58.67 | 0 | 0 | Check-Out | 7/5/2015 |
40070 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | 0.0 | 0 | HB | PRT | Groups | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 86.00 | 0 | 0 | Check-Out | 7/5/2015 |
40071 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | 0.0 | 0 | HB | PRT | Groups | TA/TO | 0 | 0 | 0 | A | A | 1 | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 43.00 | 0 | 0 | Check-Out | 7/5/2015 |
40072 | City Hotel | 0 | 43 | 2015 | July | 27 | 3 | 0 | 2 | 2 | 0.0 | 0 | HB | PRT | Groups | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 1.0 | 0.0 | 0 | Transient-Party | 86.00 | 0 | 0 | Check-Out | 7/5/2015 |
!pip install plotly
Requirement already satisfied: plotly in /usr/local/lib/python3.7/dist-packages (4.4.1) Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from plotly) (1.15.0) Requirement already satisfied: retrying>=1.3.3 in /usr/local/lib/python3.7/dist-packages (from plotly) (1.3.3)
import plotly.graph_objects as go
from plotly.offline import iplot #to see graphs in offline mode
import plotly.express as px
resort['country'].value_counts()#count of guests from each country
PRT 10184 GBR 5922 ESP 3105 IRL 1734 FRA 1399 ... NPL 1 BWA 1 MUS 1 ZMB 1 SMR 1 Name: country, Length: 119, dtype: int64
labels=resort['country'].value_counts().index
values=resort['country'].value_counts()#count of guests from each country
trace=go.Pie(labels=labels, values=values,
hoverinfo='label+percent', textinfo='value')
#'Pie' for making a pie-chart
#'Hoverinfo' the info which is shown when we hover our cursor on the visual
iplot([trace])
#we can see the size of piechart is very small
fig=go.Figure(data=trace)
##updating layout
fig.update_layout(margin=dict(t=20, b=20, l=20, r=20))
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()
#now our pi-chart is much clear than the one before
#we can see highest no. of guests are from portugal
#followed by great britain and spain
#these 3 contribute 69% appx. of total guest count
country_wise_data=data[data['is_canceled']==0]['country'].value_counts().reset_index()
#1st coloumn is country and the second is guests
country_wise_data.columns=['country','No of guests']
country_wise_data
country | No of guests | |
---|---|---|
0 | PRT | 20977 |
1 | GBR | 9668 |
2 | FRA | 8468 |
3 | ESP | 6383 |
4 | DEU | 6067 |
... | ... | ... |
161 | BHR | 1 |
162 | TJK | 1 |
163 | SDN | 1 |
164 | GUY | 1 |
165 | AIA | 1 |
166 rows × 2 columns
import folium
from folium.plugins import HeatMap
basemap=folium.Map()
# show on map with the help of px.choropleth
map_guest = px.choropleth(country_wise_data,
locations=country_wise_data['country'],
color=country_wise_data['No of guests'],
hover_name=country_wise_data['country'],
title="Home country of guests")
map_guest.show()
#we can see very less amount of guest from all over the world
#but relatively more number of guests from Europe
#and the white ones are the one with no guest or falls under incorrect data so already removed
data.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
data2=data[data['is_canceled']==0]
# boxplot:
plt.figure(figsize=(12, 8))
sns.boxplot(x="reserved_room_type", #price varies with type of room
y="adr",
hue="hotel",#hue helps us to distinguish 2 items on basis of a parameter
data=data2)
plt.title("Price of room types per night", fontsize=16)
plt.xlabel("Room type", fontsize=16)
plt.ylabel("Price [EUR]", fontsize=16)#because most guests are europian
plt.legend(loc="upper right")
plt.ylim(0, 600)
plt.show()
#This figure shows the average price per room.
#depending on its type and the standard deviation.
#data anonymization
#rooms with the same type letter may not necessarily be the same across hotels.
data_resort=resort[resort['is_canceled']==0]
data_city=city[city['is_canceled']==0]
#Accessed the data
#we need arrival date and price feature 'adr'
resort_hotel=data_resort.groupby(['arrival_date_month'])['adr'].mean().reset_index()
resort_hotel
arrival_date_month | adr | |
---|---|---|
0 | April | 75.867816 |
1 | August | 181.205892 |
2 | December | 68.410104 |
3 | February | 54.147478 |
4 | January | 48.761125 |
5 | July | 150.122528 |
6 | June | 107.974850 |
7 | March | 57.056838 |
8 | May | 76.657558 |
9 | November | 48.706289 |
10 | October | 61.775449 |
11 | September | 96.416860 |
#same as above for city hotel
city_hotel=data_city.groupby(['arrival_date_month'])['adr'].mean().reset_index()
city_hotel
arrival_date_month | adr | |
---|---|---|
0 | April | 111.962267 |
1 | August | 118.674598 |
2 | December | 88.401855 |
3 | February | 86.520062 |
4 | January | 82.330983 |
5 | July | 115.818019 |
6 | June | 117.874360 |
7 | March | 90.658533 |
8 | May | 120.669827 |
9 | November | 86.946592 |
10 | October | 102.004672 |
11 | September | 112.776582 |
#we merge the dataframes using merge included in pandas library
final=resort_hotel.merge(city_hotel,on='arrival_date_month')
final.columns=['month','price_for_resort','price_for_city_hotel']
final
# now we will observe over here is month column is not in order,
# if we will visualise we will get improper conclusion
# so very first we have to provide right hierarchy to the month column
month | price_for_resort | price_for_city_hotel | |
---|---|---|---|
0 | April | 75.867816 | 111.962267 |
1 | August | 181.205892 | 118.674598 |
2 | December | 68.410104 | 88.401855 |
3 | February | 54.147478 | 86.520062 |
4 | January | 48.761125 | 82.330983 |
5 | July | 150.122528 | 115.818019 |
6 | June | 107.974850 | 117.874360 |
7 | March | 57.056838 | 90.658533 |
8 | May | 76.657558 | 120.669827 |
9 | November | 48.706289 | 86.946592 |
10 | October | 61.775449 | 102.004672 |
11 | September | 96.416860 | 112.776582 |
!pip install sort-dataframeby-monthorweek
!pip install sorted-months-weekdays
## Dependency package needs to be installed
Requirement already satisfied: sort-dataframeby-monthorweek in d:\python\lib\site-packages (0.4) Requirement already satisfied: sorted-months-weekdays in d:\python\lib\site-packages (0.2)
import sort_dataframeby_monthorweek as sd
final=sd.Sort_Dataframeby_Month(df=final,monthcolumnname='month')
final
#we get the months in order!
month | price_for_resort | price_for_city_hotel | |
---|---|---|---|
0 | January | 48.761125 | 82.330983 |
1 | February | 54.147478 | 86.520062 |
2 | March | 57.056838 | 90.658533 |
3 | April | 75.867816 | 111.962267 |
4 | May | 76.657558 | 120.669827 |
5 | June | 107.974850 | 117.874360 |
6 | July | 150.122528 | 115.818019 |
7 | August | 181.205892 | 118.674598 |
8 | September | 96.416860 | 112.776582 |
9 | October | 61.775449 | 102.004672 |
10 | November | 48.706289 | 86.946592 |
11 | December | 68.410104 | 88.401855 |
#we would use a line plot over here
#since we have achieved a proper hierarchy
px.line(final, x='month',
y=['price_for_resort','price_for_city_hotel'],
title='Room price per night over the Months')
#only from May to September prices are higher in resort hotels
#mainly during summer price of resort_hotel is more
#therefore suring summer demand for resort hotel is more
#we need to find stays in weekend night in each and every market segments
plt.figure(figsize = (15,10))
sns.boxplot(x = "market_segment", y = "stays_in_week_nights", data = data, hue = "hotel", palette = 'Set1');
# most of the groups are normal distributed
# some of them have high skewness
# most people do not seem to prefer to stay at the city hotel for more than 1 week.
# But it seems normal to stay in resort hotels for up to 12-13 days.
#It is obvious that when people go to resort hotels, they prefer to stay more.
#we can access our meal column
data['meal'].value_counts()
BB 92236 HB 14458 SC 10549 Undefined 1169 FB 798 Name: meal, dtype: int64
Meaning of Abbreviations:- RO (Room Only) No meals are included.
SC (Self Catering) No meals are included; however, your accommodation will be provided with catering facilities for you to cook light meals.
BB (Bed and Breakfast) Breakfast is included.
HB (Half Board) Breakfast and evening meals are included. In some cases, you can choose to receive lunch instead of breakfast – the hotel will confirm this on arrival.
FB (Full Board) Breakfast, lunch and evening meals are included.
AI (All Inclusive) All meals and locally produced drinks are included.
#we plot a DocutChart on the data we get from the meals data
#it can be made by pie chart using 'hole'
px.pie(data,names=data['meal'].value_counts().index,
values=data['meal'].value_counts().values,hole=0.2)
#We can find 77.4% of people prefer bed and breakfast
plt.figure(figsize=(8,5))
sns.countplot(x='total_of_special_requests', data=data, palette = 'ocean_r')
#'pallete' is used for changing the theme of graph
plt.title('Total Special Request')
#Around 55% of bookings do not have any special requests
Text(0.5, 1.0, 'Total Special Request')
# '.agg' - aggregate
pivot=data.groupby([ 'total_of_special_requests', 'is_canceled']).agg({'total_of_special_requests':'count'}).rename(columns={'total_of_special_requests':'count'}).unstack()
pivot
# is_cancelled = 1 means booking is cancelled
count | ||
---|---|---|
is_canceled | 0 | 1 |
total_of_special_requests | ||
0 | 36667 | 33534 |
1 | 25867 | 7316 |
2 | 10086 | 2866 |
3 | 2049 | 445 |
4 | 304 | 36 |
5 | 38 | 2 |
pivot.plot(kind='bar')
#Nearly half bookings without any special requests have been cancelled
#another half of them have not been canceled.
<AxesSubplot:xlabel='total_of_special_requests'>
#Resort hotel dataframe
rush_resort=data_resort['arrival_date_month'].value_counts().reset_index()
rush_resort.columns=['month','no of guests']
rush_resort
month | no of guests | |
---|---|---|
0 | August | 3257 |
1 | July | 3137 |
2 | October | 2575 |
3 | March | 2571 |
4 | April | 2550 |
5 | May | 2535 |
6 | February | 2308 |
7 | September | 2102 |
8 | June | 2037 |
9 | December | 2014 |
10 | November | 1975 |
11 | January | 1866 |
#City hotel dataframe
rush_city=data_city['arrival_date_month'].value_counts().reset_index()
rush_city.columns=['month','no of guests']
rush_city
month | no of guests | |
---|---|---|
0 | August | 5367 |
1 | July | 4770 |
2 | May | 4568 |
3 | June | 4358 |
4 | October | 4326 |
5 | September | 4283 |
6 | March | 4049 |
7 | April | 4010 |
8 | February | 3051 |
9 | November | 2676 |
10 | December | 2377 |
11 | January | 2249 |
#So as we know we have to merge the dataframes
final_rush=rush_resort.merge(rush_city,on='month')
#manipulating the column names
final_rush.columns=['month','no of guests in resort','no of guest in city hotel']
final_rush
month | no of guests in resort | no of guest in city hotel | |
---|---|---|---|
0 | August | 3257 | 5367 |
1 | July | 3137 | 4770 |
2 | October | 2575 | 4326 |
3 | March | 2571 | 4049 |
4 | April | 2550 | 4010 |
5 | May | 2535 | 4568 |
6 | February | 2308 | 3051 |
7 | September | 2102 | 4283 |
8 | June | 2037 | 4358 |
9 | December | 2014 | 2377 |
10 | November | 1975 | 2676 |
11 | January | 1866 | 2249 |
#now we sort the months in right order
final_rush=sd.Sort_Dataframeby_Month(df=final_rush,monthcolumnname='month')
final_rush
month | no of guests in resort | no of guest in city hotel | |
---|---|---|---|
0 | January | 1866 | 2249 |
1 | February | 2308 | 3051 |
2 | March | 2571 | 4049 |
3 | April | 2550 | 4010 |
4 | May | 2535 | 4568 |
5 | June | 2037 | 4358 |
6 | July | 3137 | 4770 |
7 | August | 3257 | 5367 |
8 | September | 2102 | 4283 |
9 | October | 2575 | 4326 |
10 | November | 1975 | 2676 |
11 | December | 2014 | 2377 |
final_rush.dtypes
month object no of guests in resort int64 no of guest in city hotel int64 dtype: object
#here we need a line graph
px.line(data_frame=final_rush, x='month', y=['no of guests in resort','no of guest in city hotel'], title='Total no of guests per Months')
#The City hotel has more guests during spring and autumn, prices are higher
#In July and August there are less visitors.
#Resort hotel go down slighty from June to September
#which is also when the prices are highest.
#Both hotels have the fewest guests during the winter.
#we need to remove the cancelled entries
filter=data['is_canceled']==0
clean_data=data[filter]
#hence we get a clean data
clean_data.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | 0 | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | 0 | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 |
clean_data["total_nights"] = clean_data["stays_in_weekend_nights"] + clean_data["stays_in_week_nights"]
#this warning can be ignored this is a filter warning
<ipython-input-68-1f0b759752e5>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
clean_data.head()
#we can see a new column at the end is created 'total_nights'
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | ... | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | total_nights | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | ... | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 | 0 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | ... | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 | 0 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | ... | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 | 1 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | ... | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 | 1 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | ... | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 | 2 |
5 rows × 33 columns
#on the basis of total night:hotel and aggregate count
stay=clean_data.groupby(['total_nights','hotel']).agg('count').reset_index()
stay=stay.iloc[:,0:3]#we need 3 columns
stay.head()
total_nights | hotel | is_canceled | |
---|---|---|---|
0 | 0 | City Hotel | 251 |
1 | 0 | Resort Hotel | 371 |
2 | 1 | City Hotel | 9155 |
3 | 1 | Resort Hotel | 6579 |
4 | 2 | City Hotel | 10983 |
stay=stay.rename(columns={'is_canceled':'Number of stays'})
#renaming is_cancelled to Number of stays
stay.head()
total_nights | hotel | Number of stays | |
---|---|---|---|
0 | 0 | City Hotel | 251 |
1 | 0 | Resort Hotel | 371 |
2 | 1 | City Hotel | 9155 |
3 | 1 | Resort Hotel | 6579 |
4 | 2 | City Hotel | 10983 |
plt.figure(figsize=(20, 8))
sns.barplot(x = "total_nights", y = "Number of stays" , hue="hotel",
hue_order = ["City Hotel", "Resort Hotel"], data=stay)
#In city hotel more people stay having that much no. of nights.
<AxesSubplot:xlabel='total_nights', ylabel='Number of stays'>
clean_data['market_segment'].value_counts()
Online TA 35673 Offline TA/TO 15880 Direct 10648 Groups 7697 Corporate 4291 Complementary 639 Aviation 183 Name: market_segment, dtype: int64
# pie plot
fig=px.pie(clean_data,
values=clean_data['market_segment'].value_counts().values,
names=clean_data['market_segment'].value_counts().index,
title="Bookings per market segment")
fig.update_traces(rotation=-90, textinfo="percent+label")
fig.show()
#market segment booking is dominated by Online TA follew by Online TO
clean_data.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | ... | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | total_nights | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | ... | 0 | 0 | C | C | 3 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 | 0 |
1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | ... | 0 | 0 | C | C | 4 | No Deposit | 0.0 | 0.0 | 0 | Transient | 0.0 | 0 | 0 | Check-Out | 7/1/2015 | 0 |
2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Direct | Direct | ... | 0 | 0 | A | C | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 | 1 |
3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | 0.0 | 0 | BB | GBR | Corporate | Corporate | ... | 0 | 0 | A | A | 0 | No Deposit | 304.0 | 0.0 | 0 | Transient | 75.0 | 0 | 0 | Check-Out | 7/2/2015 | 1 |
4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | 0.0 | 0 | BB | GBR | Online TA | TA/TO | ... | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 98.0 | 0 | 1 | Check-Out | 7/3/2015 | 2 |
5 rows × 33 columns
plt.figure(figsize=(20, 10))
sns.barplot(x="market_segment",
y="adr",
hue="reserved_room_type",
data=clean_data)
#it looks very messy and hard to analyse
<AxesSubplot:xlabel='market_segment', ylabel='adr'>
plt.figure(figsize=(20, 10))
sns.barplot(x="market_segment",
y="adr",
hue="reserved_room_type",
data=clean_data,
ci="sd",
errwidth=1,
capsize=0.1)
#now it looks more clean
<AxesSubplot:xlabel='market_segment', ylabel='adr'>
#accessing booking which were cancelled
cancel=data[data['is_canceled']==1]
#how many Resort Hotel booking is cancelled
rh_cancelations = cancel[cancel["hotel"] == "Resort Hotel"]["is_canceled"].sum()
#how many City Hotel booking is cancelled
ch_cancelations = cancel[cancel["hotel"] == "City Hotel"]["is_canceled"].sum()
rh_cancelations
11120
ch_cancelations
33079
# convert entire stats into percentage
px.pie(values=[11120,33079],names=[rh_cancelations,ch_cancelations])
#cancellations of reservation for city hotel is more. Almost 75%
#we access the cancelled data
cancellation=data[data['is_canceled']==1]
cancellation.head()
hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | meal | country | market_segment | distribution_channel | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | reserved_room_type | assigned_room_type | booking_changes | deposit_type | agent | company | days_in_waiting_list | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8 | Resort Hotel | 1 | 85 | 2015 | July | 27 | 1 | 0 | 3 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | A | A | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 82.0 | 0 | 1 | Canceled | 5/6/2015 |
9 | Resort Hotel | 1 | 75 | 2015 | July | 27 | 1 | 0 | 3 | 2 | 0.0 | 0 | HB | PRT | Offline TA/TO | TA/TO | 0 | 0 | 0 | D | D | 0 | No Deposit | 15.0 | 0.0 | 0 | Transient | 105.5 | 0 | 0 | Canceled | 4/22/2015 |
10 | Resort Hotel | 1 | 23 | 2015 | July | 27 | 1 | 0 | 4 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 123.0 | 0 | 0 | Canceled | 6/23/2015 |
27 | Resort Hotel | 1 | 60 | 2015 | July | 27 | 1 | 2 | 5 | 2 | 0.0 | 0 | BB | PRT | Online TA | TA/TO | 0 | 0 | 0 | E | E | 0 | No Deposit | 240.0 | 0.0 | 0 | Transient | 107.0 | 0 | 2 | Canceled | 5/11/2015 |
32 | Resort Hotel | 1 | 96 | 2015 | July | 27 | 1 | 2 | 8 | 2 | 0.0 | 0 | BB | PRT | Direct | Direct | 0 | 0 | 0 | E | E | 0 | No Deposit | 0.0 | 0.0 | 0 | Transient | 108.3 | 0 | 2 | Canceled | 5/29/2015 |
cancellation['hotel'].unique()
array(['Resort Hotel', 'City Hotel'], dtype=object)
cancel_month=data.groupby(['arrival_date_month','hotel']).agg('count').reset_index()
cancelled=cancel_month.iloc[:,0:3]
#dataframe aquired
cancelled
arrival_date_month | hotel | is_canceled | |
---|---|---|---|
0 | April | City Hotel | 7469 |
1 | April | Resort Hotel | 3609 |
2 | August | City Hotel | 8967 |
3 | August | Resort Hotel | 4894 |
4 | December | City Hotel | 4114 |
5 | December | Resort Hotel | 2645 |
6 | February | City Hotel | 4950 |
7 | February | Resort Hotel | 3102 |
8 | January | City Hotel | 3730 |
9 | January | Resort Hotel | 2191 |
10 | July | City Hotel | 8071 |
11 | July | Resort Hotel | 4573 |
12 | June | City Hotel | 7885 |
13 | June | Resort Hotel | 3044 |
14 | March | City Hotel | 6434 |
15 | March | Resort Hotel | 3334 |
16 | May | City Hotel | 8221 |
17 | May | Resort Hotel | 3559 |
18 | November | City Hotel | 4336 |
19 | November | Resort Hotel | 2435 |
20 | October | City Hotel | 7594 |
21 | October | Resort Hotel | 3553 |
22 | September | City Hotel | 7392 |
23 | September | Resort Hotel | 3108 |
#renaming is cancelled to number of cancellation
cancelled=cancelled.rename(columns={'is_canceled':'no of cancellations'})
cancelled
arrival_date_month | hotel | no of cancellations | |
---|---|---|---|
0 | April | City Hotel | 7469 |
1 | April | Resort Hotel | 3609 |
2 | August | City Hotel | 8967 |
3 | August | Resort Hotel | 4894 |
4 | December | City Hotel | 4114 |
5 | December | Resort Hotel | 2645 |
6 | February | City Hotel | 4950 |
7 | February | Resort Hotel | 3102 |
8 | January | City Hotel | 3730 |
9 | January | Resort Hotel | 2191 |
10 | July | City Hotel | 8071 |
11 | July | Resort Hotel | 4573 |
12 | June | City Hotel | 7885 |
13 | June | Resort Hotel | 3044 |
14 | March | City Hotel | 6434 |
15 | March | Resort Hotel | 3334 |
16 | May | City Hotel | 8221 |
17 | May | Resort Hotel | 3559 |
18 | November | City Hotel | 4336 |
19 | November | Resort Hotel | 2435 |
20 | October | City Hotel | 7594 |
21 | October | Resort Hotel | 3553 |
22 | September | City Hotel | 7392 |
23 | September | Resort Hotel | 3108 |
#Sorting the Months in Order
final=sd.Sort_Dataframeby_Month(cancelled,'arrival_date_month')
final
arrival_date_month | hotel | no of cancellations | |
---|---|---|---|
0 | January | City Hotel | 3730 |
1 | January | Resort Hotel | 2191 |
2 | February | City Hotel | 4950 |
3 | February | Resort Hotel | 3102 |
4 | March | City Hotel | 6434 |
5 | March | Resort Hotel | 3334 |
6 | April | City Hotel | 7469 |
7 | April | Resort Hotel | 3609 |
8 | May | City Hotel | 8221 |
9 | May | Resort Hotel | 3559 |
10 | June | City Hotel | 7885 |
11 | June | Resort Hotel | 3044 |
12 | July | City Hotel | 8071 |
13 | July | Resort Hotel | 4573 |
14 | August | City Hotel | 8967 |
15 | August | Resort Hotel | 4894 |
16 | September | City Hotel | 7392 |
17 | September | Resort Hotel | 3108 |
18 | October | City Hotel | 7594 |
19 | October | Resort Hotel | 3553 |
20 | November | City Hotel | 4336 |
21 | November | Resort Hotel | 2435 |
22 | December | City Hotel | 4114 |
23 | December | Resort Hotel | 2645 |
#Plotting a Double Bar Graph
plt.figure(figsize=(12, 8))
sns.barplot(x = "arrival_date_month", y = "no of cancellations" , hue="hotel",
hue_order = ["City Hotel", "Resort Hotel"], data=final)
#For the City hotel the relative number of cancelations is around 40 % throughout the year.
#For the Resort hotel it is highest in the summer and lowest during the winter.
<matplotlib.axes._subplots.AxesSubplot at 0x154484e9c8>