import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# reading the data
data = pd.read_csv("DataForPandasProject.csv")
data.head(3)
# getting an overview of our data
print("Our data has {0} rows and {1} columns".format(data.shape[0], data.shape[1]))
# checking for missing values
print("Are there missing values? {}".format(data.isnull().any().any()))
data.describe()
data.info()
data["Interaction Type"].fillna("None", inplace = True)
data["Rejected"].fillna(0, inplace = True)
data["Short"].fillna(0, inplace = True)
data["Engage Time"].fillna(0, inplace = True)
data["Hold Time"].fillna(0, inplace = True)
data["Wrap Time"].fillna(0, inplace = True)
data["Handle Time"].fillna(0, inplace = True)
data["Avg Engage Time"].fillna(0, inplace = True)
data["Avg Hold Time"].fillna(0, inplace = True)
data["Avg Wrap Time"].fillna(0, inplace = True)
data["Avg Handle Time"].fillna(0, inplace = True)
data["Engage Time %"].fillna(0, inplace = True)
#convert to usable and faster formats
data["Manager"] = data["Manager"].astype("category")
data["Date"] = pd.to_datetime(data["Date"])
data["Agent Group"] = data["Agent Group"].astype("category")
data["Agent Name"] = data["Agent Name"].astype("category")
data["Interaction Type"] = data["Interaction Type"].astype("category")
data["Engage Time"] = pd.to_timedelta(data["Engage Time"])
data["Hold Time"] = pd.to_timedelta(data["Hold Time"])
data["Wrap Time"] = pd.to_timedelta(data["Wrap Time"])
data["Handle Time"] = pd.to_timedelta(data["Handle Time"])
data["Avg Engage Time"] = pd.to_timedelta(data["Avg Engage Time"])
data["Avg Hold Time"] = pd.to_timedelta(data["Avg Hold Time"])
data["Avg Wrap Time"] = pd.to_timedelta(data["Avg Wrap Time"])
data["Avg Handle Time"] = pd.to_timedelta(data["Avg Handle Time"])
from datetime import datetime
from datetime import date
datetime.today()
today = datetime.today()
type(today)
todaydate = date.today()
type(todaydate)
todaydate
data.info()
#verify that there are no longer any null values and useful format
data.head()
#isolate the three columns
top5 = data[["Agent Name","Handle Time Minutes", "Accepted"]]
#show the highest numbers on top
top5.sort_values(["Handle Time Minutes", "Accepted"], ascending = [False, False]).head()
#show the highest rejecteds
rej = data[["Agent Name","Rejected"]]
rej.sort_values("Rejected").tail(13)
#pivot table
pd.pivot_table(data, index= ['Manager', "Agent Name"], values= ["Accepted", "Rejected"])
#step back and start again, pivot managers
pd.pivot_table(data, index= 'Manager', values= "Accepted")
sns.set()
pd.pivot_table(data, index= 'Manager', values= "Accepted").plot(kind= 'bar')
plt.ylabel("Accepted")
#pivot agents
pd.pivot_table(data, index= 'Agent Name', values= "Accepted")
sns.set()
pd.pivot_table(data, index= 'Agent Name', values= "Accepted").plot(figsize = (15,12), kind= 'barh')
plt.ylabel("Accepted")
manager = data["Manager"] == "Manager4"
rejectedHigh = data["Rejected"] > 0
#looking closer- this formula selects all cases where manager is manager4 and rejected is >0
manager4 = data[manager & rejectedHigh]
manager4
agentIndex = topRejects.set_index("Agent Name")
agentIndex
agentIndex.plot(kind = "bar")
data.head()
def rank_performance(ht):
if ht >= 500:
return "Poor"
elif ht < 500 and ht >= 10:
return "Satisfactory"
else:
return "Stellar"
#this equation assumes an AHT expectation for the week to be 500 minutes
bool = data["Handle Time Minutes"].apply(rank_performance).head()
bool
data["Handle Time Minutes"].apply(rank_performance).value_counts()
plt.style.use("ggplot")
data["Handle Time Minutes"].apply(rank_performance).value_counts().plot(kind = "bar")
plt.style.use("ggplot")
data["Handle Time Minutes"].apply(rank_performance).value_counts().plot(kind = "pie", legend = True)
aht = data.pivot_table(values = "Avg Handle Time", index = ["Date"], columns = "Manager", aggfunc = np.sum)
aht
aht.plot(figsize = (12,9), kind = "bar")
ahtMean = data.pivot_table(values = "Handle Time Minutes", index = ["Date"], columns = "Manager", aggfunc = "mean")
ahtMean
ahtMean.plot(figsize = (12,9), kind = "barh")
#manager2 has a higher aht than expected
#To select a row based on multiple conditions you can use &:
array = ['7/10/2019', 'Manager2']
data.loc[(data['Date'] == "7/1/2019") & data['Manager'].isin(array)]
#To select rows where a column value does not equal a value, use !=, or use >=, or similar options:
data.loc[data['Avg Handle Time'] >= "00:30:00"]
#to create a subtotal
df_sub = data[["Agent Name","Rejected","Accepted"]].groupby('Agent Name').sum()
df_sub.head(10)
data[(data['Date'] >='7/1/2019') & (data['Date'] <= '7/2/2019')].head()