-
Notifications
You must be signed in to change notification settings - Fork 0
/
Walmart_data_analysis.sql
227 lines (202 loc) · 4.89 KB
/
Walmart_data_analysis.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
select * from sales;
-- "Feature Engineering "
-- time_of_day (New table created from time )
select time,( case
when time between "00:00:00" and "12:00:00" then "Morning"
when time between "12:01:00" and "16:00:00" then "Afternoon"
else "Evening"
end
) as time_of_day
from sales;
alter table sales add column time_of_day varchar(20);
update sales
set time_of_day = (
case
when time between "00:00:00" and "12:00:00" then "Morning"
when time between "12:01:00" and "16:00:00" then "Afternoon"
else "Evening"
end
);
-- -----------------------------------------------------------------------
-- Adding Day_name column & month_name column
alter table sales add column day_name varchar(15);
update sales
set day_name = dayname(date);
alter table sales add column month_name varchar(15);
update sales
set month_name = monthname(date);
---------------------------------------------------------------------------
-- how many unique cities the data have:
select distinct city, branch from sales;
-- -------------------Products----------------------------------------------
-- How many unique products lines does data have ?
select count(distinct product_line) as NoOfProductLine from sales;
-- what is the most common payment method ?
select
payment,
count(payment) as Frequency
from sales
group by payment
order by Frequency desc;
-- what is the most selling product line?
select
product_line,
count(product_line) as Frequency
from sales
group by product_line
order by Frequency desc;
-- what is the total revenue by month
select
month_name as Month,
sum(total) as Revenue
from sales
group by month_name
order by Revenue desc;
-- what month had the largest COGS
select
month_name Month,
sum(cogs) COGS
from sales
group by month_name
order by COGS desc;
-- what product line has the largest revenue
select
product_line,
sum(total) as Revenue
from sales
group by product_line
order by Revenue desc;
-- what is the city with the largest revenue?
select
branch,
city,
sum(total) as Revenue
from sales
group by city,branch
order by Revenue desc;
-- what product line had the largest VAT?
select
product_line,
avg(tax_pct) avg_tax
from sales
group by product_line
order by avg_tax desc;
-- Fetch each product line and add a column to those product line showing "Good", "Bad". Good if its greater than average sales
-- Which branch sold more products than average product sold?
select
branch Branch,
sum(quantity) Quantity
from
sales
group by branch
having sum(quantity) > (select avg(quantity) from sales);
-- What is the most common product line by gender?
select
gender Gender,
product_line,
count(gender) as Frequency
from
sales
group by gender,product_line
order by Frequency desc;
-- What is the average rating of each product line?
select
product_line,
round((rating),2) AverageRating
from
sales
group by product_line
order by AverageRating desc;
### Sales
-- 1. Number of sales made in each time of the day per weekday
select
time_of_day,
count(*) NumberOfSales
from
sales
where day_name = "Monday"
group by time_of_day
order by NumberOfSales desc;
-- 2. Which of the customer types brings the most revenue?
select
customer_type,
sum(total) Revenue
from
sales
group by customer_type
order by Revenue;
-- 3. Which city has the largest tax percent/ VAT (**Value Added Tax**)?
select
city,
avg(tax_pct) vat
from sales
group by city
order by vat desc;
-- 4. Which customer type pays the most in VAT?
select
customer_type,
round(avg(tax_pct),2) VAT
from
sales
group by customer_type
order by VAT desc;
### Customer
-- 1. How many unique customer types does the data have?
select
distinct customer_type
from sales;
-- 2. How many unique payment methods does the data have?
select
distinct payment
from sales;
-- 3. Which customer type buys the most?
select
customer_type,
count(*) Frequency
from sales
group by customer_type
order by Frequency;
-- 4. What is the gender of most of the customers?
select
gender,
count(gender) Frequency
from sales
group by gender;
-- 5. What is the gender distribution per branch?
select
branch,
gender,
count(*) Frequency
from sales
group by gender, branch
order by branch;
-- 6. Which time of the day do customers give most ratings?
select
time_of_day,
round(avg(rating),2) Frequency
from sales
group by time_of_day
order by Frequency desc;
-- 7. Which time of the day do customers give most ratings per branch?
select
branch,
time_of_day,
round(avg(rating),2) Frequency
from sales
group by time_of_day,branch
order by branch;
-- 8. Which day fo the week has the best avg ratings?
select
day_name,
round(avg(rating),2) Avg_rating
from sales
group by day_name
order by Avg_rating desc;
-- 9. Which day of the week has the best average ratings per branch?
select
branch,
day_name,
round(avg(rating),2) Avg_rating
from sales
group by day_name,branch
order by Avg_rating desc;