1> /* Advanced SQL Programming Fall 2002*/
2> /* */
3> /* Take-home exercises, Nov. 25th */
4> /* */
5> use evelyn
1> /* */
2> /* 1. Average, median, and mode of product prices: */
3> /* */
4> /* - Average - */
5> /* */
6> select avg(price) AveragePrice from product
AveragePrice
------------------------
60.80
(1 row affected)
1> /* */
2> /* - Mode - */
3> /* */
4> /* a. List prices in order so we can see what we have: */
5> /* */
6> select name, price
7> from product
8> where price is not null
9> order by price
name price
-------------------- ------------------------
cook & book 19.99
paper dolls 19.99
home poll kit 19.99
memory manager 19.99
more paper dolls 19.99
nt guru 20.00
bug stories 20.00
how multi is media? 20.00
money master 29.00
typing test 29.99
blood & guts 29.99
C++ for kids 39.99
star systems 39.99
mortgage minder 39.99
bugbane 49.00
tax time 49.99
teach yourself greek 49.99
landlord logs 89.99
memory tripler 119.99
z_connector 149.00
memory8 400.00
(21 rows affected)
1> /* */
2> /* b. Find count (= frequency) of each price: */
3> /* */
4> select price, count(*) Frequency
5> from product
6> where price is not null
7> group by price
8> order by Frequency
price Frequency
------------------------ -----------
29.00 1
49.00 1
89.99 1
119.99 1
149.00 1
400.00 1
29.99 2
49.99 2
20.00 3
39.99 3
19.99 5
(11 rows affected)
1> /* */
2> /* c. Find highest frequency, i.e., mode of price: */
3> /* */
4> select price, count(*) Mode
5> from product
6> where price is not null
7> group by price
8> having count(*) >= all
9> (select count(*)
10> from product
11> group by price)
price Mode
------------------------ -----------
19.99 5
(1 row affected)
1> /* */
2> /* - Median price - Celko's first median, separate views. */
3> /* */
4> /* Smallest value in upper half: */
5> /* */
6> create view small
7> as
8> select min(price) MinTopHalf
9> from product
10> where price is not null
11> and price in
12> (select p1.price
13> from product p1, product p2
14> where p2.price >= p1.price
15> and p1.price is not null
16> group by p1.price
17> having count(p1.price) <=
18> (select ceiling(count(*) / 2.0)
19> from product))
1> /* */
2> select * from small
MinTopHalf
------------------------
49.00
(1 row affected)
1> /* */
2> /* Highest value in lower half: */
3> /* */
4> create view big
5> as
6> select max(price) MaxLowHalf
7> from product
8> where price is not null
9> and price in
10> (select p1.price
11> from product p1, product p2
12> where p2.price <= p1.price
13> and p1.price is not null
14> group by p1.price
15> having count(p1.price) <=
16> (select ceiling(count(*) / 2.0)
17> from product))
1> /* */
2> select * from big
MaxLowHalf
------------------------
29.00
(1 row affected)
1> /* */
2> /* Average the two values: */
3> /* */
4> select convert(numeric(6,2), ((MinTopHalf + MaxLowHalf) / 2.0))
5> Median_1
6> from small, big
Median_1
---------
39.00
(1 row affected)
1> /* */
2> /* - Median price - using Murchison's median, Celko, p.357 */
3> /* */
4> select avg(price) Median_2
5> from product p1
6> where exists
7> (select count(*)
8> from product p2
9> where p2.price >= p1.price
10> having count(*) =
11> (select floor(count(*) / 2.0)
12> from product)
13> or count(*) =
14> (select ceiling(count(*) / 2.0)
15> from product))
Median_2
------------------------
39.99
(1 row affected)
1> /* Check on how difference arises between expected median and */
2> /* Murchison's median - probably rounding in ceiling, floor. */
3> /* */
4> -- Check values: price, prodnum
5> select price, prodnum
6> from product
7> order by price
price prodnum
------------------------ -----------
19.99 1084
19.99 1105
19.99 2000
19.99 2047
19.99 2049
20.00 1255
20.00 1357
20.00 1457
29.00 1083
29.99 1099
29.99 1108
39.99 1107
39.99 1109
39.99 1110
49.00 2113
49.99 1104
49.99 2050
89.99 2110
119.99 2111
149.00 1106
400.00 1794
(21 rows affected)
1> --
2> -- Murchison's median, original data, 21 rows
-- This time contenating prodnum to make values unique
3> --
4> select avg(price)
5> from product p1
6> where exists
7> (select count(*)
8> from product p2
9> where convert(char(10), p2.price) + convert(char(4), p2.prodnum) >=
10> convert(char(10), p1.price) + convert(char(4), p1.prodnum)
11> having count(*) =
12> (select floor(count(*) / 2.0) from product)
13> or count(*) =
14> (select ceiling(count(*) / 2.0 ) from product))
------------------------
34.99
(1 row affected)
1> --
2> -- Check floor and ceiling results
3> --
4> select floor(count(*) / 2.0) LowestTopHalf from product
5> select ceiling(count(*) / 2.0) HighestLowHalf from product
LowestTopHalf
--------------------
10
(1 row affected)
HighestLowHalf
--------------------
11
(1 row affected)
/* */
1> /* 2. (Mode allowing for 10% difference) */
2> /* */
3> /* Original data shows no difference for mode + or - 10%, so*/
4> /* will use + or - 40% here, to demonstrate the difference. */
5> /* */
6> select count (price) Frequency, avg(price) Mode
7> from product
8> group by price
9> having count(*) >= all
10> -- (select count(*) * .9 NOTE: No other values within 10% of mode
11> (select count(*) * .6 -- Max frequency is 5; next closest is 3
12> from product
13> group by price)
Frequency Mode
----------- ------------------------
5 19.99
3 20.00
3 39.99
(3 rows affected)
1> /* */
2> /* 3. Running total of revenue by order num. and product num. */
3> /* */
4> /* - Revenue by order and product, w/o running total, as view -*/
5> /* */
6> create view RevByLine
7> as
8> select
9> convert(char(2), d.ordnum) Order#, convert(char(4), d.prodnum) Prod#,
10> unit, convert(numeric(6,2), price) Price, sum(unit*price) LineTotal
11> from orderdetail d, product p
12> where d.prodnum = p.prodnum
13> group by d.ordnum, d.prodnum, unit, price
1> /* */
2> select * from RevByLine
Order# Prod# unit Price LineTotal
------ ----- ------ --------- ------------------------
81 1106 2 149.00 298.00
81 1357 1 20.00 20.00
81 2050 5 49.99 249.95
84 1099 1 29.99 29.99
84 1255 1 20.00 20.00
84 2050 1 49.99 49.99
85 1794 1 400.00 400.00
85 2050 25 49.99 1,249.75
86 1083 7 29.00 203.00
86 1105 20 19.99 399.80
86 2000 2 19.99 39.98
87 1083 1 29.00 29.00
87 1105 20 19.99 399.80
87 1106 1 149.00 149.00
87 1794 1 400.00 400.00
87 2000 20 19.99 399.80
87 2113 1 49.00 49.00
89 2050 2 49.99 99.98
90 2110 1 89.99 89.99
91 1107 7 39.99 279.93
91 2111 5 119.99 599.95
92 2050 3333 49.99 166,616.67
93 1105 1 19.99 19.99
93 2050 5 49.99 249.95
94 1083 5 29.00 145.00
94 1108 1 29.99 29.99
95 1083 2 29.00 58.00
95 1105 1 19.99 19.99
95 1108 1 29.99 29.99
95 1255 1 20.00 20.00
99 2047 6 19.99 119.94
99 2050 2222 49.99 111,077.78
(32 rows affected)
1> /* */
2> /* - Running Total on view - */
3> /* */
4> select r.Order#, r.Prod#, r.LineTotal,
5> (select sum(r2.LineTotal)
6> from RevByLine r2
7> where r.Order# >= r2.Order#) RunningTotal
8> from RevByLine r
9> order by r.Order# -- already grouped, ordered by Prod#
Order# Prod# LineTotal RunningTotal
------ ----- ------------------------ ------------------------
81 1106 298.00 567.95
81 1357 20.00 567.95
81 2050 249.95 567.95
84 1099 29.99 667.93
84 1255 20.00 667.93
84 2050 49.99 667.93
85 1794 400.00 2,317.68
85 2050 1,249.75 2,317.68
86 1083 203.00 2,960.46
86 1105 399.80 2,960.46
86 2000 39.98 2,960.46
87 1083 29.00 4,387.06
87 1105 399.80 4,387.06
87 1106 149.00 4,387.06
87 1794 400.00 4,387.06
87 2000 399.80 4,387.06
87 2113 49.00 4,387.06
89 2050 99.98 4,487.04
90 2110 89.99 4,577.03
91 1107 279.93 5,456.91
91 2111 599.95 5,456.91
92 2050 166,616.67 172,073.58
93 1105 19.99 172,343.52
93 2050 249.95 172,343.52
94 1083 145.00 172,518.51
94 1108 29.99 172,518.51
95 1083 58.00 172,646.49
95 1105 19.99 172,646.49
95 1108 29.99 172,646.49
95 1255 20.00 172,646.49
99 2047 119.94 283,844.21
99 2050 111,077.78 283,844.21
(32 rows affected)
1> /* */
2> /* - Create view with numbered rows to use for running sum -*/
3> /* */
4> create view RevNumberedLines
5> as
6> select Order#, Prod#, (r1.Order#+"."+r1.Prod#) CalcOrd#, LineTotal,
7> (select count(*)
8> from RevByLine r2
9> where (r2.Order#+"."+r2.Prod#) < (r1.Order#+"."+r1.Prod#)) + 1
10> LineNum
11> from RevByLine r1
1> /* */
2> select * from RevNumberedLines
3> order by CalcOrd#
Order# Prod# CalcOrd# LineTotal LineNum
------ ----- -------- ------------------------ -----------
81 1106 81.1106 298.00 1
81 1357 81.1357 20.00 2
81 2050 81.2050 249.95 3
84 1099 84.1099 29.99 4
84 1255 84.1255 20.00 5
84 2050 84.2050 49.99 6
85 1794 85.1794 400.00 7
85 2050 85.2050 1,249.75 8
86 1083 86.1083 203.00 9
86 1105 86.1105 399.80 10
86 2000 86.2000 39.98 11
87 1083 87.1083 29.00 12
87 1105 87.1105 399.80 13
87 1106 87.1106 149.00 14
87 1794 87.1794 400.00 15
87 2000 87.2000 399.80 16
87 2113 87.2113 49.00 17
89 2050 89.2050 99.98 18
90 2110 90.2110 89.99 19
91 1107 91.1107 279.93 20
91 2111 91.2111 599.95 21
92 2050 92.2050 166,616.67 22
93 1105 93.1105 19.99 23
93 2050 93.2050 249.95 24
94 1083 94.1083 145.00 25
94 1108 94.1108 29.99 26
95 1083 95.1083 58.00 27
95 1105 95.1105 19.99 28
95 1108 95.1108 29.99 29
95 1255 95.1255 20.00 30
99 2047 99.2047 119.94 31
99 2050 99.2050 111,077.78 32
(32 rows affected)
1> /* */
2> /* - New running total - */
3> /* */
4> select r.CalcOrd#, r.Order#, r.Prod#, r.LineTotal,
5> (select sum(r2.LineTotal)
6> from RevNumberedLines r2
7> where r.LineNum >= r2.LineNum) RunningTotal
8> from RevNumberedLines r
9> order by r.CalcOrd#
CalcOrd# Order# Prod# LineTotal RunningTotal
-------- ------ ----- ------------------------ ------------------------
81.1106 81 1106 298.00 298.00
81.1357 81 1357 20.00 318.00
81.2050 81 2050 249.95 567.95
84.1099 84 1099 29.99 597.94
84.1255 84 1255 20.00 617.94
84.2050 84 2050 49.99 667.93
85.1794 85 1794 400.00 1,067.93
85.2050 85 2050 1,249.75 2,317.68
86.1083 86 1083 203.00 2,520.68
86.1105 86 1105 399.80 2,920.48
86.2000 86 2000 39.98 2,960.46
87.1083 87 1083 29.00 2,989.46
87.1105 87 1105 399.80 3,389.26
87.1106 87 1106 149.00 3,538.26
87.1794 87 1794 400.00 3,938.26
87.2000 87 2000 399.80 4,338.06
87.2113 87 2113 49.00 4,387.06
89.2050 89 2050 99.98 4,487.04
90.2110 90 2110 89.99 4,577.03
91.1107 91 1107 279.93 4,856.96
91.2111 91 2111 599.95 5,456.91
92.2050 92 2050 166,616.67 172,073.58
93.1105 93 1105 19.99 172,093.57
93.2050 93 2050 249.95 172,343.52
94.1083 94 1083 145.00 172,488.52
94.1108 94 1108 29.99 172,518.51
95.1083 95 1083 58.00 172,576.51
95.1105 95 1105 19.99 172,596.50
95.1108 95 1108 29.99 172,626.49
95.1255 95 1255 20.00 172,646.49
99.2047 99 2047 119.94 172,766.43
99.2050 99 2050 111,077.78 283,844.21
(32 rows affected)
1> /* */
2> /* Drop all views */
3> /* */
4> drop view small
5> drop view big
6> drop view RevByLine
7> drop view RevNumberedLines
1> /* */
2> dump tran evelyn with truncate_only