当前位置：首页 > news >正文

天塌了！！！SQL竟也可以做预测分析？| 商品零售额的预测

news 2025/7/4 10:20:13

0 问题背景

1 数据准备

2 问题解决

2.1 模型构建

（1）符号规定

（2）基本假设

（3）模型的分析与建立

2.2 模型求解

3 小结

0 问题背景

1960年—1985年全国社会商品零售额如图1 所示

表1全国社会商品零售额数据

年份	1960	1961	1962	1963	1964	1965	1966	1967
零售总额	696.6	607.7	604	604.5	638.2	670.3	732.8	770.5

年份	1968	1969	1970	1971	1972	1973	1974	1975
零售总额	737.3	801.5	858	929.2	1023.3	1106.7	1163.6	1271.1

年份	1976	1977	1978	1979	1980	1981	1982
零售总额	1339.4	1432.8	1558.6	1800	2140	2350	2570

问题：试用三次指数平滑法预测1983年和1985年全国社会商品零售额？

1 数据准备

create table sale_amount as			
select '1960' years, '696.6' sale_amount from dual union all
select '1961' years, '607.7' sale_amount from dual union all
select '1962' years, '604'   sale_amount from dual union all
select '1963' years, '604.5' sale_amount from dual union all
select '1964' years, '638.2' sale_amount from dual union all
select '1965' years, '670.3' sale_amount from dual union all
select '1966' years, '732.8' sale_amount from dual union all
select '1967' years, '770.5' sale_amount from dual union all
select '1968' years, '737.3' sale_amount from dual union all
select '1969' years, '801.5' sale_amount from dual union all
select '1970' years, '858'   sale_amount from dual union all
select '1971' years, '929.2'  sale_amount from dual union all
select '1972' years, '1023.3' sale_amount from dual union all
select '1973' years, '1106.7' sale_amount from dual union all
select '1974' years, '1163.6' sale_amount from dual union all
select '1975' years, '1271.1' sale_amount from dual union all
select '1976' years, '1339.4' sale_amount from dual union all
select '1977' years, '1432.8' sale_amount from dual union all
select '1978' years, '1558.6' sale_amount from dual union all
select '1979' years, '1800' sale_amount from dual union all
select '1980' years, '2140' sale_amount from dual union all
select '1981' years, '2350' sale_amount from dual union all
select '1982' years, '2570' sale_amount from dual

2 问题解决

2.1 模型构建

（1）符号规定

（2）基本假设

假设本问题考虑全社会商品零售额数据；
假设本问题只考虑销售，不考虑其余因素
假设本问题只考虑销售额总额，不考虑其余分支

（3）模型的分析与建立

令加权系数 $eq?%5Calpha%20%3D0.3$ ，则计算公式为

其中， $eq?%7BS_%7Bt%7D%7D%5E%7B%281%29%7D$ 表示一次指数的平滑值； $eq?%7BS_%7Bt%7D%7D%5E%7B%282%29%7D$ 表示二次次指数的平滑值； $eq?%7BS_%7Bt%7D%7D%5E%7B%283%29%7D$ 表示三次指数的平滑值。初始值为

三次指数平滑法的预测模型为：

其中，

2.2 模型求解

步骤1：计算初始值

select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rn
from (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t

步骤2 ：计算一次平滑值

with init as (select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rnfrom (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
)
--计算一次平滑值, s1 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, cast(sum(case when t2.rn <= t1.rn then t2.sale_amount * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s1_p3from init t1,init t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn
)
select * from s1 order by  years;

步骤3：计算二次平滑值

with init as (select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rnfrom (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
)
--计算一次平滑值, s1 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, cast(sum(case when t2.rn <= t1.rn then t2.sale_amount * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s1_p3from init t1,init t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn
)
--计算二次平滑值
, s2 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, cast(sum(case when t2.rn <= t1.rn then t2.s1_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s2_p3from s1 t1,s1 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3
)
select * from s2 order by  years;

步骤4：计算三次平滑值


with init as (select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rnfrom (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
)
--计算一次平滑值, s1 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, cast(sum(case when t2.rn <= t1.rn then t2.sale_amount * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s1_p3from init t1,init t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn
)
--计算二次平滑值
, s2 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, cast(sum(case when t2.rn <= t1.rn then t2.s1_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s2_p3from s1 t1,s1 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3
)--计算三次平滑值
,s3 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s2_p3, cast(sum(case when t2.rn <= t1.rn then t2.s2_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s3_p3from s2 t1,s2 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s2_p3
)
select * from s3 order by  years;

步骤4：计算二次函数模型系数


with init as (select years, sale_amount, last_value(init_sale_amount ignore nulls) over (order by YEARS) init_sale_amount, rnfrom (select years, sale_amount, casewhen rn = 1 then cast(avg(sale_amount)over (order by years rows between current row and 2 following ) as decimal(18, 1)) end init_sale_amount, rnfrom (select years, sale_amount, row_number() over (order by years) rnfrom sale_amount) t) t
)
--计算一次平滑值, s1 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, cast(sum(case when t2.rn <= t1.rn then t2.sale_amount * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s1_p3from init t1,init t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn
)
--计算二次平滑值
, s2 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, cast(sum(case when t2.rn <= t1.rn then t2.s1_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s2_p3from s1 t1,s1 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3
)--计算三次平滑值
,s3 as (select t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, t1.s2_p3, cast(sum(case when t2.rn <= t1.rn then t2.s2_p3 * power(0.7, t1.rn - t2.rn) else 0 end) * 0.3 +power(0.7, t1.rn) * t1.init_sale_amount as decimal(18, 4)) s3_p3from s2 t1,s2 t2group by t1.years, t1.sale_amount, t1.init_sale_amount, t1.rn, t1.s1_p3, t1.s2_p3
)--计算二次趋势模型系数
select years, sale_amount, init_sale_amount, rn, s1_p3, s2_p3, s3_p3, cast(case when rk=1 then 3*s1_p3 - 3*s2_p3 + s3_p3 else 0 end as decimal(18,4)) a_p3, cast(case when rk=1 then ((6-5*0.3)*s1_p3 - 2*(5-4*0.3)*s2_p3 + (4-3*0.3)*s3_p3 ) * 0.3/(2*power(0.7,2))  else 0 end as decimal(18,2))  b_p3, cast(case when rk=1 then (s1_p3 - 2*s2_p3 + s3_p3 ) * power(0.3,2)/(2*power(0.7,2))  else 0 end as decimal(18,4))  c_p3
from (select years, sale_amount, init_sale_amount, rn, s1_p3, s2_p3, s3_p3, row_number() over (order by rn desc) rkfrom s3) t
order by years

步骤5：构建二次预测模型,并预测结果值

由步骤4得知：

a=2572.2607,b=259.3367,c=8.9818

则预测模型为：

$eq?%5Cwidehat%7By%7D%20%3D%208.9818m%5E2%20+%20259.3367m%20+%202572.2607$

最后求得1983，1985年销售额的预测值分别是2840.5792亿元，3431.107亿元。

3 小结

本文针对商品零售额采用三次指数平滑法构建预测模型，文中选取加权系数 $eq?%5Calpha%20%3D0.3$ 求解模型，并利用SQL语言进行实现，若实际中有相关需求，可针对加权系数再进行优化，利用RMSE均方根误差来使模型达到最优。

如果您觉得本文还不错，对你有帮助，那么不妨可以关注一下我的数字化建设实践之路专栏，这里的内容会更精彩。

专栏原价99，现在活动价59.9，按照阶梯式增长，还差5个人上升到69.9，最终恢复到原价。

专栏优势：
（1）一次收费持续更新。

（2）实战中总结的SQL技巧，帮助SQLBOY 在SQL语言上有质的飞越，无论你应对业务难题及面试都会游刃有余【全网唯一讲SQL实战技巧，方法独特】

SQL很简单，可你却写不好？每天一点点，收获不止一点点-CSDN博客

（3）实战中数仓建模技巧总结，让你认识不一样的数仓。【数据建模+业务建模，不一样的认知体系】（如果只懂数据建模而不懂业务建模，数仓体系认知是不全面的）

（4）数字化建设当中遇到难题解决思路及问题思考。

我的专栏具体链接如下：

数字化建设通关指南_莫叫石榴姐的博客-CSDN博客

0 问题背景

1 数据准备

2 问题解决

2.1 模型构建

（1）符号规定

（2）基本假设

（3）模型的分析与建立

2.2 模型求解

3 小结

相关文章：