当前位置:文档之家› 数据仓库的构建

数据仓库的构建

昆明理工大学信息工程与自动化学院学生实验报告(2015—2016学年第1学期)课程名称:数据仓库与数据挖掘开课实验室:444 2015年10月24日年级、专业、班计科121 学号姓名成绩实验项目名称数据仓库的构建指导教师教师评语该同学是否了解实验原理: A.了解□ B.基本了解□ C.不了解□该同学的实验能力: A.强□ B.中等□ C.差□该同学的实验是否达到要求: A.达到□ B.基本达到□ C.未达到□实验报告是否规范: A.规范□ B.基本规范□ C.不规范□实验过程是否详细记录: A.详细□ B.一般□ C.没有□教师签名:年月日一、上机目的及内容目的:1、理解数据库与数据仓库之间的区别与联系;2、掌握典型的关系型数据库及其数据仓库系统的工作原理以及应用方法;3、掌握数据仓库建立的基本方法及其相关工具的使用。

内容:以SQL Server为系统平台,设计、建立数据库,并以此为基础创建数据仓库。

要求:利用实验室和指导教师提供的实验软件,认真完成规定的实验项目,真实地记录实验中遇到的各种问题和解决的方法与过程,并绘出模拟实验案例的数据仓库模型。

实验完成后,应根据实验情况写出实验报告。

二、实验原理及基本技术路线图(方框原理图或程序流程图)数据仓库,由数据仓库之父W.H.Inmon于1990年提出,主要功能仍是将组织透过资讯系统之联机交易处理(OLTP)经年累月所累积的大量资料,透过数据仓库理论所特有的资料储存架构,作一有系统的分析整理,以利各种分析方法如线上分析处理(OLAP)、数据挖掘(Data Mining)之进行,并进而支持如决策支持系统(DSS)、主管资讯系统(EIS)之创建,帮助决策者能快速有效的自大量资料中,分析出有价值的资讯,以利决策拟定及快速回应外在环境变动,帮助建构商业智能(BI)。

三、所用仪器、材料(设备名称、型号、规格等或使用软件)1台PC及Microsoft SQL Server套件四、实验方法、步骤(或:程序代码或操作过程)--建立数据USE masterCREATE DATABASE [DW] ON PRIMARY( NAME = N'DW', FILENAME = N'E:\DW.mdf' )LOG ON( NAME = N'DW_log', FILENAME = N'E:\DW_log.ldf' )GOUSE DW----------------------------------1、建维表/*1.1 订单方式*/CREATE TABLE DIM_ORDER_METHOD(ONLINEORDERFLAG INT,DSC VARCHAR(20))/*1.2 销售人员及销售地区*/CREATE TABLE DIM_SALEPERSON(SALESPERSONID INT,DSC VARCHAR(20),SALETERRITORY_DSC VARCHAR(50))/*1.3 发货方式*/CREATE TABLE DIM_SHIPMETHOD(SHIPMETHODID INT,DSC VARCHAR(20))/*1.4 订单日期*/CREATE TABLE DIM_DATE(TIME_CD VARCHAR(8),TIME_MONTH VARCHAR(6),TIME_YEAR VARCHAR(6),TINE_QUAUTER VARCHAR(8),TIME_WEEK VARCHAR(6),TIME_XUN VARCHAR(4))/*1.5 客户*/CREATE TABLE DIM_CUSTOMER(CUSTOMERID INT,CUSTOMER_NAME VARCHAR(100),CUSTOMERTYPE VARCHAR(20),AGE INT,SEX VARCHAR(2),MaritalStatus VARCHAR(10),YearlyIncome VARCHAR(50),Education VARCHAR(50),Occupation VARCHAR(50),NumberCarsOwned INT,TotalChildren INT,COUNTRY_NAME VARCHAR(100),STATEPROVINCE_NAME VARCHAR(100),CITY_NAME VARCHAR(100))/*1.6 订单状态*/CREATE TABLE DIM_ORDER_STATUS(STATUS INT ,DSC VARCHAR(30))/*1.7 客户价值*/CREATE TABLE V_SUBTOTAL_VALUES(ORDER_VALUES_ID INT,DSC VARCHAR(30),MIN_VALUE INT,MAX_VALUE INT)---------------------------------------二、维度表的ETL-------------------------------------INSERT INTO DIM_ORDER_METHODVALUES (0,'销售人员')INSERT INTO DIM_ORDER_METHODVALUES (1,'客户在线')INSERT INTO DIM_SHIPMETHODSELECT ShipMethodID,NAME FROM AdventureWorks.Purchasing.ShipMethodINSERT INTO DIM_SALEPERSONSELECT A.SalesPersonID,'',FROM AdventureWorks.Sales.SalesPerson A,AdventureWorks.Sales.SalesTerritory B WHERE A.TerritoryID=B.TerritoryIDINSERT INTO DIM_ORDER_STATUSVALUES(1,'处理中')INSERT INTO DIM_ORDER_STATUSVALUES(2,'已批准')INSERT INTO DIM_ORDER_STATUSVALUES(3,'预订')INSERT INTO DIM_ORDER_STATUSVALUES(4,'已拒绝')INSERT INTO DIM_ORDER_STATUSVALUES(5,'已发货')INSERT INTO DIM_ORDER_STATUSVALUES(6,'已取消')INSERT INTO V_SUBTOTAL_VALUESVALUES(1,'0-100',0,100 )INSERT INTO V_SUBTOTAL_VALUESVALUES(2,'100-500',100,500 )INSERT INTO V_SUBTOTAL_VALUESVALUES(3,'500-1000',500,1000 )INSERT INTO V_SUBTOTAL_VALUESVALUES(4,'1000-2000',1000,2000 )INSERT INTO V_SUBTOTAL_VALUESVALUES(5,'2000-5000',2000,5000 )INSERT INTO V_SUBTOTAL_VALUESVALUES(6,'5000以上',5000,1000000000 )declare @day dateTIMESET @day='2001-01-01'while @day<'2005-01-01'BEGINinsert into DIM_DATESELECT CONVERT(CHAR(8),@day,112),CONVERT(CHAR(6),@day,112),CONVERT(CHAR(4),@day,112)+'年','第'+CAST(DATEname(QUARTER , @day) AS VARCHAR(1))+'季度',DATEname(weekday , @day),case WHEN DATEPART(DAY,@day)<11 THEN '上旬' WHEN DATEPART(DAY,@day)<21 THEN '中旬' ELSE '下旬' ENDSELECT @day=DATEADD(DAY,1,@day)ENDINSERT INTO DIM_CUSTOMERSELECT A.CustomerID,G.FirstName,CASE WHEN A.CustomerType='I' THEN '个人' ELSE '商店' END,DATEDIFF(YEAR,Demographics.value('declare namespacezhh="/sqlserver/2004/07/adventure-works/IndividualSurvey";(/zhh:IndividualSurvey/zhh:BirthDate)[1]','VARCHAR(10)'),GETDATE()) , Demographics.value('declare namespacezhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:Gender)[1]','VARCHAR(2)') ,Demographics.value('declare namespacezhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:MaritalStatus)[1]','VARCHAR(10)') ,Demographics.value('declare namespacezhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:YearlyIncome)[1]','VARCHAR(20)') ,Demographics.value('declare namespacezhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:Education)[1]','VARCHAR(20)') ,Demographics.value('declare namespacezhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:Occupation)[1]','VARCHAR(20)') ,Demographics.value('declare namespacezhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:NumberCarsOwned)[1]','int') ,Demographics.value('declare namespacezhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:TotalChildren)[1]','int') ,,,D.CityFROM AdventureWorks.Sales.Customer A,AdventureWorks.Sales.Individual B,AdventureWorks.Sales.CustomerAddress C,AdventureWorks.Person.Address D,AdventureWorks.Person.StateProvince E,AdventureWorks.Person.CountryRegion F,AdventureWorks.Person.Contact GWHERE A.CustomerID=B.CustomerIDAND A.CustomerID=C.CustomerIDAND C.AddressID=D.AddressIDAND D.StateProvinceID=E.StateProvinceIDAND E.CountryRegionCode=F.CountryRegionCodeAND B.ContactID=G.ContactID----------------------------------------------------------------------------三、建事实表CREATE TABLE FACT_SALEORDER(SALEORDERID INT,TIME_CD VARCHAR(8),STATUS INT,ONLINEORDERFLAG INT,CUSTOMERID INT,SALESPERSONID INT,SHIPMETHOD INT,ORDER_VALUES INT,SUBTOTAL DECIMAL(10,2),TAXAMT DECIMAL(10,2),FREIGHT DECIMAL(10,2))-------------------------------------四、事实表的ETL/*4.1 FACT_SALEORDER的ETL*/TRUNCATE TABLE FACT_SALEORDERINSERT INTO FACT_SALEORDERSELECT SalesOrderID,CONVERT(CHAR(8),A.OrderDate,112) ,A.Status,A.OnlineOrderFlag,ISNULL(A.CustomerID,0),ISNULL(A.SalesPersonID,0),A. ShipMethodID,B.ORDER_VALUES_ID,A.SubTotal,A.TaxAmt,A.FreightFROM AdventureWorks.Sales.SalesOrderHeader A,V_SUBTOTAL_VALUES BWHERE A.SubTotal>=B.MIN_VALUE AND A.SubTotal<B.MAX_VALUE--缺省值处理INSERT INTO DIM_CUSTOMERselect DISTINCT CUSTOMERID,'未知','商店',0,'N','N','未知','未知','未知',0,0,'未知','未知','未知' from FACT_SALEORDERwhere CUSTOMERID not in (SELECT CUSTOMERIDFROM dbo.DIM_CUSTOMER )INSERT INTO DIM_SALEPERSONselect DISTINCT SALESPERSONID,'未知','未知' from FACT_SALEORDERwhere SALESPERSONID not in (SELECT SALESPERSONIDFROM dbo.DIM_SALEPERSON )select * FROM FACT_SALEORDER WHERE CUSTOMERID IS NULLSELECT * FROM FACT_SALEORDER AWHERE NOT EXISTS (SELECT 1 FROM dbo.DIM_CUSTOMER BWHERE A.CUSTOMERID=B.CUSTOMERID)select * FROM FACT_SALEORDER WHERE TIME_CD IS NULLSELECT * FROM FACT_SALEORDER AWHERE NOT EXISTS (SELECT 1 FROM dbo.DIM_DATE BWHERE A.TIME_CD=B.TIME_CD)select * FROM FACT_SALEORDER WHERE SHIPMETHOD IS NULLSELECT * FROM FACT_SALEORDER AWHERE NOT EXISTS (SELECT 1 FROM dbo.DIM_SHIPMETHOD BWHERE A.SHIPMETHOD=B.SHIPMETHODID)select * FROM FACT_SALEORDER WHERE STATUS IS NULLSELECT * FROM FACT_SALEORDER AWHERE NOT EXISTS (SELECT 1 FROM dbo.DIM_ORDER_STATUS BWHERE A.STATUS =B.STATUS )select * FROM FACT_SALEORDER WHERE SALESPERSONID IS NULLSELECT DISTINCT SALESPERSONID FROM FACT_SALEORDER AWHERE NOT EXISTS (SELECT 1 FROM dbo.DIM_SALEPERSON BWHERE A.SALESPERSONID =B.SALESPERSONID )select * FROM FACT_SALEORDER WHERE ONLINEORDERFLAG IS NULLSELECT * FROM FACT_SALEORDER AWHERE NOT EXISTS (SELECT 1 FROM dbo.DIM_ORDER_METHOD BWHERE A.ONLINEORDERFLAG =B.ONLINEORDERFLAG )select * FROM FACT_SALEORDER WHERE ORDER_VALUES IS NULLSELECT * FROM FACT_SALEORDER AWHERE NOT EXISTS (SELECT 1 FROM dbo.V_SUBTOTAL_VALUES BWHERE A.ORDER_VALUES =B.ORDER_VALUES_ID )insert into DIM_SALEPERSONvalues(0,'','无')UPDATE FACT_SALEORDER SET SALESPERSONID=0 WHERE SALESPERSONID NOT IN (SELECT SALESPERSONIDFROM DIM_SALEPERSON)select * from DIM_CUSTOMERwhere customerid in (select customerid from FACT_SALEORDERgroup by customeridhaving COUNT(*)>1)order by customeridSELECT * INTO AAAA FROM (SELECT A.*,ROW_NUMBER() OVER (PARTITION BY CUSTOMERIDORDER BY AGE ) AS OrderRankFROM DIM_CUSTOMER A) B WHERE OrderRank=1TRUNCATE TABLE DIM_CUSTOMERINSERT INTO DIM_CUSTOMERSELECT CUSTOMERID, CUSTOMER_NAME, CUSTOMERTYPE, AGE, SEX, MaritalStatus, YearlyIncome, Education, Occupation, NumberCarsOwned, TotalChildren, COUNTRY_NAME, STATEPROVINCE_NAME, CITY_NAME FROM AAAA五、实验过程原始记录( 测试数据、图表、计算等)六、实验结果、分析和结论(误差分析与数据处理、成果总结等。

相关主题