当前位置:文档之家› 数据仓库的建立实验一

数据仓库的建立实验一

昆明理工大学信息工程与自动化学院学生实验报告(2014 —2015 学年第 1 学期)课程名称:数据库仓库与数据挖掘开课实验室:信自楼4442014 年12月28日一、实验内容和目的目的:1.理解数据库与数据仓库之间的区别与联系;2.掌握典型的关系型数据库及其数据仓库系统的工作原理以及应用方法;3.掌握数据仓库建立的基本方法及其相关工具的使用。

二、实验原理及基本技术路线图(方框原理图)数据库(DataBase,DB)是长期存储在计算机内、有组织的、统一管理的相关数据的集合。

DB能为各种用户共享,具有较小的冗余度、数据间联系紧密而又有较高的数据独立性等特点。

构成的三要素是数据结构、数据操作、约束性条件。

三、所用仪器、材料(设备名称、型号、规格等)PC机和Microsoft SQL Server 2008四、实验方法、步骤1、登录SQL Server登录名:localhost2、使用SQL语句构建数据库(1)还原数据库(2)建立数据--建立数据USE cdCREATE DATABASE[DW]ON PRIMARY(NAME=N'DW',FILENAME=N'G:\DW.mdf')LOG ON(NAME=N'DW_log',FILENAME=N'G:\DW_log.ldf') GO(3)建立数据库:数据库→新建数据库(4)建维表①SQL语句USE DW----------------------------------1、建维表/*1.1 订单方式*/CREATE TABLE DIM_ORDER_METHOD(ONLINEORDERFLAG INT,DSC VARCHAR(20))/*1.2 销售人员及销售地区*/CREATE TABLE DIM_SALEPERSON(SALESPERSONID INT,DSC VARCHAR(20),SALETERRITORY_DSC VARCHAR(50))/*1.3 发货方式*/CREATE TABLE DIM_SHIPMETHOD (SHIPMETHODID INT,DSC VARCHAR(20))/*1.4 订单日期*/CREATE TABLE DIM_DATE(TIME_CD VARCHAR(8),TIME_MONTH VARCHAR(6),TIME_YEAR VARCHAR(6),TINE_QUAUTER VARCHAR(8),TIME_WEEK VARCHAR(6),TIME_XUN VARCHAR(4))/*1.5 客户*/CREATE TABLE DIM_CUSTOMER (CUSTOMERID INT,CUSTOMER_NAME VARCHAR(100), CUSTOMERTYPE VARCHAR(20),AGE INT,SEX VARCHAR(2),MaritalStatus VARCHAR(10), YearlyIncome VARCHAR(50),Education VARCHAR(50),Occupation VARCHAR(50), NumberCarsOwned INT,TotalChildren INT,COUNTRY_NAME VARCHAR(100), STATEPROVINCE_NAME VARCHAR(100), CITY_NAME VARCHAR(100))/*1.6 订单状态*/CREATE TABLE DIM_ORDER_STATUS (STATUS INT,DSC VARCHAR(30))/*1.7 客户价值*/CREATE TABLE V_SUBTOTAL_VALUES (ORDER_VALUES_ID INT,DSC VARCHAR(30),MIN_VALUE INT,MAX_VALUE INT)(5)建维度表的ETL①SQL语句--二、维度表的ETL-------------------------------------INSERT INTO DIM_ORDER_METHODVALUES (0,'销售人员')INSERT INTO DIM_ORDER_METHODVALUES (1,'客户在线')INSERT INTO DIM_SHIPMETHODSELECT ShipMethodID,NAME FROM cd.Purchasing.ShipMethodINSERT INTO DIM_SALEPERSONSELECT A.SalesPersonID,'',FROM cd.Sales.SalesPerson A,cd.Sales.SalesTerritory B WHERE A.TerritoryID=B.TerritoryIDINSERT INTO DIM_ORDER_STATUSVALUES(1,'处理中')INSERT INTO DIM_ORDER_STATUSVALUES(2,'已批准')INSERT INTO DIM_ORDER_STATUSVALUES(3,'预订')INSERT INTO DIM_ORDER_STATUSVALUES(4,'已拒绝')INSERT INTO DIM_ORDER_STATUSVALUES(5,'已发货')INSERT INTO DIM_ORDER_STATUSVALUES(6,'已取消')INSERT INTO V_SUBTOTAL_VALUESVALUES(1,'0-100',0,100 )INSERT INTO V_SUBTOTAL_VALUESVALUES(2,'100-500',100,500 )INSERT INTO V_SUBTOTAL_VALUESVALUES(3,'500-1000',500,1000 )INSERT INTO V_SUBTOTAL_VALUESVALUES(4,'1000-2000',1000,2000 )INSERT INTO V_SUBTOTAL_VALUESVALUES(5,'2000-5000',2000,5000 )INSERT INTO V_SUBTOTAL_VALUESVALUES(6,'5000以上',5000,1000000000 )declare@day dateTIMESET@day='2001-01-01'while@day<'2005-01-01'BEGINinsert into DIM_DATESELECT CONVERT(CHAR(8),@day,112),CONVERT(CHAR(6),@day,112),CONVERT(CHAR(4),@day,112)+'年','第'+CAST(DATEname(QUARTER,@day)AS VARCHAR(1))+'季度',DATEname(weekday,@day),case WHEN DATEPART(DAY,@day)<11 THEN'上旬'WHEN DATEPART(DAY,@day)<21 THEN'中旬'ELSE'下旬'ENDSELECT@day=DATEADD(DAY,1,@day)ENDINSERT INTO DIM_CUSTOMERSELECT A.CustomerID,G.FirstName,CASE WHEN A.CustomerType='I'THEN'个人'ELSE'商店'END,DATEDIFF(YEAR,Demographics.value('declare namespace zhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:BirthDate)[1]','VARCHAR(10)'),GETDATE()), Demographics.value('declare namespace zhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:Gender)[1]','VARCHAR(2)'),Demographics.value('declare namespace zhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:MaritalStatus)[1]','VARCHAR(10)'),Demographics.value('declare namespace zhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:YearlyIncome)[1]','VARCHAR(20)'),Demographics.value('declare namespace zhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:Education)[1]','VARCHAR(20)'),Demographics.value('declare namespace zhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:Occupation)[1]','VARCHAR(20)'),Demographics.value('declare namespace zhh="/sqlserver/2004/07/adventure-works/IndividualSurvey";(/zhh:IndividualSurvey/zhh:NumberCarsOwned)[1]','int'),Demographics.value('declare namespace zhh="/sqlserver/2004/07/adventure-works/Individual Survey";(/zhh:IndividualSurvey/zhh:TotalChildren)[1]','int'),,,D.CityFROM cd.Sales.Customer A,cd.Sales.Individual B,cd.Sales.CustomerAddress C,cd.Person.Address D,cd.Person.StateProvince E,cd.Person.CountryRegion F,cd.Person.Contact GWHERE A.CustomerID=B.CustomerIDAND A.CustomerID=C.CustomerIDAND C.AddressID=D.AddressIDAND D.StateProvinceID=E.StateProvinceIDAND E.CountryRegionCode=F.CountryRegionCodeAND B.ContactID=G.ContactID②执行SQL语句后(6)建事实表--三、建事实表CREATE TABLE FACT_SALEORDER(SALEORDERID INT,TIME_CD VARCHAR(8),STATUS INT,ONLINEORDERFLAG INT,CUSTOMERID INT,SALESPERSONID INT,SHIPMETHOD INT,ORDER_VALUES INT,SUBTOTAL DECIMAL(10,2),TAXAMT DECIMAL(10,2),FREIGHT DECIMAL(10,2))(7)建事实表的ETL①SQL语句--四、事实表的ETL/*4.1 FACT_SALEORDER的ETL*/TRUNCATE TABLE FACT_SALEORDERINSERT INTO FACT_SALEORDERSELECT SalesOrderID,CONVERT(CHAR(8),A.OrderDate,112),A.Status,A.OnlineOrderFlag,ISNULL(A.CustomerID,0),ISNULL(A.SalesPersonID,0),A. ShipMethodID,B.ORDER_VALUES_ID,A.SubTotal,A.TaxAmt,A.FreightFROM cd.Sales.SalesOrderHeader A,V_SUBTOTAL_VALUES BWHERE A.SubTotal>=B.MIN_VALUE AND A.SubTotal<B.MAX_VALUE--缺省值处理INSERT INTO DIM_CUSTOMERselect DISTINCT CUSTOMERID,'未知','商店',0,'N','N','未知','未知','未知',0,0,'未知','未知','未知'from FACT_SALEORDERwhere CUSTOMERID not in(SELECT CUSTOMERIDFROM dbo.DIM_CUSTOMER)②执行SQL语句后五、实验过程原始记录(数据、图表、计算等)将还原后的数据库(cd)的数据转到建立好数据库(DW)维度表和事实表后,效果如下图六、实验结果、分析和结论(误差分析与数据处理、成果总结等。

相关主题