当前位置:文档之家› 语音信号处理实验报告

语音信号处理实验报告

语音信号处理实验班级:学号:姓名:实验一 基于MATLAB 的语音信号时域特征分析(2学时)1) 短时能量 (1)加矩形窗 a=wavread('mike.wav'); a=a(:,1);subplot(6,1,1),plot(a); N=32; for i=2:6h=linspace(1,1,2.^(i-2)*N);%形成一个矩形窗,长度为2.^(i-2)*N En=conv(h,a.*a);% 求短时能量函数En subplot(6,1,i),plot(En); if (i==2) ,legend('N=32'); elseif (i==3), legend('N=64'); elseif (i==4) ,legend('N=128'); elseif (i==5) ,legend('N=256'); elseif (i==6) ,legend('N=512'); end end00.51 1.52 2.53x 104-1100.511.522.53x 104024N=3200.511.522.53x 10405N=6400.511.522.53x 1040510N=12800.511.522.53x 10401020N=25600.511.522.53x 10402040N=512(2)加汉明窗 a=wavread('mike.wav'); a=a(:,1);subplot(6,1,1),plot(a); N=32;for i=2:6h=hanning(2.^(i-2)*N);%形成一个汉明窗,长度为2.^(i-2)*N En=conv(h,a.*a);% 求短时能量函数En subplot(6,1,i),plot(En); if (i==2), legend('N=32'); elseif (i==3), legend('N=64'); elseif (i==4) ,legend('N=128'); elseif (i==5) ,legend('N=256'); elseif (i==6) ,legend('N=512'); end end00.51 1.52 2.53x 104-1100.511.522.53x 104012N=3200.511.522.53x 104024N=6400.511.522.53x 104024N=12800.511.522.53x 1040510N=25600.511.522.53x 10401020N=5122) 短时平均过零率 a=wavread('mike.wav'); a=a(:,1); n=length(a); N=320;subplot(3,1,1),plot(a); h=linspace(1,1,N);En=conv(h,a.*a); %求卷积得其短时能量函数En subplot(3,1,2),plot(En);for i=1:n-1 if a(i)>=0 b(i)= 1;else b(i) = -1; end if a(i+1)>=0 b(i+1)=1; else b(i+1)= -1; endw(i)=abs(b(i+1)-b(i)); %求出每相邻两点符号的差值的绝对值end k=1; j=0;while (k+N-1)<n Zm(k)=0; for i=0:N-1;Zm(k)=Zm(k)+w(k+i); end j=j+1;k=k+N/2; %每次移动半个窗 end for w=1:jQ(w)=Zm(160*(w-1)+1)/(2*N); %短时平均过零率 endsubplot(3,1,3),plot(Q),grid;00.51 1.52 2.53x 104-1100.51 1.52 2.53x 10410200204060801001201401601800.53) 自相关函数 N=240y=wavread('mike.wav'); y=y(:,1);x=y(13271:13510); x=x.*rectwin(240); R=zeros(1,240); for k=1:240for n=1:240-kR(k)=R(k)+x(n)*x(n+k); end end j=1:240;plot(j,R); grid;050100150200250-1.5-1-0.500.511.522.5实验二 基于MATLAB 分析语音信号频域特征1)短时谱 cleara=wavread('mike.wav'); a=a(:,1);subplot(2,1,1),plot(a);title('original signal'); grid N=256;h=hamming(N); for m=1:Nb(m)=a(m)*h(m)endy=20*log(abs(fft(b))) 
subplot(2,1,2)plot(y);title('短时谱'); grid0.511.522.53x 104-1-0.500.51original signal00.20.40.60.81 1.2 1.4 1.6 1.820.51短时谱2)语谱图[x,fs,nbits]=wavread('mike.wav') x=x(:,1);specgram(x,512,fs,100); xlabel('时间(s)'); ylabel('频率(Hz)');title('语谱图');时间(s)频率(H z )语谱图0.511.52100020003000400050003)倒谱和复倒谱(1)加矩形窗时的倒谱和复倒谱 cleara=wavread('mike.wav',[4000,4350]); a=a(:,1); N=300;h=linspace(1,1,N); for m=1:Nb(m)=a(m)*h(m); endc=cceps(b); c=fftshift(c); d=rceps(b); d=fftshift(d); subplot(2,1,1)plot(d);title('加矩形窗时的倒谱') subplot(2,1,2)plot(c);title('加矩形窗时的复倒谱')050100150200250300-2-11050100150200250300-10-50510加矩形窗时的复倒谱(2)加汉明窗时的倒谱和复倒谱 cleara=wavread('mike.wav',[4000,4350]); a=a(:,1); N=300;h=hamming(N); for m=1:Nb(m)=a(m)*h(m); endc=cceps(b); c=fftshift(c); d=rceps(b); d=fftshift(d); subplot(2,1,1)plot(d);title('加汉明窗时的倒谱') subplot(2,1,2)plot(c);title('加汉明窗时的复倒谱')50100150200250300-3-2-101050100150200250300-10-50510加汉明窗时的复倒谱实验三基于MATLAB的LPC分析MusicSource = wavread('mike.wav');MusicSource=MusicSource(:,1);Music_source = MusicSource';N = 256; % window length,N = 100 -- 1000;Hamm = hamming(N); % create Hamming windowframe = input('请键入想要处理的帧位置= ');% origin is current frameorigin = Music_source(((frame - 1) * (N / 2) + 1):((frame - 1) * (N / 2) + N));Frame = origin .* Hamm';%%Short Time Fourier Transform%[s1,f1,t1] = specgram(MusicSource,N,N/2,N);[Xs1,Ys1] = size(s1);for i = 1:Xs1FTframe1(i) = s1(i,frame);endN1 = input('请键入预测器阶数= '); % N1 is predictor's order[coef,gain] = lpc(Frame,N1); % LPC analysis using Levinson-Durbin recursionest_Frame = filter([0 -coef(2:end)],1,Frame); % estimate frame(LP)FFT_est = fft(est_Frame);err = Frame - est_Frame; % error% FFT_err = fft(err);subplot(2,1,1),plot(1:N,Frame,1:N,est_Frame,'-r');grid;title('原始语音帧vs.预测后语音帧') subplot(2,1,2),plot(err);grid;title('误差');pause%subplot(2,1,2),plot(f',20*log(abs(FTframe2)));grid;title('短时谱')%% Gain solution using G^2 = Rn(0) - sum(ai*Rn(i)),i = 1,2,...,P%fLength(1 : 2 * N) = [origin,zeros(1,N)];Xm = fft(fLength,2 * N);X = Xm .* 
conj(Xm);Y = fft(X , 2 * N);Rk = Y(1 : N);PART = sum(coef(2 : N1 + 1) .* Rk(1 : N1));G = sqrt(sum(Frame.^2) - PART);A = (FTframe1 - FFT_est(1 : length(f1'))) ./ FTframe1 ; % inverse filter A(Z)subplot(2,1,1),plot(f1',20*log(abs(FTframe1)),f1',(20*log(abs(1 ./ A))),'-r');grid;title('短时谱'); subplot(2,1,2),plot(f1',(20*log(abs(G ./ A))));grid;title('LPC谱');pause%plot(abs(ifft(FTframe1 ./ (G ./ A))));grid;title('excited')%plot(f1',20*log(abs(FFT_est(1 : length(f1')) .* A / G )));grid;%pause%% find_pitch%temp = FTframe1 - FFT_est(1 : length(f1'));% not move higher frequncepitch1 = log(abs(temp));pLength = length(pitch1);result1 = ifft(pitch1,N);% move higher frequncepitch1((pLength - 32) : pLength) = 0;result2 = ifft(pitch1,N);% direct do real cepstrum with errpitch = fftshift(rceps(err));origin_pitch = fftshift(rceps(Frame));subplot(211),plot(origin_pitch);grid;title('原始语音帧倒谱(直接调用函数)');subplot(212),plot(pitch);grid;title('预测误差倒谱(直接调用函数)');pausesubplot(211),plot(1:length(result1),fftshift(real(result1)));grid;title('预测误差倒谱(根据定义编写,没有去除高频分量)');subplot(212),plot(1:length(result2),fftshift(real(result2)));grid;title('预测误差倒谱(根据定义编写,去除高频分量)');50100150200250300-0.4-0.200.20.4原始语音帧vs.预测后语音帧50100150200250300-0.2-0.100.10.2误差010203040506070-100-5050短时谱010203040506070406080100LPC 谱050100150200250300-1-0.50.5原始语音帧倒谱(直接调用函数)050100150200250300-1-0.50.5预测误差倒谱(直接调用函数)50100150200250300-0.6-0.4-0.200.2预测误差倒谱(根据定义编写,没有去除高频分量)50100150200250300-0.3-0.2-0.100.1预测误差倒谱(根据定义编写,去除高频分量)50100150200250300-0.6-0.4-0.200.2预测误差倒谱(根据定义编写,没有去除高频分量)50100150200250300-0.3-0.2-0.100.1预测误差倒谱(根据定义编写,去除高频分量)50100150200250300-0.6-0.4-0.200.2预测误差倒谱(根据定义编写,没有去除高频分量)50100150200250300-0.3-0.2-0.100.1预测误差倒谱(根据定义编写,去除高频分量)实验四基于VQ的特定人孤立词语音识别研究1、mfcc.mfunction ccc = mfcc(x)bank=melbankm(24,256,8000,0,0.5,'m');bank=full(bank);bank=bank/max(bank(:));for k=1:12n=0:23;dctcoef(k,:)=cos((2*n+1)*k*pi/(2*24));endw = 1 + 6 * sin(pi * [1:12] ./ 12);w = w/max(w);xx=double(x);xx=filter([1 -0.9375],1,xx);xx=enframe(xx,256,80);for 
i=1:size(xx,1)y = xx(i,:);s = y' .* hamming(256);t = abs(fft(s));t = t.^2;c1=dctcoef * log(bank * t(1:129));c2 = c1.*w';m(i,:)=c2';enddtm = zeros(size(m));for i=3:size(m,1)-2dtm(i,:) = -2*m(i-2,:) - m(i-1,:) + m(i+1,:) + 2*m(i+2,:); enddtm = dtm / 3;ccc = [m dtm];ccc = ccc(3:size(m,1)-2,:);2、vad.mfunction [x1,x2] = vad(x)x = double(x);x = x / max(abs(x));FrameLen = 240;FrameInc = 80;amp1 = 10;amp2 = 2;zcr1 = 10;zcr2 = 5;maxsilence = 8; % 8*10ms = 80msminlen = 15; % 15*10ms = 150msstatus = 0;count = 0;silence = 0;tmp1 = enframe(x(1:end-1), FrameLen, FrameInc);tmp2 = enframe(x(2:end) , FrameLen, FrameInc);signs = (tmp1.*tmp2)<0;diffs = (tmp1 -tmp2)>0.02;zcr = sum(signs.*diffs, 2);amp = sum(abs(enframe(filter([1 -0.9375], 1, x), FrameLen, FrameInc)), 2);amp1 = min(amp1, max(amp)/4);amp2 = min(amp2, max(amp)/8);x1 = 0;x2 = 0;for n=1:length(zcr)goto = 0;switch statuscase {0,1}if amp(n) > amp1x1 = max(n-count-1,1);status = 2;silence = 0;count = count + 1;elseif amp(n) > amp2 | ...zcr(n) > zcr2status = 1;count = count + 1;elsestatus = 0;count = 0;endcase 2,if amp(n) > amp2 | ...zcr(n) > zcr2count = count + 1;elsesilence = silence+1;if silence < maxsilence count = count + 1;elseif count < minlenstatus = 0;silence = 0;count = 0;elsestatus = 3;endendcase 3,break;endendcount = count-silence/2;x2 = x1 + count -1;3、codebook.m%clear;function xchushi= 
codebook(m)[a,b]=size(m);[m1,m2]=szhixin(m);[m3,m4]=szhixin(m2);[m1,m2]=szhixin(m1);[m7,m8]=szhixin(m4);[m5,m6]=szhixin(m3);[m3,m4]=szhixin(m2);[m1,m2]=szhixin(m1);[m15,m16]=szhixin(m8);[m13,m14]=szhixin(m7);[m11,m12]=szhixin(m6);[m9,m10]=szhixin(m5);[m7,m8]=szhixin(m4);[m5,m6]=szhixin(m3);[m3,m4]=szhixin(m2);[m1,m2]=szhixin(m1);chushi(1,:)=zhixinf(m1);chushi(2,:)=zhixinf(m2);chushi(3,:)=zhixinf(m3);chushi(4,:)=zhixinf(m4);chushi(5,:)=zhixinf(m5);chushi(6,:)=zhixinf(m6);chushi(7,:)=zhixinf(m7);chushi(8,:)=zhixinf(m8);chushi(9,:)=zhixinf(m9);chushi(10,:)=zhixinf(m10);chushi(11,:)=zhixinf(m11);chushi(12,:)=zhixinf(m12);chushi(13,:)=zhixinf(m13);chushi(14,:)=zhixinf(m14);chushi(15,:)=zhixinf(m15);chushi(16,:)=zhixinf(m16);sumd=zeros(1,1000);k=1;dela=1;xchushi=chushi;while(k<=1000)sum=ones(1,16);for p=1:afor i=1:16d(i)=odistan(m(p,:),chushi(i,:));enddmin=min(d);sumd(k)=sumd(k)+dmin;for i=1:16if d(i)==dminxchushi(i,:)=xchushi(i,:)+m(p,:); sum(i)=sum(i)+1;endendendfor i=1:16xchushi(i,:)=xchushi(i,:)/sum(i); endif k>1dela=abs(sumd(k)-sumd(k-1))/sumd(k); endk=k+1;chushi=xchushi;return4、testvq.mclear;disp('这是一个简易语音识别系统,请保证已经将您的语音保存在相应文件夹中')disp('正在训练您的语音模版指令,请稍后...')for i=1:10fname = sprintf('D:\\matlab\\work\\dtw1\\海儿的声音\\%da.wav',i-1); x = wavread(fname);[x1 x2] = vad(x);m = mfcc(x);m = m(x1:x2-5,:);ref(i).code=codebook(m);enddisp('语音指令训练成功,恭喜!')disp('正在测试您的测试语音指令,请稍后...')for i=1:10fname = sprintf('D:\\matlab\\work\\dtw1\\海儿的声音\\%db.wav',i-1);x = wavread(fname);[x1 x2] = vad(x);mn = mfcc(x);mn = mn(x1:x2-5,:);%mn = mn(x1:x2,:)test(i).mfcc = mn;endsumsumdmax=0;sumsumdmin=0;disp('对训练过的语音进行测试')for w=1:10sumd=zeros(1,10);[a,b]=size(test(w).mfcc);for i=1:10for p=1:afor j=1:16d(j)=odistan(test(w).mfcc(p,:),ref(i).code(j,:));enddmin=min(d);sumd(i)=sumd(i)+dmin;%总失真endsumdmin=min(sumd)/a;sumdmin1=min(sumd);sumdmax(w)=max(sumd)/a;sumsumdmin=sumdmin+sumsumdmax;sumsumdmax=sumdmax(w)+sumsumdmax;disp('正在匹配您的语音指令,请稍后...')for i=1:10if (sumd(i)==sumdmin1)switch (i)case 
1fprintf('您输入的语音指令为:%s; 识别结果为%s\n','前', '前');case 2fprintf('您输入的语音指令为:%s; 识别结果为%s\n','后', '后');case 3fprintf('您输入的语音指令为:%s; 识别结果为%s\n','左', '左');case 4fprintf('您输入的语音指令为:%s; 识别结果为%s\n','右', '右');case 5fprintf('您输入的语音指令为:%s; 识别结果为%s\n','东', '东');case 6fprintf('您输入的语音指令为:%s; 识别结果为%s\n','南', '南');case 7fprintf('您输入的语音指令为:%s; 识别结果为%s\n','西', '西');case 8fprintf('您输入的语音指令为:%s; 识别结果为%s\n','北', '北');case 9fprintf('您输入的语音指令为:%s; 识别结果为%s\n','上', '上');case 10fprintf('您输入的语音指令为:%s; 识别结果为%s\n','下', '下');otherwisefprintf('error');endendendenddelamin=sumsumdmin/10;delamax=sumsumdmax/10;disp('对没有训练过的语音进行测试')disp('正在测试你的语音,请稍后...')for i=1:10fname = sprintf('D:\\matlab\\work\\dtw1\\海儿的声音\\%db.wav',i-1); x = wavread(fname);[x1 x2] = vad(x);mn = mfcc(x);mn = mn(x1:x2-5,:);%mn = mn(x1:x2,:)test(i).mfcc = mn;endfor w=1:10sumd=zeros(1,10);[a,b]=size(test(w).mfcc);for i=1:10for p=1:afor j=1:16d(j)=odistan(test(w).mfcc(p,:),ref(i).code(j,:));enddmin=min(d);sumd(i)=sumd(i)+dmin;%总失真endendsumdmin=min(sumd);z=0;for i=1:10if (((sumd(i))/a)>delamax)||z=z+1;endenddisp('正在匹配您的语音指令,请稍后...')if z<=3for i=1:10if (sumd(i)==sumdmin)switch (i)case 1fprintf('您输入的语音指令为:%s; 识别结果为%s\n','前', '前');case 2fprintf('您输入的语音指令为:%s; 识别结果为%s\n','后', '后');case 3fprintf('您输入的语音指令为:%s; 识别结果为%s\n','左', '左');case 4fprintf('您输入的语音指令为:%s; 识别结果为%s\n','右', '右');case 5fprintf('您输入的语音指令为:%s; 识别结果为%s\n','东', '东');case 6fprintf('您输入的语音指令为:%s; 识别结果为%s\n','南', '南');case 7fprintf('您输入的语音指令为:%s; 识别结果为%s\n','西', '西');case 8fprintf('您输入的语音指令为:%s; 识别结果为%s\n','北', '北');case 9fprintf('您输入的语音指令为:%s; 识别结果为%s\n','上', '上');case 10fprintf('您输入的语音指令为:%s; 识别结果为%s\n','下', '下');otherwisefprintf('error');endendendelsefprintf('您输入的语音无效!\n')endend。

相关主题