function align()
%ALIGN  Time-align pairs of utterances from two speakers with DTW.
%
% Alignment of a list of speech files from two speakers uttering the same
% sentences, using DTW based on magnitude spectra.
%
% For every file name in the list D (loaded from a .mat file) the function:
%   1. reads the recording of that name from each speaker's directory,
%   2. computes STFTs (specgram) and MFCCs (Comp_MFCC) for both signals,
%   3. builds a local match matrix between the STFT magnitudes (simmx) and
%      runs dynamic programming on 1-SM with 5 allowed steps (dpfast),
%   4. appends the MFCC frames selected by the alignment path, and the
%      matching frame time stamps plus sentence index, to binary files.
% Finally it plots a histogram of the per-sentence difference in STFT frame
% count, prints its median and saves the figure as EPS.
%
% Takes no arguments and returns nothing; all inputs and outputs are the
% hard-coded paths / file names below.
%
% Output files (all written with fwrite(...,'float'), column-major):
%   validation_target_awb.aligned.mfcc  aligned MFCC frames, speaker 1
%   validation_source_td.aligned.mfcc   aligned MFCC frames, speaker 2
%   validation_target_awb.time.data     [time_ms; sentence_index] pairs
%   validation_source_td.time.data      [time_ms; sentence_index] pairs
%
% Similarity measurement (inner product) and dynamic programming (5 steps)
% from Dan Ellis, Columbia University, NY
%
% Yannis Stylianou, 29/07/2006
% University of Crete, Computer Science Dept., Multimedia Informatics Lab
%
% yannis@csd.uoc.gr
%
% Modified by Matthieu Jottrand (added the building of the time and name
% vectors).

% Directories holding the recordings of the two speakers. They are joined
% to the file names with fullfile, so a trailing separator is optional.
dr1 = 'c:\\Documents and Settings\\Jottrand\\Mes documents\\ENTERFACE06\\databases\\Arctic\\validation\\cmu_us_awb_arctic\wav\\';
dr2 = 'c:\\Documents and Settings\\Jottrand\\Mes documents\\ENTERFACE06\\databases\\Arctic\\validation\\eNTERFACE06_us_td_arctic\\wav';

% For STFT features for both sounds (75% window overlap)
NFFT = 512;
NOVERLAP = round(0.75*NFFT);

% For mfcc (parameter struct passed to Comp_MFCC)
p.fs = 16000;
p.visu = 0;
p.fft_size = NFFT;
p.hopsize = round(0.25*NFFT);   % same hop as the STFT (NFFT - NOVERLAP)
p.num_ceps_coeffs = 20;
p.use_first_coeff = 0;
p.male_filt_bank = [20 16000 40];
p.dB_max = 96;
p.nrg = 0;

% for DTW
St = [1 1 1;1 0 1;0 1 1;1 2 2;2 1 2]; % steps

% for time stamping (both values are in milliseconds)
fft_size_ms = NFFT*1000/p.fs;       % length of the analysis window
hopsize_ms  = p.hopsize*1000/p.fs;  % length of the hop

% load list of files -> D
% Functional form of load: no eval, and nothing but D enters the workspace.
S = load('c:\\MATLAB\\work\\List_of_Validation_Files_V6.mat');
if ~isfield(S,'D')
   error('align:missingFileList', ...
         'The file list .mat file does not contain a variable named D.');
end
D = S.D;
Lfl = length(D);

% collect some data for statistics
diff_of_length = zeros(Lfl,1);

% open files for saving aligned cepstral coeff. and time data
% order: target mfcc, source mfcc, target time data, source time data
out_names = {'validation_target_awb.aligned.mfcc', ...
             'validation_source_td.aligned.mfcc', ...
             'validation_target_awb.time.data', ...
             'validation_source_td.time.data'};
fids = -ones(1,numel(out_names));
for k = 1:numel(out_names)
   fids(k) = fopen(out_names{k},'wb');
end
if any(fids < 0)
   % Do not leave the successfully opened files dangling.
   for k = find(fids >= 0)
      fclose(fids(k));
   end
   bad = out_names(fids < 0);
   error('align:cannotOpenOutput', 'Cannot open output file: %s', bad{1});
end
fp_target      = fids(1);
fp_source      = fids(2);
fp_target_data = fids(3);
fp_source_data = fids(4);

% do alignment and save mfcc coeff.
try
   for i = 1:Lfl
      % i is also the sentence index stored next to every time stamp
      fl = D(i).name;
      fprintf('[%d/%d] %s\n', i, Lfl, fl);

      % read input files
      fl1 = fullfile(dr1, fl);
      fl2 = fullfile(dr2, fl);
      [d1,sr1] = wavread(fl1);
      [d2,sr2] = wavread(fl2);

      % Time stamps and MFCCs are computed with p.fs, not with the rate
      % stored in the files, so a mismatch silently skews the results.
      if sr1 ~= p.fs || sr2 ~= p.fs
         warning('align:sampleRate', ...
                 'Sampling rate of %s (%g / %g Hz) differs from p.fs (%g Hz).', ...
                 fl, sr1, sr2, p.fs);
      end

      % compute STFT
      D1 = specgram(d1,NFFT,sr1,NFFT,NOVERLAP);
      D2 = specgram(d2,NFFT,sr2,NFFT,NOVERLAP);

      % difference in the number of analysis frames of the two input files
      diff_of_length(i) = size(D1,2)-size(D2,2);

      % compute MFCC
      C1 = Comp_MFCC(d1,p);
      C2 = Comp_MFCC(d2,p);

      % Construct the 'local match' scores matrix between the STFT
      % magnitudes; 1-SM below turns it into a cost for the DTW.
      SM = simmx(abs(D1),abs(D2));

      % DTW : from Dan Ellis, Columbia Univ.
      % using the fast version and 5 steps
      [n,m] = dpfast(1-SM,St);

      % Force row vectors so the vertical concatenations below are valid.
      n = n(:).';
      m = m(:).';

      % NOTE(review): n and m index STFT frames but are applied to the
      % MFCC matrices; this assumes Comp_MFCC returns one column per STFT
      % frame -- confirm against Comp_MFCC.
      TargetMfcc = C1(:,n);
      SourceMfcc = C2(:,m);

      vT = size(TargetMfcc);
      vS = size(SourceMfcc);

      if isequal(vT,vS)
         % Compute time data: (frame-1)*hop + half a window, in ms
         target_time = n*hopsize_ms + fft_size_ms/2 - hopsize_ms;
         source_time = m*hopsize_ms + fft_size_ms/2 - hopsize_ms;

         % sentence index repeated once per aligned frame
         name = ones(1,vT(2))*i;
         target_data = [target_time;name];
         source_data = [source_time;name];

         % (:) writes column by column, i.e. one frame after the other
         fwrite(fp_target,      TargetMfcc(:),  'float');
         fwrite(fp_source,      SourceMfcc(:),  'float');
         fwrite(fp_target_data, target_data(:), 'float');
         fwrite(fp_source_data, source_data(:), 'float');
      else
         disp('there is a problem in the DTW');
         % Drop into the debugger so the mismatch can be inspected.
         keyboard;pause;
      end
   end
catch
   % Close the output files before propagating the error.
   err = lasterror;
   for k = 1:numel(fids)
      fclose(fids(k));
   end
   rethrow(err);
end

for k = 1:numel(fids)
   fclose(fids(k));
end

% statistics on the difference in frame count between the two speakers
hist(diff_of_length,20);
fprintf('Median difference in number of frames: %g\n', median(diff_of_length));
print -deps Diff_awb_td
