// // time_stretch.sci // // ------------------------------------------------------------------------ // copyright 2007 Alfred Steffens Jr. // ----------------------------------------------------------------------- // ----------------------------------------------------------------------- // Copying Permission: // // This is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation. // // This software is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this software (see file called "COPYING"); if not, write to // the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, // MA 02111-1307 USA // ------------------------------------------------------------------------ // // // // "Model" a note in the frequency domain by FFT of the subsections in time, // stretching each subsection by some factor, then re-assembling the time // data // // 1. Start with the sound file. Convert it to an ASCII data file (with Sox) // and do any necessary editing so that the file will be 2 columns: the // time in seconds and the sound wave amplitude normalized between -1 and // +1. Edit the line with soundfile = , and enter the file name. // // 2. Create a file that specifies the sample numbers at which to subdivide // the wave form into smaller sections. This must be a single-column // ASCII data file. It doesn't have to start at 1. The ending point of // the previous subsection will also be the starting point for the next // subsection. If the file specifies N subsections there will be N+1 // sample numbers in the file. 
//    Example:
//       8760     // start of section 1
//       12079    // start of section 2 (and end of 1)
//       15134    // start of section 3 (and end of 2)
//         .
//         .
//       34501    // start of section N
//       36337    // end of section N and last point
//    Edit the line with samplefile = , and enter the file name.
//
// 3. Create an ASCII data file with 2 columns that will be the "model"
//    file. This file must have the same number of rows (lines) as the
//    sample number file, the first row, column 1 will always be zero.
//    The number in the first column will be the time-stretch factor,
//    which must be an integer number (a whole number) due to the way the
//    FFT stretch works. The number in the second column will be the
//    relative amplitude (1 = no change) of that subsection.
//    Example:
//       0   0     // meaningless zero, amp at beginning of 1
//       1   0.4   // no stretch on 1, amp at end of 1
//       1   0.9   // no stretch on 2, amp at end of 2
//       8   1.0   // stretch on 3 by factor of 8, amp at end of 3
//       .   .
//       .   .
//       .   .
//    Edit the line with modelfile = , and enter the file name.
//
// 4. If the sample rate is not 44100, change the sample rate at the line
//    Fs = 44100.
//
// 5. The noise threshold will need to be tweaked each time you run. You
//    will need to modify the line
//       threshold = 0.04;
//    until you get the right result. This is the loudest relative amplitude
//    of a frequency component to keep. Every frequency will be compared by
//    first dividing each one by the amplitude of the loudest component.
//    This results in each amplitude being in the range of -1 to +1. Any
//    frequency component whose absolute value is less than the threshold
//    will be replaced with zero. In other words, the value of threshold
//    should be between 0 and 1.
//
// Outputs:
//    outstretch.dat  2-column ASCII file: time in seconds, stretched data
//    energy          (workspace) mean absolute amplitude of each output section
//    newbounds       (workspace) log of the new boundary sample numbers
//

getf('fft_stretch.sci', 'c');    // import the function
clear dataT;

// ------------------------------------------------------------------------
// User settings
// ------------------------------------------------------------------------

//
// input DATA file name
// This is assumed to be a 2-column ASCII data set where the first column
// is the time in seconds and the second column is the sound data normalized
// to values between -1 and +1.
//
soundfile = '../resample/asn5a.dat';

//
// input file name
// file containing the sample numbers where to cut the subsections
//
samplefile = 'sampletrack.dat';

//
// input file name
// file containing the "model", that is, 2 columns of numbers
//
modelfile = 'modelfile.dat';

//
// the sample rate
//
Fs = 44100;

//
// relative fraction of the largest amplitude to keep
//
threshold = 0.00;

//
// use a scale factor file? (this would be a second pass using the results
// in energy[] ). the scale factor file has one less row than the
// model file.
// set this to 1 to use a scale file
//
use_scale = 0;

//
// use a file of individual thresholds?
// if this is 0, the single value of threshold defined above will be used
// set this to 1 to use a threshold file
//
use_thresh_each = 0;

// ------------------------------------------------------------------------
// Read the input files
// ------------------------------------------------------------------------

//
// Read the input sound data already converted into ascii numbers
//
A  = read(soundfile, -1, 2);
tx = A(:,1);                     // time axis, seconds
y  = A(:,2);                     // sound amplitude
m  = size(A, 1);                 // number of samples in the sound file
tn = 1:m;                        // sample index axis (not used below; kept
                                 // in the workspace as in earlier versions)

//
// Read the sample numbers file
//
B       = read(samplefile, -1, 1);
sampmax = max(B);

//
// Read the model file
//
C    = read(modelfile, -1, 2);
ifac = C(2:size(C, 1), 1);       // time factors (row 1 is the meaningless zero)
fAmp = C(:,2);                   // amplitudes at the section boundaries

//
// number of time frames
// This is the number of subsections into which the sound data will be
// divided.
//
tmframes = size(B, '*') - 1;

// ------------------------------------------------------------------------
// Check the inputs before doing any work, so that a bad file is reported
// by name instead of as an indexing error in the middle of the loop
// ------------------------------------------------------------------------
if tmframes < 1 then
  error('time_stretch: ' + samplefile + ' must list at least 2 sample numbers');
end
if min(B) < 0 | sampmax > m then
  error('time_stretch: sample numbers in ' + samplefile + ..
        ' must lie between 0 and ' + string(m) + ..
        ' (the number of samples in ' + soundfile + ')');
end
if or(B(2:$) <= B(1:$-1)) then
  // an empty or reversed section has no samples to transform
  error('time_stretch: sample numbers in ' + samplefile + ..
        ' must be strictly increasing');
end
if size(C, 1) < tmframes + 1 then
  error('time_stretch: ' + modelfile + ' needs at least ' + ..
        string(tmframes + 1) + ' rows, one per row of ' + samplefile);
end

//
// if -- use_scale
//
if (use_scale == 1) then
  D = read('scale.dat', -1, 1);
  if size(D, '*') < tmframes then
    error('time_stretch: scale.dat needs at least ' + string(tmframes) + ' rows');
  end
end

//
// if -- use_thresh_each
// if this is 0, the single value of threshold defined above will be used
//
if (use_thresh_each == 1) then
  thresh_each = read('thresh.dat', -1, 1);
  if size(thresh_each, '*') < tmframes then
    error('time_stretch: thresh.dat needs at least ' + string(tmframes) + ' rows');
  end
end

// ------------------------------------------------------------------------
// Stretch each section and re-assemble the time data
// ------------------------------------------------------------------------
dataT     = [];                  // the new sound data (row vector)
newbounds = [];                  // the new sample numbers at the boundaries
nsamp     = 1;
newbounds = [newbounds, nsamp];
energy    = [];                  // mean absolute amplitude of each new section

//
// loop through each time section
//
for kloop = 1 : tmframes,
  //
  // get the current subsection from the data
  //
  n1 = B(kloop);
  m1 = B(kloop+1);
  //
  // an array of numbers counting from n1+1 to m1
  // (the boundary sample n1 itself belongs to the previous section)
  //
  q1 = (n1+1):m1;
  //
  // how many samples will be in this subsection?
  //
  M1    = size(q1, '*');         // size of the cut
  M1Q   = M1 * ifac(kloop);      // predicted new size
  nsamp = nsamp + M1Q - 1;
  //
  // the sound data for this subsection
  //
  y1 = y(q1);                    // y data from the cut
  p1 = sum(abs(y1)) / M1;        // energy (mean absolute amplitude)
  //
  // the time values corresponding to each data sample
  // (not used below; kept in the workspace as in earlier versions)
  //
  tx1 = tx(q1);                  // time axis
  //
  // ----------------------------------------------------
  //
  // time-stretch this subsection
  //
  if (use_thresh_each == 0) then
    //
    // 1 global threshold
    //
    y2 = fft_stretch(y1, ifac(kloop), threshold, 1);
  else
    //
    // thresholds on each section
    //
    y2 = fft_stretch(y1, ifac(kloop), thresh_each(kloop), 1);
  end
  //
  // The envelope multiplication and the concatenation below both need a
  // row vector; force that shape so the orientation returned by
  // fft_stretch does not matter.
  //
  y2 = y2(:).';
  //
  // ----------------------------------------------------
  //
  // calc an amplitude indicator of the result
  //
  p2 = sum(abs(y2)) / M1Q;       // energy
  //
  // correct the amplitude so the stretched section is as loud as the cut.
  // A silent result (p2 == 0) is left untouched: there is nothing to
  // rescale and p1/p2 would be a division by zero.
  //
  if p2 > 0 then
    y2 = y2 * (p1/p2);
  end
  //
  // apply scale factors from file ?
  //
  if (use_scale == 1) then
    y2 = y2 * D(kloop);
  end
  //
  // keep a log of the energies
  //
  p3     = sum(abs(y2)) / M1Q;   // final energy
  energy = [energy, p3];
  //
  // create the amplitude ramp (envelope) from the amplitude at the start
  // of this section to the amplitude at its end
  //
  M1M = size(y2, '*');
  a2  = linspace(fAmp(kloop), fAmp(kloop+1), M1M);
  //
  // amplitude factor on the output data
  //
  dataT = [dataT, a2 .* y2];
  //
  // keep a log of the new boundary sample locations
  //
  newbounds = [newbounds, nsamp];
end

// ------------------------------------------------------------------------
// Write the result
// ------------------------------------------------------------------------

//
// time in seconds
//
M3  = size(dataT, '*');
tx2 = (0:(M3-1)) / Fs;
fprintfMat('outstretch.dat', [tx2', dataT'], '%f');