% -----------------------------------------------------------------------
% Finds spurts and plateaus using AMD
%
% The spurt_analysis module finds significant spurts in the data and, for
% each one, outputs 3 parameters (with standard error values): amplitude,
% duration and location.
%
% Ganger data
% Within each row, each entry is the number of NEW words reported each day
% for that child. 
% -1 is used to represent missing days. 
% Each child's data is on a new row. Each row is in REVERSE chronological 
% order (i.e., the first entry represents the last day of data collection). 
% -----------------------------------------------------------------------

% Start from an empty workspace so variables left over from a previous run
% cannot leak into this analysis.
clear all

% Put the ../AMD_library and ../FDA_library directories on the MATLAB path.
addpath('../AMD_library')
addpath('../FDA_library')

% Loading global constants
global_constants;

% Value assigned further down to both lambda_fct and lambda_stderr
% (presumably a smoothing penalty -- confirm against find_spurts).
lambda = 1e10;

% All plots and text output of this script are written under this directory.
resultsDir = './results/ganger/';

% Create the output directory only when it is missing: calling mkdir on an
% existing directory emits a warning on every re-run of the script.
if ~exist(resultsDir, 'dir')
    mkdir(resultsDir);
end

% To set simulation parameters, see the description of
% simulation_parameters_template in file global_constants.m
simulation_parameters = simulation_parameters_template;

% Load the Ganger vocabulary data. Each row is one child: the first column
% is the subject id and the remaining columns are the number of NEW words
% reported per day (-1 = missing day), stored in REVERSE chronological order.
all_data_reversed = load('./Ganger/ganger.dat');

all_subjectId = all_data_reversed(:,1);
all_data_reversed(:,1) = [];  % first column is subject id, so drop

% Flip the columns left-to-right so that each row runs in chronological
% order (first column = first day of data collection).
all_data = fliplr(all_data_reversed);

% Build the cumulative vocabulary curve of every child.
%
% sample_t     : 1-by-nDays row vector of day indices, shared by all children
%                (every day gets a sample, including missing ones).
% all_cumul_voc: nChildren-by-nDays matrix; entry (r,d) is the running total
%                of new words of child r up to and including day d.
%
% A day counts as a valid observation when its value is > -1. Any other
% entry (the -1 "missing day" marker) contributes no new words, so the
% previous cumulative value is simply repeated for that day.
sample_t = 1:size(all_data,2);

new_words = all_data;
new_words(~(all_data > -1)) = 0;   % missing days add nothing to the total

% Running sum along each row (dimension 2 = days)
all_cumul_voc = cumsum(new_words, 2);

% --- Fill in the parameter structure handed to find_spurts / plot_spurts ---

% Labels and identifiers
simulation_parameters(1).title = 'Vocabulary growth';
simulation_parameters.subject_ids = all_subjectId;

% Abscissa: day index, stored as a column vector
simulation_parameters.sample_ts = sample_t';
simulation_parameters.x_label = 'Day';

% Ordinate: cumulative vocabulary, transposed so each child is one column
simulation_parameters.sample_ys = all_cumul_voc';
simulation_parameters.y_label = 'Vocabulary item count';

% Every child is placed in the same (unlabelled) group number 1
simulation_parameters.group_ids = ones(size(all_subjectId));
simulation_parameters.group_labels = {''};

% Analysis settings (see simulation_parameters_template in
% global_constants.m for the meaning of each field)
simulation_parameters.degree = 1;
simulation_parameters.pvalue = 0.05;
simulation_parameters.error_level = 1e-4;
simulation_parameters.output_all_maxima = false;
simulation_parameters.compute_std_errors = false;
simulation_parameters.boot_samples = 0;
simulation_parameters.lambda_fct = lambda;
simulation_parameters.lambda_stderr = lambda;

% Accumulates the spurt results of all cases, concatenated horizontally
significant_spurts_data = [];

numCases = numel(all_subjectId);  % one case (curve) per subject in the raw data
for caseIdx = 1:numCases
    % Run the spurt detection for this case; the returned graph handle is
    % not used below.
    [spurt_data, graph_handle] = find_spurts(simulation_parameters, caseIdx);
    significant_spurts_data = [significant_spurts_data, spurt_data];

    % Build the image name as <resultsDir>case_<subject id>_<group label>.png
    groupIdx = simulation_parameters.group_ids(caseIdx);
    subjectTag = num2str(simulation_parameters.subject_ids(caseIdx));
    groupTag = char(simulation_parameters.group_labels(groupIdx));
    pngPath = [resultsDir, 'case_', subjectTag, '_', groupTag, '.png'];

    % Save the figure as PNG, then close the current figure before moving
    % on to the next case
    print('-f', '-dpng', pngPath);
    close
end

% Write the collected spurt data to a text file in the results directory
spurtsTextFile = [resultsDir, 'AMD_spurts.txt'];
output_significant_data(spurtsTextFile, significant_spurts_data, ...
    simulation_parameters.compute_std_errors);

% Group-level plot of all spurts; SORT_CASE_ID is not defined in this
% script (presumably set by global_constants -- confirm).
h = plot_spurts(significant_spurts_data, simulation_parameters, SORT_CASE_ID);

% Only save and close when plot_spurts returned a true-valued result
if h
    groupPngPath = [resultsDir,'spurts.png'];
    print('-f', '-dpng', groupPngPath);
    close
end


