%% ========================================================================= %% PLAYER LOAD EQUATION COMPARISON — FULL PROCESSING PIPELINE %% ========================================================================= % % Callaway, A.J., Ellis, S.A., & Williams, J. % "Not All Player Load Equations Are Equivalent: Formula Choice Alters % Load Magnitude and Interpretation in Elite Football" % % This script reproduces the complete analysis pipeline from raw PFF tracking % data files through to all statistical outputs used in the manuscript. % % WHAT THIS SCRIPT DOES (in order): % % Stage 1 — Extract player positions from event JSON files % Builds a player-position lookup table (game + team + jersey) % % Stage 2 — Parse tracking JSONL files % Extracts raw and Kalman-smoothed XY coordinates per frame % Saves one intermediate .mat file per game % % Stage 3 — Derive acceleration % Applies four signal-processing conditions (raw, Kalman, % Butterworth 2 Hz, Butterworth 4 Hz) and computes acceleration % using a central finite-difference scheme % % Stage 4 — Apply all 11 PlayerLoad equations % Accumulates per-player-match load under each condition % Saves summary table as both .mat and .csv % % Stage 5 — Statistical analysis % Descriptive statistics, Bland-Altman agreement, pairwise % conversion coefficients, rank-order correlations % Saves three Excel workbooks % % INPUT FILES REQUIRED: % - Tracking data: one .jsonl file per game (from PFF Sports) % - Event data: one .json file per game (from PFF Sports) % % OUTPUT FILES PRODUCED: % - player_positions.csv — player position lookup table % - intermediate/*.mat — one per game (can delete after Stage 4) % - PlayerLoad_Summary.mat/.csv — per-player-match load values, all equations % - PlayerLoad_Descriptive.xlsx — mean ± SD by position and condition % - PlayerLoad_BlandAltman.xlsx — agreement statistics for all equation pairs % - PlayerLoad_RatioMatrix.xlsx — pairwise conversion coefficients % % REQUIREMENTS: % - MATLAB R2020b or later % - Signal Processing Toolbox (for butter, filtfilt) % % USAGE: % 1. Set the five paths in SECTION 0 below % 2. Run the script (Ctrl+Enter runs section by section, or press Run for all) % 3. Each stage is independent — if a stage has already been run, set the % corresponding CONFIG.skip_* flag to true to resume from that point % % ESTIMATED RUN TIME (64-game World Cup dataset): % Stage 1: ~2 min % Stage 2: ~20-40 min (depends on hardware and file I/O speed) % Stage 3: ~30-60 min % Stage 4: ~5 min % Stage 5: ~10 min % % ========================================================================= clear; clc; close all; %% ========================================================================= %% SECTION 0: CONFIGURATION — SET THESE FIVE PATHS BEFORE RUNNING %% ========================================================================= % Folder containing tracking data files — one .jsonl file per game % Example: '/Users/yourname/data/tracking/' CONFIG.tracking_folder = '/path/to/tracking_jsonl_files/'; % Folder containing event data files — one .json file per game % These are used to extract player positions (position codes such as CM, CB etc.) % Example: '/Users/yourname/data/events/' CONFIG.event_folder = '/path/to/event_json_files/'; % Folder where all output files will be written % The script will create subfolders automatically % Example: '/Users/yourname/data/output/' CONFIG.output_folder = '/path/to/output/'; % --- Optional: resume flags ----------------------------------------------- % Set to true to skip a stage that has already completed successfully. % Stage 2 is automatically resume-safe (skips games already processed). CONFIG.skip_stage1 = false; % set true if player_positions.csv already exists CONFIG.skip_stage2 = false; % set true if all intermediate .mat files exist CONFIG.skip_stage3 = false; % set true if PlayerLoad_Summary.mat already exists CONFIG.skip_stage4 = false; % set true if all three Excel outputs already exist % --- Signal parameters (do not change unless you know why) ---------------- CONFIG.fps = 29.9697; % PFF tracking nominal frame rate (Hz) CONFIG.dt = 1 / CONFIG.fps; CONFIG.butter_order = 2; % filter order; zero-phase via filtfilt = effective 4th order CONFIG.butter_freqs = [2, 4]; % Hz — Butterworth conditions used in paper CONFIG.min_frames = 100; % minimum valid frames per player-period to retain CONFIG.stoppage_p1 = 2700; % clock threshold (s) for stoppage flagging, period 1 CONFIG.stoppage_p2 = 5400; % clock threshold (s) for stoppage flagging, period 2 % ========================================================================= % Derived paths — do not edit % ========================================================================= CONFIG.positions_file = fullfile(CONFIG.output_folder, 'player_positions.csv'); CONFIG.intermediate_folder = fullfile(CONFIG.output_folder, 'intermediate'); CONFIG.summary_mat = fullfile(CONFIG.output_folder, 'PlayerLoad_Summary.mat'); CONFIG.summary_csv = fullfile(CONFIG.output_folder, 'PlayerLoad_Summary.csv'); CONFIG.out_descriptive = fullfile(CONFIG.output_folder, 'PlayerLoad_Descriptive.xlsx'); CONFIG.out_ba = fullfile(CONFIG.output_folder, 'PlayerLoad_BlandAltman.xlsx'); CONFIG.out_ratio = fullfile(CONFIG.output_folder, 'PlayerLoad_RatioMatrix.xlsx'); EQ_NAMES = {'EQ1_ActiGraph','EQ2_PL_RT_Cum','EQ3_PlayerLoad', ... 'EQ4_ImpLoad','EQ5_PL_RE','EQ6_TotalLoad', ... 'EQ7_PL_x','EQ8_PL_y','EQ9_PL_z', ... 'EQ10_PL2D','EQ11_PLTM'}; EQ_SHORT = {'EQ1','EQ2','EQ3','EQ4','EQ5','EQ6','EQ7','EQ8','EQ9','EQ10','EQ11'}; N_EQ = numel(EQ_NAMES); CONDITIONS = {'raw','kalman','butter_2hz','butter_4hz'}; N_COND = numel(CONDITIONS); % Create output folders for d = {CONFIG.output_folder, CONFIG.intermediate_folder} if ~exist(d{1}, 'dir'); mkdir(d{1}); end end fprintf('========== PLAYER LOAD PIPELINE ==========\n'); fprintf('Tracking data: %s\n', CONFIG.tracking_folder); fprintf('Event data: %s\n', CONFIG.event_folder); fprintf('Output: %s\n\n', CONFIG.output_folder); %% ========================================================================= %% STAGE 1: EXTRACT PLAYER POSITIONS FROM EVENT JSON FILES %% ========================================================================= % Reads event data JSON files and builds a position lookup table. % Position is assigned as the modal positionGroupType across all events % for each player within each game. % % Output: player_positions.csv % Columns: game_id, team, jersey, player_id, player_name, position_raw % ========================================================================= if CONFIG.skip_stage1 fprintf('--- Stage 1 skipped (skip_stage1 = true) ---\n\n'); else fprintf('--- Stage 1: Extract player positions ---\n\n'); event_files = dir(fullfile(CONFIG.event_folder, '*.json')); n_event = numel(event_files); if n_event == 0 error('No .json files found in event folder:\n %s\nCheck CONFIG.event_folder.', ... CONFIG.event_folder); end fprintf('Found %d event JSON files\n\n', n_event); posMap = containers.Map('KeyType','char','ValueType','any'); for iFile = 1:n_event jsonFile = fullfile(CONFIG.event_folder, event_files(iFile).name); [~, gameID, ~] = fileparts(event_files(iFile).name); fprintf('[%d/%d] %s ... ', iFile, n_event, gameID); try fid = fopen(jsonFile, 'r'); raw = fread(fid, '*char')'; fclose(fid); data = jsondecode(raw); catch ME fprintf('PARSE ERROR: %s\n', ME.message); continue; end if isstruct(data) nEv = numel(data); getEv = @(i) data(i); elseif iscell(data) nEv = numel(data); getEv = @(i) data{i}; else fprintf('UNKNOWN FORMAT\n'); continue; end nFound = 0; for iEv = 1:nEv ev = getEv(iEv); for side = {'home','away'} side_str = side{1}; field = [side_str 'Players']; if ~isfield(ev, field) || isempty(ev.(field)); continue; end players = ev.(field); if isstruct(players); players = num2cell(players); end for ip = 1:numel(players) p = players{ip}; jer = pff_get_num(p, 'jerseyNum'); if isnan(jer); continue; end key = sprintf('%s_%s_%d', gameID, side_str, jer); if ~isKey(posMap, key) posMap(key) = struct('gameID', gameID, ... 'team', side_str, 'jersey', jer, ... 'player_id', pff_get_num(p,'playerId'), ... 'player_name', pff_get_str(p,'playerName'), ... 'positions', {{}}); end entry = posMap(key); pos = pff_get_str(p, 'positionGroupType'); if ~isempty(pos) entry.positions{end+1} = pos; posMap(key) = entry; end nFound = nFound + 1; end end end fprintf('%d player-event entries\n', nFound); end % Build output table: modal position per player all_keys = keys(posMap); n_players = numel(all_keys); out_game = cell(n_players, 1); out_team = cell(n_players, 1); out_jer = zeros(n_players, 1); out_pid = zeros(n_players, 1); out_name = cell(n_players, 1); out_pos = cell(n_players, 1); for ik = 1:n_players e = posMap(all_keys{ik}); if isempty(e.positions) modal_pos = 'UNKNOWN'; else [uniq_pos, ~, ic] = unique(e.positions); counts = accumarray(ic, 1); [~, best] = max(counts); modal_pos = uniq_pos{best}; end out_game{ik} = e.gameID; out_team{ik} = e.team; out_jer(ik) = e.jersey; out_pid(ik) = e.player_id; out_name{ik} = e.player_name; out_pos{ik} = modal_pos; end posTable = table(string(out_game), string(out_team), out_jer, out_pid, ... string(out_name), string(out_pos), ... 'VariableNames', ... {'game_id','team','jersey','player_id','player_name','position_raw'}); writetable(posTable, CONFIG.positions_file); fprintf('\nPlayer positions saved: %d entries -> %s\n\n', n_players, CONFIG.positions_file); end %% ========================================================================= %% STAGE 2: PARSE TRACKING JSONL FILES %% ========================================================================= % Reads each game's .jsonl tracking file line by line. % Extracts raw and Kalman-smoothed XY positions for all players per frame. % Saves one .mat file per game to the intermediate folder. % % This stage is resume-safe: games already saved to .mat are skipped % unless CONFIG.skip_stage2 is false AND the file already exists. % ========================================================================= if CONFIG.skip_stage2 fprintf('--- Stage 2 skipped (skip_stage2 = true) ---\n\n'); else fprintf('--- Stage 2: Parse tracking JSONL files ---\n\n'); jsonl_files = dir(fullfile(CONFIG.tracking_folder, '*.jsonl')); n_games = numel(jsonl_files); if n_games == 0 error('No .jsonl files found in tracking folder:\n %s\nCheck CONFIG.tracking_folder.', ... CONFIG.tracking_folder); end fprintf('Found %d game files\n\n', n_games); s2_status = cell(n_games, 1); s2_rows = zeros(n_games, 1); for iFile = 1:n_games jsonlFile = fullfile(CONFIG.tracking_folder, jsonl_files(iFile).name); [~, gameID, ~] = fileparts(jsonl_files(iFile).name); outFile = fullfile(CONFIG.intermediate_folder, sprintf('raw_%s.mat', gameID)); fprintf('[%d/%d] Game %s ', iFile, n_games, gameID); if exist(outFile, 'file') fprintf('(already exists — skipping)\n'); s2_status{iFile} = 'SKIPPED'; continue; end tGame = tic; % Pre-allocate column arrays game_id_col = {}; frame_col = []; period_col = []; clock_col = []; team_col = {}; jersey_col = []; vis_col = {}; conf_col = []; x_raw_col = []; y_raw_col = []; x_smth_col = []; y_smth_col = []; try fid = fopen(jsonlFile, 'r'); lineNum = 0; while true line = fgetl(fid); if ~ischar(line); break; end lineNum = lineNum + 1; if isempty(strtrim(line)); continue; end try frm = jsondecode(line); catch continue; end fn = pff_get_num(frm, 'frameNum'); per = pff_get_num(frm, 'period'); clk = pff_get_num(frm, 'periodGameClockTime'); for side = {'home','away'} side_str = side{1}; raw_field = [side_str 'Players']; smth_field= [side_str 'PlayersSmoothed']; if ~isfield(frm, raw_field) || isempty(frm.(raw_field)); continue; end if ~isfield(frm, smth_field) || isempty(frm.(smth_field)); continue; end hp_raw = frm.(raw_field); hp_smth = frm.(smth_field); nP = min(numel(hp_raw), numel(hp_smth)); for i = 1:nP game_id_col{end+1,1} = gameID; frame_col(end+1,1) = fn; period_col(end+1,1) = per; clock_col(end+1,1) = clk; team_col{end+1,1} = side_str; jersey_col(end+1,1) = pff_get_num(hp_smth(i), 'jerseyNum'); vis_col{end+1,1} = pff_get_str(hp_smth(i), 'visibility'); conf_col{end+1,1} = pff_get_str(hp_smth(i), 'confidence'); x_raw_col(end+1,1) = pff_get_num(hp_raw(i), 'x'); y_raw_col(end+1,1) = pff_get_num(hp_raw(i), 'y'); x_smth_col(end+1,1) = pff_get_num(hp_smth(i), 'x'); y_smth_col(end+1,1) = pff_get_num(hp_smth(i), 'y'); end end if mod(lineNum, 50000) == 0 fprintf('\n ... %dk lines (%.0fs)', lineNum/1000, toc(tGame)); end end fclose(fid); catch ME if exist('fid','var') && fid > 0; fclose(fid); end fprintf(' FAILED: %s\n', ME.message); s2_status{iFile} = 'ERROR'; continue; end % Build and save table nRows = numel(frame_col); T = table(string(game_id_col), frame_col, period_col, clock_col, ... string(team_col), jersey_col, ... string(vis_col), string(conf_col), ... x_raw_col, y_raw_col, x_smth_col, y_smth_col, ... 'VariableNames', {'game_id','frame','period','game_clock_s', ... 'team','jersey','visibility','confidence', ... 'x_raw','y_raw','x_smooth','y_smooth'}); save(outFile, 'T', '-v7.3'); fprintf(' %d rows -> %s (%.0fs)\n', nRows, sprintf('raw_%s.mat', gameID), toc(tGame)); s2_status{iFile} = 'OK'; s2_rows(iFile) = nRows; clear game_id_col frame_col period_col clock_col team_col jersey_col ... vis_col conf_col x_raw_col y_raw_col x_smth_col y_smth_col T; end n_ok = sum(strcmp(s2_status, 'OK')); n_sk = sum(strcmp(s2_status, 'SKIPPED')); fprintf('\nStage 2 complete: %d processed, %d skipped, %d errors\n\n', ... n_ok, n_sk, n_games - n_ok - n_sk); end %% ========================================================================= %% STAGE 3: DERIVE ACCELERATION AND APPLY BUTTERWORTH FILTERS %% ========================================================================= % Loads each game's intermediate .mat file. % Applies four signal-processing conditions to XY positions: % (1) raw — unfiltered positional differences % (2) kalman — PFF-supplied Kalman-smoothed coordinates % (3) butter_2hz — zero-phase Butterworth low-pass at 2 Hz % (4) butter_4hz — zero-phase Butterworth low-pass at 4 Hz % % Velocity and acceleration estimated via central finite-difference scheme. % Saves one .mat per game to intermediate folder (replaces raw_ file). % % Output per game: accel_XXXX.mat % ========================================================================= if CONFIG.skip_stage3 fprintf('--- Stage 3 skipped (skip_stage3 = true) ---\n\n'); else fprintf('--- Stage 3: Derive acceleration ---\n\n'); % Load position lookup if ~exist(CONFIG.positions_file, 'file') error('player_positions.csv not found.\nRun Stage 1 first, or set skip_stage1=false.\nExpected: %s', ... CONFIG.positions_file); end posTable = readtable(CONFIG.positions_file, 'TextType','string'); posKeys = strcat(string(posTable.game_id), '_', string(posTable.team), '_', string(posTable.jersey)); posVals = string(posTable.position_raw); fprintf('Position lookup loaded: %d entries\n\n', height(posTable)); % Butterworth coefficients butter_b = cell(2,1); butter_a = cell(2,1); for ib = 1:2 Wn = CONFIG.butter_freqs(ib) / (CONFIG.fps / 2); [butter_b{ib}, butter_a{ib}] = butter(CONFIG.butter_order, Wn, 'low'); fprintf('Butterworth %d Hz: Wn = %.4f\n', CONFIG.butter_freqs(ib), Wn); end fprintf('\n'); raw_files = dir(fullfile(CONFIG.intermediate_folder, 'raw_*.mat')); n_games = numel(raw_files); if n_games == 0 error('No raw_*.mat files found in intermediate folder.\nRun Stage 2 first.'); end fprintf('Found %d intermediate files\n\n', n_games); for iFile = 1:n_games matFile = fullfile(CONFIG.intermediate_folder, raw_files(iFile).name); tok = regexp(raw_files(iFile).name, 'raw_(\w+)\.mat', 'tokens'); if isempty(tok); continue; end gameID = tok{1}{1}; outFile = fullfile(CONFIG.intermediate_folder, sprintf('accel_%s.mat', gameID)); fprintf('[%d/%d] Game %s ', iFile, n_games, gameID); if exist(outFile, 'file') fprintf('(accel already exists — skipping)\n'); continue; end tGame = tic; S = load(matFile, 'T'); T = S.T; T = T(T.team ~= "ball", :); % Resolve duplicate frames grpKey = strcat(string(T.game_id),'_',string(T.frame),'_', ... string(T.team),'_',string(T.jersey)); [~, ia, ic] = unique(grpKey, 'stable'); grpCounts = accumarray(ic, 1); dupGroups = find(grpCounts > 1); for dg = 1:numel(dupGroups) rows = find(ic == dupGroups(dg)); T.x_raw(rows(1)) = mean(T.x_raw(rows), 'omitnan'); T.y_raw(rows(1)) = mean(T.y_raw(rows), 'omitnan'); T.x_smooth(rows(1)) = mean(T.x_smooth(rows), 'omitnan'); T.y_smooth(rows(1)) = mean(T.y_smooth(rows), 'omitnan'); end T = T(ia, :); playerKeys = strcat(string(T.team),'_',string(T.jersey)); uniquePlayerKeys = unique(playerKeys, 'stable'); nPlayers = numel(uniquePlayerKeys); % Pre-allocate output column vectors out_game_id = string([]); out_team = string([]); out_jersey = double([]); out_pos = string([]); out_period = int8([]); out_frame = int32([]); out_clock = single([]); out_cond = string([]); out_vis = string([]); out_conf = string([]); out_stop = logical([]); out_ax = single([]); out_ay = single([]); out_accel = single([]); for ip = 1:nPlayers pKey = uniquePlayerKeys(ip); parts = strsplit(pKey, '_'); pTeam = parts(1); pJersey = str2double(parts(2)); lookupKey = sprintf('%s_%s_%d', gameID, pTeam, pJersey); posMatch = find(posKeys == string(lookupKey), 1); posLabel = "UNKNOWN"; if ~isempty(posMatch); posLabel = posVals(posMatch); end pMask = T.team == pTeam & T.jersey == pJersey; pData = sortrows(T(pMask,:), 'frame'); periods = unique(pData.period); for iPer = 1:numel(periods) per = periods(iPer); perData = pData(pData.period == per, :); nF = height(perData); if nF < CONFIG.min_frames; continue; end frames = int32(perData.frame); clk = single(perData.game_clock_s); vis = perData.visibility; conf = perData.confidence; stoppage = clk > single(per == 1) * CONFIG.stoppage_p1 + ... single(per == 2) * CONFIG.stoppage_p2; xr = double(perData.x_raw); yr = double(perData.y_raw); xs = double(perData.x_smooth); ys = double(perData.y_smooth); for iCond = 1:4 switch iCond case 1; px = xr; py = yr; case 2; px = xs; py = ys; case 3; px = pl_filtfilt(butter_b{1}, butter_a{1}, xr); py = pl_filtfilt(butter_b{1}, butter_a{1}, yr); case 4; px = pl_filtfilt(butter_b{2}, butter_a{2}, xr); py = pl_filtfilt(butter_b{2}, butter_a{2}, yr); end vx = pl_central_diff(px, CONFIG.dt); vy = pl_central_diff(py, CONFIG.dt); ax = single(pl_central_diff(vx, CONFIG.dt)); ay = single(pl_central_diff(vy, CONFIG.dt)); condName = string(CONDITIONS{iCond}); out_game_id = [out_game_id; repmat(string(gameID), nF, 1)]; out_team = [out_team; repmat(pTeam, nF, 1)]; out_jersey = [out_jersey; repmat(pJersey, nF, 1)]; out_pos = [out_pos; repmat(posLabel, nF, 1)]; out_period = [out_period; repmat(int8(per), nF, 1)]; out_frame = [out_frame; frames]; out_clock = [out_clock; clk]; out_cond = [out_cond; repmat(condName, nF, 1)]; out_vis = [out_vis; vis]; out_conf = [out_conf; conf]; out_stop = [out_stop; stoppage]; out_ax = [out_ax; ax]; out_ay = [out_ay; ay]; out_accel = [out_accel; sqrt(ax.^2 + ay.^2)]; end end if mod(ip, 10) == 0 || ip == nPlayers fprintf('\n Player %d/%d (%.0fs)', ip, nPlayers, toc(tGame)); end end data.game_id = out_game_id; data.team = out_team; data.jersey = out_jersey; data.position_raw = out_pos; data.period = out_period; data.frame = out_frame; data.game_clock_s = out_clock; data.smoothing_condition = out_cond; data.visibility = out_vis; data.confidence = out_conf; data.stoppage = out_stop; data.ax = out_ax; data.ay = out_ay; data.accel_mag = out_accel; data.meta.fps = CONFIG.fps; data.meta.dt = CONFIG.dt; data.meta.conditions = CONDITIONS; data.meta.created = datestr(now); save(outFile, 'data', '-v7.3'); fprintf('\n Saved accel_%s.mat (%.0fs)\n\n', gameID, toc(tGame)); clear T S data out_game_id out_team out_jersey out_pos out_period ... out_frame out_clock out_cond out_vis out_conf out_stop out_ax out_ay out_accel; end fprintf('Stage 3 complete\n\n'); end %% ========================================================================= %% STAGE 4: APPLY ALL 11 PLAYER LOAD EQUATIONS %% ========================================================================= % Loads each game's accel_*.mat file. % Accumulates all 11 equations per player-period under each condition. % Pools P1+P2 to yield player-match observations. % Saves summary table. % % The 11 equations (see Table 1 in manuscript): % EQ1 AcelT Σ √(ax² + ay²) instantaneous magnitude % EQ2 PL_RT 0.01 × Σ √((Δax²+Δay²)/100) jerk (RealTrack scalar) % EQ3 PlayerLoad Σ √((Δax²+Δay²)/100) jerk (Catapult) % EQ4 ImpulseLoad Σ √(ax²+ay²) / 9.8067 instantaneous / g % EQ5 PL_RE Σ (ax²+ay²) / 800 energy-weighted (ZXY) % EQ6 TotalLoad Σ √((Δax²+Δay²)/1000) jerk (StatSports) % EQ7 PL_x Σ √(Δax²/100) jerk — lateral only % EQ8 PL_y Σ √(Δay²/100) jerk — AP only % EQ9 PL_z Σ √(Δaz²/100) = 0 jerk — vertical (zero in 2D) % EQ10 PL2D Σ √((Δax²+Δay²)/100) = EQ3 jerk — 2D (algebraically EQ3) % EQ11 PLTM Σ √((Δax²+Δay²)/100) = EQ3 jerk (also algebraically EQ3) % % Output: PlayerLoad_Summary.mat and PlayerLoad_Summary.csv % ========================================================================= if CONFIG.skip_stage4 fprintf('--- Stage 4 skipped (skip_stage4 = true) ---\n\n'); else fprintf('--- Stage 4: Apply PlayerLoad equations ---\n\n'); accel_files = dir(fullfile(CONFIG.intermediate_folder, 'accel_*.mat')); n_files = numel(accel_files); if n_files == 0 error('No accel_*.mat files found.\nRun Stage 3 first.'); end fprintf('Found %d game files\n\n', n_files); % Global typed accumulators acc_game = string([]); acc_team = string([]); acc_jer = double([]); acc_pos = string([]); acc_per = double([]); acc_cond = string([]); acc_n_tot = double([]); acc_n_val = double([]); acc_pct_val = double([]); acc_pct_stp = double([]); acc_pct_vis = double([]); acc_EQ = zeros(0, N_EQ); tTotal = tic; for iFile = 1:n_files matFile = fullfile(CONFIG.intermediate_folder, accel_files(iFile).name); tok = regexp(accel_files(iFile).name, 'accel_(\w+)\.mat', 'tokens'); if isempty(tok); continue; end gameID = tok{1}{1}; fprintf('[%d/%d] Game %s ... ', iFile, n_files, gameID); tGame = tic; S = load(matFile, 'data'); d = S.data; n = numel(d.frame); ax = double(d.ax); ay = double(d.ay); teamNum = double(string(d.team) == "home") + 1; jerseyN = double(d.jersey); periodN = double(d.period); condStr = string(d.smoothing_condition); condNum = zeros(n, 1); condNum(condStr == "raw") = 1; condNum(condStr == "kalman") = 2; condNum(condStr == "butter_2hz") = 3; condNum(condStr == "butter_4hz") = 4; groupInt = teamNum*100000 + jerseyN*1000 + periodN*10 + condNum; [sortedInt, sortIdx] = sort(groupInt); grpChange = [true; diff(sortedInt) ~= 0]; grpIdx = cumsum(grpChange); nGroups = grpIdx(end); ax_s = ax(sortIdx); ay_s = ay(sortIdx); vis_s = string(d.visibility(sortIdx)); stop_s = logical(d.stoppage(sortIdx)); pos_s = string(d.position_raw(sortIdx)); gid_s = string(d.game_id(sortIdx)); tm_s = string(d.team(sortIdx)); jer_s = double(d.jersey(sortIdx)); per_s = double(d.period(sortIdx)); cond_s = string(d.smoothing_condition(sortIdx)); starts = find(grpChange); ends = [starts(2:end)-1; n]; g_EQ = zeros(nGroups, N_EQ); g_valid = false(nGroups, 1); g_game = string(zeros(nGroups,1)); g_team = string(zeros(nGroups,1)); g_jer = zeros(nGroups,1); g_pos = string(zeros(nGroups,1)); g_per = zeros(nGroups,1); g_cond = string(zeros(nGroups,1)); g_ntot = zeros(nGroups,1); g_nval = zeros(nGroups,1); g_pval = zeros(nGroups,1); g_pstp = zeros(nGroups,1); g_pvis = zeros(nGroups,1); for ig = 1:nGroups i1 = starts(ig); i2 = ends(ig); axg = ax_s(i1:i2); ayg = ay_s(i1:i2); valid = ~isnan(axg) & ~isnan(ayg); axv = axg(valid); ayv = ayg(valid); nv = numel(axv); n_total = i2 - i1 + 1; g_ntot(ig) = n_total; g_nval(ig) = nv; g_pval(ig) = 100 * nv / n_total; g_pstp(ig) = 100 * double(sum(stop_s(i1:i2))) / n_total; g_pvis(ig) = 100 * sum(vis_s(i1:i2) == "VISIBLE") / n_total; g_game(ig) = gid_s(i1); g_team(ig) = tm_s(i1); g_jer(ig) = jer_s(i1); g_pos(ig) = pos_s(i1); g_per(ig) = per_s(i1); g_cond(ig) = cond_s(i1); if nv < 2; continue; end dax = diff(axv); day = diff(ayv); mag2 = axv.^2 + ayv.^2; dmag2 = dax.^2 + day.^2; g_EQ(ig,:) = [ ... sum(sqrt(mag2)), ... % EQ1 AcelT sum(sqrt(dmag2) / 100) * 0.01, ... % EQ2 PL_RT sum(sqrt(dmag2) / 100), ... % EQ3 PlayerLoad sum(sqrt(mag2) / 9.8067), ... % EQ4 ImpulseLoad sum(mag2 / 800), ... % EQ5 PL_RE sum(sqrt(dmag2) / 1000), ... % EQ6 TotalLoad sum(abs(day) / 10), ... % EQ7 PL_x sum(abs(dax) / 10), ... % EQ8 PL_y 0, ... % EQ9 PL_z (zero in 2D) sum(sqrt(dmag2) / 100), ... % EQ10 PL2D = EQ3 sum(sqrt(dmag2) / 100)]; % EQ11 PLTM = EQ3 g_valid(ig) = true; end % Keep valid rows and append keep = g_valid; acc_game = [acc_game; g_game(keep)]; acc_team = [acc_team; g_team(keep)]; acc_jer = [acc_jer; g_jer(keep)]; acc_pos = [acc_pos; g_pos(keep)]; acc_per = [acc_per; g_per(keep)]; acc_cond = [acc_cond; g_cond(keep)]; acc_n_tot = [acc_n_tot; g_ntot(keep)]; acc_n_val= [acc_n_val;g_nval(keep)]; acc_pct_val = [acc_pct_val; g_pval(keep)]; acc_pct_stp = [acc_pct_stp; g_pstp(keep)]; acc_pct_vis = [acc_pct_vis; g_pvis(keep)]; acc_EQ = [acc_EQ; g_EQ(keep,:)]; fprintf('%d groups, %.0fs\n', sum(keep), toc(tGame)); clear S d ax ay ax_s ay_s g_EQ g_valid g_game g_team g_jer g_pos g_per g_cond; end fprintf('\nBuilding summary table (%d rows)...\n', numel(acc_game)); T_summary = table(acc_game, acc_team, acc_jer, acc_pos, acc_per, acc_cond, ... acc_n_tot, acc_n_val, acc_pct_val, acc_pct_stp, acc_pct_vis, ... acc_EQ(:,1), acc_EQ(:,2), acc_EQ(:,3), acc_EQ(:,4), ... acc_EQ(:,5), acc_EQ(:,6), acc_EQ(:,7), acc_EQ(:,8), ... acc_EQ(:,9), acc_EQ(:,10), acc_EQ(:,11), ... 'VariableNames', ... {'game_id','team','jersey','position_raw','period', ... 'smoothing_condition','n_frames_total','n_frames_valid', ... 'pct_frames_valid','pct_stoppage','pct_visible', ... EQ_NAMES{:}}); save(CONFIG.summary_mat, 'T_summary', '-v7.3'); writetable(T_summary, CONFIG.summary_csv); fprintf('Saved: %s\nSaved: %s\n', CONFIG.summary_mat, CONFIG.summary_csv); fprintf('Total time: %.1f min\n\nStage 4 complete\n\n', toc(tTotal)/60); end %% ========================================================================= %% STAGE 5: STATISTICAL ANALYSIS %% ========================================================================= % Loads the summary table, pools periods (P1+P2), and produces three % Excel workbooks containing the outputs reported in the manuscript. % % Outputs: % PlayerLoad_Descriptive.xlsx — mean ± SD by position and condition % (also includes per-90 min normalised values) % PlayerLoad_BlandAltman.xlsx — Bland-Altman agreement for all 55 equation pairs % across all conditions and positions % PlayerLoad_RatioMatrix.xlsx — pairwise conversion coefficients % ========================================================================= if CONFIG.skip_stage4 fprintf('--- Stage 5 skipped (skip_stage4 = true, summary table not available) ---\n\n'); else fprintf('--- Stage 5: Statistical analysis ---\n\n'); if ~exist(CONFIG.summary_mat, 'file') error('Summary .mat not found. Run Stage 4 first.\nExpected: %s', CONFIG.summary_mat); end fprintf('Loading summary data... '); S = load(CONFIG.summary_mat, 'T_summary'); T = S.T_summary; fprintf('%d rows\n\n', height(T)); % Pool P1+P2: sum EQ values and n_frames_valid per player-match fprintf('Pooling periods (P1+P2)...\n'); grpVars = {'game_id','team','jersey','position_raw','smoothing_condition'}; poolVars = [EQ_NAMES, {'n_frames_valid'}]; Tp = groupsummary(T, grpVars, 'sum', poolVars); for i = 1:numel(poolVars) Tp.Properties.VariableNames{['sum_' poolVars{i}]} = poolVars{i}; end % Per-90 normalisation Tp.minutes_played = Tp.n_frames_valid ./ (CONFIG.fps * 60); for i = 1:N_EQ Tp.([EQ_NAMES{i} '_per90']) = Tp.(EQ_NAMES{i}) ./ Tp.minutes_played .* 90; end EQ_PER90 = cellfun(@(x) [x '_per90'], EQ_NAMES, 'UniformOutput', false); positions = unique(Tp.position_raw); positions = positions(positions ~= "UNKNOWN"); positions = ['ALL'; positions]; N_POS = numel(positions); fprintf('Positions found: %s\n\n', strjoin(positions, ', ')); % --- OUTPUT 1: Descriptive statistics (absolute and per-90) ----------- fprintf('Writing descriptive statistics...\n'); for iCond = 1:N_COND cond = CONDITIONS{iCond}; condMask = Tp.smoothing_condition == cond; colLabels = {}; for e = 1:N_EQ colLabels{end+1} = [EQ_SHORT{e} '_Mean']; colLabels{end+1} = [EQ_SHORT{e} '_SD']; colLabels{end+1} = [EQ_SHORT{e} '_per90_Mean']; colLabels{end+1} = [EQ_SHORT{e} '_per90_SD']; end descData = NaN(N_POS, N_EQ * 4); for ip = 1:N_POS if positions(ip) == "ALL" sub = Tp(condMask, :); else sub = Tp(condMask & Tp.position_raw == positions(ip), :); end for e = 1:N_EQ col = (e-1)*4 + 1; v = sub.(EQ_NAMES{e}); v = v(isfinite(v)); v90 = sub.(EQ_PER90{e}); v90 = v90(isfinite(v90)); if ~isempty(v) descData(ip, col) = mean(v); descData(ip, col+1) = std(v); end if ~isempty(v90) descData(ip, col+2) = mean(v90); descData(ip, col+3) = std(v90); end end end descT = array2table(descData, 'VariableNames', colLabels, 'RowNames', cellstr(positions)); writetable(descT, CONFIG.out_descriptive, 'Sheet', cond, 'WriteRowNames', true); fprintf(' Written sheet: %s\n', cond); end % --- OUTPUT 2: Bland-Altman analysis ---------------------------------- fprintf('\nRunning Bland-Altman analysis...\n'); pairs = nchoosek(1:N_EQ, 2); N_PAIRS = size(pairs, 1); N_ROWS = N_PAIRS * N_COND * N_POS; ba_eqx = cell(N_ROWS,1); ba_eqy = cell(N_ROWS,1); ba_cond = cell(N_ROWS,1); ba_pos = cell(N_ROWS,1); ba_n = zeros(N_ROWS,1); ba_bias = NaN(N_ROWS,1); ba_biasp = NaN(N_ROWS,1); ba_sd = NaN(N_ROWS,1); ba_loa_lo = NaN(N_ROWS,1); ba_loa_hi = NaN(N_ROWS,1); ba_r2 = NaN(N_ROWS,1); ba_slope = NaN(N_ROWS,1); ba_int = NaN(N_ROWS,1); ba_see = NaN(N_ROWS,1); row = 0; for iCond = 1:N_COND cond = CONDITIONS{iCond}; condMask = Tp.smoothing_condition == cond; for ip = 1:N_POS if positions(ip) == "ALL" sub = Tp(condMask, :); else sub = Tp(condMask & Tp.position_raw == positions(ip), :); end for ip2 = 1:N_PAIRS ei = pairs(ip2, 1); ej = pairs(ip2, 2); x = sub.(EQ_NAMES{ei}); y = sub.(EQ_NAMES{ej}); ok = isfinite(x) & isfinite(y); x = x(ok); y = y(ok); nv = numel(x); row = row + 1; ba_eqx{row} = EQ_SHORT{ei}; ba_eqy{row} = EQ_SHORT{ej}; ba_cond{row} = cond; ba_pos{row} = char(positions(ip)); ba_n(row) = nv; if nv < 3; continue; end d = x - y; b = mean(d); sdd = std(d); ba_bias(row) = b; ba_biasp(row) = 100 * b / mean((x+y)/2); ba_sd(row) = sdd; ba_loa_lo(row) = b - 1.96*sdd; ba_loa_hi(row) = b + 1.96*sdd; r = corrcoef(x, y); ba_r2(row) = r(1,2)^2; p = polyfit(x, y, 1); ba_slope(row) = p(1); ba_int(row) = p(2); y_hat = polyval(p, x); ba_see(row) = sqrt(mean((y - y_hat).^2)); end end fprintf(' %s done\n', cond); end baT = table(string(ba_eqx), string(ba_eqy), string(ba_cond), string(ba_pos), ... ba_n, ba_bias, ba_biasp, ba_sd, ba_loa_lo, ba_loa_hi, ... ba_r2, ba_slope, ba_int, ba_see, ... 'VariableNames', {'EQ_x','EQ_y','condition','position', ... 'N','bias','bias_pct','SD_diff', ... 'LoA_lower','LoA_upper','R2','slope','intercept','SEE'}); writetable(baT, CONFIG.out_ba, 'Sheet', 'BlandAltman'); fprintf('Saved: %s\n\n', CONFIG.out_ba); % --- OUTPUT 3: Ratio matrix ------------------------------------------- fprintf('Writing ratio matrix...\n'); for iCond = 1:N_COND cond = CONDITIONS{iCond}; sub = Tp(Tp.smoothing_condition == cond, :); ratioCell = cell(N_EQ+1, N_EQ+1); ratioCell{1,1} = 'EQ \ EQ'; for e = 1:N_EQ ratioCell{1,e+1} = EQ_SHORT{e}; ratioCell{e+1,1} = EQ_SHORT{e}; end for ei = 1:N_EQ for ej = 1:N_EQ if ei == ej; ratioCell{ei+1,ej+1} = '1.000 (r=1.000)'; continue; end x = sub.(EQ_NAMES{ei}); y = sub.(EQ_NAMES{ej}); ok = isfinite(x) & isfinite(y) & y ~= 0; x = x(ok); y = y(ok); if numel(x) < 3; ratioCell{ei+1,ej+1} = 'N/A'; continue; end rc = corrcoef(x, y); ratioCell{ei+1,ej+1} = sprintf('%.4f (r=%.4f)', mean(x)/mean(y), rc(1,2)); end end ratioT = cell2table(ratioCell(2:end,:), ... 'VariableNames', ['Position', cellstr(string(ratioCell(1,2:end)))]); writetable(ratioT, CONFIG.out_ratio, 'Sheet', cond); fprintf(' Written sheet: %s\n', cond); end fprintf('Saved: %s\n\n', CONFIG.out_ratio); fprintf('Stage 5 complete\n\n'); end %% ========================================================================= %% PIPELINE COMPLETE %% ========================================================================= fprintf('========== PIPELINE COMPLETE ==========\n'); fprintf('Output folder: %s\n\n', CONFIG.output_folder); fprintf('Files produced:\n'); for f = {CONFIG.positions_file, CONFIG.summary_csv, CONFIG.summary_mat, ... CONFIG.out_descriptive, CONFIG.out_ba, CONFIG.out_ratio} if exist(f{1}, 'file') info = dir(f{1}); fprintf(' [OK] %s (%.1f MB)\n', info.name, info.bytes/1e6); else fprintf(' [--] %s (not found)\n', f{1}); end end fprintf('\nIntermediate .mat files are in:\n %s\n', CONFIG.intermediate_folder); fprintf('These can be deleted once you have verified the summary outputs.\n\n'); %% ========================================================================= %% LOCAL FUNCTIONS %% ========================================================================= function d = pl_central_diff(x, dt) % Central finite-difference scheme. Returns NaN at boundaries. n = numel(x); d = NaN(n, 1); if n < 3; return; end d(2:n-1) = (x(3:n) - x(1:n-2)) / (2 * dt); end function y = pl_filtfilt(b, a, x) % Zero-phase Butterworth filter with graceful fallback. try y = filtfilt(b, a, x); catch y = NaN(size(x)); warning('filtfilt failed — check Signal Processing Toolbox is installed.'); end end function val = pff_get_num(s, field) % Safely extract a numeric field from a struct. if isfield(s, field) && ~isempty(s.(field)) raw = s.(field); if ischar(raw) || isstring(raw) val = str2double(raw); else val = double(raw(1)); end else val = NaN; end end function val = pff_get_str(s, field) % Safely extract a string field from a struct. if isfield(s, field) && ~isempty(s.(field)) val = char(s.(field)); else val = ''; end end