User:Jfd34/Batting graph PHP script

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
<?php

# Optional layout/style GET parameters, with the value used when a parameter is missing or empty

$option_defaults = array(
    # option key           => array(GET parameter name, fallback value)
    "title_height"         => array("titleheight",       120),
    "margin_bottom"        => array("marginbottom",      120),
    "margin_left"          => array("marginleft",        160),
    "margin_right"         => array("marginright",       50),
    "legend_height"        => array("legendheight",      120),
    "border_style"         => array("borderstyle",       "fill:none; stroke:#000000; stroke-width:2; stroke-opacity:1"),
    "text_style"           => array("textstyle",         "font-family:Liberation Sans; font-style:normal; font-weight:normal; fill:#000000; fill-opacity:1; stroke:none"),
    "legend_max_size"      => array("legendmaxsize",     2000),
    "guide_label_spacing"  => array("guidelabelspacing", 10),
);

$batting_graph_options = array();
foreach ( $option_defaults as $option_name => $option_spec ) {
    # empty() is true for an absent parameter as well as for "", "0" and other falsy values,
    # so all of those fall back to the default
    if ( empty($_GET[$option_spec[0]]) ) {
        $batting_graph_options[$option_name] = $option_spec[1];
    }
    else {
        $batting_graph_options[$option_name] = $_GET[$option_spec[0]];
    }
}
unset($option_defaults, $option_name, $option_spec);

# The year label strip defaults to 60% of the (possibly overridden) bottom margin
if ( empty($_GET["yearlabelsheight"]) ) {
    $batting_graph_options["year_labels_height"] = round($batting_graph_options["margin_bottom"] * 0.6, 3);
}
else {
    $batting_graph_options["year_labels_height"] = $_GET["yearlabelsheight"];
}

# Detect the format to obtain the correct URL of the data source
# Returns the suffix inserted into the howstat page name: "_ODI" for ODIs and "" (empty string) for Tests
# Terminates the script with an error message if $_GET["format"] is missing or is anything else
function get_format() {
    # A missing or non-string parameter (e.g. format[]=x) is treated as an empty format, so that it
    # reaches the error message below instead of failing inside strtolower()
    $format = ( isset($_GET["format"]) && is_string($_GET["format"]) ) ? strtolower($_GET["format"]) : "";

    switch ( $format ) {
        case "odi"  : return "_ODI";
        case "test" : return "";
        # howstat does not support T20 at the moment. Return an error if the format is anything other than test or odi
        default     : die("No format or unsupported format specified. Only \"test\" and \"odi\" (case-insensitive) are allowed. T20 not supported for now.");
    }
}

# get_format() ends the script itself when the format is missing or unsupported,
# so the header is only sent for a request that can actually produce a graph
$format_is_valid = ( get_format() !== null );
if ( $format_is_valid ) {
    # Serve the output as SVG so that user agents do not interpret it as HTML
    header("Content-Type: image/svg+xml");
}
unset($format_is_valid);

# Load the data from howstat.com.au

# The player ID becomes part of the request URL, so it must be a plain non-empty value and is URL-encoded
if ( !isset($_GET["id"]) || !is_scalar($_GET["id"]) || $_GET["id"] === "" ) {
    die("No player ID specified.");
}
$data_source_URL = "http://www.howstat.com.au/cricket/Statistics/Players/PlayerProgressBat" . get_format() . ".asp?PlayerID=" . rawurlencode($_GET["id"]);

$data_source_HTML = file_get_contents($data_source_URL);
if ( $data_source_HTML === false || $data_source_HTML === "" ) {
    die("Could not load the data from howstat.com.au.");
}

# Collect HTML parse problems internally instead of emitting them as PHP warnings,
# which could otherwise end up inside the SVG output
$previous_libxml_setting = libxml_use_internal_errors(true);
$data_document = new DOMDocument();
$data_document->loadHTML($data_source_HTML);
libxml_clear_errors();
libxml_use_internal_errors($previous_libxml_setting);

# Locate the data table from the HTML, and then store the scores in an array
# Path to the table: first <form> -> its first <table> -> 15th nested <table> -> 2nd nested <table>
$data_table = $data_document;
foreach ( array( array("form", 0), array("table", 0), array("table", 14), array("table", 1) ) as $lookup_step ) {
    $data_table = $data_table->getElementsByTagName($lookup_step[0])->item($lookup_step[1]);
    if ( $data_table === null ) {
        die("Could not find the data table in the page returned by howstat.com.au.");
    }
}
unset($lookup_step);

# Score is in the seventh column (Tests) and sixth column (ODIs)
$score_column = (get_format() === "_ODI") ? 5 : 6;
$data_rows = $data_table->getElementsByTagName("tr");

$scores = array();
for ( $i = 1; $i < $data_rows->length; $i++ ) {   # i = 1 and not 0 so that the header row is skipped
    $scores[] = $data_rows->item($i)->getElementsByTagName("td")->item($score_column)->childNodes->item(0)->nodeValue;
}

# Remove any "DNB" (did not bat) values, and re-index the array
$scores = preg_grep('/DNB/', $scores, PREG_GREP_INVERT);
$scores = array_values($scores);

# Build $scores2: the same innings as $scores, in the same order, but reduced to values that can be used in arithmetic
# The * (not out sign) and any surrounding spaces, tabs and newlines are stripped from each entry
$scores2 = array();
foreach ( $scores as $raw_score ) {
    # HowSTAT places the not out sign BEFORE the score, hence the optional \* ahead of the digits
    $numeric_score = preg_replace('/[\s\n\r\t]*\*?([0-9]+?)[\s\n\r\t]*/', "\\1", $raw_score);
    $scores2[] = $numeric_score;
}
unset($raw_score, $numeric_score);

# Some variables needed for generating the output

# Width of the data area: one bar plus one gap per innings, plus one extra gap
$data_area_width = count($scores) * ( $_GET["barwidth"] + $_GET["barspacing"] ) + $_GET["barspacing"];

# Height of the data area: the maximum plotted value multiplied by the scale factor
$data_area_height = $_GET["max"] * $_GET["scale"];

# Space above the data area: the title strip, plus the legend strip when a legend is requested
# !empty() is used (as the legend code itself guards this parameter) so that omitting "legend" raises no undefined-index notice
$total_margin_top = $batting_graph_options["title_height"] + ( !empty($_GET["legend"]) ? $batting_graph_options["legend_height"] : 0 );

# The width attribute of the root svg element
$document_width = $batting_graph_options["margin_left"] + $data_area_width + $batting_graph_options["margin_right"];

# The height attribute of the root svg element
$document_height = $total_margin_top + $data_area_height + $batting_graph_options["margin_bottom"];

# Now output the XML declaration, the SVG 1.1 DOCTYPE and the opening tag of the root <svg> element
# The root element is sized with $document_width and $document_height; its closing tag is echoed at the very end of the script

echo <<<START
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">

<svg xml:lang="en" version="1.1" width="{$document_width}" height="{$document_height}" xmlns="http://www.w3.org/2000/svg">

START;
# <title> element (text in the browser's title bar)
# Returns the format name with proper capitalisation ("ODI" or "Test"), matched case-insensitively against $_GET["format"]
# For any other value nothing is returned (i.e. NULL)
function get_format2() {
    $display_names = array(
        "odi"  => "ODI",
        "test" => "Test",
    );
    $requested = strtolower($_GET["format"]);
    if ( isset($display_names[$requested]) ) {
        return $display_names[$requested];
    }
}

# The player name comes straight from the query string, so escape it before it is embedded in the XML output
# (both in the <title> element below and wherever $title is reused later)
# double_encode is switched off so that entities already present in the name (e.g. &amp;) are not escaped a second time
$player_name = htmlspecialchars(isset($_GET["name"]) ? (string) $_GET["name"] : "", ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8", false);

$title = $player_name . " &#8212; Batting performance in " . get_format2() . "s";
echo "  <title>{$title}</title>\r\n";

echo "  <defs>\r\n";

# Clip path which crops any plotted data beyond $_GET["max"]
# The transform attribute on the data group will transform the clip path with it, cropping anything that goes beyond the top and left edges of the data area (no need to worry about the bottom and right)
echo <<<CLIP
    <clipPath id="data-clip" clipPathUnits="userSpaceOnUse">
      <rect x="0" y="0" width="{$document_width}" height="{$document_height}" />
    </clipPath>

CLIP;

# Internal CSS stylesheets
# Only output when a bar style is given; !empty() avoids an undefined-index notice when "barstyle" is omitted
if ( !empty($_GET["barstyle"]) ) {
    # The styles are written inside a CDATA section. A "]]>" sequence in the parameter would end that section early
    # and let the rest of the value be parsed as markup, so the sequence is broken up with a space
    $barstyle = str_replace("]]>", "]] >", $_GET["barstyle"]);
    echo <<<STYLE
    <style type="text/css">/*<![CDATA[*/
      .innings {
        {$barstyle}
      }
 
STYLE;
    # Not-out innings use their own style if one is given, otherwise the same style as ordinary innings
    $notoutstyle = !empty($_GET["notoutstyle"]) ? str_replace("]]>", "]] >", $_GET["notoutstyle"]) : $barstyle;
    echo <<<STYLE
      .innings-notout {
        {$notoutstyle}
      }
    /*]]>*/</style>

STYLE;
}
echo "  </defs>\r\n";

# Data area borders and guidelines
# One rectangle is drawn per interval, stacked from the top of the data area downwards
echo "  <g style=\"{$batting_graph_options["border_style"]}\">\r\n";

# Fall back to a single interval (just the outer border) when "intervals" is missing, non-numeric or not positive:
# such a value would otherwise cause a division by zero or a loop that never reaches the bottom of the data area
$guide_intervals = ( isset($_GET["intervals"]) && is_numeric($_GET["intervals"]) && $_GET["intervals"] > 0 ) ? $_GET["intervals"] : 1;
$guide_height = $data_area_height / $guide_intervals;
$guide_Y_position = $total_margin_top;

# ceil() absorbs small floating point errors accumulated while adding $guide_height repeatedly
while ( ceil($guide_Y_position) < $data_area_height + $total_margin_top ) {
    echo "    <rect x=\"{$batting_graph_options["margin_left"]}\" y=\"".round($guide_Y_position, 6)."\" width=\"{$data_area_width}\" height=\"".round($guide_height, 6)."\" />\r\n";
    $guide_Y_position += $guide_height;
}
echo "  </g>\r\n";


# Returns the baseline y-coordinate (rounded to 3 decimals) that vertically centres single-line text of size $fontsize on $centre
function text_baseline_offset($centre, $fontsize) {
    # "Average" fonts are taken to have a height of 70% of the font size, so the baseline sits half of that (35%) below the centre
    # Some "fancy" fonts may have a different height-to-fontsize ratio, which PHP on its own cannot determine
    # If such fonts result in misplaced text, it can be corrected with an SVG editor (e.g. Inkscape)
    $half_glyph_height = $fontsize * 0.35;
    $baseline = $centre + $half_glyph_height;
    return round($baseline, 3);
}

# For double-line text: returns the baseline of the first line (the y attribute of the text),
# placed 15% of the font size above $centre and rounded to 3 decimals
function text_baseline_offset_2line($centre, $fontsize) {
    $upward_shift = $fontsize * 0.15;
    return round($centre - $upward_shift, 3);
}

# Title: centred horizontally in the document and vertically within the title strip
$title_fontsize = round($_GET["fontsize"] * 1.5, 2);  # Title size is 1.5 times the given font size
$title_X_position = $document_width / 2;
$title_baseline = text_baseline_offset($batting_graph_options["title_height"] / 2, $title_fontsize);
$title_style = "font-size:{$title_fontsize}px; text-align:center; text-anchor:middle; {$batting_graph_options["text_style"]}";
echo "  <text style=\"{$title_style}\" x=\"{$title_X_position}\" y=\"{$title_baseline}\">{$title}</text>\r\n";

# Legend
# Drawn in the strip between the title and the data area, only when $_GET["legend"] evaluates to true
if ( !empty($_GET["legend"]) ) {
    # A missing "avg" parameter behaves like 0 (career average) in the comparisons below
    $legend_avg = isset($_GET["avg"]) ? $_GET["avg"] : null;

    # These two values come from the query string and are written into an attribute and a text node respectively,
    # so they are escaped. double_encode is off so that entities already present are left as they are
    $legend_avg_style = htmlspecialchars(isset($_GET["avgstyle"]) ? (string) $_GET["avgstyle"] : "", ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8", false);
    $legend_avg_text = htmlspecialchars((string) $legend_avg, ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8", false);

    # The legend is as wide as the document, capped at legend_max_size (and centred when capped)
    $legend_area_width = ($document_width <= $batting_graph_options["legend_max_size"]) ? $document_width : $batting_graph_options["legend_max_size"];
    $legend_X_position = ($document_width <= $batting_graph_options["legend_max_size"]) ? 0 : ($document_width - $batting_graph_options["legend_max_size"]) / 2;

    # For the legend to be centered, divide the document into 2 parts if $_GET["avg"] is negative (where no average line will be output), otherwise divide the document into 3 parts
    $legend_part2 = ($legend_avg < 0) ? $legend_area_width / 2 : round($legend_area_width / 3, 3);
    $legend_part3 = round($legend_area_width * (2/3), 3);

    $legend_fontsize = round($_GET["fontsize"] * 0.8, 2);   # 80% of the font size
    $legend_symbol_width = round($legend_area_width * 0.04, 3);   # Width is 4% of legend width, height is equal to the font size
    $legend_symbol_X_position = round($legend_symbol_width * (2/3), 3);
    $legend_symbol_Y_position = round(($batting_graph_options["legend_height"] - $legend_fontsize) / 2, 2);
    $legend_Y_centre = $batting_graph_options["legend_height"] / 2;
    $legend_text_X_position = $legend_symbol_width + round($legend_symbol_X_position * 1.5, 3);
    $legend_text_baseline = text_baseline_offset($legend_Y_centre, $legend_fontsize);
    $legend_double_text_baseline = text_baseline_offset_2line($legend_Y_centre, $legend_fontsize);

    # Output the legend
    echo " <g transform=\"translate(" . ($legend_X_position + round($legend_area_width * 0.07, 2)) . ",{$batting_graph_options["title_height"]})\">\r\n";   # Move the legend 7% to the right to make it look "centered"
    echo <<<LEGEND
    <g>
      <rect x="{$legend_symbol_X_position}" y="{$legend_symbol_Y_position}" width="{$legend_symbol_width}" height="{$legend_fontsize}" class="innings" />
      <text style="font-size:{$legend_fontsize}px; text-align:start; text-anchor:start; {$batting_graph_options["text_style"]}" x="{$legend_text_X_position}" y="{$legend_text_baseline}">Individual innings</text>
    </g>
    <g transform="translate({$legend_part2},0)">
      <rect x="{$legend_symbol_X_position}" y="{$legend_symbol_Y_position}" width="{$legend_symbol_width}" height="{$legend_fontsize}" class="innings-notout" />
      <text style="font-size:{$legend_fontsize}px; line-height:100%; text-align:start; text-anchor:start; {$batting_graph_options["text_style"]}" x="{$legend_text_X_position}" y="{$legend_double_text_baseline}">
        <tspan>Individual innings</tspan>
        <tspan x="{$legend_text_X_position}" dy="{$legend_fontsize}">(not out)</tspan>
      </text>
    </g>
    
LEGEND;

    # Third entry: the average line, labelled either as the career average (avg = 0) or as a moving average (avg > 0)
    if ( $legend_avg >= 0 ) {
        echo "    <g transform=\"translate({$legend_part3},0)\">\r\n";
        echo "      <path d=\"m {$legend_symbol_X_position},{$legend_Y_centre} {$legend_symbol_width},0\" style=\"{$legend_avg_style}\" />\r\n";
        echo "      <text style=\"font-size:{$legend_fontsize}px; line-height:100%; text-align:start; text-anchor:start; {$batting_graph_options["text_style"]}\" x=\"{$legend_text_X_position}\" y=\"{$legend_double_text_baseline}\">\r\n";
        echo "        <tspan>" . (($legend_avg == 0) ? "Career batting" : "Batting average") . "</tspan>\r\n";
        echo "        <tspan x=\"{$legend_text_X_position}\" dy=\"{$legend_fontsize}\">" . (($legend_avg == 0) ? "average" : "(last {$legend_avg_text} innings)") . "</tspan>\r\n";
        echo "      </text>\r\n";
        echo "    </g>\r\n";
    }

    echo "  </g>\r\n";
}

# Vertical axis labels

# First add a "0" label at the bottom
# Then continue adding labels on top until the maximum value is reached
echo "  <g style=\"font-size:{$_GET["fontsize"]}px; text-align:end; text-anchor:end; {$batting_graph_options["text_style"]}\">\r\n";

# Fall back to a single interval when "intervals" is missing, non-numeric or not positive:
# such a value would otherwise cause a division by zero or a label loop that never reaches the maximum
$label_intervals = ( isset($_GET["intervals"]) && is_numeric($_GET["intervals"]) && $_GET["intervals"] > 0 ) ? $_GET["intervals"] : 1;

$guide_height = $data_area_height / $label_intervals;
$guidelabel_X_position = $batting_graph_options["margin_left"] - $batting_graph_options["guide_label_spacing"];
# The "0" label is vertically centred on the bottom edge of the data area
$guidelabel_Y_position = text_baseline_offset($total_margin_top + $data_area_height, $_GET["fontsize"]);

$guidelabel_text = 0;
echo "    <text x=\"{$guidelabel_X_position}\" y=\"{$guidelabel_Y_position}\">{$guidelabel_text}</text>\r\n";

# Each step raises the label value by one interval and moves the label up by one guideline height
while ( ceil($guidelabel_text) < $_GET["max"] ) {
    $guidelabel_text += $_GET["max"] / $label_intervals;
    $guidelabel_Y_position = round($guidelabel_Y_position - $guide_height, 3);
    echo "    <text x=\"{$guidelabel_X_position}\" y=\"{$guidelabel_Y_position}\">" . round($guidelabel_text, 2) . "</text>\r\n";
}
echo "  </g>\r\n";

# RUNS AND BATTING AVERAGE

# Group holding all plotted data: translated so that its origin is the top left corner of the data area, and clipped with the "data-clip" path
echo "  <g transform=\"translate({$batting_graph_options["margin_left"]},{$total_margin_top})\" clip-path=\"url(#data-clip)\">\r\n";

# Bars for the runs scored in each innings
echo "    <g>\r\n";
$bar_X_position = $_GET["barspacing"];

foreach ( $scores2 as $innings_index => $runs ) {
    # A score of 0 draws nothing, its slot is simply left blank
    if ( $runs != 0 ) {
        $bar_height = round($runs * $_GET["scale"], 6);
        $bar_Y_position = $data_area_height - $bar_height;

        # The not out sign is only present in the raw $scores entry, not in $scores2
        if ( strpos($scores[$innings_index], "*") !== false ) {
            $bar_class = "innings-notout";
        }
        else {
            $bar_class = "innings";
        }

        echo "      <rect x=\"{$bar_X_position}\" y=\"{$bar_Y_position}\" width=\"{$_GET["barwidth"]}\" height=\"{$bar_height}\" class=\"{$bar_class}\" />\r\n";
    }
    # Advance to the next slot whether or not a bar was drawn
    $bar_X_position += $_GET["barwidth"] + $_GET["barspacing"];
}
unset($innings_index, $runs);
echo "    </g>\r\n";

# Batting average
# $_GET["avg"] selects what is plotted: a negative value disables the line, 0 plots the career average after each innings,
# and a positive value N plots the average over the last N innings

if ( $_GET["avg"] >= 0 && count($scores) > $_GET["avg"] ) {
    
    # Calculates the batting average for $length innings (as stored in $scores and $scores2) starting from $start
    # Returns runs scored divided by the number of dismissals, or boolean FALSE if there were no dismissals in that section
    function calculate_bat_avg($start, $length) {
        global $scores, $scores2;
        
        $section = array_slice($scores, $start, $length);
        $total_runs = array_sum(array_slice($scores2, $start, $length));
        $dismissals = count( preg_grep('/\*/', $section, PREG_GREP_INVERT) );    # Return the number of elements that do NOT have a *, i.e. where the batsman was out
        
        # Check explicitly instead of relying on the result of a suppressed division by zero:
        # that result is not FALSE on every PHP version (PHP 8 throws a DivisionByZeroError instead)
        if ( $dismissals === 0 ) {
            return false;
        }
        return $total_runs / $dismissals;
    }
    
    # Determine the point where to start from: the first innings in which the batsman was dismissed
    foreach ( $scores as $k => $score ) {
        if ( strpos($score, "*") === false ) {
            if ( $_GET["avg"] == 0 ) {
                # Career average: the section always starts at the first innings and grows by one innings per step
                $avg_start = 0;
                $avg_length = $k + 1;
                $avg_skipcount = $k;
            }
            else {
                # Moving average: a fixed-length section that slides forward by one innings per step
                $avg_start = ($_GET["avg"] >= $k + 1) ? 0 : $k + 1 - $_GET["avg"];
                $avg_length = $_GET["avg"];
                $avg_skipcount = $avg_start + $avg_length - 1;
            }
            break;
        }
    }
    
    if ( isset($avg_start, $avg_length) ) {   # Don't output the average if all innings are not-out -- in which case it is always infinity
        
        # Moves the section on by one innings: lengthens it for the career average, shifts its start for a moving average
        function increment_avg_counter() {
            global $avg_start, $avg_length;
            if ( $_GET["avg"] == 0 ) {
                $avg_length++;
            }
            else {
                $avg_start++;
            }
        }
        
        $avg_X_spacing = $_GET["barwidth"] + $_GET["barspacing"];
        
        # First point of the path: an absolute position, horizontally centred on the bar of the last innings in the first section
        $current_average = calculate_bat_avg($avg_start, $avg_length);
        $avg_path_data = "m " . ($_GET["barspacing"] + $_GET["barwidth"] / 2 + $avg_X_spacing * $avg_skipcount) . "," . round(($data_area_height - $current_average * $_GET["scale"]), 6);
        increment_avg_counter();
        $avg_skipcount = 1;
        
        while ( $avg_length <= count($scores) && $avg_start + $_GET["avg"] <= count($scores) ) {
            $next_average = calculate_bat_avg($avg_start, $avg_length);
            
            # Break the path at points where the average is undefined (this usually happens with $_GET["avg"] > 0, when the section for which the average is to be computed contains only not-out innings)
            if ( $next_average === false ) {
                $avg_skipcount++;
                increment_avg_counter();
                continue;
            }
            
            # Since relative coordinates are used, each path command must contain the DIFFERENCE between the current average and the previous one
            $last_average = $current_average;
            $current_average = $next_average;
            
            # After a break (skip count above 1) the pen is moved with "m" instead of drawing a line
            # The current average is subtracted from the previous one, not the other way round - due to the way in which y-coordinates are expressed in SVG
            $avg_path_data .= (($avg_skipcount > 1) ? " m " : " ") . ($avg_X_spacing * $avg_skipcount) . "," . round(($last_average - $current_average) * $_GET["scale"], 6);
            increment_avg_counter();
            $avg_skipcount = 1;
        }
        
        # The line style comes from the query string and is written into an attribute, so it is escaped
        # double_encode is off so that entities already present are left as they are
        $avg_style = htmlspecialchars(isset($_GET["avgstyle"]) ? (string) $_GET["avgstyle"] : "", ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8", false);
        echo "    <path style=\"{$avg_style}\" d=\"{$avg_path_data}\" />\r\n";
    }
}
echo "  </g>\r\n";

# Add year labels at the bottom, if $_GET["yearlabels"] evaluates to true

if ( !empty($_GET["yearlabels"]) ) {
    # Use the dates from the data table ($data_table, located earlier when the scores were extracted)
    # Since $scores and $scores2 are indexed in chronological order, it is now only needed to count the number of innings played in each year
    
    # Fetch the row list and the score column once instead of on every iteration
    $data_rows = $data_table->getElementsByTagName("tr");
    $score_column = (get_format() === "_ODI") ? 5 : 6;
    
    $dates = array();
    $last_valid_date = null;
    for ( $i = 1; $i < $data_rows->length; $i++ ) {   # i = 1 so that the header row is skipped
        $cells = $data_rows->item($i)->getElementsByTagName("td");
        
        # The date is in the second column
        $date_cell = $cells->item(1);
        $date_node = ($date_cell !== null) ? $date_cell->childNodes->item(0) : null;
        $date = ($date_node !== null) ? (string) $date_node->nodeValue : "";
        
        # Remember the most recent row that carries a date (rows without an innings count too)
        if ( preg_match('/[0-9]{1,2}\/[0-9]{1,2}\/[0-9]+/', $date) ) {
            $last_valid_date = $date;
        }
        
        if ( strpos($cells->item($score_column)->childNodes->item(0)->nodeValue, "DNB") !== false ) {   # Don't add the date for DNB entries
            continue;
        }
        
        # If the date field is empty, as seen in data tables for Tests for two innings in the same match, use the most recent date that precedes it
        $dates[] = ($last_valid_date !== null) ? $last_valid_date : $date;
    }
    unset($date);
    $yearslist = array();
    
    # Since dates are in DD/MM/YYYY format, PHP date functions should NOT be used due to conflicts with the MM/DD/YYYY format
    # Split each date into three parts (the third part is the year)
    foreach ( $dates as $date ) {
        $date_parts = explode("/", trim($date));    # trim() strips leading and trailing whitespace
        $yearslist[] = isset($date_parts[2]) ? $date_parts[2] : "";
    }
    unset($date);
    
    echo "  <g transform=\"translate({$batting_graph_options["margin_left"]}," . ($total_margin_top + $data_area_height) . ")\">\r\n";
    echo "    <g style=\"font-size:{$_GET["fontsize"]}px; text-align:center; text-anchor:middle; {$batting_graph_options["text_style"]}\">\r\n";
    # The separator path starts with a vertical line half a bar gap in from the left edge; one more line is appended after each year
    $year_separator_path_data = "m " . ($_GET["barspacing"] / 2) . ",0 0,{$batting_graph_options["year_labels_height"]}";
    $year_label_X_position = $_GET["barspacing"] / 2;
    $year_label_Y_position = text_baseline_offset($batting_graph_options["year_labels_height"] / 2, $_GET["fontsize"]);
    
    if ( count($yearslist) > 0 ) {   # Nothing to label when there are no innings
        $first_year = $yearslist[0];
        $last_year = $yearslist[count($yearslist) - 1];
        
        for ( $this_year = $first_year; $this_year <= $last_year; $this_year++ ) {
            $this_year_count = count(preg_grep('/' . $this_year . '/', $yearslist));
            if ( $this_year_count === 0 ) {   # No innings in this year: no label and no separator
                continue;
            }
            
            # The width of a year is proportional to the number of innings played in it
            $this_year_length = $this_year_count * ($_GET["barwidth"] + $_GET["barspacing"]);
            # Move to the top of the next separator (relative move: right by the year's width, back up by the line height), then draw it downwards
            $year_separator_path_data .= " m " . $this_year_length . "," . ($batting_graph_options["year_labels_height"] * -1) . " 0,{$batting_graph_options["year_labels_height"]}";
            
            # Place the label in the middle of the year's span, then advance to the end of the span
            $year_label_X_position += $this_year_length / 2;
            echo "      <text x=\"" . round($year_label_X_position, 6) . "\" y=\"{$year_label_Y_position}\">{$this_year}</text>\r\n";
            $year_label_X_position += $this_year_length / 2;
        }
    }
    
    echo "    </g>\r\n";
    echo "    <path style=\"{$batting_graph_options["border_style"]}\" d=\"{$year_separator_path_data}\" />\r\n";
    echo "  </g>\r\n";
}

# Optional "last updated" stamp in the bottom right corner, output if $_GET["lastupdated"] evaluates to true
# This is useful for current players, to warn readers that the graph might not be up to date
if ( !empty($_GET["lastupdated"]) ) {
    $stamp_fontsize = round($_GET["fontsize"] * 0.8, 2);   # 80% of the font size
    # The text is right-aligned, 10 units in from the right and bottom edges of the document
    $stamp_X_position = $document_width - 10;
    $stamp_Y_position = $document_height - 10;
    $stamp_time = date('c');
    echo "  <text style=\"font-size:{$stamp_fontsize}px; text-align:end; text-anchor:end; {$batting_graph_options["text_style"]}\" x=\"{$stamp_X_position}\" y=\"{$stamp_Y_position}\">Last updated: {$stamp_time}</text>\r\n";
}

# Close the root element
echo "</svg>";

?>