This repository has been archived on 2024-09-05. You can view files and clone it, but cannot push or open issues or pull requests.
shimmie2/core/urls.php

251 lines
7.3 KiB
PHP
Raw Normal View History

2021-12-14 18:32:47 +00:00
<?php
declare(strict_types=1);
namespace Shimmie2;
/**
 * A not-yet-rendered link: a page path plus an optional query string.
 *
 * Both parts are nullable and are handed unchanged to make_link()
 * when the link is rendered.
 */
class Link
{
    public function __construct(
        public ?string $page = null,
        public ?string $query = null
    ) {
    }

    /**
     * Render this link by passing the stored page and query to make_link().
     */
    public function make_link(): string
    {
        return make_link($this->page, $this->query);
    }
}
/**
 * Build a link to the post list for a set of search terms.
 *
 * With no terms the link is "post/list/$page"; otherwise the terms are
 * joined with Tag::implode(), passed through url_escape(), and placed
 * between "post/list" and the page number.
 *
 * @param string[] $terms
 */
function search_link(array $terms = [], int $page = 1): string
{
    // no search terms: plain listing
    if (!$terms) {
        return make_link("post/list/$page");
    }

    $escaped = url_escape(Tag::implode($terms));
    return make_link("post/list/$escaped/$page");
}
/**
 * Work out the right URL for a page, honouring the nice URLs setting.
 *
 * eg make_link("foo/bar") becomes either "/v2/foo/bar" (niceurls) or
 * "/v2/index.php?q=foo/bar" (uglyurls)
 *
 * When $page is null, the configured main page (SetupConfig::MAIN_PAGE)
 * is used. Leading and trailing slashes are trimmed from the page.
 * $query and $fragment are expected to be already-encoded strings; they
 * are appended as-is.
 */
function make_link(?string $page = null, ?string $query = null, ?string $fragment = null): string
{
    global $config;

    $page ??= $config->get_string(SetupConfig::MAIN_PAGE);
    $page = trim($page, "/");

    $base = get_base_href();
    $nice_urls = SPEED_HAX || $config->get_bool(SetupConfig::NICE_URLS, false);

    if ($nice_urls) {
        $path = "$base/$page";
    } else {
        // ugly URLs: the page travels in the "q" parameter, ahead of
        // any caller-supplied query string
        $path = "$base/index.php";
        $query = empty($query) ? "q=$page" : "q=$page&$query";
    }

    return unparse_url([
        'path' => $path,
        'query' => $query,
        'fragment' => $fragment,
    ]);
}
2024-02-09 12:43:53 +00:00
/**
 * Figure out the current page from a link that make_link() generated
 *
 * Notes for the future, because the web stack is a pile of hacks:
 *
 * - According to some specs, "/" is for URL dividers with hierarchical
 *   significance and %2F is for slashes that are just slashes. This
 *   is what shimmie currently does - eg if you search for "AC/DC",
 *   the shimmie URL will be /post/list/AC%2FDC/1
 * - According to some other specs "/" and "%2F" are identical...
 * - PHP's $_GET[] automatically urldecodes the inputs so we can't
 *   tell the difference between q=foo/bar and q=foo%2Fbar
 * - REQUEST_URI contains the exact URI that was given to us, so we
 *   can parse it for ourselves
 * - <input type='hidden' name='q' value='post/list'> generates
 *   q=post%2Flist
 *
 * This function should always return strings with no leading slashes
 *
 * @param string|null $uri URI to inspect; when null, $_SERVER['REQUEST_URI']
 *                         is used (or "" if that is not set either)
 */
function _get_query(?string $uri = null): string
{
    $parsed_url = parse_url($uri ?? $_SERVER['REQUEST_URI'] ?? "");
    $path = $parsed_url["path"] ?? "";
    $base = get_base_href();

    // if we're looking at http://site.com/$INSTALL_DIR/index.php,
    // then get the query from the "q" parameter
    if ($path == ($base . "/index.php")) {
        // default to looking at the root
        $q = "";
        // (we need to manually parse the query string because PHP's $_GET
        // does an extra round of URL decoding, which we don't want)
        foreach (explode('&', $parsed_url['query'] ?? "") as $pair) {
            $kv = explode('=', $pair, 2);
            // if "q" appears more than once, the last occurrence wins
            if (count($kv) == 2 && $kv[0] == "q") {
                $q = $kv[1];
            }
        }
        // if we have no slashes, but do have an encoded
        // slash, then we _probably_ encoded too much.
        // Percent-encoding hex digits are case-insensitive, so
        // "%2f" has to be recognised as well as "%2F".
        if (!str_contains($q, "/") && stripos($q, "%2F") !== false) {
            $q = rawurldecode($q);
        }
    }
    // if we're looking at http://site.com/$INSTALL_DIR/$PAGE,
    // then get the query from the path
    else {
        $q = substr($path, strlen($base . "/"));
    }

    assert(!str_starts_with($q, "/"));
    return $q;
}
/**
 * Figure out the path to the shimmie install directory.
 *
 * eg if shimmie is visible at https://foo.com/gallery, this
 * function should return /gallery
 *
 * Resolution order:
 *  1. the BASE_HREF constant, if defined and non-empty
 *  2. PHP_SELF, if it ends in "index.php"
 *  3. SCRIPT_FILENAME with the DOCUMENT_ROOT prefix stripped
 * If none of these is available the script terminates via die().
 *
 * This function should always return strings with no trailing
 * slashes, so that it can be used like `get_base_href() . "/data/asset.abc"`
 *
 * @param array<string, string>|null $server_settings substitute for
 *        $_SERVER; when null the real $_SERVER is used
 */
function get_base_href(?array $server_settings = null): string
{
    if (defined("BASE_HREF") && !empty(BASE_HREF)) {
        return BASE_HREF;
    }
    $server_settings = $server_settings ?? $_SERVER;
    // PHP_SELF may be missing; default to "" so that we fall through to
    // the SCRIPT_FILENAME branch rather than passing null to str_ends_with()
    $php_self = $server_settings['PHP_SELF'] ?? '';
    if (str_ends_with($php_self, 'index.php')) {
        $self = $php_self;
    } elseif (isset($server_settings['SCRIPT_FILENAME']) && isset($server_settings['DOCUMENT_ROOT'])) {
        $self = substr($server_settings['SCRIPT_FILENAME'], strlen(rtrim($server_settings['DOCUMENT_ROOT'], "/")));
    } else {
        die("PHP_SELF or SCRIPT_FILENAME need to be set");
    }
    $dir = dirname($self);
    // normalise backslashes to forward slashes (dirname() can produce
    // them on Windows)
    $dir = str_replace("\\", "/", $dir);
    // a script in the web root yields "/", which becomes ""
    $dir = rtrim($dir, "/");
    return $dir;
}
/**
 * The opposite of the standard library's parse_url
 *
 * Reassembles the components (scheme, user, pass, host, port, path,
 * query, fragment) into a single URL string. Missing, null, or
 * empty-string components are omitted together with their separators.
 * Components are compared against the empty string rather than tested
 * for truthiness, so a literal "0" (eg "?0", "#0", or user "0")
 * is preserved.
 *
 * @param array<string, string|int> $parsed_url
 */
function unparse_url(array $parsed_url): string
{
    $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
    $host = $parsed_url['host'] ?? '';
    $port = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';

    $user = (string)($parsed_url['user'] ?? '');
    $pass = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
    // "user[:pass]@" is emitted whenever either part is present
    $auth = ($user !== '' || $pass !== '') ? "$user$pass@" : '';

    $path = $parsed_url['path'] ?? '';

    $query = (string)($parsed_url['query'] ?? '');
    $query = ($query !== '') ? '?' . $query : '';

    $fragment = (string)($parsed_url['fragment'] ?? '');
    $fragment = ($fragment !== '') ? '#' . $fragment : '';

    return "$scheme$auth$host$port$path$query$fragment";
}
/**
 * Apply query-parameter changes to the URL of the current request
 * ($_SERVER['REQUEST_URI']) and return the resulting URL.
 *
 * @param array<string, mixed> $changes
 */
function modify_current_url(array $changes): string
{
    $current = $_SERVER['REQUEST_URI'];

    return modify_url($current, $changes);
}
2024-01-15 15:08:22 +00:00
/**
 * Take a URL and modify some parameters
 *
 * Each entry in $changes sets the query parameter of that name; a null
 * value removes the parameter instead. All other parts of the URL are
 * passed through unchanged, and the query string is rebuilt with
 * http_build_query().
 *
 * @param array<string, mixed> $changes
 */
function modify_url(string $url, array $changes): string
{
    $parts = parse_url($url);
    if ($parts === false) {
        // parse_url() gives up on seriously malformed URLs; start from an
        // empty set of components instead of writing into `false`
        $parts = [];
    }

    $params = [];
    if (isset($parts['query'])) {
        parse_str($parts['query'], $params);
    }

    foreach ($changes as $k => $v) {
        if (is_null($v)) {
            // null means "remove this parameter"
            unset($params[$k]);
        } else {
            $params[$k] = $v;
        }
    }

    $parts['query'] = http_build_query($params);

    return unparse_url($parts);
}
/**
 * Turn a relative link into an absolute one, including hostname
 *
 * Links that already contain "://" are returned untouched. Otherwise a
 * non-empty link without a leading slash is prefixed with the install
 * directory, then the scheme and $_SERVER["HTTP_HOST"] are prepended and
 * every "/./" is collapsed to "/".
 */
function make_http(string $link): string
{
    // already absolute
    if (str_contains($link, "://")) {
        return $link;
    }

    $needs_base = $link !== '' && !str_starts_with($link, '/');
    if ($needs_base) {
        $link = get_base_href() . '/' . $link;
    }

    $scheme = is_https_enabled() ? "https://" : "http://";
    $absolute = $scheme . $_SERVER["HTTP_HOST"] . $link;

    return str_replace("/./", "/", $absolute);
}
2020-03-27 19:41:34 +00:00
2020-03-27 23:35:07 +00:00
/**
 * Return the HTTP referer if there is one and it is not blacklisted;
 * otherwise return the fallback $dest.
 *
 * A referer is rejected when it contains any of the blacklist entries
 * as a substring.
 *
 * @param string[]|null $blacklist
 */
function referer_or(string $dest, ?array $blacklist = null): string
{
    $referer = $_SERVER['HTTP_REFERER'] ?? null;
    if (empty($referer)) {
        return $dest;
    }

    foreach ($blacklist ?? [] as $banned) {
        if (str_contains($referer, $banned)) {
            return $dest;
        }
    }

    return $referer;
}