PHProxy: path-style routing (#24) and anonymity documentation

Files touched by this patch:
  .htaccess   new file; rewrites /http(s):/target/... to
              index.php/http(s):/target/... unless the request maps to a
              real file or directory.
  Dockerfile  switches "AllowOverride None" to "AllowOverride All" in
              apache2.conf so the shipped .htaccess takes effect.
  README.md   documents the three accepted URL forms and the outbound
              header whitelist.
  index.php   when no URL was supplied via POST/GET, converts a PATH_INFO
              of the form /http(s):/target into the URL query variable.

NOTE(review): line breaks, indentation and blank context lines were
reconstructed from the hunk headers, and the blob ids on the "index"
lines are carried over unchanged. Verify the context lines against the
target tree before applying.

diff --git a/.htaccess b/.htaccess
new file mode 100644
index 0000000..5b72e7f
--- /dev/null
+++ b/.htaccess
@@ -0,0 +1,13 @@
+# PHProxy path-style routing (#24)
+# Forward /http(s):/target/... to index.php with PATH_INFO set.
+
+<IfModule mod_rewrite.c>
+    RewriteEngine On
+
+    # Don't rewrite real files or directories
+    RewriteCond %{REQUEST_FILENAME} !-f
+    RewriteCond %{REQUEST_FILENAME} !-d
+
+    # /http(s):/target/... -> /index.php/http(s):/target/...
+    RewriteRule ^(https?:/.*)$ index.php/$1 [L,QSA]
+</IfModule>
diff --git a/Dockerfile b/Dockerfile
index ad6a8d1..23b933b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,7 @@
 FROM php:8.5-apache
 
-RUN a2enmod rewrite
+RUN a2enmod rewrite \
+    && sed -ri 's!AllowOverride None!AllowOverride All!' /etc/apache2/apache2.conf
 
 COPY . /var/www/html/
 
diff --git a/README.md b/README.md
index c4639c3..d557123 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,21 @@ any URLs so that they point back to the script. Of course, there is more
 to it than this, but if you would like to know more in detail, view the
 source code.
 
+### URL forms
+
+PHProxy accepts the target URL in three forms:
+
+| Form | Example |
+| --- | --- |
+| Query (default, used by the entry form and outbound link rewriting) | `phproxy.example/index.php?_proxurl=<encoded-url>` |
+| Path via `index.php` (no rewrite module required) | `phproxy.example/index.php/https://example.com/` |
+| Bare path (requires Apache `mod_rewrite` + `AllowOverride All`) | `phproxy.example/https://example.com/` |
+
+The bare-path form is enabled by the shipped `.htaccess`. The Docker
+image is configured for it out of the box; on a standalone Apache
+install make sure `mod_rewrite` is loaded and `AllowOverride All` is
+set for the document root.
+
 ## Bugs and Limitations
 
 PHP is restrictive by nature, and as such, some problems arise that
@@ -113,3 +128,13 @@ JavaScript, or token-bound TLS (e.g. Google, GitHub, most large social networks)
 will not work through PHProxy. The proxy rewrites HTML/CSS URLs via regex — it
 does not run a headless browser, solve CAPTCHAs, or evaluate client-side
 scripts. This is a fundamental design limitation, not a bug.
+
+## Anonymity
+
+PHProxy is anonymous-by-default. The outbound request headers are built
+from a known-safe whitelist (method, path, Host, User-Agent, Accept,
+optional Referer, Cookie, Authorization, plus POST body headers). The
+proxy never forwards `X-Forwarded-For`, `X-Real-IP`, `Via`, or `Forwarded`
+to the upstream — targets see this server's IP only. If you want to also
+suppress the client User-Agent, use the settings page (`edit.php`) to pin
+it to a generic value or to `-` (omit entirely).
diff --git a/index.php b/index.php
index ea07f5e..cb98ba1 100644
--- a/index.php
+++ b/index.php
@@ -213,6 +213,27 @@ function _stripslashes(mixed $value): mixed
     $_COOKIE = _stripslashes($_COOKIE);
 }
 
+//
+// PATH-STYLE ROUTING (#24): phproxy.example/https://target/path
+// Apache collapses consecutive slashes in URL paths, so we accept
+// "http:/target" (single slash) and restore "http://target".
+//
+
+if (!isset($_POST[$_config['url_var_name']])
+    && !isset($_GET[$_config['url_var_name']])
+    && !isset($_GET[$_config['get_form_name']])
+    && !empty($_SERVER['PATH_INFO'])
+    && preg_match('#^/(https?):/+(.*)$#i', $_SERVER['PATH_INFO'], $_path_match))
+{
+    // The pattern above is case-insensitive, so normalise the scheme to
+    // lowercase (URI schemes are case-insensitive, RFC 3986 section 3.1).
+    $_path_target = strtolower($_path_match[1]) . '://' . $_path_match[2];
+    if (!empty($_SERVER['QUERY_STRING'])) {
+        $_path_target .= (strpos($_path_target, '?') === false ? '?' : '&') . $_SERVER['QUERY_STRING'];
+    }
+    $_GET[$_config['url_var_name']] = encode_url($_path_target);
+}
+
 //
 // FIGURE OUT WHAT TO DO (POST URL-form submit, GET form request, regular request, basic auth, cookie manager, show URL-form)
 //
@@ -354,6 +375,13 @@ function _stripslashes(mixed $value): mixed
     //
     // SET REQUEST HEADERS
     //
+    // Anonymity invariant: the outbound header set is built from scratch
+    // out of a known-safe whitelist (method, path, Host, User-Agent, Accept,
+    // optional Referer, Cookie, Authorization, and POST body headers).
+    // We never forward client-identifying $_SERVER['HTTP_X_FORWARDED_FOR'],
+    // 'HTTP_X_REAL_IP', 'HTTP_VIA', 'HTTP_FORWARDED', or any other inbound
+    // proxy header to the upstream. Targets see this server's IP only.
+    //
 
     $_request_headers = $_request_method . ' ' . $_url_parts['path'];
 