urlHref.php

https://www.wlkl.ch/index.php from 0
  1. 12533 chars [HTTP/1.1 200 OK, Server: nginx, Date: Fri, 04 Apr 2025 12:12:15 GMT, Content-Type: text/html; charset=UTF-8, Connection: close, Expires: Tue, 01 Jan 2002 00:00:00 GMT, Cache-Control: no-store, no-cache, must-revalidate, X-Frame-Options: SAMEORIGIN, Content-Security-Policy: frame-ancestors 'self'; base-uri 'self'; object-src 'none';, X-XSS-Protection: 1; mode=block, Vary: Accept-Encoding, Strict-Transport-Security: max-age=63072000]
  2. 70
  3. already https://www.wlkl.ch/index.php
  4. queued https://www.wlkl.ch/index.php/Main/HomePage
  5. skipping https://www.wlkl.ch/index.php/Main/HomePage?action=edit
  6. skipping https://www.wlkl.ch/index.php/Main/HomePage?action=diff
  7. skipping https://www.wlkl.ch/index.php/Main/HomePage?action=print
  8. skipping https://www.wlkl.ch/index.php/Main/HomePage?action=search&q=link=Main.HomePage
  9. skipping https://www.wlkl.ch/index.php/Site/AllRecentChanges
  10. skipping https://www.wlkl.ch/index.php/Main/RecentChanges
  11. skipping #wikititle
  12. skipping #sideBarTrail
  13. skipping #sideBarGroup
  14. skipping #sideBarWalter
  15. skipping #wikifoot
  16. empty href #
  17. already https://www.wlkl.ch/index.php/Main/HomePage
  18. empty href #
  19. skipping https://www.wlkl.ch/index.php/Main/Menu?action=edit
  20. skipping #wikititle
  21. skipping #sideBarTrail
  22. skipping #sideBarGroup
  23. skipping #sideBarWalter
  24. skipping #wikifoot
  25. already https://www.wlkl.ch/index.php/Main/HomePage
  26. empty href #
  27. queued https://www.wlkl.ch/index.php/J/J
  28. queued https://www.wlkl.ch/index.php/J/All
  29. queued https://www.wlkl.ch/index.php/J/Jour
  30. queued https://www.wlkl.ch/index.php/CS/CS
  31. queued https://www.wlkl.ch/index.php/Climb/Climb
  32. queued https://www.wlkl.ch/index.php/Div/Div
  33. queued https://www.wlkl.ch/index.php/Inf/Inf
  34. queued https://www.wlkl.ch/index.php/Lit/Lit
  35. queued https://www.wlkl.ch/index.php/Math/Math
  36. queued https://www.wlkl.ch/index.php/My/My
  37. queued https://www.wlkl.ch/index.php/Main/Petri
  38. queued https://www.wlkl.ch/index.php/Pub/Pub
  39. queued https://www.wlkl.ch/index.php/Wk13/Wk13
  40. queued https://www.wlkl.ch/index.php/PmWiki/PmWiki
  41. queued https://www.wlkl.ch/index.php/Main/WikiSandbox
  42. queued https://www.wlkl.ch/index.php/PmWiki/BasicEditing
  43. queued https://www.wlkl.ch/index.php/PmWiki/DocumentationIndex
  44. skipping https://www.wlkl.ch/index.php/Site/SideBar?action=edit
  45. skipping #wikititle
  46. skipping #sideBarTrail
  47. skipping #sideBarGroup
  48. skipping #sideBarWalter
  49. skipping #wikifoot
  50. queued https://www.wlkl.ch/index.php/Main
  51. already https://www.wlkl.ch/index.php/Main/HomePage
  52. skipping mailto:wa@wlkl.ch
  53. queued https://www.wlkl.ch/index.php/Climb/Kletterprogramm25
  54. queued https://www.wlkl.ch/index.php/Climb/Bergsee25
  55. already https://www.wlkl.ch/index.php/Climb/Climb
  56. queued https://www.wlkl.ch/index.php/Climb/13Burghuette
  57. queued https://www.wlkl.ch/index.php/Climb/13Schmalstoeckli
  58. queued https://www.wlkl.ch/petri/index.html
  59. queued https://www.wlkl.ch/petri/petriNetsForRE.pdf
  60. queued https://www.wlkl.ch/petri/cdSlidesOverview/index.html
  61. queued https://www.wlkl.ch/petri/cdSlidesOverview.pdf
  62. queued https://www.wlkl.ch//petri/smalltalk/instruction.html
  63. queued https://www.wlkl.ch/myPdfs/fastCharacteristicPolynomial.pdf
  64. queued https://www.wlkl.ch/index.php/Inf/Wsh
  65. queued https://www.wlkl.ch/index.php/FF/Ecua13
  66. already https://www.wlkl.ch/index.php/Pub/Pub
  67. skipping https://www.spwallisellen.ch/positionen/2021/klimaschonend-heizen-ist-unsere-zukunft
  68. skipping https://www.spwallisellen.ch/positionen/2021/klimafreundlich-heizen-und-solarstrom-produzieren-lohnen-sich
  69. queued https://www.wlkl.ch/index.php/Pub/Pub#gnd
  70. skipping https://www.spwallisellen.ch/positionen/2020/noch-nicht-gruen-mit-der-gasversorgung
  71. already https://www.wlkl.ch/index.php/Main
  72. already https://www.wlkl.ch/index.php/Main/HomePage
https://www.wlkl.ch/index.php/Main/HomePage from [https://www.wlkl.ch/index.php]
  1. 12533 chars [HTTP/1.1 200 OK, Server: nginx, Date: Fri, 04 Apr 2025 12:12:15 GMT, Content-Type: text/html; charset=UTF-8, Connection: close, Expires: Tue, 01 Jan 2002 00:00:00 GMT, Cache-Control: no-store, no-cache, must-revalidate, X-Frame-Options: SAMEORIGIN, Content-Security-Policy: frame-ancestors 'self'; base-uri 'self'; object-src 'none';, X-XSS-Protection: 1; mode=block, Vary: Accept-Encoding, Strict-Transport-Security: max-age=63072000]
  2. 70
  3. already https://www.wlkl.ch/index.php
  4. already https://www.wlkl.ch/index.php/Main/HomePage
  5. skipping https://www.wlkl.ch/index.php/Main/HomePage?action=edit
  6. skipping https://www.wlkl.ch/index.php/Main/HomePage?action=diff
  7. skipping https://www.wlkl.ch/index.php/Main/HomePage?action=print
  8. skipping https://www.wlkl.ch/index.php/Main/HomePage?action=search&q=link=Main.HomePage
  9. skipping https://www.wlkl.ch/index.php/Site/AllRecentChanges
  10. skipping https://www.wlkl.ch/index.php/Main/RecentChanges
  11. skipping #wikititle
  12. skipping #sideBarTrail
  13. skipping #sideBarGroup
  14. skipping #sideBarWalter
  15. skipping #wikifoot
  16. empty href #
  17. already https://www.wlkl.ch/index.php/Main/HomePage
  18. empty href #
  19. skipping https://www.wlkl.ch/index.php/Main/Menu?action=edit
  20. skipping #wikititle
  21. skipping #sideBarTrail
  22. skipping #sideBarGroup
  23. skipping #sideBarWalter
  24. skipping #wikifoot
  25. already https://www.wlkl.ch/index.php/Main/HomePage
  26. empty href #
  27. already https://www.wlkl.ch/index.php/J/J
  28. already https://www.wlkl.ch/index.php/J/All
  29. already https://www.wlkl.ch/index.php/J/Jour
  30. already https://www.wlkl.ch/index.php/CS/CS
  31. already https://www.wlkl.ch/index.php/Climb/Climb
  32. already https://www.wlkl.ch/index.php/Div/Div
  33. already https://www.wlkl.ch/index.php/Inf/Inf
  34. already https://www.wlkl.ch/index.php/Lit/Lit
  35. already https://www.wlkl.ch/index.php/Math/Math
  36. already https://www.wlkl.ch/index.php/My/My
  37. already https://www.wlkl.ch/index.php/Main/Petri
  38. already https://www.wlkl.ch/index.php/Pub/Pub
  39. already https://www.wlkl.ch/index.php/Wk13/Wk13
  40. already https://www.wlkl.ch/index.php/PmWiki/PmWiki
  41. already https://www.wlkl.ch/index.php/Main/WikiSandbox
  42. already https://www.wlkl.ch/index.php/PmWiki/BasicEditing
  43. already https://www.wlkl.ch/index.php/PmWiki/DocumentationIndex
  44. skipping https://www.wlkl.ch/index.php/Site/SideBar?action=edit
  45. skipping #wikititle
  46. skipping #sideBarTrail
  47. skipping #sideBarGroup
  48. skipping #sideBarWalter
  49. skipping #wikifoot
  50. already https://www.wlkl.ch/index.php/Main
  51. already https://www.wlkl.ch/index.php/Main/HomePage
  52. skipping mailto:wa@wlkl.ch
  53. already https://www.wlkl.ch/index.php/Climb/Kletterprogramm25
  54. already https://www.wlkl.ch/index.php/Climb/Bergsee25
  55. already https://www.wlkl.ch/index.php/Climb/Climb
  56. already https://www.wlkl.ch/index.php/Climb/13Burghuette
  57. already https://www.wlkl.ch/index.php/Climb/13Schmalstoeckli
  58. already https://www.wlkl.ch/petri/index.html
  59. already https://www.wlkl.ch/petri/petriNetsForRE.pdf
  60. already https://www.wlkl.ch/petri/cdSlidesOverview/index.html
  61. already https://www.wlkl.ch/petri/cdSlidesOverview.pdf
  62. already https://www.wlkl.ch//petri/smalltalk/instruction.html
  63. already https://www.wlkl.ch/myPdfs/fastCharacteristicPolynomial.pdf
  64. already https://www.wlkl.ch/index.php/Inf/Wsh
  65. already https://www.wlkl.ch/index.php/FF/Ecua13
  66. already https://www.wlkl.ch/index.php/Pub/Pub
  67. skipping https://www.spwallisellen.ch/positionen/2021/klimaschonend-heizen-ist-unsere-zukunft
  68. skipping https://www.spwallisellen.ch/positionen/2021/klimafreundlich-heizen-und-solarstrom-produzieren-lohnen-sich
  69. already https://www.wlkl.ch/index.php/Pub/Pub#gnd
  70. skipping https://www.spwallisellen.ch/positionen/2020/noch-nicht-gruen-mit-der-gasversorgung
  71. already https://www.wlkl.ch/index.php/Main
  72. already https://www.wlkl.ch/index.php/Main/HomePage
https://www.wlkl.ch/index.php/J/J from [https://www.wlkl.ch/index.php]
  1. 8786 chars [HTTP/1.1 200 OK, Server: nginx, Date: Fri, 04 Apr 2025 12:12:16 GMT, Content-Type: text/html; charset=UTF-8, Connection: close, Expires: Tue, 01 Jan 2002 00:00:00 GMT, Cache-Control: no-store, no-cache, must-revalidate, X-Frame-Options: SAMEORIGIN, Content-Security-Policy: frame-ancestors 'self'; base-uri 'self'; object-src 'none';, X-XSS-Protection: 1; mode=block, Vary: Accept-Encoding, Strict-Transport-Security: max-age=63072000]
  2. 51
  3. already https://www.wlkl.ch/index.php
  4. already https://www.wlkl.ch/index.php/J/J
  5. skipping https://www.wlkl.ch/index.php/J/J?action=edit
  6. skipping https://www.wlkl.ch/index.php/J/J?action=diff
  7. skipping https://www.wlkl.ch/index.php/J/J?action=print
  8. skipping https://www.wlkl.ch/index.php/J/J?action=search&q=link=J.J
  9. skipping https://www.wlkl.ch/index.php/Site/AllRecentChanges
  10. skipping https://www.wlkl.ch/index.php/J/RecentChanges
  11. skipping #wikititle
  12. skipping #sideBarTrail
  13. skipping #sideBarGroup
  14. skipping #sideBarWalter
  15. skipping #wikifoot
  16. empty href #
  17. already https://www.wlkl.ch/index.php/J/J
  18. empty href #
  19. skipping https://www.wlkl.ch/index.php/J/Menu?action=edit
  20. skipping #wikititle
  21. skipping #sideBarTrail
  22. skipping #sideBarGroup
  23. skipping #sideBarWalter
  24. skipping #wikifoot
  25. already https://www.wlkl.ch/index.php/Main/HomePage
  26. empty href #
  27. already https://www.wlkl.ch/index.php/J/J
  28. already https://www.wlkl.ch/index.php/J/All
  29. already https://www.wlkl.ch/index.php/J/Jour
  30. already https://www.wlkl.ch/index.php/CS/CS
  31. already https://www.wlkl.ch/index.php/Climb/Climb
  32. already https://www.wlkl.ch/index.php/Div/Div
  33. already https://www.wlkl.ch/index.php/Inf/Inf
  34. already https://www.wlkl.ch/index.php/Lit/Lit
  35. already https://www.wlkl.ch/index.php/Math/Math
  36. already https://www.wlkl.ch/index.php/My/My
  37. already https://www.wlkl.ch/index.php/Main/Petri
  38. already https://www.wlkl.ch/index.php/Pub/Pub
  39. already https://www.wlkl.ch/index.php/Wk13/Wk13
  40. already https://www.wlkl.ch/index.php/PmWiki/PmWiki
  41. already https://www.wlkl.ch/index.php/Main/WikiSandbox
  42. already https://www.wlkl.ch/index.php/PmWiki/BasicEditing
  43. already https://www.wlkl.ch/index.php/PmWiki/DocumentationIndex
  44. skipping https://www.wlkl.ch/index.php/Site/SideBar?action=edit
  45. skipping #wikititle
  46. skipping #sideBarTrail
  47. skipping #sideBarGroup
  48. skipping #sideBarWalter
  49. skipping #wikifoot
  50. queued https://www.wlkl.ch/index.php/J
  51. already https://www.wlkl.ch/index.php/J/J
  52. already https://www.wlkl.ch/index.php/J
  53. already https://www.wlkl.ch/index.php/J/J
https://www.wlkl.ch/index.php/J/All from [https://www.wlkl.ch/index.php]
  1. 8811 chars [HTTP/1.1 200 OK, Server: nginx, Date: Fri, 04 Apr 2025 12:12:16 GMT, Content-Type: text/html; charset=UTF-8, Connection: close, Expires: Tue, 01 Jan 2002 00:00:00 GMT, Cache-Control: no-store, no-cache, must-revalidate, X-Frame-Options: SAMEORIGIN, Content-Security-Policy: frame-ancestors 'self'; base-uri 'self'; object-src 'none';, X-XSS-Protection: 1; mode=block, Vary: Accept-Encoding, Strict-Transport-Security: max-age=63072000]
  2. 51
  3. already https://www.wlkl.ch/index.php
  4. already https://www.wlkl.ch/index.php/J/All
  5. skipping https://www.wlkl.ch/index.php/J/All?action=edit
  6. skipping https://www.wlkl.ch/index.php/J/All?action=diff
  7. skipping https://www.wlkl.ch/index.php/J/All?action=print
  8. skipping https://www.wlkl.ch/index.php/J/All?action=search&q=link=J.All
  9. skipping https://www.wlkl.ch/index.php/Site/AllRecentChanges
  10. skipping https://www.wlkl.ch/index.php/J/RecentChanges
  11. skipping #wikititle
  12. skipping #sideBarTrail
  13. skipping #sideBarGroup
  14. skipping #sideBarWalter
  15. skipping #wikifoot
  16. empty href #
  17. already https://www.wlkl.ch/index.php/J/J
  18. empty href #
  19. skipping https://www.wlkl.ch/index.php/J/Menu?action=edit
  20. skipping #wikititle
  21. skipping #sideBarTrail
  22. skipping #sideBarGroup
  23. skipping #sideBarWalter
  24. skipping #wikifoot
  25. already https://www.wlkl.ch/index.php/Main/HomePage
  26. empty href #
  27. already https://www.wlkl.ch/index.php/J/J
  28. already https://www.wlkl.ch/index.php/J/All
  29. already https://www.wlkl.ch/index.php/J/Jour
  30. already https://www.wlkl.ch/index.php/CS/CS
  31. already https://www.wlkl.ch/index.php/Climb/Climb
  32. already https://www.wlkl.ch/index.php/Div/Div
  33. already https://www.wlkl.ch/index.php/Inf/Inf
  34. already https://www.wlkl.ch/index.php/Lit/Lit
  35. already https://www.wlkl.ch/index.php/Math/Math
  36. already https://www.wlkl.ch/index.php/My/My
  37. already https://www.wlkl.ch/index.php/Main/Petri
  38. already https://www.wlkl.ch/index.php/Pub/Pub
  39. already https://www.wlkl.ch/index.php/Wk13/Wk13
  40. already https://www.wlkl.ch/index.php/PmWiki/PmWiki
  41. already https://www.wlkl.ch/index.php/Main/WikiSandbox
  42. already https://www.wlkl.ch/index.php/PmWiki/BasicEditing
  43. already https://www.wlkl.ch/index.php/PmWiki/DocumentationIndex
  44. skipping https://www.wlkl.ch/index.php/Site/SideBar?action=edit
  45. skipping #wikititle
  46. skipping #sideBarTrail
  47. skipping #sideBarGroup
  48. skipping #sideBarWalter
  49. skipping #wikifoot
  50. already https://www.wlkl.ch/index.php/J
  51. already https://www.wlkl.ch/index.php/J/All
  52. already https://www.wlkl.ch/index.php/J
  53. already https://www.wlkl.ch/index.php/J/All
https://www.wlkl.ch/index.php/J/Jour from [https://www.wlkl.ch/index.php]
  1. 8837 chars [HTTP/1.1 200 OK, Server: nginx, Date: Fri, 04 Apr 2025 12:12:17 GMT, Content-Type: text/html; charset=UTF-8, Connection: close, Expires: Tue, 01 Jan 2002 00:00:00 GMT, Cache-Control: no-store, no-cache, must-revalidate, X-Frame-Options: SAMEORIGIN, Content-Security-Policy: frame-ancestors 'self'; base-uri 'self'; object-src 'none';, X-XSS-Protection: 1; mode=block, Vary: Accept-Encoding, Strict-Transport-Security: max-age=63072000]
  2. 51
  3. already https://www.wlkl.ch/index.php
  4. already https://www.wlkl.ch/index.php/J/Jour
  5. skipping https://www.wlkl.ch/index.php/J/Jour?action=edit
  6. skipping https://www.wlkl.ch/index.php/J/Jour?action=diff
  7. skipping https://www.wlkl.ch/index.php/J/Jour?action=print
  8. skipping https://www.wlkl.ch/index.php/J/Jour?action=search&q=link=J.Jour
  9. skipping https://www.wlkl.ch/index.php/Site/AllRecentChanges
  10. skipping https://www.wlkl.ch/index.php/J/RecentChanges
  11. skipping #wikititle
  12. skipping #sideBarTrail
  13. skipping #sideBarGroup
  14. skipping #sideBarWalter
  15. skipping #wikifoot
  16. empty href #
  17. already https://www.wlkl.ch/index.php/J/J
  18. empty href #
  19. skipping https://www.wlkl.ch/index.php/J/Menu?action=edit
  20. skipping #wikititle
  21. skipping #sideBarTrail
  22. skipping #sideBarGroup
  23. skipping #sideBarWalter
  24. skipping #wikifoot
  25. already https://www.wlkl.ch/index.php/Main/HomePage
  26. empty href #
  27. already https://www.wlkl.ch/index.php/J/J
  28. already https://www.wlkl.ch/index.php/J/All
  29. already https://www.wlkl.ch/index.php/J/Jour
  30. already https://www.wlkl.ch/index.php/CS/CS
  31. already https://www.wlkl.ch/index.php/Climb/Climb
  32. already https://www.wlkl.ch/index.php/Div/Div
  33. already https://www.wlkl.ch/index.php/Inf/Inf
  34. already https://www.wlkl.ch/index.php/Lit/Lit
  35. already https://www.wlkl.ch/index.php/Math/Math
  36. already https://www.wlkl.ch/index.php/My/My
  37. already https://www.wlkl.ch/index.php/Main/Petri
  38. already https://www.wlkl.ch/index.php/Pub/Pub
  39. already https://www.wlkl.ch/index.php/Wk13/Wk13
  40. already https://www.wlkl.ch/index.php/PmWiki/PmWiki
  41. already https://www.wlkl.ch/index.php/Main/WikiSandbox
  42. already https://www.wlkl.ch/index.php/PmWiki/BasicEditing
  43. already https://www.wlkl.ch/index.php/PmWiki/DocumentationIndex
  44. skipping https://www.wlkl.ch/index.php/Site/SideBar?action=edit
  45. skipping #wikititle
  46. skipping #sideBarTrail
  47. skipping #sideBarGroup
  48. skipping #sideBarWalter
  49. skipping #wikifoot
  50. already https://www.wlkl.ch/index.php/J
  51. already https://www.wlkl.ch/index.php/J/Jour
  52. already https://www.wlkl.ch/index.php/J
  53. already https://www.wlkl.ch/index.php/J/Jour
5 pages, 2.682e+0 ela, 2.679e+0 get, 2.374e-3 parse , queue 34

End urlHref.php

args

urlHref.php

/home/ch45859/web/wlkl.ch/public_html/inf/php/urlHref.php

*** code does not have a span before first <br>***
<?php
/*******************************************************************************
urlHref.php: read url, analyze HTML for <a href

loop
    read a URL (i.e. a stream) from the queue
    parse it as HTML: DOMDocument::loadHTML
    find all <a href=
    queue these href URLs,
        if they conform (e.g. same host, no query ...) and are not already queued

report stats and times used

problems:
    could not get Connection: keep-alive to work (not a stream option?)
    we get HTTP/1.1 404 Not Found, and not the "create page" view shown in Firefox
*******************************************************************************/

require_once('env.php');
outBegin(basename(__FILE__));

// Timers: $mtS = script start, $mtG / $mtP = accumulated seconds spent in GET / in HTML parsing.
$mtS = microtime(true);
$mtG = $mtP = 0;

# $u0 = 'https://localhost';    // alternative base URL (disabled)
$u0 = 'https://www.wlkl.ch';

// Upper bound on the number of pages fetched in one run.
$maxPages = 5;

// A href is followed only if it is an absolute URL below $u0, contains no '?'
// (no query string) and does not contain "RecentChan".
// preg_quote() keeps the dots of the host name literal instead of "any character".
$uMsk = '%^' . preg_quote($u0, '%') . '/(?!.*RecentChan)[^?]+$%';

// $que: URLs in the order they will be fetched.
// $u2f: url => where it was first found (0 for the start URL, otherwise [referring url]);
//       also serves as the "already queued" set.
$que = ["$u0/index.php"];
$u2f = [$que[0] => 0];

// Disabled experiment for keep-alive. Note: per the PHP manual the context options
// for https:// URLs are keyed 'http' as well, not 'https'.
# $ctx = stream_context_set_default(['http' => ['method'=>'GET', 'header' => ['Connection: keep-alive']]]);
# $a2strLevel=4;
# out('context', stream_context_get_params($ctx));

// count($que) is re-evaluated on every pass because the loop body appends to $que.
for ($wx = 0; $wx < count($que) && $wx < $maxPages; $wx++) {
    $url = $que[$wx];
    out($url, 'from', $u2f[$url] ?? '-');
    outOL();

    // --- fetch -------------------------------------------------------------
    error_clear_last();
    $t0 = microtime(true);
    $h = @file_get_contents($url);
    $mtG += microtime(true) - $t0;
    if (false === $h) {
        outOLEnd("bad get in $url from", $u2f[$url], error_get_last());
        continue;
    }
    outLi(strlen($h), "chars", $http_response_header);

    // --- parse -------------------------------------------------------------
    error_clear_last();
    $t0 = microtime(true);
    $d = new DOMDocument();
    // Check the return value of loadHTML(): $d itself is an object and never false.
    // An empty body is rejected up front because loadHTML('') raises a ValueError on PHP 8.
    $parsed = '' !== $h && @$d->loadHTML($h);
    $mtP += microtime(true) - $t0;
    if (! $parsed) {
        outOLEnd("bad html in $url from", $u2f[$url], ", html $h", error_get_last());
        continue;
    }

    // --- collect links -----------------------------------------------------
    $aL = $d->getElementsByTagName('a');
    outLi(count($aL), '<a...>');
    foreach ($aL as $a) {
        $href = $a->getAttribute('href');
        if ('' === $href) {
            // getAttribute() yields '' for a missing or empty href
            outLi("empty href # " . $d->saveHTML($a));
            continue;
        }
        // The #fragment is not part of the HTTP request, so "page" and "page#x" are the
        // same document: compare and queue without it to avoid fetching a page twice.
        $page = explode('#', $href, 2)[0];
        if (! preg_match($uMsk, $page)) {
            outLi("skipping $href");
        } elseif (isset($u2f[$page])) {
            outLi("already $href");
        } else {
            outLi("queued $page");
            $u2f[$page] = [$url];
            $que[] = $page;
        }
    }
    outOLEnd();
}

out("$wx pages,", sprintf('%9.3e ela, %9.3e get, %9.3e parse', microtime(true) - $mtS, $mtG, $mtP), ", queue", count($que));
outEnd(__FILE__);

?>