python/sitemap.py
"""
"""
from io import StringIO
import os
def err(*m):
    """Print an error banner naming this file, then abort the script.

    The positional arguments are printed after the banner, separated by
    spaces.  The function never returns normally: it always ends by
    raising ZeroDivisionError, which stops the script with a traceback.
    """
    banner = '\n*****\nerror in'
    location = __file__ + ':'
    print(banner, location, *m)
    # Deliberate crash: same exception type the script has always died with.
    raise ZeroDivisionError('division by zero')
# Input: a saved HTML page.  Output: one link per line.
iN = '/wkData/tmp/sitemap.html'
oN = 'sitemap.txt'

# 'with' guarantees both files are closed, including when err() aborts the
# script part-way through by raising.
with open(iN, "r", encoding="utf-8") as i, \
        open(oN, 'w', encoding="utf-8") as o:
    # Phase 1: skip forward to the first line that contains '<main'.
    while True:
        l = i.readline()
        if '<main' in l:
            break
        if l == '':  # readline() returns '' only at end of file
            err('<main not found in', iN)
    print('found main', l)

    # Phase 2: scan line by line until the line that contains '</main'.
    # Only the first href= on each line is examined.
    while True:
        l = i.readline()
        if '</main' in l:
            break
        if l == '':
            # Report the path, not the file object's repr.
            err('</main not found in', iN)
        cx = l.find('href=')
        # find() returns -1 when absent; 0 is a valid match at column 0.
        if cx >= 0:
            j = l[cx+5:]
            # startswith() also copes with 'href=' being the very last text
            # on the line, where indexing j[0] would raise IndexError.
            if not j.startswith('"'):
                err('href without "', j, 'from', l)
            cx = j.find('"', 1)
            if cx < 1:
                err('href without ending "', cx, j, 'from', l)
            j = j[1:cx]
            if '://' not in j:
                # No scheme separator: treated as a relative link, skipped.
                print('ignoring', j, 'from', l)
            else:
                # Text-mode files translate '\n' to the platform line ending
                # themselves; writing os.linesep here would produce '\r\r\n'
                # on Windows.
                o.write(j + '\n')
    print('found end main', l)