From 0f22f36e93945b9555d5db7d0c3f50c6384e0e7d Mon Sep 17 00:00:00 2001 From: Guillaume Pellerin Date: Fri, 29 Oct 2021 16:27:00 +0200 Subject: [PATCH] use python replace instead of sed --- misc/statifier.py | 55 ++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/misc/statifier.py b/misc/statifier.py index b4bdb71..b2cfe9b 100644 --- a/misc/statifier.py +++ b/misc/statifier.py @@ -9,6 +9,8 @@ class Statifier: def __init__(self, domain, languages): self.domain = domain self.languages = languages + self.replace_dict = { + } def wget(self, language, dir): command = 'wget -q --mirror -p --adjust-extension --header="Accept-Language: %s" -e robots=off --base=./ -k -P %s https://%s' % (language, dir, self.domain) @@ -20,25 +22,38 @@ class Statifier: # print(command) os.system(command) - def fix_languages(self, dir, language): - rule = 's,
,,g' % self.domain - self.sed(dir, rule) - - rule = 's,,,g' \ - % (language, language.upper(), language, language, language.upper()) - self.sed(dir, rule) - - rule = 's,,,g' \ - % (language, language.upper(), language, language, language.upper()) - self.sed(dir, rule) - - rule = 's,
,,g' \ - % (language, language.upper(), language, language, language.upper()) - self.sed(dir, rule) - - rule = 's,,,g' \ - % (language, language.upper(), language, language, language.upper()) - self.sed(dir, rule) + def fix_languages(self, language): + for root, dirs, files in os.walk(self.domain): + for filename in files: + name = os.path.splitext(filename)[0] + ext = os.path.splitext(filename)[1][1:] + if ext == 'html': + path = root + os.sep + filename + rel_root = root.split('/')[2:] + rel_root = language + '/' + '/'.join(rel_root) + print(rel_root) + + f = open(path, 'rt') + content = f.read() + f.close() + + s_in = '
' % self.domain + s_out = '' + content = content.replace(s_in, s_out) + + s_in = '' % (language, language.upper()) + s_out = '' + s_in + '' + s_out = s_out % rel_root + content = content.replace(s_in, s_out) + + s_in = '' % (language, language.upper()) + s_out = '' + s_in + '' + s_out = s_out % rel_root + content = content.replace(s_in, s_out) + + f = open(path, 'wt') + f.write(content) + f.close() def main(self): for language in self.languages: @@ -46,7 +61,7 @@ class Statifier: shutil.move(self.domain + os.sep + self.domain, self.domain + os.sep + language) for language in self.languages: - self.fix_languages(self.domain, language) + self.fix_languages(language) def main(): -- 2.39.5