beautifulsoup

This commit is contained in:
ra_ma
2025-06-20 18:19:31 +01:00
parent 1aef069926
commit 1e286536e3

View File

@@ -28,8 +28,8 @@ def get_metalink():
# Parse the .env file to get the filtering criteria # Parse the .env file to get the filtering criteria
excluded_countries = eval(os.getenv('EXCLUDED_COUNTRIES', '[]')) excluded_countries = eval(os.getenv('EXCLUDED_COUNTRIES', '[]'))
preferred_protocols = eval(os.getenv('PREFERRED_PROTOCOLS', '[]')) preferred_protocols = eval(os.getenv('PREFERRED_PROTOCOLS', '["https", "http"]'))
preferred_types = eval(os.getenv('PREFERRED_TYPES', '[]')) preferred_types = eval(os.getenv('PREFERRED_TYPES', '["https", "http"]'))
min_preference = int(os.getenv('MIN_PREFERENCE', '0')) min_preference = int(os.getenv('MIN_PREFERENCE', '0'))
# Filter out the URLs based on the criteria # Filter out the URLs based on the criteria
@@ -58,8 +58,11 @@ def filter_urls(content, excluded_countries, preferred_protocols, preferred_type
preference < min_preference): preference < min_preference):
url.decompose() url.decompose()
# Return the filtered XML as a string # Convert the BeautifulSoup object back to a string and clean up
return str(soup) filtered_content = str(soup)
filtered_content = '\n'.join(line for line in filtered_content.splitlines() if line.strip())
return filtered_content
if __name__ == '__main__': if __name__ == '__main__':
app.run(debug=True) app.run(debug=True)