I made these for my family tonight and they were wildly delicious. I used tapioca starch instead of potato starch; it worked great. We ate them with over-easy eggs and salsa, and my son and my husband fought over the last ones. I’ll be making them again.
AI-generated version (Python):
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
def get_full_article_html(article_url_with_anchor):
    """Fetch an article's HTML with all image URLs made absolute.

    Strips any ``#anchor`` fragment from *article_url_with_anchor*,
    downloads the page, extracts the main article element, and rewrites
    every ``<img>`` ``src``/``srcset`` URL to an absolute URL on the
    source site.

    Args:
        article_url_with_anchor: Article URL, possibly including a
            fragment such as ``#comment-123``.

    Returns:
        The article HTML as a string, or ``""`` when the article content
        cannot be found or a network/parsing error occurs.
    """
    # Drop the fragment (e.g. "#comment-2691402") so we fetch the page itself.
    parsed = urlparse(article_url_with_anchor)
    fetch_url = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
    try:
        # timeout keeps the call from hanging forever on an unresponsive host
        response = requests.get(fetch_url, timeout=30)
        response.raise_for_status()  # raise on 4xx/5xx status codes

        soup = BeautifulSoup(response.text, "html.parser")

        # Smitten Kitchen articles are typically in <article id="post-NNN">.
        article = soup.find("article", id=lambda x: x and x.startswith("post-"))
        if not article:
            # Fallback: core content div. Less ideal — it may miss the
            # title/header — but still provides the article body.
            article = soup.find("div", class_="entry-content")
        if not article:
            # Neither container found: signal that extraction failed.
            return ""

        # Base (scheme://netloc) for resolving relative image links so they
        # point back to the original site.
        base_url = f"{parsed.scheme}://{parsed.netloc}"

        for img in article.find_all("img"):
            # Make the 'src' attribute absolute.
            src = img.get("src")
            if src:
                img["src"] = urljoin(base_url, src)

            # Make every URL inside 'srcset' absolute. srcset is a
            # comma-separated list of "url [descriptor]" entries,
            # e.g. "a.jpg 768w, b.jpg 1.5x".
            srcset = img.get("srcset")
            if srcset:
                new_parts = []
                for entry in srcset.split(","):
                    entry = entry.strip()
                    if not entry:
                        continue  # skip empty fragments from splitting
                    url_and_desc = entry.split(" ", 1)
                    abs_url = urljoin(base_url, url_and_desc[0])
                    if len(url_and_desc) > 1:
                        # Keep the width/density descriptor if present.
                        new_parts.append(f"{abs_url} {url_and_desc[1]}")
                    else:
                        new_parts.append(abs_url)
                if new_parts:
                    img["srcset"] = ", ".join(new_parts)

        # Serialized HTML of the modified article element.
        return str(article)
    except requests.exceptions.RequestException:
        # Network-level failure (connection refused, timeout, HTTP error).
        return ""
    except Exception:
        # Any unexpected parse/processing error — per this function's
        # contract, return an empty string rather than raising.
        return ""
# The URL provided by the user, including the comment anchor.
user_provided_url = "https://smittenkitchen.com/2023/04/hash-brown-patties/#comment-2691402"

if __name__ == "__main__":
    # Guard the network fetch so importing this module has no side effects.
    # Print only the article HTML, with no extra explanation or formatting.
    final_article_html = get_full_article_html(user_provided_url)
    print(final_article_html)
