# python
import json
def _unwrap_nested_json(content):
    """Peel JSON-encoded layers off *content* and return the innermost value."""
    # A string that looks like a JSON object is decoded; a single-key object
    # is treated as a wrapper and replaced by its only value, which may itself
    # be another encoded object, hence the loop.
    while (
        isinstance(content, str)
        and content.startswith("{")
        and content.endswith("}")
    ):
        try:
            content = json.loads(content)
        except json.JSONDecodeError:
            break  # Brace-delimited text that is not JSON: keep it verbatim.
        if isinstance(content, dict) and len(content) == 1:
            content = next(iter(content.values()))

    if isinstance(content, str):
        try:
            decoded = json.loads(content)
        except json.JSONDecodeError:
            return content
        # Only unwrap a JSON-encoded *string*. Adopting other decoded values
        # would corrupt plain text such as "0", "true" or "null".
        if isinstance(decoded, str):
            return decoded
    return content


def clean_and_format_string(data):
    """Extract the "content" field from a JSON document and tidy it up.

    *data* is parsed with ``json.loads`` and must decode to a JSON object.
    The value under ``"content"`` (default ``""``) is unwrapped from any
    nested JSON encoding, literal ``\\n`` and ``\\"`` escape sequences are
    replaced by a newline and a double quote, and, when the text starts with
    a ``"## "`` header line followed by a body, the header and body are
    stripped and surrounding double quotes are removed from the header title.

    Returns:
        The cleaned text. Non-string content is converted with ``str()``;
        falsy non-string content (``None``, ``0``, ``False``, empty
        containers) yields ``""``. If *data* cannot be decoded, or does not
        decode to a JSON object, an error is printed and ``""`` is returned.
    """
    # Keep the try body minimal: only the initial decode is allowed to fail.
    # ValueError covers json.JSONDecodeError and undecodable byte input.
    try:
        payload = json.loads(data)
    except (ValueError, TypeError) as e:
        print(f"Error decoding JSON: {e}")
        return ""

    # Valid JSON that is not an object (list, number, string, null) has no
    # "content" field; report it instead of failing with AttributeError.
    if not isinstance(payload, dict):
        print(
            "Error decoding JSON: expected a JSON object, "
            f"got {type(payload).__name__}"
        )
        return ""

    content = _unwrap_nested_json(payload.get("content", ""))

    if not isinstance(content, str):
        return str(content) if content else ""

    # Turn escape sequences that survived decoding into real characters.
    content = content.replace("\\n", "\n").replace('\\"', '"')

    # Only a "## " header followed by a body gets reformatted; everything
    # else (including a lone header line) is returned unchanged.
    if not content.startswith("## ") or "\n" not in content:
        return content

    header, _, body = content.partition("\n")
    header = header.strip()
    body = body.strip()
    # The length guard stops a lone '## "' from treating its single quote as
    # both the opening and the closing quote.
    if len(header) > 4 and header.startswith('## "') and header.endswith('"'):
        header = "## " + header[4:-1].strip()
    return f"{header}\n{body}"