🎯 Recommended Samples
Balanced sample collections from various categories for you to explore
Web Serialization Python Samples
Web Python data serialization examples including JSON and XML processing
💻 JSON Serialization python
🟢 simple
⭐⭐
Convert Python objects to JSON strings with custom encoding and formatting options
⏱️ 25 min
🏷️ python, web, serialization, json
Prerequisites:
Basic Python, json module
# Web Python JSON Serialization Examples
# Converting Python objects to JSON strings with various options
# 1. Basic Serialization
import json
from typing import Any, Dict, List, Optional
from datetime import datetime, date
from decimal import Decimal
def to_json(obj: Any, indent: Optional[int] = None, ensure_ascii: bool = True) -> str:
    """
    Serialize a Python value to JSON text.

    Args:
        obj: Value to serialize
        indent: Spaces per nesting level; None keeps everything on one line
        ensure_ascii: When True, non-ASCII characters are written as escapes

    Returns:
        The JSON text
    """
    dump_options = {'indent': indent, 'ensure_ascii': ensure_ascii}
    return json.dumps(obj, **dump_options)
def to_json_compact(obj: Any) -> str:
    """
    Serialize to the tightest JSON form: no spaces after ',' or ':'.

    Args:
        obj: Value to serialize

    Returns:
        Single-line JSON text without optional whitespace
    """
    tight_separators = (',', ':')
    return json.dumps(obj, separators=tight_separators)
def to_json_formatted(obj: Any, indent: int = 2) -> str:
    """
    Serialize to indented, multi-line JSON with keys in sorted order.

    Args:
        obj: Value to serialize
        indent: Spaces per nesting level

    Returns:
        Indented JSON text
    """
    formatted = json.dumps(obj, sort_keys=True, indent=indent)
    return formatted
def to_json_sorted(obj: Any) -> str:
    """
    Serialize to single-line JSON with dictionary keys in sorted order.

    Args:
        obj: Value to serialize

    Returns:
        JSON text whose object keys are sorted
    """
    ordered_text = json.dumps(obj, sort_keys=True)
    return ordered_text
# 2. Custom Serialization
def serialize_datetime(obj: Any) -> Any:
    """
    Fallback converter for values the json module cannot encode itself.

    Dates and datetimes become ISO-8601 strings; Decimal becomes float.

    Args:
        obj: Value that json could not serialize natively

    Returns:
        A JSON-compatible replacement value

    Raises:
        TypeError: If obj is not a datetime, date, or Decimal
    """
    # Both datetime and date expose isoformat(), so one check covers them.
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    raise TypeError(f"Type {type(obj)} not serializable")
def to_json_with_custom(obj: Any, **kwargs) -> str:
    """
    Serialize using serialize_datetime as the fallback for unsupported types.

    Args:
        obj: Value to serialize
        **kwargs: Extra keyword arguments forwarded to json.dumps

    Returns:
        JSON text
    """
    fallback = serialize_datetime
    return json.dumps(obj, default=fallback, **kwargs)
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that wraps extra Python types in single-key marker dicts."""

    def default(self, obj: Any) -> Any:
        """Return a tagged dict for datetime, date, Decimal and set values."""
        # datetime is tested first so it gets its own marker rather than
        # the plain-date one.
        if isinstance(obj, datetime):
            return {'__datetime__': obj.isoformat()}
        if isinstance(obj, date):
            return {'__date__': obj.isoformat()}
        if isinstance(obj, Decimal):
            return {'__decimal__': str(obj)}
        if isinstance(obj, set):
            return {'__set__': list(obj)}
        # Anything else is handed to the base class.
        return super().default(obj)
def to_json_custom_encoder(obj: Any, indent: int = 2) -> str:
    """
    Serialize with the CustomEncoder class.

    Args:
        obj: Value to serialize
        indent: Spaces per nesting level

    Returns:
        Indented JSON text
    """
    encoder_class = CustomEncoder
    return json.dumps(obj, indent=indent, cls=encoder_class)
# 3. Serialize Different Types
def serialize_dict(data: Dict[str, Any], **kwargs) -> str:
    """
    Serialize a dictionary to JSON text.

    Args:
        data: Mapping to serialize
        **kwargs: Keyword arguments forwarded to json.dumps

    Returns:
        JSON text
    """
    encoded = json.dumps(data, **kwargs)
    return encoded
def serialize_list(data: List[Any], **kwargs) -> str:
    """
    Serialize a list to JSON text.

    Args:
        data: Sequence to serialize
        **kwargs: Keyword arguments forwarded to json.dumps

    Returns:
        JSON text
    """
    encoded = json.dumps(data, **kwargs)
    return encoded
def serialize_object(obj: Any, **kwargs) -> str:
    """
    Serialize an arbitrary object through its instance attributes.

    Objects with a __dict__ are dumped as that dict; anything else is
    dumped as {"value": str(obj)}.

    Args:
        obj: Object to serialize
        **kwargs: Keyword arguments forwarded to json.dumps

    Returns:
        JSON text
    """
    if not hasattr(obj, '__dict__'):
        return json.dumps({'value': str(obj)}, **kwargs)
    attributes = obj.__dict__
    return json.dumps(attributes, **kwargs)
def serialize_dataframe(obj) -> str:
    """
    Serialize a pandas DataFrame as a JSON array of row records.

    Args:
        obj: Expected to be a pandas DataFrame

    Returns:
        Indented JSON text for a DataFrame; otherwise a JSON object with an
        "error" key saying whether pandas is missing or obj is not a
        DataFrame
    """
    # Only the import is guarded, so a missing pandas and a wrong argument
    # type are reported with different messages.
    try:
        import pandas as pd
    except ImportError:
        return json.dumps({'error': 'pandas not available'})
    if isinstance(obj, pd.DataFrame):
        return obj.to_json(orient='records', indent=2)
    return json.dumps({'error': 'object is not a pandas DataFrame'})
# 4. Advanced Serialization Options
def to_json_ascii(obj: Any) -> str:
    """
    Serialize so the output contains ASCII characters only.

    Args:
        obj: Value to serialize

    Returns:
        JSON text with non-ASCII characters written as escapes
    """
    escaped_text = json.dumps(obj, ensure_ascii=True)
    return escaped_text
def to_json_unicode(obj: Any) -> str:
    """
    Serialize while leaving non-ASCII characters unescaped.

    Args:
        obj: Value to serialize

    Returns:
        JSON text containing the original unicode characters
    """
    raw_text = json.dumps(obj, ensure_ascii=False)
    return raw_text
def to_json_with_undefined(obj: Any, undefined: str = 'null') -> str:
    """
    Serialize while controlling how None values are represented.

    json.dumps only calls its ``default`` hook for values it cannot encode,
    and None is always encodable, so the substitution is done on the data
    before dumping.

    Args:
        obj: Value to serialize
        undefined: Replacement for None. The default 'null' keeps the
            standard JSON null; any other value is emitted as a JSON string
            wherever None appears as a value (dict keys are untouched)

    Returns:
        JSON text
    """
    if undefined == 'null':
        # Standard behaviour: None -> null, nothing to rewrite.
        return json.dumps(obj)

    def replace_none(value):
        if value is None:
            return undefined
        if isinstance(value, dict):
            return {k: replace_none(v) for k, v in value.items()}
        if isinstance(value, (list, tuple)):
            return [replace_none(v) for v in value]
        return value

    return json.dumps(replace_none(obj))
# 5. Conditional Serialization
def serialize_skip_none(obj: Any) -> str:
    """
    Serialize while dropping dictionary entries whose value is None.

    The filter is applied recursively to nested dicts and to dicts inside
    lists/tuples. None items inside a list are kept so positions stay stable.

    Args:
        obj: Value to serialize

    Returns:
        JSON text without None-valued dictionary entries
    """
    def skip_none(value):
        if isinstance(value, dict):
            return {k: skip_none(v) for k, v in value.items() if v is not None}
        if isinstance(value, (list, tuple)):
            return [skip_none(v) for v in value]
        return value

    # Filter up front: json.dumps' ``default`` hook is never invoked for
    # dicts, so it cannot be used to reach nested levels.
    return json.dumps(skip_none(obj))
def serialize_skip_private(obj: Any) -> str:
    """
    Serialize while omitting top-level entries whose name starts with '_'.

    Works on a dict's keys or on an object's instance attributes. Other
    values are serialized unchanged.

    Args:
        obj: Dict, object with a __dict__, or any JSON-serializable value

    Returns:
        JSON text without private entries
    """
    def is_public(key) -> bool:
        # Non-string keys (ints, etc.) have no startswith and cannot be
        # "private", so they are always kept.
        return not (isinstance(key, str) and key.startswith('_'))

    if isinstance(obj, dict):
        source = obj
    elif hasattr(obj, '__dict__'):
        source = obj.__dict__
    else:
        return json.dumps(obj)
    return json.dumps({k: v for k, v in source.items() if is_public(k)})
# 6. Streaming Serialization
def serialize_to_file(obj: Any, file_path: str, **kwargs) -> bool:
    """
    Write a value to a UTF-8 encoded JSON file.

    Any exception is reported with print() and turned into a False result.

    Args:
        obj: Value to serialize
        file_path: Destination path
        **kwargs: Keyword arguments forwarded to json.dump

    Returns:
        True when the file was written, False when an error occurred
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as handle:
            json.dump(obj, handle, **kwargs)
    except Exception as err:
        print(f"Error writing file: {err}")
        return False
    return True
def serialize_lines(items: List[Any]) -> str:
    """
    Serialize each item on its own line (JSON Lines).

    Args:
        items: Values to serialize, one per output line

    Returns:
        Newline-joined JSON documents, without a trailing newline
    """
    encoded_rows = [json.dumps(entry) for entry in items]
    return '\n'.join(encoded_rows)
# 7. Pretty Printing
def to_json_pretty(obj: Any, indent: int = 2, width: int = 80) -> str:
    """
    Pretty print JSON with indentation.

    Args:
        obj: Value to serialize
        indent: Spaces per nesting level
        width: Accepted for backward compatibility only. json.dumps has no
            line-width option (passing one raises TypeError), so this value
            is not applied

    Returns:
        Indented JSON text
    """
    # ``width`` is deliberately not forwarded; see the docstring.
    return json.dumps(obj, indent=indent)
def to_json_colorful(obj: Any) -> str:
    """
    Add ANSI color codes to indented JSON (for terminal display).

    Strings (keys and values) are cyan, numbers yellow, and the literals
    true/false/null magenta.

    Args:
        obj: Value to serialize

    Returns:
        Colorized JSON text
    """
    import re

    reset = '\033[0m'
    palette = {
        'string': '\033[36m',   # cyan
        'number': '\033[33m',   # yellow
        'literal': '\033[35m',  # magenta
    }
    # One pass over the text: a string token is consumed whole, so digits or
    # keywords inside it are never recolored, and the escape codes inserted
    # here are never rescanned by a later substitution.
    token = re.compile(
        r'(?P<string>"(?:\\.|[^"\\])*")'
        r'|(?P<number>-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'
        r'|(?P<literal>\b(?:true|false|null)\b)'
    )

    def paint(match):
        return f"{palette[match.lastgroup]}{match.group(0)}{reset}"

    return token.sub(paint, json.dumps(obj, indent=2))
# 8. Validation Before Serialization
def validate_serializable(obj: Any) -> bool:
    """
    Check if a value can be serialized by json.dumps with default settings.

    Args:
        obj: Value to check

    Returns:
        True if json.dumps succeeds, False otherwise
    """
    try:
        json.dumps(obj)
    except (TypeError, ValueError, OverflowError, RecursionError):
        # ValueError covers circular references; RecursionError covers
        # structures nested too deeply to encode.
        return False
    return True
def get_serialization_errors(obj: Any) -> List[str]:
    """
    Walk a structure and describe every part that json.dumps rejects.

    Each dict, list, tuple and leaf is tested on its own, so a container
    holding a bad value is reported as well as the bad value itself.

    Args:
        obj: Value to inspect

    Returns:
        Messages of the form "<path>: <type name> - <error>"; empty when
        everything serializes
    """
    problems = []

    def walk(node, location=''):
        try:
            json.dumps(node)
        except (TypeError, OverflowError) as exc:
            label = location or 'root'
            problems.append(f"{label}: {type(node).__name__} - {exc}")
        # Descend regardless of the outcome above so the offending leaf is
        # located too.
        if isinstance(node, dict):
            for key, child in node.items():
                walk(child, f"{location}.{key}" if location else key)
        elif isinstance(node, (list, tuple)):
            for index, child in enumerate(node):
                walk(child, f"{location}[{index}]")

    walk(obj)
    return problems
# 9. Compression
def serialize_compressed(obj: Any) -> bytes:
    """
    Serialize to JSON, encode as UTF-8 and gzip the result.

    Args:
        obj: Value to serialize

    Returns:
        Gzip-compressed bytes
    """
    import gzip
    utf8_payload = json.dumps(obj).encode('utf-8')
    return gzip.compress(utf8_payload)
def serialize_base64(obj: Any) -> str:
    """
    Serialize to JSON and return the UTF-8 bytes as base64 text.

    Args:
        obj: Value to serialize

    Returns:
        Base64-encoded JSON as an ASCII string
    """
    import base64
    utf8_payload = json.dumps(obj).encode('utf-8')
    encoded = base64.b64encode(utf8_payload)
    return encoded.decode('ascii')
# Usage Examples
def demonstrate_json_serialize():
    """Print a walkthrough of the serialization helpers defined in this sample."""
    print("=== Web Python JSON Serialization Examples ===\n")

    # 1. Basic serialization
    print("--- 1. Basic Serialization ---")
    profile = {'name': 'Alice', 'age': 30, 'city': 'NYC'}
    compact_text = to_json(profile)
    print(f"Compact: {compact_text}")
    formatted_text = to_json_formatted(profile)
    print(f"Formatted:\n{formatted_text}")
    sorted_text = to_json_sorted(profile)
    print(f"Sorted: {sorted_text}")

    # 2. Custom types
    print("\n--- 2. Custom Types ---")
    record = {
        'name': 'Bob',
        'timestamp': datetime(2025, 12, 31, 14, 30, 45),
        'amount': Decimal('123.45'),
    }
    custom_text = to_json_with_custom(record, indent=2)
    print(f"With custom encoder:\n{custom_text}")

    # 3. Different structures
    print("\n--- 3. Different Structures ---")
    mixed_list = [1, 2, 3, {'key': 'value'}]
    print(f"List: {to_json(mixed_list)}")

    class Person:
        def __init__(self, name, age):
            self.name = name
            self.age = age
            self._private = 'secret'

    charlie = Person('Charlie', 25)
    print(f"Object: {serialize_object(charlie)}")
    print(f"Skip private: {serialize_skip_private(charlie)}")

    # 4. Unicode
    print("\n--- 4. Unicode Handling ---")
    greeting = {'message': 'Hello 世界 🌍'}
    print(f"ASCII escaped: {to_json_ascii(greeting)}")
    print(f"Unicode: {to_json_unicode(greeting)}")

    # 5. Conditional serialization
    print("\n--- 5. Conditional Serialization ---")
    sparse = {'a': 1, 'b': None, 'c': 3}
    print(f"Skip None: {serialize_skip_none(sparse)}")

    # 6. Validation
    print("\n--- 6. Validation ---")
    good = {'key': 'value'}
    bad = {'func': lambda x: x}
    print(f"Valid data serializable: {validate_serializable(good)}")
    print(f"Invalid data serializable: {validate_serializable(bad)}")
    print(f"Errors: {get_serialization_errors(bad)}")

    # 7. Special encodings
    print("\n--- 7. Special Encodings ---")
    bulk = {'data': [1, 2, 3] * 100}
    packed = serialize_compressed(bulk)
    print(f"Original size: {len(to_json(bulk))} bytes")
    print(f"Compressed size: {len(packed)} bytes")

    # 8. JSON Lines
    print("\n--- 8. JSON Lines ---")
    rows = [{'id': 1, 'name': 'Item 1'}, {'id': 2, 'name': 'Item 2'}]
    print(f"JSON Lines:\n{serialize_lines(rows)}")

    print("\n=== All JSON Serialization Examples Completed ===")
# Export functions
# ``export { ... }`` is JavaScript syntax and a SyntaxError in Python; the
# public names of this sample are declared through __all__ instead.
__all__ = [
    'to_json', 'to_json_compact', 'to_json_formatted', 'to_json_sorted',
    'serialize_datetime', 'to_json_with_custom', 'CustomEncoder', 'to_json_custom_encoder',
    'serialize_dict', 'serialize_list', 'serialize_object', 'serialize_dataframe',
    'to_json_ascii', 'to_json_unicode', 'to_json_with_undefined',
    'serialize_skip_none', 'serialize_skip_private',
    'serialize_to_file', 'serialize_lines',
    'to_json_pretty', 'to_json_colorful',
    'validate_serializable', 'get_serialization_errors',
    'serialize_compressed', 'serialize_base64',
    'demonstrate_json_serialize',
]
💻 JSON Deserialization python
🟡 intermediate
⭐⭐⭐
Parse JSON strings into Python objects with custom decoding and error handling
⏱️ 30 min
🏷️ python, web, serialization, json
Prerequisites:
Intermediate Python, json module, type hints
# Web Python JSON Deserialization Examples
# Parsing JSON strings into Python objects with custom handling
# 1. Basic Deserialization
import json
from typing import Any, Dict, List, Optional, TypeVar, Type
from datetime import datetime, date
from decimal import Decimal
T = TypeVar('T')
def from_json(json_string: str) -> Any:
    """
    Decode JSON text into the corresponding Python value.

    Args:
        json_string: JSON text

    Returns:
        Decoded Python value
    """
    decoded = json.loads(json_string)
    return decoded
def from_json_file(file_path: str) -> Any:
    """
    Read a UTF-8 encoded file and decode its JSON content.

    Args:
        file_path: Path of the JSON file

    Returns:
        Decoded Python value
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        decoded = json.load(handle)
    return decoded
def from_json_safe(json_string: str, default: Any = None) -> Any:
    """
    Decode JSON text, falling back to a default instead of raising.

    Decode failures and wrong input types are printed and swallowed.

    Args:
        json_string: JSON text
        default: Value returned when decoding fails

    Returns:
        Decoded value, or default on error
    """
    try:
        decoded = json.loads(json_string)
    except (json.JSONDecodeError, TypeError) as err:
        print(f"JSON decode error: {err}")
        return default
    return decoded
# 2. Type-Specific Deserialization
def to_dict(json_string: str) -> Dict[str, Any]:
    """
    Decode JSON text that is expected to hold an object.

    The decoded value is returned as-is; its type is not checked.

    Args:
        json_string: JSON text

    Returns:
        Decoded value
    """
    decoded = json.loads(json_string)
    return decoded
def to_list(json_string: str) -> List[Any]:
    """
    Decode JSON text that is expected to hold an array.

    The decoded value is returned as-is; its type is not checked.

    Args:
        json_string: JSON text

    Returns:
        Decoded value
    """
    decoded = json.loads(json_string)
    return decoded
def to_string_list(json_string: str) -> List[str]:
    """
    Decode a JSON array and convert every item with str().

    Args:
        json_string: JSON text

    Returns:
        List of strings

    Raises:
        ValueError: If the decoded value is not a list
    """
    decoded = json.loads(json_string)
    if not isinstance(decoded, list):
        raise ValueError("JSON is not a list")
    return list(map(str, decoded))
def to_int_list(json_string: str) -> List[int]:
    """
    Decode a JSON array and convert every item with int().

    Args:
        json_string: JSON text

    Returns:
        List of integers

    Raises:
        ValueError: If the decoded value is not a list
    """
    decoded = json.loads(json_string)
    if not isinstance(decoded, list):
        raise ValueError("JSON is not a list")
    return list(map(int, decoded))
# 3. Custom Object Deserialization
def deserialize_datetime(obj: Dict[str, Any]) -> Any:
    """
    Recursively turn marker dicts back into rich Python values.

    A dict containing '__datetime__', '__date__', '__decimal__' or '__set__'
    (checked in that order) is replaced by the corresponding datetime, date,
    Decimal or set. Other dicts and lists are rebuilt with their contents
    converted; anything else is returned unchanged.

    Args:
        obj: Decoded JSON value

    Returns:
        Value with marker dicts converted
    """
    if isinstance(obj, list):
        return [deserialize_datetime(entry) for entry in obj]
    if not isinstance(obj, dict):
        return obj

    if '__datetime__' in obj:
        return datetime.fromisoformat(obj['__datetime__'])
    if '__date__' in obj:
        return date.fromisoformat(obj['__date__'])
    if '__decimal__' in obj:
        return Decimal(obj['__decimal__'])
    if '__set__' in obj:
        return set(obj['__set__'])
    return {key: deserialize_datetime(value) for key, value in obj.items()}
def from_json_custom(json_string: str) -> Any:
    """
    Decode JSON text, then convert marker dicts with deserialize_datetime.

    Args:
        json_string: JSON text

    Returns:
        Decoded value with custom types restored
    """
    return deserialize_datetime(json.loads(json_string))
class CustomDecoder(json.JSONDecoder):
    """JSON decoder that restores values from single-key marker dicts."""

    def __init__(self, *args, **kwargs):
        # Register this class's own hook so every decoded JSON object is
        # passed through object_hook().
        hook = self.object_hook
        super().__init__(*args, object_hook=hook, **kwargs)

    def object_hook(self, obj: Dict[str, Any]) -> Any:
        """Convert a marker dict to datetime, date, Decimal or set."""
        if '__datetime__' in obj:
            return datetime.fromisoformat(obj['__datetime__'])
        if '__date__' in obj:
            return date.fromisoformat(obj['__date__'])
        if '__decimal__' in obj:
            return Decimal(obj['__decimal__'])
        if '__set__' in obj:
            return set(obj['__set__'])
        # Ordinary objects pass through untouched.
        return obj
def from_json_decoder(json_string: str) -> Any:
    """
    Decode JSON text with the CustomDecoder class.

    Args:
        json_string: JSON text

    Returns:
        Decoded value with custom types restored
    """
    decoder_class = CustomDecoder
    return json.loads(json_string, cls=decoder_class)
# 4. Object Reconstruction
def to_object(json_string: str, cls: Type[T]) -> T:
    """
    Build an instance of cls whose attributes come from a JSON object.

    The instance is created with cls.__new__, so __init__ is not run; the
    decoded mapping is merged straight into the instance __dict__.

    Args:
        json_string: JSON text
        cls: Class to instantiate

    Returns:
        New instance of cls
    """
    attributes = json.loads(json_string)
    instance = cls.__new__(cls)
    instance.__dict__.update(attributes)
    return instance
def to_objects(json_string: str, cls: Type[T]) -> List[T]:
    """
    Build one instance of cls per element of a JSON array.

    Each instance is created with cls.__new__ (no __init__ call) and its
    __dict__ is updated from the corresponding element.

    Args:
        json_string: JSON text holding an array
        cls: Class to instantiate

    Returns:
        List of new instances
    """
    def build(record):
        instance = cls.__new__(cls)
        instance.__dict__.update(record)
        return instance

    return [build(record) for record in json.loads(json_string)]
# 5. Streaming Deserialization
def from_json_lines(json_lines: str) -> List[Any]:
    """
    Decode JSON Lines text: one JSON document per non-blank line.

    Args:
        json_lines: Newline-separated JSON documents

    Returns:
        Decoded values in input order
    """
    decoded = []
    for row in json_lines.strip().split('\n'):
        if not row.strip():
            continue  # skip blank lines
        decoded.append(json.loads(row))
    return decoded
def from_json_stream(file_path: str) -> List[Any]:
    """
    Decode a UTF-8 JSON Lines file, skipping blank lines.

    Args:
        file_path: Path of the file

    Returns:
        Decoded values in file order
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [json.loads(row) for row in handle if row.strip()]
# 6. Partial Deserialization
def get_value(json_string: str, key: str, default: Any = None) -> Any:
    """
    Decode JSON text and look up a single top-level key.

    Args:
        json_string: JSON text
        key: Key to read
        default: Value returned when the key is absent, the text is not
            valid JSON, or the decoded value has no .get method

    Returns:
        Looked-up value or default
    """
    try:
        decoded = json.loads(json_string)
        found = decoded.get(key, default)
    except (json.JSONDecodeError, AttributeError):
        return default
    return found
def get_nested_value(json_string: str, *keys: str, default: Any = None) -> Any:
    """
    Decode JSON text and follow a chain of keys through nested objects.

    Args:
        json_string: JSON text
        *keys: Keys to follow, outermost first
        default: Value returned when a step is missing, is null, lands on a
            non-object, or the text cannot be decoded

    Returns:
        Value at the end of the path, or default
    """
    try:
        node = json.loads(json_string)
        for step in keys:
            if not isinstance(node, dict):
                return default
            node = node.get(step)
            if node is None:
                return default
        return node
    except (json.JSONDecodeError, TypeError):
        return default
def extract_values(json_string: str, keys: List[str]) -> Dict[str, Any]:
    """
    Decode JSON text and pick out the requested top-level keys.

    Args:
        json_string: JSON text
        keys: Keys to copy; keys absent from the document are skipped

    Returns:
        Dict of the keys that were present; empty dict for invalid JSON
    """
    try:
        decoded = json.loads(json_string)
    except json.JSONDecodeError:
        return {}
    picked = {}
    for name in keys:
        if name in decoded:
            picked[name] = decoded.get(name)
    return picked
# 7. Validation During Deserialization
def validate_schema(json_string: str, schema: Dict[str, type]) -> bool:
    """
    Check that decoded JSON has every schema key with the expected type.

    Args:
        json_string: JSON text
        schema: Mapping of required key to the type its value must be an
            instance of

    Returns:
        True when every key is present with a matching type; False on a
        missing key, a type mismatch, or invalid JSON
    """
    try:
        decoded = json.loads(json_string)
    except json.JSONDecodeError:
        return False
    return all(
        name in decoded and isinstance(decoded[name], expected)
        for name, expected in schema.items()
    )
def validate_and_parse(json_string: str, required_keys: List[str]) -> Optional[Dict]:
    """
    Decode JSON text and return it only if all required keys are present.

    Args:
        json_string: JSON text
        required_keys: Keys that must exist in the decoded value

    Returns:
        Decoded value, or None when a key is missing or the JSON is invalid
    """
    try:
        decoded = json.loads(json_string)
    except json.JSONDecodeError:
        return None
    for name in required_keys:
        if name not in decoded:
            return None
    return decoded
# 8. Error Handling
def get_parse_error(json_string: str) -> Optional[str]:
    """
    Report why JSON text fails to decode.

    Args:
        json_string: JSON text

    Returns:
        The decoder's error message, or None when the text is valid
    """
    try:
        json.loads(json_string)
    except json.JSONDecodeError as err:
        return str(err)
    return None
def is_valid_json(json_string: str) -> bool:
    """
    Tell whether a value decodes as JSON.

    Args:
        json_string: Candidate JSON text

    Returns:
        True when json.loads accepts it; False on a decode error or an
        unsupported input type
    """
    try:
        json.loads(json_string)
    except (json.JSONDecodeError, TypeError):
        return False
    return True
def fix_and_parse(json_string: str) -> Optional[Any]:
    """
    Try to parse JSON, repairing a few common mistakes if needed.

    The text is first parsed as-is. On failure these repairs are attempted
    one at a time, then all together: removing trailing commas before '}' or
    ']', turning single quotes into double quotes, and quoting bare
    identifier keys. The repairs are textual heuristics and may also touch
    matching characters inside string values.

    Args:
        json_string: Potentially malformed JSON

    Returns:
        Parsed value, or None when no repair produces valid JSON
    """
    import re

    # Try parsing as-is
    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        pass

    def drop_trailing_commas(text: str) -> str:
        return re.sub(r',\s*([}\]])', r'\1', text)

    def double_quotes(text: str) -> str:
        return text.replace("'", '"')

    def quote_bare_keys(text: str) -> str:
        return re.sub(r'([{,]\s*)([A-Za-z_][A-Za-z0-9_]*)(\s*:)', r'\1"\2"\3', text)

    candidates = [
        drop_trailing_commas(json_string),
        double_quotes(json_string),
        quote_bare_keys(json_string),
        quote_bare_keys(double_quotes(drop_trailing_commas(json_string))),
    ]
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None
# 9. Decoding Special Formats
def from_base64_json(encoded_string: str) -> Any:
    """
    Decode base64 text into UTF-8 JSON and parse it.

    Args:
        encoded_string: Base64-encoded JSON

    Returns:
        Decoded Python value
    """
    import base64
    raw_bytes = base64.b64decode(encoded_string)
    json_text = raw_bytes.decode('utf-8')
    return json.loads(json_text)
def from_compressed_json(compressed_data: bytes) -> Any:
    """
    Gunzip bytes, decode them as UTF-8 and parse the JSON text.

    Args:
        compressed_data: Gzip-compressed JSON bytes

    Returns:
        Decoded Python value
    """
    import gzip
    raw_bytes = gzip.decompress(compressed_data)
    json_text = raw_bytes.decode('utf-8')
    return json.loads(json_text)
# Usage Examples
def demonstrate_json_deserialize():
    """Print a walkthrough of the deserialization helpers defined in this sample."""
    print("=== Web Python JSON Deserialization Examples ===\n")

    # 1. Basic deserialization
    print("--- 1. Basic Deserialization ---")
    alice_text = '{"name": "Alice", "age": 30, "city": "NYC"}'
    print(f"Parse: {from_json(alice_text)}")
    fallback_result = from_json_safe('invalid', default={})
    print(f"Safe parse: {fallback_result}")

    # 2. Type-specific
    print("\n--- 2. Type-Specific Parsing ---")
    numbers_text = '[1, 2, 3, 4, 5]'
    print(f"To list: {to_list(numbers_text)}")
    print(f"To int list: {to_int_list(numbers_text)}")

    # 3. Custom types
    print("\n--- 3. Custom Type Deserialization ---")
    tagged_text = '{"name": "Bob", "timestamp": {"__datetime__": "2025-12-31T14:30:45"}}'
    print(f"With custom decoder: {from_json_decoder(tagged_text)}")

    # 4. Object reconstruction
    print("\n--- 4. Object Reconstruction ---")

    class Person:
        def __init__(self, name=None, age=None):
            self.name = name
            self.age = age

        def __repr__(self):
            return f"Person(name={self.name}, age={self.age})"

    charlie_text = '{"name": "Charlie", "age": 25}'
    charlie = to_object(charlie_text, Person)
    print(f"Reconstructed object: {charlie}")

    # 5. Nested values
    print("\n--- 5. Nested Values ---")
    nested_text = '{"user": {"profile": {"name": "David"}}}'
    deep_name = get_nested_value(nested_text, 'user', 'profile', 'name')
    print(f"Get nested: {deep_name}")
    print(f"Extract values: {extract_values(nested_text, ['user', 'profile'])}")

    # 6. Validation
    print("\n--- 6. Validation ---")
    valid_json = '{"name": "Eve", "age": 30}'
    mismatched_json = '{"name": "Eve", "age": "thirty"}'
    expected_types = {'name': str, 'age': int}
    print(f"Valid schema: {validate_schema(valid_json, expected_types)}")
    print(f"Invalid schema: {validate_schema(mismatched_json, expected_types)}")

    # 7. Error handling
    print("\n--- 7. Error Handling ---")
    trailing_comma = '{"name": "Frank", "age": 35,}'
    print(f"Parse error: {get_parse_error(trailing_comma)}")
    print(f"Is valid JSON: {is_valid_json(valid_json)}")
    print(f"Fixed and parse: {fix_and_parse(trailing_comma)}")

    # 8. JSON Lines
    print("\n--- 8. JSON Lines ---")
    lines_text = '{"id": 1}\n{"id": 2}\n{"id": 3}'
    print(f"Parse JSON lines: {from_json_lines(lines_text)}")

    # 9. Base64 encoding
    print("\n--- 9. Special Formats ---")
    import base64
    plain_text = '{"data": "test"}'
    b64_text = base64.b64encode(plain_text.encode()).decode()
    print(f"From base64: {from_base64_json(b64_text)}")

    print("\n=== All JSON Deserialization Examples Completed ===")
# Export functions
# ``export { ... }`` is JavaScript syntax and a SyntaxError in Python; the
# public names of this sample are declared through __all__ instead.
__all__ = [
    'from_json', 'from_json_file', 'from_json_safe',
    'to_dict', 'to_list', 'to_string_list', 'to_int_list',
    'deserialize_datetime', 'from_json_custom', 'CustomDecoder', 'from_json_decoder',
    'to_object', 'to_objects',
    'from_json_lines', 'from_json_stream',
    'get_value', 'get_nested_value', 'extract_values',
    'validate_schema', 'validate_and_parse',
    'get_parse_error', 'is_valid_json', 'fix_and_parse',
    'from_base64_json', 'from_compressed_json',
    'demonstrate_json_deserialize',
]
💻 XML Parsing python
🟡 intermediate
⭐⭐⭐
Parse XML documents and extract data using ElementTree and other XML libraries
⏱️ 30 min
🏷️ python, web, serialization, xml
Prerequisites:
Intermediate Python, XML basics, ElementTree
# Web Python XML Parsing Examples
# Parsing and processing XML documents with various techniques
# 1. Basic XML Parsing
import xml.etree.ElementTree as ET
from typing import List, Dict, Any, Optional, Tuple
from xml.dom import minidom
def parse_xml_string(xml_string: str) -> ET.Element:
    """
    Parse XML text and return its root element.

    Args:
        xml_string: XML text

    Returns:
        Root element
    """
    root = ET.fromstring(xml_string)
    return root
def parse_xml_file(file_path: str) -> ET.Element:
    """
    Parse an XML file and return its root element.

    Args:
        file_path: Path of the XML file

    Returns:
        Root element
    """
    return ET.parse(file_path).getroot()
def get_root_tag(element: ET.Element) -> str:
    """
    Return the tag of the given element.

    Args:
        element: XML element

    Returns:
        The element's tag
    """
    tag_name = element.tag
    return tag_name
def get_root_attributes(element: ET.Element) -> Dict[str, str]:
    """
    Return the attribute mapping of the given element.

    The element's own attrib dict is returned, not a copy.

    Args:
        element: XML element

    Returns:
        Attribute name to value mapping
    """
    attributes = element.attrib
    return attributes
# 2. Element Navigation
def find_child(element: ET.Element, tag: str) -> Optional[ET.Element]:
    """
    Return the first match of element.find(tag).

    Args:
        element: Element to search from
        tag: Tag name (or path) passed to Element.find

    Returns:
        Matching element, or None when nothing matches
    """
    first_match = element.find(tag)
    return first_match
def find_children(element: ET.Element, tag: str) -> List[ET.Element]:
    """
    Return every match of element.findall(tag).

    Args:
        element: Element to search from
        tag: Tag name (or path) passed to Element.findall

    Returns:
        List of matching elements, possibly empty
    """
    matches = element.findall(tag)
    return matches
def find_all_descendants(element: ET.Element, tag: str) -> List[ET.Element]:
    """
    Find all elements with the given tag at any depth below element.

    Element.iter walks the subtree in document order and starts with
    ``element`` itself, so the starting element is included when its own
    tag matches.

    Args:
        element: Root of the subtree to search
        tag: Tag to find

    Returns:
        List of matching elements
    """
    # Element.iter returns a one-shot iterator; materialize it so callers
    # get the list the signature promises (len(), indexing, re-iteration).
    return list(element.iter(tag))
def get_parent(element: ET.Element, root: Optional[ET.Element] = None) -> Optional[Any]:
    """
    Get the parent of an element.

    ElementTree elements do not store a reference to their parent, so the
    parent can only be found by searching a tree that contains the element.

    Args:
        element: Child element
        root: Root of the tree containing ``element``. When omitted the
            parent cannot be determined and None is returned

    Returns:
        Parent element, or None if ``root`` is not given or ``element`` is
        not a child of any element under ``root``
    """
    if root is None:
        return None
    for candidate in root.iter():
        for child in candidate:
            if child is element:
                return candidate
    return None
def get_siblings(element: ET.Element, root: Optional[ET.Element] = None) -> List[ET.Element]:
    """
    Get the other children of an element's parent.

    Elements do not know their parent, and a search limited to the
    element's own subtree can never find it, so the containing tree must be
    supplied through ``root``.

    Args:
        element: Target element
        root: Root of the tree containing ``element``. When omitted no
            parent can be located and an empty list is returned

    Returns:
        Siblings in document order, excluding ``element`` itself
    """
    if root is None:
        return []
    for candidate in root.iter():
        children = list(candidate)
        # Identity comparison: sibling elements can look identical without
        # being the same node.
        if any(child is element for child in children):
            return [child for child in children if child is not element]
    return []
# 3. Element Data Extraction
def get_element_text(element: ET.Element) -> str:
    """
    Return the element's text, or '' when it has none.

    Args:
        element: XML element

    Returns:
        Text content
    """
    content = element.text
    return content if content else ''
def get_element_tail(element: ET.Element) -> str:
    """
    Return the element's tail text, or '' when it has none.

    Args:
        element: XML element

    Returns:
        Tail text
    """
    trailing = element.tail
    return trailing if trailing else ''
def get_element_attribute(element: ET.Element, attr: str, default: str = '') -> str:
    """
    Read one attribute of an element.

    Args:
        element: XML element
        attr: Attribute name
        default: Value returned when the attribute is absent

    Returns:
        Attribute value or default
    """
    value = element.get(attr, default)
    return value
def get_all_attributes(element: ET.Element) -> Dict[str, str]:
    """
    Return the element's attribute mapping.

    The element's own attrib dict is returned, not a copy.

    Args:
        element: XML element

    Returns:
        Attribute name to value mapping
    """
    attributes = element.attrib
    return attributes
# 4. XML to Dictionary Conversion
def element_to_dict(element: ET.Element) -> Dict[str, Any]:
    """
    Convert an element and its subtree to plain Python data.

    Attributes are stored under '@name' keys, non-blank text under '#text',
    and children under their tag. Repeated child tags are collected into a
    list. A leaf with text and no attributes is returned as the bare
    stripped string rather than a dict.

    Args:
        element: XML element

    Returns:
        Dictionary representation (or a string for a text-only leaf)
    """
    result: Dict[str, Any] = {'@' + k: v for k, v in element.attrib.items()}

    text = (element.text or '').strip()
    if text:
        # Collapse to a bare string only when nothing else would be lost;
        # a leaf that also has attributes keeps them next to '#text'.
        if len(element) == 0 and not result:
            return text
        result['#text'] = text

    for child in element:
        child_data = element_to_dict(child)
        if child.tag in result:
            if not isinstance(result[child.tag], list):
                result[child.tag] = [result[child.tag]]
            result[child.tag].append(child_data)
        else:
            result[child.tag] = child_data
    return result
def xml_to_dict(xml_string: str) -> Dict[str, Any]:
    """
    Parse XML text and convert it to a dict keyed by the root tag.

    Args:
        xml_string: XML text

    Returns:
        {root tag: element_to_dict(root)}
    """
    document_root = ET.fromstring(xml_string)
    converted = element_to_dict(document_root)
    return {document_root.tag: converted}
# 5. XPath Queries
def xpath_find(element: ET.Element, path: str) -> List[ET.Element]:
    """
    Return all elements matching a path expression.

    Args:
        element: Element to search from
        path: Path expression passed to Element.findall

    Returns:
        List of matching elements
    """
    matches = element.findall(path)
    return matches
def xpath_find_text(element: ET.Element, path: str) -> List[str]:
    """
    Return the text of every element matching a path expression.

    Args:
        element: Element to search from
        path: Path expression passed to Element.findall

    Returns:
        One string per match; '' for a match without text
    """
    texts = []
    for match in element.findall(path):
        texts.append(match.text or '')
    return texts
def xpath_find_first(element: ET.Element, path: str) -> Optional[ET.Element]:
    """
    Return the first element matching a path expression.

    Args:
        element: Element to search from
        path: Path expression passed to Element.find

    Returns:
        First match, or None when nothing matches
    """
    return element.find(path)
# 6. XML Modification
def set_element_text(element: ET.Element, text: str) -> None:
    """
    Replace the element's text content in place.

    Args:
        element: XML element to modify
        text: New text
    """
    setattr(element, 'text', text)
def set_element_attribute(element: ET.Element, attr: str, value: str) -> None:
    """
    Set (or overwrite) one attribute on the element in place.

    Args:
        element: XML element to modify
        attr: Attribute name
        value: Attribute value
    """
    element.set(attr, value)
    return None
def add_child_element(parent: ET.Element, tag: str, text: str = '', **attributes) -> ET.Element:
    """
    Append a new child element to parent.

    Args:
        parent: Element that receives the child
        tag: Tag of the new child
        text: Text for the child; left unset when empty
        **attributes: Attributes for the child

    Returns:
        The newly created child element
    """
    node = ET.SubElement(parent, tag, attrib=attributes)
    if not text:
        return node
    node.text = text
    return node
def remove_element(element: ET.Element, root: Optional[ET.Element] = None) -> None:
    """
    Remove an element from its parent.

    Elements do not know their parent, and a search limited to the
    element's own subtree can never find it, so the containing tree must be
    supplied through ``root``.

    Args:
        element: Element to remove
        root: Root of the tree containing ``element``. When omitted, or
            when ``element`` is not found under ``root``, nothing is changed
    """
    if root is None:
        return
    for candidate in root.iter():
        # Identity check so an identical-looking sibling is never removed
        # in place of the requested node.
        if any(child is element for child in candidate):
            candidate.remove(element)
            return
def clear_element(element: ET.Element) -> None:
    """
    Reset the element in place by calling its clear() method.

    Args:
        element: Element to clear
    """
    element.clear()
    return None
# 7. XML Generation
def create_xml_element(tag: str, text: str = '', **attributes) -> ET.Element:
    """
    Create a standalone element.

    Args:
        tag: Element tag
        text: Element text; left unset when empty
        **attributes: Element attributes

    Returns:
        The new element
    """
    node = ET.Element(tag, attrib=attributes)
    if not text:
        return node
    node.text = text
    return node
def build_xml_tree() -> ET.Element:
    """
    Build the small example tree used by the demo.

    Layout: <root> holding <child1 id="1"> (which contains <subchild>) and
    <child2 id="2">.

    Returns:
        Root element
    """
    tree_root = create_xml_element('root')
    first = add_child_element(tree_root, 'child1', 'Text 1', id='1')
    add_child_element(first, 'subchild', 'Subtext')
    add_child_element(tree_root, 'child2', 'Text 2', id='2')
    return tree_root
def dict_to_xml(tag: str, data: Dict[str, Any]) -> ET.Element:
    """
    Convert a dictionary to an XML element.

    Key conventions: '@name' becomes an attribute, '#text' becomes the
    element text, a dict value becomes a nested child, a list value becomes
    one child per item, and any other value becomes a child whose text is
    str(value).

    Args:
        tag: Tag of the element to create
        data: Dictionary data

    Returns:
        XML element
    """
    element = ET.Element(tag)
    for key, value in data.items():
        if key.startswith('@'):
            element.set(key[1:], str(value))
        elif key == '#text':
            element.text = str(value)
        elif isinstance(value, dict):
            element.append(dict_to_xml(key, value))
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    element.append(dict_to_xml(key, item))
                else:
                    # Scalars in a list are stringified exactly like scalar
                    # values outside a list, so numbers are not dropped.
                    ET.SubElement(element, key).text = str(item)
        else:
            ET.SubElement(element, key).text = str(value)
    return element
# 8. XML Serialization
def element_to_string(element: ET.Element, encoding: str = 'unicode') -> str:
    """
    Serialize an element with ET.tostring.

    Args:
        element: XML element
        encoding: Encoding passed to ET.tostring; the default 'unicode'
            yields a str

    Returns:
        Serialized XML
    """
    serialized = ET.tostring(element, encoding=encoding)
    return serialized
def element_to_pretty_string(element: ET.Element, indent: str = ' ') -> str:
    """
    Serialize an element as indented, multi-line XML.

    The element is serialized with ElementTree, re-parsed with minidom and
    written out through toprettyxml.

    Args:
        element: XML element
        indent: String used for each indentation level

    Returns:
        Pretty-printed XML text
    """
    compact_xml = ET.tostring(element, encoding='unicode')
    dom_document = minidom.parseString(compact_xml)
    return dom_document.toprettyxml(indent=indent)
def save_xml_file(element: ET.Element, file_path: str, encoding: str = 'utf-8') -> bool:
    """
    Write an element to a file with an XML declaration.

    Any exception is reported with print() and turned into a False result.

    Args:
        element: Element to use as the document root
        file_path: Destination path
        encoding: Output encoding

    Returns:
        True when the file was written, False when an error occurred
    """
    try:
        document = ET.ElementTree(element)
        document.write(file_path, encoding=encoding, xml_declaration=True)
    except Exception as err:
        print(f"Error saving XML: {err}")
        return False
    return True
# 9. XML Validation
def validate_xml_structure(xml_string: str, required_tags: List[str]) -> bool:
    """
    Check that XML text parses and contains every required tag somewhere.

    Args:
        xml_string: XML text
        required_tags: Tag names that must each occur at least once

    Returns:
        True when all tags are present; False when one is missing or the
        text is not well-formed
    """
    try:
        document_root = ET.fromstring(xml_string)
    except ET.ParseError:
        return False
    present = {node.tag for node in document_root.iter()}
    for wanted in required_tags:
        if wanted not in present:
            return False
    return True
def get_xml_structure(element: ET.Element) -> Dict[str, int]:
    """
    Count how often each tag occurs in the element's subtree.

    The starting element itself is counted.

    Args:
        element: Root of the subtree

    Returns:
        Mapping of tag to number of occurrences
    """
    counts = {}
    for node in element.iter():
        counts.setdefault(node.tag, 0)
        counts[node.tag] += 1
    return counts
# 10. Namespace Handling
def strip_namespace(element: ET.Element) -> ET.Element:
    """
    Drop the '{uri}' prefix from every tag in the subtree, in place.

    Only tags are rewritten; attribute names are left as they are.

    Args:
        element: Element whose subtree may carry namespaced tags

    Returns:
        The same element object, after modification
    """
    for node in element.iter():
        qualified = node.tag
        if '}' not in qualified:
            continue
        node.tag = qualified.split('}')[1]
    return element
def get_namespace_map(element: ET.Element) -> Dict[str, str]:
    """
    Collect namespace declarations found in the element's attributes.

    Only entries of element.attrib are inspected: 'xmlns:<prefix>' maps
    <prefix> to the value and a bare 'xmlns' is stored under 'default'.
    NOTE(review): elements produced by ElementTree's parser may not expose
    xmlns declarations in attrib - confirm this against how callers obtain
    the element.

    Args:
        element: XML element

    Returns:
        Namespace prefix to URI mapping; empty for non-Element input
    """
    if not isinstance(element, ET.Element):
        return {}
    mapping = {}
    for name, uri in element.attrib.items():
        if name.startswith('xmlns:'):
            prefix = name[6:]
            mapping[prefix] = uri
        elif name == 'xmlns':
            mapping['default'] = uri
    return mapping
# Usage Examples
def demonstrate_xml_parse():
    """Print a walkthrough of the XML helpers defined in this sample."""
    print("=== Web Python XML Parsing Examples ===\n")

    # Sample XML. The declaration must be the very first characters of the
    # document: a newline or spaces before '<?xml' makes the parser raise
    # ParseError, so the literal starts right after the opening quotes.
    xml_string = '''<?xml version="1.0" encoding="UTF-8"?>
<library>
<book id="1" category="fiction">
<title>Python Programming</title>
<author>John Doe</author>
<price>29.99</price>
</book>
<book id="2" category="tech">
<title>Web Development</title>
<author>Jane Smith</author>
<price>39.99</price>
</book>
</library>
'''

    # 1. Basic parsing
    print("--- 1. Basic Parsing ---")
    root = parse_xml_string(xml_string)
    print(f"Root tag: {get_root_tag(root)}")
    print(f"Root attributes: {get_root_attributes(root)}")

    # 2. Element navigation
    print("\n--- 2. Element Navigation ---")
    books = find_children(root, 'book')
    print(f"Found {len(books)} books")
    for book in books:
        title = find_child(book, 'title')
        print(f" - {get_element_text(title)}")

    # 3. Data extraction
    print("\n--- 3. Data Extraction ---")
    first_book = find_child(root, 'book')
    print(f"First book attributes: {get_all_attributes(first_book)}")
    print(f"First book ID: {get_element_attribute(first_book, 'id')}")

    # 4. To dictionary
    print("\n--- 4. XML to Dictionary ---")
    xml_dict = xml_to_dict('<person><name>Alice</name><age>30</age></person>')
    print(f"Dictionary: {xml_dict}")

    # 5. XPath queries
    print("\n--- 5. XPath Queries ---")
    titles = xpath_find_text(root, './/title')
    print(f"All titles: {titles}")
    first_title = xpath_find_first(root, './/title')
    # Compare with None explicitly: an Element without children is falsy,
    # so a plain truth test would report a found <title> as missing.
    first_title_text = get_element_text(first_title) if first_title is not None else 'None'
    print(f"First title: {first_title_text}")

    # 6. Modification
    print("\n--- 6. XML Modification ---")
    new_root = build_xml_tree()
    add_child_element(new_root, 'child3', 'Text 3', id='3')
    print(f"Modified XML:\n{element_to_pretty_string(new_root)}")

    # 7. XML generation
    print("\n--- 7. XML Generation ---")
    person_xml = dict_to_xml('person', {'name': 'Bob', 'age': '25', '@id': '123'})
    print(f"Generated XML:\n{element_to_pretty_string(person_xml)}")

    # 8. Validation
    print("\n--- 8. Validation ---")
    is_valid = validate_xml_structure(xml_string, ['library', 'book', 'title'])
    print(f"XML has required tags: {is_valid}")
    print(f"XML structure: {get_xml_structure(root)}")

    # 9. Namespace handling
    print("\n--- 9. Namespace Handling ---")
    ns_xml = '<ns:root xmlns:ns="http://example.com"><ns:child>Text</ns:child></ns:root>'
    ns_element = parse_xml_string(ns_xml)
    clean_element = strip_namespace(ns_element)
    print(f"Without namespace: {get_root_tag(clean_element)}")

    print("\n=== All XML Parsing Examples Completed ===")
# Export functions
# ``export { ... }`` is JavaScript syntax and a SyntaxError in Python; the
# public names of this sample are declared through __all__ instead.
__all__ = [
    'parse_xml_string', 'parse_xml_file', 'get_root_tag', 'get_root_attributes',
    'find_child', 'find_children', 'find_all_descendants', 'get_siblings',
    'get_element_text', 'get_element_tail', 'get_element_attribute', 'get_all_attributes',
    'element_to_dict', 'xml_to_dict',
    'xpath_find', 'xpath_find_text', 'xpath_find_first',
    'set_element_text', 'set_element_attribute', 'add_child_element', 'remove_element', 'clear_element',
    'create_xml_element', 'build_xml_tree', 'dict_to_xml',
    'element_to_string', 'element_to_pretty_string', 'save_xml_file',
    'validate_xml_structure', 'get_xml_structure',
    'strip_namespace', 'get_namespace_map',
    'demonstrate_xml_parse',
]