Exemples de Sérialisation Web Python
Exemples de sérialisation Web Python incluant le traitement JSON et XML
Key Facts
- Category
- Python
- Items
- 3
- Format Families
- json, xml, text
Sample Overview
Exemples de sérialisation Web Python incluant le traitement JSON et XML. This sample set belongs to the Python category and can be used to test related workflows inside Elysia Tools.
💻 Sérialisation JSON python
🟢 simple
⭐⭐
Convertir des objets Python en chaînes JSON avec des options de codage et de formatage personnalisées
⏱️ 25 min
🏷️ python, web, serialization, json
Prerequisites:
Basic Python, json module
# Web Python JSON Serialization Examples
# Converting Python objects to JSON strings with various options
# 1. Basic Serialization
import json
from typing import Any, Dict, List, Optional
from datetime import datetime, date
from decimal import Decimal
def to_json(obj: Any, indent: Optional[int] = None, ensure_ascii: bool = True) -> str:
    """Serialize ``obj`` to a JSON string.

    Args:
        obj: Value to serialize.
        indent: Spaces per nesting level; ``None`` keeps the output on one line.
        ensure_ascii: When true, non-ASCII characters are written as escapes.

    Returns:
        The JSON text.
    """
    options = {'indent': indent, 'ensure_ascii': ensure_ascii}
    return json.dumps(obj, **options)
def to_json_compact(obj: Any) -> str:
    """Serialize ``obj`` with no whitespace after separators.

    Args:
        obj: Value to serialize.

    Returns:
        The most compact JSON text ``json.dumps`` can produce.
    """
    tight_separators = (',', ':')
    return json.dumps(obj, separators=tight_separators)
def to_json_formatted(obj: Any, indent: int = 2) -> str:
    """Serialize ``obj`` as indented JSON with keys in sorted order.

    Args:
        obj: Value to serialize.
        indent: Spaces per nesting level.

    Returns:
        Multi-line JSON text.
    """
    formatted = json.dumps(obj, sort_keys=True, indent=indent)
    return formatted
def to_json_sorted(obj: Any) -> str:
    """Serialize ``obj`` on one line with dictionary keys sorted.

    Args:
        obj: Value to serialize.

    Returns:
        JSON text whose object keys appear in sorted order.
    """
    ordered = json.dumps(obj, sort_keys=True)
    return ordered
# 2. Custom Serialization
def serialize_datetime(obj: Any) -> Any:
    """Fallback converter for ``json.dumps(default=...)``.

    Args:
        obj: Value the JSON encoder could not handle natively.

    Returns:
        An ISO-8601 string for ``datetime``/``date`` values, or a ``float``
        for ``Decimal`` values.

    Raises:
        TypeError: If ``obj`` is none of the supported types.
    """
    # datetime is a subclass of date, so one check covers both.
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    raise TypeError(f"Type {type(obj)} not serializable")
def to_json_with_custom(obj: Any, **kwargs) -> str:
    """Serialize ``obj`` using ``serialize_datetime`` as the fallback converter.

    Args:
        obj: Value to serialize.
        **kwargs: Extra keyword arguments forwarded to ``json.dumps``.

    Returns:
        The JSON text.
    """
    encoded = json.dumps(obj, default=serialize_datetime, **kwargs)
    return encoded
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that wraps a few extra types in single-key marker objects."""

    def default(self, obj: Any) -> Any:
        """Return a tagged dict for supported types, else defer to the base class."""
        # Order matters: datetime is a subclass of date and must be tested first.
        tagged_types = (
            (datetime, '__datetime__', lambda value: value.isoformat()),
            (date, '__date__', lambda value: value.isoformat()),
            (Decimal, '__decimal__', str),
            (set, '__set__', list),
        )
        for kind, marker, convert in tagged_types:
            if isinstance(obj, kind):
                return {marker: convert(obj)}
        return super().default(obj)
def to_json_custom_encoder(obj: Any, indent: int = 2) -> str:
    """Serialize ``obj`` through the ``CustomEncoder`` class.

    Args:
        obj: Value to serialize.
        indent: Spaces per nesting level.

    Returns:
        Indented JSON text.
    """
    encoded = json.dumps(obj, indent=indent, cls=CustomEncoder)
    return encoded
# 3. Serialize Different Types
def serialize_dict(data: Dict[str, Any], **kwargs) -> str:
    """Serialize a dictionary to JSON.

    Args:
        data: Mapping to serialize.
        **kwargs: Keyword arguments forwarded to ``json.dumps``.

    Returns:
        The JSON text.
    """
    encoded = json.dumps(data, **kwargs)
    return encoded
def serialize_list(data: List[Any], **kwargs) -> str:
    """Serialize a list to JSON.

    Args:
        data: Sequence to serialize.
        **kwargs: Keyword arguments forwarded to ``json.dumps``.

    Returns:
        The JSON text.
    """
    encoded = json.dumps(data, **kwargs)
    return encoded
def serialize_object(obj: Any, **kwargs) -> str:
    """Serialize an arbitrary object via its instance dictionary.

    Args:
        obj: Object to serialize.
        **kwargs: Keyword arguments forwarded to ``json.dumps``.

    Returns:
        JSON of ``obj.__dict__`` when that attribute exists; otherwise JSON
        of ``{"value": str(obj)}``.
    """
    payload = obj.__dict__ if hasattr(obj, '__dict__') else {'value': str(obj)}
    return json.dumps(payload, **kwargs)
def serialize_dataframe(obj) -> str:
    """Serialize a pandas DataFrame to a JSON array of row records.

    Args:
        obj: Expected to be a ``pandas.DataFrame``.

    Returns:
        Indented JSON (``orient='records'``) for a DataFrame. Otherwise a
        JSON object with an ``error`` key that says whether pandas is
        missing or the argument is simply not a DataFrame.
    """
    try:
        import pandas as pd
    except ImportError:
        return json.dumps({'error': 'pandas not available'})

    if isinstance(obj, pd.DataFrame):
        return obj.to_json(orient='records', indent=2)
    # pandas is installed, so the problem is the argument, not the environment.
    return json.dumps({'error': 'object is not a pandas DataFrame'})
# 4. Advanced Serialization Options
def to_json_ascii(obj: Any) -> str:
    """Serialize ``obj`` so the output contains only ASCII characters.

    Args:
        obj: Value to serialize.

    Returns:
        JSON text with non-ASCII characters written as escapes.
    """
    escaped = json.dumps(obj, ensure_ascii=True)
    return escaped
def to_json_unicode(obj: Any) -> str:
    """Serialize ``obj`` keeping non-ASCII characters as-is.

    Args:
        obj: Value to serialize.

    Returns:
        JSON text that may contain raw Unicode characters.
    """
    readable = json.dumps(obj, ensure_ascii=False)
    return readable
def to_json_with_undefined(obj: Any, undefined: str = 'null') -> str:
    """Serialize ``obj``, substituting a placeholder for every ``None``.

    The original used a ``default=`` hook, which ``json.dumps`` only calls
    for values it cannot serialize, so ``undefined`` never took effect.
    The structure is now rewritten before encoding.

    Args:
        obj: Value to serialize.
        undefined: Replacement for ``None``. The default ``'null'`` keeps
            the standard JSON ``null`` literal, so default output is
            unchanged; any other value is emitted as a JSON string.

    Returns:
        The JSON text.
    """
    if undefined == 'null':
        return json.dumps(obj)

    def replace_none(value):
        if value is None:
            return undefined
        if isinstance(value, dict):
            return {key: replace_none(item) for key, item in value.items()}
        if isinstance(value, (list, tuple)):
            return [replace_none(item) for item in value]
        return value

    return json.dumps(replace_none(obj))
# 5. Conditional Serialization
def serialize_skip_none(obj: Any) -> str:
    """Serialize ``obj`` with ``None``-valued dictionary entries removed.

    The original filtered only the top-level dictionary and passed a
    ``default=`` hook that returned its argument unchanged. Pruning is now
    applied recursively to nested dictionaries, including those inside
    lists and tuples. ``None`` items of a list are kept, because dropping
    them would shift positions.

    Args:
        obj: Value to serialize.

    Returns:
        The JSON text.

    Raises:
        TypeError: If ``obj`` contains a value ``json.dumps`` cannot encode.
    """
    def prune(value):
        if isinstance(value, dict):
            return {key: prune(item) for key, item in value.items() if item is not None}
        if isinstance(value, (list, tuple)):
            return [prune(item) for item in value]
        return value

    return json.dumps(prune(obj))
def serialize_skip_private(obj: Any) -> str:
    """Serialize ``obj`` without entries whose name starts with an underscore.

    Works on dictionaries and on objects exposing ``__dict__``; any other
    value is serialized unchanged. Only top-level names are filtered.

    Args:
        obj: Dictionary or object to serialize.

    Returns:
        The JSON text.
    """
    def is_public(name) -> bool:
        # Non-string keys (ints, ...) have no startswith(); treat them as public.
        return not (isinstance(name, str) and name.startswith('_'))

    if isinstance(obj, dict):
        source = obj
    elif hasattr(obj, '__dict__'):
        source = obj.__dict__
    else:
        return json.dumps(obj)
    return json.dumps({name: value for name, value in source.items() if is_public(name)})
# 6. Streaming Serialization
def serialize_to_file(obj: Any, file_path: str, **kwargs) -> bool:
    """Write ``obj`` as JSON to a UTF-8 text file.

    Any exception raised while opening or writing is printed and reported
    through the return value instead of propagating.

    Args:
        obj: Value to serialize.
        file_path: Destination path; an existing file is overwritten.
        **kwargs: Keyword arguments forwarded to ``json.dump``.

    Returns:
        ``True`` when the file was written, ``False`` on any error.
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as handle:
            json.dump(obj, handle, **kwargs)
    except Exception as e:
        print(f"Error writing file: {e}")
        return False
    return True
def serialize_lines(items: List[Any]) -> str:
    """Serialize each item as one JSON document per line (JSON Lines).

    Args:
        items: Values to serialize, one output line each.

    Returns:
        The documents joined by ``\\n`` with no trailing newline.
    """
    encoded_rows = [json.dumps(entry) for entry in items]
    return '\n'.join(encoded_rows)
# 7. Pretty Printing
def to_json_pretty(obj: Any, indent: int = 2, width: int = 80) -> str:
    """Serialize ``obj`` as indented, human-readable JSON.

    Args:
        obj: Value to serialize.
        indent: Spaces per nesting level.
        width: Accepted for backward compatibility only. ``json.dumps``
            has no line-width option; passing it through (as the original
            did) raised ``TypeError`` on every call, so it is ignored.

    Returns:
        Indented JSON text.
    """
    return json.dumps(obj, indent=indent)
def to_json_colorful(obj: Any) -> str:
    """Serialize ``obj`` as indented JSON decorated with ANSI colour codes.

    Strings are cyan, numbers yellow, and ``true``/``false``/``null``
    magenta. Colouring happens in a single regex pass over the tokens. The
    original applied three successive substitutions, so the number pattern
    also matched digits inside strings and inside the escape codes that had
    just been inserted.

    Args:
        obj: Value to serialize.

    Returns:
        JSON text with ANSI colour escape sequences, for terminal display.
    """
    import re

    token = re.compile(
        r'("(?:\\.|[^"\\])*")'                       # string, with escapes
        r'|(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'       # number
        r'|\b(true|false|null)\b'                    # literal
    )

    def paint(match):
        text, number, literal = match.groups()
        if text is not None:
            return f'\033[36m{text}\033[0m'
        if number is not None:
            return f'\033[33m{number}\033[0m'
        return f'\033[35m{literal}\033[0m'

    return token.sub(paint, json.dumps(obj, indent=2))
# 8. Validation Before Serialization
def validate_serializable(obj: Any) -> bool:
    """Report whether ``json.dumps`` can serialize ``obj``.

    Args:
        obj: Value to check.

    Returns:
        ``True`` if serialization succeeds, ``False`` otherwise.
    """
    try:
        json.dumps(obj)
    except (TypeError, OverflowError, ValueError, RecursionError):
        # ValueError covers circular references; RecursionError covers
        # structures nested too deeply for the encoder.
        return False
    return True
def get_serialization_errors(obj: Any) -> List[str]:
    """Collect a message for every value in ``obj`` that fails to serialize.

    Each container that fails as a whole is reported, followed by the
    failing values nested inside it, so one bad leaf produces an entry for
    each enclosing level. Paths look like ``key.sub[0]``; the top-level
    value is reported as ``root``.

    Args:
        obj: Value to inspect.

    Returns:
        Messages of the form ``"<path>: <type name> - <error>"``; empty if
        everything serializes.
    """
    errors = []
    active = set()  # ids of containers on the current descent path

    def check(value, path=''):
        try:
            json.dumps(value)
        except (TypeError, OverflowError, ValueError) as e:
            # ValueError is raised for circular references.
            errors.append(f"{path or 'root'}: {type(value).__name__} - {e}")
            if not isinstance(value, (dict, list, tuple)):
                return
            if id(value) in active:
                return  # already being walked: stop instead of looping forever
            active.add(id(value))
            try:
                if isinstance(value, dict):
                    for k, v in value.items():
                        check(v, f"{path}.{k}" if path else k)
                else:
                    for i, v in enumerate(value):
                        check(v, f"{path}[{i}]")
            finally:
                active.discard(id(value))

    check(obj)
    return errors
# 9. Compression
def serialize_compressed(obj: Any) -> bytes:
    """Serialize ``obj`` to JSON and gzip-compress the UTF-8 bytes.

    Args:
        obj: Value to serialize.

    Returns:
        Gzip-compressed JSON.
    """
    import gzip

    raw_bytes = json.dumps(obj).encode('utf-8')
    return gzip.compress(raw_bytes)
def serialize_base64(obj: Any) -> str:
    """Serialize ``obj`` to JSON and Base64-encode the UTF-8 bytes.

    Args:
        obj: Value to serialize.

    Returns:
        ASCII Base64 text of the JSON document.
    """
    import base64

    raw_bytes = json.dumps(obj).encode('utf-8')
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('ascii')
# Usage Examples
def demonstrate_json_serialize():
    """Print a walkthrough of the serialization helpers defined in this module."""
    print("=== Web Python JSON Serialization Examples ===\n")

    # 1. Basic serialization
    print("--- 1. Basic Serialization ---")
    profile = {'name': 'Alice', 'age': 30, 'city': 'NYC'}
    print(f"Compact: {to_json(profile)}")
    print(f"Formatted:\n{to_json_formatted(profile)}")
    print(f"Sorted: {to_json_sorted(profile)}")

    # 2. Custom types
    print("\n--- 2. Custom Types ---")
    record = {
        'name': 'Bob',
        'timestamp': datetime(2025, 12, 31, 14, 30, 45),
        'amount': Decimal('123.45'),
    }
    print(f"With custom encoder:\n{to_json_with_custom(record, indent=2)}")

    # 3. Different structures
    print("\n--- 3. Different Structures ---")
    mixed_list = [1, 2, 3, {'key': 'value'}]
    print(f"List: {to_json(mixed_list)}")

    class Person:
        def __init__(self, name, age):
            self.name = name
            self.age = age
            self._private = 'secret'

    charlie = Person('Charlie', 25)
    print(f"Object: {serialize_object(charlie)}")
    print(f"Skip private: {serialize_skip_private(charlie)}")

    # 4. Unicode
    print("\n--- 4. Unicode Handling ---")
    greeting = {'message': 'Hello 世界 🌍'}
    print(f"ASCII escaped: {to_json_ascii(greeting)}")
    print(f"Unicode: {to_json_unicode(greeting)}")

    # 5. Conditional serialization
    print("\n--- 5. Conditional Serialization ---")
    sparse = {'a': 1, 'b': None, 'c': 3}
    print(f"Skip None: {serialize_skip_none(sparse)}")

    # 6. Validation
    print("\n--- 6. Validation ---")
    good = {'key': 'value'}
    bad = {'func': lambda x: x}
    print(f"Valid data serializable: {validate_serializable(good)}")
    print(f"Invalid data serializable: {validate_serializable(bad)}")
    print(f"Errors: {get_serialization_errors(bad)}")

    # 7. Special encodings
    print("\n--- 7. Special Encodings ---")
    repetitive = {'data': [1, 2, 3] * 100}
    packed = serialize_compressed(repetitive)
    print(f"Original size: {len(to_json(repetitive))} bytes")
    print(f"Compressed size: {len(packed)} bytes")

    # 8. JSON Lines
    print("\n--- 8. JSON Lines ---")
    rows = [{'id': 1, 'name': 'Item 1'}, {'id': 2, 'name': 'Item 2'}]
    print(f"JSON Lines:\n{serialize_lines(rows)}")

    print("\n=== All JSON Serialization Examples Completed ===")
# Export functions
# export { to_json, to_json_compact, to_json_formatted, to_json_sorted }
# export { serialize_datetime, to_json_with_custom, CustomEncoder, to_json_custom_encoder }
# export { serialize_dict, serialize_list, serialize_object, serialize_dataframe }
# export { to_json_ascii, to_json_unicode, to_json_with_undefined }
# export { serialize_skip_none, serialize_skip_private }
# export { serialize_to_file, serialize_lines }
# export { to_json_pretty, to_json_colorful }
# export { validate_serializable, get_serialization_errors }
# export { serialize_compressed, serialize_base64 }
# export { demonstrate_json_serialize }
💻 Désérialisation JSON python
🟡 intermediate
⭐⭐⭐
Analyser les chaînes JSON en objets Python avec un décodage personnalisé et une gestion des erreurs
⏱️ 30 min
🏷️ python, web, serialization, json
Prerequisites:
Intermediate Python, json module, type hints
# Web Python JSON Deserialization Examples
# Parsing JSON strings into Python objects with custom handling
# 1. Basic Deserialization
import json
from typing import Any, Dict, List, Optional, TypeVar, Type
from datetime import datetime, date
from decimal import Decimal
T = TypeVar('T')
def from_json(json_string: str) -> Any:
    """Parse a JSON document.

    Args:
        json_string: JSON text.

    Returns:
        The decoded Python value.
    """
    parsed = json.loads(json_string)
    return parsed
def from_json_file(file_path: str) -> Any:
    """Read and parse a UTF-8 JSON file.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded Python value.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed
def from_json_safe(json_string: str, default: Any = None) -> Any:
    """Parse JSON, falling back to ``default`` instead of raising.

    Args:
        json_string: JSON text.
        default: Value returned when decoding fails.

    Returns:
        The decoded value, or ``default`` after printing the error when the
        input is malformed or not a string/bytes value.
    """
    try:
        parsed = json.loads(json_string)
    except (json.JSONDecodeError, TypeError) as e:
        print(f"JSON decode error: {e}")
        return default
    return parsed
# 2. Type-Specific Deserialization
def to_dict(json_string: str) -> Dict[str, Any]:
    """Parse JSON that must contain an object.

    The result type is checked so the annotated return type actually holds,
    matching how ``to_string_list``/``to_int_list`` reject non-list input.

    Args:
        json_string: JSON text.

    Returns:
        The decoded dictionary.

    Raises:
        ValueError: If the JSON is malformed or its top-level value is not
            an object.
    """
    data = json.loads(json_string)
    if not isinstance(data, dict):
        raise ValueError("JSON is not a dictionary")
    return data
def to_list(json_string: str) -> List[Any]:
    """Parse JSON that must contain an array.

    The result type is checked so the annotated return type actually holds,
    matching how ``to_string_list``/``to_int_list`` reject non-list input.

    Args:
        json_string: JSON text.

    Returns:
        The decoded list.

    Raises:
        ValueError: If the JSON is malformed or its top-level value is not
            an array.
    """
    data = json.loads(json_string)
    if not isinstance(data, list):
        raise ValueError("JSON is not a list")
    return data
def to_string_list(json_string: str) -> List[str]:
    """Parse a JSON array and convert every item to ``str``.

    Args:
        json_string: JSON text.

    Returns:
        ``str(item)`` for each array item.

    Raises:
        ValueError: If the top-level JSON value is not an array.
    """
    parsed = json.loads(json_string)
    if not isinstance(parsed, list):
        raise ValueError("JSON is not a list")
    return list(map(str, parsed))
def to_int_list(json_string: str) -> List[int]:
    """Parse a JSON array and convert every item with ``int()``.

    Args:
        json_string: JSON text.

    Returns:
        ``int(item)`` for each array item.

    Raises:
        ValueError: If the top-level JSON value is not an array, or an item
            is a string that ``int()`` cannot parse.
        TypeError: If an item is of a type ``int()`` does not accept.
    """
    parsed = json.loads(json_string)
    if not isinstance(parsed, list):
        raise ValueError("JSON is not a list")
    return list(map(int, parsed))
# 3. Custom Object Deserialization
def deserialize_datetime(obj: Dict[str, Any]) -> Any:
    """Recursively revive marker objects into rich Python values.

    A dictionary containing ``__datetime__``, ``__date__``, ``__decimal__``
    or ``__set__`` (checked in that order) is replaced by the corresponding
    ``datetime``, ``date``, ``Decimal`` or ``set``. Other dictionaries and
    lists are rebuilt with their contents converted; anything else is
    returned unchanged.

    Args:
        obj: Decoded JSON value to process.

    Returns:
        The value with marker objects replaced.
    """
    if isinstance(obj, list):
        return [deserialize_datetime(entry) for entry in obj]
    if not isinstance(obj, dict):
        return obj

    revivers = (
        ('__datetime__', datetime.fromisoformat),
        ('__date__', date.fromisoformat),
        ('__decimal__', Decimal),
        ('__set__', set),
    )
    for marker, revive in revivers:
        if marker in obj:
            return revive(obj[marker])
    return {key: deserialize_datetime(value) for key, value in obj.items()}
def from_json_custom(json_string: str) -> Any:
    """Parse JSON, then revive marker objects via ``deserialize_datetime``.

    Args:
        json_string: JSON text.

    Returns:
        The decoded value with marker objects converted.
    """
    return deserialize_datetime(json.loads(json_string))
class CustomDecoder(json.JSONDecoder):
    """JSON decoder that revives the marker objects written by the custom encoder."""

    def __init__(self, *args, **kwargs):
        # Install this instance's hook; a caller-supplied object_hook is a
        # duplicate keyword and raises TypeError.
        super().__init__(*args, object_hook=self.object_hook, **kwargs)

    def object_hook(self, obj: Dict[str, Any]) -> Any:
        """Convert one decoded JSON object, or return it unchanged."""
        revivers = (
            ('__datetime__', datetime.fromisoformat),
            ('__date__', date.fromisoformat),
            ('__decimal__', Decimal),
            ('__set__', set),
        )
        for marker, revive in revivers:
            if marker in obj:
                return revive(obj[marker])
        return obj
def from_json_decoder(json_string: str) -> Any:
    """Parse JSON with the ``CustomDecoder`` class.

    Args:
        json_string: JSON text.

    Returns:
        The decoded value with marker objects converted.
    """
    decoded = json.loads(json_string, cls=CustomDecoder)
    return decoded
# 4. Object Reconstruction
def to_object(json_string: str, cls: Type[T]) -> T:
    """Build an instance of ``cls`` from a JSON object.

    The instance is created with ``cls.__new__`` (``__init__`` is not run)
    and the decoded members are copied into its ``__dict__``.

    Args:
        json_string: JSON text holding an object.
        cls: Class to instantiate.

    Returns:
        The populated instance.
    """
    fields = json.loads(json_string)
    instance = cls.__new__(cls)
    instance.__dict__.update(fields)
    return instance
def to_objects(json_string: str, cls: Type[T]) -> List[T]:
    """Build a list of ``cls`` instances from a JSON array of objects.

    Each instance is created with ``cls.__new__`` (``__init__`` is not run)
    and populated through its ``__dict__``.

    Args:
        json_string: JSON text holding an array of objects.
        cls: Class to instantiate for each item.

    Returns:
        The populated instances, in array order.
    """
    def build(fields):
        instance = cls.__new__(cls)
        instance.__dict__.update(fields)
        return instance

    return [build(entry) for entry in json.loads(json_string)]
# 5. Streaming Deserialization
def from_json_lines(json_lines: str) -> List[Any]:
    """Parse JSON Lines text: one JSON document per line.

    Args:
        json_lines: Text with newline-separated JSON documents; blank
            lines are skipped.

    Returns:
        The decoded documents, in order.
    """
    documents = []
    for row in json_lines.strip().split('\n'):
        if row.strip():
            documents.append(json.loads(row))
    return documents
def from_json_stream(file_path: str) -> List[Any]:
    """Read a UTF-8 JSON Lines file, decoding one document per line.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded documents in file order; blank lines are skipped.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [json.loads(row) for row in handle if row.strip()]
# 6. Partial Deserialization
def get_value(json_string: str, key: str, default: Any = None) -> Any:
    """Fetch one top-level member from a JSON object.

    Args:
        json_string: JSON text.
        key: Member name to look up.
        default: Value returned when the lookup is not possible.

    Returns:
        The member value, or ``default`` when the key is missing, the JSON
        is malformed, or the top-level value has no ``get`` method.
    """
    try:
        document = json.loads(json_string)
        found = document.get(key, default)
    except (json.JSONDecodeError, AttributeError):
        return default
    return found
def get_nested_value(json_string: str, *keys: str, default: Any = None) -> Any:
    """Follow a chain of keys through nested JSON objects.

    Args:
        json_string: JSON text.
        *keys: Keys to follow, outermost first.
        default: Value returned when the path cannot be followed.

    Returns:
        The value at the end of the path. ``default`` is returned when a
        key is missing, a step yields ``null``, a non-object is reached
        before the path ends, or the JSON cannot be decoded.
    """
    try:
        current = json.loads(json_string)
        for step in keys:
            if not isinstance(current, dict):
                return default
            current = current.get(step)
            if current is None:
                return default
        return current
    except (json.JSONDecodeError, TypeError):
        return default
def extract_values(json_string: str, keys: List[str]) -> Dict[str, Any]:
    """Pick several top-level members from a JSON object.

    Args:
        json_string: JSON text.
        keys: Member names to extract.

    Returns:
        A dictionary with the requested keys that exist in the document.
        Empty when the JSON is malformed or its top-level value is not an
        object (the original raised ``AttributeError``/``TypeError`` for
        arrays, strings and numbers).
    """
    try:
        data = json.loads(json_string)
    except json.JSONDecodeError:
        return {}
    if not isinstance(data, dict):
        return {}
    return {k: data[k] for k in keys if k in data}
# 7. Validation During Deserialization
def validate_schema(json_string: str, schema: Dict[str, type]) -> bool:
    """Check that a JSON object has the expected members and value types.

    Args:
        json_string: JSON text.
        schema: Maps each required key to the type (or tuple of types) its
            value must have.

    Returns:
        ``True`` when every key is present with a matching type. ``False``
        when a key is missing, a type differs, the JSON is malformed, or
        the top-level value is not an object.
    """
    def matches(value, expected) -> bool:
        if isinstance(value, bool):
            # bool subclasses int in Python, so JSON true/false would
            # otherwise satisfy an ``int`` requirement.
            accepted = expected if isinstance(expected, tuple) else (expected,)
            return bool in accepted or object in accepted
        return isinstance(value, expected)

    try:
        data = json.loads(json_string)
    except json.JSONDecodeError:
        return False
    if not isinstance(data, dict):
        return False
    return all(
        key in data and matches(data[key], expected_type)
        for key, expected_type in schema.items()
    )
def validate_and_parse(json_string: str, required_keys: List[str]) -> Optional[Dict]:
    """Parse a JSON object and make sure it contains all required keys.

    Args:
        json_string: JSON text.
        required_keys: Keys that must be present at the top level.

    Returns:
        The decoded dictionary, or ``None`` when the JSON is malformed, a
        key is missing, or the top-level value is not an object (the
        original could return a list or raise ``TypeError`` in that case).
    """
    try:
        data = json.loads(json_string)
    except json.JSONDecodeError:
        return None
    if not isinstance(data, dict):
        return None
    if all(key in data for key in required_keys):
        return data
    return None
# 8. Error Handling
def get_parse_error(json_string: str) -> Optional[str]:
    """Return the decoder's error message for malformed JSON.

    Args:
        json_string: JSON text to check.

    Returns:
        The ``JSONDecodeError`` message, or ``None`` when the text parses.
    """
    try:
        json.loads(json_string)
    except json.JSONDecodeError as e:
        return str(e)
    return None
def is_valid_json(json_string: str) -> bool:
    """Report whether ``json_string`` can be decoded as JSON.

    Args:
        json_string: Text to check.

    Returns:
        ``True`` if decoding succeeds; ``False`` for malformed text or a
        value that is not a string/bytes object.
    """
    try:
        json.loads(json_string)
    except (json.JSONDecodeError, TypeError):
        return False
    return True
def fix_and_parse(json_string: str) -> Optional[Any]:
    """Parse JSON, retrying with simple textual repairs if strict parsing fails.

    Repairs are tried cumulatively: strip trailing commas, swap single
    quotes for double quotes, apply both, then also quote bare identifier
    keys. The original passed regex patterns to ``str.replace``, which
    matches literally, so trailing commas were never removed.

    The repairs are text substitutions and do not understand JSON strings,
    so they are applied only after the untouched input has failed to parse.

    Args:
        json_string: Possibly malformed JSON text.

    Returns:
        The decoded value from the first candidate that parses, else ``None``.
    """
    import re

    # Try parsing as-is
    try:
        return json.loads(json_string)
    except json.JSONDecodeError:
        pass

    def drop_trailing_commas(text: str) -> str:
        return re.sub(r',\s*([}\]])', r'\1', text)

    without_commas = drop_trailing_commas(json_string)
    double_quoted = json_string.replace("'", '"')
    combined = drop_trailing_commas(double_quoted)
    # Bare identifier after '{' or ',' and before ':' -> quoted key.
    quoted_keys = re.sub(r'([{,]\s*)([A-Za-z_]\w*)(\s*:)', r'\1"\2"\3', combined)

    for candidate in (without_commas, double_quoted, combined, quoted_keys):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            continue
    return None
# 9. Decoding Special Formats
def from_base64_json(encoded_string: str) -> Any:
    """Decode Base64 text holding a UTF-8 JSON document.

    Args:
        encoded_string: Base64-encoded JSON.

    Returns:
        The decoded Python value.
    """
    import base64

    raw_bytes = base64.b64decode(encoded_string)
    return json.loads(raw_bytes.decode('utf-8'))
def from_compressed_json(compressed_data: bytes) -> Any:
    """Decompress gzip data holding a UTF-8 JSON document and parse it.

    Args:
        compressed_data: Gzip-compressed JSON bytes.

    Returns:
        The decoded Python value.
    """
    import gzip

    raw_bytes = gzip.decompress(compressed_data)
    return json.loads(raw_bytes.decode('utf-8'))
# Usage Examples
def demonstrate_json_deserialize():
    """Print a walkthrough of the deserialization helpers defined in this module."""
    print("=== Web Python JSON Deserialization Examples ===\n")

    # 1. Basic deserialization
    print("--- 1. Basic Deserialization ---")
    profile_text = '{"name": "Alice", "age": 30, "city": "NYC"}'
    print(f"Parse: {from_json(profile_text)}")
    print(f"Safe parse: {from_json_safe('invalid', default={})}")

    # 2. Type-specific
    print("\n--- 2. Type-Specific Parsing ---")
    numbers_text = '[1, 2, 3, 4, 5]'
    print(f"To list: {to_list(numbers_text)}")
    print(f"To int list: {to_int_list(numbers_text)}")

    # 3. Custom types
    print("\n--- 3. Custom Type Deserialization ---")
    tagged_text = '{"name": "Bob", "timestamp": {"__datetime__": "2025-12-31T14:30:45"}}'
    print(f"With custom decoder: {from_json_decoder(tagged_text)}")

    # 4. Object reconstruction
    print("\n--- 4. Object Reconstruction ---")

    class Person:
        def __init__(self, name=None, age=None):
            self.name = name
            self.age = age

        def __repr__(self):
            return f"Person(name={self.name}, age={self.age})"

    person_text = '{"name": "Charlie", "age": 25}'
    rebuilt = to_object(person_text, Person)
    print(f"Reconstructed object: {rebuilt}")

    # 5. Nested values
    print("\n--- 5. Nested Values ---")
    nested_text = '{"user": {"profile": {"name": "David"}}}'
    print(f"Get nested: {get_nested_value(nested_text, 'user', 'profile', 'name')}")
    print(f"Extract values: {extract_values(nested_text, ['user', 'profile'])}")

    # 6. Validation
    print("\n--- 6. Validation ---")
    conforming = '{"name": "Eve", "age": 30}'
    nonconforming = '{"name": "Eve", "age": "thirty"}'
    expected_types = {'name': str, 'age': int}
    print(f"Valid schema: {validate_schema(conforming, expected_types)}")
    print(f"Invalid schema: {validate_schema(nonconforming, expected_types)}")

    # 7. Error handling
    print("\n--- 7. Error Handling ---")
    broken = '{"name": "Frank", "age": 35,}'
    print(f"Parse error: {get_parse_error(broken)}")
    print(f"Is valid JSON: {is_valid_json(conforming)}")
    print(f"Fixed and parse: {fix_and_parse(broken)}")

    # 8. JSON Lines
    print("\n--- 8. JSON Lines ---")
    rows_text = '{"id": 1}\n{"id": 2}\n{"id": 3}'
    print(f"Parse JSON lines: {from_json_lines(rows_text)}")

    # 9. Base64 encoding
    print("\n--- 9. Special Formats ---")
    import base64
    plain = '{"data": "test"}'
    wrapped = base64.b64encode(plain.encode()).decode()
    print(f"From base64: {from_base64_json(wrapped)}")

    print("\n=== All JSON Deserialization Examples Completed ===")
# Export functions
# export { from_json, from_json_file, from_json_safe }
# export { to_dict, to_list, to_string_list, to_int_list }
# export { deserialize_datetime, from_json_custom, CustomDecoder, from_json_decoder }
# export { to_object, to_objects }
# export { from_json_lines, from_json_stream }
# export { get_value, get_nested_value, extract_values }
# export { validate_schema, validate_and_parse }
# export { get_parse_error, is_valid_json, fix_and_parse }
# export { from_base64_json, from_compressed_json }
# export { demonstrate_json_deserialize }
💻 Analyse XML python
🟡 intermediate
⭐⭐⭐
Analyser les documents XML et extraire des données en utilisant ElementTree et d'autres bibliothèques XML
⏱️ 30 min
🏷️ python, web, serialization, xml
Prerequisites:
Intermediate Python, XML basics, ElementTree
# Web Python XML Parsing Examples
# Parsing and processing XML documents with various techniques
# 1. Basic XML Parsing
import xml.etree.ElementTree as ET
from typing import List, Dict, Any, Optional, Tuple
from xml.dom import minidom
def parse_xml_string(xml_string: str) -> ET.Element:
    """Parse an XML document held in a string.

    Args:
        xml_string: XML text.

    Returns:
        The document's root element.
    """
    root = ET.fromstring(xml_string)
    return root
def parse_xml_file(file_path: str) -> ET.Element:
    """Parse an XML file.

    Args:
        file_path: Path of the XML file.

    Returns:
        The document's root element.
    """
    return ET.parse(file_path).getroot()
def get_root_tag(element: ET.Element) -> str:
    """Return the tag name of ``element``.

    Args:
        element: XML element.

    Returns:
        The element's ``tag`` attribute.
    """
    tag_name = element.tag
    return tag_name
def get_root_attributes(element: ET.Element) -> Dict[str, str]:
    """Return the attribute mapping of ``element``.

    Args:
        element: XML element.

    Returns:
        The element's own ``attrib`` dictionary (not a copy).
    """
    attributes = element.attrib
    return attributes
# 2. Element Navigation
def find_child(element: ET.Element, tag: str) -> Optional[ET.Element]:
    """Return the first subelement matching ``tag``.

    Args:
        element: Element to search.
        tag: Tag name or path, passed to ``Element.find``.

    Returns:
        The first match, or ``None`` if there is none.
    """
    first_match = element.find(tag)
    return first_match
def find_children(element: ET.Element, tag: str) -> List[ET.Element]:
    """Return every subelement matching ``tag``.

    Args:
        element: Element to search.
        tag: Tag name or path, passed to ``Element.findall``.

    Returns:
        Matching elements in document order; empty if none match.
    """
    matches = element.findall(tag)
    return matches
def find_all_descendants(element: ET.Element, tag: str) -> List[ET.Element]:
    """Collect every element with the given tag anywhere below ``element``.

    ``Element.iter`` yields a one-shot iterator; it is materialized so the
    result matches the annotated list type and supports ``len`` and
    indexing. As with ``Element.iter``, ``element`` itself is included when
    its own tag matches.

    Args:
        element: Element whose subtree is searched.
        tag: Tag name to match.

    Returns:
        Matching elements in document order.
    """
    return list(element.iter(tag))
def get_parent(element: ET.Element, root: Optional[ET.Element] = None) -> Optional[ET.Element]:
    """Find the parent of ``element`` by searching the tree under ``root``.

    ElementTree elements hold no reference to their parent, so the lookup
    needs a tree to search.

    Args:
        element: Element whose parent is wanted.
        root: Root of the tree containing ``element``. When omitted the
            parent cannot be determined and ``None`` is returned, which is
            what this function always returned before.

    Returns:
        The parent element, or ``None`` if ``root`` is not given or
        ``element`` is not a descendant of ``root``.
    """
    if root is None:
        return None
    for candidate in root.iter():
        for child in candidate:
            if child is element:
                return candidate
    return None
def get_siblings(element: ET.Element, root: Optional[ET.Element] = None) -> List[ET.Element]:
    """Return the other children of ``element``'s parent.

    The original built its parent map from ``element``'s own subtree, where
    ``element`` can never appear as a child, so it always returned ``[]``.
    The parent is now searched for under ``root``.

    Args:
        element: Element whose siblings are wanted.
        root: Root of the tree containing ``element``. Without it the
            parent cannot be located and an empty list is returned.

    Returns:
        The siblings in document order, excluding ``element`` itself.
    """
    if root is None:
        return []
    for candidate in root.iter():
        # Identity comparison: an element with no children is falsy.
        if any(child is element for child in candidate):
            return [child for child in candidate if child is not element]
    return []
# 3. Element Data Extraction
def get_element_text(element: ET.Element) -> str:
    """Return the text directly inside ``element``.

    Args:
        element: XML element.

    Returns:
        ``element.text``, or an empty string when it is ``None`` or empty.
    """
    content = element.text
    return content if content else ''
def get_element_tail(element: ET.Element) -> str:
    """Return the text that follows ``element``'s closing tag.

    Args:
        element: XML element.

    Returns:
        ``element.tail``, or an empty string when it is ``None`` or empty.
    """
    trailing = element.tail
    return trailing if trailing else ''
def get_element_attribute(element: ET.Element, attr: str, default: str = '') -> str:
    """Look up one attribute of ``element``.

    Args:
        element: XML element.
        attr: Attribute name.
        default: Value returned when the attribute is absent.

    Returns:
        The attribute value, or ``default``.
    """
    value = element.get(attr, default)
    return value
def get_all_attributes(element: ET.Element) -> Dict[str, str]:
    """Return all attributes of ``element``.

    Args:
        element: XML element.

    Returns:
        The element's own ``attrib`` dictionary (not a copy).
    """
    attributes = element.attrib
    return attributes
# 4. XML to Dictionary Conversion
def element_to_dict(element: ET.Element) -> Dict[str, Any]:
    """Convert an element and its subtree to plain Python data.

    Conventions:
      * attributes become ``'@name'`` keys;
      * an element with only text (no attributes, no children) collapses
        to that stripped text string;
      * otherwise non-blank text is stored under ``'#text'``;
      * repeated child tags are gathered into a list.

    The original collapsed every childless element with text to a bare
    string, silently dropping its attributes; attributes are now kept.

    Args:
        element: XML element.

    Returns:
        A dictionary, or a string for a text-only leaf element.
    """
    result = {'@' + name: value for name, value in element.attrib.items()}

    text = (element.text or '').strip()
    if text:
        if len(element) == 0 and not result:
            return text
        result['#text'] = text

    for child in element:
        child_data = element_to_dict(child)
        if child.tag not in result:
            result[child.tag] = child_data
            continue
        # Second occurrence of a tag: promote the existing value to a list.
        if not isinstance(result[child.tag], list):
            result[child.tag] = [result[child.tag]]
        result[child.tag].append(child_data)
    return result
def xml_to_dict(xml_string: str) -> Dict[str, Any]:
    """Parse XML text and convert it to a dictionary keyed by the root tag.

    Args:
        xml_string: XML text.

    Returns:
        ``{root_tag: element_to_dict(root)}``.
    """
    document_root = ET.fromstring(xml_string)
    converted = element_to_dict(document_root)
    return {document_root.tag: converted}
# 5. XPath Queries
def xpath_find(element: ET.Element, path: str) -> List[ET.Element]:
    """Find all elements matching a path expression.

    Args:
        element: Element to search from.
        path: Path expression passed to ``Element.findall``.

    Returns:
        Matching elements in document order.
    """
    matches = element.findall(path)
    return matches
def xpath_find_text(element: ET.Element, path: str) -> List[str]:
    """Collect the text of every element matching a path expression.

    Args:
        element: Element to search from.
        path: Path expression passed to ``Element.findall``.

    Returns:
        One string per match; an element without text contributes ``''``.
    """
    texts = []
    for match in element.findall(path):
        texts.append(match.text or '')
    return texts
def xpath_find_first(element: ET.Element, path: str) -> Optional[ET.Element]:
    """Find the first element matching a path expression.

    Args:
        element: Element to search from.
        path: Path expression passed to ``Element.find``.

    Returns:
        The first match, or ``None``. Compare the result with
        ``is not None``: a childless element is falsy.
    """
    return element.find(path)
# 6. XML Modification
def set_element_text(element: ET.Element, text: str) -> None:
    """Replace the text of ``element`` in place.

    Args:
        element: XML element to modify.
        text: New text content.
    """
    setattr(element, 'text', text)
def set_element_attribute(element: ET.Element, attr: str, value: str) -> None:
    """Set or overwrite one attribute of ``element`` in place.

    Args:
        element: XML element to modify.
        attr: Attribute name.
        value: Attribute value.
    """
    element.set(attr, value)
    return None
def add_child_element(parent: ET.Element, tag: str, text: str = '', **attributes) -> ET.Element:
    """Append a new child element to ``parent``.

    Args:
        parent: Element that receives the child.
        tag: Tag name of the child.
        text: Text content; left unset when empty.
        **attributes: Attributes of the child.

    Returns:
        The newly created child element.
    """
    new_child = ET.SubElement(parent, tag, attrib=attributes)
    if not text:
        return new_child
    new_child.text = text
    return new_child
def remove_element(element: ET.Element, root: Optional[ET.Element] = None) -> None:
    """Detach ``element`` from its parent.

    The original built its parent map from ``element``'s own subtree, where
    ``element`` never appears as a child, so the call never removed
    anything. The parent is now searched for under ``root``.

    Args:
        element: Element to remove.
        root: Root of the tree containing ``element``. Without it the
            parent cannot be located and nothing is removed.
    """
    if root is None:
        return
    for candidate in root.iter():
        # Identity comparison: an element with no children is falsy.
        if any(child is element for child in candidate):
            candidate.remove(element)
            return
def clear_element(element: ET.Element) -> None:
    """Reset ``element`` in place via ``Element.clear``.

    Removes all subelements and attributes and resets text and tail.

    Args:
        element: Element to clear.
    """
    element.clear()
    return None
# 7. XML Generation
def create_xml_element(tag: str, text: str = '', **attributes) -> ET.Element:
    """Create a standalone element.

    Args:
        tag: Tag name.
        text: Text content; left unset when empty.
        **attributes: Element attributes.

    Returns:
        The new element.
    """
    created = ET.Element(tag, attrib=attributes)
    if not text:
        return created
    created.text = text
    return created
def build_xml_tree() -> ET.Element:
    """Build a small example tree.

    The tree is ``root`` with ``child1`` (holding one ``subchild``) and
    ``child2``.

    Returns:
        The root element.
    """
    tree_root = create_xml_element('root')
    first_branch = add_child_element(tree_root, 'child1', 'Text 1', id='1')
    add_child_element(first_branch, 'subchild', 'Subtext')
    add_child_element(tree_root, 'child2', 'Text 2', id='2')
    return tree_root
def dict_to_xml(tag: str, data: Dict[str, Any]) -> ET.Element:
    """Convert a dictionary to an XML element.

    Conventions (the inverse of ``element_to_dict``):
      * keys starting with ``'@'`` become attributes;
      * the key ``'#text'`` sets the element text;
      * a dict value becomes a nested child element;
      * a list value becomes one child per item, all sharing the key as tag;
      * any other value becomes a child whose text is ``str(value)``.

    The original only kept list items that were dicts or strings; other
    scalars such as numbers produced empty children. They are now
    stringified like scalar values outside lists.

    Args:
        tag: Tag name of the element to create.
        data: Dictionary describing attributes, text and children.

    Returns:
        The new element.
    """
    element = ET.Element(tag)
    for key, value in data.items():
        if key.startswith('@'):
            element.set(key[1:], str(value))
        elif key == '#text':
            element.text = str(value)
        elif isinstance(value, dict):
            element.append(dict_to_xml(key, value))
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    element.append(dict_to_xml(key, item))
                else:
                    ET.SubElement(element, key).text = str(item)
        else:
            ET.SubElement(element, key).text = str(value)
    return element
# 8. XML Serialization
def element_to_string(element: ET.Element, encoding: str = 'unicode') -> str:
    """Serialize ``element`` with ``ET.tostring``.

    Args:
        element: XML element.
        encoding: Forwarded to ``ET.tostring``. The default ``'unicode'``
            yields ``str``; a byte encoding such as ``'utf-8'`` makes
            ``ET.tostring`` return ``bytes`` instead.

    Returns:
        The serialized XML.
    """
    serialized = ET.tostring(element, encoding=encoding)
    return serialized
def element_to_pretty_string(element: ET.Element, indent: str = ' ') -> str:
    """Serialize ``element`` as indented XML.

    The element is re-parsed with ``minidom`` and rendered with
    ``toprettyxml``.

    Args:
        element: XML element.
        indent: String used for each indentation level.

    Returns:
        The ``toprettyxml`` output for the element.
    """
    compact_xml = ET.tostring(element, encoding='unicode')
    return minidom.parseString(compact_xml).toprettyxml(indent=indent)
def save_xml_file(element: ET.Element, file_path: str, encoding: str = 'utf-8') -> bool:
    """Write ``element`` to a file with an XML declaration.

    Any exception raised while writing is printed and reported through the
    return value instead of propagating.

    Args:
        element: Root element to write.
        file_path: Destination path.
        encoding: Output encoding.

    Returns:
        ``True`` when the file was written, ``False`` on any error.
    """
    try:
        document = ET.ElementTree(element)
        document.write(file_path, encoding=encoding, xml_declaration=True)
    except Exception as e:
        print(f"Error saving XML: {e}")
        return False
    return True
# 9. XML Validation
def validate_xml_structure(xml_string: str, required_tags: List[str]) -> bool:
    """Check that an XML document contains every required tag somewhere.

    Args:
        xml_string: XML text.
        required_tags: Tag names that must each occur at least once,
            anywhere in the document.

    Returns:
        ``True`` if all tags occur; ``False`` if one is missing or the
        document cannot be parsed.
    """
    try:
        document_root = ET.fromstring(xml_string)
        present = {node.tag for node in document_root.iter()}
        for wanted in required_tags:
            if wanted not in present:
                return False
        return True
    except ET.ParseError:
        return False
def get_xml_structure(element: ET.Element) -> Dict[str, int]:
    """Count how often each tag occurs in ``element``'s subtree.

    Args:
        element: Root of the subtree to inspect (counted as well).

    Returns:
        Tag name mapped to its occurrence count, in first-seen order.
    """
    counts = {}
    for node in element.iter():
        name = node.tag
        if name in counts:
            counts[name] += 1
        else:
            counts[name] = 1
    return counts
# 10. Namespace Handling
def strip_namespace(element: ET.Element) -> ET.Element:
    """Remove the ``{uri}`` namespace prefix from every tag, in place.

    Only tags are rewritten; attribute names are left untouched.

    Args:
        element: Root of the subtree to rewrite.

    Returns:
        The same ``element`` object, now with plain tag names.
    """
    for node in element.iter():
        qualified = node.tag
        if '}' not in qualified:
            continue
        node.tag = qualified.split('}')[1]
    return element
def get_namespace_map(element: ET.Element) -> Dict[str, str]:
    """Read namespace declarations from ``element``'s attributes.

    Only attributes literally named ``xmlns`` or ``xmlns:<prefix>`` in
    ``element.attrib`` are considered.
    NOTE(review): ElementTree's parser normally consumes ``xmlns``
    declarations rather than exposing them as attributes, so parsed
    documents may yield an empty map - confirm against the intended usage.

    Args:
        element: XML element to inspect.

    Returns:
        Prefix-to-URI mapping; the un-prefixed declaration is stored under
        the key ``'default'``. Empty for a non-``Element`` argument.
    """
    mapping = {}
    if not isinstance(element, ET.Element):
        return mapping
    for name, uri in element.attrib.items():
        if name.startswith('xmlns:'):
            mapping[name[6:]] = uri
        elif name == 'xmlns':
            mapping['default'] = uri
    return mapping
# Usage Examples
def demonstrate_xml_parse():
    """Print a walkthrough of the XML helpers defined in this module."""
    print("=== Web Python XML Parsing Examples ===\n")

    # Sample XML. The declaration must be the very first characters of the
    # document: the original literal began with a newline, which makes the
    # parser reject the declaration.
    xml_string = '''<?xml version="1.0" encoding="UTF-8"?>
<library>
    <book id="1" category="fiction">
        <title>Python Programming</title>
        <author>John Doe</author>
        <price>29.99</price>
    </book>
    <book id="2" category="tech">
        <title>Web Development</title>
        <author>Jane Smith</author>
        <price>39.99</price>
    </book>
</library>
'''

    # 1. Basic parsing
    print("--- 1. Basic Parsing ---")
    root = parse_xml_string(xml_string)
    print(f"Root tag: {get_root_tag(root)}")
    print(f"Root attributes: {get_root_attributes(root)}")

    # 2. Element navigation
    print("\n--- 2. Element Navigation ---")
    books = find_children(root, 'book')
    print(f"Found {len(books)} books")
    for book in books:
        title = find_child(book, 'title')
        print(f" - {get_element_text(title)}")

    # 3. Data extraction
    print("\n--- 3. Data Extraction ---")
    first_book = find_child(root, 'book')
    print(f"First book attributes: {get_all_attributes(first_book)}")
    print(f"First book ID: {get_element_attribute(first_book, 'id')}")

    # 4. To dictionary
    print("\n--- 4. XML to Dictionary ---")
    xml_dict = xml_to_dict('<person><name>Alice</name><age>30</age></person>')
    print(f"Dictionary: {xml_dict}")

    # 5. XPath queries
    print("\n--- 5. XPath Queries ---")
    titles = xpath_find_text(root, './/title')
    print(f"All titles: {titles}")
    first_title = xpath_find_first(root, './/title')
    # Compare with None explicitly: an element without children is falsy,
    # so a plain truth test would report a found <title> as missing.
    print(f"First title: {get_element_text(first_title) if first_title is not None else 'None'}")

    # 6. Modification
    print("\n--- 6. XML Modification ---")
    new_root = build_xml_tree()
    add_child_element(new_root, 'child3', 'Text 3', id='3')
    print(f"Modified XML:\n{element_to_pretty_string(new_root)}")

    # 7. XML generation
    print("\n--- 7. XML Generation ---")
    person_xml = dict_to_xml('person', {'name': 'Bob', 'age': '25', '@id': '123'})
    print(f"Generated XML:\n{element_to_pretty_string(person_xml)}")

    # 8. Validation
    print("\n--- 8. Validation ---")
    is_valid = validate_xml_structure(xml_string, ['library', 'book', 'title'])
    print(f"XML has required tags: {is_valid}")
    print(f"XML structure: {get_xml_structure(root)}")

    # 9. Namespace handling
    print("\n--- 9. Namespace Handling ---")
    ns_xml = '<ns:root xmlns:ns="http://example.com"><ns:child>Text</ns:child></ns:root>'
    ns_element = parse_xml_string(ns_xml)
    clean_element = strip_namespace(ns_element)
    print(f"Without namespace: {get_root_tag(clean_element)}")

    print("\n=== All XML Parsing Examples Completed ===")
# Export functions
# export { parse_xml_string, parse_xml_file, get_root_tag, get_root_attributes }
# export { find_child, find_children, find_all_descendants, get_siblings }
# export { get_element_text, get_element_tail, get_element_attribute, get_all_attributes }
# export { element_to_dict, xml_to_dict }
# export { xpath_find, xpath_find_text, xpath_find_first }
# export { set_element_text, set_element_attribute, add_child_element, remove_element, clear_element }
# export { create_xml_element, build_xml_tree, dict_to_xml }
# export { element_to_string, element_to_pretty_string, save_xml_file }
# export { validate_xml_structure, get_xml_structure }
# export { strip_namespace, get_namespace_map }
# export { demonstrate_xml_parse }