Compare commits

...

4 Commits

Author SHA1 Message Date
40c8001116 patchy mcpatchy 2025-05-13 21:53:29 +10:00
1f46f0a6d9 patching again
patch here patch there patch everywhere
2025-05-13 21:48:38 +10:00
881669e5e9 patch
Update path handling for StructureDefinition (SD) splitting to cope with odd names in JSON files.

Iterate through all JSON files and read their contents to detect StructureDefinitions, rather than relying on the file name alone.
2025-05-13 20:49:02 +10:00
655b186197 patch
Handle non-standard JSON package names.
2025-05-13 20:30:31 +10:00
2 changed files with 312 additions and 22 deletions

Binary file not shown.

334
main.py
View File

@ -10,6 +10,7 @@ import tarfile
import json
from datetime import datetime, timedelta
import asyncio
import os
# Import from core
from core import (
@ -381,7 +382,106 @@ async def search_igs(query: str = '', search_type: str = 'semantic'):
finally:
db.close()
logger.info("Closed database session after search")
# ----- OLD list_profiles implementation (commented out; superseded by the active definition that follows) -----
# @app.get("/igs/{ig_id}/profiles", response_model=List[ProfileMetadata])
# async def list_profiles(ig_id: str, version: Optional[str] = None):
# """List StructureDefinition profiles in the specified IG, optionally for a specific version."""
# logger.info(f"Listing profiles for IG: {ig_id}, version: {version}")
# # Parse ig_id for version if it includes a '#'
# ig_name = ig_id
# if '#' in ig_id:
# parts = ig_id.split('#', 1)
# ig_name = parts[0]
# if version and parts[1] != version:
# logger.warning(f"Version specified in ig_id ({parts[1]}) conflicts with version parameter ({version}). Using version parameter.")
# else:
# version = parts[1]
# logger.info(f"Parsed ig_id: name={ig_name}, version={version}")
# # Validate ig_name
# if not ig_name or not re.match(r'^[a-zA-Z0-9\.\-_]+$', ig_name):
# logger.error(f"Invalid IG name: {ig_name}")
# raise HTTPException(status_code=400, detail="Invalid IG name. Use format like 'hl7.fhir.au.core'.")
# # Validate version if provided
# if version and not re.match(r'^[a-zA-Z0-9\.\-_]+$', version):
# logger.error(f"Invalid version: {version}")
# raise HTTPException(status_code=400, detail="Invalid version format. Use format like '1.1.0-preview'.")
# # Check if profiles are cached
# cache_key = f"{ig_name}#{version if version else 'latest'}"
# if cache_key in app_config["PROFILE_CACHE"]:
# logger.info(f"Returning cached profiles for IG {ig_name} (version: {version if version else 'latest'})")
# return app_config["PROFILE_CACHE"][cache_key]
# # Fetch package metadata from cache
# packages = app_config["MANUAL_PACKAGE_CACHE"]
# if not packages:
# logger.error("Package cache is empty. Please refresh the cache using /refresh-cache.")
# raise HTTPException(status_code=500, detail="Package cache is empty. Please refresh the cache.")
# # Find the package
# package = None
# for pkg in packages:
# if pkg['package_name'].lower() == ig_name.lower():
# package = pkg
# break
# if not package:
# logger.error(f"IG {ig_name} not found in cached packages.")
# raise HTTPException(status_code=404, detail=f"IG '{ig_name}' not found.")
# # Determine the version to fetch
# if version:
# target_version = None
# for ver_entry in package['all_versions']:
# if ver_entry['version'] == version:
# target_version = ver_entry['version']
# break
# if not target_version:
# logger.error(f"Version {version} not found for IG {ig_name}.")
# raise HTTPException(status_code=404, detail=f"Version '{version}' not found for IG '{ig_name}'.")
# else:
# target_version = package['latest_version']
# version = target_version
# logger.info(f"No version specified, using latest version: {target_version}")
# # Download the package
# tgz_path, error = download_package(ig_name, version, package)
# if not tgz_path:
# logger.error(f"Failed to download package for IG {ig_name} (version: {version}): {error}")
# if "404" in error:
# raise HTTPException(status_code=404, detail=f"Package for IG '{ig_name}' (version: {version}) not found.")
# raise HTTPException(status_code=500, detail=f"Failed to fetch package: {error}")
# # Extract profiles from the .tgz file
# profiles = []
# try:
# with tarfile.open(tgz_path, mode="r:gz") as tar:
# for member in tar.getmembers():
# if member.name.endswith('.json') and 'StructureDefinition' in member.name:
# f = tar.extractfile(member)
# if f:
# resource = json.load(f)
# if resource.get("resourceType") == "StructureDefinition":
# profiles.append(ProfileMetadata(
# name=resource.get("name", ""),
# description=resource.get("description"),
# version=resource.get("version"),
# url=resource.get("url", "")
# ))
# except Exception as e:
# logger.error(f"Failed to extract profiles from package for IG {ig_name} (version: {version}): {str(e)}")
# raise HTTPException(status_code=500, detail=f"Failed to extract profiles: {str(e)}")
# # Cache the profiles
# app_config["PROFILE_CACHE"][cache_key] = profiles
# logger.info(f"Cached {len(profiles)} profiles for IG {ig_name} (version: {version})")
# logger.info(f"Found {len(profiles)} profiles in IG {ig_name} (version: {version})")
# return profiles
# ----- end of OLD list_profiles implementation -----
@app.get("/igs/{ig_id}/profiles", response_model=List[ProfileMetadata])
async def list_profiles(ig_id: str, version: Optional[str] = None):
"""List StructureDefinition profiles in the specified IG, optionally for a specific version."""
@ -459,17 +559,27 @@ async def list_profiles(ig_id: str, version: Optional[str] = None):
try:
with tarfile.open(tgz_path, mode="r:gz") as tar:
for member in tar.getmembers():
if member.name.endswith('.json') and 'StructureDefinition' in member.name:
if member.name.endswith('.json'): # Check all JSON files
logger.debug(f"Processing file: {member.name}")
f = tar.extractfile(member)
if f:
resource = json.load(f)
if resource.get("resourceType") == "StructureDefinition":
profiles.append(ProfileMetadata(
name=resource.get("name", ""),
description=resource.get("description"),
version=resource.get("version"),
url=resource.get("url", "")
))
try:
resource = json.load(f)
# Check if the resource is a StructureDefinition
if resource.get("resourceType") == "StructureDefinition":
logger.debug(f"Found StructureDefinition in file: {member.name}")
profiles.append(ProfileMetadata(
name=resource.get("name", ""),
description=resource.get("description"),
version=resource.get("version"),
url=resource.get("url", "")
))
else:
logger.debug(f"File {member.name} is not a StructureDefinition, resourceType: {resource.get('resourceType', 'unknown')}")
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse JSON in file {member.name}: {str(e)}")
except Exception as e:
logger.warning(f"Error processing file {member.name}: {str(e)}")
except Exception as e:
logger.error(f"Failed to extract profiles from package for IG {ig_name} (version: {version}): {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to extract profiles: {str(e)}")
@ -480,6 +590,153 @@ async def list_profiles(ig_id: str, version: Optional[str] = None):
logger.info(f"Found {len(profiles)} profiles in IG {ig_name} (version: {version})")
return profiles
# ----- OLD get_profile implementation (commented out; superseded by the active definition that follows) -----
# @app.get("/igs/{ig_id}/profiles/{profile_id}", response_model=StructureDefinition)
# async def get_profile(ig_id: str, profile_id: str, version: Optional[str] = None, include_narrative: bool = True):
# """
# Retrieve a specific StructureDefinition from an Implementation Guide (IG).
# This endpoint fetches a specific FHIR StructureDefinition (profile) from the given IG.
# It supports optional version specification and an option to strip the narrative content.
# Args:
# ig_id (str): The ID of the Implementation Guide (e.g., 'hl7.fhir.au.core' or 'hl7.fhir.au.core#1.1.0-preview').
# If the version is included in the ig_id (after '#'), it takes precedence unless overridden by the version parameter.
# profile_id (str): The ID or name of the profile to retrieve (e.g., 'AUCorePatient').
# version (str, optional): The version of the IG (e.g., '1.1.0-preview'). If not provided and ig_id contains a version,
# the version from ig_id is used; otherwise, the latest version is used.
# include_narrative (bool, optional): Whether to include the narrative (`text` element) in the StructureDefinition.
# Defaults to True. Set to False to strip the narrative, removing human-readable content.
# Returns:
# StructureDefinition: A dictionary containing the requested StructureDefinition resource.
# The response includes the `resource` field with the StructureDefinition JSON.
# If `include_narrative=False`, the `text` element will be set to null.
# Raises:
# HTTPException:
# - 400: If the IG name, version, or profile ID is invalid.
# - 404: If the IG, version, or profile is not found.
# - 500: If an error occurs during package retrieval or profile extraction.
# Example:
# - GET /igs/hl7.fhir.au.core/profiles/AUCorePatient?version=1.1.0-preview
# Returns the AUCorePatient profile with narrative included.
# - GET /igs/hl7.fhir.au.core/profiles/AUCorePatient?version=1.1.0-preview&include_narrative=false
# Returns the AUCorePatient profile with the narrative (`text` element) stripped.
# """
# logger.info(f"Retrieving profile {profile_id} for IG: {ig_id}, version: {version}, include_narrative: {include_narrative}")
# # Parse ig_id for version if it includes a '#'
# ig_name = ig_id
# if '#' in ig_id:
# parts = ig_id.split('#', 1)
# ig_name = parts[0]
# if version and parts[1] != version:
# logger.warning(f"Version specified in ig_id ({parts[1]}) conflicts with version parameter ({version}). Using version parameter.")
# else:
# version = parts[1]
# logger.info(f"Parsed ig_id: name={ig_name}, version={version}")
# # Validate ig_name
# if not ig_name or not re.match(r'^[a-zA-Z0-9\.\-_]+$', ig_name):
# logger.error(f"Invalid IG name: {ig_name}")
# raise HTTPException(status_code=400, detail="Invalid IG name. Use format like 'hl7.fhir.au.core'.")
# # Validate version if provided
# if version and not re.match(r'^[a-zA-Z0-9\.\-_]+$', version):
# logger.error(f"Invalid version: {version}")
# raise HTTPException(status_code=400, detail="Invalid version format. Use format like '1.1.0-preview'.")
# # Validate profile_id
# if not profile_id or not re.match(r'^[a-zA-Z0-9\.\-_]+$', profile_id):
# logger.error(f"Invalid profile ID: {profile_id}")
# raise HTTPException(status_code=400, detail="Invalid profile ID format.")
# # Check if profiles are cached
# cache_key = f"{ig_name}#{version if version else 'latest'}"
# if cache_key in app_config["PROFILE_CACHE"]:
# logger.info(f"Using cached profiles for IG {ig_name} (version: {version if version else 'latest'})")
# profiles = app_config["PROFILE_CACHE"][cache_key]
# for profile in profiles:
# if profile.name == profile_id or profile.url.endswith(profile_id):
# break
# else:
# logger.error(f"Profile {profile_id} not found in cached profiles for IG {ig_name} (version: {version if version else 'latest'})")
# raise HTTPException(status_code=404, detail=f"Profile '{profile_id}' not found in IG '{ig_name}' (version: {version if version else 'latest'}).")
# else:
# profiles = await list_profiles(ig_id, version)
# # Fetch package metadata
# packages = app_config["MANUAL_PACKAGE_CACHE"]
# if not packages:
# logger.error("Package cache is empty. Please refresh the cache using /refresh-cache.")
# raise HTTPException(status_code=500, detail="Package cache is empty. Please refresh the cache.")
# package = None
# for pkg in packages:
# if pkg['package_name'].lower() == ig_name.lower():
# package = pkg
# break
# if not package:
# logger.error(f"IG {ig_name} not found in cached packages.")
# raise HTTPException(status_code=404, detail=f"IG '{ig_name}' not found.")
# # Determine the version to fetch
# if version:
# target_version = None
# for ver_entry in package['all_versions']:
# if ver_entry['version'] == version:
# target_version = ver_entry['version']
# break
# if not target_version:
# logger.error(f"Version {version} not found for IG {ig_name}.")
# raise HTTPException(status_code=404, detail=f"Version '{version}' not found for IG '{ig_name}'.")
# else:
# target_version = package['latest_version']
# version = target_version
# logger.info(f"No version specified, using latest version: {target_version}")
# # Download the package
# tgz_path, error = download_package(ig_name, version, package)
# if not tgz_path:
# logger.error(f"Failed to download package for IG {ig_name} (version: {version}): {error}")
# if "404" in error:
# raise HTTPException(status_code=404, detail=f"Package for IG '{ig_name}' (version: {version}) not found.")
# raise HTTPException(status_code=500, detail=f"Failed to fetch package: {error}")
# # Extract the specific profile from the .tgz file
# profile_resource = None
# try:
# with tarfile.open(tgz_path, mode="r:gz") as tar:
# for member in tar.getmembers():
# if member.name.endswith('.json') and 'StructureDefinition' in member.name:
# f = tar.extractfile(member)
# if f:
# resource = json.load(f)
# if resource.get("resourceType") == "StructureDefinition":
# resource_name = resource.get("name", "")
# resource_url = resource.get("url", "")
# if resource_name == profile_id or resource_url.endswith(profile_id):
# profile_resource = resource
# break
# if not profile_resource:
# logger.error(f"Profile {profile_id} not found in package for IG {ig_name} (version: {version})")
# raise HTTPException(status_code=404, detail=f"Profile '{profile_id}' not found in IG '{ig_name}' (version: {version}).")
# except Exception as e:
# logger.error(f"Failed to extract profile {profile_id} from package for IG {ig_name} (version: {version}): {str(e)}")
# raise HTTPException(status_code=500, detail=f"Failed to extract profile: {str(e)}")
# # Strip narrative if requested
# if not include_narrative:
# logger.info(f"Stripping narrative from profile {profile_id}")
# if "text" in profile_resource:
# profile_resource["text"] = None
# logger.info(f"Successfully retrieved profile {profile_id} for IG {ig_name} (version: {version})")
# return StructureDefinition(resource=profile_resource)
# ----- end of OLD get_profile implementation -----
@app.get("/igs/{ig_id}/profiles/{profile_id}", response_model=StructureDefinition)
async def get_profile(ig_id: str, profile_id: str, version: Optional[str] = None, include_narrative: bool = True):
@ -492,7 +749,7 @@ async def get_profile(ig_id: str, profile_id: str, version: Optional[str] = None
Args:
ig_id (str): The ID of the Implementation Guide (e.g., 'hl7.fhir.au.core' or 'hl7.fhir.au.core#1.1.0-preview').
If the version is included in the ig_id (after '#'), it takes precedence unless overridden by the version parameter.
profile_id (str): The ID or name of the profile to retrieve (e.g., 'AUCorePatient').
profile_id (str): The ID or name of the profile to retrieve (e.g., 'AUCorePatient' or 'DAV_PR_ERP_Abrechnungszeilen').
version (str, optional): The version of the IG (e.g., '1.1.0-preview'). If not provided and ig_id contains a version,
the version from ig_id is used; otherwise, the latest version is used.
include_narrative (bool, optional): Whether to include the narrative (`text` element) in the StructureDefinition.
@ -546,15 +803,24 @@ async def get_profile(ig_id: str, profile_id: str, version: Optional[str] = None
# Check if profiles are cached
cache_key = f"{ig_name}#{version if version else 'latest'}"
if cache_key in app_config["PROFILE_CACHE"]:
logger.info(f"Using cached profiles for IG {ig_name} (version: {version if version else 'latest'})")
logger.info(f"Cache hit for IG {ig_name} (version: {version if version else 'latest'})")
profiles = app_config["PROFILE_CACHE"][cache_key]
logger.debug(f"Cached profiles: {[profile.name for profile in profiles]}")
profile_found = False
# Normalize profile_id for matching (remove hyphens, underscores, and convert to lowercase)
normalized_profile_id = profile_id.lower().replace('-', '').replace('_', '')
for profile in profiles:
if profile.name == profile_id or profile.url.endswith(profile_id):
normalized_name = profile.name.lower().replace('-', '').replace('_', '') if profile.name else ''
normalized_url_end = profile.url.lower().split('/')[-1].replace('-', '').replace('_', '') if profile.url else ''
if normalized_name == normalized_profile_id or normalized_url_end == normalized_profile_id:
logger.info(f"Found profile {profile_id} in cached profiles: name={profile.name}, url={profile.url}")
profile_found = True
break
else:
if not profile_found:
logger.error(f"Profile {profile_id} not found in cached profiles for IG {ig_name} (version: {version if version else 'latest'})")
raise HTTPException(status_code=404, detail=f"Profile '{profile_id}' not found in IG '{ig_name}' (version: {version if version else 'latest'}).")
else:
logger.info(f"Cache miss for IG {ig_name} (version: {version if version else 'latest'}), calling list_profiles")
profiles = await list_profiles(ig_id, version)
# Fetch package metadata
@ -588,7 +854,15 @@ async def get_profile(ig_id: str, profile_id: str, version: Optional[str] = None
version = target_version
logger.info(f"No version specified, using latest version: {target_version}")
# Check directory state before calling download_package
instance_dir = "instance"
if os.path.exists(instance_dir):
logger.info(f"Directory {instance_dir} exists before calling download_package in get_profile")
else:
logger.warning(f"Directory {instance_dir} does NOT exist before calling download_package in get_profile")
# Download the package
logger.info(f"Calling download_package for IG {ig_name} (version: {version}) in get_profile")
tgz_path, error = download_package(ig_name, version, package)
if not tgz_path:
logger.error(f"Failed to download package for IG {ig_name} (version: {version}): {error}")
@ -600,17 +874,34 @@ async def get_profile(ig_id: str, profile_id: str, version: Optional[str] = None
profile_resource = None
try:
with tarfile.open(tgz_path, mode="r:gz") as tar:
# Normalize profile_id for matching (remove hyphens, underscores, and convert to lowercase)
normalized_profile_id = profile_id.lower().replace('-', '').replace('_', '')
for member in tar.getmembers():
if member.name.endswith('.json') and 'StructureDefinition' in member.name:
if member.name.endswith('.json'): # Check all JSON files
logger.debug(f"Processing file: {member.name}")
f = tar.extractfile(member)
if f:
resource = json.load(f)
if resource.get("resourceType") == "StructureDefinition":
resource_name = resource.get("name", "")
resource_url = resource.get("url", "")
if resource_name == profile_id or resource_url.endswith(profile_id):
profile_resource = resource
break
try:
resource = json.load(f)
# Check if the resource is a StructureDefinition
if resource.get("resourceType") == "StructureDefinition":
resource_name = resource.get("name", "")
resource_url = resource.get("url", "")
# Normalize name and URL for matching
normalized_name = resource_name.lower().replace('-', '').replace('_', '') if resource_name else ''
normalized_url_end = resource_url.lower().split('/')[-1].replace('-', '').replace('_', '') if resource_url else ''
logger.debug(f"Found StructureDefinition in file: {member.name}, name={resource_name}, url={resource_url}, normalized_name={normalized_name}, normalized_url_end={normalized_url_end}")
# Match profile_id against name or the last segment of the URL
if normalized_name == normalized_profile_id or normalized_url_end == normalized_profile_id:
logger.info(f"Matched profile {profile_id} in file {member.name}: name={resource_name}, url={resource_url}")
profile_resource = resource
break
else:
logger.debug(f"File {member.name} is not a StructureDefinition, resourceType: {resource.get('resourceType', 'unknown')}")
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse JSON in file {member.name}: {str(e)}")
except Exception as e:
logger.warning(f"Error processing file {member.name}: {str(e)}")
if not profile_resource:
logger.error(f"Profile {profile_id} not found in package for IG {ig_name} (version: {version})")
raise HTTPException(status_code=404, detail=f"Profile '{profile_id}' not found in IG '{ig_name}' (version: {version}).")
@ -626,7 +917,6 @@ async def get_profile(ig_id: str, profile_id: str, version: Optional[str] = None
logger.info(f"Successfully retrieved profile {profile_id} for IG {ig_name} (version: {version})")
return StructureDefinition(resource=profile_resource)
@app.get("/status", response_model=RefreshStatus)
async def get_refresh_status():
"""Get the status of the last cache refresh."""