dcraft package

Subpackages

Module contents

class dcraft.BqMetadataRepository(project: str, dataset_id: str, table_id: str, credentials: Any | None = None, _http: Any | None = None, location: Any | None = None, default_query_job_config: Any | None = None, default_load_job_config: Any | None = None, client_info: Any | None = None, client_options: Any | None = None)

Bases: MetadataRepository

load(id: str) Metadata

Loads the metadata for a specific ID.

Parameters:

id (str) – The ID of the metadata to load.

Returns:

The loaded metadata.

Return type:

Metadata

save(metadata: Metadata)

Save the given metadata to the dataset.

Parameters:

metadata (Metadata) – The metadata object to save.

Returns:

None

class dcraft.GcsDataRepository(project_id: str, bucket_name: str, credentials: Any | None = None, _http: Any | None = None, client_info: Any | None = None, client_options: Any | None = None, use_auth_w_custom_endpoint: bool = True)

Bases: DataRepository

load(project_name: str, layer_name: str, id: str, format: str, content_type: ContentType) DataFrame | dict | List[Dict]

Load the content from the specified project, layer, and ID, with the given format and content type.

Parameters:
  • project_name (str) – The name of the project.

  • layer_name (str) – The name of the layer.

  • id (str) – The ID of the content.

  • format (str) – The format of the content.

  • content_type (ContentType) – The type of the content.

Returns:

The loaded content.

Return type:

CoveredContentType

Raises:

save(content: DataFrame | dict | List[Dict], project_name: str, layer_name: str, id: str, format: str, content_type: ContentType)

Save the provided content to the specified project, layer, and ID in the given format and content type.

Parameters:
  • content (CoveredContentType) – The content to be saved.

  • project_name (str) – The name of the project.

  • layer_name (str) – The name of the layer.

  • id (str) – The ID of the content.

  • format (str) – The format in which the content should be saved.

  • content_type (ContentType) – The type of the content.

Raises:
Returns:

None

class dcraft.LocalDataRepository(dir_path: str)

Bases: DataRepository

load(project_name: str, layer_name: str, id: str, format: str, content_type: ContentType) DataFrame | dict | List[Dict]

Load the content from a specified path based on the project name, layer name, id, format, and content type.

Parameters:
  • project_name (str) – The name of the project.

  • layer_name (str) – The name of the layer.

  • id (str) – The ID of the content.

  • format (str) – The format of the content.

  • content_type (ContentType) – The type of the content.

Returns:

The loaded content.

Return type:

CoveredContentType

Raises:

save(content: DataFrame | dict | List[Dict], project_name: str, layer_name: str, id: str, format: str, content_type: ContentType)

Saves the given content to a file with the specified project name, layer name, ID, format, and content type.

Parameters:
  • content (CoveredContentType) – The content to be saved.

  • project_name (str) – The name of the project.

  • layer_name (str) – The name of the layer.

  • id (str) – The ID of the content.

  • format (str) – The format of the file to be saved.

  • content_type (ContentType) – The type of the content.

Raises:
Returns:

None

class dcraft.LocalMetadataRepository(path)

Bases: MetadataRepository

load(id: str) Metadata

Load the metadata for a given ID.

Parameters:

id (str) – The ID of the metadata to load.

Returns:

The loaded metadata.

Return type:

Metadata

Raises:

NoMetadataFound – If no metadata is found for the given ID.

save(metadata: Metadata)

Saves the given metadata to a file.

Parameters:

metadata (Metadata) – The metadata object to be saved.

Returns:

None

class dcraft.MinioRepository(endpoint: str, bucket: str, access_key: str | None = None, secret_key: str | None = None, session_token: str | None = None, secure: bool = True, region: str | None = None, http_client: PoolManager | None = None, credentials: Provider | None = None, cert_check: bool = True)

Bases: DataRepository

load(project_name: str, layer_name: str, id: str, format: str, content_type: ContentType) DataFrame | dict | List[Dict]

Load the specified content from the given project, layer, and ID.

Parameters:
  • project_name (str) – The name of the project.

  • layer_name (str) – The name of the layer.

  • id (str) – The ID of the content.

  • format (str) – The format of the content.

  • content_type (ContentType) – The type of the content.

Returns:

The loaded content.

Return type:

CoveredContentType

Raises:

ContentExtensionMismatch – If the content cannot be loaded with the given extension.

save(content: DataFrame | dict | List[Dict], project_name: str, layer_name: str, id: str, format: str, content_type: ContentType)

Save the content to a specified location in the bucket.

Parameters:
  • content (CoveredContentType) – The content to be saved.

  • project_name (str) – The name of the project.

  • layer_name (str) – The name of the layer.

  • id (str) – The unique identifier.

  • format (str) – The format of the content.

  • content_type (ContentType) – The type of the content.

Raises:

class dcraft.MongoMetadataRepository(db: str, collection: str, host: str | Sequence[str] | None = None, port: int | None = None, document_class: Any | None = None, tz_aware: bool | None = None, connect: bool | None = None, type_registry: TypeRegistry | None = None)

Bases: MetadataRepository

load(id: str) Metadata

Loads metadata for a specific ID.

Parameters:

id (str) – The ID of the metadata to load.

Returns:

The loaded metadata object.

Return type:

Metadata

save(metadata: Metadata)

Save the given metadata to the database.

Parameters:

metadata (Metadata) – The metadata object to save.

Returns:

None

class dcraft.RawLayerData(id: str | None, project_name: str, content: DataFrame | dict | List[Dict], author: str | None, created_at: datetime, description: str | None, extra_info: dict | None)

Bases: BaseLayerData

Stores and manages raw data.

This class is used to store raw data. Loaded raw layer data is stored in this class. It also manages saving of the data and its metadata.

id

Unique id for the data and metadata

Type:

str, optional

project_name

Name of the project

Type:

str

content

Content of the data

Type:

CoveredContentType

author

Author of the data

Type:

str, optional

created_at

Creation time of the data

Type:

datetime

description

Description of the data

Type:

str, optional

extra_info

Extra information of the data

Type:

dict, optional

save(format: str, data_repository: DataRepository, metadata_repository: MetadataRepository)

Saves the content of the object to the data repository and the metadata to the metadata repository. The id of the object is updated when it is saved.

Parameters:
  • format (str) – The format in which the content will be saved.

  • data_repository (DataRepository) – The data repository where the content will be saved.

  • metadata_repository (MetadataRepository) – The metadata repository where the metadata will be saved.

Returns:

None

class dcraft.RefinedLayerData(id: str | None, project_name: str, content: DataFrame | dict | List[Dict], author: str | None, created_at: datetime, description: str | None, extra_info: dict | None, source_ids: List[str] | None)

Bases: BaseLayerData

Stores and manages refined data.

This class is used to store refined data. Loaded refined layer data is stored in this class. It also manages saving of the data and its metadata.

id

Unique id for the data and metadata

Type:

str, optional

project_name

Name of the project

Type:

str

content

Content of the data

Type:

CoveredContentType

author

Author of the data

Type:

str, optional

created_at

Creation time of the data

Type:

datetime

description

Description of the data

Type:

str, optional

extra_info

Extra information of the data

Type:

dict, optional

source_ids

List of source ids

Type:

List[str], optional

save(format: str, data_repository: DataRepository, metadata_repository: MetadataRepository)

Saves the content of this object to the data repository and metadata repository. The id of the object is updated when it is saved.

Parameters:
  • format (str) – The format in which to save the content.

  • data_repository (DataRepository) – The data repository used for saving the content.

  • metadata_repository (MetadataRepository) – The metadata repository used for saving the metadata.

Returns:

None

class dcraft.TrustedLayerData(id: str | None, project_name: str, content: DataFrame | dict | List[Dict], author: str | None, created_at: datetime, description: str | None, extra_info: dict | None, source_ids: List[str] | None)

Bases: BaseLayerData

Stores and manages trusted data.

This class is used to store trusted data. Loaded trusted layer data is stored in this class. It also manages saving of the data and its metadata.

id

Unique id for the data and metadata

Type:

str, optional

project_name

Name of the project

Type:

str

content

Content of the data

Type:

CoveredContentType

author

Author of the data

Type:

str, optional

created_at

Creation time of the data

Type:

datetime

description

Description of the data

Type:

str, optional

extra_info

Extra information of the data

Type:

dict, optional

source_ids

List of source ids

Type:

List[str], optional

save(format: str, data_repository: DataRepository, metadata_repository: MetadataRepository)

Save the content of the object to the data repository and metadata repository. The id of the object is updated when it is saved.

Parameters:
  • format (str) – The format in which the content should be saved.

  • data_repository (DataRepository) – The data repository where the content should be saved.

  • metadata_repository (MetadataRepository) – The metadata repository where the metadata should be saved.

Returns:

None

dcraft.create_raw(content: DataFrame | dict | List[Dict], project_name: str, author: str | None = None, description: str | None = None, extra_info: dict | None = None) RawLayerData

Create a RawLayerData object with the given content, project name, author, description, and extra information.

Parameters:
  • content (CoveredContentType) – The content to be stored in the RawLayerData object.

  • project_name (str) – The name of the project.

  • author (Optional[str], optional) – The author of the content. Defaults to None.

  • description (Optional[str], optional) – A description of the content. Defaults to None.

  • extra_info (Optional[dict], optional) – Extra information related to the content. Defaults to None.

Returns:

The created RawLayerData object.

Return type:

RawLayerData

dcraft.create_refined(content: DataFrame | dict | List[Dict], project_name: str, author: str | None = None, description: str | None = None, extra_info: dict | None = None, source_ids: List[str] | None = None) RefinedLayerData

Create a refined layer data object.

Parameters:
  • content (CoveredContentType) – The content of the refined layer.

  • project_name (str) – The name of the project.

  • author (Optional[str], optional) – The author of the refined layer. Defaults to None.

  • description (Optional[str], optional) – The description of the refined layer. Defaults to None.

  • extra_info (Optional[dict], optional) – Extra information about the refined layer. Defaults to None.

  • source_ids (Optional[List[str]], optional) – The source IDs of the refined layer. Defaults to None.

Returns:

The created refined layer data object.

Return type:

RefinedLayerData

dcraft.create_trusted(content: DataFrame | dict | List[Dict], project_name: str, author: str | None = None, description: str | None = None, extra_info: dict | None = None, source_ids: List[str] | None = None) TrustedLayerData

Creates a trusted layer data object.

Parameters:
  • content (CoveredContentType) – The content of the trusted layer.

  • project_name (str) – The name of the project.

  • author (Optional[str], optional) – The author of the trusted layer. Defaults to None.

  • description (Optional[str], optional) – A description of the trusted layer. Defaults to None.

  • extra_info (Optional[dict], optional) – Any extra information associated with the trusted layer. Defaults to None.

  • source_ids (Optional[List[str]], optional) – The source IDs associated with the trusted layer. Defaults to None.

Returns:

The created trusted layer data object.

Return type:

TrustedLayerData

dcraft.read_layer_data(id: str, data_repository: DataRepository, metadata_repository: MetadataRepository) RawLayerData | TrustedLayerData | RefinedLayerData