Rule Inference API

The Rule Inference API provides functions to infer Vadalog rules from a database or data source schema. It generates a linear Vadalog rule for each table or file and a join rule for each table having foreign keys.

Infer Schema

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Describe the PostgreSQL connection to infer rules from
db = Database(
    database_type="postgresql",
    username="prometheux",
    password="prometheux",
    host="localhost",
    port=5432,
    database_name="prometheux"
)

# Infer Vadalog rules from the database
inferred_rules = px.infer_schema(db, add_bind=True, add_model=False)

# Save the inferred rules to a file (explicit encoding keeps the output
# independent of the platform's default locale)
with open("infer-from-postgresql.vada", "w", encoding="utf-8") as file:
    file.write(inferred_rules)
Function Signature
def infer_schema(database, add_bind=True, add_model=False)
Parameters
| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| database | Database | Yes | An instance of the Database class containing connection details |
| add_bind | bool | No | Whether to add a bind statement in the inferred schema. Defaults to True |
| add_model | bool | No | Whether to add a model annotation statement. Defaults to False |
Returns
The inferred Vadalog rules for the database or data source schema, as a string.

Database Examples

PostgreSQL

import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# PostgreSQL connection details
db = Database(
    database_type="postgresql",
    username="prometheux",
    password="prometheux",
    host="localhost",
    port=5432,
    database_name="prometheux"
)

inferred_rules = px.infer_schema(db, add_bind=True)

Neo4j

import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# Neo4j connection details
db = Database(
    database_type="neo4j",
    username="neo4j",
    password="neo4j2",
    host="localhost",
    port=7687,
    database_name="neo4j"
)

inferred_rules = px.infer_schema(db, add_bind=True)

CSV File from S3

import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# For a file on S3 this example passes the bucket URI as `host`, the file
# name as `database_name`, and the access key pair as username/password.
db = Database(
    database_type="csv",
    username="AKIA4xxxx12",
    password="JyxxxxU+",
    host="s3a://prometheux-data",
    port=None,
    database_name="companies.csv",
    options={
        "region": "eu-west-2",
        "endpoint": "s3.amazonaws.com",
        "credentials.provider": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider",
        "delimiter": "\t"
    }
)

inferred_rules = px.infer_schema(db, add_bind=True)

Databricks

import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# Databricks connection: this example uses the literal username "token"
# with an access token as the password, and a SQL warehouse path as
# `database_name`.
db = Database(
    database_type="databricks",
    username="token",
    password="dapixxxx",
    host="dbc-xxxx-02fe.cloud.databricks.com",
    port=443,
    database_name="/sql/1.0/warehouses/3283xxxx"
)

inferred_rules = px.infer_schema(db, add_bind=True)

Databricks with Specific Schema

import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# Same Databricks connection as a plain warehouse lookup, with `schema`
# naming the catalog and schema to infer from.
inferred_rules = px.infer_schema(
    Database(
        database_type="databricks",
        username="token",
        password="dapixxxx",
        host="dbc-xxxx-02fe.cloud.databricks.com",
        port=443,
        database_name="/sql/1.0/warehouses/3283xxxx",
        schema="my_catalog.my_schema"
    ),
    add_bind=True,
    add_model=False
)

Snowflake

Instead of using a password, you can use a Programmatic Access Token (PAT) for authentication. This avoids MFA prompts during automated workflows. See the Snowflake data source documentation for details.
import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# Snowflake connection details; the warehouse is passed through `options`
db = Database(
    database_type="snowflake",
    username="my_username",
    password="my_password",  # Or a Programmatic Access Token (PAT)
    host="jdbc:snowflake://A77885826xxxx-IV3xxxx.snowflakecomputing.com",
    port=443,
    database_name="my_database",
    schema="my_schema",
    options={"warehouse": "my_warehouse"}
)

inferred_rules = px.infer_schema(db, add_bind=True)

Excel File

Excel files are treated as a database where sheets are considered as tables.
import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# Excel workbook: `host` is the containing path and `database_name` the
# workbook file; the workbook password goes in `password`.
db = Database(
    database_type="excel",
    username="",
    password="workbookPassword",
    host="path/to/excel_file",
    port=None,
    database_name="excel_file.xlsx",
)

inferred_rules = px.infer_schema(db, add_bind=True)

BigQuery

import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"
# Read the GCP access token from the environment; raises KeyError if
# GCP_ACCESS_TOKEN is not set.
gcpAccessToken = os.environ["GCP_ACCESS_TOKEN"]

# BigQuery: the project ID is passed as `database_name`, the dataset as
# `schema`, and authentication settings through `options`.
db = Database(
    database_type="bigquery",
    username="",
    password="",
    host="",
    port=None,
    database_name="my_project_id",
    schema="datasetId",
    options={
        "authMode": "gcpAccessToken",
        "gcpAccessToken": gcpAccessToken,
        "parentProject": "my_parent_project_id",
        "billingProjectId": "my_billing_project_id",
        "region": "us-central1"
    }
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=True)

Text File

Infer concepts and relationships from text content.
import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# Text file: `host` is the containing path and `database_name` the file name
db = Database(
    database_type="text",
    username="",
    password="",
    host="path/to/file",
    port=None,
    database_name="document.txt"
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=False)

Binary File (PDF, Images)

Binary files support various formats including PDF, JPG, PNG, and other binary formats.
import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# Binary file: `host` is the containing path and `database_name` the file name
db = Database(
    database_type="binaryfile",
    username="",
    password="",
    host="path/to/file",
    port=None,
    database_name="document.pdf"
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=False)

Business Documents

Structured documents such as ID documents, receipts, tax forms, and mortgage documents. Supported document types include:
| Category | Document Types |
|----------|----------------|
| Financial | check.us, bankStatement.us, payStub.us, creditCard, invoice |
| ID Documents | idDocument.driverLicense, idDocument.passport, idDocument.nationalIdentityCard, idDocument.residencePermit, idDocument.usSocialSecurityCard |
| Receipts | receipt.retailMeal, receipt.creditCard, receipt.gas, receipt.parking, receipt.hotel |
| Tax Documents | tax.us.1040.2023, tax.us.w2, tax.us.w4, tax.us.1095A, tax.us.1098, tax.us.1099 |
| Mortgage Documents | mortgage.us.1003 (URLA), mortgage.us.1004 (URAR), mortgage.us.closingDisclosure |
| Other | contract, healthInsuranceCard.us, marriageCertificate.us |
import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# Business document: a binary file whose document type is selected with
# the "documentType" option.
db = Database(
    database_type="binaryfile",
    username="",
    password="",
    host="path/to/file",
    port=None,
    database_name="driver_license.pdf",
    options={"documentType": "idDocument.driverLicense"}
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=False)

Amazon DynamoDB

import os

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Supply PMTX_TOKEN through the environment (replace the placeholder)
os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

# DynamoDB: AWS credentials are passed as username/password; region and
# the other settings go through `options`.
db = Database(
    database_type="dynamodb",
    username="AKIA4xxxx12",  # AWS Access Key ID
    password="JyxxxxU+",     # AWS Secret Access Key
    host="",
    port=None,
    database_name="",
    options={
        "region": "us-east-1",
        "endpoint": "",
        "sessionToken": "",
        "sampleLimit": "100"
    }
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=True)