Skip to main content

Rule Inference API

The Rule Inference API provides functions to infer Vadalog rules from a database or data source schema. It generates a linear Vadalog rule for each table or file and a join rule for each table having foreign keys.


Infer Schema

import prometheux_chain as px
from prometheux_chain.data.database import Database

# Create a Database object
db = Database(
database_type="postgresql",
username="prometheux",
password="prometheux",
host="localhost",
port=5432,
database_name="prometheux"
)

# Infer Vadalog rules from the database
inferred_rules = px.infer_schema(db, add_bind=True, add_model=False)

# Save the inferred rules to a file
with open("infer-from-postgresql.vada", 'w') as file:
file.write(inferred_rules)

Function Signature

def infer_schema(database, add_bind=True, add_model=False)

Parameters

| Parameter | Type | Required | Description |
|-----------|------|----------|-------------|
| database | Database | Yes | An instance of the Database class containing connection details |
| add_bind | bool | No | Whether to add a bind statement in the inferred schema. Defaults to True |
| add_model | bool | No | Whether to add a model annotation statement. Defaults to False |

Returns

Returns the Vadalog rules inferred from the database or data source schema, as a single string.


Database Examples

PostgreSQL

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="postgresql",
username="prometheux",
password="prometheux",
host="localhost",
port=5432,
database_name="prometheux"
)

inferred_rules = px.infer_schema(db, add_bind=True)

Neo4j

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="neo4j",
username="neo4j",
password="neo4j2",
host="localhost",
port=7687,
database_name="neo4j"
)

inferred_rules = px.infer_schema(db, add_bind=True)

CSV File from S3

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="csv",
username="AKIA4xxxx12",
password="JyxxxxU+",
host="s3a://prometheux-data",
port=None,
database_name="companies.csv",
options={
"region": "eu-west-2",
"endpoint": "s3.amazonaws.com",
"credentials.provider": "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider",
"delimiter": "\t"
}
)

inferred_rules = px.infer_schema(db, add_bind=True)

Databricks

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="databricks",
username="token",
password="dapixxxx",
host="dbc-xxxx-02fe.cloud.databricks.com",
port=443,
database_name="/sql/1.0/warehouses/3283xxxx"
)

inferred_rules = px.infer_schema(db, add_bind=True)

Databricks with Specific Schema

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

inferred_rules = px.infer_schema(
Database(
database_type="databricks",
username="token",
password="dapixxxx",
host="dbc-xxxx-02fe.cloud.databricks.com",
port=443,
database_name="/sql/1.0/warehouses/3283xxxx",
schema="my_catalog.my_schema"
),
add_bind=True,
add_model=False
)

Snowflake

Note:

Instead of using a password, you can use a Programmatic Access Token (PAT) for authentication. This avoids MFA prompts during automated workflows. See the Snowflake data source documentation for details.

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="snowflake",
username="my_username",
password="my_password", # Or use your PAT token
host="jdbc:snowflake://A77885826xxxx-IV3xxxx.snowflakecomputing.com",
port=443,
database_name="my_database",
schema="my_schema",
options={"warehouse": "my_warehouse"}
)

inferred_rules = px.infer_schema(db, add_bind=True)

Excel File

An Excel file is treated as a database, and each of its sheets is treated as a table.

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="excel",
username="",
password="workbookPassword",
host="path/to/excel_file",
port=None,
database_name="excel_file.xlsx",
)

inferred_rules = px.infer_schema(db, add_bind=True)

BigQuery

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"
gcpAccessToken = os.environ["GCP_ACCESS_TOKEN"]

db = Database(
database_type="bigquery",
username="",
password="",
host="",
port=None,
database_name="my_project_id",
schema="datasetId",
options={
"authMode": "gcpAccessToken",
"gcpAccessToken": gcpAccessToken,
"parentProject": "my_parent_project_id",
"billingProjectId": "my_billing_project_id",
"region": "us-central1"
}
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=True)

Text File

Infer concepts and relationships from text content.

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="text",
username="",
password="",
host="path/to/file",
port=None,
database_name="document.txt"
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=False)

Binary File (PDF, Images)

The binary file data source supports PDF, JPG, PNG, and other binary formats.

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="binaryfile",
username="",
password="",
host="path/to/file",
port=None,
database_name="document.pdf"
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=False)

Business Documents

Structured documents such as ID documents, receipts, tax forms, and mortgage documents can be processed by passing a document type in the `documentType` option. Supported document types include:

| Category | Document Types |
|----------|----------------|
| Financial | check.us, bankStatement.us, payStub.us, creditCard, invoice |
| ID Documents | idDocument.driverLicense, idDocument.passport, idDocument.nationalIdentityCard, idDocument.residencePermit, idDocument.usSocialSecurityCard |
| Receipts | receipt.retailMeal, receipt.creditCard, receipt.gas, receipt.parking, receipt.hotel |
| Tax Documents | tax.us.1040.2023, tax.us.w2, tax.us.w4, tax.us.1095A, tax.us.1098, tax.us.1099 |
| Mortgage Documents | mortgage.us.1003 (URLA), mortgage.us.1004 (URAR), mortgage.us.closingDisclosure |
| Other | contract, healthInsuranceCard.us, marriageCertificate.us |

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="binaryfile",
username="",
password="",
host="path/to/file",
port=None,
database_name="driver_license.pdf",
options={"documentType": "idDocument.driverLicense"}
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=False)

Amazon DynamoDB

import os
import prometheux_chain as px
from prometheux_chain.data.database import Database

os.environ["PMTX_TOKEN"] = "YOUR_TOKEN"

db = Database(
database_type="dynamodb",
username="AKIA4xxxx12", # AWS Access Key ID
password="JyxxxxU+", # AWS Secret Access Key
host="",
port=None,
database_name="",
options={
"region": "us-east-1",
"endpoint": "",
"sessionToken": "",
"sampleLimit": "100"
}
)

inferred_rules = px.infer_schema(db, add_bind=True, add_model=True)