Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
589 changes: 510 additions & 79 deletions .gitignore

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/encodings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/gRPC-vs-REST.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Variables
PYTHON := python
PYTHONPATH := `pwd`
# Directory that holds the .proto schema and receives the generated module.
PROTO_DIR := ./patient_data

# Protobuf: compile the schema into a Python module next to it.
.PHONY: protoc
protoc:
	protoc -I=$(PROTO_DIR)/ --python_out=$(PROTO_DIR)/ $(PROTO_DIR)/protobuf_grpc.proto
3 changes: 1 addition & 2 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ fastapi = ">=0.105"
brotli-asgi = ">=1.4"
hypercorn = ">=0.15"
httpx = {version = ">=0.25", extras = ["http2"]}

# Optional production cloud specific packages.
# Install using: pipenv install [package] --categories cloud_logger cloud_secret
#
# DON'T install multiple vendors. Install ONLY one.
ujson = "*"

[cloud_logger]
#infisical = ">=1.2"
Expand All @@ -29,7 +29,6 @@ httpx = {version = ">=0.25", extras = ["http2"]}
#google-cloud-secret-manager = ">=2.16"
#boto3 = ">=1.26"
#oci = ">=2.104"

# Development tools.
# Install using: pipenv install [package] --dev

Expand Down
633 changes: 353 additions & 280 deletions Pipfile.lock

Large diffs are not rendered by default.

Empty file added patient_data/__init__.py
Empty file.
155 changes: 155 additions & 0 deletions patient_data/gen_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import random
from faker import Faker
from ujson import dumps
from enum import Enum
import sys
from datetime import date
from dateutil.relativedelta import relativedelta
import protobuf_grpc as proto
import patient_dataclass as pd
from dataclasses import asdict

# Module-level Faker instance (en_US locale) used by the generator functions
# in this module; they reseed it through Faker.seed() before drawing values.
faker = Faker('en_US')


# Method to create a fake name
def generate_name(seed, birth_date, gender=''):
    """Build a fake ``pd.Name`` record.

    Args:
        seed: Value handed to ``Faker.seed`` so the result is reproducible.
        birth_date: Lower bound for the generated ``active`` date.
        gender: Label selecting the Faker name generator. Matching is
            case-insensitive: 'Non-binary'/'Non-Binary'/'Other' use the
            non-binary generator, 'Male' and 'Female' their own, and any
            other value (including the empty default) uses ``faker.name()``.

    Returns:
        A ``pd.Name`` whose ``text`` is the full generated name and whose
        ``given`` / ``family`` are its first and second space-separated
        tokens (``family`` is '' when the name has a single token).
    """
    Faker.seed(seed)

    # Compare case-insensitively: main() produces 'Non-binary', which an exact
    # comparison against 'Non-Binary' never matched.
    gender_key = (gender or '').lower()
    if gender_key in ('non-binary', 'other'):
        full_name = faker.name_nonbinary()
    elif gender_key == 'male':
        full_name = faker.name_male()
    elif gender_key == 'female':
        full_name = faker.name_female()
    else:
        full_name = faker.name()

    # NOTE(review): a generated prefix such as 'Dr.' ends up in `given`,
    # because the split is purely positional.
    parts = full_name.split(' ')
    given = parts[0]
    family = parts[1] if len(parts) > 1 else ''  # avoid IndexError on one-token names

    return pd.Name(
        faker.word(ext_word_list=['usual', 'official', 'temp', 'nickname', 'anonymous', 'old', 'maiden']),
        full_name,
        given,
        family,
        faker.date_between(start_date=birth_date),
    )


# Method to create a fake telecom
def generate_telecom(seed, rank, birth_date, names):
    """Build a fake ``pd.Telecom`` contact point.

    Args:
        seed: Value handed to ``Faker.seed`` before any value is drawn.
        rank: Stored unchanged in the record's ``rank`` field.
        birth_date: Lower bound for the generated ``active`` date.
        names: Non-empty sequence of objects with ``given`` and ``family``
            attributes; used to assemble an e-mail address.

    Returns:
        A ``pd.Telecom`` whose ``value`` is an e-mail address when the drawn
        system is 'email', otherwise a Faker phone number.
    """
    Faker.seed(seed)

    system = faker.word(ext_word_list=['phone', 'fax', 'email', 'sms'])
    use = faker.word(ext_word_list=['home', 'work', 'temp', 'old', 'mobile'])
    active_from = faker.date_between(start_date=birth_date)
    telecom = pd.Telecom(system, use, rank, active_from)

    if telecom.system != 'email':
        telecom.value = faker.phone_number()
        return telecom

    # Given and family parts are drawn independently, so they may come from
    # two different entries of `names`.
    last_index = len(names) - 1
    given_source = names[random.randint(0, last_index)]
    family_source = names[random.randint(0, last_index)]
    telecom.value = f'{given_source.given}{family_source.family}@{faker.domain_name()}'
    return telecom


# Method to create a fake address
def generate_address(seed, birth_date):
    """Build a fake US ``pd.Address``.

    Args:
        seed: Value handed to ``Faker.seed`` so the result is reproducible.
        birth_date: Lower bound for the generated ``period_start`` date.

    Returns:
        A ``pd.Address`` with ``text`` composed from its parts. When the drawn
        ``use`` is 'temp' or 'old', ``period_end`` is also set; otherwise it
        keeps the dataclass default.
    """
    Faker.seed(seed)
    a = pd.Address(
        faker.word(ext_word_list=['home', 'work', 'temp', 'old', 'billing']),
        faker.word(ext_word_list=['postal', 'physical', 'both']),
        faker.street_address(),
        faker.city(),
        faker.state(),
        faker.postcode(),
        'USA',
        faker.date_between(start_date=birth_date),
    )
    a.text = f'{a.line} {a.city}, {a.state} {a.zipcode} {a.country}'

    if a.use in ('temp', 'old'):
        # date_between() ends at 'today' by default. If the address started
        # less than a year ago, period_start + 1 year lies in the future and
        # the requested range would be inverted, so clamp the lower bound.
        earliest_end = min(a.period_start + relativedelta(years=1), date.today())
        a.period_end = faker.date_between(start_date=earliest_end)

    return a


# Format class for ease of selecting data format
class Format(Enum):
    """Output encodings that ``main`` can be asked to produce."""

    Json = 1
    Protobuf = 2

    def __repr__(self):
        # __repr__ must return a str; returning the raw int value made
        # repr(Format.Json) raise TypeError.
        return str(self.value)

    def __str__(self):
        return str(self.value)


def main(size, data_format: Format):
    """Generate fake patients and serialise them in the requested format.

    Patients are generated until the running total of ``sys.getsizeof`` over
    the created ``pd.Patient`` objects reaches ``size``.

    NOTE(review): ``sys.getsizeof`` is shallow, so ``size`` does not track the
    size of the serialised payload -- confirm this is the intended measure.

    Args:
        size: Threshold for the accumulated ``sys.getsizeof`` total.
        data_format: ``Format.Json`` or ``Format.Protobuf``.

    Returns:
        For ``Format.Json`` the string produced by ``ujson.dumps``; for
        ``Format.Protobuf`` the ``all_patients`` attribute of
        ``proto.generate_patients(...)``; ``None`` for any other value.
    """
    # Upper bounds for each repeated collection on a patient.
    name_cap, telecom_cap, address_cap, contact_cap = 3, 2, 2, 4

    patients = []
    total_size = 0
    seed = 1

    while total_size < size:
        # Reseed both generators so every patient is reproducible.
        Faker.seed(seed)
        random.seed(seed)
        patient = pd.Patient(
            faker.pyint(min_value=11111, max_value=99999),
            faker.boolean(chance_of_getting_true=90),
            faker.word(ext_word_list=['Male', 'Female', 'Non-binary', 'Other']),
            faker.date_of_birth(minimum_age=18, maximum_age=100),
            faker.word(ext_word_list=['Married', 'Single', 'Separated', 'Divorced', 'Widowed']),
        )

        # Only patients older than 65 may be marked deceased (15% chance),
        # with a date of death inside the last ten years.
        senior_cutoff = date.today() - relativedelta(years=65)
        if patient.birth_date < senior_cutoff and faker.boolean(chance_of_getting_true=15):
            window_start = date.today() - relativedelta(years=10)
            patient.deceased_on = faker.date_between(start_date=window_start).strftime('%m/%d/%Y')

        # Names
        for _ in range(random.randint(1, name_cap)):
            seed += 1
            patient.names.append(generate_name(seed, patient.birth_date, patient.gender))

        # Telecoms, ranked from 1 upwards in generation order
        for rank in range(1, random.randint(1, telecom_cap) + 1):
            seed += 1
            patient.telecoms.append(generate_telecom(seed, rank, patient.birth_date, patient.names))

        # Addresses
        for _ in range(random.randint(1, address_cap)):
            seed += 1
            patient.addresses.append(generate_address(seed, patient.birth_date))

        # Contacts: each one gets its own birth date, name, address and telecom
        for _ in range(random.randint(1, contact_cap)):
            contact_birth = faker.date_of_birth(minimum_age=18, maximum_age=100)
            relation = faker.word(ext_word_list=['emergency', 'family', 'guardian', 'friend', 'partner', 'work',
                                                 'caregiver', 'agent', 'guarantor', 'parent'])
            contact_name = generate_name(seed + 1, contact_birth)
            contact_address = generate_address(seed + 1, contact_birth)
            contact = pd.Contact(relation, contact_name, contact_address)
            seed += 3
            contact.telecom = generate_telecom(seed, 1, contact_birth, [contact.name])
            patient.contacts.append(contact)

        # Record the patient and advance the size/seed bookkeeping.
        patients.append(patient)
        total_size += sys.getsizeof(patient)
        seed += 1

    if data_format == Format.Json:
        return dumps([asdict(entry) for entry in patients], indent=4, default=str)
    if data_format == Format.Protobuf:
        return proto.generate_patients(patients).all_patients
    return None


if __name__ == '__main__':
    # Script entry point: print a small Protobuf-format sample.
    sample = main(128, Format.Protobuf)
    print(sample)
56 changes: 56 additions & 0 deletions patient_data/patient_dataclass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from dataclasses import dataclass, field
from datetime import date


@dataclass
class Name:
    """One name record for a person (patient or contact)."""

    use: str  # category label, e.g. 'usual', 'official', 'maiden'
    text: str  # full name as a single string
    given: str
    family: str
    active: date  # date associated with the name record


@dataclass
class Telecom:
    """One contact point (phone, fax, e-mail or SMS) for a person."""

    system: str  # channel, e.g. 'phone', 'fax', 'email', 'sms'
    use: str  # purpose, e.g. 'home', 'work', 'mobile'
    rank: int
    active: date
    # Actual number or address; empty until it is filled in.
    value: str = ''


@dataclass
class Address:
use: str
type: str
line: str
city: str
state: str
zipcode: str
country: str
period_start: date
text: str = field(default='')
period_end: date = field(default=None)


@dataclass
class Contact:
    """A person related to a patient, with their name, address and telecom."""

    relation: str  # e.g. 'emergency', 'family', 'guardian'
    name: Name
    address: Address
    # The previous default was the Telecom class object itself rather than an
    # instance. Telecom has required fields, so no meaningful default instance
    # exists; use None until a telecom is assigned.
    telecom: Telecom | None = None


@dataclass
class Patient:
    """A patient record with demographic fields and repeated sub-records."""

    id: int
    active: bool
    gender: str
    birth_date: date
    marital_status: str
    # Date of death as a string; empty when not set.
    deceased_on: str = ''
    # Each list gets its own fresh instance per patient via default_factory.
    names: list[Name] = field(default_factory=list)
    telecoms: list[Telecom] = field(default_factory=list)
    addresses: list[Address] = field(default_factory=list)
    contacts: list[Contact] = field(default_factory=list)
54 changes: 54 additions & 0 deletions patient_data/protobuf_grpc.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
syntax = 'proto2';

// One name record for a person. Field names match the Name dataclass.
message Name {
  optional string use = 1;
  optional string text = 2;    // full name as a single string
  optional string given = 3;
  optional string family = 4;
  optional string active = 5;  // declared as a string; the dataclass field is a date
}

// One contact point for a person. Field names match the Telecom dataclass.
message Telecom {
  optional string system = 1;
  optional string use = 2;
  optional int32 rank = 3;
  optional string active = 4;  // declared as a string; the dataclass field is a date
  optional string value = 5;
}

// One address and the period it applies to. Field names match the Address
// dataclass.
message Address {
  optional string use = 1;
  optional string type = 2;
  optional string line = 3;
  optional string city = 4;
  optional string state = 5;
  optional string zipcode = 6;
  optional string country = 7;
  optional string period_start = 8;  // declared as a string; the dataclass field is a date
  optional string text = 9;
  optional string period_end = 10;   // declared as a string; the dataclass field is a date
}

// A person related to a patient; embeds one Name, one Address and one Telecom.
message Contact {
  optional string relation = 1;
  optional Name name = 2;
  optional Address address = 3;
  optional Telecom telecom = 4;
}

// A patient record: scalar demographic fields plus repeated sub-records.
// Field names match the Patient dataclass.
message Patient {
  optional int32 id = 1;
  optional bool active = 2;
  optional string gender = 3;
  optional string birth_date = 4;   // declared as a string; the dataclass field is a date
  optional string marital_status = 5;
  optional string deceased_on = 6;
  repeated Name names = 7;
  repeated Telecom telecoms = 8;
  repeated Address addresses = 9;
  repeated Contact contacts = 10;
}

// Top-level container holding a list of Patient messages.
message Patients {
  repeated Patient all_patients = 1;
}
Loading