In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
import numpy as np
from statsmodels.stats.proportion import proportions_ztest
from scipy.stats import chi2_contingency
import warnings
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import TwoSlopeNorm
import matplotlib
In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
# Specify the folder path
folder_path = 'csvs/IncarcerationSupervision'
# List all files in the folder
files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
# Initialize a dictionary to store dataframes and their corresponding file names
dataframes = {}
# Loop through each file and read it into a dataframe
for i, file in enumerate(files, start=1):
file_path = os.path.join(folder_path, file)
df = pd.read_csv(file_path)
dataframes[file] = df
# Iterate over each dataframe and print info() and head()
for file_name, df in dataframes.items():
print(f"DataFrame {file_name}:")
print("Info:")
print(df.info())
print("\nHead:")
print(df.head())
print("\n---\n")
DataFrame IncarcerationSupervisionDisciplinary_00001.csv: Info: <class 'pandas.core.frame.DataFrame'> RangeIndex: 54524 entries, 0 to 54523 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 INCARCERATION_SUPERVISION_ID 54524 non-null int64 1 INCAR_DISCIPLINARY_VIOL_CODE 54524 non-null int64 2 INCAR_DISCIPLINARY_ACTION_CODE 54524 non-null object 3 INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC 54524 non-null object 4 INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC 54524 non-null object 5 INCAR_SUPERV_CREATED_DATE 54524 non-null object 6 UNIQUE_CORRELATION_ID 54524 non-null object 7 CORRELATION_ID 54524 non-null object dtypes: int64(2), object(6) memory usage: 3.3+ MB None Head: INCARCERATION_SUPERVISION_ID INCAR_DISCIPLINARY_VIOL_CODE \ 0 3853839 314 1 3853839 61 2 3853839 314 3 3853839 917 4 3853839 927 INCAR_DISCIPLINARY_ACTION_CODE INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC \ 0 DC Disciplinary Confinement 1 GT Loss of Gain Time 2 GT Loss of Gain Time 3 CC Cashless Canteen Denial 4 GT Loss of Gain Time INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC \ 0 Unauthorized possession/use of communication d... 1 Disobeying verbal or written order 2 Unauthorized possession/use of communication d... 3 Disorderly conduct 4 Use of unauthorized drugs INCAR_SUPERV_CREATED_DATE UNIQUE_CORRELATION_ID \ 0 2024-10-16 10:19:19.3990000 72a3a208-c80d-4e43-95ac-90c25ab873a9 1 2024-10-16 10:19:19.3990000 72a3a208-c80d-4e43-95ac-90c25ab873a9 2 2024-10-16 10:19:19.3990000 72a3a208-c80d-4e43-95ac-90c25ab873a9 3 2024-10-16 10:19:19.3990000 72a3a208-c80d-4e43-95ac-90c25ab873a9 4 2024-10-16 10:19:19.3990000 72a3a208-c80d-4e43-95ac-90c25ab873a9 CORRELATION_ID 0 0f4f4bcc-84f6-484c-9af8-f0657d877b95 1 0f4f4bcc-84f6-484c-9af8-f0657d877b95 2 0f4f4bcc-84f6-484c-9af8-f0657d877b95 3 0f4f4bcc-84f6-484c-9af8-f0657d877b95 4 0f4f4bcc-84f6-484c-9af8-f0657d877b95 --- DataFrame IncarcerationSupervisionDisciplinary_00000.csv: Info: <class 'pandas.core.frame.DataFrame'> RangeIndex: 574092 entries, 0 to 574091 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 INCARCERATION_SUPERVISION_ID 574092 non-null int64 1 INCAR_DISCIPLINARY_VIOL_CODE 574092 non-null int64 2 INCAR_DISCIPLINARY_ACTION_CODE 574092 non-null object 3 INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC 574092 non-null object 4 INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC 574092 non-null object 5 INCAR_SUPERV_CREATED_DATE 574092 non-null object 6 UNIQUE_CORRELATION_ID 574092 non-null object 7 CORRELATION_ID 574092 non-null object dtypes: int64(2), object(6) memory usage: 35.0+ MB None Head: INCARCERATION_SUPERVISION_ID INCAR_DISCIPLINARY_VIOL_CODE \ 0 26 52 1 26 61 2 26 934 3 26 916 4 26 23 INCAR_DISCIPLINARY_ACTION_CODE INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC \ 0 DC Disciplinary Confinement 1 VR Verbal Reprimand 2 DC Disciplinary Confinement 3 GT Loss of Gain Time 4 DC Disciplinary Confinement INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC \ 0 Failure to comply with count procedures 1 Disobeying verbal or written order 2 Tampering/defeating any fire or other safety d... 3 Refusing to work or participate in mandatory p... 4 Creating, participating in or inciting a minor... INCAR_SUPERV_CREATED_DATE UNIQUE_CORRELATION_ID \ 0 2022-02-25 17:43:59.9930000 20042f87-0e82-422e-8844-3171a46edd08 1 2022-02-25 17:43:59.9930000 20042f87-0e82-422e-8844-3171a46edd08 2 2022-02-25 17:43:59.9930000 20042f87-0e82-422e-8844-3171a46edd08 3 2022-02-25 17:43:59.9930000 20042f87-0e82-422e-8844-3171a46edd08 4 2022-02-25 17:43:59.9930000 20042f87-0e82-422e-8844-3171a46edd08 CORRELATION_ID 0 20042f87-0e82-422e-8844-3171a46edd08 1 20042f87-0e82-422e-8844-3171a46edd08 2 20042f87-0e82-422e-8844-3171a46edd08 3 20042f87-0e82-422e-8844-3171a46edd08 4 20042f87-0e82-422e-8844-3171a46edd08 --- DataFrame Incarcerationsupervision_00000.csv: Info: <class 'pandas.core.frame.DataFrame'> RangeIndex: 162196 entries, 0 to 162195 Data columns (total 29 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 INCARCERATION_SUPERVISION_ID 162196 non-null int64 1 UNIQUE_CORRELATION_ID 162196 non-null object 2 OWNER_ORI 162196 non-null object 3 PERSON_ID 162196 non-null int64 4 MDM_PERSON_ID 161521 non-null float64 5 RACE_CODE 162196 non-null object 6 SEX_CODE 162196 non-null object 7 ETHNICITY_CODE 162196 non-null object 8 CUSTODY_ADMISSION_DATE 162196 non-null object 9 SUPERVISION_BEGIN_DATE 3592 non-null object 10 CURRENT_INSTITUTION_ORI_TYP_CODE 162196 non-null object 11 INCAR_CUSTODY_LEVEL_TYPE_CODE 134267 non-null float64 12 CORRECTION_ADMISSION_REASON_CODE 162196 non-null object 13 PRIOR_INCARCERATION_IND 162196 non-null bool 14 CUSTODY_RELEASE_REASON_CODE 23950 non-null float64 15 INCARC_PROG_PARTICIPATION_IND 162196 non-null bool 16 SUPERVISION_CATEGORY_CODE 3592 non-null object 17 HIGHEST_EDUCATION_LEVEL 154821 non-null object 18 SEXUAL_OFFENDER_IND 162196 non-null bool 19 GANG_AFFILIATION_IND 162196 non-null bool 20 INMATE_AGE 162196 non-null int64 21 CORRECTION_ADMISSION_REASON_DESC 162196 non-null object 22 CURRENT_INSTITUTION_ORI_TYPE_DESC 162196 non-null object 23 CUSTODY_RELEASE_REASON_DESC 23950 non-null object 24 INCARCERATION_CUSTODY_LEVEL_TYPE_DESC 134267 non-null object 25 SUPERVISION_CATEGORY_DESC 3592 non-null object 26 AGENCY_NAME 162196 non-null object 27 COUNTY_DESCRIPTION 162196 non-null object 28 STATE 162196 non-null object dtypes: bool(4), float64(3), int64(3), object(19) memory usage: 31.6+ MB None Head: INCARCERATION_SUPERVISION_ID UNIQUE_CORRELATION_ID \ 0 2642920 5f3f8275-6d04-4a7b-8a81-8c3e4534560d 1 2642921 5716fe66-fba9-45f8-893c-0f3d41e97fd5 2 2642928 adfe5180-01b4-46a9-b70f-4ca17627dbf3 3 2642948 d92fca90-b85f-4485-9c1c-f4beed7298f3 4 2642958 032a39ff-5072-4ce2-9d60-90fb3cc97a74 OWNER_ORI PERSON_ID MDM_PERSON_ID RACE_CODE SEX_CODE \ 0 FL037275C 57675189 708281.0 White Female 1 FL037275C 57675190 647720.0 White Male 2 FL037275C 57675199 1309746.0 White Male 3 FL037275C 57675291 1612413.0 White Male 4 FL037275C 57675329 276821.0 Black Male ETHNICITY_CODE CUSTODY_ADMISSION_DATE SUPERVISION_BEGIN_DATE \ 0 Not Hispanic or Latino 2022-12-19 00:00:00.0000000 NaN 1 Not Hispanic or Latino 2005-05-04 00:00:00.0000000 NaN 2 Not Hispanic or Latino 2023-02-01 00:00:00.0000000 NaN 3 Not Hispanic or Latino 2017-01-26 00:00:00.0000000 NaN 4 Not Hispanic or Latino 2022-08-24 00:00:00.0000000 NaN CURRENT_INSTITUTION_ORI_TYP_CODE INCAR_CUSTODY_LEVEL_TYPE_CODE \ 0 FL042375C NaN 1 FL048055C 4.0 2 FL017045C 1.0 3 FL057025C 4.0 4 FL032125C 4.0 CORRECTION_ADMISSION_REASON_CODE PRIOR_INCARCERATION_IND \ 0 New Conviction False 1 New Conviction False 2 Violation of Probation - Technical False 3 New Conviction True 4 Violation of Probation - New Law Violation True CUSTODY_RELEASE_REASON_CODE INCARC_PROG_PARTICIPATION_IND \ 0 40.0 False 1 NaN True 2 NaN False 3 NaN True 4 NaN True SUPERVISION_CATEGORY_CODE HIGHEST_EDUCATION_LEVEL SEXUAL_OFFENDER_IND \ 0 NaN Ninth Grade False 1 NaN Twelfth Grade True 2 NaN Twelfth Grade False 3 NaN Ninth Grade False 4 NaN Tenth Grade True GANG_AFFILIATION_IND INMATE_AGE \ 0 False 37 1 False 43 2 False 40 3 True 33 4 False 50 CORRECTION_ADMISSION_REASON_DESC \ 0 New Conviction 1 New Conviction 2 Violation of Probation - Technical 3 New Conviction 4 Violation of Probation - New Law Violation CURRENT_INSTITUTION_ORI_TYPE_DESC CUSTODY_RELEASE_REASON_DESC \ 0 FLORIDA WOMENS RECEPTION CENTER Released 1 CENTRAL FLORIDA RECEPTION CENTER MAIN NaN 2 PENSACOLA COMUNITY RELEASE CENTER NaN 3 SANTA ROSA CORRECTIONAL INSTITUTION ANNEX NaN 4 GRACEVILLE CORRECTIONAL FACILITY NaN INCARCERATION_CUSTODY_LEVEL_TYPE_DESC SUPERVISION_CATEGORY_DESC \ 0 NaN NaN 1 Close NaN 2 Community NaN 3 Close NaN 4 Close NaN AGENCY_NAME COUNTY_DESCRIPTION STATE 0 FLORIDA WOMENS RECEPTION CENTER Marion Florida 1 CENTRAL FLORIDA RECEPTION CENTER MAIN Orange Florida 2 PENSACOLA COMUNITY RELEASE CENTER Escambia Florida 3 SANTA ROSA CORRECTIONAL INSTITUTION ANNEX Santa Rosa Florida 4 GRACEVILLE CORRECTIONAL FACILITY Jackson Florida --- DataFrame IncarcerationSupervisionCharge_00000.csv: Info: <class 'pandas.core.frame.DataFrame'> RangeIndex: 979 entries, 0 to 978 Data columns (total 29 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 INCARCERATION_SUPERVISION_ID 979 non-null int64 1 CONVICTION_COUNTY 979 non-null int64 2 CONCURRENT_SENTENCES_SERVED_DAYS 979 non-null int64 3 CONSECUTIVE_SENTENCES_SERVED_DAYS 979 non-null int64 4 SENTENCE_SERVED_DAYS 979 non-null int64 5 SUP_ASSIGNED_TERM_DURATION_DAYS 979 non-null int64 6 SUP_ASSIGNED_MAXIMUM_TERM_CODE 0 non-null float64 7 GAIN_TIME_EARNED 87 non-null float64 8 STATUTE 979 non-null object 9 STATUTE_CHAPTER_CODE 979 non-null int64 10 STATUTE_SECTION 979 non-null int64 11 STATUTE_SUBSECTION 340 non-null object 12 FCIC_CODE 979 non-null int64 13 CHARGE_LEVEL 979 non-null object 14 CHARGE_DEGREE 979 non-null object 15 SENTENCE_STATUS_CODE 979 non-null int64 16 MINIMUM_TERM_DURATION_DAYS 979 non-null int64 17 MAXIMUM_TERM_DURATION_DAYS 979 non-null int64 18 MAXIMUM_TERM_CODE 0 non-null float64 19 HABITUAL_OFNDR_IND 979 non-null bool 20 HABITUAL_VIOL_FELONY_OFNDR_IND 979 non-null bool 21 PRISON_RELEASEE_REOFNDR_IND 979 non-null bool 22 VIOLENT_CAREER_CRIM_IND 979 non-null bool 23 SENTENCE_PROBATION_DURATION_DAYS 979 non-null int64 24 SUPERVISION_SERVED_DAYS 979 non-null int64 25 SUPERVISION_REVOCATION_REASON 0 non-null float64 26 DRUG_TYPE_DESC 4 non-null object 27 OFFENSE_FCIC_TYPE_DESC 979 non-null object 28 SENTENCE_STATUS 979 non-null object dtypes: bool(4), float64(4), int64(14), object(7) memory usage: 195.2+ KB None Head: INCARCERATION_SUPERVISION_ID CONVICTION_COUNTY \ 0 155880 13 1 157651 52 2 158929 6 3 52577 52 4 54032 5 CONCURRENT_SENTENCES_SERVED_DAYS CONSECUTIVE_SENTENCES_SERVED_DAYS \ 0 0 0 1 0 0 2 0 0 3 0 0 4 0 0 SENTENCE_SERVED_DAYS SUP_ASSIGNED_TERM_DURATION_DAYS \ 0 0 731 1 0 1096 2 0 1096 3 0 1461 4 0 731 SUP_ASSIGNED_MAXIMUM_TERM_CODE GAIN_TIME_EARNED STATUTE \ 0 NaN NaN 893.13 1 NaN NaN 893.13 2 NaN NaN 893.13 3 NaN NaN 893.13.1a2 4 NaN NaN 812.014.2c1 STATUTE_CHAPTER_CODE STATUTE_SECTION STATUTE_SUBSECTION FCIC_CODE \ 0 893 13 NaN 3581 1 893 13 NaN 3599 2 893 13 NaN 3599 3 893 13 1a2 3562 4 812 14 2c1 2399 CHARGE_LEVEL CHARGE_DEGREE SENTENCE_STATUS_CODE \ 0 Felony Third Degree 8 1 Felony Third Degree 8 2 Felony Third Degree 8 3 Felony Third Degree 8 4 Felony Third Degree 8 MINIMUM_TERM_DURATION_DAYS MAXIMUM_TERM_DURATION_DAYS MAXIMUM_TERM_CODE \ 0 0 0 NaN 1 0 0 NaN 2 0 0 NaN 3 0 0 NaN 4 0 0 NaN HABITUAL_OFNDR_IND HABITUAL_VIOL_FELONY_OFNDR_IND \ 0 False False 1 False False 2 False False 3 False False 4 False False PRISON_RELEASEE_REOFNDR_IND VIOLENT_CAREER_CRIM_IND \ 0 False False 1 False False 2 False False 3 False False 4 False False SENTENCE_PROBATION_DURATION_DAYS SUPERVISION_SERVED_DAYS \ 0 0 731 1 0 1096 2 0 1096 3 0 1461 4 0 731 SUPERVISION_REVOCATION_REASON DRUG_TYPE_DESC OFFENSE_FCIC_TYPE_DESC \ 0 NaN NaN Barbiturate-Sell 1 NaN NaN Dangerous Drugs 2 NaN NaN Dangerous Drugs 3 NaN NaN Marijuana-Possess 4 NaN NaN Larceny SENTENCE_STATUS 0 Not Applicable 1 Not Applicable 2 Not Applicable 3 Not Applicable 4 Not Applicable ---
In [3]:
df = dataframes['IncarcerationSupervisionDisciplinary_00000.csv']
df['INCAR_SUPERV_CREATED_DATE'] = pd.to_datetime(df['INCAR_SUPERV_CREATED_DATE'])
# Print the range of dates
print(f"Range of Dates: from {df['INCAR_SUPERV_CREATED_DATE'].min()} to {df['INCAR_SUPERV_CREATED_DATE'].max()}")
# Plot the distribution of dates
plt.figure(figsize=(12, 6))
plt.hist(df['INCAR_SUPERV_CREATED_DATE'], bins=30, edgecolor='black')
plt.title('Distribution of INCAR_SUPERV_CREATED_DATE')
plt.xlabel('Date')
plt.ylabel('Frequency')
plt.xticks(rotation=45)
plt.show()
Range of Dates: from 2022-02-25 17:43:59.993000 to 2025-08-14 08:00:02.897000
In [4]:
df = dataframes['IncarcerationSupervisionDisciplinary_00001.csv']
df['INCAR_SUPERV_CREATED_DATE'] = pd.to_datetime(df['INCAR_SUPERV_CREATED_DATE'])
# Print the range of dates
print(f"Range of Dates: from {df['INCAR_SUPERV_CREATED_DATE'].min()} to {df['INCAR_SUPERV_CREATED_DATE'].max()}")
# Plot the distribution of dates
plt.figure(figsize=(12, 6))
plt.hist(df['INCAR_SUPERV_CREATED_DATE'], bins=30, edgecolor='black')
plt.title('Distribution of INCAR_SUPERV_CREATED_DATE')
plt.xlabel('Date')
plt.ylabel('Frequency')
plt.xticks(rotation=45)
plt.show()
Range of Dates: from 2024-10-16 10:19:19.399000 to 2024-10-16 12:07:30.478000
In [5]:
df0 = dataframes['IncarcerationSupervisionDisciplinary_00000.csv']
df1 = dataframes['IncarcerationSupervisionDisciplinary_00001.csv']
df0['INCAR_SUPERV_CREATED_DATE'] = pd.to_datetime(df0['INCAR_SUPERV_CREATED_DATE'])
df1['INCAR_SUPERV_CREATED_DATE'] = pd.to_datetime(df1['INCAR_SUPERV_CREATED_DATE'])
min0, max0 = df0['INCAR_SUPERV_CREATED_DATE'].min(), df0['INCAR_SUPERV_CREATED_DATE'].max()
min1, max1 = df1['INCAR_SUPERV_CREATED_DATE'].min(), df1['INCAR_SUPERV_CREATED_DATE'].max()
# --- theme colors ---
bg = "#181f22"
text = "#fff"
bar = "#22eff5"
accent = "#8af6e6"
grid = "#25333b"
fig, axs = plt.subplots(2, 1, figsize=(11, 7), sharex=True, facecolor=bg)
for ax in axs:
ax.set_facecolor(bg)
ax.tick_params(axis='x', colors=text)
ax.tick_params(axis='y', colors=text)
for side in ['bottom', 'top', 'left', 'right']:
ax.spines[side].set_color(text)
ax.title.set_color(text)
ax.yaxis.label.set_color(text)
ax.xaxis.label.set_color(text)
ax.grid(axis='y', color=grid, linestyle=':', alpha=0.4)
# First file
axs[0].hist(df0['INCAR_SUPERV_CREATED_DATE'], bins=30, edgecolor=bg, color=bar)
axs[0].set_title('00000.csv: INCAR_SUPERV_CREATED_DATE')
axs[0].set_ylabel('Frequency')
axs[0].text(0.01, 0.92, f'Range: {min0.date()} to {max0.date()}', transform=axs[0].transAxes, fontsize=10, color=accent, fontweight='bold')
# Second file
axs[1].hist(df1['INCAR_SUPERV_CREATED_DATE'], bins=30, edgecolor=bar, color=bar)
axs[1].set_title('00001.csv: INCAR_SUPERV_CREATED_DATE')
axs[1].set_xlabel('Date')
axs[1].set_ylabel('Frequency')
axs[1].text(0.01, 0.92, f'Range: {min1.date()} to {max1.date()}', transform=axs[1].transAxes, fontsize=10, color=accent, fontweight='bold')
fig.autofmt_xdate(rotation=45)
plt.tight_layout()
plt.savefig("assets/date_distributions_composite.png", bbox_inches='tight', dpi=160, facecolor=bg)
plt.show()
In [6]:
df_main = dataframes['IncarcerationSupervisionDisciplinary_00000.csv']
df_patch = dataframes['IncarcerationSupervisionDisciplinary_00001.csv']
cols_for_identity = [
'INCARCERATION_SUPERVISION_ID',
'INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC',
'INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'
]
patch_id_set = set(tuple(x) for x in df_patch[cols_for_identity].to_records(index=False))
main_id_set = set(tuple(x) for x in df_main[cols_for_identity].to_records(index=False))
in_patch_not_in_main = patch_id_set - main_id_set
in_main_not_in_patch = main_id_set - patch_id_set
overlap = patch_id_set & main_id_set # intersection
print(f"Logical records in both main and patch: {len(overlap)}")
print(f"Logical records only in patch: {len(in_patch_not_in_main)}")
print(f"Logical records only in main: {len(in_main_not_in_patch)}")
print(f"Total logical records in patch: {len(patch_id_set)}")
print(f"Total logical records in main: {len(main_id_set)}")
print(f"Total union (all unique logical records): {len(patch_id_set | main_id_set)}")
Logical records in both main and patch: 1740 Logical records only in patch: 52784 Logical records only in main: 572352 Total logical records in patch: 54524 Total logical records in main: 574092 Total union (all unique logical records): 626876
In [7]:
# Check uniqueness in main
main_dupes = df_main['UNIQUE_CORRELATION_ID'].duplicated().sum()
print(f"Duplicated UNIQUE_CORRELATION_ID in main: {main_dupes}")
main_dupes = df_main['INCARCERATION_SUPERVISION_ID'].duplicated().sum()
print(f"Duplicated INCARCERATION_SUPERVISION_ID in main: {main_dupes}")
main_dupes = df_main['CORRELATION_ID'].duplicated().sum()
print(f"Duplicated CORRELATION_ID in main: {main_dupes}")
# Same for patch
patch_dupes = df_patch['UNIQUE_CORRELATION_ID'].duplicated().sum()
print(f"Duplicated UNIQUE_CORRELATION_ID in patch: {patch_dupes}")
patch_dupes = df_patch['INCARCERATION_SUPERVISION_ID'].duplicated().sum()
print(f"Duplicated INCARCERATION_SUPERVISION_ID in patch: {patch_dupes}")
patch_dupes = df_patch['CORRELATION_ID'].duplicated().sum()
print(f"Duplicated CORRELATION_ID in patch: {patch_dupes}")
Duplicated UNIQUE_CORRELATION_ID in main: 495824 Duplicated INCARCERATION_SUPERVISION_ID in main: 495824 Duplicated CORRELATION_ID in main: 495824 Duplicated UNIQUE_CORRELATION_ID in patch: 47204 Duplicated INCARCERATION_SUPERVISION_ID in patch: 47204 Duplicated CORRELATION_ID in patch: 47204
In [8]:
# For violation types:
violation_map = df_main.groupby('INCAR_DISCIPLINARY_VIOL_CODE')['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'].nunique()
print("Number of violation codes with >1 unique description:", (violation_map > 1).sum())
desc_map = df_main.groupby('INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC')['INCAR_DISCIPLINARY_VIOL_CODE'].nunique()
print("Number of descriptions with >1 unique violation code:", (desc_map > 1).sum())
# For action types:
action_map = df_main.groupby('INCAR_DISCIPLINARY_ACTION_CODE')['INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC'].nunique()
print("Number of action codes with >1 unique description:", (action_map > 1).sum())
action_desc_map = df_main.groupby('INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC')['INCAR_DISCIPLINARY_ACTION_CODE'].nunique()
print("Number of action descriptions with >1 unique action code:", (action_desc_map > 1).sum())
Number of violation codes with >1 unique description: 0 Number of descriptions with >1 unique violation code: 0 Number of action codes with >1 unique description: 0 Number of action descriptions with >1 unique action code: 0
In [9]:
# Count the number of records per logical event
event_counts = df_patch.groupby([
'INCARCERATION_SUPERVISION_ID',
'INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC',
'INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'
]).size().reset_index(name='count')
print(event_counts[event_counts['count'] > 1].head())
print(f"Number of (supervision/action/violation) combos with >1 record: {(event_counts['count'] > 1).sum()}")
# Count the number of records per logical event
event_counts = df_main.groupby([
'INCARCERATION_SUPERVISION_ID',
'INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC',
'INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'
]).size().reset_index(name='count')
print(event_counts[event_counts['count'] > 1].head())
print(f"Number of (supervision/action/violation) combos with >1 record: {(event_counts['count'] > 1).sum()}")
Empty DataFrame Columns: [INCARCERATION_SUPERVISION_ID, INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC, INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC, count] Index: [] Number of (supervision/action/violation) combos with >1 record: 0 Empty DataFrame Columns: [INCARCERATION_SUPERVISION_ID, INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC, INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC, count] Index: [] Number of (supervision/action/violation) combos with >1 record: 0
In [10]:
informative_cols = [
'INCARCERATION_SUPERVISION_ID',
'INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC',
'INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'
]
combo = pd.concat([df_main, df_patch], ignore_index=True)
combo = combo.drop_duplicates(informative_cols)
combo = combo[informative_cols]
print(combo.info(), combo.head())
<class 'pandas.core.frame.DataFrame'> Index: 626876 entries, 0 to 628615 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 INCARCERATION_SUPERVISION_ID 626876 non-null int64 1 INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC 626876 non-null object 2 INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC 626876 non-null object dtypes: int64(1), object(2) memory usage: 19.1+ MB None INCARCERATION_SUPERVISION_ID INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC \ 0 26 Disciplinary Confinement 1 26 Verbal Reprimand 2 26 Disciplinary Confinement 3 26 Loss of Gain Time 4 26 Disciplinary Confinement INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC 0 Failure to comply with count procedures 1 Disobeying verbal or written order 2 Tampering/defeating any fire or other safety d... 3 Refusing to work or participate in mandatory p... 4 Creating, participating in or inciting a minor...
In [11]:
# Get action outcome types, ordered by frequency
action_order = combo['INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC'].value_counts(ascending=False).index.tolist()
# Make the crosstab
top_violations = combo['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'].value_counts().index
crosstab = pd.crosstab(
combo['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'],
combo['INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC']
).loc[top_violations, action_order]
crosstab_html = crosstab.to_html(classes="discipline-table display nowrap", border=0)
with open("discipline_sanctions_by_violation_top.html", "w") as f:
f.write(crosstab_html)
crosstab.to_csv('assets/discipline_sanctions_by_violation.csv')
crosstab
Out[11]:
INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC | Disciplinary Confinement | Loss of Gain Time | Cashless Canteen Denial | Verbal Reprimand | Privilege Suspension - Other | Loss of Future Gain Time | Disciplinary Squad | Extra Duty | Restitution Payment | Alternative Housing | Privilege Suspension - Mail, Visitation | Privilege Suspension - Visitation | Privilege Suspension - Mail | Restricted Labor Squad | Disciplinary Confinement Part Time |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC | |||||||||||||||
Disobeying verbal or written order | 38343 | 16026 | 9898 | 5258 | 3002 | 2105 | 2433 | 1928 | 53 | 340 | 78 | 115 | 6 | 4 | 11 |
Disrespect persons of authority | 21168 | 8103 | 5252 | 2384 | 1372 | 874 | 1390 | 597 | 3 | 368 | 18 | 37 | 4 | 22 | 7 |
Disorderly conduct | 19639 | 7260 | 3803 | 1789 | 1011 | 898 | 944 | 509 | 5 | 235 | 12 | 28 | 1 | 3 | 2 |
Use of unauthorized drugs | 19547 | 10021 | 3222 | 194 | 921 | 946 | 197 | 108 | 20 | 84 | 45 | 33 | 3 | 3 | 1 |
Fighting | 18350 | 5975 | 3635 | 1701 | 859 | 734 | 271 | 198 | 6 | 134 | 5 | 9 | 1 | 5 | 0 |
Unauthorized possession/use of communication device/technology | 18134 | 7422 | 1853 | 100 | 978 | 760 | 77 | 28 | 470 | 41 | 23 | 35 | 2 | 0 | 0 |
Possession/manufacture of weapons, ammunition or explosives | 17369 | 6211 | 1485 | 35 | 615 | 1236 | 22 | 8 | 15 | 39 | 12 | 17 | 0 | 0 | 0 |
Spoken, written or gestured threats | 13305 | 4855 | 1528 | 187 | 800 | 788 | 84 | 37 | 1 | 34 | 14 | 17 | 3 | 3 | 0 |
Possession of narcotics/unauthorized drugs and paraphernalia | 12707 | 5910 | 944 | 26 | 199 | 509 | 31 | 26 | 1 | 4 | 14 | 7 | 0 | 0 | 1 |
Being in unauthorized area | 5716 | 3794 | 3625 | 2566 | 781 | 571 | 1300 | 1097 | 1 | 126 | 21 | 12 | 0 | 7 | 0 |
Destruction of State or another property | 7612 | 1832 | 1163 | 533 | 351 | 284 | 185 | 150 | 6049 | 28 | 4 | 0 | 1 | 2 | 2 |
Lewd or lascivious exhibition | 9290 | 3995 | 1454 | 221 | 563 | 1083 | 27 | 11 | 0 | 16 | 18 | 75 | 0 | 1 | 1 |
Possession or transfer of other contraband | 6225 | 4051 | 1665 | 1280 | 344 | 289 | 891 | 1512 | 157 | 18 | 8 | 7 | 4 | 3 | 0 |
Disobeying institutional regulations | 4537 | 2832 | 1559 | 1523 | 466 | 400 | 671 | 1536 | 54 | 37 | 6 | 12 | 2 | 6 | 5 |
Battery or attempted battery on an inmate | 8964 | 2948 | 746 | 27 | 232 | 463 | 5 | 6 | 1 | 1 | 3 | 7 | 0 | 0 | 0 |
Lying to staff or falsifying records | 6633 | 2306 | 1231 | 669 | 252 | 134 | 496 | 331 | 8 | 43 | 8 | 8 | 2 | 2 | 2 |
Battery or attempted battery on correctional officer | 6518 | 2740 | 713 | 10 | 316 | 611 | 0 | 0 | 2 | 1 | 1 | 11 | 1 | 0 | 0 |
Unauthorized absence from assigned area | 3876 | 1715 | 1664 | 1270 | 242 | 143 | 670 | 413 | 1 | 18 | 6 | 3 | 0 | 0 | 0 |
Possession of aromatic stimulants or depressants | 6617 | 2087 | 463 | 15 | 214 | 203 | 46 | 33 | 0 | 26 | 7 | 8 | 1 | 1 | 0 |
Personal or business relationship with staff member or volunteer | 5955 | 2107 | 655 | 114 | 168 | 279 | 44 | 14 | 0 | 8 | 4 | 5 | 3 | 0 | 0 |
Possession of tobacco-related product by non-death row or non-community release inmate | 4420 | 1917 | 1141 | 693 | 239 | 96 | 325 | 376 | 0 | 19 | 4 | 21 | 0 | 1 | 2 |
Aggravated battery or attempted on inmate | 5612 | 2070 | 409 | 5 | 155 | 537 | 0 | 0 | 3 | 0 | 6 | 3 | 0 | 0 | 0 |
Telephone regulation violations | 2745 | 1071 | 859 | 592 | 2300 | 83 | 276 | 351 | 1 | 6 | 179 | 23 | 4 | 2 | 0 |
Refusing to work or participate in mandatory programs | 3534 | 1833 | 1216 | 941 | 221 | 95 | 302 | 144 | 0 | 35 | 1 | 9 | 0 | 2 | 0 |
Obscene or profane act, gesture or statement | 4338 | 1978 | 490 | 123 | 161 | 206 | 54 | 14 | 0 | 14 | 19 | 8 | 1 | 0 | 0 |
Creating, participating in or inciting a minor disturbance | 5246 | 1106 | 402 | 26 | 210 | 265 | 13 | 2 | 2 | 2 | 0 | 1 | 0 | 0 | 0 |
Failure to comply with count procedures | 2718 | 1014 | 868 | 739 | 208 | 58 | 344 | 543 | 0 | 16 | 1 | 3 | 0 | 0 | 0 |
Misuse of State or another property | 1432 | 1175 | 731 | 1746 | 343 | 438 | 25 | 59 | 87 | 3 | 1 | 0 | 0 | 0 | 0 |
Refusing to submit to substance abuse testing | 3713 | 1681 | 380 | 49 | 33 | 89 | 6 | 1 | 0 | 6 | 9 | 2 | 0 | 0 | 0 |
Tampering/defeating/depriving staff of security device | 3549 | 838 | 731 | 123 | 422 | 137 | 21 | 22 | 58 | 4 | 2 | 3 | 0 | 0 | 0 |
Attempt or conspire to commit crime or violate rules | 2850 | 1294 | 527 | 152 | 348 | 110 | 68 | 36 | 13 | 14 | 36 | 23 | 32 | 1 | 1 |
Theft of property under $50 in value | 1977 | 863 | 496 | 274 | 88 | 48 | 223 | 180 | 1010 | 17 | 0 | 2 | 0 | 1 | 0 |
Introduction of any contraband | 2283 | 1169 | 210 | 81 | 43 | 78 | 25 | 33 | 1 | 3 | 23 | 43 | 7 | 0 | 0 |
Tattooing, branding or body art including piercing | 1517 | 793 | 553 | 170 | 125 | 38 | 301 | 313 | 3 | 1 | 0 | 8 | 0 | 2 | 0 |
Possession of gang-related paraphernalia or material | 2306 | 552 | 274 | 40 | 62 | 77 | 121 | 56 | 0 | 7 | 1 | 7 | 0 | 2 | 0 |
Possession of unauthorized beverages | 1521 | 676 | 404 | 104 | 167 | 44 | 110 | 34 | 1 | 15 | 2 | 7 | 0 | 0 | 0 |
Failure to maintain acceptable hygiene/appearance of housing | 687 | 510 | 231 | 1098 | 82 | 147 | 39 | 83 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
Sex acts or unauthorized physical contact involving inmates | 1485 | 734 | 262 | 112 | 73 | 35 | 43 | 53 | 0 | 8 | 4 | 16 | 0 | 1 | 0 |
Tampering/defeating any fire or other safety device | 1584 | 331 | 99 | 8 | 48 | 87 | 1 | 0 | 612 | 0 | 0 | 0 | 0 | 0 | 0 |
Failure to maintain personal hygiene/appearance | 699 | 396 | 257 | 942 | 113 | 92 | 30 | 21 | 0 | 0 | 2 | 0 | 0 | 0 | 0 |
Inciting riots, strikes, mutinous acts or disturbances | 1713 | 536 | 85 | 5 | 37 | 131 | 3 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
Possession of negotiables in unauthorized amounts, forms or location | 1082 | 655 | 220 | 144 | 59 | 54 | 45 | 61 | 0 | 3 | 6 | 9 | 2 | 0 | 1 |
Other assault/battery or attempted | 1412 | 755 | 2 | 1 | 54 | 64 | 3 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 |
Unauthorized/illegal financial transaction | 1230 | 509 | 288 | 30 | 116 | 54 | 11 | 6 | 16 | 4 | 4 | 5 | 0 | 0 | 0 |
Participating in riots, strikes, mutinous acts or disturbances | 1438 | 511 | 43 | 2 | 9 | 203 | 0 | 1 | 1 | 0 | 0 | 24 | 0 | 0 | 0 |
Altering/defacing State or another property | 608 | 329 | 177 | 164 | 32 | 22 | 52 | 84 | 498 | 1 | 0 | 0 | 0 | 0 | 0 |
Gang-related activities | 1309 | 299 | 158 | 8 | 53 | 44 | 30 | 17 | 1 | 3 | 2 | 0 | 1 | 0 | 0 |
Assault or attempted assault on a correctional officer | 1031 | 319 | 27 | 1 | 12 | 76 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 |
Possession of stolen property | 573 | 358 | 83 | 65 | 18 | 20 | 58 | 101 | 119 | 4 | 0 | 0 | 0 | 0 | 0 |
Mail regulation violations | 633 | 238 | 83 | 28 | 78 | 8 | 27 | 57 | 0 | 0 | 111 | 0 | 121 | 0 | 0 |
Canteen Shortage over $50 | 598 | 192 | 64 | 17 | 9 | 6 | 12 | 4 | 454 | 5 | 0 | 0 | 0 | 0 | 0 |
Possession of escape paraphernalia | 834 | 283 | 61 | 6 | 34 | 34 | 2 | 1 | 2 | 0 | 0 | 1 | 0 | 0 | 0 |
Possession of items facilitating unauthorized/illegal financial transaction | 743 | 275 | 109 | 26 | 48 | 14 | 8 | 1 | 2 | 0 | 5 | 6 | 1 | 0 | 0 |
Bartering with others | 522 | 345 | 134 | 64 | 33 | 26 | 40 | 66 | 3 | 2 | 0 | 3 | 0 | 0 | 0 |
Manufacture of drugs or unauthorized beverages | 738 | 274 | 99 | 9 | 33 | 23 | 26 | 5 | 0 | 7 | 2 | 3 | 0 | 2 | 2 |
Gambling or possession of gambling paraphernalia | 483 | 300 | 39 | 41 | 16 | 9 | 42 | 139 | 0 | 0 | 1 | 1 | 0 | 2 | 0 |
Unauthorized physical contact involving non-inmates | 659 | 255 | 56 | 29 | 12 | 12 | 10 | 5 | 0 | 1 | 11 | 18 | 0 | 0 | 0 |
Aggravated battery or attempted on correctional officer | 602 | 224 | 51 | 0 | 22 | 75 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
Theft of property exceeding $50 in value | 457 | 163 | 43 | 15 | 8 | 8 | 12 | 3 | 220 | 1 | 0 | 0 | 0 | 0 | 0 |
Assault or attempted assault on an inmate | 685 | 148 | 9 | 2 | 7 | 22 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Battery or attempted battery on staff other than correctional officer | 542 | 185 | 43 | 2 | 22 | 39 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
Possession of unauthorized clothing or linen | 227 | 123 | 92 | 123 | 20 | 37 | 50 | 84 | 5 | 1 | 0 | 0 | 0 | 0 | 0 |
Trafficking in drugs or unauthorized beverages | 459 | 210 | 47 | 0 | 13 | 19 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 |
Arson or attempted arson | 445 | 109 | 38 | 3 | 24 | 31 | 1 | 1 | 55 | 2 | 0 | 0 | 0 | 0 | 0 |
Frivolous, malicious, false information presented to court | 329 | 132 | 61 | 60 | 16 | 6 | 22 | 25 | 1 | 6 | 1 | 0 | 2 | 0 | 0 |
Assault/battery or attempted with deadly weapon | 398 | 231 | 0 | 0 | 12 | 13 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
Destruction of State or another property by gross negligence | 134 | 69 | 25 | 40 | 7 | 22 | 14 | 20 | 224 | 0 | 0 | 0 | 0 | 0 | 0 |
Aggravated assault or attempted on inmate | 405 | 104 | 16 | 1 | 3 | 23 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Use of Alcohol | 281 | 131 | 32 | 16 | 7 | 11 | 5 | 6 | 0 | 3 | 0 | 0 | 0 | 0 | 0 |
Escape or escape attempt | 261 | 173 | 13 | 2 | 5 | 13 | 0 | 0 | 1 | 0 | 6 | 4 | 0 | 0 | 0 |
Visiting regulation violations | 161 | 100 | 19 | 29 | 38 | 7 | 6 | 14 | 0 | 0 | 22 | 41 | 0 | 0 | 0 |
Insufficient work | 97 | 101 | 60 | 69 | 8 | 10 | 27 | 31 | 4 | 0 | 0 | 1 | 0 | 0 | 0 |
Missing count | 217 | 77 | 37 | 29 | 1 | 2 | 14 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
Fraud or attempted fraud | 231 | 94 | 18 | 4 | 9 | 5 | 6 | 2 | 8 | 0 | 1 | 0 | 0 | 0 | 0 |
Extortion or attempted extortion | 245 | 85 | 18 | 2 | 11 | 9 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
Feigning illness or malingering | 124 | 89 | 58 | 54 | 14 | 15 | 3 | 12 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
Loaning or borrowing money or other valuables | 181 | 101 | 26 | 20 | 9 | 6 | 9 | 13 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
Wasting State or another property | 103 | 46 | 35 | 32 | 10 | 2 | 27 | 16 | 72 | 0 | 0 | 0 | 0 | 0 | 0 |
Presenting false testimony or information | 186 | 68 | 31 | 11 | 3 | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Possession of unauthorized or altered identification | 117 | 56 | 43 | 29 | 10 | 12 | 11 | 21 | 5 | 1 | 0 | 0 | 0 | 0 | 0 |
Unauthorized use or tampering with computer or office equipment | 174 | 53 | 23 | 7 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 0 | 0 |
Bribery or attempted bribery | 154 | 53 | 13 | 8 | 1 | 1 | 3 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
Aggravated assault or attempted on correctional officer | 150 | 52 | 4 | 0 | 0 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Robbery or attempted robbery | 100 | 39 | 16 | 0 | 2 | 1 | 2 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 |
Assault or attempted assault on staff other than correctional officer | 97 | 35 | 7 | 0 | 0 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Sexual battery or attempted | 76 | 32 | 0 | 0 | 1 | 7 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
Deviating from approved plan | 0 | 56 | 10 | 24 | 0 | 1 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Breaking and entering or attempt | 51 | 20 | 14 | 2 | 3 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Aggravated battery or attempted on staff other than correctional officer | 52 | 24 | 4 | 0 | 0 | 6 | 0 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 |
Self-Mutilation | 55 | 27 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Canteen Shortage under $50 | 22 | 14 | 4 | 7 | 1 | 0 | 0 | 0 | 29 | 0 | 0 | 0 | 0 | 0 | 1 |
Battery or attempted on other than staff or inmates | 26 | 12 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Tampering/damaging electronic monitoring equipment | 5 | 4 | 0 | 3 | 0 | 0 | 0 | 2 | 22 | 0 | 0 | 0 | 0 | 0 | 0 |
Failure to deposit earnings | 2 | 18 | 5 | 9 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Failure to proceed to/from designated area | 2 | 9 | 4 | 5 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Possession of tobacco-related product by death row inmate | 16 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Assault or attempted assault on other than staff/inmates | 9 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Unauthorized contact in behalf of another inmate | 3 | 5 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
False property lien | 5 | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Aggravated assault or attempted on staff other than correctional officer | 5 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Failure to remain within designated area of release plan | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Making purchase or contract without approval | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Aggravated battery or attempted not on staff/inmates | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
Consume Intoxicants | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
In [12]:
# Get top 25 action outcome types, ordered by frequency
action_order = combo['INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC'].value_counts(ascending=False).index.tolist()
# Make the crosstab as before, limited to top 25 violations
top_violations = combo['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'].value_counts().head(25).index
crosstab2 = pd.crosstab(
combo['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'],
combo['INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC']
).loc[top_violations, action_order]
crosstab2
Out[12]:
INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC | Disciplinary Confinement | Loss of Gain Time | Cashless Canteen Denial | Verbal Reprimand | Privilege Suspension - Other | Loss of Future Gain Time | Disciplinary Squad | Extra Duty | Restitution Payment | Alternative Housing | Privilege Suspension - Mail, Visitation | Privilege Suspension - Visitation | Privilege Suspension - Mail | Restricted Labor Squad | Disciplinary Confinement Part Time |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC | |||||||||||||||
Disobeying verbal or written order | 38343 | 16026 | 9898 | 5258 | 3002 | 2105 | 2433 | 1928 | 53 | 340 | 78 | 115 | 6 | 4 | 11 |
Disrespect persons of authority | 21168 | 8103 | 5252 | 2384 | 1372 | 874 | 1390 | 597 | 3 | 368 | 18 | 37 | 4 | 22 | 7 |
Disorderly conduct | 19639 | 7260 | 3803 | 1789 | 1011 | 898 | 944 | 509 | 5 | 235 | 12 | 28 | 1 | 3 | 2 |
Use of unauthorized drugs | 19547 | 10021 | 3222 | 194 | 921 | 946 | 197 | 108 | 20 | 84 | 45 | 33 | 3 | 3 | 1 |
Fighting | 18350 | 5975 | 3635 | 1701 | 859 | 734 | 271 | 198 | 6 | 134 | 5 | 9 | 1 | 5 | 0 |
Unauthorized possession/use of communication device/technology | 18134 | 7422 | 1853 | 100 | 978 | 760 | 77 | 28 | 470 | 41 | 23 | 35 | 2 | 0 | 0 |
Possession/manufacture of weapons, ammunition or explosives | 17369 | 6211 | 1485 | 35 | 615 | 1236 | 22 | 8 | 15 | 39 | 12 | 17 | 0 | 0 | 0 |
Spoken, written or gestured threats | 13305 | 4855 | 1528 | 187 | 800 | 788 | 84 | 37 | 1 | 34 | 14 | 17 | 3 | 3 | 0 |
Possession of narcotics/unauthorized drugs and paraphernalia | 12707 | 5910 | 944 | 26 | 199 | 509 | 31 | 26 | 1 | 4 | 14 | 7 | 0 | 0 | 1 |
Being in unauthorized area | 5716 | 3794 | 3625 | 2566 | 781 | 571 | 1300 | 1097 | 1 | 126 | 21 | 12 | 0 | 7 | 0 |
Destruction of State or another property | 7612 | 1832 | 1163 | 533 | 351 | 284 | 185 | 150 | 6049 | 28 | 4 | 0 | 1 | 2 | 2 |
Lewd or lascivious exhibition | 9290 | 3995 | 1454 | 221 | 563 | 1083 | 27 | 11 | 0 | 16 | 18 | 75 | 0 | 1 | 1 |
Possession or transfer of other contraband | 6225 | 4051 | 1665 | 1280 | 344 | 289 | 891 | 1512 | 157 | 18 | 8 | 7 | 4 | 3 | 0 |
Disobeying institutional regulations | 4537 | 2832 | 1559 | 1523 | 466 | 400 | 671 | 1536 | 54 | 37 | 6 | 12 | 2 | 6 | 5 |
Battery or attempted battery on an inmate | 8964 | 2948 | 746 | 27 | 232 | 463 | 5 | 6 | 1 | 1 | 3 | 7 | 0 | 0 | 0 |
Lying to staff or falsifying records | 6633 | 2306 | 1231 | 669 | 252 | 134 | 496 | 331 | 8 | 43 | 8 | 8 | 2 | 2 | 2 |
Battery or attempted battery on correctional officer | 6518 | 2740 | 713 | 10 | 316 | 611 | 0 | 0 | 2 | 1 | 1 | 11 | 1 | 0 | 0 |
Unauthorized absence from assigned area | 3876 | 1715 | 1664 | 1270 | 242 | 143 | 670 | 413 | 1 | 18 | 6 | 3 | 0 | 0 | 0 |
Possession of aromatic stimulants or depressants | 6617 | 2087 | 463 | 15 | 214 | 203 | 46 | 33 | 0 | 26 | 7 | 8 | 1 | 1 | 0 |
Personal or business relationship with staff member or volunteer | 5955 | 2107 | 655 | 114 | 168 | 279 | 44 | 14 | 0 | 8 | 4 | 5 | 3 | 0 | 0 |
Possession of tobacco-related product by non-death row or non-community release inmate | 4420 | 1917 | 1141 | 693 | 239 | 96 | 325 | 376 | 0 | 19 | 4 | 21 | 0 | 1 | 2 |
Aggravated battery or attempted on inmate | 5612 | 2070 | 409 | 5 | 155 | 537 | 0 | 0 | 3 | 0 | 6 | 3 | 0 | 0 | 0 |
Telephone regulation violations | 2745 | 1071 | 859 | 592 | 2300 | 83 | 276 | 351 | 1 | 6 | 179 | 23 | 4 | 2 | 0 |
Refusing to work or participate in mandatory programs | 3534 | 1833 | 1216 | 941 | 221 | 95 | 302 | 144 | 0 | 35 | 1 | 9 | 0 | 2 | 0 |
Obscene or profane act, gesture or statement | 4338 | 1978 | 490 | 123 | 161 | 206 | 54 | 14 | 0 | 14 | 19 | 8 | 1 | 0 | 0 |
In [13]:
# Count occurrences
violation_counts = combo['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'].value_counts().reset_index()
violation_counts.columns = ['Violation', 'Count']
# Add percent of total
total = violation_counts['Count'].sum()
violation_counts['Percent'] = violation_counts['Count'] / total * 100
# Format for HTML display
violation_counts['Percent'] = violation_counts['Percent'].round(2) # or 1 if you want
# Save full table with percent
violation_counts.to_html('assets/top_violations.html', index=False, float_format='%.2f')
# Save CSV if desired
violation_counts.to_csv('csvs/violations.csv', index=False)
# Get top 25
top_violations = violation_counts.head(25).copy()
# For Altair, add label for % display in tooltip (optional, pretty)
top_violations['Percent_Label'] = top_violations['Percent'].map('{:.2f}%'.format)
bar = alt.Chart(top_violations).mark_bar(size=18).encode(
y=alt.Y('Violation:N', sort='-x', title=None, axis=alt.Axis(labelLimit=370)),
x=alt.X('Percent:Q', title='Share of All Disciplinary Events (%)', axis=alt.Axis(format='.1f')),
color=alt.value('#24d7d7'),
tooltip=[
alt.Tooltip('Violation:N', title='Violation'),
alt.Tooltip('Count:Q', title='Raw Count'),
alt.Tooltip('Percent_Label:N', title='% of Total')
]
).properties(
width=420,
height=520,
background='#181f22',
title=alt.TitleParams(
text="Most Common Rule Violations (Share of All Events)",
color='white'
)
).configure_axis(
labelColor='white', titleColor='white'
).configure_title(
color='white'
).configure_view(
stroke=None
)
bar.save('assets/violations_barchart.json')
bar
Out[13]:
In [14]:
# Count occurrences of each disciplinary outcome
outcome_counts = combo['INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC'].value_counts().reset_index()
outcome_counts.columns = ['Outcome', 'Count']
# Add percent of total
total = outcome_counts['Count'].sum()
outcome_counts['Percent'] = (outcome_counts['Count'] / total * 100).round(2)
# Save full table with percent
outcome_counts.to_html('assets/outcomes.html', index=False, float_format='%.2f')
outcome_counts.to_csv('csvs/outcomes.csv', index=False)
# Prepare top N (if you want to limit for viz; otherwise just use all)
top_outcomes = outcome_counts.head(25) # Or whatever N you want
# Altair bar chart: show count, but you could use 'Percent:Q' for %
bar2 = alt.Chart(top_outcomes).mark_bar(size=18).encode(
y=alt.Y('Outcome:N', sort='-x', title=None, axis=alt.Axis(labelLimit=370)),
x=alt.X('Percent:Q', title='Percent'), # Or change to 'Percent:Q' for %
color=alt.value('#24d7d7'),
tooltip=['Outcome', 'Count', 'Percent']
).properties(
width=420,
height=330,
background='#181f22',
title=alt.TitleParams(
text="Most Common Disciplinary Outcomes",
color='white'
)
).configure_axis(
labelColor='white', titleColor='white'
).configure_title(
color='white'
).configure_view(
stroke=None
)
bar2.save('assets/outcomes_barchart.json')
bar2
Out[14]:
In [15]:
# Suppose your crosstab2 is as before
df_heat = crosstab2.reset_index().melt(
id_vars='INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC',
var_name='action',
value_name='count'
)
grand_total = df_heat['count'].sum()
df_heat['percent'] = 100 * df_heat['count'] / grand_total
max_pct = df_heat['percent'].max()
min_pct = df_heat['percent'][df_heat['percent'] > 0].min() # exclude zero for a clean legend if desired
# If your top_violations is a DataFrame, get the list:
top_violations_list = top_violations['Violation'].tolist()
chart = alt.Chart(df_heat).mark_rect().encode(
x=alt.X(
'action:N',
sort=list(action_order),
title='Disciplinary Action Outcome',
axis=alt.Axis(labelColor='white', titleColor='white', labelAngle=45)
),
y=alt.Y(
'INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC:N',
sort=top_violations_list,
title=None,
axis=alt.Axis(labelLimit=400, labelFontSize=13, labelOverlap=False)
),
color=alt.Color(
'percent:Q',
scale=alt.Scale(scheme='tealblues', domain=[0, max_pct]),
title='% of All Events'
),
tooltip=[
alt.Tooltip('INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC', title='Violation'),
alt.Tooltip('action', title='Action'),
alt.Tooltip('count', title='Count'),
alt.Tooltip('percent', title='% of All Events', format='.2f')
]
).properties(
width=480,
height=440,
background='#181f22',
title=alt.TitleParams(
text="Top 25 Rule Violations × Disciplinary Outcomes (Share of All Events)",
color='white'
)
).configure_axis(
labelColor='white', titleColor='white'
).configure_title(
color='white'
).configure_legend(
labelColor='white', titleColor='white'
).configure_view(
stroke=None
)
chart.save('assets/violations_heatmap1.json')
chart
Out[15]:
In [16]:
print(dataframes['Incarcerationsupervision_00000.csv']['RACE_CODE'].value_counts(),
dataframes['Incarcerationsupervision_00000.csv']['ETHNICITY_CODE'].value_counts())
RACE_CODE White 79107 Black 74159 Unknown 8714 American Indian or Alaska Native 173 Asian 43 Name: count, dtype: int64 ETHNICITY_CODE Not Hispanic or Latino 139473 Hispanic or Latino 20041 Unknown 2682 Name: count, dtype: int64
In [17]:
dataframes['Incarcerationsupervision_00000.csv']['race_ethnicity_group'] = np.where(
(dataframes['Incarcerationsupervision_00000.csv']['RACE_CODE'] == 'White') & (dataframes['Incarcerationsupervision_00000.csv']['ETHNICITY_CODE'] == 'Not Hispanic or Latino'),
'majority',
'minority'
)
dataframes['Incarcerationsupervision_00000.csv']['race_ethnicity_group'].value_counts(normalize=True)
Out[17]:
race_ethnicity_group minority 0.589756 majority 0.410244 Name: proportion, dtype: float64
In [18]:
# 1. Get the demographic info, including sex and group
demo_df = dataframes['Incarcerationsupervision_00000.csv'][[
'INCARCERATION_SUPERVISION_ID', 'race_ethnicity_group', 'SEX_CODE'
]].copy()
# 2. For each person in demo_df, are they in the infractions table? (Right join on all people)
# First, get set of IDs with at least one infraction
has_infraction = pd.Series(combo['INCARCERATION_SUPERVISION_ID'].unique())
has_infraction = pd.DataFrame({'INCARCERATION_SUPERVISION_ID': has_infraction, 'has_infraction': 1})
# 3. Merge: demo_df (all people) LEFT joined to infraction marker (so all people in demo_df stay)
merged = demo_df.merge(has_infraction, on='INCARCERATION_SUPERVISION_ID', how='left')
merged['has_infraction'] = merged['has_infraction'].fillna(0).astype(int)
# 4. Now for each (race_ethnicity_group, SEX_CODE), compute:
# - count
# - how many have infraction
# - percent with infraction
summary = (
merged
.groupby(['race_ethnicity_group', 'SEX_CODE'])
.agg(
total=('INCARCERATION_SUPERVISION_ID', 'nunique'),
num_with_infraction=('has_infraction', 'sum')
)
.reset_index()
)
summary['percent_with_infraction'] = 100 * summary['num_with_infraction'] / summary['total']
# 5. Show as a confusion matrix (pivot)
conf_matrix = summary.pivot(index='race_ethnicity_group', columns='SEX_CODE', values='percent_with_infraction')
conf_matrix = conf_matrix.round(2)
# Optional: Print nicely
print("Proportion (%) of people who ever received at least one disciplinary infraction:")
print(conf_matrix)
# To save as CSV or HTML:
#conf_matrix.to_csv('assets/infraction_by_group_sex.csv')
conf_matrix.to_html('assets/infraction_by_group_sex.html')
# Build the contingency table with counts (not percentages!)
contingency = summary.pivot(index='race_ethnicity_group', columns='SEX_CODE', values='num_with_infraction').fillna(0).astype(int)
# Also get counts of those with NO infraction
no_infraction = summary.pivot(index='race_ethnicity_group', columns='SEX_CODE', values='total') - contingency
# Build full table:
# rows = [majority, minority], columns = [female_infraction, female_no_infraction, male_infraction, male_no_infraction]
cont_table = pd.DataFrame({
'Female_infraction': contingency['Female'],
'Female_no_infraction': no_infraction['Female'],
'Male_infraction': contingency['Male'],
'Male_no_infraction': no_infraction['Male'],
}, index=contingency.index)
# For chi2, use a 2x2 table, e.g., for Male vs Female, or minority vs majority
# Here's a 2x2 for 'any infraction' by 'minority/majority' (collapse across sex)
infraction_by_group = merged.groupby('race_ethnicity_group')['has_infraction'].agg(['sum', 'count'])
chi2, p, dof, ex = chi2_contingency([
[infraction_by_group.loc['majority', 'sum'], infraction_by_group.loc['majority', 'count'] - infraction_by_group.loc['majority', 'sum']],
[infraction_by_group.loc['minority', 'sum'], infraction_by_group.loc['minority', 'count'] - infraction_by_group.loc['minority', 'sum']]
])
print(f"Chi-squared p-value (minority vs majority): {p:.4g}")
# Or full chi2 on the 2x2x2 table
# For just race x sex x infraction/no infraction
contingency2 = merged.groupby(['race_ethnicity_group', 'SEX_CODE'])['has_infraction'].value_counts().unstack(fill_value=0)
chi2, p, dof, ex = chi2_contingency(contingency2)
print(f"Chi-squared p-value (race x sex): {p:.4g}")
Proportion (%) of people who ever received at least one disciplinary infraction: SEX_CODE Female Male race_ethnicity_group majority 26.28 43.35 minority 40.89 53.09 Chi-squared p-value (minority vs majority): 0 Chi-squared p-value (race x sex): 0
In [19]:
demo_df = dataframes['Incarcerationsupervision_00000.csv'][['INCARCERATION_SUPERVISION_ID', 'race_ethnicity_group']].copy()
merged = combo.merge(demo_df, on='INCARCERATION_SUPERVISION_ID', how='inner')
# 1. Set baseline (already calculated)
baseline_minority = dataframes['Incarcerationsupervision_00000.csv']['race_ethnicity_group'].value_counts(normalize=True)['minority']
# 2. Groupby and calculate the cell-level minority proportion
cell_group = (
merged
.groupby(['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC', 'INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC'])
)
# 3. Calculate: number of minorities, total count, and proportion for each cell
cell_stats = cell_group['race_ethnicity_group'].value_counts(normalize=False).unstack(fill_value=0)
cell_stats['total'] = cell_stats.sum(axis=1)
cell_stats['minority_prop'] = cell_stats.get('minority', 0) / cell_stats['total']
# 4. Compute deviation from baseline
cell_stats['minority_pct_diff'] = 100 * (cell_stats['minority_prop'] / baseline_minority - 1)
# 5. Reset index for visualization
cell_stats = cell_stats.reset_index()
viz_df = cell_stats[['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC',
'INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC',
'total', 'minority_prop', 'minority_pct_diff']].copy()
# 6. Run a z-test for each cell and add significance to viz_df
viz_df['zstat'] = np.nan
viz_df['pval'] = np.nan
viz_df['statistically_significant'] = False
for idx, row in viz_df.iterrows():
count = int(round(row['minority_prop'] * row['total']))
nobs = int(row['total'])
if nobs > 0:
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
stat, pval = proportions_ztest(count, nobs, baseline_minority)
viz_df.at[idx, 'zstat'] = stat
viz_df.at[idx, 'pval'] = pval
viz_df.at[idx, 'statistically_significant'] = (pval < 0.05) and (nobs >= 10)
else:
viz_df.at[idx, 'statistically_significant'] = False
# (Optional) Save for d3 or further use
viz_df.to_csv('discipline_disparity_long.csv', index=False)
In [20]:
print(viz_df.info(), viz_df.head())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 986 entries, 0 to 985 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC 986 non-null object 1 INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC 986 non-null object 2 total 986 non-null int64 3 minority_prop 986 non-null float64 4 minority_pct_diff 986 non-null float64 5 zstat 986 non-null float64 6 pval 986 non-null float64 7 statistically_significant 986 non-null bool dtypes: bool(1), float64(4), int64(1), object(2) memory usage: 55.0+ KB None race_ethnicity_group INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC \ 0 Aggravated assault or attempted on correctiona... 1 Aggravated assault or attempted on correctiona... 2 Aggravated assault or attempted on correctiona... 3 Aggravated assault or attempted on correctiona... 4 Aggravated assault or attempted on inmate race_ethnicity_group INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC total \ 0 Cashless Canteen Denial 4 1 Disciplinary Confinement 131 2 Loss of Future Gain Time 9 3 Loss of Gain Time 47 4 Cashless Canteen Denial 16 race_ethnicity_group minority_prop minority_pct_diff zstat \ 0 1.000000 69.561763 inf 1 0.809160 37.202648 6.390428 2 0.777778 31.881371 1.356778 3 0.808511 37.092489 3.811464 4 0.875000 48.366543 3.449992 race_ethnicity_group pval statistically_significant 0 0.000000e+00 False 1 1.654223e-10 True 2 1.748517e-01 False 3 1.381463e-04 True 4 5.606033e-04 True
In [21]:
# 1. Get Top 25 Violation Types (by total event count)
top_violations = crosstab.sum(axis=1).sort_values(ascending=False).index[:25].tolist()
# 2. Filter crosstab and viz_df to only those violations
crosstab_top = crosstab.loc[top_violations]
viz_df_top = viz_df[viz_df['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'].isin(top_violations)]
# 3. Get actions sorted by total count (descending, so most frequent on left/front)
actions_sorted = crosstab_top.sum(axis=0).sort_values(ascending=True).index.tolist()
actions_sorted = actions_sorted[::-1]
violations_sorted = top_violations[::-1] # most frequent at the front
# 4. Prepare matrix for the plot
z_sorted = crosstab_top.loc[violations_sorted, actions_sorted].values
# 5. Set up axes for bar3d
_x = np.arange(len(actions_sorted))
_y = np.arange(len(violations_sorted))
_xx, _yy = np.meshgrid(_x, _y)
x, y = _xx.ravel(), _yy.ravel()
top_vals = z_sorted.ravel()
bottom = np.zeros_like(top_vals)
width = depth = 0.8
# 6. Build color matrix for significant racial disparities (PuOr colormap for divergence)
color_matrix = np.full_like(z_sorted, fill_value=0.0, dtype=np.float64)
sig_matrix = np.zeros_like(z_sorted, dtype=bool)
for i, v in enumerate(violations_sorted):
for j, a in enumerate(actions_sorted):
match = viz_df_top[
(viz_df_top['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'] == v) &
(viz_df_top['INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC'] == a)
]
if not match.empty:
color_matrix[i, j] = match['minority_pct_diff'].iloc[0]
sig_matrix[i, j] = match['statistically_significant'].iloc[0]
# 7. Set up color mapping for the significant cells (gray if not significant)
actual_min = np.nanmin(color_matrix[sig_matrix]) if np.any(sig_matrix) else 0
actual_max = np.nanmax(color_matrix[sig_matrix]) if np.any(sig_matrix) else 0
norm = TwoSlopeNorm(vmin=actual_min, vcenter=0, vmax=actual_max)
cmap = matplotlib.colormaps['PuOr']
colors = []
gray = (0.75, 0.75, 0.75, 1.0) # RGBA for gray
for val, sig in zip(color_matrix.ravel(), sig_matrix.ravel()):
if sig:
colors.append(cmap(norm(val)))
else:
colors.append(gray)
# 8. Plot 3D bars
fig = plt.figure(figsize=(17, 14))
ax = fig.add_subplot(111, projection='3d')
fig.patch.set_facecolor('black')
ax.set_facecolor('black')
# 3D bars
ax.bar3d(x, y, bottom, width, depth, top_vals, shade=True, color=colors)
# Axis and tick label formatting for white text
ax.set_xticks(_x + width/2)
ax.set_xticklabels(actions_sorted, rotation=45, ha='right', fontsize=9, color='white')
ax.set_yticks(_y + depth/2)
ax.set_yticklabels(violations_sorted, rotation=0, ha='left', fontsize=9, color='white')
ax.set_zlabel('Event Count', fontsize=12, color='white')
ax.set_xlabel('', fontsize=13, color='white')
ax.set_ylabel('', fontsize=13, color='white')
ax.set_title('FDLE Disciplinary Event Counts (Top 25 Violations × Outcomes)', fontsize=16, pad=40, color='white')
# Set all 3D axes lines and panes to white
ax.xaxis._axinfo['grid'].update({'color' : (1,1,1,0.13)})
ax.yaxis._axinfo['grid'].update({'color' : (1,1,1,0.13)})
ax.zaxis._axinfo['grid'].update({'color' : (1,1,1,0.13)})
for axis in [ax.xaxis, ax.yaxis, ax.zaxis]:
axis.line.set_color('white')
axis.label.set_color('white')
axis.pane.set_facecolor((0,0,0,1))
axis.pane.set_edgecolor('white')
# 3D tick lines white
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')
ax.tick_params(axis='z', colors='white')
ax.view_init(elev=30, azim=-65)
# COLORBAR — move higher on the plot using 'anchor' and 'fraction'
if np.any(sig_matrix):
mappable = matplotlib.cm.ScalarMappable(norm=norm, cmap=cmap)
mappable.set_array(color_matrix[sig_matrix])
cbar = fig.colorbar(
mappable, ax=ax,
shrink=0.5, aspect=12, pad=0.08,
anchor=(0.0, 0.85), # (x, y) between (0, 0) and (1, 1)
label="% More/Less Minorities Than Baseline (statistically significant only)"
)
cbar.ax.yaxis.label.set_color('white')
cbar.ax.tick_params(color='white', labelcolor='white')
cbar.outline.set_edgecolor('white')
bg = "#181f22"
fig.patch.set_facecolor(bg)
ax.set_facecolor(bg)
plt.savefig("assets/prelim_viz.png", bbox_inches='tight', dpi=220, facecolor=bg)
plt.show()
In [22]:
violation_to_category = {
"Disobeying verbal or written order": "Officer Got Upset",
"Disrespect persons of authority": "Officer Got Upset",
"Disorderly conduct": "Officer Got Upset",
"Use of unauthorized drugs": "Self-Harm/Addiction",
"Fighting": "Violence/Threat",
"Unauthorized possession/use of communication device/technology": "Banned Everyday Item",
"Possession/manufacture of weapons, ammunition or explosives": "Weapons",
"Spoken, written or gestured threats": "Violence/Threat",
"Possession of narcotics/unauthorized drugs and paraphernalia": "Self-Harm/Addiction",
"Being in unauthorized area": "Administrative/Compliance",
"Destruction of State or another property": "Property",
"Lewd or lascivious exhibition": "Sexual/Relational",
"Possession or transfer of other contraband": "Banned Everyday Item",
"Disobeying institutional regulations": "Administrative/Compliance",
"Battery or attempted battery on an inmate": "Violence/Threat",
"Lying to staff or falsifying records": "Administrative/Compliance",
"Battery or attempted battery on correctional officer": "Violence/Threat",
"Unauthorized absence from assigned area": "Administrative/Compliance",
"Possession of aromatic stimulants or depressants": "Self-Harm/Addiction",
"Personal or business relationship with staff member or volunteer": "Sexual/Relational",
"Possession of tobacco-related product by non-death row or non-community release inmate": "Banned Everyday Item",
"Aggravated battery or attempted on inmate": "Violence/Threat",
"Telephone regulation violations": "Administrative/Compliance",
"Refusing to work or participate in mandatory programs": "Administrative/Compliance",
"Obscene or profane act, gesture or statement": "Officer Got Upset",
"Creating, participating in or inciting a minor disturbance": "Officer Got Upset",
"Failure to comply with count procedures": "Officer Got Upset",
"Misuse of State or another property": "Property",
"Refusing to submit to substance abuse testing": "Administrative/Compliance",
"Tampering/defeating/depriving staff of security device": "Property",
"Attempt or conspire to commit crime or violate rules": "Administrative/Compliance",
"Theft of property under $50 in value": "Property",
"Introduction of any contraband": "Banned Everyday Item",
"Tattooing, branding or body art including piercing": "Surviving Inside",
"Possession of gang-related paraphernalia or material": "Gang/Security State",
"Possession of unauthorized beverages": "Banned Everyday Item",
"Failure to maintain acceptable hygiene/appearance of housing": "Administrative/Compliance",
"Sex acts or unauthorized physical contact involving inmates": "Sexual/Relational",
"Tampering/defeating any fire or other safety device": "Property",
"Failure to maintain personal hygiene/appearance": "Administrative/Compliance",
"Inciting riots, strikes, mutinous acts or disturbances": "Officer Got Upset",
"Possession of negotiables in unauthorized amounts, forms or location": "Banned Everyday Item",
"Other assault/battery or attempted": "Violence/Threat",
"Unauthorized/illegal financial transaction": "Surviving Inside",
"Participating in riots, strikes, mutinous acts or disturbances": "Officer Got Upset",
"Altering/defacing State or another property": "Property",
"Gang-related activities": "Gang/Security State",
"Assault or attempted assault on a correctional officer": "Violence/Threat",
"Possession of stolen property": "Property",
"Mail regulation violations": "Administrative/Compliance",
"Canteen Shortage over $50": "Property",
"Possession of escape paraphernalia": "Escape/Evasion",
"Possession of items facilitating unauthorized/illegal financial transaction": "Surviving Inside",
"Bartering with others": "Surviving Inside",
"Manufacture of drugs or unauthorized beverages": "Drug Trade",
"Gambling or possession of gambling paraphernalia": "Surviving Inside",
"Unauthorized physical contact involving non-inmates": "Sexual/Relational",
"Aggravated battery or attempted on correctional officer": "Violence/Threat",
"Theft of property exceeding $50 in value": "Property",
"Assault or attempted assault on an inmate": "Violence/Threat",
"Battery or attempted battery on staff other than correctional officer": "Violence/Threat",
"Possession of unauthorized clothing or linen": "Banned Everyday Item",
"Trafficking in drugs or unauthorized beverages": "Drug Trade",
"Arson or attempted arson": "Violence/Threat",
"Frivolous, malicious, false information presented to court": "Officer Got Upset",
"Assault/battery or attempted with deadly weapon": "Violence/Threat",
"Destruction of State or another property by gross negligence": "Property",
"Aggravated assault or attempted on inmate": "Violence/Threat",
"Use of Alcohol": "Self-Harm/Addiction",
"Escape or escape attempt": "Escape/Evasion",
"Visiting regulation violations": "Administrative/Compliance",
"Insufficient work": "Administrative/Compliance",
"Missing count": "Administrative/Compliance",
"Fraud or attempted fraud": "Surviving Inside",
"Extortion or attempted extortion": "Surviving Inside",
"Feigning illness or malingering": "Surviving Inside",
"Loaning or borrowing money or other valuables": "Surviving Inside",
"Wasting State or another property": "Property",
"Presenting false testimony or information": "Administrative/Compliance",
"Possession of unauthorized or altered identification": "Administrative/Compliance",
"Unauthorized use or tampering with computer or office equipment": "Property",
"Bribery or attempted bribery": "Surviving Inside",
"Aggravated assault or attempted on correctional officer": "Violence/Threat",
"Robbery or attempted robbery": "Violence/Threat",
"Assault or attempted assault on staff other than correctional officer": "Violence/Threat",
"Sexual battery or attempted": "Sexual/Relational",
"Deviating from approved plan": "Officer Got Upset",
"Breaking and entering or attempt": "Property",
"Aggravated battery or attempted on staff other than correctional officer": "Violence/Threat",
"Self-Mutilation": "Self-Harm/Addiction",
"Canteen Shortage under $50": "Property",
"Battery or attempted on other than staff or inmates": "Violence/Threat",
"Tampering/damaging electronic monitoring equipment": "Administrative/Compliance",
"Failure to deposit earnings": "Administrative/Compliance",
"Failure to proceed to/from designated area": "Administrative/Compliance",
"Possession of tobacco-related product by death row inmate": "Banned Everyday Item",
"Assault or attempted assault on other than staff/inmates": "Violence/Threat",
"Unauthorized contact in behalf of another inmate": "Administrative/Compliance",
"False property lien": "Administrative/Compliance",
"Aggravated assault or attempted on staff other than correctional officer": "Violence/Threat",
"Failure to remain within designated area of release plan": "Administrative/Compliance",
"Making purchase or contract without approval": "Surviving Inside",
"Aggravated battery or attempted not on staff/inmates": "Violence/Threat",
"Consume Intoxicants": "Self-Harm/Addiction",
}
combo['Violation_Category'] = combo['INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC'].map(violation_to_category)
In [23]:
action_to_bin = {
"Disciplinary Confinement": "Cell Confinement",
"Disciplinary Confinement Part Time": "Cell Confinement",
"Loss of Gain Time": "Extended Incarceration",
"Loss of Future Gain Time": "Extended Incarceration",
"Cashless Canteen Denial": "Canteen Suspension",
"Privilege Suspension - Other": "Privilege Suspension",
"Privilege Suspension - Mail, Visitation": "Mail & Visitation Suspension",
"Privilege Suspension - Visitation": "Visitation Suspension",
"Privilege Suspension - Mail": "Mail Suspension",
"Verbal Reprimand": "Verbal Warning",
"Disciplinary Squad": "Forced Labor Detail",
"Restricted Labor Squad": "Forced Labor Detail",
"Extra Duty": "Forced Labor Detail",
"Restitution Payment": "Financial Penalty",
"Alternative Housing": "Alternative Housing",
}
combo['Action_Category'] = combo['INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC'].map(action_to_bin)
In [24]:
demo_df2 = dataframes['Incarcerationsupervision_00000.csv'][['INCARCERATION_SUPERVISION_ID', 'race_ethnicity_group']].copy()
merged2 = combo.merge(demo_df2, on='INCARCERATION_SUPERVISION_ID', how='inner')
merged2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 562357 entries, 0 to 562356 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 INCARCERATION_SUPERVISION_ID 562357 non-null int64 1 INCARCERATION_DISCIPLINARY_ACTION_TYPE_DESC 562357 non-null object 2 INCARCERATION_DISCIPLINARY_VIOLATION_TYPE_DESC 562357 non-null object 3 Violation_Category 562357 non-null object 4 Action_Category 562357 non-null object 5 race_ethnicity_group 562357 non-null object dtypes: int64(1), object(5) memory usage: 25.7+ MB
In [25]:
# 2. Groupby and calculate the cell-level minority proportion
cell_group = (
merged2
.groupby(['Violation_Category', 'Action_Category'])
)
# 3. Calculate: number of minorities, total count, and proportion for each cell
cell_stats = cell_group['race_ethnicity_group'].value_counts(normalize=False).unstack(fill_value=0)
cell_stats['total'] = cell_stats.sum(axis=1)
cell_stats['minority_prop'] = cell_stats.get('minority', 0) / cell_stats['total']
# 4. Compute deviation from baseline
cell_stats['minority_pct_diff'] = 100 * (cell_stats['minority_prop'] / baseline_minority - 1)
# 5. Reset index for visualization
cell_stats = cell_stats.reset_index()
viz_df2 = cell_stats[['Violation_Category',
'Action_Category',
'total', 'minority_prop', 'minority_pct_diff']].copy()
# 6. Run a z-test for each cell and add significance to viz_df
viz_df2['zstat'] = np.nan
viz_df2['pval'] = np.nan
viz_df2['statistically_significant'] = False
for idx, row in viz_df2.iterrows():
count = int(round(row['minority_prop'] * row['total']))
nobs = int(row['total'])
if nobs > 0:
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
stat, pval = proportions_ztest(count, nobs, baseline_minority)
viz_df2.at[idx, 'zstat'] = stat
viz_df2.at[idx, 'pval'] = pval
viz_df2.at[idx, 'statistically_significant'] = (pval < 0.05) and (nobs >= 10)
else:
viz_df2.at[idx, 'statistically_significant'] = False
print(viz_df2.info(), viz_df2.head())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 125 entries, 0 to 124 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Violation_Category 125 non-null object 1 Action_Category 125 non-null object 2 total 125 non-null int64 3 minority_prop 125 non-null float64 4 minority_pct_diff 125 non-null float64 5 zstat 125 non-null float64 6 pval 125 non-null float64 7 statistically_significant 125 non-null bool dtypes: bool(1), float64(4), int64(1), object(2) memory usage: 7.1+ KB None race_ethnicity_group Violation_Category Action_Category \ 0 Administrative/Compliance Alternative Housing 1 Administrative/Compliance Canteen Suspension 2 Administrative/Compliance Cell Confinement 3 Administrative/Compliance Extended Incarceration 4 Administrative/Compliance Financial Penalty race_ethnicity_group total minority_prop minority_pct_diff zstat \ 0 251 0.752988 27.677981 5.996396 1 10589 0.706866 19.857380 26.474007 2 32458 0.707406 19.949092 46.589627 3 18135 0.666998 13.097275 22.071201 4 87 0.632184 7.194218 0.820688 race_ethnicity_group pval statistically_significant 0 2.017445e-09 True 1 1.931303e-154 True 2 0.000000e+00 True 3 5.978248e-108 True 4 4.118239e-01 False
In [26]:
# Ensure sorted axes, so 3D placement is consistent
actions_sorted = viz_df2.groupby('Action_Category')['total'].sum().sort_values(ascending=False).index.tolist()
violations_sorted = viz_df2.groupby('Violation_Category')['total'].sum().sort_values(ascending=False).index.tolist()
# Add i (row) and j (col) indices for easy 3D plotting later
viz_df2['action_idx'] = viz_df2['Action_Category'].apply(lambda x: actions_sorted.index(x))
viz_df2['violation_idx'] = viz_df2['Violation_Category'].apply(lambda x: violations_sorted.index(x))
# Filter only top violations/actions if needed, or leave as is for all
# (Optional) e.g. top 20 violations:
top_violations = violations_sorted[:25]
viz_df2 = viz_df2[viz_df2['Violation_Category'].isin(top_violations)]
# Select columns for three.js
export_df = viz_df2[[
'Violation_Category', 'Action_Category',
'total', 'minority_pct_diff', 'statistically_significant',
'violation_idx', 'action_idx'
]].rename(columns={
'Violation_Category': 'violation',
'Action_Category': 'action',
'total': 'count'
})
# Export as JSON (best for JavaScript)
export_df.to_json('fdle_discipline_cats_3d.json', orient='records', indent=2)