Manage dataset files
Use the REST API to manage dataset files.
Uploads a file to a dataset for processing.
Required Permissions
- Dataset: Upload Files
File to upload
OK
POST /api/Dataset/{datasetId}/files/upload HTTP/1.1
Host:
Content-Type: multipart/form-data
Accept: */*
Content-Length: 288
{
"document": {
"fileName": "example.txt",
"csvConfig": {
"numColumns": 1,
"hasHeader": true,
"escapeChar": "text",
"quoteChar": "text",
"delimiter": "text",
"nullChar": "text"
},
"datasetId": "6a01360f-78fc-9f2f-efae-c5e1461e9c1e",
"customPiiEntityIds": [
"CUSTOM_ENTITY_1",
"CUSTOM_ENTITY_2"
]
},
"file": "binary"
}
OK
{
"updatedDataset": {
"id": "text",
"name": "text",
"generatorMetadata": "asdfqwer",
"outputFormat": "Original",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"tags": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text",
"fileParseResultId": "text",
"filePath": "text",
"generatedFileStatus": "text"
}
],
"lastUpdated": {},
"created": {},
"creatorUser": {
"id": "text",
"userName": "text",
"firstName": "text",
"lastName": "text"
},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"pdfSynthModePolicy": "V1",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"operations": [
"HasAccess"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"datasetId": "text",
"jobType": "DeidentifyFile"
}
],
"mostRecentExternalFileGenerationJob": {
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"datasetId": "text",
"jobType": "DeidentifyFile"
},
"fileSourceExternalCredential": {
"fileSource": "Local",
"credential": {}
},
"awsCredentialSource": "text",
"outputPath": "text",
"externalFilesInfo": {
"selectedFiles": [
"text"
],
"pathPrefixes": [
"text"
],
"selectedFileExtensions": [
"text"
]
}
},
"uploadedFileId": "text"
}
Downloads the specified file from the dataset. The downloaded file is redacted based on the dataset configuration.
Required Permissions
- Dataset: Download Redacted Files
OK
Bad Request
Not Found
Conflict
Internal Server Error
GET /api/Dataset/{datasetId}/files/{fileId}/download HTTP/1.1
Host:
Accept: */*
binary
Downloads all files from the specified dataset. The downloaded files are redacted based on the dataset configuration.
Required Permissions
- Dataset: Download Redacted Files
OK
Bad Request
Not Found
Internal Server Error
GET /api/Dataset/{datasetId}/files/download_all HTTP/1.1
Host:
Accept: */*
binary
Edits a dataset with the specified configuration.
OK
The dataset cannot be found
Dataset name is already in use
PUT /api/Dataset HTTP/1.1
Host:
Content-Type: application/json
Accept: */*
Content-Length: 507
{
"id": "text",
"name": "text",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"datasetGeneratorMetadata": {
"ANY_ADDITIONAL_PROPERTY": {}
},
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact"
}
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
Returns all datasets to which the user has access.
false
OK
GET /api/Dataset HTTP/1.1
Host:
Accept: */*
OK
[
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
]
Creates a new dataset with the specified configuration. You must specify a unique, non-empty dataset name.
OK
The dataset name must be specified
Dataset name is already in use
POST /api/Dataset HTTP/1.1
Host:
Content-Type: application/json
Accept: */*
Content-Length: 15
{
"name": "text"
}
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
Returns the dataset specified by the datasetId.
OK
The dataset cannot be found
GET /api/Dataset/{datasetId} HTTP/1.1
Host:
Accept: */*
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
Last updated
Was this helpful?