Manage dataset files
Use the REST API to manage dataset files.
Last updated
Was this helpful?
Use the REST API to manage dataset files.
Last updated
Was this helpful?
Downloads all files from the specified dataset. The downloaded files are redacted based on the dataset configuration.
GET /api/Dataset/{datasetId}/files/download_all HTTP/1.1
Host:
Accept: */*
binary
Downloads the specified file from the dataset. The downloaded file is redacted based on the dataset configuration.
GET /api/Dataset/{datasetId}/files/{fileId}/download HTTP/1.1
Host:
Accept: */*
binary
Upload a file to a dataset for processing.
File to upload
POST /api/Dataset/{datasetId}/files/upload HTTP/1.1
Host:
Content-Type: multipart/form-data
Accept: */*
Content-Length: 288
{
"document": {
"fileName": "example.txt",
"csvConfig": {
"numColumns": 1,
"hasHeader": true,
"escapeChar": "text",
"quoteChar": "text",
"delimiter": "text",
"nullChar": "text"
},
"datasetId": "6a01360f-78fc-9f2f-efae-c5e1461e9c1e",
"customPiiEntityIds": [
"CUSTOM_ENTITY_1",
"CUSTOM_ENTITY_2"
]
},
"file": "binary"
}
OK
{
"updatedDataset": {
"id": "text",
"name": "text",
"generatorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"tags": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"created": {},
"creatorUser": {
"id": "text",
"userName": "text",
"firstName": "text",
"lastName": "text"
},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"pdfSynthModePolicy": "V1",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"operations": [
"HasAccess"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
],
"fileSourceExternalCredential": {
"fileSource": "Local",
"credential": {}
},
"awsCredentialSource": "text",
"outputPath": "text"
},
"uploadedFileId": "text"
}
Returns all datasets to which the user has access.
false
GET /api/Dataset HTTP/1.1
Host:
Accept: */*
OK
[
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
]
Returns the dataset specified by the datasetId.
GET /api/Dataset/{datasetId} HTTP/1.1
Host:
Accept: */*
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
Edits a dataset with the specified configuration.
PUT /api/Dataset HTTP/1.1
Host:
Content-Type: application/json
Accept: */*
Content-Length: 507
{
"id": "text",
"name": "text",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"datasetGeneratorMetadata": {
"ANY_ADDITIONAL_PROPERTY": {}
},
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact"
}
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
Creates a new dataset with the specified configuration. You must specify a unique, non-empty dataset name.
POST /api/Dataset HTTP/1.1
Host:
Content-Type: application/json
Accept: */*
Content-Length: 15
{
"name": "text"
}
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}