Manage datasets
Last updated
Was this helpful?
Last updated
Was this helpful?
Use the REST API to create and manage datasets.
Updates a dataset to use the specified configuration.
PUT /api/Dataset HTTP/1.1
Host:
Content-Type: application/json
Accept: */*
Content-Length: 568
{
"id": "text",
"name": "text",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"datasetGeneratorMetadata": {
"ANY_ADDITIONAL_PROPERTY": {
"version": "V1",
"fontType": "Unknown"
}
},
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"pdfSynthModePolicy": "V1",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact"
}
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"pdfSynthModePolicy": "V1",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"operations": [
"HasAccess"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
Returns all datasets to which the user has access.
false
GET /api/Dataset HTTP/1.1
Host:
Accept: */*
OK
[
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
]
Returns the dataset specified by the datasetId path parameter.
GET /api/Dataset/{datasetId} HTTP/1.1
Host:
Accept: */*
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}
Creates a new dataset with the specified configuration. You must specify a unique, non-empty dataset name.
POST /api/Dataset HTTP/1.1
Host:
Content-Type: application/json
Accept: */*
Content-Length: 15
{
"name": "text"
}
{
"id": "text",
"name": "text",
"datasetGeneratorMetadata": "asdfqwer",
"generatorSetup": "{\"NAME_GIVEN\":\"Redaction\", \"NAME_FAMILY\":\"Redaction\"}",
"labelBlockLists": "{\"NAME_FAMILY\": {\"strings\":[],\"regexes\":[\".*\\\\s(disease|syndrom|disorder)\"]}}",
"labelAllowLists": "{ \"HEALTHCARE_ID\": {\"strings\":[],\"regexes\":[\"[a-z]{2}\\\\d{9}\"]} }",
"enabledModels": [
"text"
],
"files": [
{
"fileId": "text",
"fileName": "text",
"fileType": "text",
"datasetId": "text",
"numRows": 1,
"numColumns": 1,
"piiTypes": [
"text"
],
"wordCount": 1,
"redactedWordCount": 1,
"uploadedTimestamp": {},
"fileSource": "Local",
"processingStatus": "text",
"processingError": "text",
"mostRecentCompletedJobId": "text"
}
],
"lastUpdated": {},
"docXImagePolicy": "Redact",
"pdfSignaturePolicy": "Redact",
"docXCommentPolicy": "Remove",
"docXTablePolicy": "Redact",
"fileSource": "Local",
"customPiiEntityIds": [
"text"
],
"rescanJobs": [
{
"id": "text",
"status": "text",
"errorMessages": "text",
"startTime": {},
"endTime": {},
"publishedTime": {},
"datasetFileId": "text",
"jobType": "DeidentifyFile"
}
]
}