Transformers - Cognitive

Version: 1.0.4

Text Analytics

EntityDetector

Python
Scala

from synapse.ml.services import *

textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
    ("1", "Microsoft released Windows 10"),
    ("2", "In 1975, Bill Gates III and Paul Allen founded the company.")
], ["id", "text"])

entity = (EntityDetector()
      .setSubscriptionKey(textKey)
      .setLocation("eastus")
      .setLanguage("en")
      .setOutputCol("replies")
      .setErrorCol("error"))

entity.transform(df).show()

import com.microsoft.azure.synapse.ml.services.text.EntityDetector
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}

val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  ("1", "Microsoft released Windows 10"),
  ("2", "In 1975, Bill Gates III and Paul Allen founded the company.")
).toDF("id", "text")

val entity = (new EntityDetector()
  .setSubscriptionKey(textKey)
  .setLocation("eastus")
  .setLanguage("en")
  .setOutputCol("replies"))

entity.transform(df).show()

Python API: EntityDetector

Scala API: EntityDetector

Source: EntityDetector

KeyPhraseExtractor

Python
Scala

from synapse.ml.services import *

textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
    ("en", "Hello world. This is some input text that I love."),
    ("fr", "Bonjour tout le monde"),
    ("es", "La carretera estaba atascada. Había mucho tráfico el día de ayer.")
], ["lang", "text"])

keyPhrase = (KeyPhraseExtractor()
            .setSubscriptionKey(textKey)
            .setLocation("eastus")
            .setLanguageCol("lang")
            .setOutputCol("replies")
            .setErrorCol("error"))

keyPhrase.transform(df).show()

import com.microsoft.azure.synapse.ml.services.text.KeyPhraseExtractor
import spark.implicits._

val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  ("en", "Hello world. This is some input text that I love."),
  ("fr", "Bonjour tout le monde"),
  ("es", "La carretera estaba atascada. Había mucho tráfico el día de ayer."),
  ("en", null)
).toDF("lang", "text")

val keyPhrase = (new KeyPhraseExtractor()
  .setSubscriptionKey(textKey)
  .setLocation("eastus")
  .setLanguageCol("lang")
  .setOutputCol("replies"))

keyPhrase.transform(df).show()

Python API: KeyPhraseExtractor

Scala API: KeyPhraseExtractor

Source: KeyPhraseExtractor

LanguageDetector

Python
Scala

from synapse.ml.services import *

textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
  ("Hello World",),
  ("Bonjour tout le monde",),
  ("La carretera estaba atascada. Había mucho tráfico el día de ayer.",),
  ("你好",),
  ("こんにちは",),
  (":) :( :D",)
], ["text",])

language = (LanguageDetector()
            .setSubscriptionKey(textKey)
            .setLocation("eastus")
            .setTextCol("text")
            .setOutputCol("language")
            .setErrorCol("error"))

language.transform(df).show()

import com.microsoft.azure.synapse.ml.services.text.LanguageDetector
import spark.implicits._

val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  "Hello World",
  "Bonjour tout le monde",
  "La carretera estaba atascada. Había mucho tráfico el día de ayer.",
  ":) :( :D"
).toDF("text")

val language = (new LanguageDetector()
  .setSubscriptionKey(textKey)
  .setLocation("eastus")
  .setOutputCol("replies"))

language.transform(df).show()

Python API: LanguageDetector

Scala API: LanguageDetector

Source: LanguageDetector

NER

Python
Scala

from synapse.ml.services import *

textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
    ("1", "en", "I had a wonderful trip to Seattle last week."),
    ("2", "en", "I visited Space Needle 2 times.")
], ["id", "language", "text"])

ner = (NER()
      .setSubscriptionKey(textKey)
      .setLocation("eastus")
      .setLanguageCol("language")
      .setOutputCol("replies")
      .setErrorCol("error"))

ner.transform(df).show()

import com.microsoft.azure.synapse.ml.services.text.NER
import spark.implicits._

val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  ("1", "en", "I had a wonderful trip to Seattle last week."),
  ("2", "en", "I visited Space Needle 2 times.")
).toDF("id", "language", "text")

val ner = (new NER()
  .setSubscriptionKey(textKey)
  .setLocation("eastus")
  .setLanguage("en")
  .setOutputCol("response"))

ner.transform(df).show()

Python API: NER

Scala API: NER

Source: NER

PII

Python
Scala

from synapse.ml.services import *

textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
    ("1", "en", "My SSN is 859-98-0987"),
    ("2", "en",
      "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check."),
    ("3", "en", "Is 998.214.865-68 your Brazilian CPF number?")
], ["id", "language", "text"])

pii = (PII()
      .setSubscriptionKey(textKey)
      .setLocation("eastus")
      .setLanguage("en")
      .setOutputCol("response"))

pii.transform(df).show()

import com.microsoft.azure.synapse.ml.services.text.PII
import spark.implicits._

val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  ("1", "en", "My SSN is 859-98-0987"),
  ("2", "en",
    "Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check."),
  ("3", "en", "Is 998.214.865-68 your Brazilian CPF number?")
).toDF("id", "language", "text")

val pii = (new PII()
  .setSubscriptionKey(textKey)
  .setLocation("eastus")
  .setLanguage("en")
  .setOutputCol("response"))

pii.transform(df).show()

Python API: PII

Scala API: PII

Source: PII

TextSentiment

Python
Scala

from synapse.ml.services import *

textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
  ("I am so happy today, its sunny!", "en-US"),
  ("I am frustrated by this rush hour traffic", "en-US"),
  ("The cognitive services on spark aint bad", "en-US"),
], ["text", "language"])

sentiment = (TextSentiment()
            .setSubscriptionKey(textKey)
            .setLocation("eastus")
            .setTextCol("text")
            .setOutputCol("sentiment")
            .setErrorCol("error")
            .setLanguageCol("language"))

sentiment.transform(df).show()

import com.microsoft.azure.synapse.ml.services.text.TextSentiment
import spark.implicits._

val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  ("en", "Hello world. This is some input text that I love."),
  ("fr", "Bonjour tout le monde"),
  ("es", "La carretera estaba atascada. Había mucho tráfico el día de ayer."),
  (null, "ich bin ein berliner"),
  (null, null),
  ("en", null)
).toDF("lang", "text")

val sentiment = (new TextSentiment()
  .setSubscriptionKey(textKey)
  .setLocation("eastus")
  .setLanguageCol("lang")
  .setModelVersion("latest")
  .setShowStats(true)
  .setOutputCol("replies"))

sentiment.transform(df).show()

Python API: TextSentiment

Scala API: TextSentiment

Source: TextSentiment

Translator

Translate

Python
Scala

from synapse.ml.services import *

translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = spark.createDataFrame([
  (["Hello, what is your name?", "Bye"],)
], ["text",])

translate = (Translate()
            .setSubscriptionKey(translatorKey)
            .setLocation("eastus")
            .setTextCol("text")
            .setToLanguage(["zh-Hans", "fr"])
            .setOutputCol("translation")
            .setConcurrency(5))

(translate
      .transform(df)
      .withColumn("translation", flatten(col("translation.translations")))
      .withColumn("translation", col("translation.text"))
      .select("translation")).show()

import com.microsoft.azure.synapse.ml.services.translate.Translate
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}

val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", None)
val df = Seq(List("Hello, what is your name?", "Bye")).toDF("text")

val translate = (new Translate()
  .setSubscriptionKey(translatorKey)
  .setLocation("eastus")
  .setTextCol("text")
  .setToLanguage(Seq("zh-Hans", "fr"))
  .setOutputCol("translation")
  .setConcurrency(5))

(translate
  .transform(df)
  .withColumn("translation", flatten(col("translation.translations")))
  .withColumn("translation", col("translation.text"))
  .select("translation")).show()

Python API: Translate

Scala API: Translate

Source: Translate

Transliterate

Python
Scala

from synapse.ml.services import *

translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df =  spark.createDataFrame([
  (["こんにちは", "さようなら"],)
], ["text",])

transliterate = (Transliterate()
            .setSubscriptionKey(translatorKey)
            .setLocation("eastus")
            .setLanguage("ja")
            .setFromScript("Jpan")
            .setToScript("Latn")
            .setTextCol("text")
            .setOutputCol("result"))

(transliterate
    .transform(df)
    .withColumn("text", col("result.text"))
    .withColumn("script", col("result.script"))
    .select("text", "script")).show()

import com.microsoft.azure.synapse.ml.services.translate.Transliterate
import spark.implicits._
import org.apache.spark.sql.functions.col

val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", None)
val df = Seq(List("こんにちは", "さようなら")).toDF("text")

val transliterate = (new Transliterate()
  .setSubscriptionKey(translatorKey)
  .setLocation("eastus")
  .setLanguage("ja")
  .setFromScript("Jpan")
  .setToScript("Latn")
  .setTextCol("text")
  .setOutputCol("result"))

(transliterate
  .transform(df)
  .withColumn("text", col("result.text"))
  .withColumn("script", col("result.script"))
  .select("text", "script")).show()

Python API: Transliterate

Scala API: Transliterate

Source: Transliterate

Detect

Python
Scala

from synapse.ml.services import *

translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df =  spark.createDataFrame([
  (["Hello, what is your name?"],)
], ["text",])

detect = (Detect()
      .setSubscriptionKey(translatorKey)
      .setLocation("eastus")
      .setTextCol("text")
      .setOutputCol("result"))

(detect
    .transform(df)
    .withColumn("language", col("result.language"))
    .select("language")).show()

import com.microsoft.azure.synapse.ml.services.translate.Detect
import spark.implicits._
import org.apache.spark.sql.functions.col

val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", None)
val df = Seq(List("Hello, what is your name?")).toDF("text")

val detect = (new Detect()
  .setSubscriptionKey(translatorKey)
  .setLocation("eastus")
  .setTextCol("text")
  .setOutputCol("result"))

(detect
  .transform(df)
  .withColumn("language", col("result.language"))
  .select("language")).show()

Python API: Detect

Scala API: Detect

Source: Detect

BreakSentence

Python
Scala

from synapse.ml.services import *

translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df =  spark.createDataFrame([
  (["Hello, what is your name?"],)
], ["text",])

breakSentence = (BreakSentence()
            .setSubscriptionKey(translatorKey)
            .setLocation("eastus")
            .setTextCol("text")
            .setOutputCol("result"))

(breakSentence
    .transform(df)
    .withColumn("sentLen", flatten(col("result.sentLen")))
    .select("sentLen")).show()

import com.microsoft.azure.synapse.ml.services.translate.BreakSentence
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}

val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", None)
val df = Seq(List("Hello, what is your name?")).toDF("text")

val breakSentence = (new BreakSentence()
  .setSubscriptionKey(translatorKey)
  .setLocation("eastus")
  .setTextCol("text")
  .setOutputCol("result"))

(breakSentence
  .transform(df)
  .withColumn("sentLen", flatten(col("result.sentLen")))
  .select("sentLen")).show()

Python API: BreakSentence

Scala API: BreakSentence

Source: BreakSentence

DictionaryLookup

Python
Scala

from synapse.ml.services import *

translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = spark.createDataFrame([
  (["fly"],)
], ["text",])

dictionaryLookup = (DictionaryLookup()
                  .setSubscriptionKey(translatorKey)
                  .setLocation("eastus")
                  .setFromLanguage("en")
                  .setToLanguage("es")
                  .setTextCol("text")
                  .setOutputCol("result"))

(dictionaryLookup
    .transform(df)
    .withColumn("translations", flatten(col("result.translations")))
    .withColumn("normalizedTarget", col("translations.normalizedTarget"))
    .select("normalizedTarget")).show()

import com.microsoft.azure.synapse.ml.services.translate.DictionaryLookup
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}

val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", None)
val df = Seq(List("fly")).toDF("text")

val dictionaryLookup = (new DictionaryLookup()
  .setSubscriptionKey(translatorKey)
  .setLocation("eastus")
  .setFromLanguage("en")
  .setToLanguage("es")
  .setTextCol("text")
  .setOutputCol("result"))

(dictionaryLookup
  .transform(df)
  .withColumn("translations", flatten(col("result.translations")))
  .withColumn("normalizedTarget", col("translations.normalizedTarget"))
  .select("normalizedTarget")).show()

Python API: DictionaryLookup

Scala API: DictionaryLookup

Source: DictionaryLookup

DictionaryExamples

Python
Scala

from synapse.ml.services import *

translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = (spark.createDataFrame([
  ("fly", "volar")
], ["text", "translation"])
    .withColumn("textAndTranslation", array(struct(col("text"), col("translation")))))

dictionaryExamples = (DictionaryExamples()
                  .setSubscriptionKey(translatorKey)
                  .setLocation("eastus")
                  .setFromLanguage("en")
                  .setToLanguage("es")
                  .setTextAndTranslationCol("textAndTranslation")
                  .setOutputCol("result"))

(dictionaryExamples
    .transform(df)
    .withColumn("examples", flatten(col("result.examples")))
    .select("examples")).show()

import com.microsoft.azure.synapse.ml.services.translate.{DictionaryExamples, TextAndTranslation}
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}

val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", None)
val df = Seq(List(TextAndTranslation("fly", "volar"))).toDF("textAndTranslation")

val dictionaryExamples = (new DictionaryExamples()
  .setSubscriptionKey(translatorKey)
  .setLocation("eastus")
  .setFromLanguage("en")
  .setToLanguage("es")
  .setTextAndTranslationCol("textAndTranslation")
  .setOutputCol("result"))

(dictionaryExamples
  .transform(df)
  .withColumn("examples", flatten(col("result.examples")))
  .select("examples")).show()

Python API: DictionaryExamples

Scala API: DictionaryExamples

Source: DictionaryExamples

DocumentTranslator

Python
Scala

from synapse.ml.services import *

translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
translatorName = os.environ.get("TRANSLATOR_NAME", "mmlspark-translator")

documentTranslator = (DocumentTranslator()
                  .setSubscriptionKey(translatorKey)
                  .setServiceName(translatorName)
                  .setSourceUrlCol("sourceUrl")
                  .setTargetsCol("targets")
                  .setOutputCol("translationStatus"))

import com.microsoft.azure.synapse.ml.services.translate.DocumentTranslator
import spark.implicits._

val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", None)
val translatorName = sys.env.getOrElse("TRANSLATOR_NAME", None)

val documentTranslator = (new DocumentTranslator()
  .setSubscriptionKey(translatorKey)
  .setServiceName(translatorName)
  .setSourceUrlCol("sourceUrl")
  .setTargetsCol("targets")
  .setOutputCol("translationStatus"))

Python API: DocumentTranslator

Scala API: DocumentTranslator

Source: DocumentTranslator

Computer Vision

OCR

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))

df = spark.createDataFrame([
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", ),
    ], ["url", ])

ocr = (OCR()
        .setSubscriptionKey(cognitiveKey)
        .setLocation("eastus")
        .setImageUrlCol("url")
        .setDetectOrientation(True)
        .setOutputCol("ocr"))

ocr.transform(df).show()

import com.microsoft.azure.synapse.ml.services.vision.OCR
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg"
).toDF("url")


val ocr = (new OCR()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setDetectOrientation(true)
  .setOutputCol("ocr"))

ocr.transform(df).show()

Python API: OCR

Scala API: OCR

Source: OCR

AnalyzeImage

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", None),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", "en")
    ], ["image", "language"])


ai = (AnalyzeImage()
        .setSubscriptionKey(cognitiveKey)
        .setLocation("eastus")
        .setImageUrlCol("image")
        .setLanguageCol("language")
        .setVisualFeatures(["Categories", "Tags", "Description", "Faces", "ImageType", "Color", "Adult", "Objects", "Brands"])
        .setDetails(["Celebrities", "Landmarks"])
        .setOutputCol("features"))

ai.transform(df).show()

import com.microsoft.azure.synapse.ml.services.vision.AnalyzeImage
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
  ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", null),
  ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", "en")
).toDF("url", "language")

val ai = (new AnalyzeImage()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setLanguageCol("language")
  .setVisualFeatures(Seq("Categories", "Tags", "Description", "Faces", "ImageType", "Color", "Adult", "Objects", "Brands"))
  .setDetails(Seq("Celebrities", "Landmarks"))
  .setOutputCol("features"))

ai.transform(df).select("url", "features").show()

Python API: AnalyzeImage

Scala API: AnalyzeImage

Source: AnalyzeImage

RecognizeText

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", ),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", ),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", )
    ], ["url", ])

rt = (RecognizeText()
        .setSubscriptionKey(cognitiveKey)
        .setLocation("eastus")
        .setImageUrlCol("url")
        .setMode("Printed")
        .setOutputCol("ocr")
        .setConcurrency(5))

rt.transform(df).show()

import com.microsoft.azure.synapse.ml.services.vision.RecognizeText
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png",
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png"
).toDF("url")

val rt = (new RecognizeText()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setMode("Printed")
  .setOutputCol("ocr")
  .setConcurrency(5))

rt.transform(df).show()

Python API: RecognizeText

Scala API: RecognizeText

Source: RecognizeText

ReadImage

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", ),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", ),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", )
    ], ["url", ])

ri = (ReadImage()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setOutputCol("ocr")
    .setConcurrency(5))

ri.transform(df).show()

import com.microsoft.azure.synapse.ml.services.vision.ReadImage
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png",
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png"
).toDF("url")

val ri = (new ReadImage()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("ocr")
  .setConcurrency(5))

ri.transform(df).show()

Python API: ReadImage

Scala API: ReadImage

Source: ReadImage

RecognizeDomainSpecificContent

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
        ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg", )
    ], ["url", ])

celeb = (RecognizeDomainSpecificContent()
        .setSubscriptionKey(cognitiveKey)
        .setModel("celebrities")
        .setLocation("eastus")
        .setImageUrlCol("url")
        .setOutputCol("celebs"))

celeb.transform(df).show()

import com.microsoft.azure.synapse.ml.services.vision.RecognizeDomainSpecificContent
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
).toDF("url")

val celeb = (new RecognizeDomainSpecificContent()
  .setSubscriptionKey(cognitiveKey)
  .setModel("celebrities")
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("celebs"))

celeb.transform(df).show()

Python API: RecognizeDomainSpecificContent

Scala API: RecognizeDomainSpecificContent

Source: RecognizeDomainSpecificContent

GenerateThumbnails

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
        ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg", )
    ], ["url", ])

gt = (GenerateThumbnails()
        .setSubscriptionKey(cognitiveKey)
        .setLocation("eastus")
        .setHeight(50)
        .setWidth(50)
        .setSmartCropping(True)
        .setImageUrlCol("url")
        .setOutputCol("thumbnails"))

gt.transform(df).show()

import com.microsoft.azure.synapse.ml.services.vision.GenerateThumbnails
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df: DataFrame = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")

val gt = (new GenerateThumbnails()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setHeight(50)
  .setWidth(50)
  .setSmartCropping(true)
  .setImageUrlCol("url")
  .setOutputCol("thumbnails"))

gt.transform(df).show()

Python API: GenerateThumbnails

Scala API: GenerateThumbnails

Source: GenerateThumbnails

TagImage

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
        ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg", )
    ], ["url", ])

ti = (TagImage()
        .setSubscriptionKey(cognitiveKey)
        .setLocation("eastus")
        .setImageUrlCol("url")
        .setOutputCol("tags"))

ti.transform(df).show()

import com.microsoft.azure.synapse.ml.services.vision.TagImage
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")

val ti = (new TagImage()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("tags"))

ti.transform(df).show()

Python API: TagImage

Scala API: TagImage

Source: TagImage

DescribeImage

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
        ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg", )
    ], ["url", ])

di = (DescribeImage()
        .setSubscriptionKey(cognitiveKey)
        .setLocation("eastus")
        .setMaxCandidates(3)
        .setImageUrlCol("url")
        .setOutputCol("descriptions"))

di.transform(df).show()

import com.microsoft.azure.synapse.ml.services.vision.DescribeImage
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")

val di = (new DescribeImage()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setMaxCandidates(3)
  .setImageUrlCol("url")
  .setOutputCol("descriptions"))

di.transform(df).show()

Python API: DescribeImage

Scala API: DescribeImage

Source: DescribeImage

Form Recognizer

AnalyzeLayout

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg",)
], ["source",])

analyzeLayout = (AnalyzeLayout()
            .setSubscriptionKey(cognitiveKey)
            .setLocation("eastus")
            .setImageUrlCol("source")
            .setOutputCol("layout")
            .setConcurrency(5))

(analyzeLayout.transform(imageDf)
        .withColumn("lines", flatten(col("layout.analyzeResult.readResults.lines")))
        .withColumn("readLayout", col("lines.text"))
        .withColumn("tables", flatten(col("layout.analyzeResult.pageResults.tables")))
        .withColumn("cells", flatten(col("tables.cells")))
        .withColumn("pageLayout", col("cells.text"))
        .select("source", "readLayout", "pageLayout")).show()

import com.microsoft.azure.synapse.ml.services.form.AnalyzeLayout
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg"
).toDF("source")

val analyzeLayout = (new AnalyzeLayout()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("layout")
  .setConcurrency(5))

analyzeLayout.transform(imageDf).show()

Python API: AnalyzeLayout

Scala API: AnalyzeLayout

Source: AnalyzeLayout

AnalyzeReceipts

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",),
  ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",)
], ["image",])

analyzeReceipts = (AnalyzeReceipts()
                  .setSubscriptionKey(cognitiveKey)
                  .setLocation("eastus")
                  .setImageUrlCol("image")
                  .setOutputCol("receipts")
                  .setConcurrency(5))

analyzeReceipts.transform(imageDf).show()

import com.microsoft.azure.synapse.ml.services.form.AnalyzeReceipts
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png"
).toDF("source")

val analyzeReceipts = (new AnalyzeReceipts()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("receipts")
  .setConcurrency(5))

analyzeReceipts.transform(imageDf).show()

Python API: AnalyzeReceipts

Scala API: AnalyzeReceipts

Source: AnalyzeReceipts

AnalyzeBusinessCards

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/business_card.jpg",)
], ["source",])

analyzeBusinessCards = (AnalyzeBusinessCards()
                        .setSubscriptionKey(cognitiveKey)
                        .setLocation("eastus")
                        .setImageUrlCol("source")
                        .setOutputCol("businessCards")
                        .setConcurrency(5))

analyzeBusinessCards.transform(imageDf).show()

import com.microsoft.azure.synapse.ml.services.form.AnalyzeBusinessCards
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/business_card.jpg"
).toDF("source")

val analyzeBusinessCards = (new AnalyzeBusinessCards()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("businessCards")
  .setConcurrency(5))

analyzeBusinessCards.transform(imageDf).show()

Python API: AnalyzeBusinessCards

Scala API: AnalyzeBusinessCards

Source: AnalyzeBusinessCards

AnalyzeInvoices

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png",)
], ["source",])

analyzeInvoices = (AnalyzeInvoices()
                  .setSubscriptionKey(cognitiveKey)
                  .setLocation("eastus")
                  .setImageUrlCol("source")
                  .setOutputCol("invoices")
                  .setConcurrency(5))

(analyzeInvoices
        .transform(imageDf)
        .withColumn("documents", explode(col("invoices.analyzeResult.documentResults.fields")))
        .select("source", "documents")).show()

import com.microsoft.azure.synapse.ml.services.form.AnalyzeInvoices
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png"
).toDF("source")

val analyzeInvoices = (new AnalyzeInvoices()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("invoices")
  .setConcurrency(5))

analyzeInvoices.transform(imageD4).show()

Python API: AnalyzeInvoices

Scala API: AnalyzeInvoices

Source: AnalyzeInvoices

AnalyzeIDDocuments

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/id1.jpg",)
], ["source",])

analyzeIDDocuments = (AnalyzeIDDocuments()
                  .setSubscriptionKey(cognitiveKey)
                  .setLocation("eastus")
                  .setImageUrlCol("source")
                  .setOutputCol("ids")
                  .setConcurrency(5))

(analyzeIDDocuments
        .transform(imageDf)
        .withColumn("documents", explode(col("ids.analyzeResult.documentResults.fields")))
        .select("source", "documents")).show()

import com.microsoft.azure.synapse.ml.services.form.AnalyzeIDDocuments
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/id1.jpg"
).toDF("source")

val analyzeIDDocuments = (new AnalyzeIDDocuments()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("ids")
  .setConcurrency(5))

analyzeIDDocuments.transform(imageDf).show()

Python API: AnalyzeIDDocuments

Scala API: AnalyzeIDDocuments

Source: AnalyzeIDDocuments

AnalyzeCustomModel

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" # put your own modelId here
imageDf = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png",)
], ["source",])

analyzeCustomModel = (AnalyzeCustomModel()
                 .setSubscriptionKey(cognitiveKey)
                 .setLocation("eastus")
                 .setModelId(modelId)
                 .setImageUrlCol("source")
                 .setOutputCol("output")
                 .setConcurrency(5))

(analyzeCustomModel
        .transform(imageDf)
        .withColumn("keyValuePairs", flatten(col("output.analyzeResult.pageResults.keyValuePairs")))
        .withColumn("keys", col("keyValuePairs.key.text"))
        .withColumn("values", col("keyValuePairs.value.text"))
        .withColumn("keyValuePairs", create_map(lit("key"), col("keys"), lit("value"), col("values")))
        .select("source", "keyValuePairs")).show()

import com.microsoft.azure.synapse.ml.services.form.AnalyzeCustomModel
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" // put your own modelId here
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png"
).toDF("source")

val analyzeCustomModel = (new AnalyzeCustomModel()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setModelId(modelId)
  .setImageUrlCol("source")
  .setOutputCol("output")
  .setConcurrency(5))

analyzeCustomModel.transform(imageDf).show()

Python API: AnalyzeCustomModel

Scala API: AnalyzeCustomModel

Source: AnalyzeCustomModel

GetCustomModel

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" # put your own modelId here
emptyDf = spark.createDataFrame([("",)])

getCustomModel = (GetCustomModel()
                  .setSubscriptionKey(cognitiveKey)
                  .setLocation("eastus")
                  .setModelId(modelId)
                  .setIncludeKeys(True)
                  .setOutputCol("model")
                  .setConcurrency(5))

(getCustomModel
        .transform(emptyDf)
        .withColumn("modelInfo", col("model.ModelInfo"))
        .withColumn("trainResult", col("model.TrainResult"))
        .select("modelInfo", "trainResult")).show()

import com.microsoft.azure.synapse.ml.services.form.GetCustomModel
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" // put your own modelId here
val emptyDf = Seq("").toDF()

val getCustomModel = (new GetCustomModel()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setModelId(modelId)
  .setIncludeKeys(true)
  .setOutputCol("model")
  .setConcurrency(5))

getCustomModel.transform(emptyDf).show()

Python API: GetCustomModel

Scala API: GetCustomModel

Source: GetCustomModel

ListCustomModels

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
emptyDf = spark.createDataFrame([("",)])

listCustomModels = (ListCustomModels()
                  .setSubscriptionKey(cognitiveKey)
                  .setLocation("eastus")
                  .setOp("full")
                  .setOutputCol("models")
                  .setConcurrency(5))

(listCustomModels
       .transform(emptyDf)
       .withColumn("modelIds", col("models.modelList.modelId"))
       .select("modelIds")).show()

import com.microsoft.azure.synapse.ml.services.form.ListCustomModels
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val emptyDf = Seq("").toDF()

val listCustomModels = (new ListCustomModels()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setOp("full")
  .setOutputCol("models")
  .setConcurrency(5))

listCustomModels.transform(emptyDf).show()

Python API: ListCustomModels

Scala API: ListCustomModels

Source: ListCustomModels

Form Recognizer V3

AnalyzeDocument

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg",)
], ["source",])

analyzeDocument = (AnalyzeDocument()
            # For supported prebuilt models, please go to documentation page for details
            .setPrebuiltModelId("prebuilt-layout")
            .setSubscriptionKey(cognitiveKey)
            .setLocation("eastus")
            .setImageUrlCol("source")
            .setOutputCol("result")
            .setConcurrency(5))

(analyzeDocument.transform(imageDf)
        .withColumn("content", col("result.analyzeResult.content"))
        .withColumn("cells", flatten(col("result.analyzeResult.tables.cells")))
        .withColumn("cells", col("cells.content"))
        .select("source", "result", "content", "cells")).show()

import com.microsoft.azure.synapse.ml.services.form.AnalyzeDocument
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val imageDf = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg"
).toDF("source")

val analyzeDocument = (new AnalyzeDocument()
  .setPrebuiltModelId("prebuilt-layout")
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("source")
  .setOutputCol("result")
  .setConcurrency(5))

analyzeDocument.transform(imageDf).show()

Python API: AnalyzeDocument

Scala API: AnalyzeDocument

Source: AnalyzeDocument

Anomaly Detection

DetectLastAnomaly

Python
Scala

from synapse.ml.services import *
from pyspark.sql.functions import lit

anomalyKey = os.environ.get("ANOMALY_API_KEY", getSecret("anomaly-api-key"))
df = (spark.createDataFrame([
    ("1972-01-01T00:00:00Z", 826.0),
    ("1972-02-01T00:00:00Z", 799.0),
    ("1972-03-01T00:00:00Z", 890.0),
    ("1972-04-01T00:00:00Z", 900.0),
    ("1972-05-01T00:00:00Z", 766.0),
    ("1972-06-01T00:00:00Z", 805.0),
    ("1972-07-01T00:00:00Z", 821.0),
    ("1972-08-01T00:00:00Z", 20000.0),
    ("1972-09-01T00:00:00Z", 883.0),
    ("1972-10-01T00:00:00Z", 898.0),
    ("1972-11-01T00:00:00Z", 957.0),
    ("1972-12-01T00:00:00Z", 924.0),
    ("1973-01-01T00:00:00Z", 881.0),
    ("1973-02-01T00:00:00Z", 837.0),
    ("1973-03-01T00:00:00Z", 90000.0)
], ["timestamp", "value"])
      .withColumn("group", lit(1))
      .withColumn("inputs", struct(col("timestamp"), col("value")))
      .groupBy(col("group"))
      .agg(sort_array(collect_list(col("inputs"))).alias("inputs")))

dla = (DetectLastAnomaly()
      .setSubscriptionKey(anomalyKey)
      .setLocation("westus2")
      .setOutputCol("anomalies")
      .setSeriesCol("inputs")
      .setGranularity("monthly")
      .setErrorCol("errors"))

dla.transform(df).show()

import com.microsoft.azure.synapse.ml.services.anomaly.DetectLastAnomaly
import spark.implicits._
import org.apache.spark.sql.functions.{col, collect_list, lit, sort_array, struct}

val anomalyKey = sys.env.getOrElse("ANOMALY_API_KEY", None)
val df = (Seq(
  ("1972-01-01T00:00:00Z", 826.0),
  ("1972-02-01T00:00:00Z", 799.0),
  ("1972-03-01T00:00:00Z", 890.0),
  ("1972-04-01T00:00:00Z", 900.0),
  ("1972-05-01T00:00:00Z", 766.0),
  ("1972-06-01T00:00:00Z", 805.0),
  ("1972-07-01T00:00:00Z", 821.0),
  ("1972-08-01T00:00:00Z", 20000.0),
  ("1972-09-01T00:00:00Z", 883.0),
  ("1972-10-01T00:00:00Z", 898.0),
  ("1972-11-01T00:00:00Z", 957.0),
  ("1972-12-01T00:00:00Z", 924.0),
  ("1973-01-01T00:00:00Z", 881.0),
  ("1973-02-01T00:00:00Z", 837.0),
  ("1973-03-01T00:00:00Z", 90000.0)
).toDF("timestamp", "value")
  .withColumn("group", lit(1))
  .withColumn("inputs", struct(col("timestamp"), col("value")))
  .groupBy(col("group"))
  .agg(sort_array(collect_list(col("inputs"))).alias("inputs")))

val dla = (new DetectLastAnomaly()
  .setSubscriptionKey(anomalyKey)
  .setLocation("westus2")
  .setOutputCol("anomalies")
  .setSeriesCol("inputs")
  .setGranularity("monthly")
  .setErrorCol("errors"))

dla.transform(df).show()

Python API: DetectLastAnomaly

Scala API: DetectLastAnomaly

Source: DetectLastAnomaly

DetectAnomalies

Python
Scala

from synapse.ml.services import *

anomalyKey = os.environ.get("ANOMALY_API_KEY", getSecret("anomaly-api-key"))
df = (spark.createDataFrame([
    ("1972-01-01T00:00:00Z", 826.0),
    ("1972-02-01T00:00:00Z", 799.0),
    ("1972-03-01T00:00:00Z", 890.0),
    ("1972-04-01T00:00:00Z", 900.0),
    ("1972-05-01T00:00:00Z", 766.0),
    ("1972-06-01T00:00:00Z", 805.0),
    ("1972-07-01T00:00:00Z", 821.0),
    ("1972-08-01T00:00:00Z", 20000.0),
    ("1972-09-01T00:00:00Z", 883.0),
    ("1972-10-01T00:00:00Z", 898.0),
    ("1972-11-01T00:00:00Z", 957.0),
    ("1972-12-01T00:00:00Z", 924.0),
    ("1973-01-01T00:00:00Z", 881.0),
    ("1973-02-01T00:00:00Z", 837.0),
    ("1973-03-01T00:00:00Z", 90000.0)
], ["timestamp", "value"])
      .withColumn("group", lit(1))
      .withColumn("inputs", struct(col("timestamp"), col("value")))
      .groupBy(col("group"))
      .agg(sort_array(collect_list(col("inputs"))).alias("inputs")))

da = (DetectAnomalies()
      .setSubscriptionKey(anomalyKey)
      .setLocation("westus2")
      .setOutputCol("anomalies")
      .setSeriesCol("inputs")
      .setGranularity("monthly"))

da.transform(df).show()

import com.microsoft.azure.synapse.ml.services.anomaly.DetectAnomalies
import spark.implicits._

val anomalyKey = sys.env.getOrElse("ANOMALY_API_KEY", None)
val df = (Seq(
  ("1972-01-01T00:00:00Z", 826.0),
  ("1972-02-01T00:00:00Z", 799.0),
  ("1972-03-01T00:00:00Z", 890.0),
  ("1972-04-01T00:00:00Z", 900.0),
  ("1972-05-01T00:00:00Z", 766.0),
  ("1972-06-01T00:00:00Z", 805.0),
  ("1972-07-01T00:00:00Z", 821.0),
  ("1972-08-01T00:00:00Z", 20000.0),
  ("1972-09-01T00:00:00Z", 883.0),
  ("1972-10-01T00:00:00Z", 898.0),
  ("1972-11-01T00:00:00Z", 957.0),
  ("1972-12-01T00:00:00Z", 924.0),
  ("1973-01-01T00:00:00Z", 881.0),
  ("1973-02-01T00:00:00Z", 837.0),
  ("1973-03-01T00:00:00Z", 90000.0)
).toDF("timestamp", "value")
  .withColumn("group", lit(1))
  .withColumn("inputs", struct(col("timestamp"), col("value")))
  .groupBy(col("group"))
  .agg(sort_array(collect_list(col("inputs"))).alias("inputs")))

val da = (new DetectAnomalies()
  .setSubscriptionKey(anomalyKey)
  .setLocation("westus2")
  .setOutputCol("anomalies")
  .setSeriesCol("inputs")
  .setGranularity("monthly"))

da.transform(df).show()

Python API: DetectAnomalies

Scala API: DetectAnomalies

Source: DetectAnomalies

SimpleDetectAnomalies

Python
Scala

from synapse.ml.services import *

anomalyKey = os.environ.get("ANOMALY_API_KEY", getSecret("anomaly-api-key"))
df = (spark.createDataFrame([
    ("1972-01-01T00:00:00Z", 826.0, 1.0),
    ("1972-02-01T00:00:00Z", 799.0, 1.0),
    ("1972-03-01T00:00:00Z", 890.0, 1.0),
    ("1972-04-01T00:00:00Z", 900.0, 1.0),
    ("1972-05-01T00:00:00Z", 766.0, 1.0),
    ("1972-06-01T00:00:00Z", 805.0, 1.0),
    ("1972-07-01T00:00:00Z", 821.0, 1.0),
    ("1972-08-01T00:00:00Z", 20000.0, 1.0),
    ("1972-09-01T00:00:00Z", 883.0, 1.0),
    ("1972-10-01T00:00:00Z", 898.0, 1.0),
    ("1972-11-01T00:00:00Z", 957.0, 1.0),
    ("1972-12-01T00:00:00Z", 924.0, 1.0),
    ("1973-01-01T00:00:00Z", 881.0, 1.0),
    ("1973-02-01T00:00:00Z", 837.0, 1.0),
    ("1973-03-01T00:00:00Z", 90000.0, 1.0),
    ("1972-01-01T00:00:00Z", 826.0, 2.0),
    ("1972-02-01T00:00:00Z", 799.0, 2.0),
    ("1972-03-01T00:00:00Z", 890.0, 2.0),
    ("1972-04-01T00:00:00Z", 900.0, 2.0),
    ("1972-05-01T00:00:00Z", 766.0, 2.0),
    ("1972-06-01T00:00:00Z", 805.0, 2.0),
    ("1972-07-01T00:00:00Z", 821.0, 2.0),
    ("1972-08-01T00:00:00Z", 20000.0, 2.0),
    ("1972-09-01T00:00:00Z", 883.0, 2.0),
    ("1972-10-01T00:00:00Z", 898.0, 2.0),
    ("1972-11-01T00:00:00Z", 957.0, 2.0),
    ("1972-12-01T00:00:00Z", 924.0, 2.0),
    ("1973-01-01T00:00:00Z", 881.0, 2.0),
    ("1973-02-01T00:00:00Z", 837.0, 2.0),
    ("1973-03-01T00:00:00Z", 90000.0, 2.0)
], ["timestamp", "value", "group"]))

sda = (SimpleDetectAnomalies()
        .setSubscriptionKey(anomalyKey)
        .setLocation("westus2")
        .setOutputCol("anomalies")
        .setGroupbyCol("group")
        .setGranularity("monthly"))

sda.transform(df).show()

import com.microsoft.azure.synapse.ml.services.anomaly.SimpleDetectAnomalies
import spark.implicits._

val anomalyKey = sys.env.getOrElse("ANOMALY_API_KEY", None)
val baseSeq = Seq(
  ("1972-01-01T00:00:00Z", 826.0),
  ("1972-02-01T00:00:00Z", 799.0),
  ("1972-03-01T00:00:00Z", 890.0),
  ("1972-04-01T00:00:00Z", 900.0),
  ("1972-05-01T00:00:00Z", 766.0),
  ("1972-06-01T00:00:00Z", 805.0),
  ("1972-07-01T00:00:00Z", 821.0),
  ("1972-08-01T00:00:00Z", 20000.0),
  ("1972-09-01T00:00:00Z", 883.0),
  ("1972-10-01T00:00:00Z", 898.0),
  ("1972-11-01T00:00:00Z", 957.0),
  ("1972-12-01T00:00:00Z", 924.0),
  ("1973-01-01T00:00:00Z", 881.0),
  ("1973-02-01T00:00:00Z", 837.0),
  ("1973-03-01T00:00:00Z", 9000.0)
)
val df = (baseSeq.map(p => (p._1, p._2, 1.0))
  .++(baseSeq.map(p => (p._1, p._2, 2.0)))
  .toDF("timestamp", "value", "group"))

val sda = (new SimpleDetectAnomalies()
  .setSubscriptionKey(anomalyKey)
  .setLocation("westus2")
  .setOutputCol("anomalies")
  .setGroupbyCol("group")
  .setGranularity("monthly"))

sda.transform(df).show()

Python API: SimpleDetectAnomalies

Scala API: SimpleDetectAnomalies

Source: SimpleDetectAnomalies

Face

DetectFace

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",),
], ["url"])

face = (DetectFace()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setOutputCol("detected_faces")
    .setReturnFaceId(True)
    .setReturnFaceLandmarks(False)
    .setReturnFaceAttributes(["age", "gender", "headPose", "smile", "facialHair", "glasses", "emotion",
      "hair", "makeup", "occlusion", "accessories", "blur", "exposure", "noise"]))

face.transform(df).show()

import com.microsoft.azure.synapse.ml.services.face.DetectFace
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df: DataFrame = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
).toDF("url")

val face = (new DetectFace()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("face")
  .setReturnFaceId(true)
  .setReturnFaceLandmarks(true)
  .setReturnFaceAttributes(Seq(
    "age", "gender", "headPose", "smile", "facialHair", "glasses", "emotion",
    "hair", "makeup", "occlusion", "accessories", "blur", "exposure", "noise")))

face.transform(df).show()

Python API: DetectFace

Scala API: DetectFace

Source: DetectFace

FindSimilarFace

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",),
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",),
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg",)
], ["url"])

detector = (DetectFace()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setOutputCol("detected_faces")
    .setReturnFaceId(True)
    .setReturnFaceLandmarks(False)
    .setReturnFaceAttributes([]))

faceIdDF = detector.transform(df).select("detected_faces").select(col("detected_faces").getItem(0).getItem("faceId").alias("id"))
faceIds = [row.asDict()['id'] for row in faceIdDF.collect()]

findSimilar = (FindSimilarFace()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setOutputCol("similar")
    .setFaceIdCol("id")
    .setFaceIds(faceIds))

findSimilar.transform(faceIdDF).show()

import com.microsoft.azure.synapse.ml.services.face.{DetectFace, FindSimilarFace}
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df: DataFrame = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg"
).toDF("url")
val detector = (new DetectFace()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("detected_faces")
  .setReturnFaceId(true)
  .setReturnFaceLandmarks(false)
  .setReturnFaceAttributes(Seq()))

val faceIdDF = (detector.transform(df)
  .select(col("detected_faces").getItem(0).getItem("faceId").alias("id"))
  .cache())
val faceIds = faceIdDF.collect().map(row => row.getAs[String]("id"))

val findSimilar = (new FindSimilarFace()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setOutputCol("similar")
  .setFaceIdCol("id")
  .setFaceIds(faceIds))

findSimilar.transform(faceIdDF).show()

Python API: FindSimilarFace

Scala API: FindSimilarFace

Source: FindSimilarFace

GroupFaces

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",),
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",),
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg",)
], ["url"])

detector = (DetectFace()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setOutputCol("detected_faces")
    .setReturnFaceId(True)
    .setReturnFaceLandmarks(False)
    .setReturnFaceAttributes([]))

faceIdDF = detector.transform(df).select("detected_faces").select(col("detected_faces").getItem(0).getItem("faceId").alias("id"))
faceIds = [row.asDict()['id'] for row in faceIdDF.collect()]

group = (GroupFaces()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setOutputCol("grouping")
    .setFaceIds(faceIds))

group.transform(faceIdDF).show()

import com.microsoft.azure.synapse.ml.services.face.{DetectFace, GroupFaces}
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df: DataFrame = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg"
).toDF("url")
val detector = (new DetectFace()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("detected_faces")
  .setReturnFaceId(true)
  .setReturnFaceLandmarks(false)
  .setReturnFaceAttributes(Seq()))

val faceIdDF = (detector.transform(df)
  .select(col("detected_faces").getItem(0).getItem("faceId").alias("id"))
  .cache())
val faceIds = faceIdDF.collect().map(row => row.getAs[String]("id"))

val group = (new GroupFaces()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setOutputCol("grouping")
  .setFaceIds(faceIds))

group.transform(faceIdDF).show()

Python API: GroupFaces

Scala API: GroupFaces

Source: GroupFaces

IdentifyFaces

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
pgId = "PUT_YOUR_PERSON_GROUP_ID"

identifyFaces = (IdentifyFaces()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setFaceIdsCol("faces")
    .setPersonGroupId(pgId)
    .setOutputCol("identified_faces"))

import com.microsoft.azure.synapse.ml.services.face.IdentifyFaces
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val pgId = "PUT_YOUR_PERSON_GROUP_ID"

val identifyFaces = (new IdentifyFaces()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setFaceIdsCol("faces")
  .setPersonGroupId(pgId)
  .setOutputCol("identified_faces"))

Python API: IdentifyFaces

Scala API: IdentifyFaces

Source: IdentifyFaces

VerifyFaces

Python
Scala

from synapse.ml.services import *

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",),
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",),
  ("https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg",)
], ["url"])

detector = (DetectFace()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setOutputCol("detected_faces")
    .setReturnFaceId(True)
    .setReturnFaceLandmarks(False)
    .setReturnFaceAttributes([]))

faceIdDF = detector.transform(df).select("detected_faces").select(col("detected_faces").getItem(0).getItem("faceId").alias("faceId1"))
faceIdDF2 = faceIdDF.withColumn("faceId2", lit(faceIdDF.take(1)[0].asDict()['faceId1']))

verify = (VerifyFaces()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setOutputCol("same")
    .setFaceId1Col("faceId1")
    .setFaceId2Col("faceId2"))

verify.transform(faceIdDF2).show()

import com.microsoft.azure.synapse.ml.services.face.{DetectFace, VerifyFaces}
import spark.implicits._

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df: DataFrame = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg"
).toDF("url")

val detector = (new DetectFace()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("detected_faces")
  .setReturnFaceId(true)
  .setReturnFaceLandmarks(false)
  .setReturnFaceAttributes(Seq()))

val faceIdDF = (detector.transform(df)
  .select(col("detected_faces").getItem(0).getItem("faceId").alias("faceId1"))
  .cache())
val faceIdDF2 = faceIdDF.withColumn("faceId2", lit(faceIdDF.take(1).head.getString(0)))

val verify = (new VerifyFaces()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setOutputCol("same")
  .setFaceId1Col("faceId1")
  .setFaceId2Col("faceId2"))

verify.transform(faceIdDF2).show()

Python API: VerifyFaces

Scala API: VerifyFaces

Source: VerifyFaces

Speech To Text

SpeechToText

Python
Scala

from synapse.ml.services import *
import requests

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
link = "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav"
audioBytes = requests.get(link).content
df = spark.createDataFrame([(audioBytes,)
                           ], ["audio"])

stt = (SpeechToText()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setOutputCol("text")
    .setAudioDataCol("audio")
    .setLanguage("en-US")
    .setFormat("simple"))

stt.transform(df).show()

import com.microsoft.azure.synapse.ml.services.speech.SpeechToText
import org.apache.commons.compress.utils.IOUtils
import spark.implicits._
import java.net.URL

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val audioBytes = IOUtils.toByteArray(new URL("https://mmlspark.blob.core.windows.net/datasets/Speech/test1.wav").openStream())

val df: DataFrame = Seq(
  Tuple1(audioBytes)
).toDF("audio")

val stt = (new SpeechToText()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setOutputCol("text")
  .setAudioDataCol("audio")
  .setLanguage("en-US")
  .setFormat("simple"))

stt.transform(df).show()

Python API: SpeechToText

Scala API: SpeechToText

Source: SpeechToText

SpeechToTextSDK

Python
Scala

from synapse.ml.services import *
import requests

cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([("https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav",)
                           ], ["url"])

speech_to_text = (SpeechToTextSDK()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setOutputCol("text")
    .setAudioDataCol("url")
    .setLanguage("en-US")
    .setProfanity("Masked"))

speech_to_text.transform(df).show()

import com.microsoft.azure.synapse.ml.services.speech.SpeechToTextSDK
import spark.implicits._
import org.apache.commons.compress.utils.IOUtils
import java.net.URL

val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", None)
val df: DataFrame = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav"
).toDF("url")

val speech_to_text = (new SpeechToTextSDK()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setOutputCol("text")
  .setAudioDataCol("url")
  .setLanguage("en-US")
  .setProfanity("Masked"))

speech_to_text.transform(df).show()

Python API: SpeechToTextSDK

Scala API: SpeechToTextSDK

Source: SpeechToTextSDK

Azure Search

AzureSearch

Python
Scala

from synapse.ml.services import *

azureSearchKey = os.environ.get("AZURE_SEARCH_KEY", getSecret("azure-search-key"))
testServiceName = "mmlspark-azure-search"

indexName = "test-website"

def createSimpleIndexJson(indexName):
    json_str = """
       {
           "name": "%s",
           "fields": [
               {
                   "name": "id",
                   "type": "Edm.String",
                   "key": true,
                   "facetable": false
                },
                {
                    "name": "fileName",
                    "type": "Edm.String",
                    "searchable": false,
                    "sortable": false,
                    "facetable": false
                },
                {
                    "name": "text",
                    "type": "Edm.String",
                    "filterable": false,
                    "sortable": false,
                    "facetable": false
                }
            ]
        }
    """

    return json_str % indexName

df = (spark.createDataFrame([
    ("upload", "0", "file0", "text0"),
    ("upload", "1", "file1", "text1"),
    ("upload", "2", "file2", "text2"),
    ("upload", "3", "file3", "text3")
], ["searchAction", "id", "fileName", "text"]))

ad = (AddDocuments()
      .setSubscriptionKey(azureSearchKey)
      .setServiceName(testServiceName)
      .setOutputCol("out")
      .setErrorCol("err")
      .setIndexName(indexName)
      .setActionCol("searchAction"))

ad.transform(df).show()

AzureSearchWriter.writeToAzureSearch(df,
    subscriptionKey=azureSearchKey,
    actionCol="searchAction",
    serviceName=testServiceName,
    indexJson=createSimpleIndexJson(indexName))

import com.microsoft.azure.synapse.ml.services.search.{AddDocuments, AzureSearchWriter}
import spark.implicits._

val azureSearchKey = sys.env.getOrElse("AZURE_SEARCH_KEY", None)
val testServiceName = "mmlspark-azure-search"

val indexName = "test-website"

def createSimpleIndexJson(indexName: String) = {
  s"""
     |{
     |    "name": "$indexName",
     |    "fields": [
     |      {
     |        "name": "id",
     |        "type": "Edm.String",
     |        "key": true,
     |        "facetable": false
     |      },
     |    {
     |      "name": "fileName",
     |      "type": "Edm.String",
     |      "searchable": false,
     |      "sortable": false,
     |      "facetable": false
     |    },
     |    {
     |      "name": "text",
     |      "type": "Edm.String",
     |      "filterable": false,
     |      "sortable": false,
     |      "facetable": false
     |    }
     |    ]
     |  }
    """.stripMargin
}

val df = ((0 until 4)
  .map(i => ("upload", s"$i", s"file$i", s"text$i"))
  .toDF("searchAction", "id", "fileName", "text"))

val ad = (new AddDocuments()
  .setSubscriptionKey(azureSearchKey)
  .setServiceName(testServiceName)
  .setOutputCol("out")
  .setErrorCol("err")
  .setIndexName(indexName)
  .setActionCol("searchAction"))

ad.transform(df).show()

AzureSearchWriter.write(df,
  Map("subscriptionKey" -> azureSearchKey,
    "actionCol" -> "searchAction",
    "serviceName" -> testServiceName,
    "indexJson" -> createSimpleIndexJson(indexName)))

Python API: AzureSearch

Scala API: AzureSearch

Source: AzureSearch

Bing Image Search

BingImageSearch

Python
Scala

from synapse.ml.services import *

bingSearchKey = os.environ.get("BING_SEARCH_KEY", getSecret("bing-search-key"))

# Number of images Bing will return per query
imgsPerBatch = 10
# A list of offsets, used to page into the search results
offsets = [(i*imgsPerBatch,) for i in range(100)]
# Since web content is our data, we create a dataframe with options on that data: offsets
bingParameters = spark.createDataFrame(offsets, ["offset"])

# Run the Bing Image Search service with our text query
bingSearch = (BingImageSearch()
              .setSubscriptionKey(bingSearchKey)
              .setOffsetCol("offset")
              .setQuery("Martin Luther King Jr. quotes")
              .setCount(imgsPerBatch)
              .setOutputCol("images"))

# Transformer that extracts and flattens the richly structured output of Bing Image Search into a simple URL column
getUrls = BingImageSearch.getUrlTransformer("images", "url")

# This displays the full results returned
bingSearch.transform(bingParameters).show()

# Since we have two services, they are put into a pipeline
pipeline = PipelineModel(stages=[bingSearch, getUrls])

# Show the results of your search: image URLs
pipeline.transform(bingParameters).show()

import com.microsoft.azure.synapse.ml.services.bing.BingImageSearch
import spark.implicits._

val bingSearchKey = sys.env.getOrElse("BING_SEARCH_KEY", None)

// Number of images Bing will return per query
val imgsPerBatch = 10
// A list of offsets, used to page into the search results
val offsets = (0 until 100).map(i => i * imgsPerBatch)
// Since web content is our data, we create a dataframe with options on that data: offsets
val bingParameters = Seq(offsets).toDF("offset")

// Run the Bing Image Search service with our text query
val bingSearch = (new BingImageSearch()
  .setSubscriptionKey(bingSearchKey)
  .setOffsetCol("offset")
  .setQuery("Martin Luther King Jr. quotes")
  .setCount(imgsPerBatch)
  .setOutputCol("images"))

// Transformer that extracts and flattens the richly structured output of Bing Image Search into a simple URL column
val getUrls = BingImageSearch.getUrlTransformer("images", "url")

// This displays the full results returned
bingSearch.transform(bingParameters).show()

// Show the results of your search: image URLs
getUrls.transform(bingSearch.transform(bingParameters)).show()

Python API: BingImageSearch

Scala API: BingImageSearch

Source: BingImageSearch

Text Analytics​

EntityDetector​

KeyPhraseExtractor​

LanguageDetector​

NER​

PII​

TextSentiment​

Translator​

Translate​

Transliterate​

Detect​

BreakSentence​

DictionaryLookup​

DictionaryExamples​

DocumentTranslator​

Computer Vision​

OCR​

AnalyzeImage​

RecognizeText​

ReadImage​

RecognizeDomainSpecificContent​

GenerateThumbnails​

TagImage​

DescribeImage​

Form Recognizer​

AnalyzeLayout​

AnalyzeReceipts​

AnalyzeBusinessCards​

AnalyzeInvoices​

AnalyzeIDDocuments​

AnalyzeCustomModel​

GetCustomModel​

ListCustomModels​

Form Recognizer V3​

AnalyzeDocument​

Anomaly Detection​

DetectLastAnomaly​

DetectAnomalies​

SimpleDetectAnomalies​

Face​

DetectFace​

FindSimilarFace​

GroupFaces​

IdentifyFaces​

VerifyFaces​

Speech To Text​

SpeechToText​

SpeechToTextSDK​

Azure Search​

AzureSearch​

Bing Image Search​

BingImageSearch​