In [1]:
!pip install sagemaker --upgrade
!pip install boto3 --upgrade



In [2]:
import sagemaker
import boto3
# Bucket SageMaker uses for uploaded data, models and logs.
# Leave it as None to fall back to the session's default bucket; SageMaker
# creates that bucket automatically if it does not exist yet.
sagemaker_session_bucket = None

sess = sagemaker.Session()
if sess is not None and sagemaker_session_bucket is None:
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the execution role. get_execution_role() raising ValueError is
# handled by looking the role up by name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role_description = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_description['Role']['Arn']

# Recreate the session so it is explicitly bound to the chosen bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved settings.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
sagemaker role arn: arn:aws:iam::637423338589:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole
sagemaker bucket: sagemaker-us-east-1-637423338589
sagemaker session region: us-east-1


In [3]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
from sagemaker.serverless import ServerlessInferenceConfig

# Environment passed to the model container.
# HF_MODEL_ID names the model on the Hugging Face Hub (https://huggingface.co/models);
# json.dumps(1) supplies SM_NUM_GPUS as the string "1".
hub = dict(
    HF_MODEL_ID='deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
    SM_NUM_GPUS=json.dumps(1),
)

# Container image for the Hugging Face LLM backend at the requested version.
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="3.2.3")

# Model object combining the image, its environment and the execution role.
huggingface_model = HuggingFaceModel(image_uri=llm_image_uri, env=hub, role=role)

print("huggingface_model", huggingface_model)

huggingface_model <sagemaker.huggingface.model.HuggingFaceModel object at 0x7f9677d48610>


In [4]:
# # Specify MemorySizeInMB and MaxConcurrency in the serverless config object
# serverless_config = ServerlessInferenceConfig(
#     memory_size_in_mb=6144, max_concurrency=10,
# )

# # deploy the serverless endpoint
# predictor = huggingface_model.deploy(
#     serverless_inference_config=serverless_config
# )

# Deploy the model to SageMaker Inference on a dedicated instance.
# The arguments are collected first so the deployment settings are easy to scan.
instance_deployment = dict(
    initial_instance_count=1,
    instance_type="ml.g5.2xlarge",
    container_startup_health_check_timeout=300,
)
predictor = huggingface_model.deploy(**instance_deployment)

------------!

In [9]:
# Request payload: "inputs" carries the prompt text sent to the endpoint.
data = {
  "inputs": "the mesmerizing performances of the leads keep the film grounded and keep the audience riveted .",
}

# res = predictor.predict(data=data)
# print(res)

# Attach a generic Predictor to the endpoint deployed earlier in this notebook.
# The endpoint name is read from `predictor` instead of being hard-coded: the
# generated name embeds a timestamp, so a literal copied from one run no longer
# matches after the model is redeployed.
predictor_2 = sagemaker.predictor.Predictor(
    endpoint_name=predictor.endpoint_name,
    sagemaker_session=sess,
    serializer=sagemaker.serializers.JSONSerializer(),  # send the payload as JSON
    deserializer=sagemaker.deserializers.JSONDeserializer(),  # parse the JSON response
)
res_2 = predictor_2.predict(data=data)
print(res_2)

[{'generated_text': 'the mesmerizing performances of the leads keep the film grounded and keep the audience riveted . . . raw emotion and powerful performances.\nThis is from a political article, but the language reflects some things. For instance, \'mesmerizing performances\'--Perhaps the singer is captivating, the actors are engaging, the actors convey the characters effectively.\n\nMore specifically, the article is discussing a film so the sentences are within that context. But the user is asking about refining a general sentence structure for them (as in for any text or sentences). Wait, maybe they’re showing or summarizing an example, but for the sentence refinement they need a more formal version or better structure.\n\nAlternatively, maybe the user is referring to my previous examples.\n\nWait, perhaps the initial response is a prompt, structured from the given feedback.\n\nWait, no—perhaps the user is someone who is being told that, maybe in response to an assignment or a revie

In [10]:
# Show both predictor objects, each prefixed with its variable name.
for label, obj in (("predictor", predictor), ("predictor_2", predictor_2)):
    print(label, obj)

predictor HuggingFacePredictor: {'endpoint_name': 'huggingface-pytorch-tgi-inference-2025-06-26-09-59-09-507', 'sagemaker_session': <sagemaker.session.Session object at 0x7f9679307430>, 'serializer': <sagemaker.base_serializers.JSONSerializer object at 0x7f96bc3b1660>, 'deserializer': <sagemaker.base_deserializers.JSONDeserializer object at 0x7f96bc3b11b0>}


In [5]:
# Clean up: remove the model and then the endpoint behind `predictor`.
# NOTE(review): the order looks intentional — delete_model() presumably needs
# the endpoint's configuration to still exist in order to find the model;
# confirm against the SageMaker SDK docs before reordering.
predictor.delete_model()
predictor.delete_endpoint()
